1
0
Fork 0
mirror of https://github.com/archtechx/todo-system.git synced 2025-12-12 00:54:03 +00:00

perf: store glob exclude patterns instead of traversing glob(...)

Previously add_excludes_from_gitignore() would use glob() and
recursively traverse the generated paths to add them to excludes.

Now we store excludes as an enum - Path or Glob - and use the glob
crate's `Pattern.matches_path()` as needed, instead of the preemptive
traversal.
This commit is contained in:
Samuel Štancl 2025-09-13 22:21:48 +02:00
parent ea031d81f0
commit 32a57ab5fb
2 changed files with 33 additions and 16 deletions

View file

@ -4,7 +4,7 @@ use std::path::PathBuf;
use clap::{Parser, ArgAction};
use crate::entries::Entry;
use crate::render::render_entries;
use crate::scan::{Stats, scan_dir, scan_todo_file, scan_readme_file};
use crate::scan::{Stats, scan_dir, scan_todo_file, scan_readme_file, Exclude};
pub mod scan;
pub mod render;
@ -43,7 +43,7 @@ fn main() {
let root_dir: PathBuf = std::env::current_dir().unwrap();
let mut paths: Vec<PathBuf> = vec![];
let mut excludes: Vec<PathBuf> = vec![];
let mut excludes: Vec<Exclude> = vec![];
let mut entries: Vec<Entry> = vec![];
let mut stats = Stats::new(args.verbose);
@ -68,7 +68,7 @@ fn main() {
if path.exists() {
if let Ok(realpath) = canonicalize(path) {
excludes.push(realpath);
excludes.push(Exclude::Path(realpath));
}
}
}
@ -80,13 +80,13 @@ fn main() {
readme_path.push(&args.readme);
if todos_path.exists() {
excludes.push(todos_path.clone());
excludes.push(Exclude::Path(todos_path.clone()));
scan_todo_file(&todos_path, &mut entries).unwrap();
}
if readme_path.exists() {
excludes.push(readme_path.clone());
excludes.push(Exclude::Path(readme_path.clone()));
scan_readme_file(&readme_path, &mut entries).unwrap();
}

View file

@ -1,12 +1,27 @@
use std::io;
use std::fs::{self, canonicalize};
use std::path::{Path, PathBuf};
use glob::glob;
use glob::{Pattern};
const PRIORITY_CHARS: [char; 10] = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
use crate::entries::{Entry, EntryData, Location};
#[derive(Debug, PartialEq, Eq)]
pub enum Exclude {
Path(PathBuf),
Glob(Pattern),
}
impl Exclude {
pub fn matches(&self, path: &Path) -> bool {
match self {
Exclude::Path(p) => p == path,
Exclude::Glob(g) => g.matches_path(path),
}
}
}
pub struct Stats {
visited_folder_count: usize,
visited_file_count: usize,
@ -100,7 +115,7 @@ fn clean_line<'a>(line: &'a str, delimiter_word: &str) -> &'a str {
.trim()
}
pub fn add_excludes_from_gitignore(base_dir: &PathBuf, excludes: &mut Vec<PathBuf>) {
pub fn add_excludes_from_gitignore(base_dir: &PathBuf, excludes: &mut Vec<Exclude>) {
let mut gitignore = base_dir.clone();
gitignore.push(".gitignore");
@ -115,7 +130,7 @@ pub fn add_excludes_from_gitignore(base_dir: &PathBuf, excludes: &mut Vec<PathBu
if line.trim() == "*" {
if let Ok(realpath) = canonicalize(base_dir) {
excludes.push(realpath);
excludes.push(Exclude::Path(realpath));
}
break;
@ -132,12 +147,14 @@ pub fn add_excludes_from_gitignore(base_dir: &PathBuf, excludes: &mut Vec<PathBu
let mut pattern = base_dir.clone();
pattern.push(line.trim_end_matches("*/").trim_matches('/'));
if let Some(pattern_str) = pattern.to_str() {
for path in glob(pattern_str).unwrap() {
if let Ok(exclude) = canonicalize(path.unwrap()) {
excludes.push(exclude);
if pattern.to_str().unwrap().contains('*') {
if let Some(str) = pattern.to_str() {
if let Ok(p) = Pattern::new(str) {
excludes.push(Exclude::Glob(p));
}
}
} else {
excludes.push(Exclude::Path(pattern));
}
}
}
@ -226,7 +243,7 @@ pub fn scan_file(path: &Path, entries: &mut Vec<Entry>) -> io::Result<()> {
Ok(())
}
pub fn scan_dir(dir: &Path, entries: &mut Vec<Entry>, excludes: &mut Vec<PathBuf>, stats: &mut Stats) -> io::Result<()> {
pub fn scan_dir(dir: &Path, entries: &mut Vec<Entry>, excludes: &mut Vec<Exclude>, stats: &mut Stats) -> io::Result<()> {
let mut gitignore = dir.to_path_buf().clone();
gitignore.push(".gitignore");
@ -234,11 +251,11 @@ pub fn scan_dir(dir: &Path, entries: &mut Vec<Entry>, excludes: &mut Vec<PathBuf
add_excludes_from_gitignore(&dir.to_path_buf(), excludes);
// `add_excludes_from_gitignore` can add the *entire* directory being scanned here to excludes
// e.g. if it contains a `*` line. Tthe directory is visited first, and gitignore is read second,
// e.g. if it contains a `*` line. The directory is visited first, and gitignore is read second,
// so the exclude would not affect anything inside the for loop. For that reason, we re-check if
// `dir` hasn't become excluded after running `add_excludes_from_gitignore`.
for exclude in &*excludes {
if canonicalize(dir).unwrap() == *exclude {
if Exclude::Path(canonicalize(dir).unwrap()) == *exclude {
return Ok(());
}
}
@ -255,7 +272,7 @@ pub fn scan_dir(dir: &Path, entries: &mut Vec<Entry>, excludes: &mut Vec<PathBuf
}
for exclude in &*excludes {
if canonicalize(&path).unwrap() == *exclude {
if exclude.matches(&path) {
continue 'entry;
}
}