From 0d0795dcaafa7affdb26c2a7e31cf5673513dd17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20=C5=A0tancl?= Date: Fri, 24 Nov 2023 19:28:52 +0100 Subject: [PATCH] optimize gitignore logic to correctly handle * excludes --- src/main.rs | 18 +++++++---- src/scan.rs | 93 ++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 90 insertions(+), 21 deletions(-) diff --git a/src/main.rs b/src/main.rs index 0d15091..afb7140 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ +use std::fs::canonicalize; use std::path::PathBuf; -use clap::Parser; +use clap::{Parser, ArgAction}; use scan::{scan_readme_file, add_excludes_from_gitignore}; use crate::entries::Entry; use crate::render::render_entries; @@ -33,8 +34,9 @@ struct Args { exclude: Vec, /// Show stats after listing TODOs - #[arg(short, long, default_value_t = false)] - verbose: bool, + #[arg(short, long)] + #[clap(action = ArgAction::Count)] + verbose: u8, } fn main() { @@ -45,7 +47,7 @@ fn main() { let mut excludes: Vec = vec![]; let mut entries: Vec = vec![]; - let mut stats = Stats::new(); + let mut stats = Stats::new(args.verbose); for p in args.paths { let mut path = root_dir.clone(); @@ -65,7 +67,11 @@ fn main() { let mut path = root_dir.clone(); path.push(exclude); - excludes.push(path); + if path.exists() { + if let Ok(realpath) = canonicalize(path) { + excludes.push(realpath); + } + } } let mut todos_path = root_dir.clone(); @@ -94,7 +100,7 @@ fn main() { render_entries(entries); - if args.verbose { + if args.verbose > 0 { eprint!("\n\n"); stats.print(); eprintln!("Paths ({}): {:?}", &paths.len(), &paths); diff --git a/src/scan.rs b/src/scan.rs index 4f21953..844ee5d 100644 --- a/src/scan.rs +++ b/src/scan.rs @@ -8,21 +8,69 @@ const PRIORITY_CHARS: [char; 10] = ['0', '1', '2', '3', '4', '5', '6', '7', '8', use crate::entries::{Entry, EntryData, Location}; pub struct Stats { - visited_folders: usize, - visited_files: usize, + visited_folder_count: usize, + visited_file_count: usize, + 
visited_folders: Vec, + visited_files: Vec, + verbosity: u8, } impl Stats { - pub fn new() -> Stats { + pub fn new(verbosity: u8) -> Stats { Stats { - visited_folders: 0, - visited_files: 0, + visited_folder_count: 0, + visited_file_count: 0, + visited_folders: vec![], + visited_files: vec![], + verbosity, + } + } + + pub fn add_file(&mut self, file: String) { + self.visited_file_count += 1; + + if self.verbosity >= 3 { + eprintln!("[INFO] Visited file: {}", &file); + } + + if self.verbosity >= 2 { + self.visited_files.push(file); + } + } + + pub fn add_folder(&mut self, folder: String) { + self.visited_folder_count += 1; + + if self.verbosity >= 3 { + eprintln!("[INFO] Visited folder: {}", &folder); + } + + if self.verbosity >= 2 { + self.visited_folders.push(folder); } } pub fn print(&self) { - eprintln!("[INFO] Visited folders: {}", self.visited_folders); - eprintln!("[INFO] Visited files: {}", self.visited_files); + if self.verbosity >= 2 { + eprintln!("[INFO] Visited folders:"); + + for folder in &self.visited_folders { + println!("{}", folder); + } + + eprint!("\n\n"); + + eprintln!("[INFO] Visited files:"); + + for file in &self.visited_files { + println!("{}", file); + } + + eprint!("\n\n"); + } + + eprintln!("[INFO] Visited folders: {}", self.visited_folder_count); + eprintln!("[INFO] Visited files: {}", self.visited_file_count); } } @@ -59,10 +107,11 @@ pub fn add_excludes_from_gitignore(base_dir: &PathBuf, excludes: &mut Vec) -> io::Result<()> { } pub fn scan_dir(dir: &Path, entries: &mut Vec, excludes: &mut Vec, stats: &mut Stats) -> io::Result<()> { - stats.visited_folders += 1; - let mut gitignore = dir.to_path_buf().clone(); gitignore.push(".gitignore"); if gitignore.exists() { add_excludes_from_gitignore(&dir.to_path_buf(), excludes); + + // `add_excludes_from_gitignore` can add the *entire* directory being scanned here to excludes + // e.g. if it contains a `*` line. 
The directory is visited first, and gitignore is read second, + // so the exclude would not affect anything inside the for loop. For that reason, we re-check if + // `dir` isn't excluded after running `add_excludes_from_gitignore`. + // todo@real see if we can optimize this by checking for parent-child in the `for exclude` loop within 'entry + + for exclude in &*excludes { + if canonicalize(dir.to_path_buf()).unwrap() == *exclude { + return Ok(()); + } + } } + stats.add_folder(dir.to_string_lossy().to_string()); + 'entry: for entry in fs::read_dir(dir)? { let entry = entry?; let path = entry.path(); @@ -170,7 +233,7 @@ pub fn scan_dir(dir: &Path, entries: &mut Vec, excludes: &mut Vec, excludes: &mut Vec