1
0
Fork 0
mirror of https://github.com/archtechx/todo-system.git synced 2025-12-12 00:54:03 +00:00
todo-system/src/scan.rs
Samuel Štancl 32a57ab5fb perf: store glob exclude patterns instead of traversing glob(...)
Previously add_excludes_from_gitignore() would use glob() and
recursively traverse the generated paths to add them to excludes.

Now we store excludes as an enum - Path or Glob - and use the glob
crate's `Pattern.matches_path()` as needed, instead of the preemptive
traversal.
2025-09-13 23:13:30 +02:00

1031 lines
29 KiB
Rust

use std::io;
use std::fs::{self, canonicalize};
use std::path::{Path, PathBuf};
use glob::{Pattern};
const PRIORITY_CHARS: [char; 10] = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
use crate::entries::{Entry, EntryData, Location};
/// A single rule describing a path that the directory scan should skip.
#[derive(Debug, PartialEq, Eq)]
pub enum Exclude {
/// Skips the path that is equal to the stored one (see `Exclude::matches`).
Path(PathBuf),
/// Skips every path accepted by the stored glob pattern.
Glob(Pattern),
}
impl Exclude {
    /// Reports whether `path` is covered by this exclude rule.
    ///
    /// A `Path` rule covers only a path equal to the stored one, while a
    /// `Glob` rule defers to the pattern's own `matches_path`.
    pub fn matches(&self, path: &Path) -> bool {
        match *self {
            Exclude::Glob(ref pattern) => pattern.matches_path(path),
            Exclude::Path(ref excluded) => excluded.as_path() == path,
        }
    }
}
/// Bookkeeping for a scan run: how many folders and files were visited and,
/// at higher verbosity levels, which ones.
pub struct Stats {
    /// Number of folders reported through `add_folder`.
    visited_folder_count: usize,
    /// Number of files reported through `add_file`.
    visited_file_count: usize,
    /// Folder names, recorded only when `verbosity >= 2`.
    visited_folders: Vec<String>,
    /// File names, recorded only when `verbosity >= 2`.
    visited_files: Vec<String>,
    /// Verbosity level: `>= 2` records names for the final report,
    /// `>= 3` additionally logs every visit as it happens.
    verbosity: u8,
}

impl Stats {
    /// Creates empty statistics for the given verbosity level.
    pub fn new(verbosity: u8) -> Stats {
        Stats {
            visited_folder_count: 0,
            visited_file_count: 0,
            visited_folders: Vec::new(),
            visited_files: Vec::new(),
            verbosity,
        }
    }

    /// Counts a visited file; logs and/or records its name depending on verbosity.
    pub fn add_file(&mut self, file: String) {
        self.visited_file_count += 1;

        if self.verbosity >= 3 {
            eprintln!("[INFO] Visited file: {}", file);
        }

        if self.verbosity >= 2 {
            self.visited_files.push(file);
        }
    }

    /// Counts a visited folder; logs and/or records its name depending on verbosity.
    pub fn add_folder(&mut self, folder: String) {
        self.visited_folder_count += 1;

        if self.verbosity >= 3 {
            eprintln!("[INFO] Visited folder: {}", folder);
        }

        if self.verbosity >= 2 {
            self.visited_folders.push(folder);
        }
    }

    /// Writes the collected statistics to stderr.
    ///
    /// With `verbosity >= 2` the recorded folder and file names are listed
    /// first; the two totals are always printed. Everything goes to stderr so
    /// that the diagnostic listing stays together with its `[INFO]` headers
    /// and never ends up in the program's regular stdout output.
    pub fn print(&self) {
        if self.verbosity >= 2 {
            eprintln!("[INFO] Visited folders:");
            for folder in &self.visited_folders {
                eprintln!("{}", folder);
            }
            eprint!("\n\n");

            eprintln!("[INFO] Visited files:");
            for file in &self.visited_files {
                eprintln!("{}", file);
            }
            eprint!("\n\n");
        }

        eprintln!("[INFO] Visited folders: {}", self.visited_folder_count);
        eprintln!("[INFO] Visited files: {}", self.visited_file_count);
    }
}
/// Extracts the numeric priority from a priority keyword.
///
/// The keyword is matched case-insensitively. Only the text between the first
/// `todo` and the next `todo` (or the end of the word) is inspected:
///
/// * a single digit `N` yields priority `N`,
/// * a run of `k` zeros (k >= 2) yields `1 - k`, so two zeros are -1, three are -2, ...
/// * anything else yields `None`: several non-zero digits, a non-digit
///   suffix, a missing suffix, or a word that does not contain `todo` at all.
///
/// This function never panics, whatever the input word looks like.
fn parse_priority(word: &str) -> Option<isize> {
    let lowercase_word = word.to_lowercase();
    let suffix = lowercase_word.split("todo").nth(1)?;

    // Without this guard the all-zeros test below would be vacuously true
    // and an absent suffix would be reported as priority 1.
    if suffix.is_empty() {
        return None;
    }

    if suffix.len() == 1 {
        // A lone character is only a priority when it is a digit.
        return suffix.parse::<isize>().ok();
    }

    if suffix.bytes().all(|byte| byte == b'0') {
        // Each extra zero lowers the priority by one.
        return Some(1 - suffix.len() as isize);
    }

    None
}
/// Returns the text that follows `delimiter_word` on `line`, stripped of
/// surrounding whitespace and of trailing comment/tag terminators.
///
/// The terminators are removed in a fixed order (`*/`, `-->`, `--}}`, `/>`),
/// each as often as it repeats at the end, and whitespace is only trimmed
/// before the first and after the last removal.
///
/// # Panics
/// Panics if `delimiter_word` does not occur in `line`.
fn clean_line<'a>(line: &'a str, delimiter_word: &str) -> &'a str {
    const CLOSERS: [&str; 4] = ["*/", "-->", "--}}", "/>"];

    let (_, tail) = line.split_once(delimiter_word).unwrap();

    CLOSERS
        .iter()
        .fold(tail.trim(), |rest, closer| rest.trim_end_matches(*closer))
        .trim()
}
pub fn add_excludes_from_gitignore(base_dir: &PathBuf, excludes: &mut Vec<Exclude>) {
let mut gitignore = base_dir.clone();
gitignore.push(".gitignore");
if ! gitignore.exists() {
return;
}
for line in std::fs::read_to_string(gitignore).unwrap().lines() {
if line.trim().is_empty() {
continue;
}
if line.trim() == "*" {
if let Ok(realpath) = canonicalize(base_dir) {
excludes.push(Exclude::Path(realpath));
}
break;
}
if line.trim().starts_with('!') {
continue;
}
if line.trim().starts_with('#') {
continue;
}
let mut pattern = base_dir.clone();
pattern.push(line.trim_end_matches("*/").trim_matches('/'));
if pattern.to_str().unwrap().contains('*') {
if let Some(str) = pattern.to_str() {
if let Ok(p) = Pattern::new(str) {
excludes.push(Exclude::Glob(p));
}
}
} else {
excludes.push(Exclude::Path(pattern));
}
}
}
pub fn scan_string(str: String, filename: PathBuf, entries: &mut Vec<Entry>) {
for (line_num, line) in str.lines().enumerate() {
if ! line.to_lowercase().contains("todo") {
continue;
}
for mut word in line.split_whitespace() {
if ! word.to_lowercase().starts_with("todo") {
continue;
}
let text = clean_line(line, word);
if word.starts_with("todo!(") {
entries.push(Entry {
text: line.trim().to_string(),
location: Location {
file: filename.clone(),
line: line_num + 1,
},
data: EntryData::Generic,
});
break;
}
word = word.trim_end_matches(':');
// Handles: `todo`, `TODO`, `todo:`, `TODO:`
// Also trims `"` and `'` to handle cases like `foo="bar todo"`
if word.to_lowercase().trim_end_matches('"').trim_end_matches('\'') == "todo" {
entries.push(Entry {
text: text.to_string(),
location: Location {
file: filename.clone(),
line: line_num + 1,
},
data: EntryData::Generic,
});
break;
}
if word.contains('@') {
let category = word.split('@').nth(1).unwrap();
entries.push(Entry {
text: text.to_string(),
location: Location {
file: filename.clone(),
line: line_num + 1,
},
data: EntryData::Category(category.to_string()),
});
break;
}
if word.chars().any(|ch| PRIORITY_CHARS.contains(&ch)) {
if let Some(priority) = parse_priority(word) {
entries.push(Entry {
text: text.to_string(),
location: Location {
file: filename.clone(),
line: line_num + 1,
},
data: EntryData::Priority(priority),
});
}
break;
}
}
}
}
pub fn scan_file(path: &Path, entries: &mut Vec<Entry>) -> io::Result<()> {
if let Ok(str) = std::fs::read_to_string(path) {
scan_string(str, path.to_path_buf(), entries);
}
Ok(())
}
/// Recursively scans `dir`, collecting entries from every file that is
/// neither hidden nor excluded.
///
/// If `dir` contains a `.gitignore`, its rules are appended to `excludes`
/// before anything else happens. Directory entries whose name starts with `.`
/// and entries matched by any exclude are skipped; sub-directories are scanned
/// recursively and files are handed to `scan_file`. Every visited folder and
/// file is recorded in `stats`.
///
/// # Errors
/// Returns the I/O error if `dir` or one of its entries cannot be read.
pub fn scan_dir(dir: &Path, entries: &mut Vec<Entry>, excludes: &mut Vec<Exclude>, stats: &mut Stats) -> io::Result<()> {
    if dir.join(".gitignore").exists() {
        add_excludes_from_gitignore(&dir.to_path_buf(), excludes);

        // `add_excludes_from_gitignore` can exclude the *entire* directory
        // being scanned (e.g. for a `*` line). The directory was reached
        // before its gitignore was read, so re-check it here. The exclude is
        // stored in canonical form, hence the canonicalization; resolving the
        // path once keeps this at a single filesystem lookup, and a failed
        // lookup simply means there is nothing to compare against.
        if let Ok(real_dir) = canonicalize(dir) {
            if excludes.contains(&Exclude::Path(real_dir)) {
                return Ok(());
            }
        }
    }

    stats.add_folder(dir.to_string_lossy().to_string());

    for entry in fs::read_dir(dir)? {
        let entry = entry?;

        // Hidden files and folders are never scanned.
        if entry.file_name().to_string_lossy().starts_with('.') {
            continue;
        }

        let path = entry.path();
        if excludes.iter().any(|exclude| exclude.matches(&path)) {
            continue;
        }

        // NOTE(review): `Path::is_dir` follows symbolic links, so a link cycle
        // would recurse without bound — confirm whether that needs guarding.
        if path.is_dir() {
            scan_dir(path.as_path(), entries, excludes, stats)?;
        } else {
            stats.add_file(path.to_string_lossy().to_string());
            scan_file(path.as_path(), entries)?;
        }
    }

    Ok(())
}
/// Collects to-do items from a dedicated markdown list file.
///
/// Every list item (a line whose first non-blank character is `-`) becomes an entry:
///
/// * an item containing a priority keyword becomes a priority entry (or is
///   dropped when the priority cannot be parsed),
/// * any other item below a heading becomes a category entry named after the
///   most recent heading,
/// * any other item above the first heading becomes a generic entry.
///
/// # Errors
/// Returns the I/O error if the file cannot be read.
pub fn scan_todo_file(path: &Path, entries: &mut Vec<Entry>) -> io::Result<()> {
    // A heading must be `#` characters followed by a space, so a bare `#`
    // prefix test is not enough. Listing the plausible heading depths is
    // simpler than walking the characters of the line.
    const HEADING_PREFIXES: [&str; 5] = ["# ", "## ", "### ", "#### ", "##### "];

    let contents = fs::read_to_string(path)?;
    let mut category: Option<&str> = None;
    let mut inside_fence = false;

    for (index, line) in contents.lines().enumerate() {
        // Builds the location of the current line.
        let here = || Location {
            file: path.to_path_buf(),
            line: index + 1,
        };

        if line.starts_with("```") {
            // Inside an *unindented* fenced block, lines starting with `#`
            // are not headings. Indented fences do not matter here.
            inside_fence = !inside_fence;
            continue;
        }

        if !inside_fence && HEADING_PREFIXES.iter().any(|prefix| line.starts_with(*prefix)) {
            category = Some(line.split_once("# ").unwrap().1);
            continue;
        }

        if !line.trim_start().starts_with('-') {
            continue;
        }

        // The first word that looks like a priority keyword decides the line.
        let priority_word = line.split_whitespace().find(|word| {
            word.to_lowercase().trim_end_matches(':').starts_with("todo")
                && word.chars().any(|ch| PRIORITY_CHARS.contains(&ch))
        });

        if let Some(word) = priority_word {
            if let Some(priority) = parse_priority(word.trim_end_matches(':')) {
                entries.push(Entry {
                    text: clean_line(line, word).to_string(),
                    location: here(),
                    data: EntryData::Priority(priority),
                });
            }
            continue;
        }

        let text = line
            .trim_start()
            .trim_start_matches("- [ ] ")
            .trim_start_matches("- ")
            .to_string();

        let data = match category {
            Some(name) => EntryData::Category(name.to_string()),
            None => EntryData::Generic,
        };

        entries.push(Entry {
            text,
            location: here(),
            data,
        });
    }

    Ok(())
}
/// Collects to-do items from a README-style markdown file.
///
/// Only list items (lines whose first non-blank character is `-`) located
/// under a heading titled `todo` or `todos` are reported; the title is
/// compared case-insensitively and may end in `:`. An item containing a
/// priority keyword becomes a priority entry (or is dropped when the priority
/// cannot be parsed); every other item becomes a generic entry.
///
/// # Errors
/// Returns the I/O error if the file cannot be read.
pub fn scan_readme_file(path: &Path, entries: &mut Vec<Entry>) -> io::Result<()> {
    // A heading must be `#` characters followed by a space, so a bare `#`
    // prefix test is not enough. Listing the plausible heading depths is
    // simpler than walking the characters of the line.
    const HEADING_PREFIXES: [&str; 5] = ["# ", "## ", "### ", "#### ", "##### "];
    const SECTION_TITLES: [&str; 2] = ["todo", "todos"];

    let contents = fs::read_to_string(path)?;
    let mut inside_fence = false;
    let mut collecting = false;

    // Entry kinds produced here: generic and priority.
    // - category entries (below a ## category heading) todo@real add this logic and update README.md
    for (index, line) in contents.lines().enumerate() {
        // Builds the location of the current line.
        let here = || Location {
            file: path.to_path_buf(),
            line: index + 1,
        };

        if line.starts_with("```") {
            // Inside an *unindented* fenced block, lines starting with `#`
            // are not headings. Indented fences do not matter here.
            inside_fence = !inside_fence;
            continue;
        }

        if !inside_fence && HEADING_PREFIXES.iter().any(|prefix| line.starts_with(*prefix)) {
            let title = line.split_once("# ").unwrap().1;
            let lowered = title.to_lowercase();
            let cleaned = lowered.trim_end_matches(':').trim();
            collecting = SECTION_TITLES.contains(&cleaned);
            continue;
        }

        if !collecting || !line.trim_start().starts_with('-') {
            continue;
        }

        // The first word that looks like a priority keyword decides the line.
        let priority_word = line.split_whitespace().find(|word| {
            word.to_lowercase().trim_end_matches(':').starts_with("todo")
                && word.chars().any(|ch| PRIORITY_CHARS.contains(&ch))
        });

        if let Some(word) = priority_word {
            if let Some(priority) = parse_priority(word.trim_end_matches(':')) {
                entries.push(Entry {
                    text: clean_line(line, word).to_string(),
                    location: here(),
                    data: EntryData::Priority(priority),
                });
            }
            continue;
        }

        // Everything else in the section is a plain generic item.
        let text = line
            .trim_start()
            .trim_start_matches("- [ ] ")
            .trim_start_matches("- ")
            .to_string();

        entries.push(Entry {
            text,
            location: here(),
            data: EntryData::Generic,
        });
    }

    Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
// Checks that plain keywords inside various comment styles are reported by
// `scan_string` as generic entries with the expected text and line number.
#[test]
fn generic_test() {
let str = r#"
1
2
// todo foo
/* TODO: foo bar */
/*
* TODO baz
TODO baz2
TODO baz2 todo
<!-- TODO foo2 -->
*/
"#;
// The fixture begins with an empty line, so its first keyword sits on line 4.
let mut entries: Vec<Entry> = vec![];
let mut path = PathBuf::new();
path.push("foo.txt");
scan_string(str.to_string(), path.clone(), &mut entries);
assert_eq!(6, entries.len());
// `//` line comment
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("foo"),
location: Location {
file: path.clone(),
line: 4,
}
}, entries[0]);
// Keyword with a trailing colon; the closing `*/` is stripped from the text
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("foo bar"),
location: Location {
file: path.clone(),
line: 5,
}
}, entries[1]);
// Line inside a block comment, prefixed with `*`
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("baz"),
location: Location {
file: path.clone(),
line: 8,
}
}, entries[2]);
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("baz2"),
location: Location {
file: path.clone(),
line: 9,
}
}, entries[3]);
// Only the first keyword on a line counts; the second stays part of the text
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("baz2 todo"),
location: Location {
file: path.clone(),
line: 10,
}
}, entries[4]);
// HTML comment; the closing `-->` is stripped from the text
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("foo2"),
location: Location {
file: path.clone(),
line: 11,
}
}, entries[5]);
}
// Checks that keywords carrying an `@category` suffix are reported by
// `scan_string` as category entries, with or without trailing text.
#[test]
fn category_test() {
let str = r#"
1
2
todo@foo
todo@bar abc def
3
todo@baz x y
4
// TODO@baz2 a
/* TODO@baz3 */
// TODO@baz3 b
<!-- TODO@baz3 -->
"#;
// The fixture begins with an empty line, so its first keyword sits on line 4.
let mut entries: Vec<Entry> = vec![];
let mut path = PathBuf::new();
path.push("foo.txt");
scan_string(str.to_string(), path.clone(), &mut entries);
assert_eq!(7, entries.len());
// Keyword alone on the line: empty text
assert_eq!(Entry {
data: EntryData::Category(String::from("foo")),
text: String::from(""),
location: Location {
file: path.clone(),
line: 4,
}
}, entries[0]);
assert_eq!(Entry {
data: EntryData::Category(String::from("bar")),
text: String::from("abc def"),
location: Location {
file: path.clone(),
line: 5,
}
}, entries[1]);
assert_eq!(Entry {
data: EntryData::Category(String::from("baz")),
text: String::from("x y"),
location: Location {
file: path.clone(),
line: 7,
}
}, entries[2]);
// Upper-case keyword inside a line comment
assert_eq!(Entry {
data: EntryData::Category(String::from("baz2")),
text: String::from("a"),
location: Location {
file: path.clone(),
line: 9,
}
}, entries[3]);
// Block comment: the closing `*/` is stripped, leaving empty text
assert_eq!(Entry {
data: EntryData::Category(String::from("baz3")),
text: String::from(""),
location: Location {
file: path.clone(),
line: 10,
}
}, entries[4]);
assert_eq!(Entry {
data: EntryData::Category(String::from("baz3")),
text: String::from("b"),
location: Location {
file: path.clone(),
line: 11,
}
}, entries[5]);
// HTML comment: the closing `-->` is stripped, leaving empty text
assert_eq!(Entry {
data: EntryData::Category(String::from("baz3")),
text: String::from(""),
location: Location {
file: path.clone(),
line: 12,
}
}, entries[6]);
}
// Checks that keywords carrying a numeric suffix are reported by
// `scan_string` as priority entries, including the repeated-zero forms that
// map to negative priorities.
#[test]
fn priority_test() {
let str = r#"
1
2
todo00
todo000 abc
todo0 abc def
todo1 foo
3
todo1 x y
4
// todo0 bar
// TODO1 a
/* TODO2 */
// TODO3 b
<!-- TODO4 b -->
"#;
// The fixture begins with an empty line, so its first keyword sits on line 4.
let mut entries: Vec<Entry> = vec![];
let mut path = PathBuf::new();
path.push("foo.txt");
scan_string(str.to_string(), path.clone(), &mut entries);
assert_eq!(10, entries.len());
// Two zeros: priority -1
assert_eq!(Entry {
data: EntryData::Priority(-1),
text: String::from(""),
location: Location {
file: path.clone(),
line: 4,
}
}, entries[0]);
// Three zeros: priority -2
assert_eq!(Entry {
data: EntryData::Priority(-2),
text: String::from("abc"),
location: Location {
file: path.clone(),
line: 5,
}
}, entries[1]);
// A single zero: priority 0
assert_eq!(Entry {
data: EntryData::Priority(0),
text: String::from("abc def"),
location: Location {
file: path.clone(),
line: 6,
}
}, entries[2]);
assert_eq!(Entry {
data: EntryData::Priority(1),
text: String::from("foo"),
location: Location {
file: path.clone(),
line: 7,
}
}, entries[3]);
assert_eq!(Entry {
data: EntryData::Priority(1),
text: String::from("x y"),
location: Location {
file: path.clone(),
line: 9,
}
}, entries[4]);
// Same keywords inside line, block and HTML comments
assert_eq!(Entry {
data: EntryData::Priority(0),
text: String::from("bar"),
location: Location {
file: path.clone(),
line: 11,
}
}, entries[5]);
assert_eq!(Entry {
data: EntryData::Priority(1),
text: String::from("a"),
location: Location {
file: path.clone(),
line: 12,
}
}, entries[6]);
assert_eq!(Entry {
data: EntryData::Priority(2),
text: String::from(""),
location: Location {
file: path.clone(),
line: 13,
}
}, entries[7]);
assert_eq!(Entry {
data: EntryData::Priority(3),
text: String::from("b"),
location: Location {
file: path.clone(),
line: 14,
}
}, entries[8]);
assert_eq!(Entry {
data: EntryData::Priority(4),
text: String::from("b"),
location: Location {
file: path.clone(),
line: 15,
}
}, entries[9]);
}
// Runs `scan_file` on the fixture `samples/1.ts`, resolved against the current
// working directory, and checks the ten entries it is expected to yield:
// category, priority and generic ones, in file order.
#[test]
fn sample_test_ts() {
let mut entries: Vec<Entry> = vec![];
let mut path = std::env::current_dir().unwrap();
path.push("samples");
path.push("1.ts");
scan_file(path.as_path(), &mut entries).unwrap();
assert_eq!(10, entries.len());
// Category entries
assert_eq!(Entry {
data: EntryData::Category(String::from("types")),
text: String::from(""),
location: Location {
file: path.clone(),
line: 1,
}
}, entries[0]);
assert_eq!(Entry {
data: EntryData::Category(String::from("types")),
text: String::from("add types"),
location: Location {
file: path.clone(),
line: 5,
}
}, entries[1]);
// Priority entries, from -2 up to 2
assert_eq!(Entry {
data: EntryData::Priority(-2),
text: String::from(""),
location: Location {
file: path.clone(),
line: 10,
}
}, entries[2]);
assert_eq!(Entry {
data: EntryData::Priority(-1),
text: String::from("add return typehint"),
location: Location {
file: path.clone(),
line: 14,
}
}, entries[3]);
assert_eq!(Entry {
data: EntryData::Priority(0),
text: String::from("add name typehint"),
location: Location {
file: path.clone(),
line: 19,
}
}, entries[4]);
assert_eq!(Entry {
data: EntryData::Priority(1),
text: String::from("add return typehint"),
location: Location {
file: path.clone(),
line: 23,
}
}, entries[5]);
assert_eq!(Entry {
data: EntryData::Priority(2),
text: String::from("add return typehint"),
location: Location {
file: path.clone(),
line: 27,
}
}, entries[6]);
// Generic entries
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from(""),
location: Location {
file: path.clone(),
line: 31,
}
}, entries[7]);
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("generic todo 2"),
location: Location {
file: path.clone(),
line: 33,
}
}, entries[8]);
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("generic todo 3"),
location: Location {
file: path.clone(),
line: 34,
}
}, entries[9]);
}
// Runs `scan_file` on the fixture `samples/2.rs`, resolved against the current
// working directory. Every macro invocation is expected to be reported as a
// generic entry whose text is the whole statement, even when its message
// looks like a category or a priority.
#[test]
fn sample_test_rs() {
let mut entries: Vec<Entry> = vec![];
let mut path = std::env::current_dir().unwrap();
path.push("samples");
path.push("2.rs");
scan_file(path.as_path(), &mut entries).unwrap();
assert_eq!(4, entries.len());
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("todo!(\"generic\");"),
location: Location {
file: path.clone(),
line: 3,
}
}, entries[0]);
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("todo!();"),
location: Location {
file: path.clone(),
line: 4,
}
}, entries[1]);
// An `@` inside the macro message does not make a category
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("todo!(\"@foo not category\");"),
location: Location {
file: path.clone(),
line: 5,
}
}, entries[2]);
// Digits inside the macro message do not make a priority
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("todo!(\"00 not priority\");"),
location: Location {
file: path.clone(),
line: 6,
}
}, entries[3]);
}
// Runs `scan_todo_file` on the fixture `samples/todo.md`, resolved against the
// current working directory, and checks the eight expected entries: generic
// ones first, then priority ones, then items grouped under category headings.
#[test]
fn todo_file_test() {
let mut entries: Vec<Entry> = vec![];
let mut path = std::env::current_dir().unwrap();
path.push("samples");
path.push("todo.md");
scan_todo_file(path.as_path(), &mut entries).unwrap();
assert_eq!(8, entries.len());
// Generic entries
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("generic foo"),
location: Location {
file: path.clone(),
line: 1,
}
}, entries[0]);
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("generic bar"),
location: Location {
file: path.clone(),
line: 2,
}
}, entries[1]);
// Priority entries
assert_eq!(Entry {
data: EntryData::Priority(-1),
text: String::from("priority bar"),
location: Location {
file: path.clone(),
line: 3,
}
}, entries[2]);
assert_eq!(Entry {
data: EntryData::Priority(0),
text: String::from("a"),
location: Location {
file: path.clone(),
line: 6,
}
}, entries[3]);
// Category entries, named after the heading they are listed under
assert_eq!(Entry {
data: EntryData::Category(String::from("High priority")),
text: String::from("foo"),
location: Location {
file: path.clone(),
line: 7,
}
}, entries[4]);
assert_eq!(Entry {
data: EntryData::Category(String::from("High priority")),
text: String::from("bar"),
location: Location {
file: path.clone(),
line: 8,
}
}, entries[5]);
assert_eq!(Entry {
data: EntryData::Category(String::from("Responsivity")),
text: String::from("abc"),
location: Location {
file: path.clone(),
line: 11,
}
}, entries[6]);
assert_eq!(Entry {
data: EntryData::Category(String::from("Responsivity")),
text: String::from("def"),
location: Location {
file: path.clone(),
line: 12,
}
}, entries[7]);
}
// Runs `scan_readme_file` on the fixture `samples/README.md`, resolved against
// the current working directory, and checks the five expected entries, which
// are a mix of generic and priority ones.
#[test]
fn readme_file_test() {
let mut entries: Vec<Entry> = vec![];
let mut path = std::env::current_dir().unwrap();
path.push("samples");
path.push("README.md");
scan_readme_file(path.as_path(), &mut entries).unwrap();
assert_eq!(5, entries.len());
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("abc"),
location: Location {
file: path.clone(),
line: 19,
}
}, entries[0]);
assert_eq!(Entry {
data: EntryData::Priority(0),
text: String::from("def"),
location: Location {
file: path.clone(),
line: 20,
}
}, entries[1]);
assert_eq!(Entry {
data: EntryData::Priority(-1),
text: String::from("ghi"),
location: Location {
file: path.clone(),
line: 21,
}
}, entries[2]);
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("bar"),
location: Location {
file: path.clone(),
line: 22,
}
}, entries[3]);
assert_eq!(Entry {
data: EntryData::Generic,
text: String::from("baz"),
location: Location {
file: path.clone(),
line: 23,
}
}, entries[4]);
}
}