use std::{collections::HashSet, fs, path::PathBuf};

use rottlib::lexer::{DebugTools, TokenizedFile};

/// Read `ignore.txt` (one path per line, `#` starts a comment) from the root
/// directory and turn it into a [`HashSet`] of canonicalized paths.
fn load_ignore_set(root: &std::path::Path) -> HashSet<PathBuf> {
    let ignore_file = root.join("ignore.txt");
    if !ignore_file.exists() {
        return HashSet::new();
    }
    let content = match fs::read_to_string(&ignore_file) {
        Ok(content) => content,
        Err(error) => {
            eprintln!("Could not read {}: {error}", ignore_file.display());
            return HashSet::new();
        }
    };
    content
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .filter_map(|line| {
            // Entries may be absolute or relative to the root directory.
            let next_path = PathBuf::from(line);
            let absolute_path = if next_path.is_absolute() {
                next_path
            } else {
                root.join(next_path)
            };
            fs::canonicalize(absolute_path).ok()
        })
        .collect()
}

/// CLI: `verify_uc <root_dir>` - find all `.uc` files in the provided directory
/// (except those listed in `ignore.txt` in the root) and test them all.
///
/// The reported execution time covers tokenization only and excludes the time
/// it takes to read files from disk.
///
/// `ignore.txt` is for listing specific files, not directories.
fn main() {
    let root_dir = std::env::args().nth(1).unwrap(); // it is fine to crash a debug utility
    let root = PathBuf::from(&root_dir);
    if !root.exists() {
        eprintln!("Root directory '{root_dir}' does not exist.");
        std::process::exit(1);
    }

    // Load files
    let ignored_paths = load_ignore_set(&root);
    let mut uc_files: Vec<(PathBuf, String)> = Vec::new();
    for entry in walkdir::WalkDir::new(&root)
        .into_iter()
        .filter_map(Result::ok) // for a debug tool this is ok
        .filter(|entry| {
            let path = entry.path();
            // Skip anything explicitly ignored
            if let Ok(absolute_path) = fs::canonicalize(path) {
                if ignored_paths.contains(&absolute_path) {
                    return false;
                }
            }
            // Must be *.uc
            path.is_file()
                && path
                    .extension()
                    .and_then(|extension| extension.to_str())
                    .is_some_and(|extension| extension.eq_ignore_ascii_case("uc"))
        })
    {
        let path = entry.path();
        match fs::read(path) {
            Ok(raw_bytes) => {
                // Auto‑detect encoding for old UnrealScript sources
                let (encoding_label, _, _) = chardet::detect(&raw_bytes);
                let encoding = encoding_rs::Encoding::for_label(encoding_label.as_bytes())
                    .unwrap_or(encoding_rs::UTF_8);
                let (decoded_text, _, _) = encoding.decode(&raw_bytes);
                uc_files.push((path.to_path_buf(), decoded_text.into_owned()));
            }
            Err(error) => {
                eprintln!("Failed to read `{}`: {error}", path.display());
                std::process::exit(1);
            }
        }
    }
    println!("Loaded {} .uc files into memory.", uc_files.len());

    // Tokenize and measure performance
    let start_time = std::time::Instant::now();
    let tokenized_files: Vec<(PathBuf, TokenizedFile)> = uc_files
        .iter()
        .map(|(path, source_code)| {
            let tokenized_file = TokenizedFile::from_source(source_code);
            if tokenized_file.had_errors() {
                println!("TK: {}", path.display());
            }
            (path.clone(), tokenized_file)
        })
        .collect();
    let elapsed_time = start_time.elapsed();
    println!(
        "Tokenized {} files in {:.2?}",
        tokenized_files.len(),
        elapsed_time
    );

    // Round‑trip check
    for ((path, original), (_, tokenized_file)) in uc_files.iter().zip(tokenized_files.iter()) {
        let reconstructed = tokenized_file.reconstruct_source();
        if original != &reconstructed {
            eprintln!("Reconstruction mismatch in `{}`!", path.display());
            std::process::exit(1);
        }
    }
    println!("All .uc files matched successfully.");
}
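
// A minimal test sketch, added as an illustration rather than part of the original tool:
// it shows the expected `ignore.txt` format (one path per line, `#` for comments, blank
// lines ignored) and checks that relative entries are resolved against the root directory
// and canonicalized. The temp-directory layout and file names are assumptions.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn ignore_set_skips_comments_and_resolves_relative_paths() {
        let root = std::env::temp_dir().join("verify_uc_ignore_test");
        fs::create_dir_all(&root).unwrap();
        fs::write(root.join("Skipped.uc"), b"class Skipped;").unwrap();
        fs::write(
            root.join("ignore.txt"),
            "# files excluded from verification\n\nSkipped.uc\n",
        )
        .unwrap();

        let ignored = load_ignore_set(&root);
        let expected = fs::canonicalize(root.join("Skipped.uc")).unwrap();
        assert!(ignored.contains(&expected));
        // The comment line and the blank line must not produce entries.
        assert_eq!(ignored.len(), 1);
    }
}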