From c377fe6f958ac35e3695f1dc9b499b1bbdad3438 Mon Sep 17 00:00:00 2001 From: James Wu Date: Thu, 21 Mar 2024 22:06:21 -0400 Subject: [PATCH] Libify tlparse --- Cargo.lock | 22 ++++---- Cargo.toml | 9 +++- src/cli.rs | 64 +++++++++++++++++++++++ src/{main.rs => lib.rs} | 109 ++++++++++++++-------------------------- 4 files changed, 121 insertions(+), 83 deletions(-) create mode 100644 src/cli.rs rename src/{main.rs => lib.rs} (78%) diff --git a/Cargo.lock b/Cargo.lock index 5bdee0a..93150c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "aho-corasick" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -105,9 +105,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.5.2" +version = "4.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b230ab84b0ffdf890d5a10abdbc8b83ae1c4918275daea1ab8801f71536b2651" +checksum = "949626d00e063efc93b6dca932419ceb5432f99769911c0b995f7e884c778813" dependencies = [ "clap_builder", "clap_derive", @@ -127,9 +127,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.0" +version = "4.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" +checksum = "90239a040c80f5e14809ca132ddc4176ab33d5e17e49691793296e3fcb34d72f" dependencies = [ "heck", "proc-macro2", @@ -221,9 +221,9 @@ checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" [[package]] name = "heck" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "html-escape" @@ -430,9 +430,9 @@ checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" [[package]] name = "syn" -version = "2.0.52" +version = "2.0.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" +checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032" dependencies = [ "proc-macro2", "quote", @@ -451,7 +451,7 @@ dependencies = [ [[package]] name = "tlparse" -version = "0.3.7" +version = "0.3.8" dependencies = [ "anyhow", "base16ct", diff --git a/Cargo.toml b/Cargo.toml index 32cb4a9..46b835c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,18 @@ [package] name = "tlparse" -version = "0.3.7" +version = "0.3.8" edition = "2021" authors = ["Edward Z. Yang "] description = "Parse TORCH_LOG logs produced by PyTorch torch.compile" license = "BSD-3-Clause" +[lib] +name = "tlparse" + +[[bin]] +name = "tlparse" +path = "src/cli.rs" + # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..a3dbf6c --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,64 @@ +use clap::Parser; + +use std::fs; +use std::path::PathBuf; + +use tlparse::{ParseConfig, parse_path}; + + +#[derive(Parser)] +#[command(author, version, about, long_about = None)] +#[command(propagate_version = true)] +pub struct Cli { + path: PathBuf, + /// Output directory, defaults to `tl_out` + #[arg(short, default_value = "tl_out")] + out: PathBuf, + /// Delete out directory if it already exists + #[arg(long)] + overwrite: bool, + /// Return non-zero exit code if unrecognized log lines are found. Mostly useful for unit + /// testing. 
+ #[arg(long)] + strict: bool, + /// Don't open browser at the end + #[arg(long)] + no_browser: bool, +} + + +fn main() -> anyhow::Result<()> { + let cli = Cli::parse(); + let path = cli.path; + let out_path = cli.out; + + if out_path.exists() { + if !cli.overwrite { + panic!( + "{} already exists, pass --overwrite to overwrite", + out_path.display() + ); + } + fs::remove_dir_all(&out_path)?; + } + fs::create_dir(&out_path)?; + + let config = ParseConfig { + strict: cli.strict, + }; + + let output = parse_path(&path, config)?; + + for (filename, path) in output { + let out_file = out_path.join(filename); + if let Some(dir) = out_file.parent() { + fs::create_dir_all(dir)?; + } + fs::write(out_file, path)?; + } + + if !cli.no_browser { + opener::open(out_path.join("index.html"))?; + } + Ok(()) +} diff --git a/src/main.rs b/src/lib.rs similarity index 78% rename from src/main.rs rename to src/lib.rs index c000484..cb0e04f 100644 --- a/src/main.rs +++ b/src/lib.rs @@ -1,73 +1,46 @@ use anyhow::anyhow; -use base16ct; -use clap::Parser; use fxhash::FxHashMap; use md5::{Digest, Md5}; use std::ffi::{OsStr, OsString}; use regex::Regex; -use std::fs; use std::fs::File; use std::io::{self, BufRead}; use std::path::Path; use std::path::PathBuf; use tinytemplate::TinyTemplate; - use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use std::time::Instant; use crate::types::*; use crate::templates::*; -pub mod templates; -pub mod types; - -#[derive(Parser)] -#[command(author, version, about, long_about = None)] -#[command(propagate_version = true)] -pub struct Cli { - path: PathBuf, - /// Output directory, defaults to `tl_out` - #[arg(short, default_value = "tl_out")] - out: PathBuf, - /// Delete out directory if it already exists - #[arg(long)] - overwrite: bool, - /// Return non-zero exit code if unrecognized log lines are found. Mostly useful for unit - /// testing. 
- #[arg(long)] - strict: bool, - /// Don't open browser at the end - #[arg(long)] - no_browser: bool, -} +mod templates; +mod types; -fn main() -> anyhow::Result<()> { - let cli = Cli::parse(); - let path = cli.path; - let out_path = cli.out; - if out_path.exists() { - if !cli.overwrite { - panic!( - "{} already exists, pass --overwrite to overwrite", - out_path.display() - ); - } - fs::remove_dir_all(&out_path)?; - } - fs::create_dir(&out_path)?; +pub type ParseOutput = Vec<(PathBuf, String)>; +pub struct ParseConfig { + pub strict: bool, +} + +pub fn parse_path(path: &PathBuf, config: ParseConfig) -> anyhow::Result<ParseOutput> { + let strict = config.strict; let file = File::open(path)?; let metadata = file.metadata()?; let file_size = metadata.len(); + + // TODO: abstract out this spinner to not be part of the library + // Instead, add a callback trait for CLIs to implement let multi = MultiProgress::new(); let pb = multi.add(ProgressBar::new(file_size)); pb.set_style(ProgressStyle::default_bar() .template("{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} [{bytes_per_sec}] ({eta})")? 
.progress_chars("#>-")); let spinner = multi.add(ProgressBar::new_spinner()); + let reader = io::BufReader::new(file); let re_glog = Regex::new(concat!( @@ -93,6 +66,9 @@ fn main() -> anyhow::Result<()> { let mut directory: FxHashMap, Vec> = FxHashMap::default(); + // Store results in an output Vec + let mut output : Vec<(PathBuf, String)> = Vec::new(); + let mut tt = TinyTemplate::new(); tt.add_formatter("format_unescaped", tinytemplate::format_unescaped); tt.add_template("index.html", TEMPLATE_INDEX)?; @@ -129,7 +105,6 @@ fn main() -> anyhow::Result<()> { } if end > slowest_time { slowest_time = end; - //println!("{}", line); } let payload = &line[caps.name("payload").unwrap().start()..]; @@ -178,8 +153,7 @@ fn main() -> anyhow::Result<()> { ) .into(); - let subdir = out_path.join(&compile_id_dir); - fs::create_dir_all(&subdir)?; + let subdir = &compile_id_dir; let mut payload = String::new(); if let Some(expect) = e.has_payload { @@ -189,7 +163,7 @@ fn main() -> anyhow::Result<()> { { // Careful! Distinguish between missing EOL and not if !first { - payload.push_str("\n"); + payload.push('\n'); } first = false; payload.push_str(&payload_line[1..]); @@ -198,8 +172,8 @@ fn main() -> anyhow::Result<()> { hasher.update(&payload); let hash = hasher.finalize(); let mut expect_buf = [0u8; 16]; - if let Ok(_) = base16ct::lower::decode(expect, &mut expect_buf) { - if expect_buf != &hash[..] { + if base16ct::lower::decode(expect, &mut expect_buf).is_ok() { + if expect_buf != hash[..] 
{ // TODO: error log stats.fail_payload_md5 += 1; } @@ -224,38 +198,38 @@ fn main() -> anyhow::Result<()> { }; }; - let mut write_dump = + let mut output_dump = |filename: &str, sentinel: Option| -> anyhow::Result<()> { if sentinel.is_some() { let f = subdir.join(filename); - fs::write(&f, &payload)?; + output.push((f, payload.clone())); compile_directory.push(compile_id_dir.join(filename)); } Ok(()) }; - write_dump("optimize_ddp_split_graph.txt", e.optimize_ddp_split_graph)?; - write_dump("compiled_autograd_graph.txt", e.compiled_autograd_graph)?; - write_dump("aot_forward_graph.txt", e.aot_forward_graph)?; - write_dump("aot_backward_graph.txt", e.aot_backward_graph)?; - write_dump("aot_joint_graph.txt", e.aot_joint_graph)?; - write_dump("inductor_post_grad_graph.txt", e.inductor_post_grad_graph)?; + output_dump("optimize_ddp_split_graph.txt", e.optimize_ddp_split_graph)?; + output_dump("compiled_autograd_graph.txt", e.compiled_autograd_graph)?; + output_dump("aot_forward_graph.txt", e.aot_forward_graph)?; + output_dump("aot_backward_graph.txt", e.aot_backward_graph)?; + output_dump("aot_joint_graph.txt", e.aot_joint_graph)?; + output_dump("inductor_post_grad_graph.txt", e.inductor_post_grad_graph)?; if e.dynamo_output_graph.is_some() { // TODO: dump sizes let filename = "dynamo_output_graph.txt"; - let f = subdir.join(&filename); - fs::write(&f, &payload)?; + let f = subdir.join(filename); + output.push((f, payload.clone())); compile_directory.push(compile_id_dir.join(filename)); } if e.dynamo_guards.is_some() { let filename = "dynamo_guards.html"; - let f = subdir.join(&filename); + let f = subdir.join(filename); match serde_json::from_str::>(payload.as_str()) { Ok(guards) => { - let guards_context = DynamoGuardsContext { guards: guards }; - fs::write(&f, tt.render("dynamo_guards.html", &guards_context)?)?; + let guards_context = DynamoGuardsContext { guards }; + output.push((f, tt.render("dynamo_guards.html", &guards_context)?)); 
compile_directory.push(compile_id_dir.join(filename)); } Err(err) => { @@ -280,14 +254,14 @@ fn main() -> anyhow::Result<()> { }, ); let f = subdir.join(&filename); - fs::write(&f, &payload)?; + output.push((f, payload.clone())); compile_directory.push(compile_id_dir.join(filename)); } if let Some(metadata) = e.optimize_ddp_split_child { let filename = format!("optimize_ddp_split_child_{}.txt", metadata.name); let f = subdir.join(&filename); - fs::write(&f, &payload)?; + output.push((f, payload.clone())); compile_directory.push(compile_id_dir.join(filename)); } } @@ -304,18 +278,11 @@ fn main() -> anyhow::Result<()> { .collect(), stack_trie_html: stack_trie.to_string(), }; - fs::write( - out_path.join("index.html"), - tt.render("index.html", &index_context)?, - )?; - - if !cli.no_browser { - opener::open(out_path.join("index.html"))?; - } + output.push((PathBuf::from("index.html"), tt.render("index.html", &index_context)?)); // other_rank is included here because you should only have logs from one rank when // configured properly - if cli.strict + if strict && (stats.fail_glog + stats.fail_json + stats.fail_payload_md5 @@ -327,5 +294,5 @@ fn main() -> anyhow::Result<()> { return Err(anyhow!("Something went wrong")); } - Ok(()) + Ok(output) }