From 074525613fa89597d9ae6ad9ee5b86b16e8e4ed1 Mon Sep 17 00:00:00 2001 From: ysthakur <45539777+ysthakur@users.noreply.github.com> Date: Fri, 18 Aug 2023 14:51:02 -0400 Subject: [PATCH] feat: Add type4 parser (untested) --- README.md | 5 +++-- src/parse/mod.rs | 12 +++++++++--- src/parse/type1.rs | 4 ++-- src/parse/type2.rs | 7 +++++-- src/parse/type3.rs | 7 +++++-- src/parse/type4.rs | 39 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 63 insertions(+), 11 deletions(-) create mode 100644 src/parse/type4.rs diff --git a/README.md b/README.md index 264cf6f..a557a74 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,8 @@ so to configure that, set the `RUST_LOG` environment variable (the link has inst Things to do: -- Port type 3, type 4, darwin, scdoc, and degroff parsers +- Port darwin, scdoc, and degroff parsers +- Find samples of type 4 to test - Ensure nested subcommands and multiple subcommands work -- Test .gz, test excluding/including commands and directories +- Add .gz files to the tests, test excluding/including commands and directories - Figure out why fish only seems to use man1, man6, and man8 diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 8618101..f8a0d5f 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -1,6 +1,7 @@ mod type1; mod type2; mod type3; +mod type4; mod util; use std::{ @@ -60,13 +61,18 @@ where /// Parse flags from a man page, trying all of the different parsers and merging /// their results if multiple parsers could parse the man page. Returns /// None if none of them could parse the man page. 
-pub fn parse_manpage_text<S>(text: S) -> Option<Vec<Flag>> +pub fn parse_manpage_text<S>(cmd_name: &str, text: S) -> Option<Vec<Flag>> where S: AsRef<str>, { let text = text.as_ref(); let mut all_flags: Option<Vec<Flag>> = None; - for res in vec![type1::parse(text), type2::parse(text), type3::parse(text)] { + for res in vec![ + type1::parse(cmd_name, text), + type2::parse(cmd_name, text), + type3::parse(cmd_name, text), + type4::parse(cmd_name, text), + ] { if let Some(mut flags) = res { match &mut all_flags { Some(prev_flags) => { @@ -121,7 +127,7 @@ pub fn parse_from( if let Some(path) = pre_info.path { match read_manpage(&path) { Ok(text) => { - if let Some(mut parsed) = parse_manpage_text(text) { + if let Some(mut parsed) = parse_manpage_text(cmd_name, text) { flags.append(&mut parsed); } else { errors.push(anyhow!("Could not parse man page for '{}'", cmd_name)); diff --git a/src/parse/type1.rs b/src/parse/type1.rs index 295c4b9..1818a36 100644 --- a/src/parse/type1.rs +++ b/src/parse/type1.rs @@ -5,7 +5,7 @@ use super::{util, Flag}; /// Ported from Fish's `Type1ManParser` /// /// todo implement fallback and fallback2 like the Fish script -pub fn parse(page_text: &str) -> Option<Vec<Flag>> { +pub fn parse(cmd_name: &str, page_text: &str) -> Option<Vec<Flag>> { match util::get_section(r#""OPTIONS""#, page_text) { Some(content) => { let mut flags = Vec::new(); @@ -24,7 +24,7 @@ pub fn parse(page_text: &str) -> Option<Vec<Flag>> { } } else { debug!( - "No .RE found to end description, para: {}", + "In command {cmd_name}, no .RE found to end description, para: {}", util::truncate(para, 40) ); } diff --git a/src/parse/type2.rs b/src/parse/type2.rs index e37638d..fe9fbc5 100644 --- a/src/parse/type2.rs +++ b/src/parse/type2.rs @@ -4,7 +4,7 @@ use regex::Regex; use super::{util, Flag}; /// Ported from Fish's `Type2ManParser` -pub fn parse(page_text: &str) -> Option<Vec<Flag>> { +pub fn parse(cmd_name: &str, page_text: &str) -> Option<Vec<Flag>> { match util::get_section("OPTIONS", page_text) { Some(content) => { let mut flags = Vec::new(); @@ -30,7 +30,10 
@@ pub fn parse(page_text: &str) -> Option<Vec<Flag>> { util::make_flag(options, Some(desc)) } else { // todo should this be an error instead? - debug!("No description, data: {}", util::truncate(data, 40)); + debug!( + "In command {cmd_name}, no description, data: {}", + util::truncate(data, 40) + ); util::make_flag(data, None) }; if let Some(flag) = flag { diff --git a/src/parse/type3.rs b/src/parse/type3.rs index a932aa5..271f115 100644 --- a/src/parse/type3.rs +++ b/src/parse/type3.rs @@ -7,7 +7,7 @@ use super::{util, Flag}; /// Fish's `Type3ManParser` doesn't handle HP...IP...HP, but the man page for /// sed, at least, uses that, so this parser handles that too. #[allow(clippy::case_sensitive_file_extension_comparisons)] -pub fn parse(page_text: &str) -> Option<Vec<Flag>> { +pub fn parse(cmd_name: &str, page_text: &str) -> Option<Vec<Flag>> { match util::get_section("DESCRIPTION", page_text) { Some(content) => { let mut flags = Vec::new(); @@ -52,7 +52,10 @@ pub fn parse(page_text: &str) -> Option<Vec<Flag>> { } } else { // todo should this be an error instead? 
- debug!("No description, data: {}", util::truncate(data, 40)); + debug!( + "In command {cmd_name}, no description, data: {}", + util::truncate(data, 40) + ); } } } diff --git a/src/parse/type4.rs b/src/parse/type4.rs new file mode 100644 index 0000000..4de8e8d --- /dev/null +++ b/src/parse/type4.rs @@ -0,0 +1,39 @@ +use log::warn; + +use super::{util, Flag}; + +/// Ported from Fish's `Type4ManParser` +/// +/// TODO This is completely untested +#[allow(clippy::case_sensitive_file_extension_comparisons)] +pub fn parse(cmd_name: &str, page_text: &str) -> Option<Vec<Flag>> { + match util::get_section("FUNCTION LETTERS", page_text) { + Some(content) => { + let mut flags = Vec::new(); + + let mut paras = content.split(".TP"); + paras.next(); // Discard the part before the first option + for para in paras { + let data = util::remove_groff_formatting(para); + let data = data.trim(); + if let Some((options, desc)) = data.split_once("\n") { + if let Some(flag) = util::make_flag(options, Some(desc)) { + flags.push(flag); + } + } else { + warn!( + "In command {cmd_name}, no description, data: {}", + util::truncate(data, 40) + ); + } + } + + if flags.is_empty() { + None + } else { + Some(flags) + } + } + None => None, + } +}