feat: Allow descriptions to be empty

ysthakur · Aug 8, 2023 · 51175d1 · 51175d1
1 parent 9c9633c
commit 51175d1
Show file tree

Hide file tree

Showing 8 changed files with 158 additions and 133 deletions.
diff --git a/.rustfmt.toml b/.rustfmt.toml
@@ -1,4 +1,2 @@
 tab_spaces = 2
 max_width = 90
-
-wrap_comments = true
diff --git a/src/gen/json.rs b/src/gen/json.rs
@@ -32,51 +32,67 @@ impl Completions for JsonCompletions {
 /// * `indent` - The indentation level (how many subcommands in we are)
 /// * `last` - Whether this is the last command at this level. Used for deciding
 ///   whether or not to put a trailing comma
-fn generate_cmd(cmd: &str, cmd_info: CommandInfo, indent: usize, last: bool, out: &mut String) {
+fn generate_cmd(
+  cmd: &str,
+  cmd_info: CommandInfo,
+  indent: usize,
+  last: bool,
+  out: &mut String,
+) {
   let cmd = quote(cmd);
   // Avoid trailing commas
   let end = if last { "]" } else { "]," };
   let mut args = cmd_info.args.into_iter();
   if let Some(mut arg) = args.next() {
-    print_indent(indent, out, format!("{cmd}: ["));
+    println_indent(indent, out, format!("{cmd}: ["));
     while {
-      print_indent(indent + 1, out, "{");
+      println_indent(indent + 1, out, "{");
       let forms = arg
         .forms
         .iter()
         .map(|a| quote(&a))
         .collect::<Vec<_>>()
         .join(", ");
-      print_indent(indent + 2, out, format!(r#""forms": [{forms}],"#));
-      print_indent(
-        indent + 2,
-        out,
-        format!(r#""description": {}"#, quote(&arg.desc)),
-      );
+      print_indent(indent + 2, out, format!(r#""forms": [{forms}]"#));
+      if let Some(desc) = &arg.desc {
+        out.push_str(",\n");
+        println_indent(
+          indent + 2,
+          out,
+          format!(r#""description": {}"#, quote(desc)),
+        );
+      } else {
+        out.push_str("\n");
+      }
       if let Some(next) = args.next() {
-        print_indent(indent + 1, out, "},");
+        println_indent(indent + 1, out, "},");
         arg = next;
         true
       } else {
         // Avoid trailing comma
-        print_indent(indent + 1, out, "}");
+        println_indent(indent + 1, out, "}");
         false
       }
     } {}
-    print_indent(indent, out, end);
+    println_indent(indent, out, end);
   } else {
     // If no arguments, print `"cmd": []` on a single line
-    print_indent(indent, out, format!("{cmd}: [{end}"))
+    println_indent(indent, out, format!("{cmd}: [{end}"))
   }
 }
 
 fn quote(s: &str) -> String {
   format!("\"{}\"", s.replace('\\', r"\\").replace('"', "\\\""))
 }
 
-/// Helper to print at a specific indentation level
+/// Like `print_indent`, but appends a trailing newline after the text
+fn println_indent<S: AsRef<str>>(indent: usize, out: &mut String, text: S) {
+  print_indent(indent, out, text);
+  out.push_str("\n");
+}
+
+/// Helper to print at a specific indentation level, without a trailing newline
 fn print_indent<S: AsRef<str>>(indent: usize, out: &mut String, text: S) {
   out.push_str(&INDENT.repeat(indent));
   out.push_str(text.as_ref());
-  out.push_str("\n");
 }
diff --git a/src/gen/mod.rs b/src/gen/mod.rs
@@ -21,7 +21,9 @@ pub trait Completions {
   {
     cmds
       .into_iter()
-      .map(|(cmd_name, cmd_info)| <Self as Completions>::generate(cmd_name, cmd_info, &out_dir))
+      .map(|(cmd_name, cmd_info)| {
+        <Self as Completions>::generate(cmd_name, cmd_info, &out_dir)
+      })
       .collect()
   }
 }
diff --git a/src/gen/zsh.rs b/src/gen/zsh.rs
@@ -83,8 +83,9 @@ fn generate_fn(
   }
   out.push_str(&format!("{INDENT}_arguments -C \\\n"));
   for opt in cmd_info.args {
+    let desc = opt.desc.unwrap_or_default();
     for form in opt.forms {
-      let text = quote(&format!("{form}[{}]", opt.desc));
+      let text = quote(&format!("{form}[{}]", desc));
       out.push_str(&format!("{INDENT}{INDENT}{text} \\\n"));
     }
   }

diff --git a/src/parse/mod.rs b/src/parse/mod.rs
@@ -1,4 +1,5 @@
 mod type1;
+pub(super) mod util;
 
 use anyhow::{anyhow, Result};
 use flate2::bufread::GzDecoder;
@@ -12,8 +13,6 @@ use std::{
   process::Command,
 };
 
-pub use type1::Type1Parser;
-
 #[derive(Debug)]
 pub struct CommandInfo {
   pub args: Vec<Arg>,
@@ -23,7 +22,7 @@ pub struct CommandInfo {
 #[derive(Debug)]
 pub struct Arg {
   pub forms: Vec<String>,
-  pub desc: String,
+  pub desc: Option<String>,
 }
 
 pub fn parse_manpage_at_path<P>(cmd_name: &str, path: P) -> Result<Option<Vec<Arg>>>
@@ -35,15 +34,7 @@ where
 }
 
 pub fn parse_manpage_text<S: AsRef<str>>(cmd_name: &str, text: S) -> Option<Vec<Arg>> {
-  Type1Parser.parse(cmd_name, text.as_ref())
-}
-
-/// Parser to parse options from a man page
-///
-/// TODO possibly get rid of this
-pub trait ManParser {
-  /// Returns a Some if it was able to parse the page, None otherwise
-  fn parse(self, cmd_name: &str, page_text: &str) -> Option<Vec<Arg>>;
+  type1::parse(cmd_name, text.as_ref())
 }
 
 pub struct ManParseConfig {

diff --git a/src/parse/type1.rs b/src/parse/type1.rs
@@ -1,63 +1,44 @@
 use log::debug;
-use regex::{Regex, RegexBuilder};
+use regex::Regex;
 
-use super::{Arg, ManParser};
+use super::{util, Arg};
 
-/// Maximum length of a description
+/// Ported from Fish's `Type1ManParser`
 ///
-/// After this, `...` will be added
-const MAX_DESC_LEN: usize = 80;
-
-const ELLIPSIS: &str = "...";
-
-pub struct Type1Parser;
-
-impl ManParser for Type1Parser {
-  fn parse(self, cmd_name: &str, page_text: &str) -> Option<Vec<Arg>> {
-    let re = regex_for_section(r#""OPTIONS""#);
-    match re.captures(page_text) {
-      Some(captures) => {
-        let content = captures.get(1).unwrap().as_str();
-        let mut args = Vec::new();
-
-        for para in content.split(".PP") {
-          if let Some(end) = para.find(".RE") {
-            let data = &para[0..end];
-            let data = remove_groff_formatting(data);
-            let mut data = data.split(".RS 4");
-            let options = data.next().unwrap();
-            if let Some(desc) = data.next() {
-              if let Some(arg) = make_arg(options, desc) {
-                args.push(arg);
-              }
-            } else {
-              debug!("No indent in description, cmd: {}", cmd_name);
-            }
+/// todo implement fallback and fallback2 like the Fish script
+pub fn parse(cmd_name: &str, page_text: &str) -> Option<Vec<Arg>> {
+  let re = util::regex_for_section(r#""OPTIONS""#);
+  match re.captures(page_text) {
+    Some(captures) => {
+      let content = captures.get(1).unwrap().as_str();
+      let mut args = Vec::new();
+
+      for para in content.split(".PP") {
+        if let Some(end) = para.find(".RE") {
+          let data = &para[0..end];
+          let data = util::remove_groff_formatting(data);
+          let mut data = data.split(".RS 4");
+          let options = data.next().unwrap();
+          let desc = data.next();
+          if let Some(arg) = make_arg(options, desc) {
+            args.push(arg);
           }
         }
-
-        Some(args)
       }
-      None => None,
+
+      Some(args)
     }
+    None => None,
   }
 }
 
-/// Regex to get the contents of a section with the given title
-fn regex_for_section(title: &str) -> Regex {
-  RegexBuilder::new(&format!(r#"\.SH {title}(.*?)(\.SH|\z)"#))
-    .multi_line(true)
-    .dot_matches_new_line(true)
-    .build()
-    .unwrap()
-}
-
-// Copied more or less directly from Fish's `built_command`
-fn make_arg(options: &str, desc: &str) -> Option<Arg> {
-  let mut forms = Vec::new();
-
-  // Unquote the options
-  let options = if options.len() == 1 {
+/// Parse the line of options after .PP and the description after it
+///
+/// Ported from Fish's `built_command`
+fn make_arg(options: &str, desc: Option<&str>) -> Option<Arg> {
+  // Unquote the options string
+  let options = options.trim();
+  let options = if options.len() < 2 {
     options
   } else if options.starts_with('"') && options.ends_with('"') {
     &options[1..options.len() - 1]
@@ -66,76 +47,46 @@ fn make_arg(options: &str, desc: &str) -> Option<Arg> {
   } else {
     options
   };
+
+  let mut forms = Vec::new();
   let delim = Regex::new(r#"[ ,="|]"#).unwrap();
   for option in delim.split(options) {
     let option = Regex::new(r"\[.*\]").unwrap().replace(option, "");
+    // todo Fish doesn't replace <.*> so maybe this is wrong
+    let option = Regex::new(r"<.*>").unwrap().replace(&option, "");
     // todo this is ridiculously verbose
     let option =
-      option.trim_matches(" \t\r\n[](){}.,:!".chars().collect::<Vec<_>>().as_slice());
+      option.trim_matches(" \t\r\n[](){}.:!".chars().collect::<Vec<_>>().as_slice());
     if !option.starts_with('-') || option == "-" || option == "--" {
       continue;
     }
-    // todo use str.matches instead
     if Regex::new(r"\{\}\(\)").unwrap().is_match(option) {
       continue;
     }
     forms.push(option.to_owned());
   }
 
   if forms.is_empty() {
-    debug!(
-      "No options found in '{}', desc: {}",
-      options.trim(),
+    let desc = if let Some(desc) = desc {
       &desc.trim()[..40]
-    );
+    } else {
+      ""
+    };
+    debug!("No options found in '{}', desc: '{}'", options.trim(), desc);
     return None;
   }
 
-  let desc = desc.trim().replace("\n", " ");
-  let desc = desc.trim_end_matches('.');
-  // Remove bogus escapes
-  let desc = desc.replace(r"\'", "").replace(r"\.", "");
-
-  // TODO port the sentence-splitting part too
-
-  let desc = if desc.len() > MAX_DESC_LEN {
-    format!("{}{}", &desc[0..MAX_DESC_LEN - ELLIPSIS.len()], ELLIPSIS)
-  } else {
-    desc
-  };
+  match desc {
+    Some(desc) => {
+      let desc = desc.trim().replace("\n", " ");
+      let desc = desc.trim_end_matches('.');
+      // Remove bogus escapes
+      let desc = desc.replace(r"\'", "").replace(r"\.", "");
 
-  Some(Arg { forms, desc })
-}
-
-// Copied more or less directly from Fish
-fn remove_groff_formatting(data: &str) -> String {
-  let data = data
-    .replace(r"\fI", "")
-    .replace(r"\fP", "")
-    .replace(r"\f1", "")
-    .replace(r"\fB", "")
-    .replace(r"\fR", "")
-    .replace(r"\e", "");
-  // TODO check if this one is necessary
-  // also, fish uses a slightly different regex: `.PD( \d+)`, check if that's fine
-  let re = Regex::new(r"\.PD \d+").unwrap();
-  let data = re.replace_all(&data, "");
-  data
-    .replace(".BI", "")
-    .replace(".BR", "")
-    .replace("0.5i", "")
-    .replace(".rb", "")
-    .replace(r"\^", "")
-    .replace("{ ", "")
-    .replace(" }", "")
-    .replace(r"\ ", "")
-    .replace(r"\-", "-")
-    .replace(r"\&", "")
-    .replace(".B", "")
-    .replace(r"\-", "-")
-    .replace(".I", "")
-    .replace("\u{C}", "")
-    .replace(r"\(cq", "'")
-
-  // TODO .sp is being left behind, see how Fish handles it
+      let desc = util::trim_desc(desc);
+      let desc = if desc.is_empty() { None } else { Some(desc) };
+      Some(Arg { forms, desc })
+    }
+    None => Some(Arg { forms, desc: None }),
+  }
 }