Skip to content

Commit

Permalink
feat: Implement type3
Browse files Browse the repository at this point in the history
  • Loading branch information
ysthakur committed Aug 18, 2023
1 parent 8549cb7 commit 03c8e2f
Show file tree
Hide file tree
Showing 9 changed files with 1,031 additions and 19 deletions.
433 changes: 433 additions & 0 deletions samples/type3/sed.1

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion src/parse/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
mod type1;
mod type2;
mod type3;
mod util;

use std::{
Expand Down Expand Up @@ -61,7 +62,9 @@ where
S: AsRef<str>,
{
let text = text.as_ref();
type1::parse(text).or_else(|| type2::parse(text))
type1::parse(text)
.or_else(|| type2::parse(text))
.or_else(|| type3::parse(text))
}

/// Decompress a manpage if necessary
Expand Down
6 changes: 2 additions & 4 deletions src/parse/type1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@ use super::{util, Flag};
///
/// todo implement fallback and fallback2 like the Fish script
pub fn parse(page_text: &str) -> Option<Vec<Flag>> {
let re = util::regex_for_section(r#""OPTIONS""#);
match re.captures(page_text) {
Some(captures) => {
let content = captures.get(1).unwrap().as_str();
match util::get_section(r#""OPTIONS""#, page_text) {
Some(content) => {
let mut flags = Vec::new();

let mut paras = content.split(".PP");
Expand Down
10 changes: 3 additions & 7 deletions src/parse/type2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,9 @@ use regex::Regex;
use super::{util, Flag};

/// Ported from Fish's `Type2ManParser`
///
/// TODO actually test this
pub fn parse(page_text: &str) -> Option<Vec<Flag>> {
let re = util::regex_for_section("OPTIONS");
match re.captures(page_text) {
Some(captures) => {
let content = captures.get(1).unwrap().as_str();
match util::get_section("OPTIONS", page_text) {
Some(content) => {
let mut flags = Vec::new();

// todo this diverges from the Fish impl for splitting, check if it's okay
Expand All @@ -19,7 +15,7 @@ pub fn parse(page_text: &str) -> Option<Vec<Flag>> {
Regex::new(&format!(r"\.[IT]P( {}i?)?", util::NUM_RE)).unwrap();
let para_end = Regex::new(r"\.(IP|TP|UNINDENT|UN|SH)").unwrap();

let mut paras = para_re.split(content);
let mut paras = para_re.split(&content);
paras.next(); // Discard the part before the first option
for para in paras {
let data = if let Some(mat) = para_end.find(para) {
Expand Down
67 changes: 67 additions & 0 deletions src/parse/type3.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use log::debug;
use regex::{Regex, RegexBuilder};

use super::{util, Flag};

/// Ported from Fish's `Type3ManParser`.
///
/// Looks for flags in the `DESCRIPTION` section of `page_text`. Each entry
/// starts at a `.HP` or `.TP` macro and runs until the next `.HP`/`.TP`/`.PP`
/// macro or the end of the section.
///
/// Fish's `Type3ManParser` doesn't handle HP...IP...HP, but the man page for
/// sed, at least, uses that, so this parser handles that too.
///
/// Returns `None` when there is no `DESCRIPTION` section or when no flag could
/// be extracted from it.
pub fn parse(page_text: &str) -> Option<Vec<Flag>> {
    let section = util::get_section("DESCRIPTION", page_text)?;

    let entry_re = RegexBuilder::new(r"\.[HT]P(.*?)(\.[HPT]P|\z)")
        .dot_matches_new_line(true)
        .build()
        .unwrap();

    let mut found = Vec::new();
    // Offset in `section` at which the next search begins
    let mut cursor = 0;

    while let Some(entry) = entry_re.find_at(&section, cursor) {
        // Skip the 3-byte `.HP`/`.TP` that opened this entry
        let raw = &section[entry.start() + 3..entry.end()];
        // Drop the closing .HP/.TP/.PP, if the entry was ended by one
        let body = [".HP", ".TP", ".PP"]
            .iter()
            .find_map(|marker| raw.strip_suffix(*marker))
            .unwrap_or(raw);
        // Resume right before the closing macro so that it can open the next
        // entry
        cursor = entry.end() - (raw.len() - body.len());

        match body.split_once(".IP") {
            Some((raw_options, raw_desc)) => {
                // This means there is a .HP before the options
                let options = util::remove_groff_formatting(raw_options);
                let desc = util::remove_groff_formatting(raw_desc);
                found.extend(util::make_flag(&options, Some(&desc)));
            }
            None => {
                // This means there is a .TP before the options
                let cleaned = util::remove_groff_formatting(body);
                let cleaned = cleaned.trim();
                match cleaned.split_once('\n') {
                    Some((options, desc)) => {
                        found.extend(util::make_flag(options, Some(desc)));
                    }
                    None => {
                        // todo should this be an error instead?
                        debug!("No description, data: {}", util::truncate(cleaned, 40));
                    }
                }
            }
        }
    }

    Some(found).filter(|flags| !flags.is_empty())
}
12 changes: 8 additions & 4 deletions src/parse/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,17 @@ pub fn trim_desc(desc: &str) -> String {
}
}

/// Regex to get the contents of a section with the given title
pub fn regex_for_section(title: &str) -> Regex {
RegexBuilder::new(&format!(r#"\.SH {title}(.*?)(\.SH|\z)"#))
/// Get the contents of a section with the given title
pub fn get_section(title: &str, text: &str) -> Option<String> {
let re = RegexBuilder::new(&format!(r#"\.SH {title}(.*?)(\.SH|\z)"#))
.multi_line(true)
.dot_matches_new_line(true)
.build()
.unwrap()
.unwrap();
re.captures(text).map(|captures| {
let content = captures.get(1).unwrap().as_str();
content.strip_suffix(".SH").unwrap_or(content).to_string()
})
}

/// Copied more or less directly from Fish's `remove_groff_formatting`
Expand Down
17 changes: 14 additions & 3 deletions tests/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,15 @@ fn run_test(shell: &str, outputs: &[&str], args: &[&str]) {
continue;
}

let expected = fs::read(exp_file).unwrap();
let got = fs::read(&got_file).unwrap();
if expected != got {
if exp_file.exists() {
let expected = fs::read(exp_file).unwrap();
let got = fs::read(&got_file).unwrap();
if expected != got {
not_match.push(file_name);
continue;
}
} else {
println!("No {file_name} found in expected folder");
not_match.push(file_name);
continue;
}
Expand Down Expand Up @@ -125,3 +131,8 @@ fn git_json() {
fn rfcomm_json() {
run_test("json", &["rfcomm"], &["--cmds", "^rfcomm"]);
}

/// Integration test for the `sed` sample: calls `run_test` with shell `json`,
/// the single output name `sed`, and the arguments `--cmds ^sed`.
// NOTE(review): presumably `--cmds ^sed` limits generation to the sed command
// — confirm against the CLI's argument handling.
#[test]
fn sed_json() {
run_test("json", &["sed"], &["--cmds", "^sed"]);
}
67 changes: 67 additions & 0 deletions tests/resources/expected/sed.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 03c8e2f

Please sign in to comment.