Skip to content

Commit

Permalink
feat: Implement type 2 parser
Browse files Browse the repository at this point in the history
  • Loading branch information
ysthakur committed Aug 9, 2023
1 parent 51175d1 commit cef1e21
Show file tree
Hide file tree
Showing 12 changed files with 9,923 additions and 69 deletions.
4,683 changes: 4,683 additions & 0 deletions samples/type1/git-log.1

Large diffs are not rendered by default.

120 changes: 120 additions & 0 deletions samples/type2/df.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.3.
.TH DF "1" "February 2022" "GNU coreutils 8.32" "User Commands"
.SH NAME
df \- report file system disk space usage
.SH SYNOPSIS
.B df
[\fI\,OPTION\/\fR]... [\fI\,FILE\/\fR]...
.SH DESCRIPTION
This manual page
documents the GNU version of
.BR df .
.B df
displays the amount of disk space available on the file system
containing each file name argument. If no file name is given, the
space available on all currently mounted file systems is shown. Disk
space is shown in 1K blocks by default, unless the environment
variable POSIXLY_CORRECT is set, in which case 512-byte blocks are
used.
.PP
If an argument is the absolute file name of a disk device node containing a
mounted file system,
.B df
shows the space available on that file system rather than on the
file system containing the device node. This version of
.B df
cannot show the space available on unmounted file systems, because on
most kinds of systems doing so requires very nonportable intimate
knowledge of file system structures.
.SH OPTIONS
.PP
Show information about the file system on which each FILE resides,
or all file systems by default.
.PP
Mandatory arguments to long options are mandatory for short options too.
.TP
\fB\-a\fR, \fB\-\-all\fR
include pseudo, duplicate, inaccessible file systems
.TP
\fB\-B\fR, \fB\-\-block\-size\fR=\fI\,SIZE\/\fR
scale sizes by SIZE before printing them; e.g.,
\&'\-BM' prints sizes in units of 1,048,576 bytes;
see SIZE format below
.TP
\fB\-h\fR, \fB\-\-human\-readable\fR
print sizes in powers of 1024 (e.g., 1023M)
.TP
\fB\-H\fR, \fB\-\-si\fR
print sizes in powers of 1000 (e.g., 1.1G)
.TP
\fB\-i\fR, \fB\-\-inodes\fR
list inode information instead of block usage
.TP
\fB\-k\fR
like \fB\-\-block\-size\fR=\fI\,1K\/\fR
.TP
\fB\-l\fR, \fB\-\-local\fR
limit listing to local file systems
.TP
\fB\-\-no\-sync\fR
do not invoke sync before getting usage info (default)
.TP
\fB\-\-output\fR[=\fI\,FIELD_LIST\/\fR]
use the output format defined by FIELD_LIST,
or print all fields if FIELD_LIST is omitted.
.TP
\fB\-P\fR, \fB\-\-portability\fR
use the POSIX output format
.TP
\fB\-\-sync\fR
invoke sync before getting usage info
.TP
\fB\-\-total\fR
elide all entries insignificant to available space,
and produce a grand total
.TP
\fB\-t\fR, \fB\-\-type\fR=\fI\,TYPE\/\fR
limit listing to file systems of type TYPE
.TP
\fB\-T\fR, \fB\-\-print\-type\fR
print file system type
.TP
\fB\-x\fR, \fB\-\-exclude\-type\fR=\fI\,TYPE\/\fR
limit listing to file systems not of type TYPE
.TP
\fB\-v\fR
(ignored)
.TP
\fB\-\-help\fR
display this help and exit
.TP
\fB\-\-version\fR
output version information and exit
.PP
Display values are in units of the first available SIZE from \fB\-\-block\-size\fR,
and the DF_BLOCK_SIZE, BLOCK_SIZE and BLOCKSIZE environment variables.
Otherwise, units default to 1024 bytes (or 512 if POSIXLY_CORRECT is set).
.PP
The SIZE argument is an integer and optional unit (example: 10K is 10*1024).
Units are K,M,G,T,P,E,Z,Y (powers of 1024) or KB,MB,... (powers of 1000).
Binary prefixes can be used, too: KiB=K, MiB=M, and so on.
.PP
FIELD_LIST is a comma\-separated list of columns to be included. Valid
field names are: 'source', 'fstype', 'itotal', 'iused', 'iavail', 'ipcent',
\&'size', 'used', 'avail', 'pcent', 'file' and 'target' (see info page).
.SH AUTHOR
Written by Torbjorn Granlund, David MacKenzie, and Paul Eggert.
.SH "REPORTING BUGS"
GNU coreutils online help: <https://www.gnu.org/software/coreutils/>
.br
Report any translation bugs to <https://translationproject.org/team/>
.SH COPYRIGHT
Copyright \(co 2020 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
.br
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
.SH "SEE ALSO"
Full documentation <https://www.gnu.org/software/coreutils/df>
.br
or available locally via: info \(aq(coreutils) df invocation\(aq
128 changes: 128 additions & 0 deletions samples/type2/rfcomm.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
.\" Man page generated from reStructuredText.
.
.
.nr rst2man-indent-level 0
.
.de1 rstReportMargin
\\$1 \\n[an-margin]
level \\n[rst2man-indent-level]
level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
-
\\n[rst2man-indent0]
\\n[rst2man-indent1]
\\n[rst2man-indent2]
..
.de1 INDENT
.\" .rstReportMargin pre:
. RS \\$1
. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
. nr rst2man-indent-level +1
.\" .rstReportMargin post:
..
.de UNINDENT
. RE
.\" indent \\n[an-margin]
.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
.nr rst2man-indent-level -1
.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
..
.TH "RFCOMM" 1 "April 28, 2002" "BlueZ" "Linux System Administration"
.SH NAME
rfcomm \- RFCOMM configuration utility
.SH SYNOPSIS
.sp
\fBrfcomm\fP [\fIOPTIONS\fP] <\fICOMMAND\fP> <\fIdev\fP>
.SH DESCRIPTION
.sp
\fBrfcomm(1)\fP is used to set up, maintain, and inspect the RFCOMM configuration
of the Bluetooth subsystem in the Linux kernel. If no \fBcommand\fP is given,
or if the option \fB\-a\fP is used, \fBrfcomm\fP prints information about the
configured RFCOMM devices.
.SH OPTIONS
.INDENT 0.0
.TP
.B \-h
Gives a list of possible commands.
.TP
.B \-a
Prints information about all configured RFCOMM devices.
.TP
.B \-r
Switch TTY into raw mode (doesn\(aqt work with "bind").
.UNINDENT
.INDENT 0.0
.TP
.B \-i <\fIhciX\fP> | <\fIbdaddr\fP>
The command is applied to device \fIhciX\fP, which must be the name or the
address of an installed Bluetooth device. If not specified, the command
will use the first available Bluetooth device.
.UNINDENT
.INDENT 0.0
.TP
.B \-A
Enable authentication
.TP
.B \-E
Enable encryption
.TP
.B \-S
Secure connection
.TP
.B \-C
Become the central of a piconet
.TP
.BI \-L \ <seconds>
Set linger timeout
.UNINDENT
.SH COMMANDS
.INDENT 0.0
.TP
.B show <\fIdev\fP>
Display the information about the specified device.
.TP
.B connect <\fIdev\fP> [\fIbdaddr\fP] [\fIchannel\fP]
Connect the RFCOMM device to the remote Bluetooth device on the specified
channel. If no channel is specified, it will use the channel
number \fB1\fP\&. This command can be terminated with the key sequence CTRL\-C.
.TP
.B listen <\fIdev\fP> [\fIchannel\fP] [\fIcmd\fP]
Listen on a specified RFCOMM channel for incoming connections. If no
channel is specified, it will use the channel number \fB1\fP, but a channel
must be specified before cmd. If cmd is given, it will be executed as soon
as a client connects. When the child process terminates or the client
disconnects, the command will terminate. Occurrences of {} in cmd will be
replaced by the name of the device used by the connection. This command
can be terminated with the key sequence CTRL\-C.
.TP
.B watch <\fIdev\fP> [\fIchannel\fP] [\fIcmd\fP]
Watch is identical to listen except that when the child process
terminates or the client disconnects, the command will restart listening
with the same parameters.
.TP
.B bind <\fIdev\fP> [\fIbdaddr\fP] [\fIchannel\fP]
This binds the RFCOMM device to a remote Bluetooth device. The command
does not establish a connection to the remote device, it only creates
the binding. The connection will be established right after an application
tries to open the RFCOMM device. If no channel number is specified, it
uses the channel number \fB1\fP\&.
.TP
.B release <\fIdev\fP>
This command releases a defined RFCOMM binding.
.sp
If \fBall\fP is specified for the RFCOMM device, then all bindings will be
removed.
.UNINDENT
.SH RESOURCES
.sp
\fI\%http://www.bluez.org\fP
.SH REPORTING BUGS
.sp
\fI\%linux\-bluetooth@vger.kernel.org\fP
.SH AUTHOR
Marcel Holtmann <marcel@holtmann.org>
.SH COPYRIGHT
Free use of this software is granted under the terms of the GNU
Lesser General Public Licenses (LGPL).
.\" Generated by docutils manpage writer.
.
6 changes: 4 additions & 2 deletions src/parse/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod type1;
mod type2;
pub(super) mod util;

use anyhow::{anyhow, Result};
Expand Down Expand Up @@ -34,7 +35,8 @@ where
}

pub fn parse_manpage_text<S: AsRef<str>>(cmd_name: &str, text: S) -> Option<Vec<Arg>> {
type1::parse(cmd_name, text.as_ref())
let text = text.as_ref();
type1::parse(cmd_name, text).or_else(|| type2::parse(cmd_name, text))
}

pub struct ManParseConfig {
Expand Down Expand Up @@ -241,7 +243,7 @@ fn filter_pages(
if include {
debug!("Found man page for {} at {}", cmd, path.display());
}
if include && exclude {
if exclude && include_commands.is_some() {
warn!("Command was both explicitly included and excluded: {}", cmd);
}

Expand Down
71 changes: 9 additions & 62 deletions src/parse/type1.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use log::debug;
use regex::Regex;

use super::{util, Arg};

Expand All @@ -13,16 +12,23 @@ pub fn parse(cmd_name: &str, page_text: &str) -> Option<Vec<Arg>> {
let content = captures.get(1).unwrap().as_str();
let mut args = Vec::new();

for para in content.split(".PP") {
let mut paras = content.split(".PP");
paras.next(); // Discard the part before the first option
for para in paras {
if let Some(end) = para.find(".RE") {
let data = &para[0..end];
let data = util::remove_groff_formatting(data);
let mut data = data.split(".RS 4");
let options = data.next().unwrap();
let desc = data.next();
if let Some(arg) = make_arg(options, desc) {
if let Some(arg) = util::make_arg(options, desc) {
args.push(arg);
}
} else {
debug!(
"No .RE found to end description, para: {}",
util::truncate(&para, 40)
);
}
}

Expand All @@ -31,62 +37,3 @@ pub fn parse(cmd_name: &str, page_text: &str) -> Option<Vec<Arg>> {
None => None,
}
}

/// Parse the line of options after .PP and the description after it
///
/// Ported from Fish's `built_command`
///
/// `options` is the raw option line, optionally wrapped in one pair of
/// matching single or double quotes. It is split on spaces, commas, `=`, `"`
/// and `|`; every piece that still starts with `-` after cleanup (and is not
/// just `-` or `--`) becomes one form of the argument.
///
/// Returns `None` when no usable option form is found. Otherwise returns an
/// [`Arg`] whose `desc` is `None` if no description was given or if the
/// description is empty after cleanup.
fn make_arg(options: &str, desc: Option<&str>) -> Option<Arg> {
    /// Characters stripped from both ends of every candidate option.
    const TRIM_CHARS: &[char] = &[
        ' ', '\t', '\r', '\n', '[', ']', '(', ')', '{', '}', '.', ':', '!',
    ];
    /// How much of the description is echoed in the "no options" debug message.
    const MAX_LOGGED_DESC_CHARS: usize = 40;

    // Unquote the options string. A lone quote character is left untouched
    // because stripping the prefix leaves nothing for the suffix to match.
    let options = options.trim();
    let options = options
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .or_else(|| {
            options
                .strip_prefix('\'')
                .and_then(|rest| rest.strip_suffix('\''))
        })
        .unwrap_or(options);

    // Compile the patterns once per call instead of once per split piece.
    let delim = Regex::new(r#"[ ,="|]"#).expect("hard-coded regex is valid");
    let bracketed = Regex::new(r"\[.*\]").expect("hard-coded regex is valid");
    // todo Fish doesn't replace <.*> so maybe this is wrong
    let angled = Regex::new(r"<.*>").expect("hard-coded regex is valid");

    let mut forms = Vec::new();
    for piece in delim.split(options) {
        let piece = bracketed.replace(piece, "");
        let piece = angled.replace(&piece, "");
        let option = piece.trim_matches(TRIM_CHARS);
        if !option.starts_with('-') || option == "-" || option == "--" {
            continue;
        }
        // Skip options that still contain any brace or parenthesis. The previous
        // regex only matched the literal sequence `{}()`, so this check never
        // fired in practice.
        // NOTE(review): believed to mirror Fish's `built_command` - confirm
        // against the upstream script.
        if option.contains(|c: char| matches!(c, '{' | '}' | '(' | ')')) {
            continue;
        }
        forms.push(option.to_owned());
    }

    if forms.is_empty() {
        // Shorten the description for logging without slicing past the end of
        // the string or through the middle of a multi-byte character.
        let desc = desc.map_or("", |full| {
            let full = full.trim();
            full.char_indices()
                .nth(MAX_LOGGED_DESC_CHARS)
                .map_or(full, |(end, _)| &full[..end])
        });
        debug!("No options found in '{}', desc: '{}'", options.trim(), desc);
        return None;
    }

    let desc = desc.and_then(|desc| {
        let desc = desc.trim().replace('\n', " ");
        let desc = desc.trim_end_matches('.');
        // Remove bogus escapes
        let desc = desc.replace(r"\'", "").replace(r"\.", "");

        let desc = util::trim_desc(desc);
        if desc.is_empty() {
            None
        } else {
            Some(desc)
        }
    });
    Some(Arg { forms, desc })
}
Loading

0 comments on commit cef1e21

Please sign in to comment.