# Parsing with nom

Resources:

https://iximiuz.com/en/posts/rust-writing-parsers-with-nom/

https://blog.logrocket.com/parsing-in-rust-with-nom/

To return error from parser:
```rust
Err(nom::Err::Error(nom::error::Error::from_error_kind(input, nom::error::ErrorKind::Char)))
```

## 1. URL Parser

In [4]:
extern crate nom;

In [5]:

use nom::bytes::complete::tag;
use nom::character::complete::*;
use nom::multi::*;
use nom::sequence::*;
use nom::IResult;
use nom::branch::alt;
use nom::complete::*;
use nom::InputTakeAtPosition;
use nom::AsChar;


In [103]:
type Res<'a> = IResult<&'a str, &'a str>;
type ResStr<'a> = IResult<&'a str, String>;//, NomError<&'a str>>;

In [51]:
let t : Res = terminated(alphanumeric1, tag("."))("ciao.");


In [52]:
t

Ok(("", "ciao"))

In [53]:
let t : ResStr = 
    many1(terminated(alphanumeric1, tag(".")))("first.second.")
        .map(|(remaining, parsed)| {
            ("", format!("{:?}", parsed))
});

In [54]:
t

Ok(("", "[\"first\", \"second\"]"))

In [56]:
fn is_alpha_char(c: char) -> bool { 
    c.is_alphabetic()
}

In [63]:
let t : ResStr = 
    alt(
        (many1
            (terminated
                (alphanumeric1, tag("."))),
         many_m_n(1, 1, take_while(is_alpha_char))))
    ("first.second.third")
    .map(|(remaining, parsed)| {
            ("", format!("{:?}", parsed))
    });

### Adding hyphen to alphanumeric1
Original code:
```
nom/character/complete.rs
```
```rust
pub fn alphanumeric1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E>
where
  T: InputTakeAtPosition,
  <T as InputTakeAtPosition>::Item: AsChar,
{
  input.split_at_position1_complete(|item| !item.is_alphanum(), ErrorKind::AlphaNumeric)
}  
```

In [73]:

fn alphahyphen1<T>(i: T) -> IResult<T, T>
where
    T: InputTakeAtPosition,
    <T as InputTakeAtPosition>::Item: AsChar,
{
    i.split_at_position1_complete(
        |item| {
            let char_item = item.as_char();
            !(char_item == '-') && !char_item.is_alphanum()
        },
        ErrorKind::AlphaNumeric,
    )
}

In [95]:
let t : ResStr = 
    separated_list1(tag("."), alphahyphen1)("^x.b.c/ccc")
    .map(|(rem, v)| (rem, format!("{:?}", v)));

In [96]:
t

Err(Error(Error { input: "^x.b.c/ccc", code: AlphaNumeric }))

In [118]:
// The map function cannot return an error, need to map to a type like
// enum IP {
//   Address([u8;4]),
//   Invalid
// }
// return empty string in this case
let x : ResStr = 
    separated_list1(tag("."), digit1)("234.12.56.13")
    .map(|(remaining, v)|   {
        if v.iter().any(
            |x| x.parse::<i32>().unwrap() < 1 || 
                x.parse::<i32>().unwrap() > 254) {
            return (remaining, "".to_owned());
            // also check for reserved addresses 10., broadcast...
        }
        else {
            return (remaining, format!("{:?}", v));
        }
    });


In [117]:
x

Ok(("", "[\"234\", \"12\", \"56\", \"13\"]"))

## 2. Error Handling

### Look here

https://iximiuz.com/en/posts/rust-writing-parsers-with-nom/

for a great explanation of how to implement custom errors and handle partial error parsing (hint: use `Failure`).  

### Return error from parser:
```rust
Err(nom::Err::Error(nom::error::Error::from_error_kind(input, nom::error::ErrorKind::Char)))
```
### Print line info

```rust
use nom::IResult;
use nom::bytes::complete::tag;
use nom::character::complete::digit1;
use nom::combinator::fail;
use nom::error::{context, VerboseError};
use nom::error::convert_error;
use nom::Finish;

fn dup(s: &str) -> IResult<&str, u64, VerboseError<&str>> {
    let r: IResult<_,_,VerboseError<_>> = tag("\ndup")(s);
    match r {
        Ok((s, _)) => {
                let (sd, n) = digit1(s)?;
            
                let n = match n {
                    "0" => 0,
                    "1" => 1,
                    "2" => 2,
                    _ => return context("using an out-of-bounds dup", fail)(s),
                };
            
                return Ok((sd, n));
        },
        //Err(_e) => return context("Error", fail)(s),
        Err(_) => return Err(nom::Err::Error(VerboseError{errors: vec![]})),
    }
}

fn main() {
    let input = "\ndip3";
    let result = context("dup", dup)(input).finish().err().unwrap();
    println!("{}", convert_error(input, result));
}

```

## Custom errors

In [49]:
extern crate nom;

use nom::error::ErrorKind;
use nom::error::ParseError;
use nom::Err::Error;
use nom::IResult;
use nom::character::complete::digit1;

#[derive(Debug, PartialEq)]
pub enum CustomError<I> {
  MyError((I, String)),
  Nom(I, ErrorKind),
}

impl<I> ParseError<I> for CustomError<I> {
  fn from_error_kind(input: I, kind: ErrorKind) -> Self {
    CustomError::Nom(input, kind)
  }

  fn append(_: I, _: ErrorKind, other: Self) -> Self {
    other
  }
}

pub fn parse<'a>(input: &'a str, msg: &str) -> IResult<&'a str, &'a str, CustomError<&'a str>> {
    //let _xs = digit1(input)?; // return standard Nom error through 'from_error_kind'
    // return custom error
    Err(Error(CustomError::MyError((input, format!("added by parser + {msg}")))))
}

//fn main() {}

//#[cfg(test)]
mod tests {
  use super::parse;
  use super::CustomError;
  use nom::Err::Error;

  //#[test]
  pub /*pub just for running within jupyter notebooks*/ fn it_works() {
    let err = parse("", "error message").unwrap_err();
    match err {
      Error(CustomError::MyError((i, msg))) => {
        assert_eq!(msg, "added by parser + error message");
        //assert_eq!(e, CustomError::MyError("error message".to_string()));
      },
      Error(CustomError::Nom(i, k)) => {
        println!("{i} > {k:?}");  
      },
      _ => panic!("Unexpected error: {:?}", err),
    
    }
  }
}

In [11]:
tests::it_works();

 > Digit


## Print line and column information

Consider using the following crates:
* nom-supreme
* nom-locate

And re-implementing the `convert_error` function for your own error types.
The trick is just to use the `offset` and `filter` functions to count the number of end of line symbols in the input stream until the location where the error occurs.

```rust
/// Transforms a `VerboseError` into a trace with input position information
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn convert_error<I: core::ops::Deref<Target = str>>(
  input: I,
  e: VerboseError<I>, // <- replace with your own error type
) -> crate::lib::std::string::String {
  use crate::lib::std::fmt::Write;
  use crate::traits::Offset;

  let mut result = crate::lib::std::string::String::new();

  for (i, (substring, kind)) in e.errors.iter().enumerate() {
    let offset = input.offset(substring);

    if input.is_empty() {
      match kind {
        VerboseErrorKind::Char(c) => {
          write!(&mut result, "{}: expected '{}', got empty input\n\n", i, c)
        }
        VerboseErrorKind::Context(s) => write!(&mut result, "{}: in {}, got empty input\n\n", i, s),
        VerboseErrorKind::Nom(e) => write!(&mut result, "{}: in {:?}, got empty input\n\n", i, e),
      }
    } else {
      let prefix = &input.as_bytes()[..offset];

      // Count the number of newlines in the first `offset` bytes of input
      let line_number = prefix.iter().filter(|&&b| b == b'\n').count() + 1;

      // Find the line that includes the subslice:
      // Find the *last* newline before the substring starts
      let line_begin = prefix
        .iter()
        .rev()
        .position(|&b| b == b'\n')
        .map(|pos| offset - pos)
        .unwrap_or(0);

      // Find the full line after that newline
      let line = input[line_begin..]
        .lines()
        .next()
        .unwrap_or(&input[line_begin..])
        .trim_end();

      // The (1-indexed) column number is the offset of our substring into that line
      let column_number = line.offset(substring) + 1;

      match kind {
        VerboseErrorKind::Char(c) => {
          if let Some(actual) = substring.chars().next() {
            write!(
              &mut result,
              "{i}: at line {line_number}:\n\
               {line}\n\
               {caret:>column$}\n\
               expected '{expected}', found {actual}\n\n",
              i = i,
              line_number = line_number,
              line = line,
              caret = '^',
              column = column_number,
              expected = c,
              actual = actual,
            )
          } else {
            write!(
              &mut result,
              "{i}: at line {line_number}:\n\
               {line}\n\
               {caret:>column$}\n\
               expected '{expected}', got end of input\n\n",
              i = i,
              line_number = line_number,
              line = line,
              caret = '^',
              column = column_number,
              expected = c,
            )
          }
        }
        VerboseErrorKind::Context(s) => write!(
          &mut result,
          "{i}: at line {line_number}, in {context}:\n\
             {line}\n\
             {caret:>column$}\n\n",
          i = i,
          line_number = line_number,
          context = s,
          line = line,
          caret = '^',
          column = column_number,
        ),
        VerboseErrorKind::Nom(e) => write!(
          &mut result,
          "{i}: at line {line_number}, in {nom_err:?}:\n\
             {line}\n\
             {caret:>column$}\n\n",
          i = i,
          line_number = line_number,
          nom_err = e,
          line = line,
          caret = '^',
          column = column_number,
        ),
      }
    }
    // Because `write!` to a `String` is infallible, this `unwrap` is fine.
    .unwrap();
  }

  result
}
```




In [47]:
/// CustomError implementation

/// @warning ASCII only
fn str_offset(s1: &str, s2: &str) -> usize {
    s2.as_ptr() as usize - s2.as_ptr() as usize
}

pub fn convert_error(
  input: &str,
  e: CustomError<&str>,
) -> String {
  use std::fmt::Write;

  let mut result = String::new();

  if let CustomError::MyError((i, msg)) = e {
      let offset = str_offset(input, i);
      let prefix = &input.as_bytes()[..offset];
      let line_number = prefix.iter().filter(|&&b| b == b'\n').count() + 1;
      // Find the line that includes the subslice:
      // Find the *last* newline before the substring starts
      let line_begin = prefix
        .iter()
        .rev()
        .position(|&b| b == b'\n')
        .map(|pos| offset - pos)
        .unwrap_or(0);

      // Find the full line after that newline
      let line = input[line_begin..]
        .lines()
        .next()
        .unwrap_or(&input[line_begin..])
        .trim_end();

      // The (1-indexed) column number is the offset of our substring into that line
      let column_number = str_offset(line, i) + 1;
      write!(&mut result, "{line_number}, {column_number}: {msg}").unwrap();
      
  }
  result
}

## 3. Custom Parsers

In [43]:
use nom::error::ParseError;
/// Parses strings like [a-zA-Z_][a-zA-Z0-9_]*
fn ipaddress(input: &str) -> IResult<&str, String> {
    let s = separated_list1(tag("."), digit1)(input);
    if let Err(e) = s {
        return Err(e);
    } else {
        let (rem, p) = s.map(|(remaining, v)|   {
                    if v.iter().any(
                        |x| x.parse::<i32>().unwrap() < 1 || 
                            x.parse::<i32>().unwrap() > 254) {
                        return (remaining, "".to_owned());
                        // also check for reserved addresses 10., broadcast...
                    }
                    else {
                        return (remaining, format!("{:?}", v));
                    }}).unwrap();
        if p.is_empty() {
            Err(nom::Err::Error(nom::error::Error::from_error_kind(rem, nom::error::ErrorKind::TooLarge)))
            // WANT TO USE:
//             Err(nom::Err::Failure(ParseError::partial(
//              "regex",
//              "closing '/' symbol",
//              rest,
//            )))
            // complains about missing 'dyn'
         
        } else {
            Ok((rem, p))
        }
    }
      
}

In [131]:
let x : ResStr = ipaddress("255.6.7.7");

In [132]:
x

Err(Error(Error { input: "", code: TooLarge }))

In [44]:
let ip_address_str = ["194", "221", "A", "12"];
let ip_address = ip_address_str.iter().map(|d| d.parse::<i32>()).collect::<Result<Vec<_>,_>>();
match ip_address {
    Ok(_) => print!("OK"),
    _ => print!("No way")
}

Error: consider importing one of these items

In [45]:
let x = [1,4,5,6];

In [46]:
let ok : IResult<_,_> = nom::number::complete::f32(endian)
if let Ok((_, x)) = ok {
    let _ = println!("{x}");
}

Error: expected `;`, found keyword `fn`

Error: cannot find value `endian` in this scope

Error: mismatched types