From 94420d2fa30e697aae198facb073b09d7283ffe4 Mon Sep 17 00:00:00 2001 From: Jose Emilio Labra Gayo Date: Sun, 19 Mar 2023 13:03:40 +0100 Subject: [PATCH] Seems to parse prefix decl --- iri_s/Cargo.toml | 1 + iri_s/src/iri.rs | 52 ++++++++++----- iri_s/src/lib.rs | 5 +- prefix_map/src/prefix_map.rs | 26 +++++--- shex_ast/src/schema_builder.rs | 48 ++------------ shex_pest/Cargo.toml | 5 +- shex_pest/src/lib.rs | 1 + shex_pest/src/parser.rs | 118 +++++++++++++++++++++++++++++---- shex_pest/src/parser_error.rs | 40 ++++++----- shex_pest/src/shexc_error.rs | 8 ++- srdf/src/lib.rs | 6 +- 11 files changed, 204 insertions(+), 106 deletions(-) diff --git a/iri_s/Cargo.toml b/iri_s/Cargo.toml index 6aeb704..8d45604 100644 --- a/iri_s/Cargo.toml +++ b/iri_s/Cargo.toml @@ -8,3 +8,4 @@ edition.workspace = true license.workspace = true [dependencies] +oxiri = "0.2.2" diff --git a/iri_s/src/iri.rs b/iri_s/src/iri.rs index 8010089..89905bc 100644 --- a/iri_s/src/iri.rs +++ b/iri_s/src/iri.rs @@ -1,4 +1,6 @@ -use std::{ops::Add, fmt}; +use std::fmt; +use std::str::FromStr; +use oxiri::{IriRef, IriParseError}; pub trait IRI { // fn to_string(&self) -> String ; @@ -6,7 +8,8 @@ pub trait IRI { #[derive(Debug, Clone, PartialEq)] pub struct IriS { - s: String + s: String, + iri: IriRef } impl IriS { @@ -14,38 +17,53 @@ impl IriS { self.s.as_str() } - pub fn from_str(str: &str) -> IriS { - IriS { s: str.to_owned() } + pub fn extend(&self, str: &str) -> Result { + let s = self.s.clone() + str; + let iri = IriRef::parse(s)?; + Ok(IriS { s: iri.to_string(), iri: iri }) } - pub fn extend(&self, str: &str) -> Self { - let s = self.s.clone() + str; - IriS { s: s } + pub fn is_absolute(&self) -> bool { + self.iri.is_absolute() } } + impl fmt::Display for IriS { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f,"<{}>", self.s) } } -impl Add for IriS { - type Output = Self; +#[derive(Debug)] +pub struct IriError { + msg: String +} + + +impl FromStr for IriS { + type Err = IriError; - fn add(self, other: Self) -> Self { - IriS { - s: self.s + other.s.as_str() - } + fn from_str(s: &str) -> Result { + parse_iri(s) } } +impl From for IriError { + fn from(e: IriParseError) -> Self { + IriError { msg: format!("IriParserError: {:?}",e.to_string())} + } +} + +fn parse_iri(s:&str) -> Result { + let iri = IriRef::parse(s.to_owned())?; + Ok(IriS { s: iri.to_string(), iri: iri }) +} + + impl IRI for IriS { - /* fn to_string(&self) -> String { - self.s.clone() - }*/ } @@ -55,7 +73,7 @@ mod tests { #[test] fn creating_iris() { - let iri = IriS::from_str("http://example.org/") ; + let iri = IriS::from_str("http://example.org/").unwrap() ; assert_eq!(iri.to_string(), ""); } diff --git a/iri_s/src/lib.rs b/iri_s/src/lib.rs index 8ac4d03..1a84acd 100644 --- a/iri_s/src/lib.rs +++ b/iri_s/src/lib.rs @@ -1,6 +1,7 @@ pub mod iri; pub use iri::*; +use std::str::FromStr; #[cfg(test)] mod tests { @@ -8,8 +9,8 @@ mod tests { #[test] fn iri_s_test() { - let iri1: IriS = IriS::from_str("http://example.org/iri"); - let iri2 = IriS::from_str("http://example.org/iri"); + let iri1: IriS = IriS::from_str("http://example.org/iri").unwrap(); + let iri2 = IriS::from_str("http://example.org/iri").unwrap(); assert_eq!(iri1, iri2); } } diff --git a/prefix_map/src/prefix_map.rs b/prefix_map/src/prefix_map.rs index 38bba4e..81c5c2e 100644 --- a/prefix_map/src/prefix_map.rs +++ b/prefix_map/src/prefix_map.rs @@ -1,6 +1,7 @@ use std::fmt; use indexmap::IndexMap; use iri_s::*; +use std::str::FromStr; #[derive(Debug)] pub struct PrefixMap<'a> { @@ -32,17 +33,24 @@ impl <'a> PrefixMap<'a> { } } - pub fn resolve(&self, str: &str) -> Option { + pub fn resolve(&self, str: &str) -> Result, IriError> { match split(str) { Some((alias, local_name)) => { match self.find(alias) { Some(iri) => { - Some(iri.extend(local_name)) + let new_iri = iri.extend(local_name)?; + Ok(Some(new_iri)) }, - None => Some(IriS::from_str(str)) + None => { + let iri = IriS::from_str(str)?; + Ok(Some(iri)) + } } }, - None => Some(IriS::from_str(str)) + None => { + let iri = IriS::from_str(str)?; + Ok(Some(iri)) + } } } @@ -76,18 +84,18 @@ mod tests { #[test] fn prefix_map1() { let mut pm = PrefixMap::new(); - let binding = IriS::from_str("http://example.org/"); + let binding = IriS::from_str("http://example.org/").unwrap(); pm.insert("ex", &binding); - let resolved = IriS::from_str("http://example.org/name"); - assert_eq!(pm.resolve("ex:name").unwrap(), resolved); + let resolved = IriS::from_str("http://example.org/name").unwrap(); + assert_eq!(pm.resolve("ex:name").unwrap().unwrap(), resolved); } #[test] fn prefix_map_display() { let mut pm = PrefixMap::new(); - let ex_iri = IriS::from_str("http://example.org/"); + let ex_iri = IriS::from_str("http://example.org/").unwrap(); pm.insert("ex", &ex_iri); - let ex_rdf = IriS::from_str("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); + let ex_rdf = IriS::from_str("http://www.w3.org/1999/02/22-rdf-syntax-ns#").unwrap(); pm.insert("rdf", &ex_rdf); assert_eq!(pm.to_string(), "ex \nrdf \n"); diff --git a/shex_ast/src/schema_builder.rs b/shex_ast/src/schema_builder.rs index f51b12b..58c2c91 100644 --- a/shex_ast/src/schema_builder.rs +++ b/shex_ast/src/schema_builder.rs @@ -2,6 +2,7 @@ use prefix_map::PrefixMap; use srdf::iri::IriS; use std::{error::Error, fmt}; use crate::schema::Schema; +use std::str::FromStr; pub struct SchemaBuilder<'a> { inner: Result, ErrorBuildingSchema> @@ -106,51 +107,10 @@ mod tests { #[test] fn test_update() { let sb = SchemaBuilder::new(); - let schema = - update_base(sb, IriS::from_str("http://example.org/")).unwrap().build().unwrap(); - assert_eq!(schema.base(), &Some(Box::new(IriS::from_str("http://example.org/")))); - /*match update_base(&mut sb, IriS::from_str("http://example.org/")) { - Ok(sb) => { - let s = sb.build(); - assert_eq!(s.base.unwrap(), Box::new(IriS::from_str("http://example.org/"))); - }, - Err(err) => { - assert_eq!(2+2,4); - } - }; */ - // let s = r.build(); - //let r = sb; - + let iri = IriS::from_str("http://example.org/").unwrap(); + let schema = update_base(sb, iri).unwrap().build().unwrap(); + assert_eq!(schema.base(), &Some(Box::new(IriS::from_str("http://example.org/").unwrap()))); } } -/*#[test] -fn builder_test() { - use iri_s::IriS; - let foo = Schema { - id: None, - base: Some(Box::new(IriS::from_str("hi"))), - prefixes: Some(PrefixMap::new()) - }; - let mut builder = SchemaBuilder::new(); - builder.set_base(IriS::from_str("hi")); - let foo_from_builder = builder.build(); - let r1 = foo.base().map(|s| {Some(s)}).unwrap(); - let r2 = foo_from_builder.unwrap().base(); - assert_eq!(r1, *r2); -} */ - -/*#[test] -fn fn_builder() { - use iri_s::IriS; - let ex = IriS::from_str("http://example.org"); - let mut sb = SchemaBuilder::new(); - sb.set_base(IriS::from_str("hi")) - .add_prefix("rdf", &ex); - let schema = sb.build(); - let schema_base = schema.unwrap().base; - assert_eq!( - schema_base, - Some(Box::new(IriS::from_str("hi")))); -}*/ \ No newline at end of file diff --git a/shex_pest/Cargo.toml b/shex_pest/Cargo.toml index ea72d81..c7d4f52 100644 --- a/shex_pest/Cargo.toml +++ b/shex_pest/Cargo.toml @@ -11,8 +11,9 @@ iri_s = { path = "../iri_s" } shex_ast = { path = "../shex_ast" } pest = { version = "2.5.6" } pest_derive = { version = "2.5.6" } - +regex = "1.7" [dev-dependencies] env_logger = "0.10" -pretty_env_logger = "0.4" \ No newline at end of file +pretty_env_logger = "0.4" +lazy_static = "1.4" diff --git a/shex_pest/src/lib.rs b/shex_pest/src/lib.rs index 9e03af5..c070474 100644 --- a/shex_pest/src/lib.rs +++ b/shex_pest/src/lib.rs @@ -5,6 +5,7 @@ mod shexc_error; extern crate pest; #[macro_use] extern crate pest_derive; +extern crate regex; #[cfg(test)] mod tests { diff --git a/shex_pest/src/parser.rs b/shex_pest/src/parser.rs index b7998e9..4936ac4 100644 --- a/shex_pest/src/parser.rs +++ b/shex_pest/src/parser.rs @@ -1,9 +1,11 @@ use shex_ast::SchemaBuilder; use pest::iterators::{Pair, Pairs}; use crate::pest::Parser; -use crate::parser_error::ParserErrorFactory; +use crate::parser_error::*; use crate::shexc_error::ShExCError; use iri_s::IriS; +use std::str::FromStr; +use regex::Regex; #[derive(Parser)] #[grammar = "shex.pest"] @@ -15,30 +17,89 @@ pub fn parse_text<'a>(input: &'a str) -> Result, ShExCError> { let mut parsed = ShExParser::parse(Rule::shexDoc, input)?; let top_node = parsed.next().unwrap(); cnv_pairs(top_node, sb) -// todo!() } fn cnv_pairs<'a>(input_pair: Pair<'a, Rule>, sb: SchemaBuilder<'a>) -> Result, ShExCError> { - let sb = sb.set_base(IriS::from_str("http://example.org/")); + // let sb = sb.set_base(IriS::from_str("http://example.org/")); match input_pair.as_rule() { - Rule::shexDoc => { - let mut directive = input_pair.into_inner().next().unwrap(); - parse_directive(directive, sb) - }, - _ => Err(unexpected(&input_pair)) + Rule::shexDoc => { + input_pair.into_inner().fold(Ok(sb), |acc, inner_pair| { + match inner_pair.as_rule() { + Rule::directive => { + // println!("directive: {:?}", inner_pair); + directive(inner_pair.into_inner().next().unwrap(), acc) + }, + Rule::EOI => { + println!("done parsing!"); + acc + }, + _ => Err(unexpected(&inner_pair)) + } + })}, + _ => Err(unexpected(&input_pair)) } +} + + +fn directive<'a>( + pair: Pair, + acc: Result, ShExCError>) -> Result, ShExCError> { + acc.and_then(|sb| { + match pair.as_rule() { + Rule::prefixDecl => { + let mut pairs = pair.into_inner(); + println!("PrefixDecl pairs {:?}", pairs); + let alias = pname_ns(pairs.next().unwrap())?; + println!("Alias! {:?}", alias); + let iri = iri_ref(pairs.next().unwrap())?; + println!("PrefixDecl...alias {} as iri {}", alias, iri); + Ok(sb) // Ok(sb.add_prefix(alias.as_str(), &iri.clone())) + }, + Rule::baseDecl => { + let iri = iri_ref(pair.into_inner().next().unwrap())?; + Ok(sb.set_base(iri)) + } + _ => { + println!("Unexpected: {:?}", pair); + Err(unexpected(&pair)) + } + } + }) } -fn parse_directive<'a>( - pairs: Pair, - sb: SchemaBuilder<'a>) -> Result, ShExCError> { - Ok(sb) +fn iri_ref(input_pair: Pair<'_, Rule>) -> Result { + if input_pair.as_rule() == Rule::IRIREF { + let iri = input_pair.as_str().to_string(); + // strip the '<' and '>' characters. + let iri_str = unescape_iri(&iri[1..iri.len() - 1]); + let iri = IriS::from_str(&iri_str)?; + if iri.is_absolute() { + Ok(iri) + } else { + Err(absoluteIriExpected(iri)) + } + } else { + Err(unexpected(&input_pair)) + } } +fn pname_ns(input_pair: Pair<'_, Rule>) -> Result { + if input_pair.as_rule() == Rule::PNAME_NS { + let alias = input_pair.as_str().to_string(); + Ok(alias) + } else { + Err(unexpected(&input_pair)) + } +} + + +fn absoluteIriExpected(iri: IriS) -> ShExCError { + ShExCError::AbsoluteIRIExpectedError { iri: iri } +} fn unexpected(pair: &Pair) -> ShExCError { let e = ParserErrorFactory::new("ShExC") @@ -47,6 +108,35 @@ fn unexpected(pair: &Pair) -> ShExCError { } +fn unescape_iri(iri: &str) -> String { + let unicode_esc = Regex::new(r"(\\U[[:xdigit:]]{8})|(\\u[[:xdigit:]]{4})").unwrap(); + let (new_iri, end) = + unicode_esc + .captures_iter(iri) + .fold((String::new(), 0), |(so_far, start), cap| { + let cap = cap.get(0).unwrap(); + ( + format!( + "{}{}{}", + so_far, + &iri[start..cap.start()], + unescape_uchar(cap.as_str()) + ), + cap.end(), + ) + }); + + format!("{}{}", new_iri, &iri[end..]) +} + +fn unescape_uchar(uchar: &str) -> char { + use std::char; + let uchar = &uchar[2..]; + let uchar_u32 = u32::from_str_radix(uchar, 16).unwrap(); + char::from_u32(uchar_u32).unwrap() +} + + #[cfg(test)] @@ -64,7 +154,7 @@ prefix rdfs: assert!(result.is_ok()); } - #[test] +/* #[test] fn parse_simple_error() { let result: Result = parse_text( r###"bse @@ -73,6 +163,6 @@ prefix rdfs: "###, ); assert!(result.is_err()); - } + } */ } \ No newline at end of file diff --git a/shex_pest/src/parser_error.rs b/shex_pest/src/parser_error.rs index 9781dac..3e79153 100644 --- a/shex_pest/src/parser_error.rs +++ b/shex_pest/src/parser_error.rs @@ -1,4 +1,5 @@ use std::fmt::{Display, Formatter}; +use iri_s::IriError; use pest::{error::Error, RuleType}; use pest::iterators::Pair; use crate::parser::Rule; @@ -28,11 +29,27 @@ impl ParserErrorFactory { } +#[derive(Debug, Clone)] +pub struct ParserError { + repr: String, + fn_name: String, + rule: Option, + expecting: Option, + unreachable: bool, + context: Option, +} + + impl ParserError { pub(crate) fn unexpected(&mut self, pair: &Pair<'_, T>) -> &mut Self { self.context = Some(format!("{:?}: {:?}", pair.as_rule(), pair.as_str())); self - } + } + + pub(crate) fn absoluteIRIExpected(&mut self, str: String) -> &mut Self { + self.context = Some(format!("Absolute IRI expected. Found {:?}", str)); + self + } } impl From> for ShExCError { @@ -47,17 +64,10 @@ impl From for ShExCError { } } - - - -#[derive(Debug, Clone)] -pub struct ParserError { - repr: String, - fn_name: String, - rule: Option, - expecting: Option, - unreachable: bool, - context: Option, +impl From for ShExCError { + fn from(e: IriError) -> Self { + ShExCError::IRIError { msg: format!("IriError: {:?}",e)} + } } impl std::error::Error for ParserError {} @@ -72,11 +82,11 @@ impl Display for ParserError { &self.fn_name, match &self.rule { None => String::new(), - Some(s) => format!(", rule: {}", s), + Some(s) => format!(", rule: {s}"), }, match &self.expecting { None => String::new(), - Some(s) => format!(", expecting: {}", s), + Some(s) => format!(", expecting: {s}"), }, if self.unreachable { ", should have been unreachable".to_string() @@ -86,7 +96,7 @@ impl Display for ParserError { ), match &self.context { None => String::new(), - Some(s) => format!(", context: '{}'", s), + Some(s) => format!(", context: '{s}'"), } ) } diff --git a/shex_pest/src/shexc_error.rs b/shex_pest/src/shexc_error.rs index 7ce6760..dc7b112 100644 --- a/shex_pest/src/shexc_error.rs +++ b/shex_pest/src/shexc_error.rs @@ -1,6 +1,12 @@ +use iri_s::IriS; + use crate::parser_error::ParserError; +#[derive(Debug)] pub enum ShExCError { ParseError{ msg: String }, + IRIError{ msg: String }, + AbsoluteIRIExpectedError{ iri: IriS }, Unexpected(ParserError) -} \ No newline at end of file +} + diff --git a/srdf/src/lib.rs b/srdf/src/lib.rs index 7b5ad42..efe8f2b 100644 --- a/srdf/src/lib.rs +++ b/srdf/src/lib.rs @@ -8,12 +8,14 @@ pub use bnode::*; #[cfg(test)] mod tests { + use std::str::FromStr; + use super::*; #[test] fn check_2_iris() { - let iri1: IriS = IriS::from_str("http://example.org/iri"); - let iri2 = IriS::from_str("http://example.org/iri"); + let iri1: IriS = IriS::from_str("http://example.org/iri").unwrap(); + let iri2 = IriS::from_str("http://example.org/iri").unwrap(); assert_eq!(iri1, iri2); } }