diff --git a/xml-schema/src/asts.rs b/xml-schema/src/asts.rs
new file mode 100644
index 0000000..aa08c05
--- /dev/null
+++ b/xml-schema/src/asts.rs
@@ -0,0 +1,76 @@
+
+/// Flat flavor of the schema AST: a type refers to the types it is made of
+/// through a `ConcreteName` instead of embedding them.
+pub mod non_recursive {
+    use names::FullName;
+    use support::Facets;
+
+    /// Name of a type, as a pair of strings.
+    ///
+    /// NOTE(review): the name allocator builds these as
+    /// `ConcreteName(module_name, type_name)` -- confirm that this is the
+    /// only intended layout.
+    // TODO: Make this use &str so it can implement Copy, and spare clones later in the code
+    #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+    pub struct ConcreteName(pub String, pub String);
+
+    /// A complex type whose components are referenced by name.
+    ///
+    /// `TAttrs` is the attribute payload carried by `Element`, and `TExtra`
+    /// lets a pass plug in additional variants through `Extra`.
+    ///
+    /// The two leading `usize` fields of `ElementRef`, `Element`, `Choice`
+    /// and `Sequence` are bound as `min_occurs` and `max_occurs` (in that
+    /// order) by the passes that build this type.
+    ///
+    /// The declaration order of the variants is significant: it defines the
+    /// ordering produced by the derived `PartialOrd`/`Ord`.
+    #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
+    pub enum ComplexType<'input, TAttrs, TExtra> {
+        Any,
+        Empty,
+        Alias(ConcreteName),
+        /// `(base, inner)`
+        Extension(ConcreteName, ConcreteName),
+        /// `(base, inner)`
+        Restriction(ConcreteName, ConcreteName),
+        ElementRef(usize, usize, ConcreteName),
+        /// `(min_occurs, max_occurs, element name, attrs, inner type)`
+        Element(
+            usize,
+            usize,
+            FullName<'input>,
+            TAttrs,
+            ConcreteName,
+        ),
+        Choice(usize, usize, Vec<ConcreteName>),
+        Sequence(usize, usize, Vec<ConcreteName>),
+        Simple(ConcreteName),
+        Extra(TExtra),
+    }
+
+    /// A simple type whose components are referenced by name.
+    #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+    pub enum SimpleType<'input> {
+        Alias(ConcreteName),
+        /// `(base, facets)`
+        Restriction(ConcreteName, Facets<'input>),
+        List(ConcreteName),
+        Union(Vec<ConcreteName>),
+        Empty,
+    }
+}
+
+/// Tree flavor of the schema AST: a type embeds the types it is made of
+/// (boxed or in a `Vec`) and refers to named items by `FullName`.
+pub mod recursive {
+    use names::FullName;
+    use support::Facets;
+
+    /// A complex type that owns its nested types.
+    ///
+    /// `TAttrs` is the attribute payload carried by `Element`, and `TExtra`
+    /// lets a pass plug in additional variants through `Extra`.
+    ///
+    /// The two leading `usize` fields of `ElementRef`, `Element`, `GroupRef`,
+    /// `Choice` and `Sequence` are bound as `min_occurs` and `max_occurs`
+    /// (in that order) by the passes that consume this type.
+    ///
+    /// The declaration order of the variants is significant: it defines the
+    /// ordering produced by the derived `PartialOrd`/`Ord`.
+    #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
+    pub enum ComplexType<'input, TAttrs, TExtra> {
+        Any,
+        Empty,
+        Alias(FullName<'input>),
+        /// `(base, inner)`
+        Extension(FullName<'input>, Box<ComplexType<'input, TAttrs, TExtra>>),
+        /// `(base, inner)`
+        Restriction(FullName<'input>, Box<ComplexType<'input, TAttrs, TExtra>>),
+        ElementRef(usize, usize, FullName<'input>),
+        /// `(min_occurs, max_occurs, element name, attrs, inner type)`
+        Element(
+            usize,
+            usize,
+            FullName<'input>,
+            TAttrs,
+            Box<ComplexType<'input, TAttrs, TExtra>>,
+        ),
+        GroupRef(usize, usize, FullName<'input>),
+        Choice(usize, usize, Vec<ComplexType<'input, TAttrs, TExtra>>),
+        Sequence(usize, usize, Vec<ComplexType<'input, TAttrs, TExtra>>),
+        Extra(TExtra),
+        Simple(SimpleType<'input>),
+    }
+
+    /// A simple type that owns its nested types.
+    #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+    pub enum SimpleType<'input> {
+        /// A built-in type, named by two static strings (the name allocator
+        /// reads them as `(mod_name, type_name)`).
+        Primitive(&'static str, &'static str),
+        Alias(FullName<'input>),
+        /// `(base, facets)`
+        Restriction(FullName<'input>, Facets<'input>),
+        List(Box<SimpleType<'input>>),
+        Union(Vec<SimpleType<'input>>),
+        Empty,
+    }
+}
diff --git a/xml-schema/src/attrs.rs b/xml-schema/src/attrs.rs
new file mode 100644
index 0000000..d68e96b
--- /dev/null
+++ b/xml-schema/src/attrs.rs
@@ -0,0 +1,44 @@
+/// Usage constraint attached to an attribute.
+///
+/// NOTE(review): presumably mirrors the `use` attribute of `xs:attribute`
+/// -- confirm against the code that builds it.
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub enum AttrUse {
+    Prohibited,
+    Required,
+    Optional,
+}
+
+pub mod with_refs {
+    use names::FullName;
+
+    use super::AttrUse;
+
+    /// Set of attribute declarations, some of which are still references to
+    /// other declarations.
+    ///
+    /// No trait bound is put on `TSimpleType` here: each derived impl adds
+    /// the bound it needs by itself.
+    #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+    pub struct Attrs<'input, TSimpleType> {
+        /// Attributes declared in place: name, use, and optional type.
+        pub named: Vec<(FullName<'input>, AttrUse, Option<TSimpleType>)>,
+        /// References to attributes.
+        /// NOTE(review): the meaning of the optional first name is not
+        /// visible from this file -- document it where these are built.
+        pub refs: Vec<(Option<FullName<'input>>, AttrUse, FullName<'input>)>,
+        /// Names of the referenced attribute groups.
+        pub group_refs: Vec<FullName<'input>>,
+        /// Flag that is OR-ed together when two sets are merged.
+        pub any_attributes: bool,
+    }
+
+    impl<'input, TSimpleType> Attrs<'input, TSimpleType> {
+        /// Creates an empty set with `any_attributes` unset.
+        pub fn new() -> Attrs<'input, TSimpleType> {
+            Attrs {
+                named: Vec::new(),
+                refs: Vec::new(),
+                group_refs: Vec::new(),
+                any_attributes: false,
+            }
+        }
+
+        /// Appends everything from `other` to `self`.
+        ///
+        /// The three lists are concatenated as-is (no deduplication) and
+        /// `any_attributes` becomes the logical OR of both flags.
+        pub fn extend(&mut self, other: Attrs<'input, TSimpleType>) {
+            let Attrs {
+                named,
+                refs,
+                group_refs,
+                any_attributes,
+            } = other;
+            self.named.extend(named);
+            self.refs.extend(refs);
+            self.group_refs.extend(group_refs);
+            self.any_attributes |= any_attributes;
+        }
+    }
+
+    // Written by hand: `#[derive(Default)]` would needlessly require
+    // `TSimpleType: Default`.
+    impl<'input, TSimpleType> Default for Attrs<'input, TSimpleType> {
+        fn default() -> Self {
+            Attrs::new()
+        }
+    }
+}
diff --git a/xml-schema/src/lib.rs b/xml-schema/src/lib.rs index a80376d..69d2951 100644 --- a/xml-schema/src/lib.rs +++ b/xml-schema/src/lib.rs @@ -13,6 +13,17 @@ pub mod support; pub mod primitives; pub mod bigfloat; +pub mod utils; + +pub mod asts; +pub mod attrs; +pub mod toplevel; +pub mod processor2; +pub mod lift_attrs; +pub mod name_allocator; +//pub mod ungroup; +//pub mod parser_generator2; + pub mod parser; pub mod processor; pub mod parser_generator;
diff --git a/xml-schema/src/lift_attrs.rs b/xml-schema/src/lift_attrs.rs
new file mode
100644
index 0000000..ed03f79
--- /dev/null
+++ b/xml-schema/src/lift_attrs.rs
@@ -0,0 +1,263 @@
+//! Pushes attr definitions from the inner types of an element to the
+//! top-level element
+//!
+//! This module must compute a transitive closure in case of circular
+//! references:
+//! "Circular reference is not disallowed. [...] The effect is to take the
+//! transitive closure of the reference relation"
+//! https://www.w3.org/TR/xmlschema11-1/#declare-attributeGroup-core
+
+use std::collections::{HashMap, HashSet};
+
+use asts;
+use asts::recursive::ComplexType as RComplexType;
+use names::FullName;
+use utils::Bottom;
+
+use processor2::ComplexTypeExtra as InComplexTypeExtra;
+use processor2::OutAttrs as InAttrs;
+use processor2::OutComplexType as InComplexType;
+use processor2::OutSimpleType as InSimpleType;
+
+pub type OutSimpleType<'input> = InSimpleType<'input>;
+pub type OutComplexType<'input> = asts::recursive::ComplexType<'input, OutAttrs<'input>, Bottom>;
+pub type OutAttrs<'input> = InAttrs<'input>;
+
+/// Computes the attrs of every registered complex type and rewrites complex
+/// types so that attr declarations end up on their enclosing `Element`.
+///
+/// Usage: register every named type with `add_complex_type()`, call
+/// `make_transitive_closure()`, then call `transform_complex_type()`.
+pub struct AttrsLifter<'input> {
+    /// For each type name, stores a list of types that reference it.
+    /// So if the former's list of attrs is updated, then the latter's
+    /// must be updated as well.
+    reverse_deps: HashMap<FullName<'input>, HashSet<FullName<'input>>>,
+
+    /// (A subset of) the attrs of each complex type. Can converge to the full
+    /// attrs by calling `make_transitive_closure()`
+    attrs_of_complex_type: HashMap<FullName<'input>, OutAttrs<'input>>,
+
+    /// Set of elements of `attrs_of_complex_type` that are strict subset of
+    /// what they should be, and therefore should be updated.
+    outdated_complex_types: HashSet<FullName<'input>>,
+
+    /// Every type registered through `add_complex_type()`, by name.
+    complex_types: HashMap<FullName<'input>, InComplexType<'input>>,
+}
+
+impl<'input> AttrsLifter<'input> {
+    /// Creates an empty lifter whose maps are pre-sized for `capacity` types.
+    pub fn with_capacity(capacity: usize) -> AttrsLifter<'input> {
+        AttrsLifter {
+            reverse_deps: HashMap::with_capacity(capacity),
+            attrs_of_complex_type: HashMap::with_capacity(capacity),
+            outdated_complex_types: HashSet::with_capacity(capacity),
+            complex_types: HashMap::with_capacity(capacity),
+        }
+    }
+
+    /// Registers a named complex type and marks it as needing its attrs
+    /// (re)computed by `make_transitive_closure()`.
+    pub fn add_complex_type(
+        &mut self,
+        name: FullName<'input>,
+        complex_type: InComplexType<'input>,
+    ) {
+        self.add_reverse_deps(name, &complex_type);
+        self.complex_types.insert(name, complex_type);
+        self.outdated_complex_types.insert(name);
+    }
+
+    /// Returns the attrs that `complex_type` passes up to its parent
+    /// (empty if there are none), along with the type stripped of its
+    /// attr declarations.
+    ///
+    /// # Panics
+    ///
+    /// Panics if some registered types are still outdated, ie. if
+    /// `make_transitive_closure()` was not called after the last
+    /// `add_complex_type()`.
+    pub fn transform_complex_type(
+        &mut self,
+        complex_type: &InComplexType<'input>,
+    ) -> (OutAttrs<'input>, OutComplexType<'input>) {
+        assert!(self.outdated_complex_types.is_empty(), "There are outdated complex types, make_transitive_closure() should be called before transform_complex_type().");
+        let (attrs, type_) = self.get_attrs_step(complex_type);
+        (attrs.unwrap_or_else(OutAttrs::new), type_)
+    }
+
+    /// Recomputes the attrs of outdated types until none is left outdated.
+    ///
+    /// Each iteration takes one outdated type out of the work set and
+    /// recomputes its attrs; if they changed, the types that reference it
+    /// are put back in the work set.
+    ///
+    /// NOTE(review): merged attrs are concatenated without deduplication
+    /// (see `Attrs::extend`), so a cycle that contributes attrs probably
+    /// grows the lists on every pass and never converges -- to be confirmed
+    /// and, if so, fixed by deduplicating on merge.
+    ///
+    /// # Panics
+    ///
+    /// Panics if an outdated name was never registered, or if a type that
+    /// had attrs is recomputed as having none.
+    pub fn make_transitive_closure(&mut self) {
+        while let Some(name) = self.outdated_complex_types.iter().next().cloned() {
+            // Take the name out of the work set first, otherwise the loop
+            // would pick it again forever.
+            self.outdated_complex_types.remove(&name);
+
+            let complex_type = self.complex_types.get(&name).unwrap_or_else(|| {
+                panic!(
+                    "Name {:?} was supposed to be updated, but it is missing from AttrsLifter.complex_types.",
+                    name
+                )
+            });
+            let (attrs, _inner_type) = self.get_attrs_step(complex_type); // inner_type is discarded, what a waste
+            if self.attrs_of_complex_type.get(&name) != attrs.as_ref() {
+                let attrs = attrs.expect(
+                    "attrs were Some() but became None while computing transitive closure.",
+                );
+                self.attrs_of_complex_type.insert(name, attrs);
+
+                // The attrs of `name` changed, so every type built on top
+                // of it has to be recomputed.
+                if let Some(dependents) = self.reverse_deps.get(&name) {
+                    self.outdated_complex_types
+                        .extend(dependents.iter().cloned());
+                }
+            }
+        }
+    }
+
+    /// Records that `dependent` references `dependency`, creating the entry
+    /// of `dependency` if this is its first dependent.
+    fn add_reverse_dep(&mut self, dependency: FullName<'input>, dependent: FullName<'input>) {
+        self.reverse_deps
+            .entry(dependency)
+            .or_insert_with(HashSet::new)
+            .insert(dependent);
+    }
+
+    /// Walks `complex_type` and records `my_name` as a dependent of every
+    /// name it references.
+    fn add_reverse_deps(
+        &mut self,
+        my_name: FullName<'input>,
+        complex_type: &InComplexType<'input>,
+    ) {
+        match complex_type {
+            // Trivial cases
+            RComplexType::Any => {}
+            RComplexType::Empty => {}
+            RComplexType::Alias(fullname) => self.add_reverse_dep(*fullname, my_name),
+            // The actual work
+            RComplexType::Extension(base, inner) | RComplexType::Restriction(base, inner) => {
+                self.add_reverse_dep(*base, my_name);
+                self.add_reverse_deps(my_name, inner);
+            }
+            RComplexType::ElementRef(_min_occurs, _max_occurs, fullname) => {
+                self.add_reverse_dep(*fullname, my_name)
+            }
+            RComplexType::Choice(_min_occurs, _max_occurs, inners)
+            | RComplexType::Sequence(_min_occurs, _max_occurs, inners) => {
+                for inner in inners {
+                    self.add_reverse_deps(my_name, inner);
+                }
+            }
+            RComplexType::Simple(_simple_type) => {}
+
+            RComplexType::Element(_min_occurs, _max_occurs, _fullname, _attrs, inner) => {
+                self.add_reverse_deps(my_name, inner);
+            }
+            RComplexType::GroupRef(_min_occurs, _max_occurs, _fullname) => unimplemented!(),
+            RComplexType::Extra(InComplexTypeExtra::AttrDecl(_attrs, inner)) => {
+                self.add_reverse_deps(my_name, inner);
+            }
+        }
+    }
+
+    /// Runs `get_attrs_step()` on each member of a choice or a sequence and
+    /// returns the transformed members.
+    ///
+    /// `kind` is only used in the panic message.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a member passes attrs up, as this is not supported yet.
+    fn lift_particles(
+        &self,
+        kind: &str,
+        inners: &[InComplexType<'input>],
+    ) -> Vec<OutComplexType<'input>> {
+        inners
+            .iter()
+            .map(|inner| {
+                let (inner_attrs, inner_type) = self.get_attrs_step(inner);
+                if inner_attrs.is_some() {
+                    unimplemented!(
+                        "{} got attribute declaration. \
+                         I don't know what to do with that.",
+                        kind
+                    );
+                }
+                inner_type
+            })
+            .collect()
+    }
+
+    /// Computes one step of attr lifting on `complex_type`.
+    ///
+    /// Returns the attrs that `complex_type` passes up to its parent, using
+    /// the current content of `attrs_of_complex_type` for the names it
+    /// references, and the type with its `AttrDecl` nodes removed.
+    fn get_attrs_step(
+        &self,
+        complex_type: &InComplexType<'input>,
+    ) -> (Option<OutAttrs<'input>>, OutComplexType<'input>) {
+        // TODO: this function is called by make_transitive_closure, which discards the
+        // OutComplexType; this means this function is doing useless computation when it's not
+        // called by transform_complex_type.
+        // On the other hand, keeping that computation here avoids duplicating this function's code
+        // for computing OutAttrs.
+        let merge_attrs = |attrs1: Option<&OutAttrs<'input>>, attrs2| match (attrs1, attrs2) {
+            (None, attrs2) => attrs2,
+            (Some(attrs1), None) => Some(attrs1.clone()),
+            (Some(attrs1), Some(attrs2)) => {
+                let mut attrs: OutAttrs<'input> = attrs1.clone();
+                attrs.extend(attrs2);
+                Some(attrs)
+            }
+        };
+        match complex_type {
+            RComplexType::Any => (None, RComplexType::Any),
+            // An empty type stays empty; it must not be widened to `Any`.
+            RComplexType::Empty => (None, RComplexType::Empty),
+            RComplexType::Alias(fullname) => (
+                self.attrs_of_complex_type.get(fullname).cloned(),
+                RComplexType::Alias(*fullname),
+            ),
+
+            RComplexType::Extension(base, inner) => {
+                let (inner_attrs, inner_type) = self.get_attrs_step(inner);
+                (
+                    merge_attrs(self.attrs_of_complex_type.get(base), inner_attrs),
+                    RComplexType::Extension(*base, Box::new(inner_type)),
+                )
+            }
+            RComplexType::Restriction(base, inner) => {
+                let (inner_attrs, inner_type) = self.get_attrs_step(inner);
+                // Attributes are inherited from the base:
+                // "However, attribute declarations do not need to be repeated in the derived type definition"
+                // https://www.w3.org/TR/xmlschema-0/#DerivByRestrict
+                (
+                    merge_attrs(self.attrs_of_complex_type.get(base), inner_attrs),
+                    RComplexType::Restriction(*base, Box::new(inner_type)),
+                )
+            }
+            RComplexType::ElementRef(min_occurs, max_occurs, fullname) => (
+                None,
+                RComplexType::ElementRef(*min_occurs, *max_occurs, *fullname),
+            ),
+
+            RComplexType::Choice(min_occurs, max_occurs, inners) => (
+                None,
+                RComplexType::Choice(
+                    *min_occurs,
+                    *max_occurs,
+                    self.lift_particles("Choice", inners),
+                ),
+            ),
+            RComplexType::Sequence(min_occurs, max_occurs, inners) => (
+                None,
+                RComplexType::Sequence(
+                    *min_occurs,
+                    *max_occurs,
+                    self.lift_particles("Sequence", inners),
+                ),
+            ),
+            RComplexType::Simple(simple_type) => (None, RComplexType::Simple(simple_type.clone())),
+
+            RComplexType::Element(min_occurs, max_occurs, fullname, attrs, inner) => {
+                let mut attrs = attrs.clone();
+                let (inner_attrs, inner_type) = self.get_attrs_step(inner);
+                if let Some(inner_attrs) = inner_attrs {
+                    attrs.extend(inner_attrs)
+                };
+                // Elements capture the attrs for themselves and don't pass any up
+                (
+                    None,
+                    RComplexType::Element(
+                        *min_occurs,
+                        *max_occurs,
+                        *fullname,
+                        attrs,
+                        Box::new(inner_type),
+                    ),
+                )
+            }
+            RComplexType::GroupRef(min_occurs, max_occurs, fullname) => (
+                self.attrs_of_complex_type.get(fullname).cloned(),
+                RComplexType::GroupRef(*min_occurs, *max_occurs, *fullname),
+            ),
+            RComplexType::Extra(InComplexTypeExtra::AttrDecl(attrs, inner)) => {
+                let (inner_attrs, inner_type) = self.get_attrs_step(inner);
+                let attrs = match inner_attrs {
+                    Some(inner_attrs) => {
+                        let mut attrs = attrs.clone();
+                        attrs.extend(inner_attrs);
+                        attrs
+                    }
+                    None => attrs.clone(),
+                };
+                // The AttrDecl node itself disappears: only its inner type
+                // is kept, and its attrs are handed to the parent.
+                (Some(attrs), inner_type)
+            }
+        }
+    }
+}
diff --git a/xml-schema/src/name_allocator.rs b/xml-schema/src/name_allocator.rs
new file mode 100644
index 0000000..1bfc2be
--- /dev/null
+++ b/xml-schema/src/name_allocator.rs
@@ -0,0 +1,268 @@
+//! 
Transforms recursive types into flat types with unique names
+
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
+
+use asts;
+use attrs::with_refs::Attrs;
+use names::{name_from_hint, FullName, NameGenerator, NameHint};
+
+use asts::non_recursive::ComplexType as NRComplexType;
+use asts::non_recursive::ConcreteName;
+use asts::non_recursive::SimpleType as NRSimpleType;
+use asts::recursive::ComplexType as RComplexType;
+use asts::recursive::SimpleType as RSimpleType;
+
+use lift_attrs::OutAttrs as InAttrs;
+use lift_attrs::OutComplexType as InComplexType;
+use lift_attrs::OutSimpleType as InSimpleType;
+
+pub type OutSimpleType<'input> = asts::non_recursive::SimpleType<'input>;
+pub type OutComplexType<'input> =
+    asts::non_recursive::ComplexType<'input, OutAttrs<'input>, ComplexTypeExtra<'input>>;
+pub type OutAttrs<'input> = Attrs<'input, ConcreteName>;
+
+/// Variants of the output complex type that are specific to this pass.
+#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub enum ComplexTypeExtra<'input> {
+    /// Reference to a group: `(min_occurs, max_occurs, group name)`.
+    GroupRef(usize, usize, FullName<'input>),
+}
+
+/// Returns the module name of `namespace` and the generator of type names
+/// for that module, allocating both on the first use of the namespace.
+///
+/// The module name is generated from the namespace itself, or from
+/// `"unqualified"` when there is no namespace.
+fn allocate_namespace<'a, 'input>(
+    module_name_gen: &'a mut NameGenerator,
+    module_names: &'a mut HashMap<Option<&'input str>, (String, NameGenerator)>,
+    namespace: Option<&'input str>,
+) -> (String, &'a mut NameGenerator) {
+    let (ref mod_name, ref mut name_gen) = module_names.entry(namespace).or_insert_with(|| {
+        let mod_name = module_name_gen.gen_name(namespace.unwrap_or("unqualified").to_string());
+        (mod_name, NameGenerator::new())
+    });
+    (mod_name.to_string(), name_gen)
+}
+
+/// Flattens recursive types: every nested type is stored in a map under a
+/// freshly allocated `ConcreteName`, and its parent refers to it by name.
+#[derive(Debug)]
+pub struct NameAllocator<'input> {
+    module_name_gen: NameGenerator,
+    module_names: HashMap<Option<&'input str>, (String, NameGenerator)>, // namespace -> (mod_name, name_gen)
+    /// Names already handed out by `allocate_fullname()`.
+    fullname_to_concrete_name: HashMap<FullName<'input>, ConcreteName>,
+    complex_types: HashMap<ConcreteName, OutComplexType<'input>>,
+    simple_types: HashMap<ConcreteName, OutSimpleType<'input>>,
+}
+
+impl<'input> NameAllocator<'input> {
+    /// Creates an allocator that knows no namespace and no type.
+    pub fn new() -> NameAllocator<'input> {
+        NameAllocator {
+            module_name_gen: NameGenerator::new(),
+            module_names: HashMap::new(),
+            fullname_to_concrete_name: HashMap::new(),
+            complex_types: HashMap::new(),
+            simple_types: HashMap::new(),
+        }
+    }
+
+    /// Returns the concrete name of `fullname`, allocating it on first use.
+    ///
+    /// Calling this again with the same `fullname` returns the same name.
+    pub fn allocate_fullname(&mut self, fullname: FullName<'input>) -> ConcreteName {
+        let NameAllocator {
+            ref mut module_name_gen,
+            ref mut module_names,
+            ..
+        } = self;
+        let concrete_name = self
+            .fullname_to_concrete_name
+            .entry(fullname)
+            .or_insert_with(|| {
+                let (module_name, name_gen) =
+                    allocate_namespace(module_name_gen, module_names, fullname.namespace());
+                let type_name = name_gen.gen_name(fullname.local_name().to_string());
+                ConcreteName(module_name, type_name)
+            });
+        concrete_name.clone()
+    }
+
+    /// Allocates names for anonymous types not made of other types
+    fn allocate_anonymous(&mut self, namespace: Option<&'input str>, name: &str) -> ConcreteName {
+        let name_hint = NameHint::new(name);
+        let (module_name, name_gen) =
+            allocate_namespace(&mut self.module_name_gen, &mut self.module_names, namespace);
+        let type_name = name_gen.gen_name(name_from_hint(&name_hint).unwrap());
+        ConcreteName(module_name, type_name)
+    }
+
+    /// Allocates names for anonymous types made of other types (possibly
+    /// anonymous themselves)
+    ///
+    /// The name hint is made of `prefix` followed by the type name of each
+    /// of the `subtypes`.
+    fn allocate_anonymous_compound(
+        &mut self,
+        namespace: Option<&'input str>,
+        prefix: &str,
+        subtypes: &[&ConcreteName],
+    ) -> ConcreteName {
+        let mut name_hint = NameHint::new(prefix);
+        for ConcreteName(_subtype_mod_name, subtype_type_name) in subtypes.iter() {
+            name_hint.push(subtype_type_name);
+        }
+        let (module_name, name_gen) =
+            allocate_namespace(&mut self.module_name_gen, &mut self.module_names, namespace);
+        let type_name = name_gen.gen_name(name_from_hint(&name_hint).unwrap());
+        ConcreteName(module_name, type_name)
+    }
+
+    /// Flattens `recursive_complex_type` (and, recursively, the types it is
+    /// made of), registers the result, and returns the name it was
+    /// registered under.
+    ///
+    /// # Panics
+    ///
+    /// Panics with "Duplicate name" if the resulting name is already
+    /// registered as a complex type.
+    ///
+    /// NOTE(review): `Alias` and `Element` are registered under the name
+    /// returned by `allocate_fullname()`, which is stable across calls, so
+    /// flattening two aliases of the same type (or two elements with the
+    /// same full name) looks like it hits this panic -- confirm whether
+    /// that is intended.
+    pub fn allocate_complex_type(
+        &mut self,
+        namespace: Option<&'input str>,
+        recursive_complex_type: &InComplexType<'input>,
+    ) -> ConcreteName {
+        let (concrete_name, ty) = match recursive_complex_type {
+            RComplexType::Any => (
+                self.allocate_anonymous(namespace, "any"),
+                NRComplexType::Any,
+            ),
+            RComplexType::Empty => (
+                self.allocate_anonymous(namespace, "empty"),
+                NRComplexType::Empty,
+            ),
+            RComplexType::Alias(fullname) => {
+                let referee = self.allocate_fullname(*fullname);
+                (referee.clone(), NRComplexType::Alias(referee))
+            }
+            RComplexType::Extension(base, inner) => {
+                let base = self.allocate_fullname(*base);
+                let inner = self.allocate_complex_type(namespace, inner);
+                (
+                    self.allocate_anonymous_compound(namespace, "extension", &[&base, &inner]),
+                    NRComplexType::Extension(base, inner),
+                )
+            }
+            RComplexType::Restriction(base, inner) => {
+                let base = self.allocate_fullname(*base);
+                let inner = self.allocate_complex_type(namespace, inner);
+                (
+                    self.allocate_anonymous_compound(namespace, "restriction", &[&base, &inner]),
+                    NRComplexType::Restriction(base, inner),
+                )
+            }
+            RComplexType::ElementRef(min_occurs, max_occurs, fullname) => {
+                let referee = self.allocate_fullname(*fullname);
+                (
+                    self.allocate_anonymous_compound(namespace, "elementref", &[&referee]),
+                    NRComplexType::ElementRef(*min_occurs, *max_occurs, referee),
+                )
+            }
+            RComplexType::Element(min_occurs, max_occurs, fullname, attrs, inner) => {
+                let inner = self.allocate_complex_type(namespace, inner);
+                let attrs = self.allocate_attrs(namespace, attrs);
+                (
+                    self.allocate_fullname(*fullname),
+                    NRComplexType::Element(*min_occurs, *max_occurs, *fullname, attrs, inner),
+                )
+            }
+            RComplexType::GroupRef(min_occurs, max_occurs, fullname) => {
+                // The referee's name is only used to build the name of the
+                // reference itself.
+                let referee = self.allocate_fullname(*fullname);
+                (
+                    self.allocate_anonymous_compound(namespace, "groupref", &[&referee]),
+                    // Must stay distinguishable from an element reference,
+                    // so it goes through the pass-specific variant.
+                    NRComplexType::Extra(ComplexTypeExtra::GroupRef(
+                        *min_occurs,
+                        *max_occurs,
+                        *fullname,
+                    )),
+                )
+            }
+            RComplexType::Choice(min_occurs, max_occurs, inners) => {
+                let inners: Vec<_> = inners
+                    .iter()
+                    .map(|inner| self.allocate_complex_type(namespace, inner))
+                    .collect();
+                (
+                    self.allocate_anonymous_compound(
+                        namespace,
+                        "choice",
+                        &inners.iter().collect::<Vec<_>>(),
+                    ),
+                    NRComplexType::Choice(*min_occurs, *max_occurs, inners),
+                )
+            }
+            RComplexType::Sequence(min_occurs, max_occurs, inners) => {
+                let inners: Vec<_> = inners
+                    .iter()
+                    .map(|inner| self.allocate_complex_type(namespace, inner))
+                    .collect();
+                (
+                    self.allocate_anonymous_compound(
+                        namespace,
+                        "sequence",
+                        &inners.iter().collect::<Vec<_>>(),
+                    ),
+                    NRComplexType::Sequence(*min_occurs, *max_occurs, inners),
+                )
+            }
+            RComplexType::Simple(inner) => {
+                let inner = self.allocate_simple_type(namespace, inner);
+                (inner.clone(), NRComplexType::Simple(inner))
+            }
+            RComplexType::Extra(_) => unreachable!("It's the bottom type!"),
+        };
+        match self.complex_types.entry(concrete_name.clone()) {
+            Entry::Occupied(_) => panic!("Duplicate name {:?}", concrete_name),
+            Entry::Vacant(slot) => {
+                slot.insert(ty);
+            }
+        }
+        concrete_name
+    }
+
+    /// Flattens `recursive_simple_type` (and, recursively, the types it is
+    /// made of), registers the result, and returns the name it was
+    /// registered under.
+    ///
+    /// # Panics
+    ///
+    /// Panics with "Duplicate name" if the resulting name is already
+    /// registered as a simple type.
+    ///
+    /// NOTE(review): `Primitive` and `Alias` are registered under the name
+    /// of the type they point to, so flattening the same primitive or the
+    /// same alias twice looks like it hits this panic -- confirm whether
+    /// that is intended.
+    pub fn allocate_simple_type(
+        &mut self,
+        namespace: Option<&'input str>,
+        recursive_simple_type: &InSimpleType<'input>,
+    ) -> ConcreteName {
+        let (concrete_name, ty) = match recursive_simple_type {
+            RSimpleType::Primitive(mod_name, type_name) => {
+                let concrete_name = ConcreteName(mod_name.to_string(), type_name.to_string());
+                (concrete_name.clone(), NRSimpleType::Alias(concrete_name))
+            }
+            RSimpleType::Alias(fullname) => {
+                let referee = self.allocate_fullname(*fullname);
+                (referee.clone(), NRSimpleType::Alias(referee))
+            }
+            RSimpleType::Restriction(base, facets) => {
+                let base = self.allocate_fullname(*base);
+                (
+                    self.allocate_anonymous_compound(namespace, "simplerestriction", &[&base]),
+                    NRSimpleType::Restriction(base, facets.clone()),
+                )
+            }
+            RSimpleType::Union(inners) => {
+                let inners: Vec<_> = inners
+                    .iter()
+                    .map(|inner| self.allocate_simple_type(namespace, inner))
+                    .collect();
+                (
+                    self.allocate_anonymous_compound(
+                        namespace,
+                        "union",
+                        &inners.iter().collect::<Vec<_>>(),
+                    ),
+                    NRSimpleType::Union(inners),
+                )
+            }
+            RSimpleType::List(inner) => {
+                let inner = self.allocate_simple_type(namespace, inner);
+                (
+                    self.allocate_anonymous_compound(namespace, "list", &[&inner]),
+                    NRSimpleType::List(inner),
+                )
+            }
+            RSimpleType::Empty => (
+                self.allocate_anonymous(namespace, "empty"),
+                NRSimpleType::Empty,
+            ),
+        };
+
+        match self.simple_types.entry(concrete_name.clone()) {
+            Entry::Occupied(_) => panic!("Duplicate name {:?}", concrete_name),
+            Entry::Vacant(slot) => {
+                slot.insert(ty);
+            }
+        }
+        concrete_name
+    }
+
+    /// Not implemented yet: always panics, so flattening any `Element`
+    /// currently panics as well.
+    fn allocate_attrs(
+        &mut self,
+        _namespace: Option<&'input str>,
+        _attrs: &InAttrs<'input>,
+    ) -> OutAttrs<'input> {
+        unimplemented!()
+    }
+}
diff --git a/xml-schema/src/names.rs b/xml-schema/src/names.rs index e8e688a..a0d7322 100644 --- a/xml-schema/src/names.rs +++ b/xml-schema/src/names.rs @@ -13,6 +13,7 @@ fn escape_keyword(name: &str) -> String { } } +#[derive(Debug, Default)] pub(crate) struct NameGenerator(HashMap<String, usize>); impl NameGenerator { diff --git a/xml-schema/src/parser_generator.rs b/xml-schema/src/parser_generator.rs index ebed18a..06245b7 100644 --- a/xml-schema/src/parser_generator.rs +++ b/xml-schema/src/parser_generator.rs @@ -1,3 +1,5 @@ +#![allow(warnings)] + use std::collections::{HashMap, HashSet}; use codegen as cg; diff --git a/xml-schema/src/primitives.rs b/xml-schema/src/primitives.rs index 580e6ac..e48bbce 100644 --- a/xml-schema/src/primitives.rs +++ b/xml-schema/src/primitives.rs @@ -145,7 +145,8 @@ pub struct QName<'input> { } impl<'input> ParseXmlStr<'input> for QName<'input> { const NODE_NAME: &'static str = "QName"; - fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, parent_context: &ParentContext<'input>, facets: &Facets<'a>) -> Option<(&'input str, QName<'input>)> { + fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, parent_context: 
&ParentContext<'input>, _facets: &Facets<'a>) -> Option<(&'input str, QName<'input>)> { + // TODO: use facets if input.len() == 0 { return None; } @@ -214,7 +215,8 @@ impl<'input> fmt::Display for QName<'input> { pub struct AnyUri<'input>(pub &'input str); impl<'input> ParseXmlStr<'input> for AnyUri<'input> { const NODE_NAME: &'static str = "AnyUri"; - fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, _parent_context: &ParentContext<'input>, facets: &Facets<'a>) -> Option<(&'input str, AnyUri<'input>)> { + fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, _parent_context: &ParentContext<'input>, _facets: &Facets<'a>) -> Option<(&'input str, AnyUri<'input>)> { + // TODO: use facets if input.len() == 0 { return None; } @@ -411,7 +413,8 @@ pub struct AnySimpleType<'input>(pub &'input str); impl<'input> ParseXmlStr<'input> for AnySimpleType<'input> { const NODE_NAME: &'static str = "AnySimpleType"; - fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, _parent_context: &ParentContext<'input>, facets: &Facets<'a>) -> Option<(&'input str, AnySimpleType<'input>)> { + fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, _parent_context: &ParentContext<'input>, _facets: &Facets<'a>) -> Option<(&'input str, AnySimpleType<'input>)> { + // TODO: use facets Some(("", AnySimpleType(input))) } } @@ -448,7 +451,8 @@ impl<'input> ParseXmlStr<'input> for NcName<'input> { pub struct Boolean<'input>(bool, PhantomData<&'input ()>); impl<'input> ParseXmlStr<'input> for Boolean<'input> { const NODE_NAME: &'static str = "Boolean"; - fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, _parent_context: &ParentContext<'input>, facets: &Facets<'a>) -> 
Option<(&'input str, Boolean<'input>)> { + fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, _parent_context: &ParentContext<'input>, _facets: &Facets<'a>) -> Option<(&'input str, Boolean<'input>)> { + // TODO: use facets if input.len() >= 1 { match &input[0..1] { "0" => return Some((&input[1..], Boolean(false, PhantomData::default()))), diff --git a/xml-schema/src/processor.rs b/xml-schema/src/processor.rs index b433294..4a60117 100644 --- a/xml-schema/src/processor.rs +++ b/xml-schema/src/processor.rs @@ -1,3 +1,5 @@ +#![allow(warnings)] + use std::fmt::Debug; use std::hash::Hash; use std::collections::{HashMap, HashSet};
diff --git a/xml-schema/src/processor2.rs b/xml-schema/src/processor2.rs
new file mode 100644
index 0000000..cb8103f
--- /dev/null
+++ b/xml-schema/src/processor2.rs
@@ -0,0 +1,672 @@
+//! Contains a more idiomatic AST to work on, and functions to generate it from the full AST.
+
+// `HashMap` and `PhantomData` are imported explicitly rather than relying on
+// the `parser::*` glob below to provide them.
+use std::collections::HashMap;
+use std::fmt;
+use std::hash::Hash;
+use std::marker::PhantomData;
+
+use xmlparser::{TextUnescape, XmlSpace};
+
+use asts;
+use asts::recursive::{ComplexType, SimpleType};
+use attrs::with_refs::Attrs;
+use attrs::AttrUse;
+use names::FullName;
+use parser::*;
+use primitives::{AnyUri, NonNegativeInteger, QName};
+use support::Facets;
+use toplevel::Toplevel;
+
+pub const SCHEMA_URI: &'static str = "http://www.w3.org/2001/XMLSchema";
+
+/// Converts an optional `minOccurs` value to a count; defaults to 1.
+fn parse_min_occurs(x: &Option<NonNegativeInteger>) -> usize {
+    match x {
+        None => 1,
+        // NOTE(review): `as` silently truncates if the value does not fit
+        // in a `usize`.
+        Some(n) => n.0 as usize,
+    }
+}
+/// Converts an optional `maxOccurs` value to a count; defaults to 1, and
+/// the "unbounded" token is represented by the largest `usize`.
+fn parse_max_occurs(x: &Option<unions::UnionNonNegativeIntegerNmtoken>) -> usize {
+    match x {
+        None => 1,
+        Some(unions::UnionNonNegativeIntegerNmtoken::NonNegativeInteger(n)) => n.0 as usize,
+        Some(unions::UnionNonNegativeIntegerNmtoken::Nmtoken(restrictions::Unbounded(_))) => {
+            usize::max_value()
+        }
+    }
+}
+
+/// Pieces of documentation text, borrowed from the input and kept in order.
+#[derive(Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
+#[must_use]
+pub struct Documentation<'input>(Vec<&'input str>);
+impl<'input> Documentation<'input> {
+    /// Creates an empty documentation.
+    pub fn new() -> Documentation<'input> {
+        Documentation(Vec::new())
+    }
+    /// Appends the pieces of `v` after the ones of `self`.
+    pub fn extend(&mut self, v: &Documentation<'input>) {
+        self.0.extend(v.0.iter());
+    }
+}
+
+/// Writes the unescaped pieces, separated by newlines.
+///
+/// Implemented as `Display` rather than `ToString`: `to_string()` remains
+/// available through the blanket impl, and the type can also be used
+/// directly in format strings.
+impl<'input> fmt::Display for Documentation<'input> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let text = self
+            .0
+            .iter()
+            .map(|doc| TextUnescape::unescape(doc, XmlSpace::Default))
+            .collect::<Vec<_>>()
+            .join("\n");
+        f.write_str(&text)
+    }
+}
+
+pub type OutSimpleType<'input> = asts::recursive::SimpleType<'input>;
+pub type OutComplexType<'input> = asts::recursive::ComplexType<
+    'input,
+    OutAttrs<'input>,
+    ComplexTypeExtra<'input, OutAttrs<'input>>,
+>;
+pub type OutAttrs<'input> = Attrs<'input, OutSimpleType<'input>>;
+
+/// Other possibilities for ComplexType that will be shaven off by
+/// other passes
+#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub enum ComplexTypeExtra<'input, TAttrs> {
+    /// Attribute declarations attached to the wrapped complex type.
+    AttrDecl(
+        TAttrs,
+        Box<ComplexType<'input, TAttrs, ComplexTypeExtra<'input, TAttrs>>>,
+    ),
+}
+
+/// Same content as a `Toplevel`, with every item converted to this module's
+/// AST.
+#[derive(Debug)]
+pub struct SimpleToplevel<'input> {
+    pub target_namespace: Option<&'input str>,
+    pub element_form_default_qualified: bool,
+    pub attribute_form_default_qualified: bool,
+    pub elements: HashMap<FullName<'input>, OutComplexType<'input>>,
+    pub simple_types: HashMap<FullName<'input>, OutSimpleType<'input>>,
+    pub complex_types: HashMap<FullName<'input>, OutComplexType<'input>>,
+    pub groups: HashMap<FullName<'input>, OutComplexType<'input>>,
+    pub attribute_groups: HashMap<FullName<'input>, OutAttrs<'input>>,
+}
+
+/// Builds a new map with the same keys as `map`, and values transformed by
+/// `mapper`.
+fn hashmap_map<K: Hash + Eq, V1, V2, F>(map: HashMap<K, V1>, mut mapper: F) -> HashMap<K, V2>
+where
+    F: FnMut(V1) -> V2,
+{
+    map.into_iter().map(|(k, v)| (k, mapper(v))).collect()
+}
+
+impl<'input> SimpleToplevel<'input> {
+    /// Converts every item of `toplevel`, keeping its name; the namespace
+    /// and the two "form default" flags are copied as-is.
+    pub fn new_from_toplevel<'ast>(toplevel: Toplevel<'ast, 'input>) -> SimpleToplevel<'input> {
+        let Toplevel {
+            target_namespace,
+            element_form_default_qualified,
+            attribute_form_default_qualified,
+            elements,
+            simple_types,
+            complex_types,
+            groups,
+            attribute_groups,
+        } = toplevel;
+
+        let mut processor = Processor {
+            target_namespace,
+            element_form_default_qualified,
+            _phantom: PhantomData::default(),
+        };
+
+        SimpleToplevel {
+            target_namespace,
+            element_form_default_qualified,
+            attribute_form_default_qualified,
+            elements: hashmap_map(elements, |e| processor.process_toplevel_element(e)),
+            simple_types: hashmap_map(simple_types, |t| processor.process_toplevel_simple_type(t)),
+            complex_types: hashmap_map(complex_types, |t| {
+                processor.process_toplevel_complex_type(t)
+            }),
+            groups: hashmap_map(groups, |g| processor.process_toplevel_group(g)),
+            attribute_groups: hashmap_map(attribute_groups, |g| {
+                processor.process_toplevel_attribute_group(g)
+            }),
+        }
+    }
+}
+
+/// Holds the schema-wide settings used while converting items.
+struct Processor<'ast, 'input: 'ast> {
+    target_namespace: Option<&'input str>,
+    element_form_default_qualified: bool,
+    _phantom: PhantomData<&'ast ()>, // To avoid repetition in each method
+}
+
+impl<'ast, 'input: 'ast> Processor<'ast, 'input> { + fn process_toplevel_element( + &mut self, + element: &'ast xs::Element<'input>, + ) -> OutComplexType<'input> { + let xs::Element { + ref attr_type, + ref attr_name, + type_: ref child_type, + .. + } = element; + + // TODO: substitution group + + match (attr_type, &child_type) { + (None, Some(ref c)) => match c { + enums::Type::SimpleType(ref e) => ComplexType::Simple(self.process_local_simple_type(e)), + enums::Type::ComplexType(ref e) => self.process_local_complex_type(e), + }, + (Some(t), None) => ComplexType::Alias(FullName::from_qname(t, self.target_namespace)), + (None, None) => ComplexType::Empty, + (Some(ref t1), Some(ref t2)) => { + panic!( + "Toplevel element '{}:{}' has both a type attribute ({:?}) and a child type ({:?}).", + self.target_namespace.unwrap_or(""), attr_name.0, t1, t2 + ) + } + } + } + + fn process_toplevel_complex_type( + &mut self, + complex_type: &'ast xs::ComplexType<'input>, + ) -> OutComplexType<'input> { + let xs::ComplexType { + ref complex_type_model, + .. 
+ } = complex_type; + + self.process_complex_type_model(complex_type_model) + } + + fn process_complex_type_model( + &mut self, + complex_type_model: &'ast xs::ComplexTypeModel<'input>, + ) -> OutComplexType<'input> { + match complex_type_model { + xs::ComplexTypeModel::SimpleContent(_) => unimplemented!("simpleContent"), + xs::ComplexTypeModel::ComplexContent(ref model) => self.process_complex_content(model), + xs::ComplexTypeModel::CompleteContentModel { + ref open_content, + ref type_def_particle, + ref attr_decls, + ref assertions, + } => self.process_complete_content_model( + open_content, + type_def_particle, + attr_decls, + assertions, + ), + } + } + + fn process_local_complex_type( + &mut self, + complex_type: &'ast inline_elements::LocalComplexType<'input>, + ) -> OutComplexType<'input> { + let inline_elements::LocalComplexType { + ref complex_type_model, + .. + } = complex_type; + self.process_complex_type_model(complex_type_model) + } + + fn process_toplevel_simple_type( + &mut self, + simple_type: &'ast xs::SimpleType<'input>, + ) -> SimpleType<'input> { + let xs::SimpleType { + ref simple_derivation, + .. + } = simple_type; + match simple_derivation { + xs::SimpleDerivation::Restriction(e) => self.process_simple_restriction(e), + xs::SimpleDerivation::List(ref e) => self.process_list(e), + xs::SimpleDerivation::Union(ref e) => self.process_union(e), + } + } + + fn process_local_simple_type( + &mut self, + simple_type: &'ast inline_elements::LocalSimpleType<'input>, + ) -> SimpleType<'input> { + let inline_elements::LocalSimpleType { + ref simple_derivation, + .. 
+ } = simple_type; + match simple_derivation { + xs::SimpleDerivation::Restriction(e) => self.process_simple_restriction(e), + xs::SimpleDerivation::List(ref e) => self.process_list(e), + xs::SimpleDerivation::Union(ref e) => self.process_union(e), + } + } + + fn process_simple_restriction( + &mut self, + restriction: &'ast xs::Restriction<'input>, + ) -> SimpleType<'input> { + let xs::Restriction { + ref attr_base, + ref simple_restriction_model, + .. + } = restriction; + let base = attr_base; + let base = base.unwrap_or(QName { + namespace: Some(SCHEMA_URI), + local_name: "anySimpleType", + }); + let xs::SimpleRestrictionModel { + ref local_simple_type, + ref choice_facet_any, + } = simple_restriction_model; + let facets = self.process_facets(choice_facet_any); + + let base = FullName::from_qname(&base, self.target_namespace); + + match local_simple_type { + Some(inline_elements::LocalSimpleType { .. }) => { + SimpleType::Restriction(base, facets) // TODO: use the simple_derivation + } + None => SimpleType::Restriction(base, facets), + } + } + + fn process_facets( + &mut self, + facet_list: &Vec<enums::ChoiceFacetAny<'input>>, + ) -> Facets<'input> { + let mut facets = Facets::default(); + use parser::xs::Facet::*; + for facet_or_any in facet_list { + match facet_or_any { + enums::ChoiceFacetAny::Facet(e) => { + match **e { + FacetHead(_) => panic!("abstract element"), + MinExclusive(ref e) => { + facets.min_exclusive = + Some(e.attr_value.0.parse().expect("invalid minexclusive")) + } + MinInclusive(ref e) => { + facets.min_inclusive = + Some(e.attr_value.0.parse().expect("invalid mininclusive")) + } + MaxExclusive(ref e) => { + facets.max_exclusive = + Some(e.attr_value.0.parse().expect("invalid maxexclusive")) + } + MaxInclusive(ref e) => { + facets.max_inclusive = + Some(e.attr_value.0.parse().expect("invalid maxinclusive")) + } + TotalDigits(ref e) => facets.total_digits = Some(e.attr_value.0), + FractionDigits(ref e) => facets.fraction_digits = 
Some(e.attr_value.0), + Length(ref e) => facets.length = Some(e.attr_value.0 as usize), + MinLength(ref e) => facets.min_length = Some(e.attr_value.0 as usize), + MaxLength(ref e) => facets.max_length = Some(e.attr_value.0 as usize), + Enumeration(ref e) => facets + .enumeration + .get_or_insert(Vec::new()) + .push(e.attr_value.0), + WhiteSpace(ref e) => facets.white_space = Some(((e.attr_value.0).0).0), + Pattern(ref e) => facets.pattern = Some(e.attr_value.0), + Assertion(_) => unimplemented!("assertion facet"), + ExplicitTimezone(ref e) => { + facets.explicit_timezone = Some(((e.attr_value.0).0).0) + } + }; + } + enums::ChoiceFacetAny::Any(_) => (), // TODO (probably just whitespaces) + } + } + facets + } + + fn process_list(&mut self, list: &'ast xs::List<'input>) -> SimpleType<'input> { + let item_type = list.attr_item_type; + let item_type = item_type + .as_ref() + .map(|n| FullName::from_qname(n, self.target_namespace)); + + let t = match (item_type, &list.local_simple_type) { + (None, Some(st)) => self.process_local_simple_type(st), + (Some(n), None) => SimpleType::Alias(n), + (None, None) => panic!("<list> with no itemType or child type."), + (Some(ref t1), Some(ref t2)) => panic!( + "<list> has both an itemType attribute ({:?}) and a child type ({:?}).", + t1, t2 + ), + }; + + SimpleType::List(Box::new(t)) + } + + fn process_union(&mut self, union: &'ast xs::Union<'input>) -> SimpleType<'input> { + let member_types = union + .local_simple_type + .iter() + .map(|t| self.process_local_simple_type(t)) + .collect(); + + SimpleType::Union(member_types) + } + + fn process_toplevel_group(&mut self, group: &'ast xs::Group<'input>) -> OutComplexType<'input> { + let xs::Group { + choice_all_choice_sequence: ref content, + .. 
+ } = group; + + match content { + enums::ChoiceAllChoiceSequence::All(_) => unimplemented!("all"), + enums::ChoiceAllChoiceSequence::Choice(e) => self.process_choice(e), + enums::ChoiceAllChoiceSequence::Sequence(e) => self.process_sequence(e), + } + } + + fn process_toplevel_attribute_group( + &mut self, + group: &'ast xs::AttributeGroup<'input>, + ) -> OutAttrs<'input> { + self.process_attr_decls(&group.attr_decls) + } + + fn process_attr_decls(&mut self, attr_decls: &'ast xs::AttrDecls<'input>) -> OutAttrs<'input> { + let mut attrs = OutAttrs::new(); + for attr_decl in &attr_decls.attribute { + match attr_decl { + enums::AttrOrAttrGroup::Attribute(e) => { + let name = e + .attr_name + .as_ref() + .map(|ncn| FullName::new(self.target_namespace, ncn.0)); + let type_attr: Option<QName<'input>> = e.attr_type; + let use_ = match e.attr_use.as_ref().map(|x| ((x.0).0).0) { + Some("prohibited") => AttrUse::Prohibited, + Some("required") => AttrUse::Required, + Some("optional") => AttrUse::Optional, + None => AttrUse::Optional, // TODO + Some(s) => panic!("Unknown attribute value use={:?}", s), + }; + match (name, e.attr_ref, type_attr, &e.local_simple_type) { + (Some(name), None, Some(t), None) => { + let t = FullName::from_qname(&t, self.target_namespace); + attrs.named.push((name, use_, Some(SimpleType::Alias(t)))); + } + (Some(name), None, None, Some(t)) => { + let t = self.process_local_simple_type(t); + attrs.named.push((name, use_, Some(t))); + } + (Some(name), None, None, None) => attrs.named.push((name, use_, None)), + (None, None, None, None) => panic!("no attribute on <attribute>."), + (_, _, Some(ref t1), Some(ref t2)) => panic!( + "<attribute> has both a type attribute ({:?}) and a child type ({:?}).", + t1, t2 + ), + (None, None, Some(_), None) | (None, None, None, Some(_)) => { + panic!("<attribute> has a type but no name.") + } + (_, Some(_), Some(_), None) | (_, Some(_), None, Some(_)) => { + panic!("<attribute> has a type and a ref.") + } + (_, 
Some(_ref), None, None) => (), // TODO + } + } + enums::AttrOrAttrGroup::AttributeGroup(e) => { + attrs + .group_refs + .push(FullName::from_qname(&e.attr_ref, self.target_namespace)); + } + } + } + if attr_decls.any_attribute.is_some() { + attrs.any_attributes = true; + } + attrs + } + + fn process_complex_content( + &mut self, + model: &'ast xs::ComplexContent<'input>, + ) -> OutComplexType<'input> { + let xs::ComplexContent { + ref choice_restriction_extension, + .. + } = model; + match choice_restriction_extension { + enums::ChoiceRestrictionExtension::Restriction(ref r) => { + let inline_elements::ComplexRestrictionType { + ref attr_base, + ref sequence_open_content_type_def_particle, + .. + } = **r; + match sequence_open_content_type_def_particle { + Some(sequences::SequenceOpenContentTypeDefParticle { + type_def_particle, + .. + }) => self.process_complex_restriction(attr_base, type_def_particle), + None => ComplexType::Empty, + } + } + enums::ChoiceRestrictionExtension::Extension(ref e) => { + let inline_elements::ExtensionType { + ref attrs, + ref attr_base, + ref type_def_particle, + .. 
+ } = **e; + match type_def_particle { + Some(type_def_particle) => { + self.process_extension(attrs, attr_base, type_def_particle) + } + None => self.process_trivial_extension(attrs, attr_base), + } + } + } + } + + fn process_extension( + &mut self, + _attrs: &'ast HashMap<FullName<'input>, &'input str>, + attr_base: &'ast QName<'input>, + type_def_particle: &'ast xs::TypeDefParticle<'input>, + ) -> OutComplexType<'input> { + let base = FullName::from_qname(attr_base, self.target_namespace); + ComplexType::Extension( + base, + Box::new(self.process_type_def_particle(type_def_particle)), + ) + } + + fn process_trivial_extension( + &mut self, + _attrs: &'ast HashMap<FullName<'input>, &'input str>, + attr_base: &'ast QName<'input>, + ) -> OutComplexType<'input> { + let base = FullName::from_qname(&attr_base, self.target_namespace); + ComplexType::Alias(base) + } + + fn process_complete_content_model( + &mut self, + _open_content: &'ast Option<Box<xs::OpenContent<'input>>>, + type_def_particle: &'ast Option<Box<xs::TypeDefParticle<'input>>>, + attr_decls: &'ast xs::AttrDecls<'input>, + _assertions: &'ast xs::Assertions<'input>, + ) -> OutComplexType<'input> { + let ty = match type_def_particle.as_ref() { + Some(type_def_particle) => self.process_type_def_particle(type_def_particle), + None => ComplexType::Empty, + }; + ComplexType::Extra(ComplexTypeExtra::AttrDecl( + self.process_attr_decls(attr_decls), + Box::new(ty), + )) + } + + fn process_complex_restriction( + &mut self, + attr_base: &'ast QName<'input>, + type_def_particle: &'ast xs::TypeDefParticle<'input>, + ) -> OutComplexType<'input> { + // TODO: use the base + let base = FullName::from_qname(attr_base, self.target_namespace); + let ty = self.process_type_def_particle(type_def_particle); + ComplexType::Restriction(base, Box::new(ty)) + } + + fn process_type_def_particle( + &mut self, + particle: &'ast xs::TypeDefParticle<'input>, + ) -> OutComplexType<'input> { + match particle { + 
xs::TypeDefParticle::Group(e) => self.process_group_ref(e), + xs::TypeDefParticle::All(_) => unimplemented!("all"), + xs::TypeDefParticle::Choice(e) => self.process_choice(e), + xs::TypeDefParticle::Sequence(e) => self.process_sequence(e), + } + } + + fn process_group_ref( + &mut self, + group_ref: &'ast inline_elements::GroupRef<'input>, + ) -> OutComplexType<'input> { + let inline_elements::GroupRef { + ref attr_ref, + ref attr_min_occurs, + ref attr_max_occurs, + .. + } = group_ref; + let min_occurs = parse_min_occurs(attr_min_occurs); + let max_occurs = parse_max_occurs(attr_max_occurs); + let ref_ = FullName::from_qname(attr_ref, self.target_namespace); + + ComplexType::GroupRef(min_occurs, max_occurs, ref_) + } + + fn process_choice(&mut self, choice: &'ast xs::Choice<'input>) -> OutComplexType<'input> { + let xs::Choice { + ref attr_min_occurs, + ref attr_max_occurs, + ref nested_particle, + .. + } = choice; + let particles = nested_particle; + let min_occurs = parse_min_occurs(attr_min_occurs); + let max_occurs = parse_max_occurs(attr_max_occurs); + + let items = particles + .iter() + .map(|particle| self.process_nested_particle(particle)) + .collect(); + + ComplexType::Choice(min_occurs, max_occurs, items) + } + + fn process_sequence(&mut self, seq: &'ast xs::Sequence<'input>) -> OutComplexType<'input> { + let xs::Sequence { + ref attr_min_occurs, + ref attr_max_occurs, + ref nested_particle, + .. 
+ } = seq; + let particles = nested_particle; + let min_occurs = parse_min_occurs(attr_min_occurs); + let max_occurs = parse_max_occurs(attr_max_occurs); + + let items = particles + .iter() + .map(|particle| self.process_nested_particle(particle)) + .collect(); + + ComplexType::Sequence(min_occurs, max_occurs, items) + } + + fn process_nested_particle( + &mut self, + particle: &'ast xs::NestedParticle<'input>, + ) -> OutComplexType<'input> { + match particle { + xs::NestedParticle::Element(e) => self.process_local_element(e), + xs::NestedParticle::Group(e) => self.process_group_ref(e), + xs::NestedParticle::Choice(e) => self.process_choice(e), + xs::NestedParticle::Sequence(e) => self.process_sequence(e), + xs::NestedParticle::Any(e) => self.process_any(e), + } + } + + fn process_any(&mut self, _any: &'ast xs::Any<'input>) -> OutComplexType<'input> { + ComplexType::Any + } + + fn process_local_element( + &mut self, + element: &'ast inline_elements::LocalElement<'input>, + ) -> OutComplexType<'input> { + let inline_elements::LocalElement { + ref attr_name, + ref attr_ref, + ref attr_min_occurs, + ref attr_max_occurs, + ref attr_type, + ref attr_form, + ref attr_target_namespace, + ref type_, + .. 
+ } = element; + let name = attr_name; + let type_attr = attr_type; + let min_occurs = parse_min_occurs(attr_min_occurs); + let max_occurs = parse_max_occurs(attr_max_occurs); + + if let Some(ref_) = attr_ref { + if let Some(name) = name { + panic!("<element> has both ref={:?} and name={:?}", ref_, name); + } + if let Some(attr_target_namespace) = attr_target_namespace { + panic!( + "<element> has both ref={:?} and target_namespace={:?}", + ref_, attr_target_namespace + ); + } + if let Some(attr_form) = attr_form { + panic!("<element> has both ref={:?} and form={:?}", ref_, attr_form); + } + let ref_ = FullName::from_qname(ref_, self.target_namespace); + ComplexType::ElementRef(min_occurs, max_occurs, ref_) + } else { + let name = name.as_ref().expect("<element> has no name.").0; + + // https://www.w3.org/TR/xmlschema11-1/#dcl.elt.local + let qualified_form = match attr_form.as_ref().map(|x| ((x.0).0).0) { + Some("qualified") => true, + Some("unqualified") => false, + None => self.element_form_default_qualified, + _ => unreachable!(), + }; + let namespace = match (attr_target_namespace, qualified_form) { + (Some(AnyUri(target_namespace)), _) => Some(*target_namespace), + (None, true) => self.target_namespace, + (None, false) => None, + }; + + let t = match (type_attr, &type_) { + (None, Some(enums::Type::SimpleType(ref e))) => { + ComplexType::Simple(self.process_local_simple_type(e)) + } + (None, Some(enums::Type::ComplexType(ref e))) => self.process_local_complex_type(e), + (Some(t), None) => { + let t = FullName::from_qname(t, self.target_namespace); + ComplexType::Alias(t) + } + (None, None) => ComplexType::Empty, + (Some(ref t1), Some(ref t2)) => panic!( + "Element '{:?}' has both a type attribute ({:?}) and a child type ({:?}).", + name, t1, t2 + ), + }; + ComplexType::Element( + min_occurs, + max_occurs, + FullName::new(namespace, name), + OutAttrs::new(), + Box::new(t), + ) + } + } +} diff --git a/xml-schema/src/support.rs b/xml-schema/src/support.rs index 
f19141a..eb6578b 100644 --- a/xml-schema/src/support.rs +++ b/xml-schema/src/support.rs @@ -167,7 +167,7 @@ impl<'input, T> ParseXml<'input> for T where T: ParseXmlStr<'input> { Some(XmlToken::Text(strspan)) => { match Self::parse_self_xml_str(strspan.to_str(), parse_context, parent_context, &Facets::default()) { Some(("", out)) => Some(out), - Some((unparsed, _)) => None, + Some((_unparsed, _)) => None, None => None, } }
diff --git a/xml-schema/src/toplevel.rs b/xml-schema/src/toplevel.rs
new file mode 100644
index 0000000..6b0fb51
--- /dev/null
+++ b/xml-schema/src/toplevel.rs
@@ -0,0 +1,114 @@
+//! Collects named entities and global information from the root of the AST.
+use std::collections::hash_map::{Entry, HashMap};
+
+use names::FullName;
+use parser::xs;
+
+/// Inserts `sub_ast` under `name`; panics with `type_name` in the message if `name` is already in `map`.
+fn insert_unique<'ast, 'input: 'ast, T>(
+    type_name: &'static str,
+    map: &mut HashMap<FullName<'input>, T>,
+    name: FullName<'input>,
+    sub_ast: T,
+) {
+    let entry = map.entry(name);
+    match entry {
+        Entry::Occupied(_) => panic!("Duplicate {}: {:?}", type_name, name),
+        Entry::Vacant(e) => {
+            e.insert(sub_ast);
+        }
+    }
+}
+
+/// Schema-wide settings plus the named top-level entities of a schema, borrowed from its AST.
+#[derive(Debug)]
+pub struct Toplevel<'ast, 'input: 'ast> {
+    pub target_namespace: Option<&'input str>,
+    pub element_form_default_qualified: bool,
+    pub attribute_form_default_qualified: bool,
+    pub elements: HashMap<FullName<'input>, &'ast xs::Element<'input>>,
+    pub simple_types: HashMap<FullName<'input>, &'ast xs::SimpleType<'input>>,
+    pub complex_types: HashMap<FullName<'input>, &'ast xs::ComplexType<'input>>,
+    pub groups: HashMap<FullName<'input>, &'ast xs::Group<'input>>,
+    pub attribute_groups: HashMap<FullName<'input>, &'ast xs::AttributeGroup<'input>>,
+}
+
+impl<'ast, 'input: 'ast> Toplevel<'ast, 'input> {
+    /// Reads the schema-wide attributes of `ast`, then collects its entities with `process_ast`.
+    pub fn new(ast: &'ast xs::Schema<'input>) -> Toplevel<'ast, 'input> {
+        let target_namespace = ast.attr_target_namespace.as_ref().map(|t| t.0);
+        let element_form_default_qualified =
+            match ast.attr_element_form_default.as_ref().map(|x| ((x.0).0).0) {
+                Some("qualified") => true,
+                Some("unqualified") | None => false,
+                _ => unreachable!(),
+            };
+        let attribute_form_default_qualified = match ast
+            .attr_attribute_form_default
+            .as_ref()
+            .map(|x| ((x.0).0).0)
+        {
+            Some("qualified") => true,
+            Some("unqualified") | None => false,
+            _ => unreachable!(),
+        };
+        let mut toplevel = Toplevel {
+            target_namespace,
+            element_form_default_qualified,
+            attribute_form_default_qualified,
+            elements: HashMap::new(),
+            simple_types: HashMap::new(),
+            complex_types: HashMap::new(),
+            groups: HashMap::new(),
+            attribute_groups: HashMap::new(),
+        };
+        toplevel.process_ast(ast);
+        toplevel
+    }
+
+    /// Registers every top-level item of `ast`; panics on a duplicate name or an unimplemented item kind.
+    pub fn process_ast(&mut self, ast: &'ast xs::Schema<'input>) {
+        for top_level_item in ast.sequence_schema_top_annotation.iter() {
+            match top_level_item.schema_top {
+                xs::SchemaTop::Redefinable(ref r) => self.process_redefinable(r),
+                xs::SchemaTop::Element(ref e) => self.process_element(e),
+                xs::SchemaTop::Attribute(_) => unimplemented!("top-level attribute"),
+                xs::SchemaTop::Notation(_) => unimplemented!("notation"),
+            }
+        }
+    }
+
+    fn process_redefinable(&mut self, r: &'ast xs::Redefinable<'input>) {
+        match r {
+            xs::Redefinable::SimpleType(ref e) => self.process_simple_type(e),
+            xs::Redefinable::ComplexType(e) => self.process_complex_type(e),
+            xs::Redefinable::Group(e) => self.process_named_group(e),
+            xs::Redefinable::AttributeGroup(e) => self.process_attribute_group(e),
+        }
+    }
+
+    fn process_element(&mut self, element: &'ast xs::Element<'input>) {
+        let name = FullName::new(self.target_namespace, element.attr_name.0);
+        insert_unique("element", &mut self.elements, name, element);
+    }
+
+    fn process_simple_type(&mut self, simple_type: &'ast xs::SimpleType<'input>) {
+        let name = FullName::new(self.target_namespace, simple_type.attr_name.0);
+        insert_unique("simple type", &mut self.simple_types, name, simple_type);
+    }
+
+    fn process_complex_type(&mut self, complex_type: &'ast xs::ComplexType<'input>) {
+        let name = FullName::new(self.target_namespace, complex_type.attr_name.0);
+        insert_unique("complex type", &mut self.complex_types, name, complex_type);
+    }
+
+    fn process_named_group(&mut self, group: &'ast xs::Group<'input>) {
+        let name = FullName::new(self.target_namespace, group.attr_name.0);
+        insert_unique("group", &mut self.groups, name, group);
+    }
+
+    fn process_attribute_group(&mut self, attribute_group: &'ast xs::AttributeGroup<'input>) {
+        let name = FullName::new(self.target_namespace, attribute_group.attr_name.0);
+        insert_unique("attribute group", &mut self.attribute_groups, name, attribute_group);
+    }
+}
diff --git a/xml-schema/src/ungroup.rs b/xml-schema/src/ungroup.rs
new file mode 100644
index 0000000..11c7f45
--- /dev/null
+++ b/xml-schema/src/ungroup.rs
@@ -0,0 +1,80 @@
+//! Replaces `ComplexType::GroupRef`, `Attrs.refs` with the content of their target. (i.e. inlines them)
+use std::collections::HashMap;
+
+use asts;
+use asts::non_recursive::ComplexType as NRComplexType;
+use asts::non_recursive::ConcreteName;
+use asts::non_recursive::SimpleType as NRSimpleType;
+use attrs::with_refs::Attrs as InAttrs;
+use attrs::AttrUse;
+use names::FullName;
+use utils::Bottom;
+
+use name_allocator::ComplexTypeExtra as InComplexTypeExtra;
+use name_allocator::OutComplexType as InComplexType;
+use name_allocator::OutSimpleType as InSimpleType;
+
+pub type OutSimpleType<'input> = InSimpleType<'input>;
+pub type OutComplexType<'input> =
+    asts::non_recursive::ComplexType<'input, Attrs<'input, ConcreteName>, Bottom>;
+
+#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Attrs<'input, TSimpleType: Clone> {
+    pub named: Vec<(FullName<'input>, AttrUse, Option<TSimpleType>)>,
+    pub refs: Vec<(Option<FullName<'input>>, AttrUse, FullName<'input>)>,
+    pub group_refs: Vec<FullName<'input>>,
+    pub any_attributes: bool,
+}
+
+impl<'input, TSimpleType> Attrs<'input, TSimpleType>
+where
+    TSimpleType: Clone,
+{
+    fn extend(&mut self, other: Attrs<'input, TSimpleType>) {
+        let Attrs {
+            named,
+            refs,
+            group_refs,
+            any_attributes,
+        } = other;
+        self.named.extend(named);
+        self.refs.extend(refs);
+        self.group_refs.extend(group_refs);
+        self.any_attributes |= any_attributes;
+    }
+}
+
+pub type OutAttrs<'input> = Attrs<'input, OutSimpleType<'input>>;
+
+pub fn ungroup_complex_type<'input>(
+    _fullname_to_concrete_name: &HashMap<FullName<'input>, ConcreteName>,
+    _groups: HashMap<FullName<'input>, &InComplexType<'input>>,
+    complex_type: InComplexType<'input>,
+) -> OutComplexType<'input> {
+    match complex_type {
+        // Trivial cases
+        NRComplexType::Any => NRComplexType::Any,
+        NRComplexType::Empty => NRComplexType::Empty,
+        NRComplexType::Alias(cn) => NRComplexType::Alias(cn),
+        NRComplexType::Extension(cn1, cn2) => NRComplexType::Extension(cn1, cn2),
+        NRComplexType::Restriction(cn1, cn2) => NRComplexType::Restriction(cn1, cn2),
+        NRComplexType::ElementRef(min_occurs, max_occurs, cn) => {
+            NRComplexType::ElementRef(min_occurs, max_occurs, cn)
+        }
+        NRComplexType::Choice(min_occurs, max_occurs, cns) => {
+            NRComplexType::Choice(min_occurs, max_occurs, cns)
+        }
+        NRComplexType::Sequence(min_occurs, max_occurs, cns) => {
+            NRComplexType::Sequence(min_occurs, max_occurs, cns)
+        }
+        NRComplexType::Simple(cn) => NRComplexType::Simple(cn),
+
+        // The actual work
+        NRComplexType::Element(min_occurs, max_occurs, fullname, attrs, cn) => {
+            NRComplexType::Element(min_occurs, max_occurs, fullname, attrs, cn)
+        }
+        NRComplexType::Extra(InComplexTypeExtra::GroupRef(_min_occurs, _max_occurs, _cn)) => {
+            unimplemented!()
+        }
+    }
+}
diff --git a/xml-schema/src/utils.rs b/xml-schema/src/utils.rs
new file mode 100644
index 0000000..b4c4e57
--- /dev/null
+++ b/xml-schema/src/utils.rs
@@ -0,0 +1,5 @@
+/// A structure that cannot be instantiated outside this module (its only field is private)
+#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Bottom {
+    _private_attr: (),
+}