From 534cba8153cc55278346cff2c0d466dfb3828d98 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <progval+git@progval.net> Date: Tue, 12 Nov 2019 22:20:52 +0100 Subject: [PATCH 01/11] Add toplevel.rs --- xml-schema/src/toplevel.rs | 110 +++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 xml-schema/src/toplevel.rs diff --git a/xml-schema/src/toplevel.rs b/xml-schema/src/toplevel.rs new file mode 100644 index 0000000..6b0fb51 --- /dev/null +++ b/xml-schema/src/toplevel.rs @@ -0,0 +1,110 @@ +//! Collects named entities and global information from the root of the AST. +use std::collections::hash_map::{Entry, HashMap}; + +use names::FullName; +use parser::xs; + +fn insert_unique<'ast, 'input: 'ast, T>( + type_name: &'static str, + map: &mut HashMap<FullName<'input>, T>, + name: FullName<'input>, + sub_ast: T, +) { + let entry = map.entry(name); + match entry { + Entry::Occupied(_) => panic!("Duplicate {}: {:?}", type_name, name), + Entry::Vacant(e) => { + e.insert(sub_ast); + } + } +} + +#[derive(Debug)] +pub struct Toplevel<'ast, 'input: 'ast> { + pub target_namespace: Option<&'input str>, + pub element_form_default_qualified: bool, + pub attribute_form_default_qualified: bool, + pub elements: HashMap<FullName<'input>, &'ast xs::Element<'input>>, + pub simple_types: HashMap<FullName<'input>, &'ast xs::SimpleType<'input>>, + pub complex_types: HashMap<FullName<'input>, &'ast xs::ComplexType<'input>>, + pub groups: HashMap<FullName<'input>, &'ast xs::Group<'input>>, + pub attribute_groups: HashMap<FullName<'input>, &'ast xs::AttributeGroup<'input>>, +} + +impl<'ast, 'input: 'ast> Toplevel<'ast, 'input> { + pub fn new(ast: &'ast xs::Schema<'input>) -> Toplevel<'ast, 'input> { + let target_namespace = ast.attr_target_namespace.as_ref().map(|t| t.0); + let element_form_default_qualified = + match ast.attr_element_form_default.as_ref().map(|x| ((x.0).0).0) { + Some("qualified") => true, + Some("unqualified") | None => false, + _ => 
unreachable!(), + }; + let attribute_form_default_qualified = match ast + .attr_attribute_form_default + .as_ref() + .map(|x| ((x.0).0).0) + { + Some("qualified") => true, + Some("unqualified") | None => false, + _ => unreachable!(), + }; + let mut toplevel = Toplevel { + target_namespace, + element_form_default_qualified, + attribute_form_default_qualified, + elements: HashMap::new(), + simple_types: HashMap::new(), + complex_types: HashMap::new(), + groups: HashMap::new(), + attribute_groups: HashMap::new(), + }; + toplevel.process_ast(ast); + toplevel + } + + pub fn process_ast(&mut self, ast: &'ast xs::Schema<'input>) { + for top_level_item in ast.sequence_schema_top_annotation.iter() { + match top_level_item.schema_top { + xs::SchemaTop::Redefinable(ref r) => self.process_redefinable(r), + xs::SchemaTop::Element(ref e) => self.process_element(e), + xs::SchemaTop::Attribute(_) => unimplemented!("top-level attribute"), + xs::SchemaTop::Notation(_) => unimplemented!("notation"), + } + } + } + + fn process_redefinable(&mut self, r: &'ast xs::Redefinable<'input>) { + match r { + xs::Redefinable::SimpleType(ref e) => self.process_simple_type(e), + xs::Redefinable::ComplexType(e) => self.process_complex_type(e), + xs::Redefinable::Group(e) => self.process_named_group(e), + xs::Redefinable::AttributeGroup(e) => self.process_attribute_group(e), + } + } + + fn process_element(&mut self, element: &'ast xs::Element<'input>) { + let name = FullName::new(self.target_namespace, element.attr_name.0); + insert_unique("element", &mut self.elements, name, element); + } + + fn process_simple_type(&mut self, simple_type: &'ast xs::SimpleType<'input>) { + let name = FullName::new(self.target_namespace, simple_type.attr_name.0.clone()); + self.simple_types.insert(name, simple_type); + } + + fn process_complex_type(&mut self, complex_type: &'ast xs::ComplexType<'input>) { + let name = FullName::new(self.target_namespace, complex_type.attr_name.0.clone()); + 
self.complex_types.insert(name, complex_type); + } + + fn process_named_group(&mut self, group: &'ast xs::Group<'input>) { + let name = FullName::new(self.target_namespace, group.attr_name.0.clone()); + self.groups.insert(name, group); + } + + fn process_attribute_group(&mut self, attribute_group: &'ast xs::AttributeGroup<'input>) { + let name = FullName::new(self.target_namespace, attribute_group.attr_name.0.clone()); + self.attribute_groups.insert(name, attribute_group); + } +} From 3faac9b813bd37c5cd126e439f23acf09ccd5f56 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <progval+git@progval.net> Date: Wed, 13 Nov 2019 19:52:59 +0100 Subject: [PATCH 02/11] add processor2.rs --- xml-schema/src/processor2.rs | 710 +++++++++++++++++++++++++++++++++++ 1 file changed, 710 insertions(+) create mode 100644 xml-schema/src/processor2.rs diff --git a/xml-schema/src/processor2.rs b/xml-schema/src/processor2.rs new file mode 100644 index 0000000..c86c4c2 --- /dev/null +++ b/xml-schema/src/processor2.rs @@ -0,0 +1,710 @@ +//! Contains a more idiomatic AST to work on, and functions to generate it from the full AST. 
+ +use std::hash::Hash; + +use xmlparser::{TextUnescape, XmlSpace}; + +use names::FullName; +use parser::*; +use primitives::{AnyUri, NonNegativeInteger, QName}; +use support::Facets; +use toplevel::Toplevel; + +pub const SCHEMA_URI: &'static str = "http://www.w3.org/2001/XMLSchema"; + +fn parse_min_occurs(x: &Option<NonNegativeInteger>) -> usize { + match x { + None => 1, + Some(n) => n.0 as usize, + } +} +fn parse_max_occurs(x: &Option<unions::UnionNonNegativeIntegerNmtoken>) -> usize { + match x { + None => 1, + Some(unions::UnionNonNegativeIntegerNmtoken::NonNegativeInteger(n)) => n.0 as usize, + Some(unions::UnionNonNegativeIntegerNmtoken::Nmtoken(restrictions::Unbounded(_))) => { + usize::max_value() + } + } +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[must_use] +pub struct Documentation<'input>(Vec<&'input str>); +impl<'input> Documentation<'input> { + pub fn new() -> Documentation<'input> { + Documentation(Vec::new()) + } + pub fn extend(&mut self, v: &Documentation<'input>) { + self.0.extend(v.0.iter()); + } +} + +impl<'input> ToString for Documentation<'input> { + fn to_string(&self) -> String { + self.0 + .iter() + .map(|doc| TextUnescape::unescape(doc, XmlSpace::Default)) + .collect::<Vec<_>>() + .join("\n") + } +} + +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub enum AttrUse { + Prohibited, + Required, + Optional, +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct Attrs<'input> { + pub named: Vec<(FullName<'input>, AttrUse, Option<SimpleType<'input>>)>, + pub refs: Vec<(Option<FullName<'input>>, AttrUse, FullName<'input>)>, + pub group_refs: Vec<FullName<'input>>, + pub any_attributes: bool, +} +impl<'input> Attrs<'input> { + pub fn new() -> Attrs<'input> { + Attrs { + named: Vec::new(), + refs: Vec::new(), + group_refs: Vec::new(), + any_attributes: false, + } + } + fn extend(&mut self, other: Attrs<'input>) { + let Attrs { + named, + refs, + group_refs, + 
any_attributes, + } = other; + self.named.extend(named); + self.refs.extend(refs); + self.group_refs.extend(group_refs); + self.any_attributes |= any_attributes; + } +} + +#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub enum ComplexType<'input> { + Any, + Empty, + Alias(FullName<'input>), + Extension(FullName<'input>, Box<ComplexType<'input>>), + Restriction(FullName<'input>, Box<ComplexType<'input>>), + ElementRef(usize, usize, FullName<'input>), + Element( + usize, + usize, + Option<&'input str>, + &'input str, + Box<ComplexType<'input>>, + ), + Group(usize, usize, FullName<'input>), + Choice(usize, usize, Vec<ComplexType<'input>>), + Sequence(usize, usize, Vec<ComplexType<'input>>), + Simple(SimpleType<'input>), +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub enum SimpleType<'input> { + Primitive(&'static str, &'static str), + Alias(FullName<'input>), + Restriction(FullName<'input>, Facets<'input>), + List(Box<SimpleType<'input>>), + Union(Vec<SimpleType<'input>>), + Empty, +} + +#[derive(Debug)] +pub struct SimpleToplevel<'ast, 'input: 'ast> { + pub target_namespace: Option<&'input str>, + pub element_form_default_qualified: bool, + pub attribute_form_default_qualified: bool, + pub elements: HashMap<FullName<'input>, ComplexType<'input>>, + pub simple_types: HashMap<FullName<'input>, SimpleType<'input>>, + pub complex_types: HashMap<FullName<'input>, ComplexType<'input>>, + pub groups: HashMap<FullName<'input>, ComplexType<'input>>, + pub attribute_groups: HashMap<FullName<'input>, Attrs<'input>>, + _phantom: PhantomData<&'ast ()>, // Sometimes I need 'ast when prototyping +} + +fn hashmap_map<K: Hash + Eq, V1, V2, F>(map: HashMap<K, V1>, mut mapper: F) -> HashMap<K, V2> +where + F: FnMut(V1) -> V2, +{ + map.into_iter().map(|(k, v)| (k, mapper(v))).collect() +} + +impl<'ast, 'input: 'ast> SimpleToplevel<'ast, 'input> { + pub fn new_from_toplevel(toplevel: Toplevel<'ast, 'input>) -> SimpleToplevel<'ast, 'input> { + let 
Toplevel { + target_namespace, + element_form_default_qualified, + attribute_form_default_qualified, + elements, + simple_types, + complex_types, + groups, + attribute_groups, + } = toplevel; + + let mut processor = Processor { + target_namespace, + element_form_default_qualified, + _phantom: PhantomData::default(), + }; + + SimpleToplevel { + target_namespace, + element_form_default_qualified, + attribute_form_default_qualified, + elements: hashmap_map(elements, |e| processor.process_toplevel_element(e)), + simple_types: hashmap_map(simple_types, |t| processor.process_toplevel_simple_type(t)), + complex_types: hashmap_map(complex_types, |t| { + processor.process_toplevel_complex_type(t) + }), + groups: hashmap_map(groups, |g| processor.process_toplevel_group(g)), + attribute_groups: hashmap_map(attribute_groups, |g| { + processor.process_toplevel_attribute_group(g) + }), + _phantom: PhantomData::default(), + } + } +} + +struct Processor<'ast, 'input: 'ast> { + target_namespace: Option<&'input str>, + element_form_default_qualified: bool, + _phantom: PhantomData<&'ast ()>, // To avoid repetition in each method +} + +impl<'ast, 'input: 'ast> Processor<'ast, 'input> { + fn process_toplevel_element( + &mut self, + element: &'ast xs::Element<'input>, + ) -> ComplexType<'input> { + let xs::Element { + ref attr_type, + ref attr_name, + type_: ref child_type, + .. 
+ } = element; + + // TODO: substitution group + + match (attr_type, &child_type) { + (None, Some(ref c)) => match c { + enums::Type::SimpleType(ref e) => ComplexType::Simple(self.process_local_simple_type(e)), + enums::Type::ComplexType(ref e) => self.process_local_complex_type(e), + }, + (Some(t), None) => ComplexType::Alias(FullName::from_qname(t, self.target_namespace)), + (None, None) => ComplexType::Empty, + (Some(ref t1), Some(ref t2)) => { + panic!( + "Toplevel element '{}:{}' has both a type attribute ({:?}) and a child type ({:?}).", + self.target_namespace.unwrap_or(""), attr_name.0, t1, t2 + ) + } + } + } + + fn process_toplevel_complex_type( + &mut self, + complex_type: &'ast xs::ComplexType<'input>, + ) -> ComplexType<'input> { + let xs::ComplexType { + ref complex_type_model, + .. + } = complex_type; + + self.process_complex_type_model(complex_type_model) + } + + fn process_complex_type_model( + &mut self, + complex_type_model: &'ast xs::ComplexTypeModel<'input>, + ) -> ComplexType<'input> { + match complex_type_model { + xs::ComplexTypeModel::SimpleContent(_) => unimplemented!("simpleContent"), + xs::ComplexTypeModel::ComplexContent(ref model) => self.process_complex_content(model), + xs::ComplexTypeModel::CompleteContentModel { + ref open_content, + ref type_def_particle, + ref attr_decls, + ref assertions, + } => self.process_complete_content_model( + open_content, + type_def_particle, + attr_decls, + assertions, + ), + } + } + + fn process_local_complex_type( + &mut self, + complex_type: &'ast inline_elements::LocalComplexType<'input>, + ) -> ComplexType<'input> { + let inline_elements::LocalComplexType { + ref complex_type_model, + .. + } = complex_type; + self.process_complex_type_model(complex_type_model) + } + + fn process_toplevel_simple_type( + &mut self, + simple_type: &'ast xs::SimpleType<'input>, + ) -> SimpleType<'input> { + let xs::SimpleType { + ref simple_derivation, + .. 
+ } = simple_type; + match simple_derivation { + xs::SimpleDerivation::Restriction(e) => self.process_simple_restriction(e), + xs::SimpleDerivation::List(ref e) => self.process_list(e), + xs::SimpleDerivation::Union(ref e) => self.process_union(e), + } + } + + fn process_local_simple_type( + &mut self, + simple_type: &'ast inline_elements::LocalSimpleType<'input>, + ) -> SimpleType<'input> { + let inline_elements::LocalSimpleType { + ref simple_derivation, + .. + } = simple_type; + match simple_derivation { + xs::SimpleDerivation::Restriction(e) => self.process_simple_restriction(e), + xs::SimpleDerivation::List(ref e) => self.process_list(e), + xs::SimpleDerivation::Union(ref e) => self.process_union(e), + } + } + + fn process_simple_restriction( + &mut self, + restriction: &'ast xs::Restriction<'input>, + ) -> SimpleType<'input> { + let xs::Restriction { + ref attr_base, + ref simple_restriction_model, + .. + } = restriction; + let base = attr_base; + let base = base.unwrap_or(QName { + namespace: Some(SCHEMA_URI), + local_name: "anySimpleType", + }); + let xs::SimpleRestrictionModel { + ref local_simple_type, + ref choice_facet_any, + } = simple_restriction_model; + let facets = self.process_facets(choice_facet_any); + + let base = FullName::from_qname(&base, self.target_namespace); + + match local_simple_type { + Some(inline_elements::LocalSimpleType { .. 
}) => { + SimpleType::Restriction(base, facets) // TODO: use the simple_derivation + } + None => SimpleType::Restriction(base, facets), + } + } + + fn process_facets( + &mut self, + facet_list: &Vec<enums::ChoiceFacetAny<'input>>, + ) -> Facets<'input> { + let mut facets = Facets::default(); + use parser::xs::Facet::*; + for facet_or_any in facet_list { + match facet_or_any { + enums::ChoiceFacetAny::Facet(e) => { + match **e { + FacetHead(_) => panic!("abstract element"), + MinExclusive(ref e) => { + facets.min_exclusive = + Some(e.attr_value.0.parse().expect("invalid minexclusive")) + } + MinInclusive(ref e) => { + facets.min_inclusive = + Some(e.attr_value.0.parse().expect("invalid mininclusive")) + } + MaxExclusive(ref e) => { + facets.max_exclusive = + Some(e.attr_value.0.parse().expect("invalid maxexclusive")) + } + MaxInclusive(ref e) => { + facets.max_inclusive = + Some(e.attr_value.0.parse().expect("invalid maxinclusive")) + } + TotalDigits(ref e) => facets.total_digits = Some(e.attr_value.0), + FractionDigits(ref e) => facets.fraction_digits = Some(e.attr_value.0), + Length(ref e) => facets.length = Some(e.attr_value.0 as usize), + MinLength(ref e) => facets.min_length = Some(e.attr_value.0 as usize), + MaxLength(ref e) => facets.max_length = Some(e.attr_value.0 as usize), + Enumeration(ref e) => facets + .enumeration + .get_or_insert(Vec::new()) + .push(e.attr_value.0), + WhiteSpace(ref e) => facets.white_space = Some(((e.attr_value.0).0).0), + Pattern(ref e) => facets.pattern = Some(e.attr_value.0), + Assertion(_) => unimplemented!("assertion facet"), + ExplicitTimezone(ref e) => { + facets.explicit_timezone = Some(((e.attr_value.0).0).0) + } + }; + } + enums::ChoiceFacetAny::Any(_) => (), // TODO (probably just whitespaces) + } + } + facets + } + + fn process_list(&mut self, list: &'ast xs::List<'input>) -> SimpleType<'input> { + let item_type = list.attr_item_type; + let item_type = item_type + .as_ref() + .map(|n| FullName::from_qname(n, 
self.target_namespace)); + + let t = match (item_type, &list.local_simple_type) { + (None, Some(st)) => self.process_local_simple_type(st), + (Some(n), None) => SimpleType::Alias(n), + (None, None) => panic!("<list> with no itemType or child type."), + (Some(ref t1), Some(ref t2)) => panic!( + "<list> has both an itemType attribute ({:?}) and a child type ({:?}).", + t1, t2 + ), + }; + + SimpleType::List(Box::new(t)) + } + + fn process_union(&mut self, union: &'ast xs::Union<'input>) -> SimpleType<'input> { + let member_types = union + .local_simple_type + .iter() + .map(|t| self.process_local_simple_type(t)) + .collect(); + + SimpleType::Union(member_types) + } + + fn process_toplevel_group(&mut self, group: &'ast xs::Group<'input>) -> ComplexType<'input> { + let xs::Group { + choice_all_choice_sequence: ref content, + .. + } = group; + + match content { + enums::ChoiceAllChoiceSequence::All(_) => unimplemented!("all"), + enums::ChoiceAllChoiceSequence::Choice(e) => self.process_choice(e), + enums::ChoiceAllChoiceSequence::Sequence(e) => self.process_sequence(e), + } + } + + fn process_toplevel_attribute_group( + &mut self, + group: &'ast xs::AttributeGroup<'input>, + ) -> Attrs<'input> { + self.process_attr_decls(&group.attr_decls) + } + + fn process_attr_decls(&mut self, attr_decls: &'ast xs::AttrDecls<'input>) -> Attrs<'input> { + let mut attrs = Attrs::new(); + for attr_decl in &attr_decls.attribute { + match attr_decl { + enums::AttrOrAttrGroup::Attribute(e) => { + let name = e + .attr_name + .as_ref() + .map(|ncn| FullName::new(self.target_namespace, ncn.0)); + let type_attr: Option<QName<'input>> = e.attr_type; + let use_ = match e.attr_use.as_ref().map(|x| ((x.0).0).0) { + Some("prohibited") => AttrUse::Prohibited, + Some("required") => AttrUse::Required, + Some("optional") => AttrUse::Optional, + None => AttrUse::Optional, // TODO + Some(s) => panic!("Unknown attribute value use={:?}", s), + }; + match (name, e.attr_ref, type_attr, &e.local_simple_type) { 
+ (Some(name), None, Some(t), None) => { + let t = FullName::from_qname(&t, self.target_namespace); + attrs.named.push((name, use_, Some(SimpleType::Alias(t)))); + } + (Some(name), None, None, Some(t)) => { + let t = self.process_local_simple_type(t); + attrs.named.push((name, use_, Some(t))); + } + (Some(name), None, None, None) => attrs.named.push((name, use_, None)), + (None, None, None, None) => panic!("no attribute on <attribute>."), + (_, _, Some(ref t1), Some(ref t2)) => panic!( + "<attribute> has both a type attribute ({:?}) and a child type ({:?}).", + t1, t2 + ), + (None, None, Some(_), None) | (None, None, None, Some(_)) => { + panic!("<attribute> has a type but no name.") + } + (_, Some(_), Some(_), None) | (_, Some(_), None, Some(_)) => { + panic!("<attribute> has a type and a ref.") + } + (_, Some(_ref), None, None) => (), // TODO + } + } + enums::AttrOrAttrGroup::AttributeGroup(e) => { + attrs + .group_refs + .push(FullName::from_qname(&e.attr_ref, self.target_namespace)); + } + } + } + if attr_decls.any_attribute.is_some() { + attrs.any_attributes = true; + } + attrs + } + + fn process_complex_content( + &mut self, + model: &'ast xs::ComplexContent<'input>, + ) -> ComplexType<'input> { + let xs::ComplexContent { + ref choice_restriction_extension, + .. + } = model; + match choice_restriction_extension { + enums::ChoiceRestrictionExtension::Restriction(ref r) => { + let inline_elements::ComplexRestrictionType { + ref attr_base, + ref sequence_open_content_type_def_particle, + .. + } = **r; + match sequence_open_content_type_def_particle { + Some(sequences::SequenceOpenContentTypeDefParticle { + type_def_particle, + .. + }) => self.process_complex_restriction(attr_base, type_def_particle), + None => ComplexType::Empty, + } + } + enums::ChoiceRestrictionExtension::Extension(ref e) => { + let inline_elements::ExtensionType { + ref attrs, + ref attr_base, + ref type_def_particle, + .. 
+ } = **e; + match type_def_particle { + Some(type_def_particle) => { + self.process_extension(attrs, attr_base, type_def_particle) + } + None => self.process_trivial_extension(attrs, attr_base), + } + } + } + } + + fn process_extension( + &mut self, + _attrs: &'ast HashMap<FullName<'input>, &'input str>, + attr_base: &'ast QName<'input>, + type_def_particle: &'ast xs::TypeDefParticle<'input>, + ) -> ComplexType<'input> { + let base = FullName::from_qname(attr_base, self.target_namespace); + ComplexType::Extension( + base, + Box::new(self.process_type_def_particle(type_def_particle)), + ) + } + + fn process_trivial_extension( + &mut self, + _attrs: &'ast HashMap<FullName<'input>, &'input str>, + attr_base: &'ast QName<'input>, + ) -> ComplexType<'input> { + let base = FullName::from_qname(&attr_base, self.target_namespace); + ComplexType::Alias(base) + } + + fn process_complete_content_model( + &mut self, + _open_content: &'ast Option<Box<xs::OpenContent<'input>>>, + type_def_particle: &'ast Option<Box<xs::TypeDefParticle<'input>>>, + _attr_decls: &'ast xs::AttrDecls<'input>, + _assertions: &'ast xs::Assertions<'input>, + ) -> ComplexType<'input> { + match type_def_particle.as_ref() { + Some(type_def_particle) => self.process_type_def_particle(type_def_particle), + None => ComplexType::Empty, + } + } + + fn process_complex_restriction( + &mut self, + attr_base: &'ast QName<'input>, + type_def_particle: &'ast xs::TypeDefParticle<'input>, + ) -> ComplexType<'input> { + // TODO: use the base + let base = FullName::from_qname(attr_base, self.target_namespace); + let ty = self.process_type_def_particle(type_def_particle); + ComplexType::Restriction(base, Box::new(ty)) + } + + fn process_type_def_particle( + &mut self, + particle: &'ast xs::TypeDefParticle<'input>, + ) -> ComplexType<'input> { + match particle { + xs::TypeDefParticle::Group(e) => self.process_group_ref(e), + xs::TypeDefParticle::All(_) => unimplemented!("all"), + xs::TypeDefParticle::Choice(e) => 
self.process_choice(e), + xs::TypeDefParticle::Sequence(e) => self.process_sequence(e), + } + } + + fn process_group_ref( + &mut self, + group_ref: &'ast inline_elements::GroupRef<'input>, + ) -> ComplexType<'input> { + let inline_elements::GroupRef { + ref attr_ref, + ref attr_min_occurs, + ref attr_max_occurs, + .. + } = group_ref; + let min_occurs = parse_min_occurs(attr_min_occurs); + let max_occurs = parse_max_occurs(attr_max_occurs); + let ref_ = FullName::from_qname(attr_ref, self.target_namespace); + + ComplexType::Group(min_occurs, max_occurs, ref_) + } + + fn process_choice(&mut self, choice: &'ast xs::Choice<'input>) -> ComplexType<'input> { + let xs::Choice { + ref attr_min_occurs, + ref attr_max_occurs, + ref nested_particle, + .. + } = choice; + let particles = nested_particle; + let min_occurs = parse_min_occurs(attr_min_occurs); + let max_occurs = parse_max_occurs(attr_max_occurs); + + let items = particles + .iter() + .map(|particle| self.process_nested_particle(particle)) + .collect(); + + ComplexType::Choice(min_occurs, max_occurs, items) + } + + fn process_sequence(&mut self, seq: &'ast xs::Sequence<'input>) -> ComplexType<'input> { + let xs::Sequence { + ref attr_min_occurs, + ref attr_max_occurs, + ref nested_particle, + .. 
+ } = seq; + let particles = nested_particle; + let min_occurs = parse_min_occurs(attr_min_occurs); + let max_occurs = parse_max_occurs(attr_max_occurs); + + let items = particles + .iter() + .map(|particle| self.process_nested_particle(particle)) + .collect(); + + ComplexType::Sequence(min_occurs, max_occurs, items) + } + + fn process_nested_particle( + &mut self, + particle: &'ast xs::NestedParticle<'input>, + ) -> ComplexType<'input> { + match particle { + xs::NestedParticle::Element(e) => self.process_local_element(e), + xs::NestedParticle::Group(e) => self.process_group_ref(e), + xs::NestedParticle::Choice(e) => self.process_choice(e), + xs::NestedParticle::Sequence(e) => self.process_sequence(e), + xs::NestedParticle::Any(e) => self.process_any(e), + } + } + + fn process_any(&mut self, _any: &'ast xs::Any<'input>) -> ComplexType<'input> { + ComplexType::Any + } + + fn process_local_element( + &mut self, + element: &'ast inline_elements::LocalElement<'input>, + ) -> ComplexType<'input> { + let inline_elements::LocalElement { + ref attr_name, + ref attr_ref, + ref attr_min_occurs, + ref attr_max_occurs, + ref attr_type, + ref attr_form, + ref attr_target_namespace, + ref type_, + .. 
+ } = element; + let name = attr_name; + let type_attr = attr_type; + let min_occurs = parse_min_occurs(attr_min_occurs); + let max_occurs = parse_max_occurs(attr_max_occurs); + + if let Some(ref_) = attr_ref { + if let Some(name) = name { + panic!("<element> has both ref={:?} and name={:?}", ref_, name); + } + if let Some(attr_target_namespace) = attr_target_namespace { + panic!( + "<element> has both ref={:?} and target_namespace={:?}", + ref_, attr_target_namespace + ); + } + if let Some(attr_form) = attr_form { + panic!("<element> has both ref={:?} and form={:?}", ref_, attr_form); + } + let ref_ = FullName::from_qname(ref_, self.target_namespace); + ComplexType::ElementRef(min_occurs, max_occurs, ref_) + } else { + let name = name.as_ref().expect("<element> has no name.").0; + + // https://www.w3.org/TR/xmlschema11-1/#dcl.elt.local + let qualified_form = match attr_form.as_ref().map(|x| ((x.0).0).0) { + Some("qualified") => true, + Some("unqualified") => false, + None => self.element_form_default_qualified, + _ => unreachable!(), + }; + let namespace = match (attr_target_namespace, qualified_form) { + (Some(AnyUri(target_namespace)), _) => Some(*target_namespace), + (None, true) => self.target_namespace, + (None, false) => None, + }; + + let t = match (type_attr, &type_) { + (None, Some(enums::Type::SimpleType(ref e))) => { + ComplexType::Simple(self.process_local_simple_type(e)) + } + (None, Some(enums::Type::ComplexType(ref e))) => self.process_local_complex_type(e), + (Some(t), None) => { + let t = FullName::from_qname(t, self.target_namespace); + ComplexType::Alias(t) + } + (None, None) => ComplexType::Empty, + (Some(ref t1), Some(ref t2)) => panic!( + "Element '{:?}' has both a type attribute ({:?}) and a child type ({:?}).", + name, t1, t2 + ), + }; + ComplexType::Element(min_occurs, max_occurs, namespace, name, Box::new(t)) + } + } +} From 2d3a5671eeb36926e6e6cc3323828d68f64788f9 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <progval+git@progval.net> 
Date: Wed, 13 Nov 2019 21:51:01 +0100 Subject: [PATCH 03/11] Rename ComplexType -> RecursiveComplexType --- xml-schema/src/processor2.rs | 142 +++++++++++++++++++---------------- 1 file changed, 78 insertions(+), 64 deletions(-) diff --git a/xml-schema/src/processor2.rs b/xml-schema/src/processor2.rs index c86c4c2..3eb2193 100644 --- a/xml-schema/src/processor2.rs +++ b/xml-schema/src/processor2.rs @@ -28,7 +28,7 @@ fn parse_max_occurs(x: &Option<unions::UnionNonNegativeIntegerNmtoken>) -> usize } } -#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] #[must_use] pub struct Documentation<'input>(Vec<&'input str>); impl<'input> Documentation<'input> { @@ -57,9 +57,13 @@ pub enum AttrUse { Optional, } -#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct Attrs<'input> { - pub named: Vec<(FullName<'input>, AttrUse, Option<SimpleType<'input>>)>, + pub named: Vec<( + FullName<'input>, + AttrUse, + Option<RecursiveSimpleType<'input>>, + )>, pub refs: Vec<(Option<FullName<'input>>, AttrUse, FullName<'input>)>, pub group_refs: Vec<FullName<'input>>, pub any_attributes: bool, @@ -87,34 +91,36 @@ impl<'input> Attrs<'input> { } } +/// Direct retranscription of XSD's complexType in a Rust-friendly way #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub enum ComplexType<'input> { +pub enum RecursiveComplexType<'input> { Any, Empty, Alias(FullName<'input>), - Extension(FullName<'input>, Box<ComplexType<'input>>), - Restriction(FullName<'input>, Box<ComplexType<'input>>), + Extension(FullName<'input>, Box<RecursiveComplexType<'input>>), + Restriction(FullName<'input>, Box<RecursiveComplexType<'input>>), ElementRef(usize, usize, FullName<'input>), Element( usize, usize, Option<&'input str>, &'input str, - Box<ComplexType<'input>>, + Box<RecursiveComplexType<'input>>, ), - Group(usize, usize, FullName<'input>), - Choice(usize, usize, 
Vec<ComplexType<'input>>), - Sequence(usize, usize, Vec<ComplexType<'input>>), - Simple(SimpleType<'input>), + GroupRef(usize, usize, FullName<'input>), + Choice(usize, usize, Vec<RecursiveComplexType<'input>>), + Sequence(usize, usize, Vec<RecursiveComplexType<'input>>), + Simple(RecursiveSimpleType<'input>), } -#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub enum SimpleType<'input> { +/// Direct retranscription of XSD's simpleType in a Rust-friendly way +#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub enum RecursiveSimpleType<'input> { Primitive(&'static str, &'static str), Alias(FullName<'input>), Restriction(FullName<'input>, Facets<'input>), - List(Box<SimpleType<'input>>), - Union(Vec<SimpleType<'input>>), + List(Box<RecursiveSimpleType<'input>>), + Union(Vec<RecursiveSimpleType<'input>>), Empty, } @@ -123,10 +129,10 @@ pub struct SimpleToplevel<'ast, 'input: 'ast> { pub target_namespace: Option<&'input str>, pub element_form_default_qualified: bool, pub attribute_form_default_qualified: bool, - pub elements: HashMap<FullName<'input>, ComplexType<'input>>, - pub simple_types: HashMap<FullName<'input>, SimpleType<'input>>, - pub complex_types: HashMap<FullName<'input>, ComplexType<'input>>, - pub groups: HashMap<FullName<'input>, ComplexType<'input>>, + pub elements: HashMap<FullName<'input>, RecursiveComplexType<'input>>, + pub simple_types: HashMap<FullName<'input>, RecursiveSimpleType<'input>>, + pub complex_types: HashMap<FullName<'input>, RecursiveComplexType<'input>>, + pub groups: HashMap<FullName<'input>, RecursiveComplexType<'input>>, pub attribute_groups: HashMap<FullName<'input>, Attrs<'input>>, _phantom: PhantomData<&'ast ()>, // Sometimes I need 'ast when prototyping } @@ -185,7 +191,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_toplevel_element( &mut self, element: &'ast xs::Element<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { let xs::Element { ref attr_type, ref 
attr_name, @@ -197,11 +203,11 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { match (attr_type, &child_type) { (None, Some(ref c)) => match c { - enums::Type::SimpleType(ref e) => ComplexType::Simple(self.process_local_simple_type(e)), + enums::Type::SimpleType(ref e) => RecursiveComplexType::Simple(self.process_local_simple_type(e)), enums::Type::ComplexType(ref e) => self.process_local_complex_type(e), }, - (Some(t), None) => ComplexType::Alias(FullName::from_qname(t, self.target_namespace)), - (None, None) => ComplexType::Empty, + (Some(t), None) => RecursiveComplexType::Alias(FullName::from_qname(t, self.target_namespace)), + (None, None) => RecursiveComplexType::Empty, (Some(ref t1), Some(ref t2)) => { panic!( "Toplevel element '{}:{}' has both a type attribute ({:?}) and a child type ({:?}).", @@ -214,7 +220,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_toplevel_complex_type( &mut self, complex_type: &'ast xs::ComplexType<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { let xs::ComplexType { ref complex_type_model, .. @@ -226,7 +232,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_complex_type_model( &mut self, complex_type_model: &'ast xs::ComplexTypeModel<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { match complex_type_model { xs::ComplexTypeModel::SimpleContent(_) => unimplemented!("simpleContent"), xs::ComplexTypeModel::ComplexContent(ref model) => self.process_complex_content(model), @@ -247,7 +253,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_local_complex_type( &mut self, complex_type: &'ast inline_elements::LocalComplexType<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { let inline_elements::LocalComplexType { ref complex_type_model, .. 
@@ -258,7 +264,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_toplevel_simple_type( &mut self, simple_type: &'ast xs::SimpleType<'input>, - ) -> SimpleType<'input> { + ) -> RecursiveSimpleType<'input> { let xs::SimpleType { ref simple_derivation, .. @@ -273,7 +279,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_local_simple_type( &mut self, simple_type: &'ast inline_elements::LocalSimpleType<'input>, - ) -> SimpleType<'input> { + ) -> RecursiveSimpleType<'input> { let inline_elements::LocalSimpleType { ref simple_derivation, .. @@ -288,7 +294,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_simple_restriction( &mut self, restriction: &'ast xs::Restriction<'input>, - ) -> SimpleType<'input> { + ) -> RecursiveSimpleType<'input> { let xs::Restriction { ref attr_base, ref simple_restriction_model, @@ -309,9 +315,9 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { match local_simple_type { Some(inline_elements::LocalSimpleType { .. 
}) => { - SimpleType::Restriction(base, facets) // TODO: use the simple_derivation + RecursiveSimpleType::Restriction(base, facets) // TODO: use the simple_derivation } - None => SimpleType::Restriction(base, facets), + None => RecursiveSimpleType::Restriction(base, facets), } } @@ -365,7 +371,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { facets } - fn process_list(&mut self, list: &'ast xs::List<'input>) -> SimpleType<'input> { + fn process_list(&mut self, list: &'ast xs::List<'input>) -> RecursiveSimpleType<'input> { let item_type = list.attr_item_type; let item_type = item_type .as_ref() @@ -373,7 +379,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { let t = match (item_type, &list.local_simple_type) { (None, Some(st)) => self.process_local_simple_type(st), - (Some(n), None) => SimpleType::Alias(n), + (Some(n), None) => RecursiveSimpleType::Alias(n), (None, None) => panic!("<list> with no itemType or child type."), (Some(ref t1), Some(ref t2)) => panic!( "<list> has both an itemType attribute ({:?}) and a child type ({:?}).", @@ -381,20 +387,23 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { ), }; - SimpleType::List(Box::new(t)) + RecursiveSimpleType::List(Box::new(t)) } - fn process_union(&mut self, union: &'ast xs::Union<'input>) -> SimpleType<'input> { + fn process_union(&mut self, union: &'ast xs::Union<'input>) -> RecursiveSimpleType<'input> { let member_types = union .local_simple_type .iter() .map(|t| self.process_local_simple_type(t)) .collect(); - SimpleType::Union(member_types) + RecursiveSimpleType::Union(member_types) } - fn process_toplevel_group(&mut self, group: &'ast xs::Group<'input>) -> ComplexType<'input> { + fn process_toplevel_group( + &mut self, + group: &'ast xs::Group<'input>, + ) -> RecursiveComplexType<'input> { let xs::Group { choice_all_choice_sequence: ref content, .. 
@@ -434,7 +443,9 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { match (name, e.attr_ref, type_attr, &e.local_simple_type) { (Some(name), None, Some(t), None) => { let t = FullName::from_qname(&t, self.target_namespace); - attrs.named.push((name, use_, Some(SimpleType::Alias(t)))); + attrs + .named + .push((name, use_, Some(RecursiveSimpleType::Alias(t)))); } (Some(name), None, None, Some(t)) => { let t = self.process_local_simple_type(t); @@ -471,7 +482,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_complex_content( &mut self, model: &'ast xs::ComplexContent<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { let xs::ComplexContent { ref choice_restriction_extension, .. @@ -488,7 +499,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { type_def_particle, .. }) => self.process_complex_restriction(attr_base, type_def_particle), - None => ComplexType::Empty, + None => RecursiveComplexType::Empty, } } enums::ChoiceRestrictionExtension::Extension(ref e) => { @@ -513,9 +524,9 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { _attrs: &'ast HashMap<FullName<'input>, &'input str>, attr_base: &'ast QName<'input>, type_def_particle: &'ast xs::TypeDefParticle<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { let base = FullName::from_qname(attr_base, self.target_namespace); - ComplexType::Extension( + RecursiveComplexType::Extension( base, Box::new(self.process_type_def_particle(type_def_particle)), ) @@ -525,9 +536,9 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { &mut self, _attrs: &'ast HashMap<FullName<'input>, &'input str>, attr_base: &'ast QName<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { let base = FullName::from_qname(&attr_base, self.target_namespace); - ComplexType::Alias(base) + RecursiveComplexType::Alias(base) } fn process_complete_content_model( @@ -536,10 +547,10 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { type_def_particle: &'ast 
Option<Box<xs::TypeDefParticle<'input>>>, _attr_decls: &'ast xs::AttrDecls<'input>, _assertions: &'ast xs::Assertions<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { match type_def_particle.as_ref() { Some(type_def_particle) => self.process_type_def_particle(type_def_particle), - None => ComplexType::Empty, + None => RecursiveComplexType::Empty, } } @@ -547,17 +558,17 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { &mut self, attr_base: &'ast QName<'input>, type_def_particle: &'ast xs::TypeDefParticle<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { // TODO: use the base let base = FullName::from_qname(attr_base, self.target_namespace); let ty = self.process_type_def_particle(type_def_particle); - ComplexType::Restriction(base, Box::new(ty)) + RecursiveComplexType::Restriction(base, Box::new(ty)) } fn process_type_def_particle( &mut self, particle: &'ast xs::TypeDefParticle<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { match particle { xs::TypeDefParticle::Group(e) => self.process_group_ref(e), xs::TypeDefParticle::All(_) => unimplemented!("all"), @@ -569,7 +580,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_group_ref( &mut self, group_ref: &'ast inline_elements::GroupRef<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { let inline_elements::GroupRef { ref attr_ref, ref attr_min_occurs, @@ -580,10 +591,10 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { let max_occurs = parse_max_occurs(attr_max_occurs); let ref_ = FullName::from_qname(attr_ref, self.target_namespace); - ComplexType::Group(min_occurs, max_occurs, ref_) + RecursiveComplexType::GroupRef(min_occurs, max_occurs, ref_) } - fn process_choice(&mut self, choice: &'ast xs::Choice<'input>) -> ComplexType<'input> { + fn process_choice(&mut self, choice: &'ast xs::Choice<'input>) -> RecursiveComplexType<'input> { let xs::Choice { ref attr_min_occurs, ref attr_max_occurs, 
@@ -599,10 +610,13 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { .map(|particle| self.process_nested_particle(particle)) .collect(); - ComplexType::Choice(min_occurs, max_occurs, items) + RecursiveComplexType::Choice(min_occurs, max_occurs, items) } - fn process_sequence(&mut self, seq: &'ast xs::Sequence<'input>) -> ComplexType<'input> { + fn process_sequence( + &mut self, + seq: &'ast xs::Sequence<'input>, + ) -> RecursiveComplexType<'input> { let xs::Sequence { ref attr_min_occurs, ref attr_max_occurs, @@ -618,13 +632,13 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { .map(|particle| self.process_nested_particle(particle)) .collect(); - ComplexType::Sequence(min_occurs, max_occurs, items) + RecursiveComplexType::Sequence(min_occurs, max_occurs, items) } fn process_nested_particle( &mut self, particle: &'ast xs::NestedParticle<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { match particle { xs::NestedParticle::Element(e) => self.process_local_element(e), xs::NestedParticle::Group(e) => self.process_group_ref(e), @@ -634,14 +648,14 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { } } - fn process_any(&mut self, _any: &'ast xs::Any<'input>) -> ComplexType<'input> { - ComplexType::Any + fn process_any(&mut self, _any: &'ast xs::Any<'input>) -> RecursiveComplexType<'input> { + RecursiveComplexType::Any } fn process_local_element( &mut self, element: &'ast inline_elements::LocalElement<'input>, - ) -> ComplexType<'input> { + ) -> RecursiveComplexType<'input> { let inline_elements::LocalElement { ref attr_name, ref attr_ref, @@ -672,7 +686,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { panic!("<element> has both ref={:?} and form={:?}", ref_, attr_form); } let ref_ = FullName::from_qname(ref_, self.target_namespace); - ComplexType::ElementRef(min_occurs, max_occurs, ref_) + RecursiveComplexType::ElementRef(min_occurs, max_occurs, ref_) } else { let name = name.as_ref().expect("<element> has no name.").0; @@ 
-691,20 +705,20 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { let t = match (type_attr, &type_) { (None, Some(enums::Type::SimpleType(ref e))) => { - ComplexType::Simple(self.process_local_simple_type(e)) + RecursiveComplexType::Simple(self.process_local_simple_type(e)) } (None, Some(enums::Type::ComplexType(ref e))) => self.process_local_complex_type(e), (Some(t), None) => { let t = FullName::from_qname(t, self.target_namespace); - ComplexType::Alias(t) + RecursiveComplexType::Alias(t) } - (None, None) => ComplexType::Empty, + (None, None) => RecursiveComplexType::Empty, (Some(ref t1), Some(ref t2)) => panic!( "Element '{:?}' has both a type attribute ({:?}) and a child type ({:?}).", name, t1, t2 ), }; - ComplexType::Element(min_occurs, max_occurs, namespace, name, Box::new(t)) + RecursiveComplexType::Element(min_occurs, max_occurs, namespace, name, Box::new(t)) } } } From d657c69e25ff36f2e0034b0689df563167fb0e7d Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <progval+git@progval.net> Date: Wed, 13 Nov 2019 23:08:37 +0100 Subject: [PATCH 04/11] Use FullName in Element. 
--- xml-schema/src/processor2.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/xml-schema/src/processor2.rs b/xml-schema/src/processor2.rs index 3eb2193..589619c 100644 --- a/xml-schema/src/processor2.rs +++ b/xml-schema/src/processor2.rs @@ -103,8 +103,7 @@ pub enum RecursiveComplexType<'input> { Element( usize, usize, - Option<&'input str>, - &'input str, + FullName<'input>, Box<RecursiveComplexType<'input>>, ), GroupRef(usize, usize, FullName<'input>), @@ -718,7 +717,12 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { name, t1, t2 ), }; - RecursiveComplexType::Element(min_occurs, max_occurs, namespace, name, Box::new(t)) + RecursiveComplexType::Element( + min_occurs, + max_occurs, + FullName::new(namespace, name), + Box::new(t), + ) } } } From 63840f70e5cd0d124d7aaf7fcfdb645b8fb03440 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <progval+git@progval.net> Date: Wed, 13 Nov 2019 23:08:48 +0100 Subject: [PATCH 05/11] Add name_allocator.rs. --- xml-schema/src/name_allocator.rs | 269 +++++++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 xml-schema/src/name_allocator.rs diff --git a/xml-schema/src/name_allocator.rs b/xml-schema/src/name_allocator.rs new file mode 100644 index 0000000..9d8d510 --- /dev/null +++ b/xml-schema/src/name_allocator.rs @@ -0,0 +1,269 @@ +//! 
Transforms recursive types into flat types with unique names + +use std::collections::hash_map::Entry; +use std::collections::HashMap; + +use names::{name_from_hint, FullName, NameGenerator, NameHint}; +use processor2::{RecursiveComplexType, RecursiveSimpleType}; +use support::Facets; + +// TODO: Make this use &str so it can implement Copy, and spare clones later in the code +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct ConcreteName(String, String); + +#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub enum ComplexType<'input> { + Any, + Empty, + Alias(ConcreteName), + Extension(ConcreteName, ConcreteName), + Restriction(ConcreteName, ConcreteName), + ElementRef(usize, usize, ConcreteName), + Element(usize, usize, FullName<'input>, ConcreteName), + GroupRef(usize, usize, ConcreteName), + Choice(usize, usize, Vec<ConcreteName>), + Sequence(usize, usize, Vec<ConcreteName>), + Simple(ConcreteName), +} + +#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub enum SimpleType<'input> { + Alias(ConcreteName), + Restriction(ConcreteName, Facets<'input>), + List(ConcreteName), + Union(Vec<ConcreteName>), + Empty, +} + +fn allocate_namespace<'a, 'input>( + module_name_gen: &'a mut NameGenerator, + module_names: &'a mut HashMap<Option<&'input str>, (String, NameGenerator)>, + namespace: Option<&'input str>, +) -> (String, &'a mut NameGenerator) { + let (ref mod_name, ref mut name_gen) = module_names.entry(namespace).or_insert_with(|| { + let mod_name = module_name_gen.gen_name(namespace.unwrap_or("unqualified").to_string()); + (mod_name, NameGenerator::new()) + }); + (mod_name.to_string(), name_gen) +} + +#[derive(Debug)] +pub struct NameAllocator<'input> { + module_name_gen: NameGenerator, + module_names: HashMap<Option<&'input str>, (String, NameGenerator)>, // namespace -> (mod_name, name_gen) + fullname_to_concrete_name: HashMap<FullName<'input>, ConcreteName>, + complex_types: HashMap<ConcreteName, ComplexType<'input>>, + 
simple_types: HashMap<ConcreteName, SimpleType<'input>>, +} + +impl<'input> NameAllocator<'input> { + pub fn new() -> NameAllocator<'input> { + NameAllocator { + module_name_gen: NameGenerator::new(), + module_names: HashMap::new(), + fullname_to_concrete_name: HashMap::new(), + complex_types: HashMap::new(), + simple_types: HashMap::new(), + } + } + + pub fn allocate_fullname(&mut self, fullname: FullName<'input>) -> ConcreteName { + let NameAllocator { + ref mut module_name_gen, + ref mut module_names, + .. + } = self; + let concrete_name = self + .fullname_to_concrete_name + .entry(fullname) + .or_insert_with(|| { + let (module_name, name_gen) = + allocate_namespace(module_name_gen, module_names, fullname.namespace()); + let type_name = name_gen.gen_name(fullname.local_name().to_string()); + ConcreteName(module_name, type_name) + }); + concrete_name.clone() + } + + /// Allocates names for anonymous types not made of other types + fn allocate_anonymous(&mut self, namespace: Option<&'input str>, name: &str) -> ConcreteName { + let name_hint = NameHint::new(name); + let (module_name, name_gen) = + allocate_namespace(&mut self.module_name_gen, &mut self.module_names, namespace); + let type_name = name_gen.gen_name(name_from_hint(&name_hint).unwrap()); + ConcreteName(module_name, type_name) + } + + /// Allocates names for anonymous types made of other types (possibly + /// anonymous themselves) + fn allocate_anonymous_compound( + &mut self, + namespace: Option<&'input str>, + prefix: &str, + subtypes: &[&ConcreteName], + ) -> ConcreteName { + let mut name_hint = NameHint::new(prefix); + for ConcreteName(_subtype_mod_name, subtype_type_name) in subtypes.iter() { + name_hint.push(subtype_type_name); + } + let (module_name, name_gen) = + allocate_namespace(&mut self.module_name_gen, &mut self.module_names, namespace); + let type_name = name_gen.gen_name(name_from_hint(&name_hint).unwrap()); + ConcreteName(module_name, type_name) + } + + pub fn allocate_complex_type( + 
&mut self, + namespace: Option<&'input str>, + recursive_complex_type: &RecursiveComplexType<'input>, + ) -> ConcreteName { + let (concrete_name, ty) = match recursive_complex_type { + RecursiveComplexType::Any => { + (self.allocate_anonymous(namespace, "any"), ComplexType::Any) + } + RecursiveComplexType::Empty => ( + self.allocate_anonymous(namespace, "empty"), + ComplexType::Empty, + ), + RecursiveComplexType::Alias(fullname) => { + let referee = self.allocate_fullname(*fullname); + (referee.clone(), ComplexType::Alias(referee)) + } + RecursiveComplexType::Extension(base, inner) => { + let base = self.allocate_fullname(*base); + let inner = self.allocate_complex_type(namespace, inner); + ( + self.allocate_anonymous_compound(namespace, "extension", &[&base, &inner]), + ComplexType::Extension(base, inner), + ) + } + RecursiveComplexType::Restriction(base, inner) => { + let base = self.allocate_fullname(*base); + let inner = self.allocate_complex_type(namespace, inner); + ( + self.allocate_anonymous_compound(namespace, "restriction", &[&base, &inner]), + ComplexType::Restriction(base, inner), + ) + } + RecursiveComplexType::ElementRef(min_occurs, max_occurs, fullname) => { + let referee = self.allocate_fullname(*fullname); + ( + self.allocate_anonymous_compound(namespace, "elementref", &[&referee]), + ComplexType::ElementRef(*min_occurs, *max_occurs, referee), + ) + } + RecursiveComplexType::Element(min_occurs, max_occurs, fullname, inner) => { + let inner = self.allocate_complex_type(namespace, inner); + ( + self.allocate_fullname(*fullname), + ComplexType::Element(*min_occurs, *max_occurs, *fullname, inner), + ) + } + RecursiveComplexType::GroupRef(min_occurs, max_occurs, fullname) => { + let referee = self.allocate_fullname(*fullname); + ( + self.allocate_anonymous_compound(namespace, "groupref", &[&referee]), + ComplexType::ElementRef(*min_occurs, *max_occurs, referee), + ) + } + RecursiveComplexType::Choice(min_occurs, max_occurs, inners) => { + let inners: 
Vec<_> = inners + .iter() + .map(|inner| self.allocate_complex_type(namespace, inner)) + .collect(); + ( + self.allocate_anonymous_compound( + namespace, + "choice", + &inners.iter().collect::<Vec<_>>(), + ), + ComplexType::Choice(*min_occurs, *max_occurs, inners), + ) + } + RecursiveComplexType::Sequence(min_occurs, max_occurs, inners) => { + let inners: Vec<_> = inners + .iter() + .map(|inner| self.allocate_complex_type(namespace, inner)) + .collect(); + ( + self.allocate_anonymous_compound( + namespace, + "sequence", + &inners.iter().collect::<Vec<_>>(), + ), + ComplexType::Sequence(*min_occurs, *max_occurs, inners), + ) + } + RecursiveComplexType::Simple(inner) => { + let inner = self.allocate_simple_type(namespace, inner); + (inner.clone(), ComplexType::Simple(inner)) + } + }; + let entry = self.complex_types.entry(concrete_name.clone()); + if let Entry::Occupied(_) = entry { + panic!("Duplicate name {:?}", concrete_name) + } + entry.or_insert(ty); + concrete_name + } + + pub fn allocate_simple_type( + &mut self, + namespace: Option<&'input str>, + recursive_simple_type: &RecursiveSimpleType<'input>, + ) -> ConcreteName { + let (concrete_name, ty) = match recursive_simple_type { + RecursiveSimpleType::Primitive(mod_name, type_name) => { + let concrete_name = ConcreteName(mod_name.to_string(), type_name.to_string()); + (concrete_name.clone(), SimpleType::Alias(concrete_name)) + } + RecursiveSimpleType::Alias(fullname) => { + let referee = self.allocate_fullname(*fullname); + (referee.clone(), SimpleType::Alias(referee)) + } + RecursiveSimpleType::Restriction(base, facets) => { + let base = self.allocate_fullname(*base); + ( + self.allocate_anonymous_compound(namespace, "simplerestriction", &[&base]), + SimpleType::Restriction(base, facets.clone()), + ) + } + RecursiveSimpleType::Union(inners) => { + let inners: Vec<_> = inners + .iter() + .map(|inner| self.allocate_simple_type(namespace, inner)) + .collect(); + ( + self.allocate_anonymous_compound( + namespace, 
+ "union", + &inners.iter().collect::<Vec<_>>(), + ), + SimpleType::Union(inners), + ) + } + RecursiveSimpleType::List(inner) => { + let inner = self.allocate_simple_type(namespace, inner); + ( + self.allocate_anonymous_compound( + namespace, + "list", + &[&inner], + ), + SimpleType::List(inner), + ) + } + RecursiveSimpleType::Empty => ( + self.allocate_anonymous(namespace, "empty"), + SimpleType::Empty, + ), + }; + + let entry = self.simple_types.entry(concrete_name.clone()); + if let Entry::Occupied(_) = entry { + panic!("Duplicate name {:?}", concrete_name) + } + entry.or_insert(ty); + concrete_name + } +} From 8224ef37611b637527c90afd43d3692d0f7bd556 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <progval+git@progval.net> Date: Thu, 14 Nov 2019 19:06:42 +0100 Subject: [PATCH 06/11] Drop useless 'ast --- xml-schema/src/processor2.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/xml-schema/src/processor2.rs b/xml-schema/src/processor2.rs index 589619c..51c782b 100644 --- a/xml-schema/src/processor2.rs +++ b/xml-schema/src/processor2.rs @@ -124,7 +124,7 @@ pub enum RecursiveSimpleType<'input> { } #[derive(Debug)] -pub struct SimpleToplevel<'ast, 'input: 'ast> { +pub struct SimpleToplevel<'input> { pub target_namespace: Option<&'input str>, pub element_form_default_qualified: bool, pub attribute_form_default_qualified: bool, @@ -133,7 +133,6 @@ pub struct SimpleToplevel<'ast, 'input: 'ast> { pub complex_types: HashMap<FullName<'input>, RecursiveComplexType<'input>>, pub groups: HashMap<FullName<'input>, RecursiveComplexType<'input>>, pub attribute_groups: HashMap<FullName<'input>, Attrs<'input>>, - _phantom: PhantomData<&'ast ()>, // Sometimes I need 'ast when prototyping } fn hashmap_map<K: Hash + Eq, V1, V2, F>(map: HashMap<K, V1>, mut mapper: F) -> HashMap<K, V2> @@ -143,8 +142,8 @@ where map.into_iter().map(|(k, v)| (k, mapper(v))).collect() } -impl<'ast, 'input: 'ast> SimpleToplevel<'ast, 'input> { - pub fn 
new_from_toplevel(toplevel: Toplevel<'ast, 'input>) -> SimpleToplevel<'ast, 'input> { +impl<'input> SimpleToplevel<'input> { + pub fn new_from_toplevel<'ast>(toplevel: Toplevel<'ast, 'input>) -> SimpleToplevel<'input> { let Toplevel { target_namespace, element_form_default_qualified, @@ -175,7 +174,6 @@ impl<'ast, 'input: 'ast> SimpleToplevel<'ast, 'input> { attribute_groups: hashmap_map(attribute_groups, |g| { processor.process_toplevel_attribute_group(g) }), - _phantom: PhantomData::default(), } } } From 9e0e525b5ccd2697d0b87c099767f8b0a4d9ed97 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <progval+git@progval.net> Date: Thu, 14 Nov 2019 22:02:40 +0100 Subject: [PATCH 07/11] Introduce Attrs where needed, and add asts.rs to share common asts with 'Extra' variant. --- xml-schema/src/asts.rs | 76 ++++++++++++++ xml-schema/src/name_allocator.rs | 136 +++++++++++++------------- xml-schema/src/names.rs | 1 + xml-schema/src/processor2.rs | 163 +++++++++++++------------------ 4 files changed, 214 insertions(+), 162 deletions(-) create mode 100644 xml-schema/src/asts.rs diff --git a/xml-schema/src/asts.rs b/xml-schema/src/asts.rs new file mode 100644 index 0000000..d13fc6d --- /dev/null +++ b/xml-schema/src/asts.rs @@ -0,0 +1,76 @@ + +pub mod non_recursive { + use names::FullName; + use support::Facets; + + // TODO: Make this use &str so it can implement Copy, and spare clones later in the code + #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] + pub struct ConcreteName(pub String, pub String); + + #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] + pub enum ComplexType<'input, TAttrs, TExtra> { + Any, + Empty, + Alias(ConcreteName), + Extension(ConcreteName, ConcreteName), + Restriction(ConcreteName, ConcreteName), + ElementRef(usize, usize, ConcreteName), + Element( + usize, + usize, + FullName<'input>, + TAttrs, + ConcreteName, + ), + Choice(usize, usize, Vec<ConcreteName>), + Sequence(usize, usize, Vec<ConcreteName>), + Simple(ConcreteName), + 
Extra(TExtra), + } + + #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] + pub enum SimpleType<'input> { + Alias(ConcreteName), + Restriction(ConcreteName, Facets<'input>), + List(ConcreteName), + Union(Vec<ConcreteName>), + Empty, + } +} + +pub mod recursive { + use names::FullName; + use support::Facets; + + #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] + pub enum ComplexType<'input, TAttrs, TExtra> { + Any, + Empty, + Alias(FullName<'input>), + Extension(FullName<'input>, Box<ComplexType<'input, TAttrs, TExtra>>), + Restriction(FullName<'input>, Box<ComplexType<'input, TAttrs, TExtra>>), + ElementRef(usize, usize, FullName<'input>), + Element( + usize, + usize, + FullName<'input>, + TAttrs, + Box<ComplexType<'input, TAttrs, TExtra>>, + ), + GroupRef(usize, usize, FullName<'input>), + Choice(usize, usize, Vec<ComplexType<'input, TAttrs, TExtra>>), + Sequence(usize, usize, Vec<ComplexType<'input, TAttrs, TExtra>>), + Extra(TExtra), + Simple(SimpleType<'input>), + } + + #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] + pub enum SimpleType<'input> { + Primitive(&'static str, &'static str), + Alias(FullName<'input>), + Restriction(FullName<'input>, Facets<'input>), + List(Box<SimpleType<'input>>), + Union(Vec<SimpleType<'input>>), + Empty, + } +} diff --git a/xml-schema/src/name_allocator.rs b/xml-schema/src/name_allocator.rs index 9d8d510..5400336 100644 --- a/xml-schema/src/name_allocator.rs +++ b/xml-schema/src/name_allocator.rs @@ -3,36 +3,27 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; +use asts; +use attrs_bubble_up::Attrs; use names::{name_from_hint, FullName, NameGenerator, NameHint}; -use processor2::{RecursiveComplexType, RecursiveSimpleType}; -use support::Facets; +use utils::Bottom; -// TODO: Make this use &str so it can implement Copy, and spare clones later in the code -#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub struct ConcreteName(String, String); +use 
asts::non_recursive::ComplexType as NRComplexType; +use asts::non_recursive::ConcreteName; +use asts::non_recursive::SimpleType as NRSimpleType; +use asts::recursive::ComplexType as RComplexType; +use asts::recursive::SimpleType as RSimpleType; -#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub enum ComplexType<'input> { - Any, - Empty, - Alias(ConcreteName), - Extension(ConcreteName, ConcreteName), - Restriction(ConcreteName, ConcreteName), - ElementRef(usize, usize, ConcreteName), - Element(usize, usize, FullName<'input>, ConcreteName), - GroupRef(usize, usize, ConcreteName), - Choice(usize, usize, Vec<ConcreteName>), - Sequence(usize, usize, Vec<ConcreteName>), - Simple(ConcreteName), -} +pub type InSimpleType<'input> = asts::recursive::SimpleType<'input>; +pub type InComplexType<'input> = + asts::recursive::ComplexType<'input, Attrs<'input, InSimpleType<'input>>, Bottom>; +pub type OutSimpleType<'input> = asts::non_recursive::SimpleType<'input>; +pub type OutComplexType<'input> = + asts::non_recursive::ComplexType<'input, Attrs<'input, ConcreteName>, ComplexTypeExtra<'input>>; #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub enum SimpleType<'input> { - Alias(ConcreteName), - Restriction(ConcreteName, Facets<'input>), - List(ConcreteName), - Union(Vec<ConcreteName>), - Empty, +pub enum ComplexTypeExtra<'input> { + GroupRef(usize, usize, FullName<'input>), } fn allocate_namespace<'a, 'input>( @@ -52,8 +43,8 @@ pub struct NameAllocator<'input> { module_name_gen: NameGenerator, module_names: HashMap<Option<&'input str>, (String, NameGenerator)>, // namespace -> (mod_name, name_gen) fullname_to_concrete_name: HashMap<FullName<'input>, ConcreteName>, - complex_types: HashMap<ConcreteName, ComplexType<'input>>, - simple_types: HashMap<ConcreteName, SimpleType<'input>>, + complex_types: HashMap<ConcreteName, OutComplexType<'input>>, + simple_types: HashMap<ConcreteName, OutSimpleType<'input>>, } impl<'input> NameAllocator<'input> { @@ -115,58 
+106,60 @@ impl<'input> NameAllocator<'input> { pub fn allocate_complex_type( &mut self, namespace: Option<&'input str>, - recursive_complex_type: &RecursiveComplexType<'input>, + recursive_complex_type: &InComplexType<'input>, ) -> ConcreteName { let (concrete_name, ty) = match recursive_complex_type { - RecursiveComplexType::Any => { - (self.allocate_anonymous(namespace, "any"), ComplexType::Any) - } - RecursiveComplexType::Empty => ( + RComplexType::Any => ( + self.allocate_anonymous(namespace, "any"), + NRComplexType::Any, + ), + RComplexType::Empty => ( self.allocate_anonymous(namespace, "empty"), - ComplexType::Empty, + NRComplexType::Empty, ), - RecursiveComplexType::Alias(fullname) => { + RComplexType::Alias(fullname) => { let referee = self.allocate_fullname(*fullname); - (referee.clone(), ComplexType::Alias(referee)) + (referee.clone(), NRComplexType::Alias(referee)) } - RecursiveComplexType::Extension(base, inner) => { + RComplexType::Extension(base, inner) => { let base = self.allocate_fullname(*base); let inner = self.allocate_complex_type(namespace, inner); ( self.allocate_anonymous_compound(namespace, "extension", &[&base, &inner]), - ComplexType::Extension(base, inner), + NRComplexType::Extension(base, inner), ) } - RecursiveComplexType::Restriction(base, inner) => { + RComplexType::Restriction(base, inner) => { let base = self.allocate_fullname(*base); let inner = self.allocate_complex_type(namespace, inner); ( self.allocate_anonymous_compound(namespace, "restriction", &[&base, &inner]), - ComplexType::Restriction(base, inner), + NRComplexType::Restriction(base, inner), ) } - RecursiveComplexType::ElementRef(min_occurs, max_occurs, fullname) => { + RComplexType::ElementRef(min_occurs, max_occurs, fullname) => { let referee = self.allocate_fullname(*fullname); ( self.allocate_anonymous_compound(namespace, "elementref", &[&referee]), - ComplexType::ElementRef(*min_occurs, *max_occurs, referee), + NRComplexType::ElementRef(*min_occurs, *max_occurs, 
referee), ) } - RecursiveComplexType::Element(min_occurs, max_occurs, fullname, inner) => { + RComplexType::Element(min_occurs, max_occurs, fullname, attrs, inner) => { let inner = self.allocate_complex_type(namespace, inner); + let attrs = self.allocate_attrs(namespace, attrs); ( self.allocate_fullname(*fullname), - ComplexType::Element(*min_occurs, *max_occurs, *fullname, inner), + NRComplexType::Element(*min_occurs, *max_occurs, *fullname, attrs, inner), ) } - RecursiveComplexType::GroupRef(min_occurs, max_occurs, fullname) => { + RComplexType::GroupRef(min_occurs, max_occurs, fullname) => { let referee = self.allocate_fullname(*fullname); ( self.allocate_anonymous_compound(namespace, "groupref", &[&referee]), - ComplexType::ElementRef(*min_occurs, *max_occurs, referee), + NRComplexType::ElementRef(*min_occurs, *max_occurs, referee), ) } - RecursiveComplexType::Choice(min_occurs, max_occurs, inners) => { + RComplexType::Choice(min_occurs, max_occurs, inners) => { let inners: Vec<_> = inners .iter() .map(|inner| self.allocate_complex_type(namespace, inner)) @@ -177,10 +170,10 @@ impl<'input> NameAllocator<'input> { "choice", &inners.iter().collect::<Vec<_>>(), ), - ComplexType::Choice(*min_occurs, *max_occurs, inners), + NRComplexType::Choice(*min_occurs, *max_occurs, inners), ) } - RecursiveComplexType::Sequence(min_occurs, max_occurs, inners) => { + RComplexType::Sequence(min_occurs, max_occurs, inners) => { let inners: Vec<_> = inners .iter() .map(|inner| self.allocate_complex_type(namespace, inner)) @@ -191,13 +184,14 @@ impl<'input> NameAllocator<'input> { "sequence", &inners.iter().collect::<Vec<_>>(), ), - ComplexType::Sequence(*min_occurs, *max_occurs, inners), + NRComplexType::Sequence(*min_occurs, *max_occurs, inners), ) } - RecursiveComplexType::Simple(inner) => { + RComplexType::Simple(inner) => { let inner = self.allocate_simple_type(namespace, inner); - (inner.clone(), ComplexType::Simple(inner)) + (inner.clone(), NRComplexType::Simple(inner)) } + 
RComplexType::Extra(_) => unreachable!("It's the bottom type!"), }; let entry = self.complex_types.entry(concrete_name.clone()); if let Entry::Occupied(_) = entry { @@ -210,25 +204,25 @@ impl<'input> NameAllocator<'input> { pub fn allocate_simple_type( &mut self, namespace: Option<&'input str>, - recursive_simple_type: &RecursiveSimpleType<'input>, + recursive_simple_type: &InSimpleType<'input>, ) -> ConcreteName { let (concrete_name, ty) = match recursive_simple_type { - RecursiveSimpleType::Primitive(mod_name, type_name) => { + RSimpleType::Primitive(mod_name, type_name) => { let concrete_name = ConcreteName(mod_name.to_string(), type_name.to_string()); - (concrete_name.clone(), SimpleType::Alias(concrete_name)) + (concrete_name.clone(), NRSimpleType::Alias(concrete_name)) } - RecursiveSimpleType::Alias(fullname) => { + RSimpleType::Alias(fullname) => { let referee = self.allocate_fullname(*fullname); - (referee.clone(), SimpleType::Alias(referee)) + (referee.clone(), NRSimpleType::Alias(referee)) } - RecursiveSimpleType::Restriction(base, facets) => { + RSimpleType::Restriction(base, facets) => { let base = self.allocate_fullname(*base); ( self.allocate_anonymous_compound(namespace, "simplerestriction", &[&base]), - SimpleType::Restriction(base, facets.clone()), + NRSimpleType::Restriction(base, facets.clone()), ) } - RecursiveSimpleType::Union(inners) => { + RSimpleType::Union(inners) => { let inners: Vec<_> = inners .iter() .map(|inner| self.allocate_simple_type(namespace, inner)) @@ -239,23 +233,19 @@ impl<'input> NameAllocator<'input> { "union", &inners.iter().collect::<Vec<_>>(), ), - SimpleType::Union(inners), + NRSimpleType::Union(inners), ) } - RecursiveSimpleType::List(inner) => { + RSimpleType::List(inner) => { let inner = self.allocate_simple_type(namespace, inner); ( - self.allocate_anonymous_compound( - namespace, - "list", - &[&inner], - ), - SimpleType::List(inner), + self.allocate_anonymous_compound(namespace, "list", &[&inner]), + 
NRSimpleType::List(inner), ) } - RecursiveSimpleType::Empty => ( + RSimpleType::Empty => ( self.allocate_anonymous(namespace, "empty"), - SimpleType::Empty, + NRSimpleType::Empty, ), }; @@ -266,4 +256,12 @@ impl<'input> NameAllocator<'input> { entry.or_insert(ty); concrete_name } + + fn allocate_attrs( + &mut self, + _namespace: Option<&'input str>, + _attrs: &Attrs<'input, InSimpleType>, + ) -> Attrs<'input, ConcreteName> { + unimplemented!() + } } diff --git a/xml-schema/src/names.rs b/xml-schema/src/names.rs index e8e688a..a0d7322 100644 --- a/xml-schema/src/names.rs +++ b/xml-schema/src/names.rs @@ -13,6 +13,7 @@ fn escape_keyword(name: &str) -> String { } } +#[derive(Debug, Default)] pub(crate) struct NameGenerator(HashMap<String, usize>); impl NameGenerator { diff --git a/xml-schema/src/processor2.rs b/xml-schema/src/processor2.rs index 51c782b..943b8d7 100644 --- a/xml-schema/src/processor2.rs +++ b/xml-schema/src/processor2.rs @@ -4,6 +4,8 @@ use std::hash::Hash; use xmlparser::{TextUnescape, XmlSpace}; +use asts; +use asts::recursive::{ComplexType, SimpleType}; use names::FullName; use parser::*; use primitives::{AnyUri, NonNegativeInteger, QName}; @@ -59,11 +61,7 @@ pub enum AttrUse { #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct Attrs<'input> { - pub named: Vec<( - FullName<'input>, - AttrUse, - Option<RecursiveSimpleType<'input>>, - )>, + pub named: Vec<(FullName<'input>, AttrUse, Option<OutSimpleType<'input>>)>, pub refs: Vec<(Option<FullName<'input>>, AttrUse, FullName<'input>)>, pub group_refs: Vec<FullName<'input>>, pub any_attributes: bool, @@ -91,36 +89,18 @@ impl<'input> Attrs<'input> { } } -/// Direct retranscription of XSD's complexType in a Rust-friendly way -#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub enum RecursiveComplexType<'input> { - Any, - Empty, - Alias(FullName<'input>), - Extension(FullName<'input>, Box<RecursiveComplexType<'input>>), - Restriction(FullName<'input>, 
Box<RecursiveComplexType<'input>>), - ElementRef(usize, usize, FullName<'input>), - Element( - usize, - usize, - FullName<'input>, - Box<RecursiveComplexType<'input>>, - ), - GroupRef(usize, usize, FullName<'input>), - Choice(usize, usize, Vec<RecursiveComplexType<'input>>), - Sequence(usize, usize, Vec<RecursiveComplexType<'input>>), - Simple(RecursiveSimpleType<'input>), -} +pub type OutSimpleType<'input> = asts::recursive::SimpleType<'input>; +pub type OutComplexType<'input> = + asts::recursive::ComplexType<'input, Attrs<'input>, ComplexTypeExtra<'input, Attrs<'input>>>; -/// Direct retranscription of XSD's simpleType in a Rust-friendly way +/// Other possibilities for SimpleType that will be shaven off by +/// other passes #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub enum RecursiveSimpleType<'input> { - Primitive(&'static str, &'static str), - Alias(FullName<'input>), - Restriction(FullName<'input>, Facets<'input>), - List(Box<RecursiveSimpleType<'input>>), - Union(Vec<RecursiveSimpleType<'input>>), - Empty, +pub enum ComplexTypeExtra<'input, TAttrs> { + AttrDecl( + TAttrs, + Box<ComplexType<'input, TAttrs, ComplexTypeExtra<'input, TAttrs>>>, + ), } #[derive(Debug)] @@ -128,10 +108,10 @@ pub struct SimpleToplevel<'input> { pub target_namespace: Option<&'input str>, pub element_form_default_qualified: bool, pub attribute_form_default_qualified: bool, - pub elements: HashMap<FullName<'input>, RecursiveComplexType<'input>>, - pub simple_types: HashMap<FullName<'input>, RecursiveSimpleType<'input>>, - pub complex_types: HashMap<FullName<'input>, RecursiveComplexType<'input>>, - pub groups: HashMap<FullName<'input>, RecursiveComplexType<'input>>, + pub elements: HashMap<FullName<'input>, OutComplexType<'input>>, + pub simple_types: HashMap<FullName<'input>, OutSimpleType<'input>>, + pub complex_types: HashMap<FullName<'input>, OutComplexType<'input>>, + pub groups: HashMap<FullName<'input>, OutComplexType<'input>>, pub attribute_groups: 
HashMap<FullName<'input>, Attrs<'input>>, } @@ -188,7 +168,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_toplevel_element( &mut self, element: &'ast xs::Element<'input>, - ) -> RecursiveComplexType<'input> { + ) -> OutComplexType<'input> { let xs::Element { ref attr_type, ref attr_name, @@ -200,11 +180,11 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { match (attr_type, &child_type) { (None, Some(ref c)) => match c { - enums::Type::SimpleType(ref e) => RecursiveComplexType::Simple(self.process_local_simple_type(e)), + enums::Type::SimpleType(ref e) => ComplexType::Simple(self.process_local_simple_type(e)), enums::Type::ComplexType(ref e) => self.process_local_complex_type(e), }, - (Some(t), None) => RecursiveComplexType::Alias(FullName::from_qname(t, self.target_namespace)), - (None, None) => RecursiveComplexType::Empty, + (Some(t), None) => ComplexType::Alias(FullName::from_qname(t, self.target_namespace)), + (None, None) => ComplexType::Empty, (Some(ref t1), Some(ref t2)) => { panic!( "Toplevel element '{}:{}' has both a type attribute ({:?}) and a child type ({:?}).", @@ -217,7 +197,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_toplevel_complex_type( &mut self, complex_type: &'ast xs::ComplexType<'input>, - ) -> RecursiveComplexType<'input> { + ) -> OutComplexType<'input> { let xs::ComplexType { ref complex_type_model, .. 
@@ -229,7 +209,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_complex_type_model( &mut self, complex_type_model: &'ast xs::ComplexTypeModel<'input>, - ) -> RecursiveComplexType<'input> { + ) -> OutComplexType<'input> { match complex_type_model { xs::ComplexTypeModel::SimpleContent(_) => unimplemented!("simpleContent"), xs::ComplexTypeModel::ComplexContent(ref model) => self.process_complex_content(model), @@ -250,7 +230,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_local_complex_type( &mut self, complex_type: &'ast inline_elements::LocalComplexType<'input>, - ) -> RecursiveComplexType<'input> { + ) -> OutComplexType<'input> { let inline_elements::LocalComplexType { ref complex_type_model, .. @@ -261,7 +241,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_toplevel_simple_type( &mut self, simple_type: &'ast xs::SimpleType<'input>, - ) -> RecursiveSimpleType<'input> { + ) -> SimpleType<'input> { let xs::SimpleType { ref simple_derivation, .. @@ -276,7 +256,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_local_simple_type( &mut self, simple_type: &'ast inline_elements::LocalSimpleType<'input>, - ) -> RecursiveSimpleType<'input> { + ) -> SimpleType<'input> { let inline_elements::LocalSimpleType { ref simple_derivation, .. @@ -291,7 +271,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_simple_restriction( &mut self, restriction: &'ast xs::Restriction<'input>, - ) -> RecursiveSimpleType<'input> { + ) -> SimpleType<'input> { let xs::Restriction { ref attr_base, ref simple_restriction_model, @@ -312,9 +292,9 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { match local_simple_type { Some(inline_elements::LocalSimpleType { .. 
}) => { - RecursiveSimpleType::Restriction(base, facets) // TODO: use the simple_derivation + SimpleType::Restriction(base, facets) // TODO: use the simple_derivation } - None => RecursiveSimpleType::Restriction(base, facets), + None => SimpleType::Restriction(base, facets), } } @@ -368,7 +348,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { facets } - fn process_list(&mut self, list: &'ast xs::List<'input>) -> RecursiveSimpleType<'input> { + fn process_list(&mut self, list: &'ast xs::List<'input>) -> SimpleType<'input> { let item_type = list.attr_item_type; let item_type = item_type .as_ref() @@ -376,7 +356,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { let t = match (item_type, &list.local_simple_type) { (None, Some(st)) => self.process_local_simple_type(st), - (Some(n), None) => RecursiveSimpleType::Alias(n), + (Some(n), None) => SimpleType::Alias(n), (None, None) => panic!("<list> with no itemType or child type."), (Some(ref t1), Some(ref t2)) => panic!( "<list> has both an itemType attribute ({:?}) and a child type ({:?}).", @@ -384,23 +364,20 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { ), }; - RecursiveSimpleType::List(Box::new(t)) + SimpleType::List(Box::new(t)) } - fn process_union(&mut self, union: &'ast xs::Union<'input>) -> RecursiveSimpleType<'input> { + fn process_union(&mut self, union: &'ast xs::Union<'input>) -> SimpleType<'input> { let member_types = union .local_simple_type .iter() .map(|t| self.process_local_simple_type(t)) .collect(); - RecursiveSimpleType::Union(member_types) + SimpleType::Union(member_types) } - fn process_toplevel_group( - &mut self, - group: &'ast xs::Group<'input>, - ) -> RecursiveComplexType<'input> { + fn process_toplevel_group(&mut self, group: &'ast xs::Group<'input>) -> OutComplexType<'input> { let xs::Group { choice_all_choice_sequence: ref content, .. 
@@ -440,9 +417,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { match (name, e.attr_ref, type_attr, &e.local_simple_type) { (Some(name), None, Some(t), None) => { let t = FullName::from_qname(&t, self.target_namespace); - attrs - .named - .push((name, use_, Some(RecursiveSimpleType::Alias(t)))); + attrs.named.push((name, use_, Some(SimpleType::Alias(t)))); } (Some(name), None, None, Some(t)) => { let t = self.process_local_simple_type(t); @@ -479,7 +454,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_complex_content( &mut self, model: &'ast xs::ComplexContent<'input>, - ) -> RecursiveComplexType<'input> { + ) -> OutComplexType<'input> { let xs::ComplexContent { ref choice_restriction_extension, .. @@ -496,7 +471,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { type_def_particle, .. }) => self.process_complex_restriction(attr_base, type_def_particle), - None => RecursiveComplexType::Empty, + None => ComplexType::Empty, } } enums::ChoiceRestrictionExtension::Extension(ref e) => { @@ -521,9 +496,9 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { _attrs: &'ast HashMap<FullName<'input>, &'input str>, attr_base: &'ast QName<'input>, type_def_particle: &'ast xs::TypeDefParticle<'input>, - ) -> RecursiveComplexType<'input> { + ) -> OutComplexType<'input> { let base = FullName::from_qname(attr_base, self.target_namespace); - RecursiveComplexType::Extension( + ComplexType::Extension( base, Box::new(self.process_type_def_particle(type_def_particle)), ) @@ -533,39 +508,43 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { &mut self, _attrs: &'ast HashMap<FullName<'input>, &'input str>, attr_base: &'ast QName<'input>, - ) -> RecursiveComplexType<'input> { + ) -> OutComplexType<'input> { let base = FullName::from_qname(&attr_base, self.target_namespace); - RecursiveComplexType::Alias(base) + ComplexType::Alias(base) } fn process_complete_content_model( &mut self, _open_content: &'ast Option<Box<xs::OpenContent<'input>>>, 
type_def_particle: &'ast Option<Box<xs::TypeDefParticle<'input>>>, - _attr_decls: &'ast xs::AttrDecls<'input>, + attr_decls: &'ast xs::AttrDecls<'input>, _assertions: &'ast xs::Assertions<'input>, - ) -> RecursiveComplexType<'input> { - match type_def_particle.as_ref() { + ) -> OutComplexType<'input> { + let ty = match type_def_particle.as_ref() { Some(type_def_particle) => self.process_type_def_particle(type_def_particle), - None => RecursiveComplexType::Empty, - } + None => ComplexType::Empty, + }; + ComplexType::Extra(ComplexTypeExtra::AttrDecl( + self.process_attr_decls(attr_decls), + Box::new(ty), + )) } fn process_complex_restriction( &mut self, attr_base: &'ast QName<'input>, type_def_particle: &'ast xs::TypeDefParticle<'input>, - ) -> RecursiveComplexType<'input> { + ) -> OutComplexType<'input> { // TODO: use the base let base = FullName::from_qname(attr_base, self.target_namespace); let ty = self.process_type_def_particle(type_def_particle); - RecursiveComplexType::Restriction(base, Box::new(ty)) + ComplexType::Restriction(base, Box::new(ty)) } fn process_type_def_particle( &mut self, particle: &'ast xs::TypeDefParticle<'input>, - ) -> RecursiveComplexType<'input> { + ) -> OutComplexType<'input> { match particle { xs::TypeDefParticle::Group(e) => self.process_group_ref(e), xs::TypeDefParticle::All(_) => unimplemented!("all"), @@ -577,7 +556,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_group_ref( &mut self, group_ref: &'ast inline_elements::GroupRef<'input>, - ) -> RecursiveComplexType<'input> { + ) -> OutComplexType<'input> { let inline_elements::GroupRef { ref attr_ref, ref attr_min_occurs, @@ -588,10 +567,10 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { let max_occurs = parse_max_occurs(attr_max_occurs); let ref_ = FullName::from_qname(attr_ref, self.target_namespace); - RecursiveComplexType::GroupRef(min_occurs, max_occurs, ref_) + ComplexType::GroupRef(min_occurs, max_occurs, ref_) } - fn process_choice(&mut self, choice: 
&'ast xs::Choice<'input>) -> RecursiveComplexType<'input> { + fn process_choice(&mut self, choice: &'ast xs::Choice<'input>) -> OutComplexType<'input> { let xs::Choice { ref attr_min_occurs, ref attr_max_occurs, @@ -607,13 +586,10 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { .map(|particle| self.process_nested_particle(particle)) .collect(); - RecursiveComplexType::Choice(min_occurs, max_occurs, items) + ComplexType::Choice(min_occurs, max_occurs, items) } - fn process_sequence( - &mut self, - seq: &'ast xs::Sequence<'input>, - ) -> RecursiveComplexType<'input> { + fn process_sequence(&mut self, seq: &'ast xs::Sequence<'input>) -> OutComplexType<'input> { let xs::Sequence { ref attr_min_occurs, ref attr_max_occurs, @@ -629,13 +605,13 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { .map(|particle| self.process_nested_particle(particle)) .collect(); - RecursiveComplexType::Sequence(min_occurs, max_occurs, items) + ComplexType::Sequence(min_occurs, max_occurs, items) } fn process_nested_particle( &mut self, particle: &'ast xs::NestedParticle<'input>, - ) -> RecursiveComplexType<'input> { + ) -> OutComplexType<'input> { match particle { xs::NestedParticle::Element(e) => self.process_local_element(e), xs::NestedParticle::Group(e) => self.process_group_ref(e), @@ -645,14 +621,14 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { } } - fn process_any(&mut self, _any: &'ast xs::Any<'input>) -> RecursiveComplexType<'input> { - RecursiveComplexType::Any + fn process_any(&mut self, _any: &'ast xs::Any<'input>) -> OutComplexType<'input> { + ComplexType::Any } fn process_local_element( &mut self, element: &'ast inline_elements::LocalElement<'input>, - ) -> RecursiveComplexType<'input> { + ) -> OutComplexType<'input> { let inline_elements::LocalElement { ref attr_name, ref attr_ref, @@ -683,7 +659,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { panic!("<element> has both ref={:?} and form={:?}", ref_, attr_form); } let ref_ = FullName::from_qname(ref_, 
self.target_namespace); - RecursiveComplexType::ElementRef(min_occurs, max_occurs, ref_) + ComplexType::ElementRef(min_occurs, max_occurs, ref_) } else { let name = name.as_ref().expect("<element> has no name.").0; @@ -702,23 +678,24 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { let t = match (type_attr, &type_) { (None, Some(enums::Type::SimpleType(ref e))) => { - RecursiveComplexType::Simple(self.process_local_simple_type(e)) + ComplexType::Simple(self.process_local_simple_type(e)) } (None, Some(enums::Type::ComplexType(ref e))) => self.process_local_complex_type(e), (Some(t), None) => { let t = FullName::from_qname(t, self.target_namespace); - RecursiveComplexType::Alias(t) + ComplexType::Alias(t) } - (None, None) => RecursiveComplexType::Empty, + (None, None) => ComplexType::Empty, (Some(ref t1), Some(ref t2)) => panic!( "Element '{:?}' has both a type attribute ({:?}) and a child type ({:?}).", name, t1, t2 ), }; - RecursiveComplexType::Element( + ComplexType::Element( min_occurs, max_occurs, FullName::new(namespace, name), + Attrs::new(), Box::new(t), ) } From c7785ffdcdac10d577389ba75759b4107f9d2159 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <progval+git@progval.net> Date: Thu, 14 Nov 2019 22:07:51 +0100 Subject: [PATCH 08/11] Add missing modules --- xml-schema/src/attrs_bubble_up.rs | 21 +++++++++++++ xml-schema/src/name_allocator.rs | 6 ++-- xml-schema/src/ungroup.rs | 51 +++++++++++++++++++++++++++++++ xml-schema/src/utils.rs | 4 +++ 4 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 xml-schema/src/attrs_bubble_up.rs create mode 100644 xml-schema/src/ungroup.rs create mode 100644 xml-schema/src/utils.rs diff --git a/xml-schema/src/attrs_bubble_up.rs b/xml-schema/src/attrs_bubble_up.rs new file mode 100644 index 0000000..d68aa61 --- /dev/null +++ b/xml-schema/src/attrs_bubble_up.rs @@ -0,0 +1,21 @@ +//! 
Pushes attr definitions from the inner types of an element to the top-level element + +use asts; +use names::FullName; +use processor2::AttrUse; +use utils::Bottom; + +use processor2::OutComplexType as InComplexType; +use processor2::OutSimpleType as InSimpleType; + +pub type OutSimpleType<'input> = InSimpleType<'input>; +pub type OutComplexType<'input> = + asts::recursive::ComplexType<'input, Attrs<'input, OutSimpleType<'input>>, Bottom>; + +#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct Attrs<'input, TSimpleType> { + pub named: Vec<(FullName<'input>, AttrUse, Option<TSimpleType>)>, + pub refs: Vec<(Option<FullName<'input>>, AttrUse, FullName<'input>)>, + pub group_refs: Vec<FullName<'input>>, + pub any_attributes: bool, +} diff --git a/xml-schema/src/name_allocator.rs b/xml-schema/src/name_allocator.rs index 5400336..ecfc05d 100644 --- a/xml-schema/src/name_allocator.rs +++ b/xml-schema/src/name_allocator.rs @@ -14,9 +14,9 @@ use asts::non_recursive::SimpleType as NRSimpleType; use asts::recursive::ComplexType as RComplexType; use asts::recursive::SimpleType as RSimpleType; -pub type InSimpleType<'input> = asts::recursive::SimpleType<'input>; -pub type InComplexType<'input> = - asts::recursive::ComplexType<'input, Attrs<'input, InSimpleType<'input>>, Bottom>; +use attrs_bubble_up::OutSimpleType as InSimpleType; +use attrs_bubble_up::OutComplexType as InComplexType; + pub type OutSimpleType<'input> = asts::non_recursive::SimpleType<'input>; pub type OutComplexType<'input> = asts::non_recursive::ComplexType<'input, Attrs<'input, ConcreteName>, ComplexTypeExtra<'input>>; diff --git a/xml-schema/src/ungroup.rs b/xml-schema/src/ungroup.rs new file mode 100644 index 0000000..3b4b8c2 --- /dev/null +++ b/xml-schema/src/ungroup.rs @@ -0,0 +1,51 @@ +/// Replaces `ComplexType::GroupRef`, `Attrs.refs` with the content of their target. (ie. 
inlines them) +use std::collections::HashMap; + +use asts; +use asts::non_recursive::ComplexType as NRComplexType; +use asts::non_recursive::ConcreteName; +use asts::non_recursive::SimpleType as NRSimpleType; +use attrs_bubble_up::Attrs; +use names::FullName; +use utils::Bottom; + +use name_allocator::ComplexTypeExtra as InComplexTypeExtra; +use name_allocator::OutComplexType as InComplexType; +use name_allocator::OutSimpleType as InSimpleType; + +pub type OutSimpleType<'input> = InSimpleType<'input>; +pub type OutComplexType<'input> = + asts::non_recursive::ComplexType<'input, Attrs<'input, ConcreteName>, Bottom>; + +pub fn ungroup_complex_type<'input>( + _fullname_to_concrete_name: &HashMap<FullName<'input>, ConcreteName>, + _groups: HashMap<FullName<'input>, &InComplexType<'input>>, + complex_type: InComplexType<'input>, +) -> OutComplexType<'input> { + match complex_type { + // Trivial cases + NRComplexType::Any => NRComplexType::Any, + NRComplexType::Empty => NRComplexType::Empty, + NRComplexType::Alias(cn) => NRComplexType::Alias(cn), + NRComplexType::Extension(cn1, cn2) => NRComplexType::Extension(cn1, cn2), + NRComplexType::Restriction(cn1, cn2) => NRComplexType::Restriction(cn1, cn2), + NRComplexType::ElementRef(min_occurs, max_occurs, cn) => { + NRComplexType::ElementRef(min_occurs, max_occurs, cn) + } + NRComplexType::Choice(min_occurs, max_occurs, cns) => { + NRComplexType::Choice(min_occurs, max_occurs, cns) + } + NRComplexType::Sequence(min_occurs, max_occurs, cns) => { + NRComplexType::Sequence(min_occurs, max_occurs, cns) + } + NRComplexType::Simple(cn) => NRComplexType::Simple(cn), + + // The actual work + NRComplexType::Element(min_occurs, max_occurs, fullname, attrs, cn) => { + NRComplexType::Element(min_occurs, max_occurs, fullname, attrs, cn) + } + NRComplexType::Extra(InComplexTypeExtra::GroupRef(_min_occurs, _max_occurs, _cn)) => { + unimplemented!() + } + } +} diff --git a/xml-schema/src/utils.rs b/xml-schema/src/utils.rs new file mode 
100644 index 0000000..b4c4e57 --- /dev/null +++ b/xml-schema/src/utils.rs @@ -0,0 +1,4 @@ +/// A structure that cannot be instantiated +pub struct Bottom { + _private_attr: (), +} From 9bce1f498179cc390a53c714609369652c9bceb7 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <progval+git@progval.net> Date: Thu, 14 Nov 2019 22:16:04 +0100 Subject: [PATCH 09/11] silence warnings, that code is going to be removed anyway. --- xml-schema/src/lib.rs | 10 ++++++++++ xml-schema/src/parser_generator.rs | 2 ++ xml-schema/src/primitives.rs | 12 ++++++++---- xml-schema/src/processor.rs | 2 ++ xml-schema/src/support.rs | 2 +- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/xml-schema/src/lib.rs b/xml-schema/src/lib.rs index a80376d..7746ad8 100644 --- a/xml-schema/src/lib.rs +++ b/xml-schema/src/lib.rs @@ -13,6 +13,16 @@ pub mod support; pub mod primitives; pub mod bigfloat; +pub mod utils; + +pub mod asts; +pub mod toplevel; +pub mod processor2; +pub mod attrs_bubble_up; +pub mod name_allocator; +pub mod ungroup; +//pub mod parser_generator2; + pub mod parser; pub mod processor; pub mod parser_generator; diff --git a/xml-schema/src/parser_generator.rs b/xml-schema/src/parser_generator.rs index ebed18a..06245b7 100644 --- a/xml-schema/src/parser_generator.rs +++ b/xml-schema/src/parser_generator.rs @@ -1,3 +1,5 @@ +#![allow(warnings)] + use std::collections::{HashMap, HashSet}; use codegen as cg; diff --git a/xml-schema/src/primitives.rs b/xml-schema/src/primitives.rs index 580e6ac..e48bbce 100644 --- a/xml-schema/src/primitives.rs +++ b/xml-schema/src/primitives.rs @@ -145,7 +145,8 @@ pub struct QName<'input> { } impl<'input> ParseXmlStr<'input> for QName<'input> { const NODE_NAME: &'static str = "QName"; - fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, parent_context: &ParentContext<'input>, facets: &Facets<'a>) -> Option<(&'input str, QName<'input>)> { + fn parse_self_xml_str<'a, 
TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, parent_context: &ParentContext<'input>, _facets: &Facets<'a>) -> Option<(&'input str, QName<'input>)> { + // TODO: use facets if input.len() == 0 { return None; } @@ -214,7 +215,8 @@ impl<'input> fmt::Display for QName<'input> { pub struct AnyUri<'input>(pub &'input str); impl<'input> ParseXmlStr<'input> for AnyUri<'input> { const NODE_NAME: &'static str = "AnyUri"; - fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, _parent_context: &ParentContext<'input>, facets: &Facets<'a>) -> Option<(&'input str, AnyUri<'input>)> { + fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, _parent_context: &ParentContext<'input>, _facets: &Facets<'a>) -> Option<(&'input str, AnyUri<'input>)> { + // TODO: use facets if input.len() == 0 { return None; } @@ -411,7 +413,8 @@ pub struct AnySimpleType<'input>(pub &'input str); impl<'input> ParseXmlStr<'input> for AnySimpleType<'input> { const NODE_NAME: &'static str = "AnySimpleType"; - fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, _parent_context: &ParentContext<'input>, facets: &Facets<'a>) -> Option<(&'input str, AnySimpleType<'input>)> { + fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, _parent_context: &ParentContext<'input>, _facets: &Facets<'a>) -> Option<(&'input str, AnySimpleType<'input>)> { + // TODO: use facets Some(("", AnySimpleType(input))) } } @@ -448,7 +451,8 @@ impl<'input> ParseXmlStr<'input> for NcName<'input> { pub struct Boolean<'input>(bool, PhantomData<&'input ()>); impl<'input> ParseXmlStr<'input> for Boolean<'input> { const NODE_NAME: &'static str = "Boolean"; - fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, 
_parse_context: &mut TParseContext, _parent_context: &ParentContext<'input>, facets: &Facets<'a>) -> Option<(&'input str, Boolean<'input>)> { + fn parse_self_xml_str<'a, TParseContext: ParseContext<'input>>(input: &'input str, _parse_context: &mut TParseContext, _parent_context: &ParentContext<'input>, _facets: &Facets<'a>) -> Option<(&'input str, Boolean<'input>)> { + // TODO: use facets if input.len() >= 1 { match &input[0..1] { "0" => return Some((&input[1..], Boolean(false, PhantomData::default()))), diff --git a/xml-schema/src/processor.rs b/xml-schema/src/processor.rs index b433294..4a60117 100644 --- a/xml-schema/src/processor.rs +++ b/xml-schema/src/processor.rs @@ -1,3 +1,5 @@ +#![allow(warnings)] + use std::fmt::Debug; use std::hash::Hash; use std::collections::{HashMap, HashSet}; diff --git a/xml-schema/src/support.rs b/xml-schema/src/support.rs index f19141a..eb6578b 100644 --- a/xml-schema/src/support.rs +++ b/xml-schema/src/support.rs @@ -167,7 +167,7 @@ impl<'input, T> ParseXml<'input> for T where T: ParseXmlStr<'input> { Some(XmlToken::Text(strspan)) => { match Self::parse_self_xml_str(strspan.to_str(), parse_context, parent_context, &Facets::default()) { Some(("", out)) => Some(out), - Some((unparsed, _)) => None, + Some((_unparsed, _)) => None, None => None, } } From bae923f8c0ce45531ff00c715f8cde213bf2b872 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <progval+git@progval.net> Date: Fri, 15 Nov 2019 23:24:46 +0100 Subject: [PATCH 10/11] Add lift_attrs.rs. 
--- xml-schema/src/asts.rs | 4 +- xml-schema/src/attrs.rs | 44 ++++++++ xml-schema/src/attrs_bubble_up.rs | 21 ---- xml-schema/src/lib.rs | 5 +- xml-schema/src/lift_attrs.rs | 181 ++++++++++++++++++++++++++++++ xml-schema/src/name_allocator.rs | 15 +-- xml-schema/src/processor2.rs | 57 +++------- xml-schema/src/ungroup.rs | 31 ++++- 8 files changed, 281 insertions(+), 77 deletions(-) create mode 100644 xml-schema/src/attrs.rs delete mode 100644 xml-schema/src/attrs_bubble_up.rs create mode 100644 xml-schema/src/lift_attrs.rs diff --git a/xml-schema/src/asts.rs b/xml-schema/src/asts.rs index d13fc6d..aa08c05 100644 --- a/xml-schema/src/asts.rs +++ b/xml-schema/src/asts.rs @@ -28,7 +28,7 @@ pub mod non_recursive { Extra(TExtra), } - #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] + #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] pub enum SimpleType<'input> { Alias(ConcreteName), Restriction(ConcreteName, Facets<'input>), @@ -64,7 +64,7 @@ pub mod recursive { Simple(SimpleType<'input>), } - #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] + #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] pub enum SimpleType<'input> { Primitive(&'static str, &'static str), Alias(FullName<'input>), diff --git a/xml-schema/src/attrs.rs b/xml-schema/src/attrs.rs new file mode 100644 index 0000000..d68e96b --- /dev/null +++ b/xml-schema/src/attrs.rs @@ -0,0 +1,44 @@ +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub enum AttrUse { + Prohibited, + Required, + Optional, +} + +pub mod with_refs { + use names::FullName; + + use super::AttrUse; + + #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] + pub struct Attrs<'input, TSimpleType: Clone> { + pub named: Vec<(FullName<'input>, AttrUse, Option<TSimpleType>)>, + pub refs: Vec<(Option<FullName<'input>>, AttrUse, FullName<'input>)>, + pub group_refs: Vec<FullName<'input>>, + pub any_attributes: bool, + } + + impl<'input, TSimpleType> Attrs<'input, TSimpleType> where 
TSimpleType: Clone { + pub fn new() -> Attrs<'input, TSimpleType> { + Attrs { + named: Vec::new(), + refs: Vec::new(), + group_refs: Vec::new(), + any_attributes: false, + } + } + + pub fn extend(&mut self, other: Attrs<'input, TSimpleType>) { + let Attrs { + named, + refs, + group_refs, + any_attributes, + } = other; + self.named.extend(named); + self.refs.extend(refs); + self.group_refs.extend(group_refs); + self.any_attributes |= any_attributes; + } + } +} diff --git a/xml-schema/src/attrs_bubble_up.rs b/xml-schema/src/attrs_bubble_up.rs deleted file mode 100644 index d68aa61..0000000 --- a/xml-schema/src/attrs_bubble_up.rs +++ /dev/null @@ -1,21 +0,0 @@ -//! Pushes attr definitions from the inner types of an element to the top-level element - -use asts; -use names::FullName; -use processor2::AttrUse; -use utils::Bottom; - -use processor2::OutComplexType as InComplexType; -use processor2::OutSimpleType as InSimpleType; - -pub type OutSimpleType<'input> = InSimpleType<'input>; -pub type OutComplexType<'input> = - asts::recursive::ComplexType<'input, Attrs<'input, OutSimpleType<'input>>, Bottom>; - -#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub struct Attrs<'input, TSimpleType> { - pub named: Vec<(FullName<'input>, AttrUse, Option<TSimpleType>)>, - pub refs: Vec<(Option<FullName<'input>>, AttrUse, FullName<'input>)>, - pub group_refs: Vec<FullName<'input>>, - pub any_attributes: bool, -} diff --git a/xml-schema/src/lib.rs b/xml-schema/src/lib.rs index 7746ad8..69d2951 100644 --- a/xml-schema/src/lib.rs +++ b/xml-schema/src/lib.rs @@ -16,11 +16,12 @@ pub mod bigfloat; pub mod utils; pub mod asts; +pub mod attrs; pub mod toplevel; pub mod processor2; -pub mod attrs_bubble_up; +pub mod lift_attrs; pub mod name_allocator; -pub mod ungroup; +//pub mod ungroup; //pub mod parser_generator2; pub mod parser; diff --git a/xml-schema/src/lift_attrs.rs b/xml-schema/src/lift_attrs.rs new file mode 100644 index 0000000..5752e12 --- /dev/null +++ 
b/xml-schema/src/lift_attrs.rs
@@ -0,0 +1,224 @@
+//! Pushes attr definitions from the inner types of an element to the
+//! top-level element
+//!
+//! This module must compute a transitive closure in case of circular
+//! references:
+//! "Circular reference is not disallowed. [...] The effect is to take the
+//! transitive closure of the reference relation"
+//! https://www.w3.org/TR/xmlschema11-1/#declare-attributeGroup-core
+
+use std::collections::{HashMap, HashSet};
+
+use asts;
+use asts::recursive::ComplexType as RComplexType;
+use names::FullName;
+use utils::Bottom;
+
+use processor2::ComplexTypeExtra as InComplexTypeExtra;
+use processor2::OutAttrs as InAttrs;
+use processor2::OutComplexType as InComplexType;
+use processor2::OutSimpleType as InSimpleType;
+
+pub type OutSimpleType<'input> = InSimpleType<'input>;
+pub type OutComplexType<'input> = asts::recursive::ComplexType<'input, OutAttrs<'input>, Bottom>;
+pub type OutAttrs<'input> = InAttrs<'input>;
+
+/// Computes, for every registered complex type, the attrs it declares or
+/// inherits through the types it references.
+///
+/// Types are registered with `add_complex_type()`; calling
+/// `make_transitive_closure()` afterwards propagates attrs along references.
+pub struct AttrsLifter<'input> {
+    /// For each type name, stores the set of types that reference it.
+    /// So if the former's list of attrs is updated, then the latter's
+    /// must be updated as well.
+    reverse_deps: HashMap<FullName<'input>, HashSet<FullName<'input>>>,
+
+    /// (A subset of) the attrs of each complex type. Can converge to the full
+    /// attrs by calling `make_transitive_closure()`
+    attrs_of_complex_type: HashMap<FullName<'input>, OutAttrs<'input>>,
+
+    /// Set of elements of `attrs_of_complex_type` that are a strict subset of
+    /// what they should be, and therefore should be updated.
+    outdated_complex_types: HashSet<FullName<'input>>,
+
+    /// Every type registered through `add_complex_type()`, keyed by name.
+    complex_types: HashMap<FullName<'input>, InComplexType<'input>>,
+}
+
+impl<'input> AttrsLifter<'input> {
+    /// Creates an empty lifter; every internal map and set is preallocated
+    /// for `capacity` entries.
+    pub fn with_capacity(capacity: usize) -> AttrsLifter<'input> {
+        AttrsLifter {
+            reverse_deps: HashMap::with_capacity(capacity),
+            attrs_of_complex_type: HashMap::with_capacity(capacity),
+            outdated_complex_types: HashSet::with_capacity(capacity),
+            complex_types: HashMap::with_capacity(capacity),
+        }
+    }
+
+    /// Registers `complex_type` under `name`, records which type names it
+    /// references, and marks it as outdated so that
+    /// `make_transitive_closure()` computes its attrs.
+    pub fn add_complex_type(
+        &mut self,
+        name: FullName<'input>,
+        complex_type: InComplexType<'input>,
+    ) {
+        self.add_reverse_deps(name, &complex_type);
+        self.complex_types.insert(name, complex_type);
+        self.outdated_complex_types.insert(name);
+    }
+
+    /// Recomputes the attrs of outdated types until no outdated type is left.
+    ///
+    /// # Panics
+    ///
+    /// Panics if an outdated name is missing from `complex_types`, or if the
+    /// attrs of a type were `Some` and a later step computes `None` for it.
+    pub fn make_transitive_closure(&mut self) {
+        // NOTE(review): termination assumes `get_attrs_step()` reaches a fixed
+        // point. It merges attrs with `extend()`; if that appends without
+        // deduplicating, a genuine reference cycle would grow forever -- confirm.
+        while let Some(&name) = self.outdated_complex_types.iter().next() {
+            // Take the name off the worklist first; otherwise the same entry
+            // would be picked again forever.
+            self.outdated_complex_types.remove(&name);
+            let complex_type = self.complex_types.get(&name).unwrap_or_else(|| {
+                panic!(
+                    "Name {:?} was supposed to be updated, but it is missing from AttrsLifter.complex_types.",
+                    name
+                )
+            });
+            let attrs = self.get_attrs_step(complex_type);
+            if self.attrs_of_complex_type.get(&name) != attrs.as_ref() {
+                let attrs = attrs.expect(
+                    "attrs were Some() but became None while computing transitive closure.",
+                );
+                self.attrs_of_complex_type.insert(name, attrs);
+                // The attrs of `name` changed, so every type that references
+                // it has to be recomputed.
+                if let Some(dependents) = self.reverse_deps.get(&name) {
+                    self.outdated_complex_types
+                        .extend(dependents.iter().cloned());
+                }
+            }
+        }
+    }
+
+    /// Records that the attrs of `referrer` depend on those of `referenced`.
+    fn add_reverse_dep(&mut self, referenced: FullName<'input>, referrer: FullName<'input>) {
+        // Create the entry on first use; nothing else populates
+        // `reverse_deps`.
+        self.reverse_deps
+            .entry(referenced)
+            .or_insert_with(HashSet::new)
+            .insert(referrer);
+    }
+
+    /// Walks `complex_type` and records, for every type name it references,
+    /// that `my_name` depends on it.
+    fn add_reverse_deps(
+        &mut self,
+        my_name: FullName<'input>,
+        complex_type: &InComplexType<'input>,
+    ) {
+        match complex_type {
+            // Trivial cases
+            RComplexType::Any => {}
+            RComplexType::Empty => {}
+            RComplexType::Alias(fullname) => self.add_reverse_dep(*fullname, my_name),
+            // The actual work
+            RComplexType::Extension(base, inner) | RComplexType::Restriction(base, inner) => {
+                self.add_reverse_dep(*base, my_name);
+                self.add_reverse_deps(my_name, inner);
+            }
+            RComplexType::ElementRef(_min_occurs, _max_occurs, fullname) => {
+                self.add_reverse_dep(*fullname, my_name)
+            }
+            RComplexType::Choice(_min_occurs, _max_occurs, inners)
+            | RComplexType::Sequence(_min_occurs, _max_occurs, inners) => {
+                for inner in inners {
+                    self.add_reverse_deps(my_name, inner);
+                }
+            }
+            RComplexType::Simple(_simple_type) => {}
+
+            RComplexType::Element(_min_occurs, _max_occurs, _fullname, _attrs, inner) => {
+                self.add_reverse_deps(my_name, inner);
+            }
+            RComplexType::GroupRef(_min_occurs, _max_occurs, _fullname) => unimplemented!(),
+            RComplexType::Extra(InComplexTypeExtra::AttrDecl(_attrs, inner)) => {
+                self.add_reverse_deps(my_name, inner);
+            }
+        }
+    }
+
+    /// Computes the attrs of `complex_type` from the attrs currently stored
+    /// for the names it references (one step of the closure computation).
+    /// Returns `None` when no attr declaration is found.
+    fn get_attrs_step(&self, complex_type: &InComplexType<'input>) -> Option<OutAttrs<'input>> {
+        let merge_attrs = |attrs1: Option<&OutAttrs<'input>>, attrs2| match (attrs1, attrs2) {
+            (None, attrs2) => attrs2,
+            (Some(attrs1), None) => Some(attrs1.clone()),
+            (Some(attrs1), Some(attrs2)) => {
+                let mut attrs: OutAttrs<'input> = attrs1.clone();
+                attrs.extend(attrs2);
+                Some(attrs)
+            }
+        };
+        match complex_type {
+            RComplexType::Any => None,
+            RComplexType::Empty => None,
+            RComplexType::Alias(fullname) => self.attrs_of_complex_type.get(fullname).cloned(),
+
+            RComplexType::Extension(base, inner) => merge_attrs(
+                self.attrs_of_complex_type.get(base),
+                self.get_attrs_step(inner),
+            ),
+            RComplexType::Restriction(base, inner) => {
+                // Attributes are inherited from the base:
+                // "However, attribute declarations do not need to be repeated in the derived type definition"
+                // https://www.w3.org/TR/xmlschema-0/#DerivByRestrict
+                merge_attrs(
+                    self.attrs_of_complex_type.get(base),
+                    self.get_attrs_step(inner),
+                )
+            }
+            RComplexType::ElementRef(_min_occurs, _max_occurs, _fullname) => None,
+
+            RComplexType::Choice(_min_occurs, _max_occurs, inners)
+            | RComplexType::Sequence(_min_occurs, _max_occurs, inners) => {
+                for inner in inners {
+                    if self.get_attrs_step(inner).is_some() {
+                        unimplemented!(
+                            "Sequence/choice got attribute declaration. \
+                             I don't know what to do with that."
+                        );
+                    }
+                }
+                None
+            }
+            RComplexType::Simple(_simple_type) => None,
+
+            RComplexType::Element(_min_occurs, _max_occurs, _fullname, _attrs, _inner) => {
+                // Elements capture the attrs for themselves and don't pass any up
+                None
+            }
+            RComplexType::GroupRef(_min_occurs, _max_occurs, fullname) => {
+                self.attrs_of_complex_type.get(fullname).cloned()
+            }
+            RComplexType::Extra(InComplexTypeExtra::AttrDecl(attrs, inner)) => {
+                match self.get_attrs_step(inner) {
+                    Some(inner_attrs) => {
+                        let mut attrs = attrs.clone();
+                        attrs.extend(inner_attrs);
+                        Some(attrs)
+                    }
+                    None => Some(attrs.clone()),
+                }
+            }
+        }
+    }
+}
diff --git a/xml-schema/src/name_allocator.rs b/xml-schema/src/name_allocator.rs index ecfc05d..1bfc2be 100644 --- a/xml-schema/src/name_allocator.rs +++ b/xml-schema/src/name_allocator.rs @@ -4,9 +4,8 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; use asts; -use attrs_bubble_up::Attrs; +use attrs::with_refs::Attrs; use names::{name_from_hint, FullName, NameGenerator, NameHint}; -use utils::Bottom; use asts::non_recursive::ComplexType as NRComplexType; use asts::non_recursive::ConcreteName; @@ -14,12 +13,14 @@ use asts::non_recursive::SimpleType as NRSimpleType; use asts::recursive::ComplexType as RComplexType; use asts::recursive::SimpleType as RSimpleType; -use attrs_bubble_up::OutSimpleType as InSimpleType; -use attrs_bubble_up::OutComplexType as InComplexType; +use lift_attrs::OutAttrs as InAttrs; +use lift_attrs::OutComplexType as InComplexType; +use lift_attrs::OutSimpleType as InSimpleType; pub type OutSimpleType<'input> = asts::non_recursive::SimpleType<'input>; pub type OutComplexType<'input> = - asts::non_recursive::ComplexType<'input, Attrs<'input, ConcreteName>, ComplexTypeExtra<'input>>; + asts::non_recursive::ComplexType<'input, OutAttrs<'input>, ComplexTypeExtra<'input>>; +pub type OutAttrs<'input> = Attrs<'input, ConcreteName>; #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, 
Ord)] pub enum ComplexTypeExtra<'input> { @@ -260,8 +261,8 @@ impl<'input> NameAllocator<'input> { fn allocate_attrs( &mut self, _namespace: Option<&'input str>, - _attrs: &Attrs<'input, InSimpleType>, - ) -> Attrs<'input, ConcreteName> { + _attrs: &InAttrs<'input>, + ) -> OutAttrs<'input> { unimplemented!() } } diff --git a/xml-schema/src/processor2.rs b/xml-schema/src/processor2.rs index 943b8d7..cb8103f 100644 --- a/xml-schema/src/processor2.rs +++ b/xml-schema/src/processor2.rs @@ -6,6 +6,8 @@ use xmlparser::{TextUnescape, XmlSpace}; use asts; use asts::recursive::{ComplexType, SimpleType}; +use attrs::with_refs::Attrs; +use attrs::AttrUse; use names::FullName; use parser::*; use primitives::{AnyUri, NonNegativeInteger, QName}; @@ -52,46 +54,13 @@ impl<'input> ToString for Documentation<'input> { } } -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub enum AttrUse { - Prohibited, - Required, - Optional, -} - -#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub struct Attrs<'input> { - pub named: Vec<(FullName<'input>, AttrUse, Option<OutSimpleType<'input>>)>, - pub refs: Vec<(Option<FullName<'input>>, AttrUse, FullName<'input>)>, - pub group_refs: Vec<FullName<'input>>, - pub any_attributes: bool, -} -impl<'input> Attrs<'input> { - pub fn new() -> Attrs<'input> { - Attrs { - named: Vec::new(), - refs: Vec::new(), - group_refs: Vec::new(), - any_attributes: false, - } - } - fn extend(&mut self, other: Attrs<'input>) { - let Attrs { - named, - refs, - group_refs, - any_attributes, - } = other; - self.named.extend(named); - self.refs.extend(refs); - self.group_refs.extend(group_refs); - self.any_attributes |= any_attributes; - } -} - pub type OutSimpleType<'input> = asts::recursive::SimpleType<'input>; -pub type OutComplexType<'input> = - asts::recursive::ComplexType<'input, Attrs<'input>, ComplexTypeExtra<'input, Attrs<'input>>>; +pub type OutComplexType<'input> = asts::recursive::ComplexType< + 'input, + OutAttrs<'input>, + 
ComplexTypeExtra<'input, OutAttrs<'input>>, +>; +pub type OutAttrs<'input> = Attrs<'input, OutSimpleType<'input>>; /// Other possibilities for SimpleType that will be shaven off by /// other passes @@ -112,7 +81,7 @@ pub struct SimpleToplevel<'input> { pub simple_types: HashMap<FullName<'input>, OutSimpleType<'input>>, pub complex_types: HashMap<FullName<'input>, OutComplexType<'input>>, pub groups: HashMap<FullName<'input>, OutComplexType<'input>>, - pub attribute_groups: HashMap<FullName<'input>, Attrs<'input>>, + pub attribute_groups: HashMap<FullName<'input>, OutAttrs<'input>>, } fn hashmap_map<K: Hash + Eq, V1, V2, F>(map: HashMap<K, V1>, mut mapper: F) -> HashMap<K, V2> @@ -393,12 +362,12 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { fn process_toplevel_attribute_group( &mut self, group: &'ast xs::AttributeGroup<'input>, - ) -> Attrs<'input> { + ) -> OutAttrs<'input> { self.process_attr_decls(&group.attr_decls) } - fn process_attr_decls(&mut self, attr_decls: &'ast xs::AttrDecls<'input>) -> Attrs<'input> { - let mut attrs = Attrs::new(); + fn process_attr_decls(&mut self, attr_decls: &'ast xs::AttrDecls<'input>) -> OutAttrs<'input> { + let mut attrs = OutAttrs::new(); for attr_decl in &attr_decls.attribute { match attr_decl { enums::AttrOrAttrGroup::Attribute(e) => { @@ -695,7 +664,7 @@ impl<'ast, 'input: 'ast> Processor<'ast, 'input> { min_occurs, max_occurs, FullName::new(namespace, name), - Attrs::new(), + OutAttrs::new(), Box::new(t), ) } diff --git a/xml-schema/src/ungroup.rs b/xml-schema/src/ungroup.rs index 3b4b8c2..11c7f45 100644 --- a/xml-schema/src/ungroup.rs +++ b/xml-schema/src/ungroup.rs @@ -5,7 +5,8 @@ use asts; use asts::non_recursive::ComplexType as NRComplexType; use asts::non_recursive::ConcreteName; use asts::non_recursive::SimpleType as NRSimpleType; -use attrs_bubble_up::Attrs; +use attrs::with_refs::Attrs as InAttrs; +use attrs::AttrUse; use names::FullName; use utils::Bottom; @@ -17,6 +18,34 @@ pub type OutSimpleType<'input> = 
InSimpleType<'input>; pub type OutComplexType<'input> = asts::non_recursive::ComplexType<'input, Attrs<'input, ConcreteName>, Bottom>; +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct Attrs<'input, TSimpleType: Clone> { + pub named: Vec<(FullName<'input>, AttrUse, Option<TSimpleType>)>, + pub refs: Vec<(Option<FullName<'input>>, AttrUse, FullName<'input>)>, + pub group_refs: Vec<FullName<'input>>, + pub any_attributes: bool, +} + +impl<'input, TSimpleType> Attrs<'input, TSimpleType> +where + TSimpleType: Clone, +{ + fn extend(&mut self, other: Attrs<'input, TSimpleType>) { + let Attrs { + named, + refs, + group_refs, + any_attributes, + } = other; + self.named.extend(named); + self.refs.extend(refs); + self.group_refs.extend(group_refs); + self.any_attributes |= any_attributes; + } +} + +pub type OutAttrs<'input> = Attrs<'input, OutSimpleType<'input>>; + pub fn ungroup_complex_type<'input>( _fullname_to_concrete_name: &HashMap<FullName<'input>, ConcreteName>, _groups: HashMap<FullName<'input>, &InComplexType<'input>>, From e544ded917a642dcc5e61267c77559f3c982b79b Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <progval+git@progval.net> Date: Sat, 16 Nov 2019 16:59:41 +0100 Subject: [PATCH 11/11] Add method transform_complex_type to get AttrsLifter's output. 
--- xml-schema/src/lift_attrs.rs | 140 +++++++++++++++++++++++++++-------- 1 file changed, 111 insertions(+), 29 deletions(-) diff --git a/xml-schema/src/lift_attrs.rs b/xml-schema/src/lift_attrs.rs index 5752e12..ed03f79 100644 --- a/xml-schema/src/lift_attrs.rs +++ b/xml-schema/src/lift_attrs.rs @@ -60,10 +60,19 @@ impl<'input> AttrsLifter<'input> { self.outdated_complex_types.insert(name); } + pub fn transform_complex_type( + &mut self, + complex_type: &InComplexType<'input>, + ) -> (OutAttrs<'input>, OutComplexType<'input>) { + assert!(self.outdated_complex_types.is_empty(), "There are outdated complex types, make_transitive_closure() should be called before transform_complex_type()."); + let (attrs, type_) = self.get_attrs_step(complex_type); + (attrs.unwrap_or(OutAttrs::new()), type_) + } + pub fn make_transitive_closure(&mut self) { while let Some(&name) = self.outdated_complex_types.iter().next() { let complex_type = self.complex_types.get(&name).expect("Name {} was supposed to be updated, but it missing from AttrsLifter.complex_types."); - let attrs = self.get_attrs_step(complex_type); + let (attrs, _inner_type) = self.get_attrs_step(complex_type); // inner_type is discarded, what a waste if self.attrs_of_complex_type.get(&name) != attrs.as_ref() { self.outdated_complex_types.insert(name); let attrs = attrs.expect( @@ -115,7 +124,15 @@ impl<'input> AttrsLifter<'input> { } } - fn get_attrs_step(&self, complex_type: &InComplexType<'input>) -> Option<OutAttrs<'input>> { + fn get_attrs_step( + &self, + complex_type: &InComplexType<'input>, + ) -> (Option<OutAttrs<'input>>, OutComplexType<'input>) { + // TODO: this function is called by make_transitive_closure, which discards the + // OutComplexType; this means this function is doing useless computation when it's not + // called by transform_complex_type. + // On the other hand, keeping that computation here avoids duplicating this function's code + // for computing OutAttrs. 
let merge_attrs = |attrs1: Option<&OutAttrs<'input>>, attrs2| match (attrs1, attrs2) { (None, attrs2) => attrs2, (Some(attrs1), None) => Some(attrs1.clone()), @@ -126,55 +143,120 @@ impl<'input> AttrsLifter<'input> { } }; match complex_type { - RComplexType::Any => None, - RComplexType::Empty => None, - RComplexType::Alias(fullname) => self.attrs_of_complex_type.get(fullname).cloned(), - - RComplexType::Extension(base, inner) => merge_attrs( - self.attrs_of_complex_type.get(base), - self.get_attrs_step(inner), + RComplexType::Any => (None, RComplexType::Any), + RComplexType::Empty => (None, RComplexType::Any), + RComplexType::Alias(fullname) => ( + self.attrs_of_complex_type.get(fullname).cloned(), + RComplexType::Alias(*fullname), ), + + RComplexType::Extension(base, inner) => { + let (inner_attrs, inner_type) = self.get_attrs_step(inner); + ( + merge_attrs(self.attrs_of_complex_type.get(base), inner_attrs), + RComplexType::Extension(*base, Box::new(inner_type)), + ) + } RComplexType::Restriction(base, inner) => { + let (inner_attrs, inner_type) = self.get_attrs_step(inner); // Attributes are inherited from the base: // "However, attribute declarations do not need to be repeated in the derived type definition" // https://www.w3.org/TR/xmlschema-0/#DerivByRestrict - merge_attrs( - self.attrs_of_complex_type.get(base), - self.get_attrs_step(inner), + ( + merge_attrs(self.attrs_of_complex_type.get(base), inner_attrs), + RComplexType::Restriction(*base, Box::new(inner_type)), ) } - RComplexType::ElementRef(_min_occurs, _max_occurs, _fullname) => None, + RComplexType::ElementRef(min_occurs, max_occurs, fullname) => ( + None, + RComplexType::ElementRef(*min_occurs, *max_occurs, *fullname), + ), - RComplexType::Choice(_min_occurs, _max_occurs, inners) - | RComplexType::Sequence(_min_occurs, _max_occurs, inners) => { - for inner in inners { - if self.get_attrs_step(inner).is_some() { + RComplexType::Choice(min_occurs, max_occurs, inners) => { + let inners: Vec<_> = inners 
+ .iter() + .map(|inner| self.get_attrs_step(inner)) + .collect(); + for (inner_attrs, _inner_type) in inners.iter() { + if inner_attrs.is_some() { unimplemented!( - "Sequence/choice got attribute declaration. \ + "Choice got attribute declaration. \ I don't know what to do with that." ); } } - None + ( + None, + RComplexType::Choice( + *min_occurs, + *max_occurs, + inners + .into_iter() + .map(|(_inner_attr, inner_type)| inner_type) + .collect(), + ), + ) + } + RComplexType::Sequence(min_occurs, max_occurs, inners) => { + let inners: Vec<_> = inners + .iter() + .map(|inner| self.get_attrs_step(inner)) + .collect(); + for (inner_attrs, _inner_type) in inners.iter() { + if inner_attrs.is_some() { + unimplemented!( + "Sequence got attribute declaration. \ + I don't know what to do with that." + ); + } + } + ( + None, + RComplexType::Sequence( + *min_occurs, + *max_occurs, + inners + .into_iter() + .map(|(_inner_attr, inner_type)| inner_type) + .collect(), + ), + ) } - RComplexType::Simple(_simple_type) => None, + RComplexType::Simple(simple_type) => (None, RComplexType::Simple(simple_type.clone())), - RComplexType::Element(_min_occurs, _max_occurs, _fullname, _attrs, _inner) => { + RComplexType::Element(min_occurs, max_occurs, fullname, attrs, inner) => { + let mut attrs = attrs.clone(); + let (inner_attrs, inner_type) = self.get_attrs_step(inner); + if let Some(inner_attrs) = inner_attrs { + attrs.extend(inner_attrs) + }; // Elements capture the attrs for themselves and don't pass any up - None - } - RComplexType::GroupRef(_min_occurs, _max_occurs, fullname) => { - self.attrs_of_complex_type.get(fullname).cloned() + ( + None, + RComplexType::Element( + *min_occurs, + *max_occurs, + *fullname, + attrs, + Box::new(inner_type), + ), + ) } + RComplexType::GroupRef(min_occurs, max_occurs, fullname) => ( + self.attrs_of_complex_type.get(fullname).cloned(), + RComplexType::GroupRef(*min_occurs, *max_occurs, *fullname), + ), 
RComplexType::Extra(InComplexTypeExtra::AttrDecl(attrs, inner)) => { - match self.get_attrs_step(inner) { + let (inner_attrs, inner_type) = self.get_attrs_step(inner); + let attrs = match inner_attrs { Some(inner_attrs) => { let mut attrs = attrs.clone(); attrs.extend(inner_attrs); - Some(attrs) + attrs } - None => Some(attrs.clone()), - } + None => attrs.clone(), + }; + (Some(attrs), inner_type) } } }