Permalink
Browse files

Cleaned up the main file validator, separated the specific validation…

…s into new files that are called from the main validator
  • Loading branch information...
1 parent de8022a commit 7db62b5ca695d1d6c399e7650b0761709a62a6ad @jjensenmike jjensenmike committed Sep 1, 2011
Showing with 132 additions and 134 deletions.
  1. +5 −134 filevalidator.py
  2. +54 −0 fullrequiredvalidator.py
  3. +73 −0 streetsegmentvalidator.py
View
@@ -1,5 +1,7 @@
import argparse, urllib, sys, os, re, schema
from lxml import etree
+from streetsegmentvalidator import streetsegCheck
+from fullrequiredvalidator import fullrequiredCheck
def get_parsed_args():
@@ -94,149 +96,28 @@ def semanticCheck(elem):
continue
semanticCheck(subelem)
-def streetsegCheck(tree, datafile):
-
- filesize = os.path.getsize(datafile)
- ecount = 0
- wcount = 0
- streetmap = {}
- streetError = False
-
- for elem in tree:
- if elem.tag == "street_segment":
- ident = elem.get("id")
- tempmap={}
-
- for se in elem:
- if se.tag == "non_house_address":
- for vals in se:
- tempmap[vals.tag] = vals.text
- else:
- tempmap[se.tag] = se.text
-
- streetname = ""
- for f in streetsegfields:
- if f in tempmap and tempmap[f] != None:
- streetname += tempmap[f].strip() + "_"
- elif f in streetsegrequiredfields:
- ferr.write("Error in street segment with ID '" + str(ident) + "' missing required '" + f + "'\n")
- streetError = True
-
- if streetError:
- streetError = False
- continue
-
- streetname = streetname.rstrip("_").lower().replace(" ","_")
-
- streetside = tempmap["odd_even_both"]
-
- startnum = int(tempmap["start_house_number"])
- endnum = int(tempmap["end_house_number"])
-
- if not(streetname in streetmap):
- streetmap[streetname] = {}
- if not(streetside in streetmap[streetname]):
- streetmap[streetname][streetside] = []
- else:
- for i in range(len(streetmap[streetname][streetside])):
- tempstreet = streetmap[streetname][streetside][i]
- if (tempstreet["start_house"] <= startnum <= tempstreet["end_house"] or tempstreet["start_house"] <= endnum <= tempstreet["end_house"]):
- if tempstreet["precinct_id"] != tempmap["precinct_id"]:
- ferr.write("Error: House numbering error: Street Segments '" + str(tempstreet["id"]) + "' and '" + str(ident) + "' overlap house numbers and point to two different precincts\n")
- ecount += 1
- else:
- fwarn.write("Warning: House numbering overlaps but precinct IDs are consistent for Street Segments '" + str(tempstreet["id"]) + "' and '" + str(ident) + "'\n")
- wcount += 1
- streetmap[streetname][streetside].append({"start_house":startnum, "end_house":endnum, "id":ident, "precinct_id":tempmap["precinct_id"]})
- if (ecount>5000 or wcount > 5000) and filesize > sizelimit:
- ferr.write("Too many warnings and/or errors to complete validation")
- fwarn.write("Too many warnings and/or errors to complete validation")
- break
- ferr.write("Error Count: " + str(ecount))
- fwarn.write("Warning Count: " + str(wcount))
-
-def getTags(elements):
- tags = []
- for e in elements:
- tags.append(e.tag)
- return tags
-
-def checkType(elem, elemtype, schema):
- taglist = getTags(elem)
- requiredList = schema[elemtype]["requireds"]
- for k in range(len(requiredList)):
- if not(requiredList[k] in taglist):
- return False
- return True
-
-def fullrequiredCheck(root, schema):
- for vipelem in root:
- ident = vipelem.get("id")
- if schema["vip_object"][vipelem.tag]["indicator"] == "all":
- children = vipelem.getchildren()
- childrenTags = getTags(children)
- requiredList = schema["vip_object"][vipelem.tag]["requireds"]
- for i in range(len(schema["vip_object"][vipelem.tag]["elements"])):
- if schema["vip_object"][vipelem.tag]["elements"][i]["name"] in requiredList:
- if not(schema["vip_object"][vipelem.tag]["elements"][i]["name"] in childrenTags):
- ferr.write("Error '" + vipelem.tag + "' ID:" + str(ident) + " missing " + schema["vip_object"][vipelem.tag]["elements"][i]["name"] + "\n")
- elif schema["vip_object"][vipelem.tag]["elements"][i]["type"][0:3] != "xs:" and schema[schema["vip_object"][vipelem.tag]["elements"][i]["type"]]["type"] == "complexType":
- for c in children:
- if schema["vip_object"][vipelem.tag]["elements"][i]["name"] == c.tag:
- if not(checkType(c,schema["vip_object"][vipelem.tag]["elements"][i]["type"],schema)):
- ferr.write("Error '" + vipelem.tag + "' ID:" + str(ident) + " missing field in '" + schema["vip_object"][vipelem.tag]["elements"][i]["name"] + "'\n")
- break
- else:
- children = vipelem.getchildren()
- j = 0
- for i in range(len(schema["vip_object"][vipelem.tag]["elements"])):
- if j >= len(children):
- if schema["vip_object"][vipelem.tag]["elements"][i]["required"] == "True":
- ferr.write("Error '" + vipelem.tag + "' ID:" + str(ident) + " missing " + schema["vip_object"][vipelem.tag]["elements"][i]["name"]+"\n")
- break
- elif schema["vip_object"][vipelem.tag]["elements"][i]["name"] == children[j].tag:
- if schema["vip_object"][vipelem.tag]["elements"][i]["type"][0:3] != "xs:":
- if not(checkType(children[j],schema["vip_object"][vipelem.tag]["elements"][i]["type"][0:3])):
- ferr.write("Error '" + vipelem.tag + "' ID:" + str(ident) + " missing field in '" + schema["vip_object"][vipelem.tag]["elements"][i]["name"] + "'\n")
- break
- j+=1
- elif schema["vip_object"][vipelem.tag]["elements"][i]["required"] == "True":
- ferr.write("Error '" + vipelem.tag + "' ID:" + str(ident) + " missing " + schema["vip_object"][vipelem.tag]["elements"][i]["name"] + "\n")
- break
-
-baseSchemaUrl = "http://election-info-standard.googlecode.com/files/vip_spec_v"
-version = "2.3"
-versionList = ["2.0","2.1","2.2","2.3","3.0"]
localityTypes = ['county','city','town','township','borough','parish','village','region']
sizelimit = 150000000
-streetsegfields = ["city","zip","street_direction","street_name","address_direction","start_house_number","end_house_number","odd_even_both"]
-streetsegrequiredfields = ["city","zip","street_name","start_house_number","end_house_number","odd_even_both"]
zipcode = re.compile("\d{5}(?:[-\s]\d{4})?")
email = re.compile("[a-zA-Z0-9+_\-\.]+@[0-9a-zA-Z][.-0-9a-zA-Z]*.[a-zA-Z]")
url = re.compile("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))")
xmlparser = etree.XMLParser()
-parsedschema = {}
-intList = []
startHouseNum = -1
totalVotes = 0
results = get_parsed_args()
if results.version:
version = results.version
-if version == "2.2":
- fschema = urllib.urlopen(baseSchemaUrl + version + "a.xsd")
-else:
- fschema = urllib.urlopen(baseSchemaUrl + version + ".xsd")
-
-schema = schema.Schema("version","2.3")
fname = results.files[0]
data = etree.parse(open(fname),xmlparser)
root = data.getroot()
+
+schema = schema.Schema("version",version)
basicCheck = schema.xmlschema.validate(data)
print "Basic Schema Check for " + str(fname) + ": " + str(basicCheck)
@@ -257,17 +138,7 @@ def fullrequiredCheck(root, schema):
ferr.close()
fwarn.close()
-ferr = open(fname + "streetseg.err","w")
-fwarn = open(fname + "streetseg.warn","w")
-print "Checking street segment values...."
streetsegCheck(root, fname)
-print "Finished checking street segment values, data located in " + fname + "streetseg.err and " + fname + "streeseg.warn"
-ferr.close()
-fwarn.close()
if not(basicCheck):
- print "Running full check on required xml fields"
- ferr = open(fname + "fullerrors.err","w")
- fullrequiredCheck(root,schema.schema)
- print "Finished full required xml field check, data located in " + fname + "fullerrors.err"
- ferr.close()
+ fullrequiredCheck(root,schema.schema,fname)
View
@@ -0,0 +1,54 @@
def getTags(elements):
    """Return the ``tag`` of every item in *elements*, in iteration order.

    *elements* may be any iterable of objects exposing a ``tag`` attribute:
    a list of XML elements, or a single element (iterating an element
    yields its children).
    """
    return [element.tag for element in elements]
+
def checkType(elem, elemtype, schema):
    """Report whether *elem* contains every field its schema type requires.

    elem     -- element whose direct children are inspected
    elemtype -- key into *schema* naming the type to check against
    schema   -- mapping in which ``schema[elemtype]["requireds"]`` lists the
                child tag names that must be present

    Returns True when each required tag appears among the children of
    *elem* (trivially True when nothing is required), otherwise False.
    """
    # Collect the child tags once so each membership test is a set lookup.
    present = set(child.tag for child in elem)
    return all(name in present for name in schema[elemtype]["requireds"])
+
def _isComplexType(schema, typename):
    """Return True when *typename* is a schema-defined complex type.

    Built-in XSD types (names starting with "xs:") have no entry in
    *schema*, so they are rejected before the lookup.
    """
    return typename[0:3] != "xs:" and schema[typename]["type"] == "complexType"


def _checkUnordered(ferr, prefix, spec, children, schema):
    """Validate an element whose children may appear in any order ("all")."""
    present = set(child.tag for child in children)
    requireds = spec["requireds"]
    for field in spec["elements"]:
        name = field["name"]
        if name in requireds and name not in present:
            ferr.write(prefix + " missing " + name + "\n")
        elif _isComplexType(schema, field["type"]):
            # NOTE(review): the nesting of the original branches could not be
            # recovered; this checks the sub-fields of every present child of
            # a complex type, required or not, and reports at most one error
            # per field name -- confirm this is the intended coverage.
            matching = [child for child in children if child.tag == name]
            if any(not checkType(child, field["type"], schema) for child in matching):
                ferr.write(prefix + " missing field in '" + name + "'\n")


def _checkSequence(ferr, prefix, spec, children, schema):
    """Validate an element whose children must follow the schema order.

    Walks the schema's element list and the actual children in lock step
    and stops at the first problem found, because once one element is out
    of place the remaining positions can no longer be matched reliably.
    """
    position = 0
    for field in spec["elements"]:
        name = field["name"]
        if position >= len(children):
            # Ran out of children: only a still-required field is an error.
            if field["required"] == "True":
                ferr.write(prefix + " missing " + name + "\n")
                break
        elif name == children[position].tag:
            child = children[position]
            # Pass the full type name and the schema; the complex-type guard
            # matches the one used for unordered elements.
            if _isComplexType(schema, field["type"]) and not checkType(child, field["type"], schema):
                ferr.write(prefix + " missing field in '" + name + "'\n")
                break
            position += 1
        elif field["required"] == "True":
            ferr.write(prefix + " missing " + name + "\n")
            break


def fullrequiredCheck(root, schema, fname):
    """Check every child of *root* for missing required fields.

    root   -- parsed feed root; each child is looked up by tag in
              ``schema["vip_object"]``
    schema -- parsed schema mapping; each ``vip_object`` entry supplies
              "indicator", "requireds" and "elements" (a list of dicts with
              "name", "type" and "required")
    fname  -- path of the data file; problems are written to
              ``fname + "fullerrors.err"``

    Nothing is returned; findings go to the error file, which is closed
    even if validation raises.
    """
    print("Running full check on required xml fields")
    vipSchema = schema["vip_object"]

    with open(fname + "fullerrors.err", "w") as ferr:
        for vipelem in root:
            spec = vipSchema[vipelem.tag]
            prefix = "Error '" + vipelem.tag + "' ID:" + str(vipelem.get("id"))
            children = list(vipelem)
            if spec["indicator"] == "all":
                _checkUnordered(ferr, prefix, spec, children, schema)
            else:
                _checkSequence(ferr, prefix, spec, children, schema)

    print("Finished full required xml field check, data located in " + fname + "fullerrors.err")
View
@@ -0,0 +1,73 @@
+import os
+
# Every field that may describe a street segment.
STREETSEGFIELDS = ["city","zip","street_direction","street_name","address_direction","start_house_number","end_house_number","odd_even_both"]
# Subset of STREETSEGFIELDS that each street segment must supply.
STREETSEGREQUIREDFIELDS = ["city","zip","street_name","start_house_number","end_house_number","odd_even_both"]
# Fields that identify the street itself.  House numbers and the odd/even
# side are deliberately excluded: including them in the key would mean only
# segments with identical ranges are ever compared with each other.
STREETNAMEFIELDS = ["city","zip","street_direction","street_name","address_direction"]
# Same value as ``sizelimit`` in the main validator script: files larger than
# this many bytes stop being validated once MAXISSUES is exceeded.
SIZELIMIT = 150000000
# Number of errors (or warnings) tolerated before a large file is abandoned.
MAXISSUES = 5000


def _readSegment(elem):
    """Flatten a street_segment element into a ``{tag: text}`` dict.

    Children of a nested ``non_house_address`` element are lifted to the
    same level as the segment's direct children.
    """
    values = {}
    for child in elem:
        if child.tag == "non_house_address":
            for part in child:
                values[part.tag] = part.text
        else:
            values[child.tag] = child.text
    return values


def _checkSegments(tree, ferr, fwarn, filesize):
    """Write overlap findings for *tree* and return ``(ecount, wcount)``."""
    ecount = 0
    wcount = 0
    streetmap = {}

    for elem in tree:
        if elem.tag != "street_segment":
            continue
        ident = str(elem.get("id"))
        values = _readSegment(elem)

        # precinct_id is compared below, so it is needed as much as the
        # listed required fields are.
        missing = [f for f in STREETSEGREQUIREDFIELDS + ["precinct_id"] if values.get(f) is None]
        for f in missing:
            ferr.write("Error in street segment with ID '" + ident + "' missing required '" + f + "'\n")
        if missing:
            continue

        try:
            startnum = int(values["start_house_number"])
            endnum = int(values["end_house_number"])
        except ValueError:
            ferr.write("Error in street segment with ID '" + ident + "' has a non-numeric house number\n")
            continue
        # Tolerate a reversed range so the interval test below stays valid.
        low = min(startnum, endnum)
        high = max(startnum, endnum)

        parts = [values[f].strip() for f in STREETNAMEFIELDS if values.get(f) is not None]
        streetname = "_".join(parts).lower().replace(" ", "_")
        streetside = values["odd_even_both"]
        precinct = values["precinct_id"]

        known = streetmap.setdefault(streetname, {}).setdefault(streetside, [])
        for other in known:
            # Two closed ranges overlap when each one starts no later than
            # the other ends; this also catches one range enclosing another.
            if other["start_house"] <= high and low <= other["end_house"]:
                if other["precinct_id"] != precinct:
                    ferr.write("Error: House numbering error: Street Segments '" + str(other["id"]) + "' and '" + ident + "' overlap house numbers and point to two different precincts\n")
                    ecount += 1
                else:
                    fwarn.write("Warning: House numbering overlaps but precinct IDs are consistent for Street Segments '" + str(other["id"]) + "' and '" + ident + "'\n")
                    wcount += 1
        known.append({"start_house": low, "end_house": high, "id": ident, "precinct_id": precinct})

        if (ecount > MAXISSUES or wcount > MAXISSUES) and filesize > SIZELIMIT:
            ferr.write("Too many warnings and/or errors to complete validation\n")
            fwarn.write("Too many warnings and/or errors to complete validation\n")
            break

    return ecount, wcount


def streetsegCheck(tree, datafile):
    """Look for street segments whose house-number ranges overlap.

    tree     -- iterable of feed elements; only those tagged
                "street_segment" are examined
    datafile -- path of the feed file; its size decides whether validation
                may be cut short, and it is the prefix of the two output
                files ``datafile + "streetseg.err"`` and
                ``datafile + "streetseg.warn"``

    Segments on the same street and the same odd/even/both side are
    compared pairwise.  An overlap between segments pointing at different
    precincts is written to the error file; an overlap within one precinct
    is written to the warning file.  Segments missing a required field or
    carrying a non-numeric house number are reported as errors and skipped.
    Each file ends with its total count.  Nothing is returned.
    """
    print("Checking street segment values....")
    filesize = os.path.getsize(datafile)

    with open(datafile + "streetseg.err", "w") as ferr:
        with open(datafile + "streetseg.warn", "w") as fwarn:
            ecount, wcount = _checkSegments(tree, ferr, fwarn, filesize)
            ferr.write("Error Count: " + str(ecount) + "\n")
            fwarn.write("Warning Count: " + str(wcount) + "\n")

    print("Finished checking street segment values, data located in " + datafile + "streetseg.err and " + datafile + "streetseg.warn")

0 comments on commit 7db62b5

Please sign in to comment.