Mastering Oracle+Python, Part 6: Python Meets XML
by Przemyslaw Piotrowski 

https://www.oracle.com/technetwork/articles/dsl/mastering-oracle-python-xml-1391344.html

In [1]:
from xml.dom.minidom import parseString

In [2]:
import cx_Oracle

In [3]:
def to_minidom(sql):
    """Run ``sql`` through ``DBMS_XMLGEN.GETXML`` and parse the result with minidom.

    Args:
        sql: Text of the SELECT statement whose result set should be rendered
            as XML by the database.

    Returns:
        The ``xml.dom.minidom`` document built from the XML the database
        returned.

    Raises:
        ValueError: the database returned NULL instead of an XML document
            (presumably what GETXML does for a query with no rows -- confirm
            against the Oracle documentation).
    """
    # NOTE(review): the connect string (including the password) is hard-coded;
    # consider reading it from the environment before sharing this notebook.
    with cx_Oracle.connect("hr/hr@127.0.0.1/xe") as db:
        cursor = db.cursor()
        try:
            # Pass the query text as a bind variable rather than splicing it
            # into a quoted SQL literal: a query that itself contains a single
            # quote (e.g. "... where department_name = 'IT'") would otherwise
            # terminate the literal early and break the statement.
            cursor.execute(
                "select dbms_xmlgen.getxml(:query) from dual",
                {"query": sql},
            )
            xml_lob = cursor.fetchone()[0]
            if xml_lob is None:
                raise ValueError("dbms_xmlgen.getxml returned no XML for: %r" % (sql,))
            # Read the LOB while the connection is still open.
            return parseString(xml_lob.read())
        finally:
            cursor.close()

In [5]:
# Fetch the departments table as an XML document and collect its ROW elements.
md = to_minidom("select * from departments")
rows = md.getElementsByTagName("ROW")
# Inspect the container type that getElementsByTagName returned.
type(rows) 

xml.dom.minicompat.NodeList

In [6]:
# Number of ROW elements found in the document.
len(rows)

27

In [9]:
import os

In [10]:
# Show the kernel's working directory; relative paths such as
# "./cx_Oracle/dept.xml" used in later cells are resolved against it.
os.getcwd()

'C:\\GitHub\\py4kids\\lesson-14-db'

Parse departments into a dictionary

In [13]:
from xml.dom.minidom import parse

class ParseDept(dict):
  """Dictionary of department records read from an XML document with minidom.

  Each ROW element becomes one entry: the key is the text of the row's
  DEPARTMENT_ID element, the value is a dict mapping the lower-cased tag name
  of every child element of the row (DEPARTMENT_ID included) to its text.
  A child element with no content maps to None.

  The parsed document is kept on ``self.dom``.
  """

  def __init__(self, xmlfile):
    """Parse ``xmlfile`` (a path or an open file object) and fill the dict.

    Raises:
      IndexError: a ROW element has no DEPARTMENT_ID descendant.
      AttributeError: a ROW's DEPARTMENT_ID element is empty.
    """
    dict.__init__(self)
    self.dom = parse(xmlfile)

    for row in self.dom.getElementsByTagName("ROW"):
      id_nodes = row.getElementsByTagName("DEPARTMENT_ID")
      dept_id = id_nodes[-1].firstChild.data
      record = self[dept_id] = {}
      for child in row.childNodes:
        # Skip the whitespace text nodes that sit between the column elements.
        if child.nodeType == self.dom.ELEMENT_NODE:
          first = child.firstChild
          # An empty element (e.g. <MANAGER_ID/>) has no child node at all;
          # record it as None instead of failing on ``None.data``.
          record[child.tagName.lower()] = first.data if first is not None else None

In [14]:
# Build the department dictionary from the XML file (path is relative to the working directory).
dept = ParseDept("./cx_Oracle/dept.xml")

In [15]:
# Display the parsed departments, keyed by department id.
dept

{'10': {'department_id': '10',
  'department_name': 'Administration',
  'manager_id': '200',
  'location_id': '1700'},
 '20': {'department_id': '20',
  'department_name': 'Marketing',
  'manager_id': '201',
  'location_id': '1800'},
 '30': {'department_id': '30',
  'department_name': 'Purchasing',
  'manager_id': '114',
  'location_id': '1700'},
 '40': {'department_id': '40',
  'department_name': 'Human Resources',
  'manager_id': '203',
  'location_id': '2400'},
 '50': {'department_id': '50',
  'department_name': 'Shipping',
  'manager_id': '121',
  'location_id': '1500'},
 '60': {'department_id': '60',
  'department_name': 'IT',
  'manager_id': '103',
  'location_id': '1400'},
 '70': {'department_id': '70',
  'department_name': 'Public Relations',
  'manager_id': '204',
  'location_id': '2700'},
 '80': {'department_id': '80',
  'department_name': 'Sales',
  'manager_id': '145',
  'location_id': '2500'},
 '90': {'department_id': '90',
  'department_name': 'Executive',
  'manager_id': 

Processing department data with xml.sax

In [16]:
from xml.sax import make_parser
from xml.sax.handler import ContentHandler

class DeptHandler(ContentHandler):
  """SAX content handler that collects department rows into ``self.dept``.

  ``self.dept`` maps the text of each DEPARTMENT_ID element to a dict of the
  other elements in the same ROW, keyed by lower-cased tag name.  Every
  handler instance owns its own ``dept`` dictionary, so parsing with a fresh
  handler never mixes in results from an earlier one.
  """

  def __init__(self):
    ContentHandler.__init__(self)
    self.dept = {}         # results: department id -> {tag: text}
    self.text = ""         # character data of the element currently open
    self._in_row = False   # True between <ROW> and </ROW>

  def startElement(self, name, attrs):
    """Reset the text buffer whenever a new element opens."""
    if name == "ROW":
      self._in_row = True
    self.text = ""

  def characters(self, ch):
    """Accumulate character data; the parser may deliver it in several chunks."""
    self.text += ch

  def endElement(self, name):
    """Store the buffered text of a column element when it closes."""
    if name == "ROW":
      self._in_row = False
    elif not self._in_row:
      # Elements outside any ROW (notably the document's root element) are
      # not department fields; without this guard the closing root tag was
      # stored as a bogus field of the last department.
      return
    elif name == "DEPARTMENT_ID":
      self.curr = self.text
      self.dept[self.text] = {}
    else:
      self.dept[self.curr][name.lower()] = self.text

  def __del__(self):
    # Print the collected data when the handler is garbage-collected.
    print(self.dept)

In [17]:
# Create a SAX parser and register a DeptHandler instance to receive its events.
# The handler is not bound to a name here; parser.getContentHandler() returns it.
parser = make_parser()
parser.setContentHandler(DeptHandler())


In [18]:
# Open the file in a context manager so the handle is closed deterministically.
# parse() delivers the data to the registered content handler and returns None,
# so ``xdoc`` is kept only because the next cell inspects it.
with open("./cx_Oracle/dept.xml") as xml_file:
    xdoc = parser.parse(xml_file)

In [20]:
# parser.parse() hands everything to the content handler and returns nothing, hence NoneType.
type(xdoc)

NoneType

Parsing XML with ElementTree module

In [21]:
from xml.etree.ElementTree import ElementTree

class DeptTree:
  """Department records read from an XML document with ElementTree."""

  def __init__(self, xmlfile):
    """Parse ``xmlfile`` (a path or an open file object) into ``self.tree``."""
    self.tree = ElementTree()
    self.tree.parse(xmlfile)

  def traverse(self):
    """Return ``{department id: {lower-cased tag: text}}`` for every ROW.

    The DEPARTMENT_ID element supplies the key and is left out of the inner
    dict.  It is looked up by name, so it may appear anywhere among the row's
    children.  Rows without a DEPARTMENT_ID element are skipped, because
    there is nothing to key them by.
    """
    dept = {}
    for row in self.tree.findall("ROW"):
      id_elem = row.find("DEPARTMENT_ID")
      if id_elem is None:
        continue
      dept[id_elem.text] = {
        elem.tag.lower(): elem.text
        for elem in row.findall("*")
        if elem.tag != "DEPARTMENT_ID"
      }
    return dept

In [22]:
# Parse the same XML file with ElementTree and build the department dictionary.
dt = DeptTree("./cx_Oracle/dept.xml").traverse()

In [23]:
# Display the result; the department id is the key and is not repeated inside each record.
dt

{'10': {'department_name': 'Administration',
  'manager_id': '200',
  'location_id': '1700'},
 '20': {'department_name': 'Marketing',
  'manager_id': '201',
  'location_id': '1800'},
 '30': {'department_name': 'Purchasing',
  'manager_id': '114',
  'location_id': '1700'},
 '40': {'department_name': 'Human Resources',
  'manager_id': '203',
  'location_id': '2400'},
 '50': {'department_name': 'Shipping',
  'manager_id': '121',
  'location_id': '1500'},
 '60': {'department_name': 'IT', 'manager_id': '103', 'location_id': '1400'},
 '70': {'department_name': 'Public Relations',
  'manager_id': '204',
  'location_id': '2700'},
 '80': {'department_name': 'Sales',
  'manager_id': '145',
  'location_id': '2500'},
 '90': {'department_name': 'Executive',
  'manager_id': '100',
  'location_id': '1700'},
 '100': {'department_name': 'Finance',
  'manager_id': '108',
  'location_id': '1700'},
 '110': {'department_name': 'Accounting',
  'manager_id': '205',
  'location_id': '1700'},
 '120': {'departm

Judging from these implementations, ElementTree is the clear winner in usability and development speed. It complements Python with powerful processing capabilities that are easy to use and feel very natural. There is also a C implementation called cElementTree (remember cPickle?), which makes it very efficient; since Python 3.3 the standard xml.etree.ElementTree module uses that C accelerator automatically, and the separate cElementTree module was removed in Python 3.9.