This notebook reads $\texttt{victorinox.xml}$ and $\texttt{victorinox.json}$, two files saved in $\texttt{/Files}$.  For each file, it generates a Pandas DataFrame of the Victorinox Catalog.  It also contains sample searches of the XML data (using object-oriented programming) and of the JSON data to find which SAKs have (1) a Parcel Hook, (2) a Small Blade.

In [None]:
from pathlib import Path
import xml.etree.ElementTree as ET
import json
import numpy as np
import pandas as pd

In [None]:
# Mount Google Drive at /content/gdrive; the data directory read later in this
# notebook lives underneath this mount point.
# If remounting (i.e. to reload updated data), run cell twice
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
# Folder on the mounted Google Drive from which victorinox.xml and
# victorinox.json are read.
directory = Path('/content/gdrive/MyDrive/Colab Notebooks/Files/')

# XML

In [None]:
# Parse the catalog XML once and keep both the tree and its root element.
xml_path = directory / 'victorinox.xml'
tree = ET.parse(xml_path)
root = tree.getroot()

In [None]:
# Flatten every <knife> element into one row of the catalog DataFrame.
#   knife       -> the element's `id` attribute
#   length      -> <length> text as int
#   layers      -> <layers> text as int
#   front_tool  -> text of every <front_tool> child (list)
#   back_tool   -> text of every <back_tool> child (list, empty when there are none)
#   scale_tool  -> bool; XML text is always a string, and the string "False" is
#                  truthy, so it is converted explicitly instead of being stored raw
#   scales      -> <scales> text
data = [
    {
        'knife': record.attrib['id'],
        'length': int(record.find('length').text),
        'layers': int(record.find('layers').text),
        'front_tool': [tool.text for tool in record.findall('front_tool')],
        'back_tool': [tool.text for tool in record.findall('back_tool')],
        # findtext returns '' for an empty element and the default when the
        # element is absent, so a missing/empty <scale_tool> becomes False.
        'scale_tool': record.findtext('scale_tool', default='').strip().lower() == 'true',
        'scales': record.find('scales').text,
    }
    for record in root.findall('.//knife')
]
# Passing `columns` fixes the column order and keeps the headers even if no
# <knife> elements were found.
xml_df = pd.DataFrame(data, columns=[
    'knife', 'length', 'layers', 'front_tool', 'back_tool', 'scale_tool', 'scales'
])
display(xml_df.tail())

Unnamed: 0,knife,length,layers,front_tool,back_tool,scale_tool,scales
15,Deluxe Tinker,91,4,"[Main Blade, Small Blade, Can Opener, Bottle O...","[Awl, Phillips, Parcel Hook]",True,Cellidor
16,Pioneer,93,2,"[Main Blade, Awl, Can Opener, Bottle Opener]",[],False,Alox
17,Farmer,93,3,"[Main Blade, Awl, Saw, Can Opener, Bottle Opener]",[],False,Alox
18,Pioneer X,93,3,"[Main Blade, Awl, Scissors, Can Opener, Bottle...",[],False,Alox
19,Farmer X,93,4,"[Main Blade, Awl, Scissors, Saw, Can Opener, B...",[],False,Alox


# XML with OOP

In [None]:
# Same parse as in the XML section above, repeated at the start of the OOP section.
xml_path = directory / 'victorinox.xml'
tree = ET.parse(xml_path)
root = tree.getroot()

In [None]:
# Object for Each XML Entry
class SAK:
    """A single Swiss Army Knife (SAK) built from one ``<knife>`` XML element.

    The attributes mirror the parameters of the XML file:

    * ``knife``      -- value of the element's ``id`` attribute
    * ``length``     -- ``<length>`` text converted to ``int``
    * ``layers``     -- ``<layers>`` text converted to ``int``
    * ``front_tool`` -- list with the text of every ``<front_tool>`` child
    * ``back_tool``  -- list with the text of every ``<back_tool>`` child
      (empty when the knife has none)
    * ``scale_tool`` -- ``bool``; ``True`` only when the ``<scale_tool>`` text
      is "true" (case-insensitive), ``False`` otherwise
    * ``scales``     -- ``<scales>`` text
    """

    def __init__(self, record):
        """Populate the instance from ``record``, an ElementTree ``knife`` element.

        Raises ``KeyError`` when the ``id`` attribute is missing,
        ``AttributeError`` when ``<length>``, ``<layers>`` or ``<scales>`` is
        missing, and ``ValueError`` when length/layers are not integers.
        """
        self.knife = record.attrib['id']
        self.length = int(record.find('length').text)
        self.layers = int(record.find('layers').text)
        self.front_tool = [tool.text for tool in record.findall('front_tool')]
        self.back_tool = [tool.text for tool in record.findall('back_tool')]
        # XML text is always a string and the string "False" is truthy, so the
        # flag is converted to a real bool. findtext yields '' for an empty or
        # absent element, which maps to False.
        self.scale_tool = (
            record.findtext('scale_tool', default='').strip().lower() == 'true'
        )
        self.scales = record.find('scales').text

## XML Entries as a List

In [167]:
# One SAK object per <knife> element, in document order.
saks = [SAK(record) for record in root.findall('.//knife')]

In [168]:
# %%timeit
# 1.42 µs ± 96.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# (timing above was measured on an earlier explicit-loop version of this cell)
# Alphabetical names of the knives whose back tools include a Parcel Hook.
results = sorted(
    sak.knife for sak in saks if "Parcel Hook" in sak.back_tool
)
results

['Climber',
 'Compact',
 'Deluxe Tinker',
 'Fieldmaster',
 'Huntsman',
 'Super Tinker']

## XML Entries as a Dictionary

In [169]:
# Map each knife id to its SAK object (a repeated id keeps the last entry).
saks = {
    record.attrib['id']: SAK(record)
    for record in root.findall('.//knife')
}

In [173]:
# %%timeit
# 3.24 µs ± 709 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# (timing above was measured on an earlier explicit-loop version of this cell)
# Alphabetical names of the knives whose back tools include a Parcel Hook,
# looked up through the id -> SAK dictionary.
results = sorted(
    saks[knife].knife for knife in saks if "Parcel Hook" in saks[knife].back_tool
)
results

['Climber',
 'Compact',
 'Deluxe Tinker',
 'Fieldmaster',
 'Huntsman',
 'Super Tinker']

In [None]:
# Alphabetical names of the knives whose front tools include a Small Blade.
# `saks` is the id -> SAK dictionary at this point in the notebook, so iterate
# over its values: iterating the dictionary itself yields the id strings, which
# have no `front_tool` attribute and would raise AttributeError on a fresh run.
results = sorted(
    sak.knife for sak in saks.values() if "Small Blade" in sak.front_tool
)
results

['Camper',
 'Climber',
 'Deluxe Tinker',
 'Fieldmaster',
 'Hiker',
 'Huntsman',
 'Recruit',
 'Spartan',
 'Super Tinker',
 'Tinker']

# JSON

In [None]:
# Location of the JSON version of the catalog inside the data directory.
json_path = directory / 'victorinox.json'

In [None]:
# Load the whole catalog into memory. The encoding is pinned to UTF-8 (the
# standard encoding for JSON files) so decoding does not depend on the
# platform's default locale.
with open(json_path, 'r', encoding='utf-8') as file:
    vic_json = json.load(file)

In [None]:
# Build the catalog DataFrame straight from the loaded JSON records
# (one row per record) and preview the last rows.
json_df = pd.DataFrame(vic_json)
display(json_df.tail())

Unnamed: 0,knife,length,layers,front_tool,back_tool,scale_tool,scales
15,Huntsman,91,4,"[Main Blade, Small Blade, Scissors, Saw, Can O...","[Awl, Parcel Hook, Cork Screw]",True,Cellidor
16,Pioneer,93,2,"[Main Blade, Awl, Can Opener, Bottle Opener]",[None],False,Alox
17,Farmer,93,3,"[Main Blade, Awl, Saw, Can Opener, Bottle Opener]",[None],False,Alox
18,Pioneer X,93,3,"[Main Blade, Awl, Scissors, Can Opener, Bottle...",[None],False,Alox
19,Farmer X,93,4,"[Main Blade, Awl, Scissors, Awl, Can Opener, B...",[None],False,Alox


In [None]:
# %%timeit results for Parcel Hook
# 2.35 µs ± 67.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# (timing above was measured on an earlier explicit-loop version of this cell)
# Alphabetical names of the JSON entries whose back tools include a Parcel Hook.
results = sorted(
    entry['knife'] for entry in vic_json if "Parcel Hook" in entry['back_tool']
)
results

['Climber',
 'Compact',
 'Deluxe Tinker',
 'Field Master',
 'Huntsman',
 'Super Tinker']

In [None]:
# Alphabetical names of the JSON entries whose front tools include a Small Blade.
results = sorted(
    entry['knife'] for entry in vic_json if "Small Blade" in entry['front_tool']
)
results

['Camper',
 'Climber',
 'Deluxe Tinker',
 'Field Master',
 'Hiker',
 'Huntsman',
 'Recruit',
 'Spartan',
 'Super Tinker',
 'Tinker']

# Results

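Searching the in-memory `SAK` objects built from the XML file is quicker than searching the records loaded from the JSON file (both timings cover only the search, not reading or parsing the files).  To find which Victorinox knives have a Parcel Hook using `%%timeit`:
* XML with OOP (list of `SAK` objects): 1.42 µs ± 96.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)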
* JSON: 2.35 µs ± 67.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)