Because there's every chance that I might not be allowed to actually use Mongo, I'm going to try recreating the last file using `pickle` instead. This might be easier, and pickling retains the Python data structures better than a text-based data storage option like JSON or XML (and even Mongo, to some extent).

`pickle` will work well with `pandas` since a pandas DataFrame can be exported directly as a .pkl object

## Why did I pick JSON?

JSON is useful since it is easy to read and is lightweight compared with XML. Since these data are two-dimensional, there is little need for the parameters that are used in XML.
JSON is more legible than XML
JSON has a smaller file size and is faster to transfer
XML has extraneous properties for this dataset
XML's rigid structure is nice, but not necessary for data of this type

In [None]:
import pandas as pd
from os import path, makedirs
import csv
import json

DATA_FOLDER = 'data'

def read_pickle_backup(filename: str):
    '''Load a Pickle backup file through Pandas

    Parameters:
        filename: base name (without extension) of the file stored in the 'pickle' sub-folder of DATA_FOLDER

    Returns:
        Whatever Pandas unpickles from the file (a DataFrame when the file was written by pickle_backup)

    Note:
        Unpickling can execute arbitrary code, so only load files from a trusted source'''

    pickle_name = filename + '.pkl'
    pickle_path = path.join(DATA_FOLDER, 'pickle', pickle_name)

    return pd.read_pickle(pickle_path)

def pickle_backup(out_file: str, df: pd.DataFrame):
    '''Write a Pandas DataFrame to a Pickle file

    Parameters:
        out_file: base name (without extension) of the file to create in the 'pickle' sub-folder of DATA_FOLDER
        df: Pandas DataFrame to serialise'''

    pickle_dir = path.join(DATA_FOLDER, 'pickle')

    # Make sure the destination folder exists before writing into it
    makedirs(pickle_dir, exist_ok=True)

    target = path.join(pickle_dir, out_file + '.pkl')
    df.to_pickle(target)

def csv_to_json(filename: str):
    '''Load Comma Separated Values (CSV) text file and save its rows as a JSON file

    Every data row is keyed by its zero-based position in the file, and the
    resulting mapping is handed to df_to_json, which writes it out under the
    same base name.

    Parameters:
        filename: the name of the file located in the 'csv' sub-folder of the designated DATA_FOLDER, excluding the extension'''

    csv_path = path.join(DATA_FOLDER, 'csv', filename + '.csv')

    # newline='' leaves line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are read back unchanged
    with open(csv_path, encoding='utf-8', newline='') as csv_file:
        # DictReader turns each row into a dict keyed by the header row
        records = dict(enumerate(csv.DictReader(csv_file)))

    df_to_json(filename, records)

def json_to_df(filename: str):
    '''Read a JSON file into a Pandas DataFrame

    Parameters:
        filename: base name (without extension) of the file stored in the 'json' sub-folder of DATA_FOLDER

    Returns:
        Pandas DataFrame built from the parsed JSON and then transposed'''

    json_path = path.join(DATA_FOLDER, 'json', filename + '.json')

    with open(json_path, 'r') as json_file:
        parsed = json.load(json_file)

    # The DataFrame constructor turns each top-level key into a column, so
    # transpose to end up with one row per top-level key
    frame = pd.DataFrame(parsed)
    return frame.T

def df_to_json(out_file: str, df: pd.DataFrame | dict):
    '''Dump Pandas DataFrame or dict to JSON file

    Both input kinds are written row-keyed ({row label: {column: value}}),
    which is the layout json_to_df transposes back into a DataFrame.

    Parameters:
        out_file: the name of the output file located in the 'json' sub-folder of the designated DATA_FOLDER, excluding extension
        df: Pandas DataFrame or JSON dict to parse as JSON file

    Raises:
        TypeError: if df is neither a Pandas DataFrame nor a dict'''

    # Reject unsupported input up front instead of silently writing nothing
    if not isinstance(df, (pd.DataFrame, dict)):
        raise TypeError(
            'df must be a pandas DataFrame or a dict, not ' + type(df).__name__
        )

    # Create directory
    makedirs(path.join(DATA_FOLDER, 'json'), exist_ok=True)

    out_path = path.join(DATA_FOLDER, 'json', out_file + '.json')

    # DataFrame to JSON
    if isinstance(df, pd.DataFrame):
        # orient='index' keys the output by row label, matching the dict
        # produced from a parsed CSV, so the file round-trips through json_to_df
        df.to_json(out_path, orient='index', indent=4)

    # Parsed CSV to JSON
    else:
        with open(out_path, 'w') as file:
            json.dump(df, file, indent=4)