In [8]:
import pandas as pd
from os import PathLike
import numpy as np
import pymongo as pym
from pymongo import database as mong_db
from pymongo import collection as mong_coll
from pymongo import errors

In [None]:
def is_temp_db(db_or_coll: mong_db.Database | mong_coll.Collection):
    '''
    Report whether a Database or Collection belongs to a 'temp_' database,
    i.e. one holding state saved from a previous session.

    A Database is judged by its own name; anything else is treated as a
    Collection and judged by the name of its parent database.
    '''
    if isinstance(db_or_coll, mong_db.Database):
        owning_db = db_or_coll
    else:
        owning_db = db_or_coll.database
    # str.startswith already yields a bool, so no True/False wrapping is needed
    return owning_db.name.startswith('temp_')


def get_mongo_client(host: str = 'localhost', port: int = 27017) -> pym.MongoClient:
    '''
    Build a MongoClient for the given host and port
    (defaults: 'localhost', 27017)
    '''
    client = pym.MongoClient(host=host, port=port)
    return client

def connect_mongo_db(database_name: str, load_last_state: bool = True) -> mong_db.Database:
    '''
    Fetch a Database object from a default client.

    With load_last_state set, the 'temp_'-prefixed database (the previously
    saved state) is returned; otherwise the database named exactly
    database_name is returned.
    '''
    target_name = 'temp_' + database_name if load_last_state else database_name
    return get_mongo_client()[target_name]

def get_database_names(client: pym.MongoClient | None = None) -> list[str]:
    '''
    Get list of names of databases belonging to given Client

    Parameters:
        client: the MongoClient to query. When omitted (None), a default
            client from get_mongo_client() is created at call time.

    Returns:
        The result of client.list_database_names().
    '''
    # A default of get_mongo_client() in the signature would be evaluated once,
    # when the def statement runs, creating a client as a side effect of merely
    # defining the function. Use a None sentinel and build the client lazily.
    if client is None:
        client = get_mongo_client()
    return client.list_database_names()

# This overwrites the 'temp' version, but retains original copy
def save_current_state(df: pd.DataFrame, collection: mong_coll.Collection):
    '''
    Saves the current state of the DataFrame to a nearby 'temp_' database,
    preserving the name of the Collection object

    The target collection in the 'temp_' database is emptied and refilled with
    one document per DataFrame row. If `collection` already lives in a
    'temp_' database, that same database is used as the target.

    Parameters:
        df: the DataFrame to persist. Its index is not written, because
            to_dict('records') only emits the columns.
        collection: the Collection whose name (and client) determine where
            the state is stored.
    '''
    # Convert before touching the database, so a failure here cannot leave the
    # previously saved state wiped.
    # Original author's note: converting to dict loses datetime formatting.
    records = df.to_dict('records')

    # Prevents recursion of temp_ naming system
    if is_temp_db(collection):
        # Use the 'temp_' version if it's already in use
        output_db = collection.database
    else:
        # Output to MongoDB in 'temp' version of database
        output_db = collection.database.client['temp_' + collection.database.name]
    output_coll = output_db[collection.name]

    # Delete all to overwrite
    output_coll.delete_many({})

    # insert_many rejects an empty document list, so an empty DataFrame simply
    # leaves the target collection empty instead of raising after the delete.
    if records:
        output_coll.insert_many(records)

def load_collection_as_df(collection: mong_coll.Collection) -> tuple[pd.DataFrame, mong_coll.Collection]:
    '''
    Convert a Collection object to a pandas DataFrame

    Returns:
        (df, collection): every document of the collection as a DataFrame
        indexed by '_id', together with the collection that was passed in.

    Raises:
        errors.InvalidOperation: if the resulting frame has no '_id' column
            to index on, which is what an empty collection produces.
    '''
    documents = list(collection.find({}))
    df = pd.DataFrame(documents)
    try:
        df = df.set_index('_id')
    except KeyError as error:
        # Chain explicitly so the traceback marks the KeyError as the direct cause
        raise errors.InvalidOperation(f'{collection.name} doesn\'t contain any data') from error
    return (df, collection)

def import_csv_to_mongo(csv_file: str | PathLike, collection: mong_coll.Collection, append=False):
    '''
    Parse CSV file and store it in Mongo DB via pandas DataFrame

    Parameters:
        csv_file: path of the CSV file, read with pandas.read_csv.
        collection: destination Collection. If it lives in a 'temp_'
            database, the same-named collection in the corresponding
            non-temp database is written instead.
        append: when False (default) the destination is emptied before the
            rows are inserted; when True the rows are added to what is there.
    '''
    if is_temp_db(collection):
        # Don't read directly into a temp_ database.
        # removeprefix strips only the leading 'temp_' marker; str.replace
        # would also remove any later 'temp_' inside the database name and
        # point at the wrong database.
        main_db_name = collection.database.name.removeprefix('temp_')
        collection = collection.database.client[main_db_name][collection.name]

    records = pd.read_csv(csv_file).to_dict('records')

    if not append:
        # Overwrite
        collection.delete_many({})

    # Insert - insert_many rejects an empty list, so a CSV with a header but
    # no rows is skipped rather than raising after the delete above.
    if records:
        collection.insert_many(records)

def commit_temp_to_main(collection: mong_coll.Collection):
    '''
    Save previous session data in the main database

    Copies every document of a 'temp_' collection into the same-named
    collection of the corresponding non-temp database, replacing what was
    there. Does nothing when `collection` is not in a 'temp_' database.

    The documents go through load_collection_as_df, which moves '_id' into the
    index, and to_dict('records') does not emit the index - so the copies are
    inserted without the temp documents' '_id' values.

    Raises:
        Whatever load_collection_as_df raises for an empty temp collection
        (errors.InvalidOperation); the main collection is left untouched in
        that case.
    '''
    if not is_temp_db(collection):
        return

    # Read the temp data BEFORE deleting anything: if the temp collection is
    # empty the load raises, and the main collection must not be wiped first.
    records = load_collection_as_df(collection)[0].to_dict('records')

    # removeprefix strips only the leading 'temp_' marker; str.replace would
    # also remove any later 'temp_' inside the database name.
    main_db_name = collection.database.name.removeprefix('temp_')
    targetcoll = collection.database.client[main_db_name][collection.name]

    targetcoll.delete_many({})
    targetcoll.insert_many(records)

In [13]:
# Open the 'summative' database itself; load_last_state=False skips the 'temp_' copy
db = connect_mongo_db('summative', load_last_state=False)
# load_collection_as_df returns the '_id'-indexed DataFrame plus the collection it was read from
df, collection = load_collection_as_df(db['component_codes'])
# Bare last expression so the notebook renders the DataFrame
df

Unnamed: 0_level_0,Component,Code
_id,Unnamed: 1_level_1,Unnamed: 2_level_1
67378e19faaa5193fb07f97e,Course,Cour
67378e19faaa5193fb07f97f,Quiz,Quiz
67378e19faaa5193fb07f980,Assignment,Assign
67378e19faaa5193fb07f981,System,Sys
67378e19faaa5193fb07f982,Lecture,Lect
67378e19faaa5193fb07f983,Study_material,Study_mat
67378e19faaa5193fb07f984,Manual,Man
67378e19faaa5193fb07f985,Survey,Survey
67378e19faaa5193fb07f986,Folder,Fold
67378e19faaa5193fb07f987,Attendence,Attend
