In [2]:
import pandas as pd
from pandas import DataFrame
from datetime import datetime
from typing import List, Dict, Tuple, Optional
import pickle

import pickle
from beanie import PydanticObjectId
from mongo_backend import (init_db, bulk_insert, construct_formulation_list_from_df, insert_compounds, 
                         construct_MLformulation_list_from_df, insert_preparation_ML_formulations)

In [3]:
# Read the preparation-formulation table and turn it into a list of formulation dicts.
# Forward slashes keep this relative path valid on Windows, Linux and macOS; the former
# backslash-separated literal only resolved on Windows.
prep_df = pd.read_csv("datasets/preparation_formulation.csv")
# "volume_percent" is handed to the project helper; in the displayed result it shows up as
# the quantity_type of some entries (others are 'moles') -- exact rule lives in mongo_backend.
prep_list = construct_formulation_list_from_df(prep_df, "volume_percent")

In [4]:
# Show the parsed preparation formulations (state BEFORE any database insert).
prep_list

[{'formulation': [{'compound_id': 'PC',
    'quantity': 1.0,
    'quantity_type': 'volume_percent'},
   {'compound_id': 'LiClO4', 'quantity': 1.0, 'quantity_type': 'moles'},
   {'compound_id': 'FEC',
    'quantity': 0.05,
    'quantity_type': 'volume_percent'}],
  'description': '1 M LICLO4-PC + 5% FEC'},
 {'formulation': [{'compound_id': 'EC',
    'quantity': 0.5,
    'quantity_type': 'volume_percent'},
   {'compound_id': 'LiPF6', 'quantity': 1.0, 'quantity_type': 'moles'},
   {'compound_id': 'DMC', 'quantity': 0.5, 'quantity_type': 'volume_percent'}],
  'description': '1 M LIPF6 EC-DMC (1:1 V) 2% VC'},
 {'formulation': [{'compound_id': 'EC',
    'quantity': 0.5,
    'quantity_type': 'volume_percent'},
   {'compound_id': 'LiBF4', 'quantity': 1.0, 'quantity_type': 'moles'},
   {'compound_id': 'DMC', 'quantity': 0.5, 'quantity_type': 'volume_percent'}],
  'description': '1 M LIBF4 EC-DMC (1:1 V) 2% VC'},
 {'formulation': [{'compound_id': 'PC',
    'quantity': 1.0,
    'quantity_type': '

In [5]:
# Read the data-science table and turn it into a list of ML-formulation dicts.
# Forward slashes keep this relative path valid on Windows, Linux and macOS; the former
# backslash-separated literal only resolved on Windows.
ds_df = pd.read_csv("datasets/datascience.csv")
# The project helper builds one dict per formulation; the displayed result carries keys such
# as 'formulation', 'description', 'element_ratio', 'CE', 'LCE', 'current', 'capacity', 'cycle'.
ml_list = construct_MLformulation_list_from_df(ds_df)

In [6]:
# Show the parsed ML formulations for a visual sanity check.
ml_list

[{'formulation': [{'compound_id': 'PC',
    'quantity': 11.754564689287674,
    'quantity_type': 'moles'},
   {'compound_id': 'LiClO4', 'quantity': 1.0, 'quantity_type': 'moles'},
   {'compound_id': 'FEC',
    'quantity': 0.6854998397043016,
    'quantity_type': 'moles'}],
  'description': '1 M LICLO4-PC + 5% FEC',
  'element_ratio': {'FC': 0.013968481227056,
   'OC': 0.841984658474856,
   'FO': 0.0165899474372349,
   'InOr': 0.8763302140905531,
   'F': 0.0041378841237753,
   'sF': 0.0041378841237753,
   'aF': 0.0,
   'O': 0.2494211714310907,
   'sO': 0.2252759630204788,
   'aO': 0.0241452084106118,
   'C': 0.2962300665701964,
   'sC': 0.2962300665701964,
   'aC': 0.0},
  'CE': 80.0,
  'LCE': 0.6989700043360189,
  'current': 0.5,
  'capacity': 0.083,
  'cycle': 30.0},
 {'formulation': [{'compound_id': 'EC',
    'quantity': 7.494719629352049,
    'quantity_type': 'moles'},
   {'compound_id': 'LiPF6', 'quantity': 1.0, 'quantity_type': 'moles'},
   {'compound_id': 'DMC',
    'quantity': 5

In [7]:
# Load the raw-material records, a mapping of compound identifier -> record.
# NOTE(security): pickle.load can execute arbitrary code while deserializing; only run this
# on a file from a trusted source.
# Forward slashes keep this relative path valid on Windows, Linux and macOS; the former
# backslash-separated literal only resolved on Windows.
with open('datasets/test_raw_mats_v2.pkl', 'rb') as f:
    test_mat_dict = pickle.load(f)

# Flatten the mapping into a list of records, storing each mapping key on its record under
# "compound_id". The assignment writes into the loaded record objects themselves, so the
# values of test_mat_dict carry the new key as well.
test_mat_list = []
for key, val in test_mat_dict.items():
    val["compound_id"] = key
    test_mat_list.append(val)

In [8]:
# Run the project's init_db coroutine before any of the insert cells below.
# NOTE(review): presumably this opens the database connection / registers the document
# models -- confirm in mongo_backend.
await init_db()

In [9]:
# Bulk insert the compounds. The returned mapping (compound identifier -> ObjectId, see the
# display below) is what the formulation insert uses later on.
# NOTE(review): re-running this cell may insert the same compounds again -- verify whether
# insert_compounds de-duplicates.
mat_dict = await insert_compounds(test_mat_list)

In [10]:
# Show the compound identifier -> ObjectId mapping returned by the insert.
mat_dict

{'DME': ObjectId('68be2a10c1da24c9aac1339a'),
 'EC': ObjectId('68be2a10c1da24c9aac1339b'),
 'PC': ObjectId('68be2a10c1da24c9aac1339c'),
 'DEC': ObjectId('68be2a10c1da24c9aac1339d'),
 'DMC': ObjectId('68be2a10c1da24c9aac1339e'),
 'FEC': ObjectId('68be2a10c1da24c9aac1339f'),
 'FDMB': ObjectId('68be2a10c1da24c9aac133a0'),
 'BTFE': ObjectId('68be2a10c1da24c9aac133a1'),
 'FEMC': ObjectId('68be2a10c1da24c9aac133a2'),
 'D2': ObjectId('68be2a10c1da24c9aac133a3'),
 'TTE': ObjectId('68be2a10c1da24c9aac133a4'),
 'DOL': ObjectId('68be2a10c1da24c9aac133a5'),
 'Diethyl ether': ObjectId('68be2a10c1da24c9aac133a6'),
 'FM': ObjectId('68be2a10c1da24c9aac133a7'),
 'DMB': ObjectId('68be2a10c1da24c9aac133a8'),
 'TMS': ObjectId('68be2a10c1da24c9aac133a9'),
 'TFEO': ObjectId('68be2a10c1da24c9aac133aa'),
 'TFEC': ObjectId('68be2a10c1da24c9aac133ab'),
 'TEP': ObjectId('68be2a10c1da24c9aac133ac'),
 'EMC': ObjectId('68be2a10c1da24c9aac133ad'),
 'VC': ObjectId('68be2a10c1da24c9aac133ae'),
 'GBL': ObjectId('68be2a

In [11]:
# Insert the preparation and ML formulations, passing mat_dict so compounds can be referenced.
# NOTE: comparing the prep_list displays before and after this call, each formulation entry
# gains a 'compound' ObjectId -- i.e. the helper appears to modify prep_list in place.
prep_ml_id_dict = await insert_preparation_ML_formulations(prep_list, ml_list, mat_dict)

In [12]:
# Show prep_list again (state AFTER the insert): entries now also carry a 'compound' ObjectId.
prep_list

[{'formulation': [{'compound_id': 'PC',
    'quantity': 1.0,
    'quantity_type': 'volume_percent',
    'compound': ObjectId('68be2a10c1da24c9aac1339c')},
   {'compound_id': 'LiClO4',
    'quantity': 1.0,
    'quantity_type': 'moles',
    'compound': ObjectId('68be2a10c1da24c9aac133d9')},
   {'compound_id': 'FEC',
    'quantity': 0.05,
    'quantity_type': 'volume_percent',
    'compound': ObjectId('68be2a10c1da24c9aac1339f')}],
  'description': '1 M LICLO4-PC + 5% FEC'},
 {'formulation': [{'compound_id': 'EC',
    'quantity': 0.5,
    'quantity_type': 'volume_percent',
    'compound': ObjectId('68be2a10c1da24c9aac1339b')},
   {'compound_id': 'LiPF6',
    'quantity': 1.0,
    'quantity_type': 'moles',
    'compound': ObjectId('68be2a10c1da24c9aac133d0')},
   {'compound_id': 'DMC',
    'quantity': 0.5,
    'quantity_type': 'volume_percent',
    'compound': ObjectId('68be2a10c1da24c9aac1339e')}],
  'description': '1 M LIPF6 EC-DMC (1:1 V) 2% VC'},
 {'formulation': [{'compound_id': 'EC',


In [13]:
# Debug check: confirm the insert helper is a coroutine function, i.e. that it must be awaited.
# NOTE(review): this import sits outside the top import cell; consider moving it there, or
# dropping this cell once the check is no longer needed.
import inspect
inspect.iscoroutinefunction(insert_preparation_ML_formulations)


True