In [49]:
import pandas as pd
import numpy as np

In [50]:
# Read the extensive NiCOlit CSV into a DataFrame, then display its column labels.
df = pd.read_csv(filepath_or_buffer="NiCOlit_extensive.csv")
df.columns

Index(['substrate', 'coupling_partner', 'effective_coupling_partner',
       'solvent', 'time', 'temperature', 'catalyst_precursor', 'reagents',
       'effective_reagents', 'effective_reagents_covalent', 'reductant',
       'ligand', 'effective_ligand', 'product', 'analytical_yield',
       'isolated_yield', 'coupling_partner_class', 'DOI', 'origin',
       'eq_substrate', 'eq_coupling_partner', 'eq_catalyst', 'eq_ligand',
       'eq_reagent', '2_steps', 'scheme_table', 'review', 'Mechanism',
       'sub_concentration (mol/L)', 'sub_moles (mmol)', 'volume (mL)',
       'Atmosphere'],
      dtype='object')

In [58]:
# Clean the loaded table and derive absolute amounts (mmol) for each component.
#
# This cell overwrites `ligand` and `effective_reagents` in place, so applying it
# twice to the same frame would corrupt them: ligands that were blanked would be
# restored (an empty string is not NaN) and the reductant would be appended to
# the reagents a second time. The guard below refuses such a second pass.
if "substrate_mmol" in df.columns:
    raise RuntimeError(
        "This cleaning cell was already applied to `df`; "
        "re-run the cell that loads the CSV before running it again."
    )

# clean review and PhD manuscript:
# The explicit copy makes `df` an independent frame, so the column assignments
# below do not raise pandas' SettingWithCopyWarning.
rows_to_keep = (df["review"] != "Review") & (df["DOI"] != "Thèse")
df = df[rows_to_keep].copy()

# remove ligand in effective ligand when no ligand is added:
# rows whose `ligand` is missing get '', every other row takes `effective_ligand`.
no_ligand_added = df["ligand"].isna()
df["ligand"] = df["effective_ligand"].where(~no_ligand_added, "")


def _join_smiles(*fragments):
    """Join the non-missing fragments with '.'; return '' when all are missing."""
    return ".".join(fragment for fragment in fragments if pd.notna(fragment))


# add reductant to reagents SMILES
df["effective_reagents"] = [
    _join_smiles(reagent, reductant)
    for reagent, reductant in zip(df["effective_reagents"], df["reductant"])
]

# change eq to mmol
# substrate: its amount is read directly from the table.
sub_mol = np.asarray(df["sub_moles (mmol)"], dtype=np.float32)
sub_eq = np.asarray(df["eq_substrate"], dtype=np.float32)
df["substrate_mmol"] = sub_mol

# Every other component is scaled from the substrate:
#   component_mmol = substrate_mmol * eq_component / eq_substrate
# The dict order fixes the order in which the new columns are appended.
eq_column_for = {
    "coupling_partner_mmol": "eq_coupling_partner",
    "ligand_mmol": "eq_ligand",
    "catalyst_mmol": "eq_catalyst",
    "reagent_mmol": "eq_reagent",
}
for mmol_column, eq_column in eq_column_for.items():
    component_eq = np.asarray(df[eq_column], dtype=np.float32)
    df[mmol_column] = np.divide(np.multiply(sub_mol, component_eq), sub_eq)







































C[Li]





C[Li]
C[Li]




























































[K+].[K+].O=C([O-])[O-]
[Cs+].[Cs+].O=C([O-])[O-]
[K+].[F-]
[K+].[O-]C(C)(C)C
[K+].[K+].[K+].[O-]P(=O)([O-])[O-].O.O.O.O.O.O
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[K+].[K+].[K+].[O-]









[Na+].[O-]C(C)(C)C
[Cs+].[F-]












































[Na+].[O-]C(C)(C)C
































[Na+].[Na+].[O-]C(=O)[O-]
[K+].[K+].[O-]C(=O)[O-]
[K+].[K+].[K+].[O-]P(=O)([O-])[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
CC[O-].[Na+]
CC(C)(C)[O-].[Li+]
CC(C)(C)[O-].[Na+]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs+].[Cs+].[O-]C(=O)[O-]
[Cs

In [55]:
# Display the column labels of `df` as they stand at this point in the notebook.
df.columns

Index(['substrate', 'coupling_partner', 'effective_coupling_partner',
       'solvent', 'time', 'temperature', 'catalyst_precursor', 'reagents',
       'effective_reagents', 'effective_reagents_covalent', 'reductant',
       'ligand', 'effective_ligand', 'product', 'analytical_yield',
       'isolated_yield', 'coupling_partner_class', 'DOI', 'origin',
       'eq_substrate', 'eq_coupling_partner', 'eq_catalyst', 'eq_ligand',
       'eq_reagent', '2_steps', 'scheme_table', 'review', 'Mechanism',
       'sub_concentration (mol/L)', 'sub_moles (mmol)', 'volume (mL)',
       'Atmosphere', 'substrate_mmol', 'coupling_partner_mmol', 'ligand_mmol',
       'catalyst_mmol', 'reagent_mmol'],
      dtype='object')

In [56]:
# Columns kept for the ORD export, listed group by group in output order.
ord_compound_columns = [
    'substrate', 'effective_coupling_partner', 'catalyst_precursor',
    'ligand', 'effective_reagents', 'product',
]
ord_condition_columns = ['solvent', 'time', 'temperature']
ord_amount_columns = [
    'volume (mL)', 'substrate_mmol', 'coupling_partner_mmol',
    'ligand_mmol', 'catalyst_mmol', 'reagent_mmol',
]
ord_yield_columns = ['analytical_yield', 'isolated_yield']
ord_reference_columns = ['DOI']

df_ORD = df[
    ord_compound_columns
    + ord_condition_columns
    + ord_amount_columns
    + ord_yield_columns
    + ord_reference_columns
]

In [59]:
# Write the selected columns to disk as CSV.
df_ORD.to_csv(path_or_buf="NiCOlit_for_ORD_extensive.csv")