In [1]:
import numpy as np
import os
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import AllChem
from rdkit import RDLogger
# Turn off every RDKit logger matching 'rdApp.*' so parsing/embedding
# warnings do not flood the notebook output.
RDLogger.DisableLog('rdApp.*')

# Load the comma-separated dataset; the SMILES column used below is read from it.
df = pd.read_csv('Data_test09032021.csv', sep=',')

In [2]:
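# This notebook generates the input files for the geometry optimisations of the desired molecules.
# NOTE: the cells below write Gaussian 09 (`.com`) inputs and a SLURM submission script, not pyscf inputs.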

In [3]:
# Generation of the input files for the reactants

In [4]:
# Reactant SMILES column of the loaded dataset.
react = df["Reactant Smile (C-O)"]
# Accumulator for the unique canonical SMILES (populated by the loop below).
unik_react = list()

In [5]:
# Collect the unique reactants, keyed by RDKit canonical SMILES so that
# different spellings of the same molecule collapse to one entry.
# `seen` mirrors `unik_react` as a set: O(1) membership tests while the list
# keeps first-occurrence order. Seeding it from the list keeps the cell
# idempotent when it is re-run.
seen = set(unik_react)
for raw_smi in react:
    mol = Chem.MolFromSmiles(raw_smi)
    if mol is None:
        # MolFromSmiles returns None on unparsable input; fail with a clear
        # message instead of an opaque error inside MolToSmiles.
        raise ValueError("RDKit could not parse reactant SMILES {!r}".format(raw_smi))
    canonical = Chem.MolToSmiles(mol)
    if canonical not in seen:
        seen.add(canonical)
        unik_react.append(canonical)

In [6]:
# Number of distinct canonical reactant SMILES collected in unik_react.
len(unik_react)

393

In [38]:
def gen_inp(smi, i, func, basis, path, random_seed=42):
    """Write a Gaussian 09 optimisation input and a SLURM script for one molecule.

    A 3D geometry is built from the SMILES with RDKit (explicit hydrogens,
    distance-geometry embedding, MMFF force-field pre-optimisation) and
    written as ``mol_inp<iii>.com``. A submission script named ``do`` that
    runs ``g09`` on that input is written next to it.

    Parameters
    ----------
    smi : str
        SMILES string of the molecule.
    i : int
        Molecule index; zero-padded to three digits in file names.
    func : str
        Functional keyword placed on the Gaussian route line (e.g. 'b3lyp').
    basis : str
        Basis-set keyword placed on the Gaussian route line (e.g. '6-31G*').
    path : str
        Existing directory in which both files are written (a trailing
        separator is accepted but not required).
    random_seed : int, optional
        Seed passed to the RDKit embedding so the starting geometry is
        reproducible between runs.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smi``.
    RuntimeError
        If RDKit fails to generate 3D coordinates for the molecule.
    """
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        raise ValueError("RDKit could not parse SMILES {!r}".format(smi))
    mol_h = Chem.AddHs(mol)

    # EmbedMolecule returns -1 when no conformer could be generated; carrying
    # on would fail later with a much less helpful error.
    if AllChem.EmbedMolecule(mol_h, randomSeed=random_seed) == -1:
        raise RuntimeError("RDKit could not embed 3D coordinates for {!r}".format(smi))
    # RDKit MMFF (Merck Molecular Force Field) pre-optimisation (add ref)
    AllChem.MMFFOptimizeMolecule(mol_h)

    # The XYZ block starts with an atom-count line and a comment line. Drop
    # them by line rather than by character offset, which only worked when
    # the atom count had exactly two digits.
    atom_lines = Chem.MolToXYZBlock(mol_h).splitlines()[2:]

    # Total charge comes from the SMILES formal charges (0 for neutral
    # molecules); the multiplicity stays singlet as before.
    charge = Chem.GetFormalCharge(mol)

    tag = f"{i:03}"
    with open(os.path.join(path, "mol_inp{}.com".format(tag)), 'w') as f:
        f.write('%chk=opt.chk\n%mem=2gb\n%nproc=1\n')
        f.write("#p {} {} opt scf(xqc,tight)\n\nopt calculation\n\n".format(func, basis))
        f.write("{} 1\n".format(charge))
        f.write("\n".join(atom_lines))
        # End the last atom line and add the blank line that terminates the
        # geometry section.
        f.write('\n\n')

    with open(os.path.join(path, "do"), 'w') as f2:
        f2.write('#! /bin/bash\n#SBATCH -J runG09\n#SBATCH --ntasks=1\n')
        f2.write('#SBATCH --time=1000:00:00\n')
        f2.write("#SBATCH --output mol_{}.out\n".format(tag))
        f2.write("#SBATCH --mail-type=ALL\n")
        f2.write("#SBATCH --mail-user=jules.schleinitz@ens.fr\n\n")
        f2.write("#########################################################\n\n")
        f2.write("module purge\nmodule load gaussian\n\n")
        f2.write("source ${g09root}/g09.profile\n\n")
        f2.write("g09 mol_inp{}\n".format(tag))


In [39]:
# Write one directory per unique reactant (inputs/000/, inputs/001/, ...)
# containing its Gaussian input and submission script.
for i, smi in enumerate(unik_react):
    mol_dir = "inputs/{}".format(f"{i:03}")
    # makedirs + exist_ok: creates the parent "inputs" directory when missing
    # and lets the cell be re-run without raising FileExistsError.
    os.makedirs(mol_dir, mode=0o777, exist_ok=True)
    gen_inp(smi, i, 'b3lyp', '6-31G*', mol_dir + "/")