### Generate organic molecule structures for Gaussian calculations

In [1]:
import sys
sys.path.append('../03-code/')

import pandas as pd
from organic_toolkit import dataframe_to_xyz_file, dataframe_to_image_file
from config import PROJECT_ROOT_DIRECTORY
from organic_featurization_helper import gather_mo_energetics_dataframe, gather_organic_dimension_dataframe

**Save images and XYZ files of organic molecules**

In [2]:
# Output locations used by the cells below, both built by appending a relative
# sub-path to PROJECT_ROOT_DIRECTORY.
grid_image_path = f'{PROJECT_ROOT_DIRECTORY}01-rawdata/02-organic-repository/gridimage/'
gaussian_organic_xyz_path = f'{PROJECT_ROOT_DIRECTORY}01-rawdata/03-gaussian-organic-xyz/'

In [3]:
# Read the organic genome CSV and index its rows by the 'identifier' column.
organic_genome_dataframe = pd.read_csv(
    filepath_or_buffer=f'{PROJECT_ROOT_DIRECTORY}02-metadata/06-csv-files/01-organic-genome.csv',
    index_col='identifier',
)

In [8]:
# Look up the genome entry for identifier 27665. A label slice (rather than a
# scalar lookup) keeps the result a one-row DataFrame instead of a Series.
organic_genome_dataframe.loc[27665:27665]

Unnamed: 0_level_0,smiles_canonical,generation
identifier,Unnamed: 1_level_1,Unnamed: 2_level_1
27665,[NH3+]Cc1ccc2cc3c(F)c(C[NH3+])ccc3cc2c1F,4.0


In [9]:
# Hand the one-row frame for identifier 27665 to dataframe_to_xyz_file, with the
# Gaussian XYZ directory as the output location.
# NOTE(review): the file naming and geometry generation happen inside
# organic_toolkit.dataframe_to_xyz_file and are not visible here.
dataframe_to_xyz_file(
    dataframe=organic_genome_dataframe.loc[27665:27665],
    output_directory=gaussian_organic_xyz_path)

In [7]:
# Preview the genome rows whose identifier labels fall between 1 and 100
# (.loc label slices include both endpoints).
organic_genome_dataframe.loc[1:100]

Unnamed: 0_level_0,smiles_canonical,generation
identifier,Unnamed: 1_level_1,Unnamed: 2_level_1
1,[NH3+]Cc1cc2sc(C[NH3+])cc2s1,2.0
2,[NH3+]Cc1cc2sc([NH3+])cc2s1,3.0
3,[NH3+]c1cc2sc([NH3+])cc2s1,4.0
4,[NH3+]Cc1ccc(C[NH3+])s1,1.0
5,[NH3+]Cc1ccc(C[NH3+])o1,2.0
...,...,...
96,[NH3+]c1cc2oc([NH3+])cc2o1,
97,[NH3+]Cc1ccc2cc(C[NH3+])[nH]c2c1,2.0
98,[NH3+]Cc1cc2sc3cc(C[NH3+])sc3c2s1,3.0
99,[NH3+]Cc1cc2sc3c4sc(C[NH3+])cc4sc3c2s1,4.0


In [5]:
# Pass one batch of molecules at a time to dataframe_to_image_file; edit the
# label range for each run (e.g. 1:50, 51:100, ...). The range currently
# selected is 151:200.
dataframe_to_image_file(organic_genome_dataframe.loc[151:200], grid_image_path=grid_image_path)

**MO energetics dataframe**

In [10]:
# Collect the molecular-orbital energetics table; the displayed result has
# HOMO, LUMO and HOMO_LUMO_gap columns indexed by identifier.
# NOTE(review): save=True presumably persists the table to disk — confirm in
# organic_featurization_helper.gather_mo_energetics_dataframe.
mo_energetics_dataframe = gather_mo_energetics_dataframe(save=True)
mo_energetics_dataframe


Unnamed: 0_level_0,HOMO,LUMO,HOMO_LUMO_gap
identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,-12.63000,-7.88800,4.74200
2,-13.18500,-8.34600,4.83900
3,-13.80100,-8.84700,4.95400
4,-14.22100,-8.49400,5.72700
5,-14.14700,-8.37000,5.77700
...,...,...,...
40616,-9.25623,-6.24121,3.01502
40617,-12.28790,-8.32805,3.95985
40618,-11.05650,-8.16451,2.89199
40619,-10.18470,-8.01866,2.16604


**Organic dimension dataframe**

In [5]:
# Collect the molecular dimension table; the displayed result has height,
# width and length columns indexed by identifier.
# NOTE(review): save=True presumably persists the table to disk, and the units
# are not stated here — confirm both in
# organic_featurization_helper.gather_organic_dimension_dataframe.
organic_dimension_dataframe = gather_organic_dimension_dataframe(save=True)
organic_dimension_dataframe

Unnamed: 0_level_0,height,width,length
identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,11.092407,3.746522,4.458895
4,8.833344,3.258605,3.283378
5,8.106967,3.239609,3.446841
6,8.858128,3.388596,3.861989
7,9.376746,3.177429,5.169995
...,...,...,...
36233,15.036807,1.545527,5.094488
36234,10.523134,1.665807,4.911966
36235,10.196945,1.668126,4.666310
36236,16.185018,1.936901,4.981983


In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem

# Demonstration: embed several 3D conformers of one molecule, relax them with
# the MMFF94s force field, and write the lowest-energy geometry to an XYZ file.

# 1. Build molecule and add hydrogens
# MolFromSmiles returns None (it does not raise) when the SMILES cannot be
# parsed, so check explicitly instead of failing later inside AddHs.
smiles = "CCOc1ccc(CCN)cc1"
parsed_mol = Chem.MolFromSmiles(smiles)
if parsed_mol is None:
    raise ValueError(f"Could not parse SMILES: {smiles!r}")
mol = Chem.AddHs(parsed_mol)

# 2. Generate multiple 3D conformers with ETKDG
params = AllChem.ETKDGv3()
params.enforceChirality = True
# Embedding is stochastic; a fixed seed makes Restart & Run All reproduce the
# same conformers (and therefore the same reported minimum).
params.randomSeed = 42
conf_ids = list(AllChem.EmbedMultipleConfs(mol, numConfs=50, params=params))
if not conf_ids:
    raise RuntimeError(f"Conformer embedding produced no conformers for {smiles!r}")

# 3. Optimize all conformers and get energies (kcal/mol)
# Each result is a (status, energy) pair. Per the RDKit documentation the
# status is 0 when the optimization converged, 1 when more iterations are
# needed, and -1 when the force field could not be set up (the accompanying
# energy is then a placeholder, not a real value).
results = AllChem.MMFFOptimizeMoleculeConfs(mol, mmffVariant='MMFF94s', numThreads=0)
energies = [res[1] for res in results]

# 4. Find conformer with lowest energy
# Ignore conformers whose force-field setup failed; otherwise their placeholder
# energy could be mistaken for the minimum.
usable_indices = [idx for idx, res in enumerate(results) if res[0] != -1]
if not usable_indices:
    raise RuntimeError(f"MMFF94s could not be set up for any conformer of {smiles!r}")
min_idx = min(usable_indices, key=energies.__getitem__)
min_conf_id = conf_ids[min_idx]
min_energy = energies[min_idx]
if results[min_idx][0] != 0:
    print(f"Warning: conformer {min_conf_id} did not fully converge; its energy is an upper bound.")

print(f"Lowest energy conformer ID: {min_conf_id}, Energy: {min_energy:.2f} kcal/mol")

# 5. Write the lowest-energy conformer to an XYZ file
conf = mol.GetConformer(min_conf_id)
Chem.MolToXYZFile(mol, "lowest_energy_conf.xyz", confId=min_conf_id)