In [2]:
import pandas as pd
from psikit import Psikit
from rdkit import Chem
from rdkit.Chem import PandasTools

# Read and clean SMILES data
smiles_df = pd.read_csv("smiles.csv")
smiles_df['smiles'] = smiles_df['smiles'].str.strip()

# Generate RDKit molecule and atom count columns
PandasTools.AddMoleculeColumnToFrame(smiles_df, smilesCol='smiles', molCol='ROMol')
smiles_df["n_atoms"] = smiles_df['ROMol'].map(lambda x: x.GetNumAtoms() if x is not None else None)

# Initialize Psikit and set Psi4 options to disable symmetry
pk = Psikit()
#k.psi4.set_options({"symmetry": "c1"})  # Disable symmetry to prevent symmetry errors

# Add a new column to store computed energies
smiles_df["Optimized Energy (Hartree)"] = None

# Iterate through each molecule to perform energy optimization
for i, row in smiles_df.iterrows():
    smiles = row["smiles"]
    print(f"Processing molecule {i+1} with SMILES: {smiles}")
    
    # Load molecule in Psikit
    pk.read_from_smiles(smiles)
    
    # Perform a "fake" calculation for setup verification
    print("Performing a fake calculation (no actual energy calculation yet)...")
    pk.mol
    
    # Compute optimized energy with %time to measure time taken, with error handling
    try:
        print("Computing optimized energy...")
        %time energy = pk.optimize()
        smiles_df.at[i, "Optimized Energy (Hartree)"] = energy
    except Exception as e:
        print(f"Failed to optimize molecule {i+1} with SMILES: {smiles}. Error: {e}")

# Display the updated DataFrame with computed energies
smiles_df[["smiles", "ROMol", "n_atoms", "Optimized Energy (Hartree)"]]


Processing molecule 1 with SMILES: C=C
Performing a fake calculation (no actual energy calculation yet)...
Computing optimized energy...
Optimizer: Optimization complete!
CPU times: user 6.53 s, sys: 337 ms, total: 6.87 s
Wall time: 9.84 s
Processing molecule 2 with SMILES: C=O
Performing a fake calculation (no actual energy calculation yet)...
Computing optimized energy...
Optimizer: Optimization complete!
CPU times: user 4.2 s, sys: 156 ms, total: 4.36 s
Wall time: 1.09 s
Processing molecule 3 with SMILES: C#N
Performing a fake calculation (no actual energy calculation yet)...
Computing optimized energy...
Optimizer: Optimization complete!
CPU times: user 3.84 s, sys: 122 ms, total: 3.96 s
Wall time: 995 ms
Processing molecule 4 with SMILES: CCO
Performing a fake calculation (no actual energy calculation yet)...
Computing optimized energy...
Optimizer: Optimization complete!
CPU times: user 11.4 s, sys: 372 ms, total: 11.8 s
Wall time: 2.97 s
Processing molecule 5 with SMILES: CCC(=O

Unnamed: 0,smiles,ROMol,n_atoms,Optimized Energy (Hartree)
0,C=C,<rdkit.Chem.rdchem.Mol object at 0x14a0d2cd7660>,2,-78.038769
1,C=O,<rdkit.Chem.rdchem.Mol object at 0x14a0d2cd7510>,2,-113.869536
2,C#N,<rdkit.Chem.rdchem.Mol object at 0x14a0d2cd74a0>,2,-92.876942
3,CCO,<rdkit.Chem.rdchem.Mol object at 0x14a0d2cd7430>,3,-154.089919
4,CCC(=O)O,<rdkit.Chem.rdchem.Mol object at 0x14a0d2d4ae40>,5,-266.858985
5,CN=C=O,<rdkit.Chem.rdchem.Mol object at 0x14a0d2d4ac80>,4,-206.795593
