In [2]:
import os
import pandas as pd
from rdkit import Chem
from functools import reduce

In [3]:
# Paths
# NOTE(review): `Eseji` (~/Desktop/Eseji) is not referenced by any cell visible
# in this notebook — confirm whether it is still needed.
Eseji = os.path.join(os.path.expanduser("~"), "Desktop", "Eseji")
# Input folders: `csv_folder` is scanned for .csv files and `sdf_folder` for
# .sdf files in the next cell. Both are absolute, machine-specific Windows
# paths, so they must be adjusted before running the notebook elsewhere.
csv_folder = r"D:\Krov 2\DESKTOP\Tox21\Eseji\CSV_auto" 
sdf_folder = r"D:\Krov 2\DESKTOP\Tox21\Eseji\SDF_auto" 

In [4]:
# Collect the input file names from both folders.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the order of the per-assay columns in the final table reproducible.
# The extension check is case-insensitive so that e.g. "X.CSV" is not missed.
csv_files = sorted(f for f in os.listdir(csv_folder) if f.lower().endswith(".csv"))
sdf_files = sorted(f for f in os.listdir(sdf_folder) if f.lower().endswith(".sdf"))

In [5]:
def extract_sdf_data_with_smiles(path):
    """Read an SDF file into a DataFrame with one row per usable molecule.

    Each row holds the molecule's SDF properties plus two extra columns:

    * ``SMILES_ORIGINAL``  - the value of the ``SMILES`` property stored in
      the file (``None`` when the record has no such property);
    * ``SMILES_GENERATED`` - the SMILES string produced by
      ``Chem.MolToSmiles`` from the parsed structure.

    Records that the supplier yields as ``None`` and records whose
    properties or SMILES cannot be produced are skipped. Skipping is
    best-effort, but it is no longer silent: one warning per file reports
    how many records were dropped.

    Parameters
    ----------
    path : str
        Path to the ``.sdf`` file.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed molecule; an empty frame (with
        no columns) when no record could be processed.
    """
    import warnings  # local import so the function does not rely on other cells

    supplier = Chem.SDMolSupplier(path)
    records = []
    skipped = 0
    for mol in supplier:
        if mol is None:
            # The supplier could not build a molecule from this record.
            skipped += 1
            continue
        try:
            props = mol.GetPropsAsDict()
            props["SMILES_ORIGINAL"] = props.get("SMILES")
            props["SMILES_GENERATED"] = Chem.MolToSmiles(mol)
        except Exception:
            # Catch Exception rather than using a bare `except:` so that
            # KeyboardInterrupt / SystemExit still stop the notebook.
            skipped += 1
            continue
        records.append(props)

    if skipped:
        warnings.warn(
            f"{path}: skipped {skipped} SDF record(s) that could not be parsed or converted",
            stacklevel=2,
        )
    return pd.DataFrame(records)

In [6]:
# Load every assay CSV into a DataFrame, keyed by the file name without its
# extension.
csv_dfs = dict(
    (os.path.splitext(name)[0], pd.read_csv(os.path.join(csv_folder, name)))
    for name in csv_files
)
# Parse every SDF file the same way; the shared base name is what later pairs
# an SDF table with its CSV table.
sdf_dfs = dict(
    (os.path.splitext(name)[0], extract_sdf_data_with_smiles(os.path.join(sdf_folder, name)))
    for name in sdf_files
)

In [7]:
# Pair each SDF table with the CSV table of the same base name and join them
# on compound name + CASRN, keeping only compounds present in both.
merged_dfs = {}
for key in sdf_dfs:
    if key not in csv_dfs:
        continue  # no CSV table was loaded under this name
    structures = sdf_dfs[key][["PREFERRED_NAME", "CASRN", "SMILES_ORIGINAL", "SMILES_GENERATED"]]
    hit_calls = csv_dfs[key][["PREFERRED NAME", "CASRN", "HIT CALL"]]
    merged_dfs[key] = (
        structures
        .merge(
            hit_calls,
            how="inner",
            left_on=["PREFERRED_NAME", "CASRN"],
            right_on=["PREFERRED NAME", "CASRN"],
        )
        # The CSV spelling of the name column duplicates PREFERRED_NAME.
        .drop(columns=["PREFERRED NAME"])
        # Prefix the hit-call column with the assay key so the columns stay
        # distinct once all assays are combined.
        .rename(columns={"HIT CALL": f"{key}_HIT CALL"})
    )

In [8]:
# Combine the per-assay tables into one wide table: one row per compound and
# one "<assay>_HIT CALL" column per assay. The outer join keeps compounds that
# appear in only some assays (their other hit-call cells stay empty).
if merged_dfs:
    merge_keys = ["PREFERRED_NAME", "CASRN", "SMILES_GENERATED", "SMILES_ORIGINAL"]
    autofluorescentne = reduce(
        lambda left, right: pd.merge(left, right, on=merge_keys, how="outer"),
        merged_dfs.values(),
    )
    display(autofluorescentne)
else:
    # Message (Croatian): "No merged tables! Check that the names and contents
    # of the CSV and SDF files match." The mis-encoded "ž" has been restored.
    print("Nema spojenih tablica! Provjeri poklapanje naziva i sadržaja CSV i SDF datoteka.")

Unnamed: 0,PREFERRED_NAME,CASRN,SMILES_ORIGINAL,SMILES_GENERATED,Assay List TOX21_AutoFluor_HEK293_Cell_blue-2025-06-01_HIT CALL,Assay List TOX21_AutoFluor_HEK293_Media_blue-2025-06-01_HIT CALL,Assay List TOX21_AutoFluor_HEPG2_Cell_blue-2025-06-01_HIT CALL,Assay List TOX21_AutoFluor_HEPG2_Media_blue-2025-06-01_HIT CALL,Assay List TOX21_AutoFluor_HEPG2_Media_green-2025-06-01_HIT CALL
0,"1,3-Diiminobenz(f)isoindoline",65558-69-2,N=C1NC(=N)C2=C1C=C1C=CC=CC1=C2,N=C1NC(=N)c2cc3ccccc3cc21,Active,,Active,Active,
1,2-(4-Aminophenyl)-6-methylbenzothiazole sulfon...,130-17-6,CC1=C(C2=C(C=C1)N=C(S2)C1=CC=C(N)C=C1)S(O)(=O)=O,Cc1ccc2nc(-c3ccc(N)cc3)sc2c1S(=O)(=O)O,,,,Active,
2,2-Fluoroacetamide,640-19-7,NC(=O)CF,NC(=O)CF,,,Active,,
3,"4-Amino-1,2,4-triazole",584-13-4,NN1C=NN=C1,Nn1cnnc1,,,,Active,
4,"4-Amino-2,6-dichlorophenol",5930-28-9,NC1=CC(Cl)=C(O)C(Cl)=C1,Nc1cc(Cl)c(O)c(Cl)c1,Active,,,,
5,Heptachlor epoxide B,1024-57-3,[H][C@]12O[C@@]1([H])[C@@]1([H])[C@@]([H])([C@...,ClC1=C(Cl)[C@]2(Cl)[C@@H]3[C@@H](Cl)[C@H]4O[C@...,,Active,,,
6,Iodoalphionic acid,577-91-3,OC(=O)C(CC1=CC(I)=C(O)C(I)=C1)C1=CC=CC=C1,O=C(O)C(Cc1cc(I)c(O)c(I)c1)c1ccccc1,Active,,,,
7,Isoamyl cinnamate,7779-65-9,CC(C)CCOC(=O)C=CC1=CC=CC=C1,CC(C)CCOC(=O)C=Cc1ccccc1,Active,,,,
8,"N,N-Bis(4-tert-octylphenyl)amine",15721-78-5,CC(C)(C)CC(C)(C)C1=CC=C(NC2=CC=C(C=C2)C(C)(C)C...,CC(C)(C)CC(C)(C)c1ccc(Nc2ccc(C(C)(C)CC(C)(C)C)...,,Active,,,
9,Norflurazon,27314-13-2,CNC1=C(Cl)C(=O)N(N=C1)C1=CC=CC(=C1)C(F)(F)F,CNc1cnn(-c2cccc(C(F)(F)F)c2)c(=O)c1Cl,,,,Active,


In [9]:
# Write the combined table to the working directory. `autofluorescentne` is
# only created when at least one SDF/CSV pair was merged, so the export is
# guarded by the same condition to avoid a NameError (or exporting a stale
# table left over from an earlier run) when nothing was merged.
if merged_dfs:
    autofluorescentne.to_csv("autofluorescentne.csv", index=False)