In [None]:
import csv
from rdkit import Chem
from molvs.normalize import Normalizer, Normalization
from rdkit.Chem.SaltRemover import SaltRemover
from molvs.charge import Reionizer, Uncharger

_PREPROCESS_STEPS = None  # lazily built tuple of reusable clean-up callables


def _preprocess_steps():
    """Return the shared clean-up callables, constructing them on first use."""
    global _PREPROCESS_STEPS
    if _PREPROCESS_STEPS is None:
        # Built once and reused: preprocess() runs once per CSV row, and
        # re-creating these helpers for every molecule is repeated setup work.
        _PREPROCESS_STEPS = (
            Normalizer().normalize,  # MolVS functional-group normalization
            SaltRemover(),           # RDKit salt stripping (default arguments)
            Reionizer(),             # MolVS reionization
            Uncharger(),             # MolVS charge neutralization
        )
    return _PREPROCESS_STEPS


def preprocess(smi):
    """Clean up a single SMILES string and return the resulting SMILES.

    The molecule is parsed with RDKit and then passed, in order, through
    normalization, salt removal, reionization and uncharging.

    Args:
        smi: SMILES string to process.

    Returns:
        The SMILES written by ``Chem.MolToSmiles`` (``kekuleSmiles=False``)
        for the processed molecule, or ``None`` if ``smi`` cannot be parsed
        into a molecule.
    """
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        return None  # If SMILES cannot be converted to a molecular object, return None
    # NOTE(review): SaltRemover is called with its defaults, so an input made
    # up only of salt fragments presumably comes back as an empty string
    # rather than None - confirm that callers are happy with that.
    for step in _preprocess_steps():
        mol = step(mol)
    return Chem.MolToSmiles(mol, kekuleSmiles=False)

def process_csv(input_file, output_file):
    """Copy a CSV file, replacing the first column with preprocessed SMILES.

    The first row is treated as a header and copied unchanged. For every
    following row the first field is passed to ``preprocess``; when that
    returns a value the row is written with the first field replaced,
    otherwise the row is reported on stdout and left out of the output.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write (overwritten if present).

    Notes:
        An input without any rows produces an empty output file, and blank
        lines in the input are skipped instead of raising.
    """
    with open(input_file, 'r', newline='') as infile, open(output_file, 'w', newline='') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        header = next(reader, None)  # None (not StopIteration) when the file has no rows
        if header is None:
            return  # Nothing to process; the output file stays empty
        writer.writerow(header)  # Write the header row to the new file
        for row in reader:
            if not row:
                continue  # csv.reader yields [] for blank lines; there is no SMILES field to read
            smiles = row[0]  # Get the SMILES data
            new_smiles = preprocess(smiles)  # Apply the preprocessing function
            if new_smiles is None:
                print(f"Skipping invalid SMILES: {smiles}")  # Print and skip the SMILES that cannot be preprocessed
                continue
            row[0] = new_smiles  # Update the SMILES data
            writer.writerow(row)  # Write the updated row to the new file

# Paths for this run: the CSV to read and the CSV that receives the processed rows.
input_file, output_file = 'input2.csv', 'output2.csv'
process_csv(input_file=input_file, output_file=output_file)