In [None]:
pip install rdkit



In [None]:
# Input/output locations used by the cells below.
# Assumes the files were already uploaded next to this notebook.
itp_file_path = '010.itp'  # topology file read by parse_itp_file
gro_file_path = '010_1.gro'  # coordinate file read by parse_gro; its title line is reused by write_gro_file
output_file_path = 'output.gro'  # coordinate file produced by write_gro_file

In [None]:
import pandas as pd
import re
import networkx as nx

def parse_itp_file(file_path):
    """Parse a GROMACS-style topology (.itp) file into per-section token rows.

    Every line is stripped and anything from the first ';' onwards is treated
    as a comment and discarded. A line of the form '[ name ]' opens a section;
    each following non-empty line is split on whitespace and appended to that
    section's list of rows.

    Column-header lines are expected to be ';' comments, so they are dropped
    by the comment handling rather than by skipping a fixed number of lines.
    This keeps the first data row of a section that has no header comment and
    copes with a section header on the last line of the file.

    Args:
        file_path: Path of the .itp file to read.

    Returns:
        dict mapping each section name (for example 'atoms' or 'bonds') to a
        list of rows, each row being a list of string tokens. When a section
        name occurs more than once (for example two '[ dihedrals ]' blocks),
        only the rows of the last occurrence are kept. Content that appears
        before the first section header (such as '#include' lines) is ignored.
    """
    data = {}
    current_section = None

    with open(file_path, 'r') as file:
        for raw_line in file:
            # Drop inline and full-line comments in one step.
            content = raw_line.split(';', 1)[0].strip()
            if not content:
                continue

            if content.startswith('[') and content.endswith(']'):
                current_section = content[1:-1].strip()
                # A repeated section name restarts its row list (last one wins).
                data[current_section] = []
                continue

            if current_section is None:
                # Nothing to attach this line to yet (preprocessor lines etc.).
                continue

            # Uncommented 'IMPROPER DIHEDRAL' / 'PROPER DIHEDRAL' label lines are
            # labels, not data; 'PROPER DIHEDRAL' is a substring of both.
            if current_section == 'dihedrals' and 'PROPER DIHEDRAL' in content:
                continue

            data[current_section].append(content.split())

    return data

def process_dihedrals_sections(file_path):
    """Collect the improper and proper dihedral rows of an .itp file.

    Scanning starts being meaningful at the first '[ dihedrals ]' header.
    Inside the dihedrals part of the file, a comment line starting with
    '; IMPROPER DIHEDRAL ANGLES' or '; PROPER DIHEDRAL ANGLES' selects which
    table the following data rows belong to. Reading stops at the first line
    containing '[ pairs ]'. Any other section header ends the current
    dihedral table, so rows of unrelated sections are never collected.

    Blank lines and comment lines are ignored, and a trailing ';' comment on
    a data row is dropped so that the row keeps the expected column count.

    Args:
        file_path: Path of the .itp file to read.

    Returns:
        Tuple (df_improper, df_proper) of DataFrames holding the rows as
        string tokens. df_improper has columns ai, aj, ak, al, funct, c0, c1,
        c2; df_proper additionally has c3, c4, c5.
    """
    rows = {'improper': [], 'proper': []}
    in_dihedrals = False  # True once a '[ dihedrals ]' header has been seen
    kind = None  # 'improper', 'proper', or None while no table is selected

    with open(file_path, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()
            if line.startswith('[ dihedrals ]'):
                in_dihedrals = True
            elif in_dihedrals and line.startswith('; IMPROPER DIHEDRAL ANGLES'):
                kind = 'improper'
            elif in_dihedrals and line.startswith('; PROPER DIHEDRAL ANGLES'):
                kind = 'proper'
            elif '[ pairs ]' in line:
                break  # Nothing after [ pairs ] is collected
            elif line.startswith('['):
                # A different section begins: its rows are not dihedrals.
                in_dihedrals = False
                kind = None
            elif kind is not None:
                # Comment-only and blank lines yield no tokens and are skipped.
                tokens = line.split(';', 1)[0].split()
                if tokens:
                    rows[kind].append(tokens)

    df_improper = pd.DataFrame(
        rows['improper'],
        columns=['ai', 'aj', 'ak', 'al', 'funct', 'c0', 'c1', 'c2'],
    )
    df_proper = pd.DataFrame(
        rows['proper'],
        columns=['ai', 'aj', 'ak', 'al', 'funct', 'c0', 'c1', 'c2', 'c3', 'c4', 'c5'],
    )

    return df_improper, df_proper

def parse_gro(file_path):
    """Read the atom records of a .gro coordinate file into a DataFrame.

    The second line of the file gives the atom count; that many lines are then
    read starting at the third line. Each record is split on whitespace and
    its first six fields are kept.

    Args:
        file_path: Path of the .gro file to read.

    Returns:
        DataFrame with columns res, atom_name, atom_num (int) and x, y, z
        (float), one row per atom record.
    """
    with open(file_path, 'r') as handle:
        content = handle.readlines()

    atom_count = int(content[1].strip())

    def to_record(raw):
        # Whitespace-separated fields; anything past the sixth is ignored.
        fields = re.split(r'\s+', raw.strip())
        return [
            fields[0],
            fields[1],
            int(fields[2]),
            float(fields[3]),
            float(fields[4]),
            float(fields[5]),
        ]

    # Index explicitly so a file shorter than its declared count still fails loudly.
    records = [to_record(content[idx]) for idx in range(2, 2 + atom_count)]

    return pd.DataFrame(
        records,
        columns=['res', 'atom_name', 'atom_num', 'x', 'y', 'z'],
    )

def write_gro_file(df, input_file_path, output_file_path):
    """Write atom records from a DataFrame to a .gro-style file.

    The title line is copied from the first line of input_file_path. The
    second line is the number of rows in df (not the largest atom number, so
    it stays correct when numbering does not run 1..N). Each atom is written
    as right-aligned fields: res (8), atom_name (7), atom_num (5) and x, y, z
    (8 wide, three decimals). The final line is a fixed 1 x 1 x 1 box.

    The DataFrame passed in is not modified.

    Args:
        df: DataFrame with columns res, atom_name, atom_num, x, y, z.
        input_file_path: Existing .gro file whose first line is reused as title.
        output_file_path: Path of the file to create or overwrite.

    Returns:
        None. If x, y or z cannot be converted to numbers, an error message is
        printed and nothing is written.
    """
    with open(input_file_path, 'r') as file:
        first_line = file.readline().strip()

    # Convert coordinates on the side so the caller's columns keep their values.
    coords = {}
    for col in ('x', 'y', 'z'):
        try:
            coords[col] = pd.to_numeric(df[col])
        except ValueError:
            print(f"Error: Column '{col}' contains non-numeric values.")
            return

    total_atoms = len(df)

    records = zip(
        df['res'], df['atom_name'], df['atom_num'],
        coords['x'], coords['y'], coords['z'],
    )

    with open(output_file_path, 'w') as file:
        file.write(f"{first_line}\n")
        file.write(f"  {total_atoms}\n")

        for res, atom_name, atom_num, x, y, z in records:
            file.write(
                f"{res:>8}{atom_name:>7}{atom_num:>5}"
                f"{float(x):>8.3f}{float(y):>8.3f}{float(z):>8.3f}\n"
            )
        file.write("   1.00000   1.00000   1.00000\n")

# Load the topology sections and the coordinate table.
parsed_data = parse_itp_file(itp_file_path)
gro_df = parse_gro(gro_file_path)


def _section_frame(section, columns):
    """Return the rows parsed for one .itp section as a DataFrame (empty if the section is absent)."""
    return pd.DataFrame(parsed_data.get(section, []), columns=columns)


# One DataFrame per topology section.
df_atomtypes = _section_frame(
    'atomtypes',
    ['type', 'type_name', 'mass', 'charge', 'element', 'sigma', 'epsilon'],
)
df_moleculetype = _section_frame('moleculetype', ['name', 'nrexcl'])
df_atoms = _section_frame(
    'atoms',
    ['nr', 'type', 'resnr', 'residue', 'atom', 'cgnr', 'charge', 'mass'],
)
df_bonds = _section_frame('bonds', ['ai', 'aj', 'funct', 'c0', 'c1'])
df_angles = _section_frame('angles', ['ai', 'aj', 'ak', 'funct', 'c0', 'c1'])
df_pairs = _section_frame('pairs', ['ai', 'aj', 'funct'])

# Use the topology's atom names in the coordinate table. The assignment is
# aligned on the row index, so coordinate rows without a topology row get NaN.
gro_df['atom_name'] = df_atoms['atom']

# Overwrite every charge with the string '0.0000'.
df_atoms['charge'] = df_atoms['charge'].apply(
    lambda _charge: format(0.0000, '.4f')
)

# Write the renamed coordinates; the title line comes from the input .gro file.
write_gro_file(gro_df, gro_file_path, output_file_path)

In [None]:
# Display the [ atoms ] table built from the parsed topology.
print(df_atoms)

    nr      type resnr residue  atom cgnr  charge     mass
0    1  opls_800     1     UNK  O000    1  0.0000  15.9990
1    2  opls_801     1     UNK  C001    1  0.0000  12.0110
2    3  opls_821     1     UNK  H007    1  0.0000   1.0080
3    4  opls_802     1     UNK  C002    1  0.0000  12.0110
4    5  opls_820     1     UNK  C006    1  0.0000  12.0110
5    6  opls_803     1     UNK  C003    1  0.0000  12.0110
6    7  opls_822     1     UNK  H008    1  0.0000   1.0080
7    8  opls_834     1     UNK  H00B    2  0.0000   1.0080
8    9  opls_805     1     UNK  C005    1  0.0000  12.0110
9   10  opls_804     1     UNK  C004    1  0.0000  12.0110
10  11  opls_823     1     UNK  H009    1  0.0000   1.0080
11  12  opls_824     1     UNK  H00A    1  0.0000   1.0080
12  13  opls_806     1     UNK  O00C    1  0.0000  15.9990
13  14  opls_807     1     UNK  C00D    1  0.0000  12.0110
14  15  opls_808     1     UNK  C00E    1  0.0000  12.0110
15  16  opls_819     1     UNK  C00I    1  0.0000  12.01