<a href="https://colab.research.google.com/github/vinayak2019/gromacs_automation/blob/main/correct_pdb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from collections import defaultdict

def _with_chain_id(record, chain_id):
    """Return *record* with its chain identifier (PDB column 22) set to *chain_id*."""
    return record[:21] + chain_id + record[22:]


def rearrange_pdb_with_correct_chain(input_pdb, output_pdb, ligand_resname="LIG"):
    """
    Regroup the atoms of a PDB file by segment name and rewrite their chain IDs.

    Only ATOM/HETATM records of at least 76 characters (line terminator
    included) are considered; every other line (headers, existing TER/END
    records, short lines) is dropped. Each kept record is classified as:

    * ligand  - its residue name (columns 18-20) equals ``ligand_resname`` or
      it is a HETATM record;
    * protein - anything else, grouped by the segment name in columns 73-76.

    Protein segments are written in alphabetical order of their segment name,
    each followed by a ``TER`` line. The chain ID (column 22) of every protein
    record is replaced by the last character of its segment name
    (PROA -> A, PROC -> C), so two segments ending in the same character end
    up with the same chain ID. Protein records with a blank segment name are
    not written. Ligand records are written last with chain ID ``L`` and are
    followed by their own ``TER`` line.

    :param input_pdb: Path to the input PDB file
    :param output_pdb: Path to the output formatted PDB file
    :param ligand_resname: Residue name of the ligand (default: "LIG")
    """
    # Segment name -> protein records, kept in their original file order.
    chain_groups = defaultdict(list)
    ligand_atoms = []

    with open(input_pdb, "r") as pdb_file:
        for line in pdb_file:
            if not line.strip():  # Skip empty lines
                continue

            # The segment name lives in columns 73-76, so shorter records
            # cannot be assigned to a chain and are ignored.
            if not line.startswith(("ATOM", "HETATM")) or len(line) < 76:
                continue

            # The final record of a file may lack a line terminator; add one
            # so the TER written after it starts on its own line.
            if not line.endswith("\n"):
                line += "\n"

            segment_name = line[72:76].strip()
            residue_name = line[17:20].strip()

            # Check the record name itself rather than searching the whole
            # line, so unrelated text in a record cannot trigger a match.
            if residue_name == ligand_resname or line.startswith("HETATM"):
                ligand_atoms.append(line)
            else:
                chain_groups[segment_name].append(line)

    with open(output_pdb, "w") as output_file:
        # Protein chains first, in alphabetical order of segment name.
        for segment_name in sorted(chain_groups):
            if not segment_name:  # Records without a segment name are skipped
                continue

            chain_id = segment_name[-1]  # PROA -> A, PROC -> C
            output_file.writelines(
                _with_chain_id(record, chain_id)
                for record in chain_groups[segment_name]
            )
            output_file.write("TER\n")  # Terminate each chain

        # The ligand is written separately at the end under chain ID 'L'.
        if ligand_atoms:
            output_file.writelines(
                _with_chain_id(record, "L") for record in ligand_atoms
            )
            output_file.write("TER\n")

    print(f"✅ Reformatted PDB file saved as {output_pdb}")

# Example usage: guarded so the conversion runs when this code is executed
# directly (script or notebook cell) but not when it is imported as a module.
if __name__ == "__main__":
    rearrange_pdb_with_correct_chain("Structures.pdb", "Structure_new.pdb", ligand_resname="LIG")


✅ Reformatted PDB file saved as Structure_new.pdb
