In [1]:
"""
python 3.10
Converts every .pdb file in the directory (gag lattice
output from NERDSS) to a .csv file containing the centers
of mass (CoMs) of the gag molecules.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [2]:
def read_pdb(filename):
    """
    Read a gag-lattice .pdb file and return it as a DataFrame
    with named columns.

    The first two lines of the file are skipped and every
    remaining line is split on runs of whitespace.

    Parameters
    ----------
    filename : str or path-like
        Path of the .pdb file to read.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line, with the columns "Type", "Index",
        "Particle", "Molecule", "Molecule Index", "X", "Y", "Z",
        "Occupancy" and "Atom". Per the original author's note, the
        last two columns are only placeholders for the .pdb format.

    Raises
    ------
    ValueError
        If the parsed table does not have exactly ten columns
        (the column-name assignment fails on a length mismatch).
    """
    # Raw string: "\s" in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    # r"\s+" matches any run of whitespace between fields.
    # NOTE(review): assumes fields never touch each other (e.g. very wide
    # coordinates in fixed-width .pdb columns) - TODO confirm for NERDSS output.
    df = pd.read_csv(filename, skiprows = 2, sep = r"\s+", header = None)
    df.columns = ["Type", "Index", "Particle", "Molecule", "Molecule Index", "X", "Y", "Z", "Occupancy", "Atom"]
    return df

In [3]:
def get_COM_coord(df, drop = False):
    """
    Return only the rows of `df` whose "Particle" value is "COM",
    re-indexed from zero.

    With drop = True the columns "Type", "Particle", "Index",
    "Molecule", "Molecule Index", "Occupancy" and "Atom" are
    removed from the result; every other column is kept.
    """
    is_com = df["Particle"] == "COM"
    com_rows = df.loc[is_com].reset_index(drop = True)

    # Nothing more to do when the caller wants every property kept.
    if not drop:
        return com_rows

    non_coord_columns = [
        "Type",
        "Particle",
        "Index",
        "Molecule",
        "Molecule Index",
        "Occupancy",
        "Atom",
    ]
    return com_rows.drop(columns = non_coord_columns)

In [4]:
def convert_all_to_csv(readDir = "./", saveDir = "./csv/"):
    """
    Convert every .pdb file found directly inside readDir to a
    .csv file that holds only the COM coordinates.

    For each entry of readDir whose name ends with ".pdb" the file
    is read with read_pdb, reduced with get_COM_coord(drop = True)
    and written to saveDir under the same base name with a ".csv"
    extension, without header or index column. All other entries
    are skipped. Progress is printed for every entry.

    Parameters
    ----------
    readDir : str
        Directory that is scanned for .pdb files.
    saveDir : str
        Directory that receives the .csv files; it is created
        if it does not exist yet.

    Returns
    -------
    bool
        Always True once the loop has finished.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(saveDir, exist_ok = True)

    for filename in os.listdir(readDir):
        if not filename.endswith(".pdb"):
            #skip file
            print(f"Skip {filename}")
            continue

        #read files that end with .pdb
        print(f"Read {filename}")
        # Join with readDir so the file is found even when readDir
        # is not the current working directory.
        df = read_pdb(os.path.join(readDir, filename))
        df = get_COM_coord(df, drop = True)

        #convert filename to "x.csv"; os.path.join copes with a
        #saveDir that has no trailing separator
        filenameCsv = os.path.join(saveDir, os.path.splitext(filename)[0] + ".csv")

        #write to .csv
        print(f"Write to {filenameCsv}")
        df.to_csv(filenameCsv, header = False, index = False)

    return True

In [5]:
# Convert the .pdb files next to this notebook; the returned flag is shown as the cell output.
convert_all_to_csv(readDir = "./", saveDir = "./csv/")

Read 2500gags.pdb
Write to ./csv/2500gags.csv
Skip 3125gags.csv
Skip 1250gags.csv
Skip .DS_Store
Skip 1875gags.csv
Skip convert_pdb_to_xyz_CoM.ipynb
Skip 2500gags.csv
Read 3125gags.pdb
Write to ./csv/3125gags.csv
Read 1250gags.pdb
Write to ./csv/1250gags.csv
Read 1875gags.pdb
Write to ./csv/1875gags.csv
Skip csv
Skip 625gags.csv
Skip .ipynb_checkpoints
Read 625gags.pdb
Write to ./csv/625gags.csv


True