In [3]:
import os
import re
from glob import glob
from itertools import combinations

import mdtraj as md
import numpy as np
from msmbuilder.featurizer import AtomPairsFeaturizer

ModuleNotFoundError: No module named 'msmbuilder'

In [7]:
def create_pairwise_index(atomindex_file):
    """
    Build every unique pair of atom indices listed in an atom-index file.

    The file is read as a stream of whitespace-separated integers. The very
    first integer is discarded (per the msmbuilder1 index-file layout it is
    the total number of indices, not an index), every remaining value is
    shifted down by one to make it 0-based, duplicates are removed, and all
    2-combinations of the remaining values are returned.

    Parameters
    ----------
    atomindex_file:
        Path to the text file containing the (1-based) atom indices used
        for featurization

    Return
    ----------
    pairwise_index: list
        a list of 2-tuples ``(a, b)`` with ``a < b``, built from the sorted
        unique 0-based indices; empty when fewer than two indices remain

    Raises
    ----------
    OSError
        if the file cannot be opened
    ValueError
        if a token in the file is not an integer
    """
    # The context manager guarantees the file handle is closed, even when
    # parsing fails further down.
    with open(atomindex_file) as index_file:
        tokens = index_file.read().split()

    # Drop the leading count by token position rather than by physical line,
    # so blank lines ahead of the data cannot turn the count into an index.
    # Shift by 1 because the file is 1-based while the result is 0-based.
    atom_indices = [int(token) - 1 for token in tokens[1:]]

    # np.unique both de-duplicates and sorts, so no pair is produced twice.
    output_array = np.unique(atom_indices)
    pairwise_index = list(combinations(output_array, 2))
    return pairwise_index

In [10]:
def feat(atom_pairs_feat,traj_name,topfile):
    """
    Load one trajectory and featurize it with the supplied featurizer.

    Parameters
    ----------
    atom_pairs_feat:
        Featurizer object exposing ``partial_transform`` (the notebook passes
        an MSMBuilder ``AtomPairsFeaturizer``)

    traj_name:
        Trajectory location handed unchanged to ``md.load``

    topfile:
        Topology passed to ``md.load`` as ``top`` (the notebook uses a PDB file)

    Return
    ----------
    feat_traj:
        Whatever ``atom_pairs_feat.partial_transform`` returns for the loaded
        trajectory -- presumably a (n_frames, n_pairs) distance array; confirm
        against the MSMBuilder documentation
    """
    loaded_traj = md.load(traj_name, top=topfile)
    return atom_pairs_feat.partial_transform(loaded_traj)

In [None]:
# Input/output locations. Both directory strings end with "/" because the
# paths below are built by plain string concatenation.
featdir="./Featurization/"
trajDir="./trajs/"
topfile = trajDir+"npt_fit.pdb"

# Create the pairwise index from the atom-index file and store it as text
# (one pair per row, integer format) next to the features.
atom_set =featdir+"AtomIndices.dat"
atom_pair_list=create_pairwise_index(atom_set)
np.savetxt(featdir+"atom_pair_list.dat",atom_pair_list,fmt='%d')

# Prepare an index for trajectories: order the .xtc files numerically by the
# digits in their file name. Only the basename is inspected so digits in the
# directory part of the path cannot leak into the sort key, and the pattern is
# a raw string because '\D' is not a valid escape in a normal string literal.
# NOTE: a file name without any digit still raises ValueError from int('').
trajlist=glob(trajDir+"*.xtc")
trajlist.sort(key=lambda f: int(re.sub(r'\D', '', os.path.basename(f))))

# Featurize trajectories: output files are named by position in the sorted
# list (0.npy, 1.npy, ...), not by the original trajectory file name.
os.makedirs(featdir+"features",exist_ok=True)
atom_pairs_feat = AtomPairsFeaturizer(atom_pair_list)
for n,i in enumerate(trajlist):
    # i is the path of the n-th trajectory in sorted order
    feat_ = feat(atom_pairs_feat,i,topfile)
    np.save("{}features/{}.npy".format(featdir,n), feat_)