In [33]:
import os
import io
import re
import glob
import toolz

In [34]:
import pandas

In [35]:
import properties
from tmol.properties.reactive import cached

In [36]:
from toolz import curry, filter, compose, concat

In [61]:
class ResidueType(properties.HasProperties):
    """Parsed view of a single residue-type params file.

    The source file is read line by line, each line is normalized by
    ``filters``, and the non-empty results are grouped by their first
    whitespace-separated token (the record key).  ``tables`` turns the record
    groups listed in ``subtables`` into pandas DataFrames.
    """

    source = properties.File("source params file", mode="r")

    @staticmethod
    def filters(line):
        """Normalize one raw line of the params file.

        Tabs are replaced by spaces, everything from the first ``#`` onward is
        removed, and surrounding whitespace is stripped.

        NOTE(review): this used to be a class-level ``toolz.compose`` object.
        Recent toolz releases appear to bind composed functions like methods,
        so calling it through ``self`` could pass the instance into ``re.sub``.
        A ``staticmethod`` behaves the same regardless; confirm against the
        toolz version in use.

        Args:
            line: one line of text from the source file.

        Returns:
            The cleaned line; an empty string if nothing but whitespace or a
            comment was present.
        """
        return re.sub("#.*", "", re.sub("\t", " ", line)).strip()

    @cached(properties.Dictionary("params lines, split by line key"))
    def lines(self):
        """Group the cleaned, non-empty source lines by their record key.

        Returns:
            Mapping of record key (first token of the line) to the list of
            cleaned lines starting with that key, in file order.
        """
        cleaned = [self.filters(raw_line) for raw_line in self.source.readlines()]

        return toolz.groupby(
            lambda line: line.split()[0],
            [line for line in cleaned if line],
        )

    # Each entry is (table name, record keys to collect, column names).
    # The first column always receives the record key itself and is dropped
    # from the parsed table, as is every column whose name starts with "_".
    subtables = [
        ("NAME", ("NAME",), ("NAME", "name")),
        ("AA", ("AA",), ("AA", "name")),
        ("ATOM", ("ATOM",), ("ATOM", "name", "atom_type", "_mm_atom_type", "_charge", "_charge2")),
        ("BOND", ("BOND", "BOND_TYPE"), ("BOND", "atom_a", "atom_b")),
        ("ICOOR_INTERNAL", ("ICOOR_INTERNAL",), ("ICOOR_INTERNAL", "name", "t1", "t2", "d", "p1", "p2", "p3")),
        ("LOWER_CONNECT", ("LOWER_CONNECT",), ("LOWER_CONNECT", "name")),
        ("UPPER_CONNECT", ("UPPER_CONNECT",), ("UPPER_CONNECT", "name")),
    ]

    @property
    def tables(self):
        """Parse every configured subtable into a DataFrame.

        The result is rebuilt on every access; only ``lines`` is cached.

        Returns:
            dict mapping subtable name to a DataFrame of the
            whitespace-separated fields of the matching records, without the
            leading record-key column and without ``_``-prefixed columns.

        NOTE(review): a record key listed in ``subtables`` that does not occur
        in the source file is looked up with ``self.lines[k]`` and so
        presumably raises ``KeyError``; input validation is still missing.
        """
        result = {}
        for name, record_names, column_names in self.subtables:
            records = toolz.concat(self.lines[k] for k in record_names)
            table = pandas.read_table(
                io.StringIO("\n".join(records)),
                # Raw string: "\s" is not a valid escape in a normal literal.
                sep=r"\s+",
                header=None,
                names=column_names,
            )

            # Drop the record-key column together with the private columns.
            unwanted = [name] + [c for c in table.columns if c.startswith("_")]
            result[name] = table.drop(columns=unwanted)
        return result


In [62]:
class ResidueTypes(properties.HasProperties):
    """Collection of ``ResidueType`` objects loaded from one directory."""

    res_db = properties.String("base dir for residue parameters files")

    @property
    def res(self):
        """Build a ``ResidueType`` for every ``*.params`` file in ``res_db``.

        The list is rebuilt on every access.  Paths are sorted because
        ``glob.glob`` returns matches in arbitrary order, which would make the
        result differ between runs and machines.

        Returns:
            list of ``ResidueType`` instances, ordered by file path.
        """
        params_paths = sorted(glob.glob(f"{self.res_db}/*.params"))
        return [ResidueType(source=path) for path in params_paths]

    @property
    def json(self):
        """Collect the ``json`` attribute of every residue type.

        NOTE(review): ``ResidueType`` does not define ``json`` in this file;
        confirm that ``properties.HasProperties`` provides it, otherwise this
        property raises ``AttributeError``.
        """
        return [r.json for r in self.res]

In [63]:
# Root of the Rosetta database checkout.  Set the ROSETTA3_DB environment
# variable to point at a different checkout; the fallback is the path this
# notebook was originally written against.
rosetta_db = os.environ.get("ROSETTA3_DB", "/home/fordas/workspace/rosetta/main/database")
# Directory of the "l-caa" residue type params files inside the fa_standard
# residue type set.
l_caa = f"{rosetta_db}/chemical/residue_type_sets/fa_standard/residue_types/l-caa"

In [64]:
from toolz.curried import concat, get

In [65]:
# Residue type collection backed by the l-caa params directory.
rdb = ResidueTypes(res_db=l_caa)

In [76]:
# Index the parsed residue types by the value of their NAME record.
res_by_name = {r.tables["NAME"]["name"].values[0]: r for r in rdb.res}

# FIXME: these CYS variants trigger an error caused by invalid connect record
# formatting; skip them until validation logic is added to the data input.
# pop() with a default keeps the cell from raising KeyError when a variant is
# not present in the source directory.
for broken_resn in ("CYV", "CYZ"):
    res_by_name.pop(broken_resn, None)

In [77]:
from tmol.io.flatfiledb import FlatFileDB

In [78]:
import json
# Create the output directory (and any missing parents) without relying on a
# shell; exist_ok makes the cell safe to re-run.
os.makedirs("../tmol/database/basic/l-caa", exist_ok=True)

In [80]:
# Write one params file per residue type into the tmol database directory.
for resn, r in res_by_name.items():
    # Serialize before opening the output file: opening with "w" truncates it,
    # so a failure while building or rendering the tables would otherwise
    # leave an empty file behind.
    rendered = FlatFileDB.write(r.tables)
    with open(f"../tmol/database/basic/l-caa/{resn}.params", "w") as of:
        of.write(rendered)