# Examples: Processing PDB Files

In [None]:
%load_ext autoreload
%autoreload 2

from IPython.display import display
from pathlib import Path

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

from opencadd.api.web import rcsb
from opencadd.io import pdb
from opencadd.io.pdb import _records


In [None]:
# Sort the matches: glob results come back in arbitrary, filesystem-dependent
# order, and later cells address parsers by position (e.g. parsers[0]).
pdb_paths = sorted(Path("./pdbs").glob("*.pdb"))

In [None]:
parsers = [pdb.parser.from_filepath(pdb_path, parse=False) for pdb_path in pdb_paths[:100]]

In [None]:
def print_compare(record_name, begin=0, end=100):
    """Print raw record lines next to their parsed data for a range of parsers.

    For every parser in ``parsers[begin:end]`` that has the record, this prints
    the parser's index and the ``pdb_id`` from its header, a digit ruler, the
    raw record lines, and then displays the parser's
    ``record_<record_name>`` attribute.

    Parameters
    ----------
    record_name : str
        Record name, e.g. ``"header"``. It is upper-cased to look up the record
        object on ``_records`` and used as given to build the attribute name
        ``record_<record_name>``.
    begin, end : int
        Slice bounds into the notebook-level ``parsers`` list.

    Raises
    ------
    AttributeError
        If ``_records`` has no attribute named ``record_name.upper()``.
    """
    # The notebook imports `_records` only; the bare name `records` is never bound.
    # The lookup does not depend on the parser, so it is done once up front.
    record = getattr(_records, record_name.upper())
    # Ruler of repeating digits 0-9 (80 characters) printed above the raw lines.
    ruler = "0123456789" * 8
    for index, parser in enumerate(parsers[begin:end], start=begin):
        if not parser.has_record(record):
            continue
        print(f"\nIndex: {index}; PDB-ID: {parser.record_header['pdb_id']}\n")
        print("", ruler)
        print(parser.record_lines(record, False), "\n")
        display(getattr(parser, f"record_{record_name}"))
        print("\n", "-" * 84)
        

### HEADER

In [None]:
print_compare("header")

### OBSLTE

In [None]:
# PDB IDs reported by the RCSB holdings query with status "removed"
obslte_pdb_ids = rcsb.data_holdings(status="removed")

# Fetch the first ten of them and show the parsed OBSLTE record followed by
# its raw lines.
for obslte_pdb_id in obslte_pdb_ids[:10]:
    p = pdb.parser.from_pdb_id(obslte_pdb_id)
    print(p.record_obslte)
    # `_records` is the name the import cell binds; bare `records` is undefined.
    print(p.record_lines(_records.OBSLTE, False), "\n\n")

### TITLE

In [None]:
print_compare("title")

### CAVEAT

In [None]:
print_compare("caveat")

### COMPND

In [None]:
print_compare("compnd")

In [None]:
# Sanity check: print the raw COMPND lines of every entry whose non-null
# `engineered` values are anything other than "YES" / "NO".
for i, p in enumerate(parsers):
    try:
        engineered = p.record_compnd.engineered.dropna()
        if not engineered.isin(["YES", "NO"]).all():
            # `_records` is the name the import cell binds; bare `records` is undefined.
            print(p.record_lines(_records.COMPND, False))
    except AttributeError:
        # Deliberately skipped — presumably entries without parsed COMPND data
        # or without an `engineered` column (TODO confirm).
        continue
    except Exception:
        # Any other failure: report which entry broke, together with its raw lines.
        print(i)
        print(p.record_header["pdb_id"])
        print(p.record_lines(_records.COMPND, False))

### SOURCE 

In [None]:
print_compare("source")

### KEYWDS

In [None]:
print_compare("keywds")

### EXPDTA

In [None]:
print_compare("expdta")

### NUMMDL

In [None]:
print_compare("nummdl")

### MDLTYP

In [None]:
print_compare("mdltyp")

### AUTHOR

In [None]:
print_compare("author")

### REVDAT

In [None]:
print_compare("revdat")

### SPRSDE

In [None]:
print_compare("sprsde")

### JRNL

In [None]:
parsers[0].record_jrnl

In [None]:
parsers[0].record_lines(pdb.records.JRNL, False)

### DBREF

In [None]:
print_compare("dbref")

### SEQADV

In [None]:
print_compare("seqadv")

### SEQRES

In [None]:
print_compare("seqres")

### MODRES

In [None]:
print_compare("modres")

### HET

In [None]:
print_compare("het")

### HETNAM

In [None]:
print_compare("hetnam")

### HETSYN

In [None]:
print_compare("hetsyn")

### FORMUL

In [None]:
print_compare("formul")

In [None]:
# Display the parsed FORMUL data of every entry that has a non-zero
# `count_rest` value; on failure, print the entry index and its raw lines.
for i, p in enumerate(parsers):
    try:
        # `_records` is the name the import cell binds; bare `records` is undefined.
        if p.has_record(_records.FORMUL) and np.any(p.record_formul.count_rest != 0):
            display(p.record_formul)
    except Exception:
        # `except Exception` instead of a bare `except`, so that interrupting
        # the kernel (KeyboardInterrupt) still stops the loop.
        print(i)
        print(p.record_lines(_records.FORMUL, False))

In [None]:
# NOTE(review): `parsers` is built from `pdb_paths[:100]`, so index 5233 raises
# IndexError unless that slice is widened — confirm which entry is meant here.
parsers[5233].record_formul

In [None]:
import re
re.split(r'[()]+', "2()")

### HELIX

In [None]:
print_compare("helix")

### SHEET

In [None]:
print_compare("sheet")

### SSBOND

In [None]:
print_compare("ssbond")

### LINK

In [None]:
print_compare("link")

### CISPEP

In [None]:
print_compare("cispep")

### SITE

In [None]:
print_compare("site")

### CRYST1

In [None]:
print_compare("cryst1")

### ORIGX

In [None]:
print_compare("origx2")

In [None]:
# Print the `records_origx` attribute of every loaded parser
for parser in parsers:
    print(parser.records_origx)

### SCALE

In [None]:
# Print the `records_scale` attribute of every loaded parser
for parser in parsers:
    print(parser.records_scale)

In [None]:
print_compare("scale1")

In [None]:
# Render the `records_mtrix` attribute of every loaded parser
for parser in parsers:
    display(parser.records_mtrix)

In [None]:
parsers[-5].records_mtrix.loc[1,"transformation_matrix"]

In [None]:
print_compare("mtrix1")

### ATOM/HETATM

In [None]:
# For every loaded parser, print the PDB ID from its header and then render
# its `records_atom_hetatm` attribute.
for parser in parsers:
    pdb_id = parser.record_header["pdb_id"]
    print(pdb_id)
    display(parser.records_atom_hetatm)
    

In [None]:
print_compare("anisou")

In [None]:
# Zero-based columns 78-79 of each ANISOU line, viewed as 2-character strings
# (presumably the charge field, PDB columns 79-80 — TODO confirm).
# `_records` is the name the import cell binds; bare `records` is undefined.
c = parsers[59].record_lines(_records.ANISOU)[:, 78:80].view(dtype=(str, 2))
# Show the values that are neither blank nor "1-"
c[(c != "  ") & (c != "1-")]

### TER

In [None]:
print_compare("ter")

### CONECT

In [None]:
print_compare("conect")

### MASTER

In [None]:
print_compare("master")

In [None]:
# Parsed REMARK data of the first parser; `r` is inspected again in the next cell
r = parsers[0].record_remark
# Join the "content" entries stored under index label 800 into one text block
remark_800_lines = r.loc[800, "content"]
print("\n".join(remark_800_lines))

In [None]:
r.index.unique()

In [None]:
"REMARK 2 RESOLUTION. NOT APPLICABLE. "[23:30]

In [None]:
parsers[0].record_remark4

In [None]:
from opencadd import api

In [None]:
from opencadd.api.web.http_request import response_http_request

In [None]:
a=response_http_request("https://files.wwpdb.org/pub/pdb/compatible/pdb_bundle/pdb_bundle_index.txt", response_type="str")

In [None]:
q = """{entry(entry_id:"4HHB"){exptl{method}}}"""
response_http_request(f"https://data.rcsb.org/graphql?query={q}", response_type="json")

In [None]:
import json

# GraphQL query that declares a variable `$id` for the entry ID
q = """query exptl_method($id: String!) {
   entry(entry_id:$id) {
      exptl {
        method
      }
   }
}
"""
# Over HTTP GET, GraphQL variables are sent as a JSON-encoded `variables`
# query parameter; a plain `id=4HHB` parameter leaves `$id` unset.
# NOTE(review): assumes `params` is forwarded as URL query parameters — confirm
# against `response_http_request`.
response_http_request(
    f"https://data.rcsb.org/graphql?query={q}",
    response_type="json",
    params={"variables": json.dumps({"id": "4HHB"})},
)

In [None]:
type(a)

In [None]:
import numpy as np

In [None]:
a.upper().splitlines()

In [None]:
from opencadd import api

In [None]:
api.web.rcsb.data_holdings_without_pdb_file()

In [None]:
x=api.web.rcsb.data_assembly("3w32",1)

In [None]:
# Plain loop: a list comprehension used only for its side effect would also
# build a list of None values and echo it as the cell output.
for key in x.keys():
    print(key)

In [None]:
x["rcsb_assembly_container_identifiers"]

In [None]:
x["rcsb_id"]

In [None]:
x["rcsb_assembly_info"]

In [2]:
from pdbx.reader import PdbxReader#, PdbxContainers

In [3]:
import opencadd as oc

In [4]:
oc.api

AttributeError: module 'opencadd' has no attribute 'api'

In [6]:
import opencadd as oc

In [8]:
import opencadd.api

In [9]:
oc.api = opencadd.api

In [10]:
oc._http_request

AttributeError: module 'opencadd' has no attribute '_http_request'

In [11]:
import opencadd

In [12]:
opencadd._typing

AttributeError: module 'opencadd' has no attribute '_typing'

In [13]:
# NOTE: this rebinds the name `pdb`, which the first cell bound to
# `opencadd.io.pdb`; cells that use `pdb.parser` will see this module instead
# if they are re-run after this import.
from opencadd.data import pdb

In [14]:
a=pdb.data.schema("chem_comp")

In [16]:
a.keys()

dict_keys(['type', 'properties', 'additionalProperties', 'required', '$schema', 'title', 'description', '$comment'])

In [17]:
a["type"]

'object'

In [18]:
a["title"]

'schema: bird_chem_comp_core collection: bird_chem_comp_core version: 7.1.2'

In [20]:
a["properties"].keys()

dict_keys(['chem_comp', 'pdbx_chem_comp_audit', 'pdbx_chem_comp_descriptor', 'pdbx_chem_comp_feature', 'pdbx_chem_comp_identifier', 'pdbx_family_prd_audit', 'pdbx_prd_audit', 'pdbx_reference_entity_list', 'pdbx_reference_entity_poly', 'pdbx_reference_entity_poly_link', 'pdbx_reference_entity_poly_seq', 'pdbx_reference_entity_sequence', 'pdbx_reference_entity_src_nat', 'pdbx_reference_molecule', 'pdbx_reference_molecule_annotation', 'pdbx_reference_molecule_details', 'pdbx_reference_molecule_family', 'pdbx_reference_molecule_features', 'pdbx_reference_molecule_list', 'pdbx_reference_molecule_related_structures', 'pdbx_reference_molecule_synonyms', 'rcsb_bird_citation', 'rcsb_chem_comp_annotation', 'rcsb_chem_comp_container_identifiers', 'rcsb_chem_comp_descriptor', 'rcsb_chem_comp_info', 'rcsb_chem_comp_related', 'rcsb_chem_comp_synonyms', 'rcsb_chem_comp_target', 'rcsb_schema_container_identifiers', 'rcsb_id'])

In [24]:
a["properties"]["chem_comp"]["properties"].keys()

dict_keys(['formula', 'formula_weight', 'id', 'mon_nstd_parent_comp_id', 'name', 'one_letter_code', 'pdbx_ambiguous_flag', 'pdbx_formal_charge', 'pdbx_initial_date', 'pdbx_modified_date', 'pdbx_processing_site', 'pdbx_release_status', 'pdbx_replaced_by', 'pdbx_replaces', 'pdbx_subcomponent_list', 'three_letter_code', 'type'])

In [25]:
def recursive_items(dictionary):
    """Yield every ``(key, value)`` leaf pair of a nested dictionary, depth first.

    Values that are themselves dictionaries are descended into instead of being
    yielded, so the keys of intermediate dictionaries never appear in the output.

    Parameters
    ----------
    dictionary : dict
        Possibly nested dictionary to walk.

    Yields
    ------
    tuple
        ``(key, value)`` for each value that is not a dictionary.
    """
    for key, value in dictionary.items():
        # isinstance (rather than an exact type match) also descends into
        # dict subclasses such as OrderedDict.
        if isinstance(value, dict):
            yield from recursive_items(value)
        else:
            yield key, value


In [26]:
# Print only the key of each leaf (key, value) pair found in `a`
for leaf_key, _leaf_value in recursive_items(a):
    print(leaf_key)

type
type
type
examples
description
rcsb_description
type
examples
description
rcsb_search_context
rcsb_units
rcsb_description
rcsb_search_group
type
examples
description
rcsb_description
type
type
description
rcsb_description
uniqueItems
type
examples
description
rcsb_search_context
rcsb_full_text_priority
rcsb_description
rcsb_search_group
type
examples
description
rcsb_description
type
description
rcsb_description
type
description
rcsb_description
type
format
description
rcsb_description
type
format
description
rcsb_description
type
enum
description
rcsb_description
type
enum
description
rcsb_enum_annotated
rcsb_description
type
examples
description
rcsb_description
type
examples
description
rcsb_description
type
examples
description
rcsb_description
type
examples
description
rcsb_description
type
enum
description
rcsb_search_context
rcsb_full_text_priority
rcsb_enum_annotated
rcsb_description
rcsb_search_group
additionalProperties
required
type
type
type
enum
description
rcsb_descr