# WS_ch08B.ipynb
### WESmith 04/18/23

## INTRODUCING BIO.PDB
#### (see book code in Chapter08/PDB.py)

### WS created this notebook to follow along with code from the book
### 'Bioinformatics with Python Cookbook' by Tiago Antao
#### Each recipe will have its own notebook, suffixed by A, B, etc.


In [None]:
from Bio import PDB
from Bio.SeqIO import PdbIO, FastaIO
import os

In [None]:
# Relative directory into which the PDB files are downloaded and from which
# they are read back in the cells below.
data_dir  = 'data'

In [None]:
# PDBList object whose retrieve_pdb_file method is used below to fetch files.
repository = PDB.PDBList()

In [None]:
# IPython help syntax: display the documentation of retrieve_pdb_file.
repository.retrieve_pdb_file?

In [None]:
# WS this downloads the pdb data to a local file suffixed with .ent
# One call per structure ID (1TUP, 1OLG, 1YCQ); each file is requested in
# 'pdb' format and placed in data_dir.
repository.retrieve_pdb_file('1TUP', pdir=data_dir, file_format='pdb')
repository.retrieve_pdb_file('1OLG', pdir=data_dir, file_format='pdb')
repository.retrieve_pdb_file('1YCQ', pdir=data_dir, file_format='pdb')

In [None]:
# Parser object whose get_structure method is used below to read the files.
parser = PDB.PDBParser()

In [None]:
# IPython help syntax: display the documentation of get_structure.
parser.get_structure?

In [None]:
# WS
# Paths of the three '.ent' files inside data_dir; these are expected to be
# the files produced by the download cell (see the '.ent' note there).
pdb1tup_file = os.path.join(data_dir, 'pdb1tup.ent')
pdb1olg_file = os.path.join(data_dir, 'pdb1olg.ent')
pdb1ycq_file = os.path.join(data_dir, 'pdb1ycq.ent')

In [None]:
# Parse each file into a structure object. The first argument is a
# descriptive label for the structure, the second is the file to read.
p53_1tup = parser.get_structure('P 53 - DNA Binding',     pdb1tup_file)
p53_1olg = parser.get_structure('P 53 - Tetramerization', pdb1olg_file)
p53_1ycq = parser.get_structure('P 53 - Transactivation', pdb1ycq_file)

In [None]:
def print_pdb_headers(headers, indent=0):
    """Pretty-print a (possibly nested) mapping of PDB header fields.

    Every key is right-aligned in a 20-character column that is shifted
    right by ``indent`` spaces.

    * A mapping value is printed recursively, indented by four more spaces
      and framed by blank lines.
    * A list value is printed one element per line, each prefixed by ``->``.
    * Any other value is printed on the same line as its key.

    ``isinstance`` is used for the type checks so that ``dict`` and ``list``
    subclasses (``OrderedDict``, ``defaultdict``, ...) are formatted like
    their base types instead of being dumped as a raw ``repr``.

    Args:
        headers: Mapping of header name to content.
        indent: Number of spaces to prepend to every printed line.
    """
    ind_text = ' ' * indent
    for header, content in headers.items():
        if isinstance(content, dict):
            # Nested section: heading, recursive body, trailing blank line.
            print('\n%s%20s:' % (ind_text, header))
            print_pdb_headers(content, indent + 4)
            print()
        elif isinstance(content, list):
            print('%s%20s:' % (ind_text, header))
            for elem in content:
                # The 21-wide '->' column places the arrow just past the key column.
                print('%s%21s %s' % (ind_text, '->', elem))
        else:
            print('%s%20s: %s' % (ind_text, header, content))

In [None]:
# Show which top-level keys the parsed 1TUP header contains.
print(p53_1tup.header.keys()) # WS

In [None]:
# Pretty-print the complete 1TUP header with print_pdb_headers.
print_pdb_headers(p53_1tup.header)

In [None]:
# Print the raw 'compound' header entry of each of the three structures.
for parsed in (p53_1tup, p53_1olg, p53_1ycq):
    print(parsed.header['compound'])

In [None]:
# The 'compound' header entry of each structure, formatted by
# print_pdb_headers rather than printed as a raw value.
print_pdb_headers(p53_1tup.header['compound']) # WS a cleaner output

In [None]:
print_pdb_headers(p53_1olg.header['compound']) # WS a cleaner output

In [None]:
print_pdb_headers(p53_1ycq.header['compound']) # WS a cleaner output

In [None]:
def describe_model(name, pdb):
    """Print one summary line per chain for every model of a structure.

    A blank line is printed first, then for each chain a line giving the
    chain id, the chain's length (reported as residue count) and the number
    of items yielded by its ``get_atoms()``.

    Args:
        name: Label placed at the start of every line.
        pdb: Iterable of models, each of which is an iterable of chains.
    """
    line_format = '%s - Chain: %s. Number of residues: %d. Number of atoms: %d.'
    print()
    every_chain = (chain for model in pdb for chain in model)
    for chain in every_chain:
        # Count the atoms without building an intermediate list.
        atom_total = sum(1 for _ in chain.get_atoms())
        print(line_format % (name, chain.id, len(chain), atom_total))

In [None]:
# Summarise the chains of each parsed structure, labelled by its PDB ID.
for pdb_code, parsed in (
    ('1TUP', p53_1tup),
    ('1OLG', p53_1olg),
    ('1YCQ', p53_1ycq),
):
    describe_model(pdb_code, parsed)

In [None]:
# WS to explore chain object
# Flatten the model -> chain nesting of 1TUP into a single list of chains.
dd = [chain for model in p53_1tup for chain in model]
# Atoms of the first chain, materialised as a list for interactive inspection.
ee = list(dd[0].get_atoms())

In [None]:
# Print the id of every residue whose first id field is neither ' ' nor 'W'.
# NOTE(review): in Bio.PDB that field is presumably the hetero flag
# (' ' = standard residue, 'W' = water) -- confirm against the Bio.PDB docs.
for residue in p53_1tup.get_residues():
    if residue.id[0] not in (' ', 'W'):
        print(residue.id)

In [None]:
# Show the type of the object returned by get_residues().
type(p53_1tup.get_residues())

In [None]:
# WS to explore residue object
# Materialise every residue of the structure, then print each residue's id.
gg = list(p53_1tup.get_residues())
for k in gg: 
    print(k.id)

In [None]:
# Take the first residue yielded for chain 'A' of model 0.
res = next(p53_1tup[0]['A'].get_residues())
print(res)

In [None]:
# Iterating over the residue yields its atoms; print each one together
# with its serial_number and element attributes.
for atom in res:
    print(atom, atom.serial_number, atom.element)

In [None]:
print(p53_1tup[0]['A'][94]['CA'])  # WS model, chain, residue, atom

In [None]:
# Keep a reference to that same atom for the next cell.
ff = p53_1tup[0]['A'][94]['CA']

In [None]:
# Display the atom's full_id and coord attributes as the cell output.
ff.full_id, ff.coord  # WS

In [None]:
# write fasta file
def get_fasta(pdb_file, fasta_file, transfer_ids=None):
    """Copy sequence records from a PDB file into a FASTA file.

    Records are read with ``PdbIO.PdbSeqresIterator`` and written with a
    ``FastaIO.FastaWriter``. Records with an empty sequence are skipped, and
    when ``transfer_ids`` is given only records whose ``id`` is in it are
    kept. Each kept record's id, sequence and length are also printed.

    Args:
        pdb_file: Input passed to ``PdbIO.PdbSeqresIterator``.
        fasta_file: Output passed to ``FastaIO.FastaWriter``.
        transfer_ids: Optional collection of record ids to keep; ``None``
            keeps every non-empty record.
    """
    writer = FastaIO.FastaWriter(fasta_file)
    writer.write_header()
    for record in PdbIO.PdbSeqresIterator(pdb_file):
        has_sequence = len(record.seq) > 0
        if has_sequence and (transfer_ids is None or record.id in transfer_ids):
            # Echo what is being exported, then append it to the FASTA output.
            print(record.id, record.seq, len(record.seq))
            writer.write_record(record)

In [None]:
# WS
# Paths inside data_dir of the FASTA files written by the get_fasta calls
# in the following cell.
fasta_1tup_file = os.path.join(data_dir, '1tup.fasta')
fasta_1olg_file = os.path.join(data_dir, '1olg.fasta')
fasta_1ycq_file = os.path.join(data_dir, '1ycq.fasta')

In [None]:
# Export the '<ID>:B' record of each structure to its FASTA file.
# Both files are opened in a `with` block so the handles are closed, and the
# FASTA output is flushed to disk, as soon as each export finishes. The bare
# open() calls used previously never closed their handles.
for pdb_path, fasta_path, record_id in (
    (pdb1tup_file, fasta_1tup_file, '1TUP:B'),
    (pdb1olg_file, fasta_1olg_file, '1OLG:B'),
    (pdb1ycq_file, fasta_1ycq_file, '1YCQ:B'),
):
    with open(pdb_path) as pdb_handle, open(fasta_path, 'w') as fasta_handle:
        get_fasta(pdb_handle, fasta_handle, transfer_ids=[record_id])