In [29]:
# Python 3.10.12
import os
import pandas as pd
import numpy as np
#import re
from pybmrb import Spectra

#import pybmrb
#pybmrb.ChemicalShift.from_bmrb(bmrb_ids=bmrb_ids)

def _peaklist_to_df(spectra_list, f2, f1):
    '''
    Turn the sequence returned by a pybmrb peak-list function into a tidy DataFrame.

    spectra_list is treated positionally (one entry per column, one element per peak):
      0: f2 ppm      e.g. 8.607
      1: f1 ppm      e.g. 123.656
      2: Entry id    e.g. '36081-1'
      3: assignment  e.g. '36081-1-1-2-GLN-H-N'
      4: residue     e.g. 'GLN'
      5: comment     (empty in the examples seen so far)
    Two derived columns are added from the assignment string:
      'Resi'  -> integer residue number (the '2' in the example above)
      'atoms' -> the two atom labels joined by '-' (the 'H-N' in the example above)
    '''
    column_names = {0: f2 + '.P', 1: f1 + '.P', 2: 'Entry', 3: 'assignment', 4: 'aa', 5: 'comment'}
    biddf = pd.DataFrame(spectra_list).T.rename(column_names, axis='columns')
    # expand=False -> a Series rather than a one-column DataFrame
    biddf['Resi'] = biddf['assignment'].str.extract(r'.*-(.*?)-.*-.*-.*', expand=False).astype(int)
    biddf['atoms'] = biddf['assignment'].str.extract(r'.*-(.*?-.*)', expand=False)
    return biddf


def _xpk_peak_lines(biddf, f2, f1):
    '''
    Yield one xpk peak line (without trailing newline) per row of biddf.

    Only the .L (label) and .P (ppm) columns carry real data; every other column
    named in the xpk column header is filled with the fixed placeholders below.
    '''
    # placeholders for f2 .W .B .E .J .U (the trailing space is part of the original file layout)
    dummy1 = '0.05000 0.05000 ++ {0.0} {?} '
    # placeholders for f1 .W .B .E .J .U followed by vol int stat comment flag0
    dummy2 = '0.40000 0.40000 ++ {0.0} {?} 0.0 1.1000 0 {?} 0'
    rows = zip(biddf.index, biddf['Resi'], biddf['atoms'], biddf[f2 + '.P'], biddf[f1 + '.P'])
    for index, resi, atoms, f2ppm, f1ppm in rows:
        f2L, f1L = atoms.split('-')  # specific atom labels
        fields = (index,
                  '{' + str(resi) + '.' + f2L + '}', f2ppm, dummy1,
                  '{' + str(resi) + '.' + f1L + '}', f1ppm, dummy2)
        # same text that print(*fields) would produce
        yield ' '.join(str(field) for field in fields)


def _write_xpk(biddf, listname, save_path, f2, f1, freq, f1ratio, showxpk):
    '''
    Write save_path/listname.xpk for one peak table; echo it to the screen if showxpk.

    freq is the 1H frequency and f1ratio the factor that converts it to the
    f1-nucleus frequency (f1 frequency = freq / f1ratio).
    '''
    # NOTE(review): the value written on the "sw" header line is derived from
    # (lowest shift + 10 ppm), not from the max-min range -- kept as in the
    # original; confirm this is what the consuming program expects.
    f1sw = biddf[f1 + '.P'].min() + 10
    f2sw = biddf[f2 + '.P'].min() + 10

    xpk_header = f'''label dataset sw sf condition
{f2} {f1}
{listname}.nv
{f2sw*freq} {f1sw*freq/f1ratio}
{freq} {freq / f1ratio}

{f2+'.L'} {f2+'.P'} {f2+'.W'} {f2+'.B'} {f2+'.E'} {f2+'.J'} {f2+'.U'} {f1+'.L'} {f1+'.P'} {f1+'.W'} {f1+'.B'} {f1+'.E'} {f1+'.J'} {f1+'.U'} vol int stat comment flag0 '''

    with open(os.path.join(save_path, listname + '.xpk'), 'w') as f_xpk:
        # header first, then one line per peak; each line is built exactly once
        for line in [xpk_header, *_xpk_peak_lines(biddf, f2, f1)]:
            if showxpk:
                print(line)
            print(line, file=f_xpk)


def bmrb_to_xpk(bmrb_ids=[],freq = 500.131988525,spectrum='n15hsqc',sidechains=True,save_path=None,listnames=[],return_df = True,showxpk=True):
    '''
    generate xpk files from bmrb entries

    bmrb_ids: BMRB entry id, or list of ids; each id is fetched separately and
              gets its own .xpk file
    freq: 1H frequency
    spectrum: 'n15hsqc' selects the 15N HSQC peak list; any other value
              (normally 'c13hsqc') selects the 13C HSQC peak list
    sidechains: True or False to include sidechains (n15 only)
    save_path: if specified, save .xpk files to this directory (created if missing)
    listnames: custom file names (leave off .xpk), one per bmrb id, in the same order;
               a single name may be given as a plain string;
               if empty use bmrb_id to generate list name ('bmrb_<id>_<spectrum>')
    return_df: return the combined dataframe of shifts, assignments, for all bmrb_ids
    showxpk: if True also print xpk to screen, requires save_path to generate the xpk

    returns: DataFrame with columns '<f2>.P', '<f1>.P', 'Entry', 'assignment', 'aa',
             'comment', 'Resi', 'atoms' (empty DataFrame if no ids were given),
             or None when return_df is False
    raises: ValueError if save_path is given and listnames is non-empty but has
            fewer names than there are bmrb_ids (checked before anything is fetched)
    '''
    # factors that convert the 1H frequency into the 15N / 13C frequency (freq / factor)
    fN = 9.867650682574023
    fC = 3.9767816176803237
    freq_dict = {'1H':1,'H':1,'HN':1,
             '15N':fN,'N':fN,'NH':fN,
             '13C':fC,'C':fC}
    if spectrum == 'n15hsqc':
        spectrafunc = Spectra.create_n15hsqc_peaklist
        f2 = '1H'
        f1 = '15N'
        kwargs = {'include_sidechain':sidechains}
    else:
        spectrafunc = Spectra.create_c13hsqc_peaklist
        f2 = '1H'
        f1 = '13C'
        kwargs = {}

    # Accept a single id / single name as well as a list; work on private copies
    # so the (shared) default arguments are never touched.
    if isinstance(bmrb_ids, (str, int)):
        bmrb_ids = [bmrb_ids]
    bmrb_ids = list(bmrb_ids)
    if isinstance(listnames, str):
        listnames = [listnames]
    listnames = list(listnames)

    # Fail before any entry is fetched or any file is written, instead of
    # hitting an IndexError half-way through the loop.
    if save_path is not None and 0 < len(listnames) < len(bmrb_ids):
        raise ValueError(
            'listnames has %d name(s) but %d bmrb id(s) were given; '
            'supply one name per id or leave listnames empty'
            % (len(listnames), len(bmrb_ids)))

    frames = []  # one peak table per entry, concatenated once at the end
    for i,bid in enumerate(bmrb_ids):
        biddf = _peaklist_to_df(spectrafunc(bmrb_ids=bid,**kwargs), f2, f1)
        frames.append(biddf)

        if save_path is not None:
            if not os.path.exists(save_path):
                os.makedirs(save_path, exist_ok=True)
                print("created directory",save_path)
            if len(listnames) > 0:
                listname = listnames[i]
            else:
                listname = 'bmrb_'+str(bid)+'_'+spectrum
            _write_xpk(biddf, listname, save_path, f2, f1, freq, freq_dict[f1], showxpk)

    if not return_df:
        return
    if not frames:
        return pd.DataFrame()
    # ignore_index=True already yields a clean 0..n-1 index
    return pd.concat(frames, ignore_index=True)


In [30]:
# Entry to convert (ids previously listed here: 36081, 36082)
bmrb_ids = [27081]

# Save into the current working directory; joining with '' keeps a trailing path separator
save_path = os.path.join(os.getcwd(), '')

bmrbdf = bmrb_to_xpk(
    bmrb_ids=bmrb_ids,
    spectrum='n15hsqc',
    save_path=save_path,
    showxpk=True,
)
bmrbdf

label dataset sw sf condition
1H 15N
bmrb_27081_n15hsqc.nv
8246.1762268002 5606.562502699682
500.131988525 50.6839981079

1H.L 1H.P 1H.W 1H.B 1H.E 1H.J 1H.U 15N.L 15N.P 15N.W 15N.B 15N.E 15N.J 15N.U vol int stat comment flag0 
0 {3.H} 8.359 0.05000 0.05000 ++ {0.0} {?}  {3.N} 124.505 0.40000 0.40000 ++ {0.0} {?} 0.0 1.1000 0 {?} 0
1 {4.H} 8.094 0.05000 0.05000 ++ {0.0} {?}  {4.N} 112.772 0.40000 0.40000 ++ {0.0} {?} 0.0 1.1000 0 {?} 0
2 {5.H} 8.314 0.05000 0.05000 ++ {0.0} {?}  {5.N} 120.732 0.40000 0.40000 ++ {0.0} {?} 0.0 1.1000 0 {?} 0
3 {6.H} 8.117 0.05000 0.05000 ++ {0.0} {?}  {6.N} 115.878 0.40000 0.40000 ++ {0.0} {?} 0.0 1.1000 0 {?} 0
4 {7.H} 8.135 0.05000 0.05000 ++ {0.0} {?}  {7.N} 122.169 0.40000 0.40000 ++ {0.0} {?} 0.0 1.1000 0 {?} 0
5 {8.H} 7.828 0.05000 0.05000 ++ {0.0} {?}  {8.N} 123.514 0.40000 0.40000 ++ {0.0} {?} 0.0 1.1000 0 {?} 0
6 {9.H} 8.298 0.05000 0.05000 ++ {0.0} {?}  {9.N} 125.883 0.40000 0.40000 ++ {0.0} {?} 0.0 1.1000 0 {?} 0
7 {11.H} 8.269 0.05000 0.05000 

Unnamed: 0,1H.P,15N.P,Entry,assignment,aa,comment,Resi,atoms
0,8.359,124.505,27081-1,27081-1-1-3-ALA-H-N,ALA,,3,H-N
1,8.094,112.772,27081-1,27081-1-1-4-THR-H-N,THR,,4,H-N
2,8.314,120.732,27081-1,27081-1-1-5-ASN-H-N,ASN,,5,H-N
3,8.117,115.878,27081-1,27081-1-1-6-SER-H-N,SER,,6,H-N
4,8.135,122.169,27081-1,27081-1-1-7-PHE-H-N,PHE,,7,H-N
...,...,...,...,...,...,...,...,...
146,7.616,118.425,27081-1,27081-1-1-141-GLU-H-N,GLU,,141,H-N
147,7.834,120.3,27081-1,27081-1-1-142-TYR-H-N,TYR,,142,H-N
148,7.959,109.677,27081-1,27081-1-1-143-GLY-H-N,GLY,,143,H-N
149,8.063,120.3,27081-1,27081-1-1-144-GLU-H-N,GLU,,144,H-N
