# BICePs 2.0 Example
### Dataset: _Cyclic hairpin_

**The algorithm is segmented into four parts:**
1. Preparation
2. Initialization
3. Posterior sampling
4. MBAR analysis and plot figures

In [2]:
import sys, os, glob
sys.path.append('src')         # Source code path 
#from BICePs import *
from Preparation import *      # Class for BICePs input files generation
from Restraint import *        # Class for initialization 
from PosteriorSampler import * # Class for sampling
from Analysis import *         # Class for MBAR calculation and figures

## 1. Preparation
Let's create the input files for BICePs.

In [3]:
# Inputs for the Preparation step.
# Only amide-H chemical shifts are used in this example.
path = 'cs_H/cs/H/*txt'                     # Wildcard pattern for the per-state data files
states = 50                                 # Number of states from Markov state model
indices = 'cs_H/cs_indices_NH.txt'          # Indices of experimental data
exp_data = 'cs_H/chemical_shift_NH.txt'     # Raw experimental data
top = 'cs_H/8690.pdb'                       # Topology file
data_dir = path                             # Preparation receives the same pattern as its data_dir
out_dir = 'test_cs_H'                       # Output directory

# Collect the keyword arguments first so the constructor call stays short.
prep_kwargs = dict(
    states=states,
    indices=indices,
    exp_data=exp_data,
    top=top,
    data_dir=data_dir,
)
p = Preparation('cs_H', **prep_kwargs)

# Writes one "<state>.cs_H" input file per state into out_dir (see the log below).
p.write(out_dir=out_dir)

Wrote test_cs_H/0.cs_H
Wrote test_cs_H/1.cs_H
Wrote test_cs_H/2.cs_H
Wrote test_cs_H/3.cs_H
Wrote test_cs_H/4.cs_H
Wrote test_cs_H/5.cs_H
Wrote test_cs_H/6.cs_H
Wrote test_cs_H/7.cs_H
Wrote test_cs_H/8.cs_H
Wrote test_cs_H/9.cs_H
Wrote test_cs_H/10.cs_H
Wrote test_cs_H/11.cs_H
Wrote test_cs_H/12.cs_H
Wrote test_cs_H/13.cs_H
Wrote test_cs_H/14.cs_H
Wrote test_cs_H/15.cs_H
Wrote test_cs_H/16.cs_H
Wrote test_cs_H/17.cs_H
Wrote test_cs_H/18.cs_H
Wrote test_cs_H/19.cs_H
Wrote test_cs_H/20.cs_H
Wrote test_cs_H/21.cs_H
Wrote test_cs_H/22.cs_H
Wrote test_cs_H/23.cs_H
Wrote test_cs_H/24.cs_H
Wrote test_cs_H/25.cs_H
Wrote test_cs_H/26.cs_H
Wrote test_cs_H/27.cs_H
Wrote test_cs_H/28.cs_H
Wrote test_cs_H/29.cs_H
Wrote test_cs_H/30.cs_H
Wrote test_cs_H/31.cs_H
Wrote test_cs_H/32.cs_H
Wrote test_cs_H/33.cs_H
Wrote test_cs_H/34.cs_H
Wrote test_cs_H/35.cs_H
Wrote test_cs_H/36.cs_H
Wrote test_cs_H/37.cs_H
Wrote test_cs_H/38.cs_H
Wrote test_cs_H/39.cs_H
Wrote test_cs_H/40.cs_H
Wrote test_cs_H/41.cs_H
Wr

## 2. Initialization
Let's create our ensemble of structures

In [4]:
# Specify necessary argument values
dataFiles = 'test_cs_H'             # Directory including all input files
data = sort_data(dataFiles)         # Sort input files and determine observables
energies_filename = 'energy.txt'    # Energy file computed from MSM populations
energies = loadtxt(energies_filename)
energies -= energies.min()          # Set ground state to zero, just in case
outdir = 'results_ref_normal'       # Directory that receives the sampling results
# Temporarily placing the number of steps here...
# NOTE(review): 1000 looks like a quick-demo value; the trailing number is
# presumably the production setting -- confirm before a real run.
nsteps = 1000 # 10000000

# OUTPUT
#
#     Files written by the posterior-sampling step:
#         <outdir>/traj_lambda<lambda>.yaml     - YAML trajectory file
#         <outdir>/sampler_lambda<lambda>.pkl   - a cPickle'd sampler object
#     where <lambda> is formatted with two decimals (e.g. 0.00, 1.00).

# Create the output directory (including any missing parents) if it is not
# there yet. Checking isdir() rather than exists() makes a stray regular file
# named like outdir fail here instead of later, when results are written.
if not os.path.isdir(outdir):
    os.makedirs(outdir)

## 3. Posterior Sampling
Next, let's do some posterior sampling

In [5]:
lambda_values = [0.0,1.0]      # lambda values to be sampled
for j in lambda_values:
    verbose = False
    lam = j
    # We will instantiate a number of Structure() objects to construct the ensemble
    ensemble = []
    for i in range(energies.shape[0]):
        print
        print '#### STRUCTURE %d ####'%i
        if verbose:
            print data[i]
        s = Restraint('8690.pdb',lam,energies[i],data = data[i])
        if verbose:
            print 's.sse_cs_H', s.sse_cs_H
        ensemble.append( s )

    ########## Posterior Sampling ############

    sampler = PosteriorSampler(ensemble)

    sampler.sample(nsteps)      # number of steps
    print 'Processing trajectory...',
    sampler.traj.process()      # compute averages, etc.
    print '...Done.'

    print 'Writing results...',
    sampler.traj.write_results(os.path.join(outdir,'traj_lambda%2.2f.yaml'%lam))
    print '...Done.'

    print 'Pickling the sampler object ...',
    outfilename = 'sampler_lambda%2.2f.pkl'%lam
    print outfilename,
    fout = open(os.path.join(outdir, outfilename), 'wb')
    cPickle.dump(sampler, fout) # Pickle dictionary using protocol 0.
    fout.close()
    print '...Done.'


#### STRUCTURE 0 ####
test_cs_H/0.cs_H

#### STRUCTURE 1 ####
test_cs_H/1.cs_H

#### STRUCTURE 2 ####
test_cs_H/2.cs_H

#### STRUCTURE 3 ####
test_cs_H/3.cs_H

#### STRUCTURE 4 ####
test_cs_H/4.cs_H

#### STRUCTURE 5 ####
test_cs_H/5.cs_H

#### STRUCTURE 6 ####
test_cs_H/6.cs_H

#### STRUCTURE 7 ####
test_cs_H/7.cs_H

#### STRUCTURE 8 ####
test_cs_H/8.cs_H

#### STRUCTURE 9 ####
test_cs_H/9.cs_H

#### STRUCTURE 10 ####
test_cs_H/10.cs_H

#### STRUCTURE 11 ####
test_cs_H/11.cs_H

#### STRUCTURE 12 ####
test_cs_H/12.cs_H

#### STRUCTURE 13 ####
test_cs_H/13.cs_H

#### STRUCTURE 14 ####
test_cs_H/14.cs_H

#### STRUCTURE 15 ####
test_cs_H/15.cs_H

#### STRUCTURE 16 ####
test_cs_H/16.cs_H

#### STRUCTURE 17 ####
test_cs_H/17.cs_H

#### STRUCTURE 18 ####
test_cs_H/18.cs_H

#### STRUCTURE 19 ####
test_cs_H/19.cs_H

#### STRUCTURE 20 ####
test_cs_H/20.cs_H

#### STRUCTURE 21 ####
test_cs_H/21.cs_H

#### STRUCTURE 22 ####
test_cs_H/22.cs_H

#### STRUCTURE 23 ####
test_cs_H/23.cs_H

#### STRUCT



 s.sum_gaussian_neglog_reference_potentials_H 0.0 s.sum_gaussian_neglog_reference_potentials_Ha 0.0 s.sum_gaussian_neglog_reference_potentials_N 0.0 s.sum_gaussian_neglog_reference_potentials_Ca 0.0 s.sum_gaussian_neglog_reference_potentials_pf 0.0
s =  <Restraint.Restraint instance at 0x1094bd440>
Result = 3.91202300543
state, f_sim 31 0.0 s.sse_cs_H 3.16747235 s.Ndof_cs_H 7.0
s.sse_cs_Ha 0 s.Ndof_cs_Ha None
s.sse_cs_N 0 s.Ndof_cs_N None
s.sse_cs_Ca 0 s.Ndof_cs_Ca None
s.sse_pf 0 s.Ndof_pf None
s.sum_neglog_reference_potentials_noe 0.0 s.sum_neglog_reference_potentials_H 21.7242628874 s.sum_neglog_reference_potentials_Ha 0.0 s.sum_neglog_reference_potentials_N 0.0 s.sum_neglog_reference_potentials_Ca 0.0 s.sum_neglog_reference_potentials_pf 0.0
s.sum_gaussian_neglog_reference_potentials_noe 0.0 s.sum_gaussian_neglog_reference_potentials_H 0.0 s.sum_gaussian_neglog_reference_potentials_Ha 0.0 s.sum_gaussian_neglog_reference_potentials_N 0.0 s.sum_gaussian_neglog_reference_potentials_Ca

## 4. MBAR and Figures
Let's do analysis using MBAR and plot figures

In [6]:
# Inputs for the analysis step. They are spelled out here so this cell can be
# re-run against results already on disk.
dataFiles = 'test_cs_H'             # Directory including all input files
nstates = 50                        # Echoed as "nstates" in the Analysis log output
resultdir = 'results_ref_normal'    # Directory holding the traj_*.yaml / sampler_*.pkl files

A = Analysis(nstates, dataFiles, resultdir)
# Per the section heading, this performs the MBAR analysis and draws the figures.
A.plot()

Loading results_ref_normal/traj_lambda0.00.yaml ...
Loading results_ref_normal/traj_lambda1.00.yaml ...
Loading results_ref_normal/sampler_lambda0.00.pkl ...
Loading results_ref_normal/sampler_lambda1.00.pkl ...
lam = [0.0, 1.0]
nstates 50
E0 evaluated in model 0 -10.8499649873 state, sigma_noe_index, sigma_J_index, sigma_cs_H_index, sigma_cs_Ha_index, sigma_cs_N_index, sigma_cs_Ca_index, sigma_pf_index, gamma_index 0 301 152 151 151 151 151 151 197
s =  <Restraint.Restraint instance at 0x109c53f80>
Result = 3.91202300543
state, f_sim 0 0.0 s.sse_cs_H 1.28564951 s.Ndof_cs_H 7.0
s.sse_cs_Ha 0 s.Ndof_cs_Ha None
s.sse_cs_N 0 s.Ndof_cs_N None
s.sse_cs_Ca 0 s.Ndof_cs_Ca None
s.sse_pf 0 s.Ndof_pf None
s.sum_neglog_reference_potentials_noe 0.0 s.sum_neglog_reference_potentials_H 21.8057899379 s.sum_neglog_reference_potentials_Ha 0.0 s.sum_neglog_reference_potentials_N 0.0 s.sum_neglog_reference_potentials_Ca 0.0 s.sum_neglog_reference_potentials_pf 0.0
s.sum_gaussian_neglog_reference_potentia

  if self._edgecolors == str('face'):
