# Calculate Pathweigh analysis, UDP, activity and cosistency.

In [1]:
import numpy as np
import pandas as pd
from scipy.special import gammaln, psi, factorial
from scipy.optimize import fmin_l_bfgs_b as optim
from scipy.stats import nbinom, norm, poisson, gennorm
from sklearn.mixture import GaussianMixture as GMM
from numpy import inf
from matplotlib import pyplot as plt
from udp import *
from activity import *

infinitesimal = np.finfo(np.float).eps

## Load file

In [3]:
#Running locally, read file from path.
#data = pd.read_csv('./data/Pathweigh/GSE29013_RMA.csv', index_col=0)
#Or upload file using widget.
import pandas as pd
import ipywidgets as widgets
import io

#FileUpload limit size: https://github.com/jupyter-widgets/ipywidgets/issues/2522.
data = pd.DataFrame()

def ff(b):
    global data
    s.style.button_color = "yellow"
    for elem in b.new.values():
        print(elem['metadata']['name'])
        #with open(elem['metadata']['name'], 'wb') as file:
            #file.write(elem['content'])
        print(f'Length of data file: {len(elem["content"])}')
        data = pd.read_csv(io.BytesIO(elem['content']), index_col=0)
        print("Done")
    s.style.button_color = "lightgreen"

s = widgets.FileUpload(
    accept='.csv',  # Accepted file extension e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf'
    multiple=False  # True to accept multiple files upload else False
)
s.style.button_color = "lightgreen"
s.observe(ff, names='value')
s

FileUpload(value={}, accept='.csv', description='Upload', style=ButtonStyle(button_color='lightgreen'))

GSE29013_RMA.csv
Length of data file: 36399849
Done


## Calculate UDP

In [4]:
#Run UDP calculation.
is_rnaseq = False #Chance to True in case of RNASeq data.
udp = calc_udp_multi_process(data, is_rnaseq)

Fri Feb  7 11:29:26 2020 Calculate UDP, is_rnaseq: False
...............................................................................

## Calculate Activity and Consistency

In [5]:
activity_obj = path_activity(udp, is_rnaseq)
activity = activity_obj.calc_activity_consistency_multi_process()

Fri Feb  7 11:30:34 2020 Init activity object
Fri Feb  7 11:30:35 2020 Calculate activity and consistency...
...Fri Feb  7 11:30:42 2020 Done.


## Select Path For Export

In [6]:
paths = pd.read_csv('pathologist.db.txt', delimiter='\t', header=None)
options = paths[0].unique()
dd = widgets.Dropdown(options=options, description='Select a path', disabled=False)
dd

Dropdown(description='Select a path', options=('granzyme a mediated apoptosis pathway(BioCarta)', 'hiv-1 nef: …

In [7]:
xml_result = activity_obj.xmlparser(path_id=dd.value, sample_num=0) #Select the required sample number.

Create Kegg XML for path: granzyme a mediated apoptosis pathway(BioCarta), sample: 0
Output XML file: /tmp/output_path.xml


In [29]:
from ipywidgets import HTML

#HTML("""<a download='test.csv' href='data:text/csv;charset=utf-8;base64,c2VwYWxfbGVuZ3RoLHNlcGFsX3dpZHRoLHBldGFsX2xlbmd0aCxwZXRhbF93aWR0aCxzcGVjaWVzDQo1LjEsMy41LDEuNCwwLjIsc2V0b3NhDQo0LjksMywxLjQsMC4yLHNldG9zYQ0KNC43LDMuMiwxLjMsMC4yLHNldG9zYQ0K'>asdf</a>""")
HTML(f"<a download='path.xml' href='data:text;charset=utf-8,{xml_result}'>path.xml</a>")

HTML(value='<a download=\'path.xml\' href=\'data:text;charset=utf-8,<?xml version="1.0"?><!DOCTYPE pathway SYS…

In [27]:
xml_result

'<?xml version="1.0"?><!DOCTYPE pathway SYSTEM "https://www.kegg.jp/kegg/xml/KGML_v0.7.2_.dtd"><pathway name="path:hsa100035" org="hsa" number="granzyme a mediated apoptosis pathway(BioCarta)" title="100035"><entry id="GZMB" name="hsa:4967 hsa:28151" type="protein" link="https://www.kegg.jp/dbget-bin/www_bget?hsa:4967+hsa:28151"> <graphics name="GZMB" fgcolor="#11FF00" bgcolor="#42ECEF" x="0.03754758905328347" type="rectangle" width="32" height="17"/> </entry><entry id="100473" name="100473" type="reaction" link="https://www.kegg.jp/dbget-bin/www_bget?C00158"> <graphics name="translocation" fgcolor="#00FF00" bgcolor="#FFF8F8" x="0.003746694081506414" y="0.9589870755245716" type="circle" width="60" height="15"/> </entry><relation entry1="100473" entry2="GZMB" type="PPrel"> <subtype name="activation" value="-->"/> </relation><relation entry1="GZMB" entry2="100473" type="PPrel"> <subtype name="activation" value="-->"/> </relation><entry id="PRF1" name="hsa:4967 hsa:101015" type="protein" 