# Generate the matrix based on log2Foldchange

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import glob
import numpy as np
from ipywidgets import widgets
from IPython.display import Markdown, display

def printmd(string):
    display(Markdown(string))
    
# Display output text box widget (will populate when value submitted in input)
printmd("**Type the log2fold change, press Enter**")
name=input() 


transcript = widgets.Dropdown(options = ['mRNA', 'lncRNA', 'lncRNA + mRNA'])
printmd("**Select the transcript type and click on Run**")
display(transcript)



**Type the log2fold change, press Enter**

-0.58


**Select the transcript type and click on Run**

Dropdown(options=('mRNA', 'lncRNA', 'lncRNA + mRNA'), value='mRNA')

In [156]:
new_variable = float(name)
printmd("**Cut-off:**")
print(new_variable)

#read files for foldchange criteria
if transcript.value == 'mRNA':
    all_files = glob.glob('data/*mRNA_change.txt')
elif transcript.value == 'lncRNA':
    all_files = glob.glob('data/*lncRNA_change.txt')
elif transcript.value == 'lncRNA + mRNA':
    all_files = glob.glob('data/*_change.txt') 
list_of_dfs = [pd.read_csv(all_files, sep="\t").drop(['Gene', 'baseMean','lfcSE','stat','pvalue', 'padj'], 1).set_index('Transcript') for all_files in all_files]

#loop for creating a list of datadrames - 1 per file
for dataframe, all_file in zip(list_of_dfs, all_files):
    all_file2 = all_file[5:].split('_')
    dataframe.rename(columns={'log2FoldChange':all_file2[0]}, inplace=True)

#Combine all the dfs
combined_df = pd.concat(list_of_dfs, axis=1, sort=True)
#combined_df_pvalue = pd.concat(list_of_dfs2, axis=1, sort=True)

#Create a coloumn named label containing all the cell line intersections based on gene name. Replace "-1" by the wanted foldchange
combined_df['label'] = (combined_df < new_variable).apply(lambda y: combined_df.columns[y.tolist()].tolist(), axis=1)

#Count the number of intersection using "," as delimiter
combined_df['Count Overlap'] = combined_df['label'].astype(str).str.count("\,") +1 

#Sort based on # of intersections
combined_df_sorted = combined_df.sort_values(by=['Count Overlap'], ascending=False)

#remove empty intersection
combined_df_sorted_clean = combined_df_sorted[~combined_df_sorted['label'].astype(str).str.contains('\[]')].fillna(0)

if transcript.value == 'lncRNA + mRNA':
    combined_df_sorted_clean = combined_df_sorted_clean.groupby(combined_df_sorted_clean.columns, axis=1, sort=False).sum()
combined_df_sorted_clean

**Select the transcript type**

Dropdown(options=('mRNA', 'lncRNA', 'lncRNA + mRNA'), value='mRNA')

**Cut-off:**

-0.58


Unnamed: 0,CL40,COLO320,GTG7,HT55,HUTU80,LOVO,LS180,LS411,MDST8,SW1463,label,Count Overlap
NM_021103,-3.376571,-2.940238,-1.757715,-1.995440,0.337592,-1.113494,-1.008932,0.010219,-0.867050,-1.167274,"[CL40, COLO320, GTG7, HT55, LOVO, LS180, MDST8...",8
NM_002165,-1.181748,0.344808,-0.730505,-1.497162,-1.164779,-1.004176,-2.316883,-0.714432,-1.417058,0.575379,"[CL40, GTG7, HT55, HUTU80, LOVO, LS180, LS411,...",8
NM_002513,1.020166,-1.138803,-1.072349,-2.902673,-1.190006,-1.635652,-0.406431,-1.791223,-1.933246,-2.029388,"[COLO320, GTG7, HT55, HUTU80, LOVO, LS411, MDS...",8
NM_001039842,3.299277,-0.724057,-0.758224,-0.250184,-1.358259,-1.612723,-0.665963,-1.062795,-2.541358,-1.150794,"[COLO320, GTG7, HUTU80, LOVO, LS180, LS411, MD...",8
NM_199246,-1.157603,-0.853996,0.939694,-1.765926,0.000000,-0.832885,-0.759741,0.212621,-0.831559,-0.979851,"[CL40, COLO320, HT55, LOVO, LS180, MDST8, SW1463]",7
...,...,...,...,...,...,...,...,...,...,...,...,...
NM_004185,-0.348659,-0.152684,-0.834357,0.765374,0.000000,0.000000,0.000000,0.000000,0.000000,1.488374,[GTG7],1
NM_004232,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.650310,0.000000,0.000000,0.000000,[LS180],1
NM_004217,0.312928,0.821141,-0.488065,0.628933,-0.602056,-0.073617,0.690702,0.324438,-0.001701,0.000000,[HUTU80],1
NM_004226,-0.195291,0.000000,0.577943,0.032632,0.000000,-0.166023,-0.051205,0.419706,-1.029061,0.000000,[MDST8],1


## Save the matrix as a excel file:

In [18]:
def csv_download_link(df, csv_file_name, delete_prompt=True):
    """Display a download link to load a data frame as csv from within a Jupyter notebook"""
    df.to_csv(csv_file_name, sep='\t')
    from IPython.display import FileLink
    display(FileLink(csv_file_name))
csv_download_link(combined_df_sorted_clean, 'matrix_TEAD_lncRNA.csv')   

## Visualize Tracks

In [None]:
import coolbox
from coolbox.api import *


frame = BigWig("tracks/LOVO_TEAD.bam.f.bw") + TrackHeight(2) + Color("#e79e25")  + Title("LOVO_TEAD") +\
        BigWig("tracks/LOVO_TEAD.bam.r.bw") + TrackHeight(2) + Color("#e79e25")  + Title("LOVO_TEAD") + Inverted() +\
        BigWig("tracks/LOVO_GFP.bam.f.bw") + TrackHeight(2) + Color("#e79e25")  + Title("LOVO_GFP") +\
        BigWig("tracks/LOVO_GFP.bam.r.bw") + TrackHeight(2) + Color("#e79e25")  + Title("LOVO_GFP") + Inverted() +\
        BigWig("tracks/LS411_TEAD.bam.f.bw") + TrackHeight(2) + Color("#e79e25")  + Title("LS411_TEAD") +\
        BigWig("tracks/LS411_TEAD.bam.r.bw") + TrackHeight(2) + Color("#e79e25")  + Title("LS411_TEAD") + Inverted() +\
        BigWig("tracks/GTG7_TEAD.bam.f.bw") + TrackHeight(2) + Color("#0f71ae")  + Title("GTG7_TEAD") +\
        BigWig("tracks/GTG7_TEAD.bam.r.bw") + TrackHeight(2) + Color("#0f71ae")  + Title("GTG7_TEAD") + Inverted() +\
        BigWig("tracks/GTG7_GFP.bam.f.bw") + TrackHeight(2) + Color("#0f71ae")  + Title("GTG7_GFP") +\
        BigWig("tracks/GTG7_GFP.bam.r.bw") + TrackHeight(2) + Color("#0f71ae")  + Title("GTG7_GFP") + Inverted() +\
        BigWig("tracks/HT55_TEAD.bam.f.bw") + TrackHeight(2) + Color("#0f71ae")  + Title("HT55_TEAD") +\
        BigWig("tracks/HT55_TEAD.bam.r.bw") + TrackHeight(2) + Color("#0f71ae")  + Title("HT55_TEAD") + Inverted() +\
        BigWig("tracks/HT55_GFP.bam.f.bw") + TrackHeight(2) + Color("#0f71ae")  + Title("HT55_GFP") +\
        BigWig("tracks/HT55_GFP.bam.r.bw") + TrackHeight(2) + Color("#0f71ae")  + Title("HT55_GFP") + Inverted() +\
        BigWig("tracks/SW1463_TEAD.bam.f.bw") + TrackHeight(2) + Color("#0f71ae")  + Title("SW1463_TEAD") +\
        BigWig("tracks/SW1463_TEAD.bam.r.bw") + TrackHeight(2) + Color("#0f71ae")  + Title("SW1463_TEAD") + Inverted() +\
        BigWig("tracks/SW1463_GFP.bam.f.bw") + TrackHeight(2) + Color("#0f71ae")  + Title("SW1463_GFP") +\
        BigWig("tracks/SW1463_GFP.bam.r.bw") + TrackHeight(2) + Color("#0f71ae")  + Title("SW1463_GFP") + Inverted() +\
        BigWig("tracks/CL40_TEAD.bam.f.bw") + TrackHeight(2) + Color("#c47da1")  + Title("CL40_TEAD") +\
        BigWig("tracks/CL40_TEAD.bam.r.bw") + TrackHeight(2) + Color("#c47da1")  + Title("CL40_TEAD") + Inverted() +\
        BigWig("tracks/LS180_TEAD.bam.f.bw") + TrackHeight(2) + Color("#c47da1")  + Title("LS180_TEAD") +\
        BigWig("tracks/LS180_TEAD.bam.r.bw") + TrackHeight(2) + Color("#c47da1")  + Title("LS180_TEAD") + Inverted() +\
        BigWig("tracks/HUTU80_TEAD.bam.f.bw") + TrackHeight(2) + Color("#169c73")  + Title("HUTU80_TEAD") +\
        BigWig("tracks/HUTU80_TEAD.bam.r.bw") + TrackHeight(2) + Color("#169c73")  + Title("HUTU80_TEAD") + Inverted() +\
        BigWig("tracks/HUTU80_GFP.bam.f.bw") + TrackHeight(2) + Color("#169c73")  + Title("HUTU80_GFP") +\
        BigWig("tracks/HUTU80_GFP.bam.r.bw") + TrackHeight(2) + Color("#169c73")  + Title("HUTU80_GFP") + Inverted() +\
        BigWig("tracks/COLO320_TEAD.bam.f.bw") + TrackHeight(2) + Color("#169c73")  + Title("COLO320_TEAD") +\
        BigWig("tracks/COLO320_TEAD.bam.r.bw") + TrackHeight(2) + Color("#169c73")  + Title("COLO320_TEAD") + Inverted() +\
        BigWig("tracks/COLO320_GFP.bam.f.bw") + TrackHeight(2) + Color("#169c73")  + Title("COLO320_GFP") +\
        BigWig("tracks/COLO320_GFP.bam.r.bw") + TrackHeight(2) + Color("#169c73")  + Title("COLO320_GFP") + Inverted() +\
        BigWig("tracks/MDST8_TEAD.bam.f.bw") + TrackHeight(2) + Color("#169c73")  + Title("MDST8_TEAD") +\
        BigWig("tracks/MDST8_TEAD.bam.r.bw") + TrackHeight(2) + Color("#169c73")  + Title("MDST8_TEAD") + Inverted() +\
        BigWig("tracks/MDST8_GFP.bam.f.bw") + TrackHeight(2) + Color("#169c73")  + Title("MDST8_GFP") +\
        BigWig("tracks/MDST8_GFP.bam.r.bw") + TrackHeight(2) + Color("#169c73")  + Title("MDST8_GFP") + Inverted() 
 
frame = XAxis() + frame + XAxis()

bsr = Browser(frame)
bsr.show()

## Generate high quality Fig

Do not forget to set the genomic coordinates

In [None]:
fig = frame.plot("chr11", 64085560-5000, 64089283+5000)
fig.savefig('tracks.pdf')

## Expression of a specific lncRNA

In [1]:
combined_df_sorted_clean.loc["ENSG00000163597.14_2"]

NameError: name 'combined_df_sorted_clean' is not defined