In [1]:
import re
import numpy as np
import pandas as pd
import seaborn as sns

from pathlib import Path
from scipy import stats
from veliadb import base, settings
from veliadb import benchling_orm as bo
from veliadb.base import (Session, Orf, OrfXref, Transcript, Gene, 
                          TranscriptOrf, SequenceRegionXref, Protein, 
                          ProteinXref, Dataset, ProteinOrf)

from sqlalchemy.sql.expression import func, and_, or_

from dashboard import data_load
import pyarrow.parquet as pq
from dashboard.etl import CACHE_DIR, TPM_DESEQ2_FACTOR, DATA_DIR

# Notebook display settings: render up to 50 columns and up to 100 characters
# per cell before pandas truncates the output.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', 100)



In [3]:
# Open the database session used by every query in this notebook
# (`Session` is the same factory that `base.Session` refers to).
session = Session()

In [4]:
# Sanity check: total number of rows returned by a query over Orf.
session.query(Orf).count()

16282850

### Swiss-Prot sORFs (proteins shorter than 150 aa)

In [35]:
# One (Protein, Orf) pair per protein that has a cross-reference in the
# 'swissprot' dataset and an amino-acid sequence shorter than 150 residues.
#
# `.distinct(<column>)` renders DISTINCT ON, which keeps a single row per
# protein id. Which row is kept is only well defined when ORDER BY starts with
# the same expression, so the query orders by protein id and then ORF id: the
# ORF with the lowest id is kept for each protein, making the result
# reproducible between runs.
# NOTE(review): DISTINCT ON is PostgreSQL-specific; this assumes the session is
# bound to a PostgreSQL database - confirm.
swissprot_query = (
    session.query(Protein, Orf)
    .join(ProteinXref, ProteinXref.protein_id == Protein.id)
    .join(Dataset, Dataset.id == ProteinXref.xref_dataset_id)
    .join(ProteinOrf, ProteinOrf.protein_id == Protein.id)
    .join(Orf, Orf.id == ProteinOrf.orf_id)
    .filter(Dataset.name == 'swissprot')
    .filter(func.length(Protein.aa_seq) < 150)
    .order_by(ProteinXref.protein_id, Orf.id)
    .distinct(ProteinXref.protein_id)
)



In [32]:
# Load the externally provided ORF table (per its file name: ORFs under
# 150 codons with an assigned function).
alan_df = pd.read_csv(
    filepath_or_buffer='../../data/orfs_under_150codons_with_function-2.csv',
)

In [37]:
# Export one VTX identifier per Swiss-Prot hit, one per line. The identifier is
# the ORF's database id zero-padded to seven digits (e.g. VTX-0000042).
#
# The ids are produced by a generator with private loop names so this cell does
# not rebind the notebook-level `prot` / `orf` variables that the inspection
# cells read.
with open('../cache_updates/swissprot_sORF_150aa_vtx.txt', 'w') as outfile:
    outfile.writelines(
        f'VTX-{_orf.id:07d}\n' for _prot, _orf in swissprot_query.all()
    )
    

In [28]:
# Fetch a single (Protein, Orf) pair for interactive inspection.
# Query.first() returns None when the query matches nothing, which would make
# the tuple-unpacking fail with an opaque TypeError; fail with a clear message
# instead.
first_hit = swissprot_query.first()
assert first_hit is not None, 'swissprot_query returned no rows'
prot, orf = first_hit

In [19]:
# Display the fetched Protein record. The cell previously held the incomplete
# expression `prot.`, which raised a SyntaxError when run.
prot

In [15]:
# Show the Ensembl protein identifier stored on the inspected Protein record.
prot.ensembl_protein_id

'ENSP00000319240.2'

In [14]:
# Show the same attribute on the paired Orf record, to compare it with the
# value stored on the protein.
orf.ensembl_protein_id

'ENSP00000319240.2'

In [5]:
# Row count for ProteinOrf. swissprot_query inner-joins through ProteinOrf, so
# a count of 0 here means that query cannot return any rows.
session.query(ProteinOrf).count()

0

In [19]:
# Load the cached sORF table from the dashboard cache directory.
df = pd.read_parquet(CACHE_DIR / 'sorf_df.parq')

In [20]:
# Column groups used to slice the sORF table.
signal_cols = [
    'SignalP 4.1_cut',
    'SignalP 5b_cut',
    'SignalP 6slow_cut',
    'Deepsig_cut',
]
conservation_cols = [
    'tblastn_align_identity',
    'blastp_align_identity',
    'nonsig_blastp_align_identity',
    'nonsig_tblastn_align_identity',
]
isoform_cols = ['swissprot_isoform', 'ensembl_isoform', 'refseq_isoform']
conservation_threshold = 50

# True for rows where at least one signal-peptide column holds a value above -1.
# NOTE(review): -1 looks like the "no cleavage site predicted" sentinel - confirm.
# NOTE(review): despite the name, only the prediction columns in `signal_cols`
# are consulted here; no measured-secretion column is part of this mask.
measured_secreted_or_predicted_secreted = df[signal_cols].gt(-1).any(axis=1)

In [24]:
# Keep only the rows flagged by the secretion mask.
x = df.loc[measured_secreted_or_predicted_secreted]

In [26]:
# Distinct screening-phase labels present among the filtered rows.
{phase for phase in x['screening_phase']}

{'Not Screened',
 'Phase 1',
 'Phase 2',
 'Phase 3',
 'RefSeq;ENSEMBL;velia_phase5_uniprot-tremble',
 'RefSeq;velia_phase1_Chen;velia_phase1_Bona fide;velia_phase2_Bonafide_Bianca;velia_phase3_nan;velia_phase1_Prensner;ENSEMBL',
 'RefSeq;velia_phase5_uniprot-tremble',
 'velia_phase4_nan',
 'velia_phase4_nan;openprot',
 'velia_phase4_nan;velia_phase1_Chen',
 'velia_phase4_nan;velia_phase1_Chen;openprot',
 'velia_phase5_Blume_Mudge;velia_phase6_plasma_mass_spec',
 'velia_phase5_autoimmune lncRNA',
 'velia_phase5_autoimmune lncRNA;openprot',
 'velia_phase5_autoimmune lncRNA;velia_phase4_nan',
 'velia_phase5_uniprot-tremble',
 'velia_phase5_uniprot-tremble;openprot',
 'velia_phase6_autoimmune_gwas;openprot',
 'velia_phase6_public_mass_spec',
 'velia_phase6_public_mass_spec;openprot'}

In [30]:
# Filtered rows whose screening phase is anything other than 'Phase 3'.
x.loc[x['screening_phase'].ne('Phase 3')]

Unnamed: 0_level_0,show_details,vtx_id,aa_length,screening_phase_id,screening_phase,ucsc_track,source,orf_xrefs,protein_xrefs,gene_xrefs,transcript_xrefs,transcripts_exact,aa,nucl,index_copy,genscript_id,chr,strand,start,end,chrom_starts,block_sizes,phases,Ribo-Seq RPKM Support,blastp_score,...,Deepsig_cut,SignalP 6slow_cut,SignalP 5b_cut,SignalP 4.1_cut,Phobius,DeepTMHMM,nonsignal_seqs,trans1,trans2,trans3,sec1,sec2,sec3,translated_mean,secreted_mean,secreted,translated,swissprot_isoform,ensembl_isoform,refseq_isoform,phylocsf_58m_avg,phylocsf_58m_max,phylocsf_58m_min,phylocsf_vals,ESMFold plddt 90th percentile
vtx_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
VTX-0688940,False,VTX-0688940,126,-1,Not Screened,chr4:54658015-54695825,[],[],[],ENSG00000157404.17;HGNC:6342;KIT,ENST00000514582.1;OTTHUMT00000361418.1,[ENST00000514582],MRGARGAWDFLCVLLLLLRVQTGSSQPSVSPGEPSPPSIHPGKSDLIVRVGDEIRLLCTDPGFVKWTFEILDETNENKQNEWITEKAEATNTGKYT...,ATGAGAGGCGCTCGCGGCGCCTGGGATTTTCTCTGCGTTCTGCTCCTACTGCTTCGCGTCCAGACAGGCTCTTCTCAACCATCTGTGAGTCCAGGG...,VTX-0688940,,chr4,+,54658015,54695825,54658015;54695512,67;314,0;2,True,389.0,...,25,25,25,25,-1,25,-1.0,,,,,,,,,True,True,,,,1.045192,10.397,-8.702,"[-0.17299999296665192, -0.17299999296665192, -0.17299999296665192, -0.1679999977350235, -0.16799...",69.523293
VTX-0826553,False,VTX-0826553,96,-1,Not Screened,chr9:128149526-128150527,[],[],[],ENSG00000148346.12;HGNC:6526;LCN2,ENST00000470902.5;OTTHUMT00000054376.1;ENST00000487719.1;OTTHUMT00000054379.1,"[ENST00000470902, ENST00000487719]",MPLGLLWLGLALLGALHAQAQDSTSDLIPAPPLSKVPLQQNFQDNQVRADISWGCESQTDVTGKGWPKLRDPVVHLAVLPGIHLCSSFLCSLEQRL,ATGCCCCTAGGTCTCCTGTGGCTGGGCCTAGCCCTGTTGGGGGCTCTGCATGCCCAGGCCCAGGACTCCACCTCAGACCTGATCCCAGCCCCACCT...,VTX-0826553,,chr9,+,128149526,128150527,128149526;128150375,138;153,0;0,True,64.0,...,20,20,20,20,-1,18,-1.0,47270.0,53510.0,30520.0,21700.0,22370,99480.0,43766.666667,21700.0,True,True,,,,-4.595113,6.380,-14.386,"[-3.259000062942505, -3.259000062942505, -3.259000062942505, -6.340000152587891, -6.340000152587...",43.645977
VTX-0846585,False,VTX-0846585,50,-1,Not Screened,chrX:153794567-153795715,[],[],[],ENSG00000180879.14;HGNC:11326;SSR4,ENST00000482902.5;OTTHUMT00000061031.1;11326;CHS.59049.11;NR_037927.1;RefSeq:NR_037927.1;rna-NR_...,[ENST00000482902],MPAQPFGREEAMAAMASLGALALLLLSSLSRCSGICLLWSRPRAFSDVLC,ATGCCGGCCCAGCCGTTCGGCAGAGAAGAGGCGATGGCGGCGATGGCATCTCTCGGCGCCCTGGCGCTGCTCCTGCTGTCCAGCCTCTCCCGCTGC...,VTX-0846585,,chrX,+,153794567,153795715,153794567;153794674;153795663,19;81;53,0;2;2,False,,...,-1,34,-1,-1,-1,30,-1.0,,,,,,,,,False,,,,,,,,,67.050358
VTX-0738397,False,VTX-0738397,34,-1,Not Screened,chr6:32166166-32166270,[],[],[],ENSG00000258388.7;HGNC:48343;PPT2-EGFL8;13944;80864;EGFL8;EGFL8;ENSG00000241404.7;gene-EGFL8;HGN...,ENST00000421600.2;OTTHUMT00000412522.2;ENST00000428388.6;OTTHUMT00000412520.1;ENST00000453656.6;...,"[ENST00000421600, ENST00000428388, ENST00000453656, ENST00000466239, ENST00000583227]",MGSRAELCTLLGGFSFLLLLIPGEGAKGGSLRER,ATGGGGTCCAGGGCTGAGCTGTGCACTCTCTTAGGCGGATTCTCCTTCCTCCTGCTACTGATACCAGGCGAGGGGGCCAAGGGTGGATCCCTCAGA...,VTX-0738397,,chr6,+,32166166,32166270,32166166,105,0,False,86.0,...,25,-1,25,-1,-1,25,-1.0,710.0,670.0,580.0,10.0,10,20.0,653.333333,10.0,False,False,,,,-4.481629,-2.070,-8.226,"[-3.309999942779541, -3.309999942779541, -3.309999942779541, -4.703999996185303, -4.703999996185...",65.348766
VTX-0849971,False,VTX-0849971,43,-1,Phase 2,chr3:107526338-107645906,[],[],"[BBX_107245184_43aa, c3riboseqorf111_GENE_ID=ENSG00000114439_TYPE=uORF_LEN=132, Phase 2_465, Pha...",BBX;ENSG00000114439.19;HGNC:14422,14422;BBX;CCDS46881.1;CHS.37927.9;ENST00000325805.13;NM_001142568.3;OTTHUMT00000317820.2;RefSeq:...,"[ENST00000325805, ENST00000402163, ENST00000402543, ENST00000415149, ENST00000416476, ENST000004...",MTKALPQFIFLPVYFPFAFLESCCITEAGSSDVPLKSQWKVLT,ATGACAAAGGCTTTGCCGCAGTTCATCTTCCTCCCTGTGTACTTTCCATTTGCCTTCCTGGAATCCTGCTGCATCACAGAAGCTGGAAGTTCTGAT...,VTX-0849971,,chr3,+,107526338,107645906,107526338;107645836,61;71,0;2,True,,...,20,-1,28,20,-1,-1,-1.0,30005.0,25575.0,26795.0,2745.0,2560,4715.0,27458.333333,2745.0,False,True,,,,-5.923364,-4.148,-11.434,"[-4.1479997634887695, -4.1479997634887695, -4.1479997634887695, -4.9770002365112305, -4.97700023...",50.666769
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VTX-0338425,False,VTX-0338425,83,-1,Not Screened,chr1:230795167-230795418,[],[],[],ENSG00000244137;ENSG00000244137.1,ENST00000412344.1;OTTHUMT00000092178.1,[ENST00000412344],MGPHTSDHGSLWSIFAITLTRLFLMRSWRVTEDATPRCTLSLAGHLRLAFSSRTRQLRKSSKSSWSSSSAYLSTISCRRWLLS,ATGGGTCCACACACATCAGACCATGGCTCTCTGTGGTCTATCTTTGCCATTACTTTAACCAGGCTGTTTCTCATGCGCTCATGGAGGGTGACTGAA...,VTX-0338425,,chr1,-,230795167,230795418,230795167,252,0,False,,...,-1,-1,-1,27,-1,-1,-1.0,,,,,,,,,False,,,,,,,,,42.575333
VTX-0317567,False,VTX-0317567,45,-1,velia_phase4_nan;openprot,chr1:143879917-143880054,[],[],"[IP_669255, Phase 4_82, sORF482]",ENSG00000290735;ENSG00000290735.1,ENST00000415338.1;OTTHUMT00000098435.2;ENST00000669710.1;OTTHUMT00000529220.1;20654;CHS.2781.8;H...,"[ENST00000415338, ENST00000669710]",MSLLGLLHYVRLYLSSLDGKSPPELEEVSCHARRGPTRVPQGKGL,ATGTCACTCTTGGGATTGCTACATTATGTAAGACTCTATCTTAGCAGTCTGGATGGAAAGTCTCCTCCTGAACTTGAAGAGGTAAGCTGCCACGCC...,VTX-0317567,,chr1,-,143879917,143880054,143879917,138,0,False,,...,19,-1,-1,-1,-1,25,-1.0,14200.0,13190.0,14040.0,910.0,2900,1490,13810.000000,910.0,False,True,,,,-7.189133,-3.629,-12.545,"[-6.5279998779296875, -6.5279998779296875, -6.5279998779296875, -7.103000164031982, -7.103000164...",57.336015
VTX-0732678,False,VTX-0732678,46,-1,Phase 2,chr6:8609641-8609781,[],[],"[AL591485.1/ENST00000645486.1/ncRNA/-, Phase 2_277, sORF2277]",ENSG00000285219.4;HGNC:34232;HULC,ENST00000645486.1;OTTHUMT00000495768.1,[ENST00000645486],MSRGPGEPEILAVDLVFVALLLIGIEVNLCDHKGECSQNGQTCSQL,ATGAGTCGTGGGCCCGGGGAACCAGAAATCTTGGCTGTTGATCTGGTATTTGTGGCTCTATTGCTGATTGGTATAGAAGTGAACTTATGTGACCAT...,VTX-0732678,,chr6,+,8609641,8609781,8609641,141,0,False,54.0,...,30,-1,-1,30,-1,29,-1.0,1530.0,2290.0,1710.0,1280.0,530,330,1843.333333,1280.0,False,True,,,,-8.232565,-4.439,-14.823,"[-7.859000205993652, -7.859000205993652, -7.859000205993652, -8.086000442504883, -8.086000442504...",62.834780
VTX-0863424,False,VTX-0863424,95,-1,Not Screened,chr2:227813912-227817083,[],[],[],CCL20;ENSG00000115009.13;HGNC:10619,CCDS46536.1;ENST00000409189.7;OTTHUMT00000256898.4;10619;CCL20;CHS.32242.2;NM_001130046.2;RefSeq...,[ENST00000409189],MCCTKSLLLAALMSVLLLHLCGESEASNFDCCLGYTDRILHPKFIVGFTRQLANEGCDINAIIFHTKKKLSVCANPKQTWVKYIVRLLSKKVKNM,ATGTGCTGTACCAAGAGTTTGCTCCTGGCTGCTTTGATGTCAGTGCTGCTACTCCACCTCTGCGGCGAATCAGAAGCAAGCAACTTTGACTGCTGT...,VTX-0863424,,chr2,+,227813912,227817083,227813912;227815457;227816307;227817062,76;112;78;22,0;2;1;1,False,68.0,...,26,26,26,26,-1,26,-1.0,,,,,,,,,False,,,,,,,,,77.181085


In [28]:
# Display the filtered frame (pandas truncates the rendering to the first and
# last rows).
x

Unnamed: 0_level_0,show_details,vtx_id,aa_length,screening_phase_id,screening_phase,ucsc_track,source,orf_xrefs,protein_xrefs,gene_xrefs,transcript_xrefs,transcripts_exact,aa,nucl,index_copy,genscript_id,chr,strand,start,end,chrom_starts,block_sizes,phases,Ribo-Seq RPKM Support,blastp_score,...,Deepsig_cut,SignalP 6slow_cut,SignalP 5b_cut,SignalP 4.1_cut,Phobius,DeepTMHMM,nonsignal_seqs,trans1,trans2,trans3,sec1,sec2,sec3,translated_mean,secreted_mean,secreted,translated,swissprot_isoform,ensembl_isoform,refseq_isoform,phylocsf_58m_avg,phylocsf_58m_max,phylocsf_58m_min,phylocsf_vals,ESMFold plddt 90th percentile
vtx_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
VTX-0688940,False,VTX-0688940,126,-1,Not Screened,chr4:54658015-54695825,[],[],[],ENSG00000157404.17;HGNC:6342;KIT,ENST00000514582.1;OTTHUMT00000361418.1,[ENST00000514582],MRGARGAWDFLCVLLLLLRVQTGSSQPSVSPGEPSPPSIHPGKSDLIVRVGDEIRLLCTDPGFVKWTFEILDETNENKQNEWITEKAEATNTGKYT...,ATGAGAGGCGCTCGCGGCGCCTGGGATTTTCTCTGCGTTCTGCTCCTACTGCTTCGCGTCCAGACAGGCTCTTCTCAACCATCTGTGAGTCCAGGG...,VTX-0688940,,chr4,+,54658015,54695825,54658015;54695512,67;314,0;2,True,389.0,...,25,25,25,25,-1,25,-1.0,,,,,,,,,True,True,,,,1.045192,10.397,-8.702,"[-0.17299999296665192, -0.17299999296665192, -0.17299999296665192, -0.1679999977350235, -0.16799...",69.523293
VTX-0826553,False,VTX-0826553,96,-1,Not Screened,chr9:128149526-128150527,[],[],[],ENSG00000148346.12;HGNC:6526;LCN2,ENST00000470902.5;OTTHUMT00000054376.1;ENST00000487719.1;OTTHUMT00000054379.1,"[ENST00000470902, ENST00000487719]",MPLGLLWLGLALLGALHAQAQDSTSDLIPAPPLSKVPLQQNFQDNQVRADISWGCESQTDVTGKGWPKLRDPVVHLAVLPGIHLCSSFLCSLEQRL,ATGCCCCTAGGTCTCCTGTGGCTGGGCCTAGCCCTGTTGGGGGCTCTGCATGCCCAGGCCCAGGACTCCACCTCAGACCTGATCCCAGCCCCACCT...,VTX-0826553,,chr9,+,128149526,128150527,128149526;128150375,138;153,0;0,True,64.0,...,20,20,20,20,-1,18,-1.0,47270.0,53510.0,30520.0,21700.0,22370,99480.0,43766.666667,21700.0,True,True,,,,-4.595113,6.380,-14.386,"[-3.259000062942505, -3.259000062942505, -3.259000062942505, -6.340000152587891, -6.340000152587...",43.645977
VTX-0846585,False,VTX-0846585,50,-1,Not Screened,chrX:153794567-153795715,[],[],[],ENSG00000180879.14;HGNC:11326;SSR4,ENST00000482902.5;OTTHUMT00000061031.1;11326;CHS.59049.11;NR_037927.1;RefSeq:NR_037927.1;rna-NR_...,[ENST00000482902],MPAQPFGREEAMAAMASLGALALLLLSSLSRCSGICLLWSRPRAFSDVLC,ATGCCGGCCCAGCCGTTCGGCAGAGAAGAGGCGATGGCGGCGATGGCATCTCTCGGCGCCCTGGCGCTGCTCCTGCTGTCCAGCCTCTCCCGCTGC...,VTX-0846585,,chrX,+,153794567,153795715,153794567;153794674;153795663,19;81;53,0;2;2,False,,...,-1,34,-1,-1,-1,30,-1.0,,,,,,,,,False,,,,,,,,,67.050358
VTX-0738397,False,VTX-0738397,34,-1,Not Screened,chr6:32166166-32166270,[],[],[],ENSG00000258388.7;HGNC:48343;PPT2-EGFL8;13944;80864;EGFL8;EGFL8;ENSG00000241404.7;gene-EGFL8;HGN...,ENST00000421600.2;OTTHUMT00000412522.2;ENST00000428388.6;OTTHUMT00000412520.1;ENST00000453656.6;...,"[ENST00000421600, ENST00000428388, ENST00000453656, ENST00000466239, ENST00000583227]",MGSRAELCTLLGGFSFLLLLIPGEGAKGGSLRER,ATGGGGTCCAGGGCTGAGCTGTGCACTCTCTTAGGCGGATTCTCCTTCCTCCTGCTACTGATACCAGGCGAGGGGGCCAAGGGTGGATCCCTCAGA...,VTX-0738397,,chr6,+,32166166,32166270,32166166,105,0,False,86.0,...,25,-1,25,-1,-1,25,-1.0,710.0,670.0,580.0,10.0,10,20.0,653.333333,10.0,False,False,,,,-4.481629,-2.070,-8.226,"[-3.309999942779541, -3.309999942779541, -3.309999942779541, -4.703999996185303, -4.703999996185...",65.348766
VTX-0849971,False,VTX-0849971,43,-1,Phase 2,chr3:107526338-107645906,[],[],"[BBX_107245184_43aa, c3riboseqorf111_GENE_ID=ENSG00000114439_TYPE=uORF_LEN=132, Phase 2_465, Pha...",BBX;ENSG00000114439.19;HGNC:14422,14422;BBX;CCDS46881.1;CHS.37927.9;ENST00000325805.13;NM_001142568.3;OTTHUMT00000317820.2;RefSeq:...,"[ENST00000325805, ENST00000402163, ENST00000402543, ENST00000415149, ENST00000416476, ENST000004...",MTKALPQFIFLPVYFPFAFLESCCITEAGSSDVPLKSQWKVLT,ATGACAAAGGCTTTGCCGCAGTTCATCTTCCTCCCTGTGTACTTTCCATTTGCCTTCCTGGAATCCTGCTGCATCACAGAAGCTGGAAGTTCTGAT...,VTX-0849971,,chr3,+,107526338,107645906,107526338;107645836,61;71,0;2,True,,...,20,-1,28,20,-1,-1,-1.0,30005.0,25575.0,26795.0,2745.0,2560,4715.0,27458.333333,2745.0,False,True,,,,-5.923364,-4.148,-11.434,"[-4.1479997634887695, -4.1479997634887695, -4.1479997634887695, -4.9770002365112305, -4.97700023...",50.666769
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VTX-0338425,False,VTX-0338425,83,-1,Not Screened,chr1:230795167-230795418,[],[],[],ENSG00000244137;ENSG00000244137.1,ENST00000412344.1;OTTHUMT00000092178.1,[ENST00000412344],MGPHTSDHGSLWSIFAITLTRLFLMRSWRVTEDATPRCTLSLAGHLRLAFSSRTRQLRKSSKSSWSSSSAYLSTISCRRWLLS,ATGGGTCCACACACATCAGACCATGGCTCTCTGTGGTCTATCTTTGCCATTACTTTAACCAGGCTGTTTCTCATGCGCTCATGGAGGGTGACTGAA...,VTX-0338425,,chr1,-,230795167,230795418,230795167,252,0,False,,...,-1,-1,-1,27,-1,-1,-1.0,,,,,,,,,False,,,,,,,,,42.575333
VTX-0317567,False,VTX-0317567,45,-1,velia_phase4_nan;openprot,chr1:143879917-143880054,[],[],"[IP_669255, Phase 4_82, sORF482]",ENSG00000290735;ENSG00000290735.1,ENST00000415338.1;OTTHUMT00000098435.2;ENST00000669710.1;OTTHUMT00000529220.1;20654;CHS.2781.8;H...,"[ENST00000415338, ENST00000669710]",MSLLGLLHYVRLYLSSLDGKSPPELEEVSCHARRGPTRVPQGKGL,ATGTCACTCTTGGGATTGCTACATTATGTAAGACTCTATCTTAGCAGTCTGGATGGAAAGTCTCCTCCTGAACTTGAAGAGGTAAGCTGCCACGCC...,VTX-0317567,,chr1,-,143879917,143880054,143879917,138,0,False,,...,19,-1,-1,-1,-1,25,-1.0,14200.0,13190.0,14040.0,910.0,2900,1490,13810.000000,910.0,False,True,,,,-7.189133,-3.629,-12.545,"[-6.5279998779296875, -6.5279998779296875, -6.5279998779296875, -7.103000164031982, -7.103000164...",57.336015
VTX-0732678,False,VTX-0732678,46,-1,Phase 2,chr6:8609641-8609781,[],[],"[AL591485.1/ENST00000645486.1/ncRNA/-, Phase 2_277, sORF2277]",ENSG00000285219.4;HGNC:34232;HULC,ENST00000645486.1;OTTHUMT00000495768.1,[ENST00000645486],MSRGPGEPEILAVDLVFVALLLIGIEVNLCDHKGECSQNGQTCSQL,ATGAGTCGTGGGCCCGGGGAACCAGAAATCTTGGCTGTTGATCTGGTATTTGTGGCTCTATTGCTGATTGGTATAGAAGTGAACTTATGTGACCAT...,VTX-0732678,,chr6,+,8609641,8609781,8609641,141,0,False,54.0,...,30,-1,-1,30,-1,29,-1.0,1530.0,2290.0,1710.0,1280.0,530,330,1843.333333,1280.0,False,True,,,,-8.232565,-4.439,-14.823,"[-7.859000205993652, -7.859000205993652, -7.859000205993652, -8.086000442504883, -8.086000442504...",62.834780
VTX-0863424,False,VTX-0863424,95,-1,Not Screened,chr2:227813912-227817083,[],[],[],CCL20;ENSG00000115009.13;HGNC:10619,CCDS46536.1;ENST00000409189.7;OTTHUMT00000256898.4;10619;CCL20;CHS.32242.2;NM_001130046.2;RefSeq...,[ENST00000409189],MCCTKSLLLAALMSVLLLHLCGESEASNFDCCLGYTDRILHPKFIVGFTRQLANEGCDINAIIFHTKKKLSVCANPKQTWVKYIVRLLSKKVKNM,ATGTGCTGTACCAAGAGTTTGCTCCTGGCTGCTTTGATGTCAGTGCTGCTACTCCACCTCTGCGGCGAATCAGAAGCAAGCAACTTTGACTGCTGT...,VTX-0863424,,chr2,+,227813912,227817083,227813912;227815457;227816307;227817062,76;112;78;22,0;2;1;1,False,68.0,...,26,26,26,26,-1,26,-1.0,,,,,,,,,False,,,,,,,,,77.181085
