# Multi-omics analysis of SARS-CoV-2

Integrate multi-omics (proteome and translatome) data for SARS-CoV-2.

Data obtained from:

- [SARS-CoV-2 infected host cell proteomics reveal potential therapy targets](https://doi.org/10.21203/rs.3.rs-17218/v1)
  Denisa Bojkova, Kevin Klann, Benjamin Koch, Marek Widera, David Krause, Sandra Ciesek, Jindrich Cinatl, Christian Münch

- [Supplementary Table 1](https://assets.researchsquare.com/files/rs-17218/v1/Supplementary%20Table%2001.xlsx)
- [Supplementary Table 2](https://assets.researchsquare.com/files/rs-17218/v1/Supplementary%20Table%2002.xlsx)

In [2]:
import re
import pandas as pd

In [42]:
# Inputs: the proteome and translatome tables, read as tab-separated text.
proteome_infile = "../data/proteome.txt"
translatome_infile = "../data/translatome.txt"

# Outputs: the shared class-label table and one sample-by-feature matrix per
# omics layer, all written as tab-separated text.
# NOTE(review): the "_diablo" suffix presumably refers to the downstream
# DIABLO integration step - confirm.
classes_outfile = "../data/classes_diablo.txt"
proteome_outfile = "../data/proteome_diablo.txt"
translatome_outfile = "../data/translatome_diablo.txt"

In [30]:
# Read the raw proteome table; re-reading here keeps the cell re-runnable.
proteome = pd.read_csv(proteome_infile, sep="\t")

# One class label per sample: the raw header text up to the first underscore,
# taken from raw column positions 2..25.
proteome_classes = proteome.columns[2:26].map(lambda header: re.sub('_.*', '', header))

# Normalise headers (spaces -> underscores, trailing underscore removed), index
# the rows by UniProt accession and discard the gene-symbol annotation column.
proteome = (
    proteome
    .rename(columns=lambda header: re.sub('_$', '', re.sub(' ', '_', header)))
    .set_index("UniProt_Accession")
    .drop("Gene_Symbol", axis=1)
)

# Keep the first 24 remaining columns and transpose so that each row is one of
# those columns and each column is an accession.
# NOTE(review): this lines up with the labels above only if the accession and
# gene-symbol columns are the first two columns of the input file - confirm.
proteome_diablo = proteome.iloc[:, :24].T
proteome_diablo.to_csv(proteome_outfile, sep="\t")
proteome_diablo

Index(['Control 2h', 'Control 2h', 'Control 2h', 'Control 6h', 'Control 6h',
       'Control 6h', 'Control 10h', 'Control 10h', 'Control 10h',
       'Control 24h', 'Control 24h', 'Control 24h', 'Virus 2h', 'Virus 2h',
       'Virus 2h', 'Virus 6h', 'Virus 6h', 'Virus 6h', 'Virus 10h',
       'Virus 10h', 'Virus 10h', 'Virus 24h', 'Virus 24h', 'Virus 24h'],
      dtype='object')

In [34]:
# Read the raw translatome table; re-reading here keeps the cell re-runnable.
translatome = pd.read_csv(translatome_infile, sep="\t")

# One class label per sample: the raw header text up to the first underscore,
# taken from raw column positions 3..26.
translatome_classes = translatome.columns[3:27].map(lambda header: re.sub('_.*', '', header))

# Normalise headers (spaces -> underscores, trailing underscore removed), index
# the rows by accession and discard the two annotation columns.
translatome = (
    translatome
    .rename(columns=lambda header: re.sub('_$', '', re.sub(' ', '_', header)))
    .set_index("Accession")
    .drop(["Gene_Symbol01", "Species_Names01"], axis=1)
)

# Keep the first 24 remaining columns and transpose so that each row is one of
# those columns and each column is an accession.
# NOTE(review): this lines up with the labels above only if the accession and
# the two annotation columns are the first three columns of the input - confirm.
#
# Spreadsheet error strings and missing values are replaced by 0, as before.
# A cell holding a single space is now handled the same way: the previous
# `replace(" ", "_")` matched whole cell values only (it is not a substring
# replace), so it could only ever have written a literal "_" into the matrix.
translatome_diablo = (
    translatome.iloc[:, :24].T
    .replace(["#DIV/0!", "#NUM!", " ", None], 0)
)
translatome_diablo.to_csv(translatome_outfile, sep="\t")

# Only the last expression of a cell is displayed; show the class labels so
# they can be compared with the proteome labels.
translatome_classes

Index(['Control 2h', 'Control 2h', 'Control 2h', 'Control 6h', 'Control 6h',
       'Control 6h', 'Control 10h', 'Control 10h', 'Control 10h',
       'Control 24h', 'Control 24h', 'Control 24h', 'Virus 2h', 'Virus 2h',
       'Virus 2h', 'Virus 6h', 'Virus 6h', 'Virus 6h', 'Virus 10h',
       'Virus 10h', 'Virus 10h', 'Virus 24h', 'Virus 24h', 'Virus 24h'],
      dtype='object')

In [35]:
# Element-wise comparison of the class labels derived from the two omics tables.
classes_match = proteome_classes == translatome_classes
# Fail fast instead of relying on a visual scan of the array: the class table
# written afterwards is built from the proteome labels alone.
assert classes_match.all(), "proteome and translatome class labels differ"
classes_match

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True])

In [44]:
# Both omics layers carry the same class labels (compared in the previous
# cell), so the proteome labels are used for the shared class table.
# The frame keeps its default integer index and a single column labelled 0.
classes = pd.DataFrame(data=proteome_classes)
classes.to_csv(path_or_buf=classes_outfile, sep="\t")
classes

Unnamed: 0,0
0,Control 2h
1,Control 2h
2,Control 2h
3,Control 6h
4,Control 6h
5,Control 6h
6,Control 10h
7,Control 10h
8,Control 10h
9,Control 24h
