# Introduction
Use this notebook to create the pickled version of the DESeq2 data for later loading in other notebooks.

# Setup

In [1]:
import scanpy as sc
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from scipy import sparse
import warnings
import itertools as it
import json
import seaborn as sns
import pickle as pkl
from functools import reduce
import gc
import timeit
import os

from nero import Nero as nr

In [2]:
# Session-wide scanpy configuration.
# Verbosity 4 asks scanpy for its most detailed log output (see the scanpy settings docs).
sc.settings.verbosity = 4
# Apply scanpy's figure defaults, rendering figures at 80 dpi.
sc.settings.set_figure_params(dpi=80)
# Number of parallel jobs scanpy is allowed to use by default.
sc.settings.n_jobs = 30

In [3]:
# Root of the mounted project data. Every prefix below is derived from it and
# keeps a trailing slash so that sub-paths can be appended by plain concatenation.
mountpoint = '/data/clue/'
prefix_adts = f'{mountpoint}prod/adts/'  # ADT data
prefix_mrna = f'{mountpoint}prod/mrna/'  # mRNA data
prefix_comb = f'{mountpoint}prod/comb/'  # presumably the combined mRNA + ADT data -- confirm

# Load in Data

## mRNA

In [4]:
# Names of the entries under the mRNA `vals/de/` directory; each one is used
# below both as a sub-directory name and as a top-level key of `de`.
de_dirs = os.listdir(f'{prefix_mrna}vals/de/')

In [5]:
# Differential expression dictionary skeleton: de[de_dir][ct_type] starts as an
# empty dict for every entry of `de_dirs` and for both 'ct2' and 'ct3'.
de = {
    dir_name: {level: dict() for level in ['ct2', 'ct3']}
    for dir_name in de_dirs
}

In [6]:
# Read every mRNA result CSV into de[de_dir][ct_type][cond][ct].
# The part of each file name before the first '.' is split at its first
# underscore into `cond` and `ct`, so `ct` may itself contain underscores.
ct_types = ['ct2', 'ct3']
# Derive the progress-bar total from the directories actually found instead of
# hard-coding 4, which is only right when `de_dirs` has exactly two entries.
n_combos = len(de_dirs) * len(ct_types)
for de_dir, ct_type in tqdm(it.product(de_dirs, ct_types), total=n_combos):
    prefix_de = prefix_mrna + 'vals/de/' + de_dir + '/' + ct_type + '/res/'
    for fname in os.listdir(prefix_de):
        cond, ct = fname.split('.')[0].split('_', 1)
        # The first CSV column becomes the index; rows containing any NaN are dropped.
        table = pd.read_csv(prefix_de + fname, index_col=0).dropna()
        # setdefault creates the per-condition dict the first time `cond` is seen.
        de[de_dir][ct_type].setdefault(cond, dict())[ct] = table

  0%|          | 0/4 [00:00<?, ?it/s]

In [7]:
# Make sure the mRNA pickle output directory exists. makedirs also creates a
# missing parent `pkls/` directory (plain mkdir would raise FileNotFoundError),
# and exist_ok=True makes re-running this cell a no-op.
os.makedirs(prefix_mrna + 'pkls/de/', exist_ok=True)

In [8]:
# Persist the mRNA differential-expression tables, then reload them from disk so
# that `de` holds exactly what other notebooks will load.
# A pickle is written only when it does not exist yet, so an existing file is
# never overwritten; delete the file to regenerate it from the CSV results.
pkl_dir = prefix_mrna + 'pkls/de/'
de_from_disk = dict()
for de_name in ['IFNs', 'all']:
    pkl_path = pkl_dir + de_name + '.pkl'
    if not os.path.exists(pkl_path):
        # First run: `de` must still hold the tables parsed from the CSV files.
        with open(pkl_path, 'wb') as file:
            pkl.dump(de[de_name], file)
    # NOTE: unpickling can execute arbitrary code; only load files written by this notebook.
    with open(pkl_path, 'rb') as file:
        de_from_disk[de_name] = pkl.load(file)
de = de_from_disk

## ADTs

In [14]:
# Names of the entries under the ADT `vals/de/` directory; each one is used
# below both as a sub-directory name and as a top-level key of `de`.
de_dirs = os.listdir(f'{prefix_adts}vals/de/')

In [15]:
# Differential expression dictionary skeleton for the ADT data:
# de[de_dir][ct_type] starts as an empty dict for every entry of `de_dirs`
# and for both 'ct2' and 'ct3'. This replaces any previous contents of `de`.
de = {
    dir_name: {level: dict() for level in ['ct2', 'ct3']}
    for dir_name in de_dirs
}

In [16]:
# Read every ADT result CSV into de[de_dir][ct_type][cond][ct].
# The part of each file name before the first '.' is split at its first
# underscore into `cond` and `ct`, so `ct` may itself contain underscores.
ct_types = ['ct2', 'ct3']
# Derive the progress-bar total from the directories actually found instead of
# hard-coding 4, which is only right when `de_dirs` has exactly two entries.
n_combos = len(de_dirs) * len(ct_types)
for de_dir, ct_type in tqdm(it.product(de_dirs, ct_types), total=n_combos):
    prefix_de = prefix_adts + 'vals/de/' + de_dir + '/' + ct_type + '/res/'
    for fname in os.listdir(prefix_de):
        cond, ct = fname.split('.')[0].split('_', 1)
        # The first CSV column becomes the index; rows containing any NaN are dropped.
        table = pd.read_csv(prefix_de + fname, index_col=0).dropna()
        # setdefault creates the per-condition dict the first time `cond` is seen.
        de[de_dir][ct_type].setdefault(cond, dict())[ct] = table

  0%|          | 0/4 [00:00<?, ?it/s]

In [17]:
# Make sure the ADT pickle output directory exists. makedirs also creates a
# missing parent `pkls/` directory (plain mkdir would raise FileNotFoundError),
# and exist_ok=True makes re-running this cell a no-op.
os.makedirs(prefix_adts + 'pkls/de/', exist_ok=True)

In [18]:
# Persist the ADT differential-expression tables, then reload them from disk so
# that `de` holds exactly what other notebooks will load.
# A pickle is written only when it does not exist yet, so an existing file is
# never overwritten; delete the file to regenerate it from the CSV results.
pkl_dir = prefix_adts + 'pkls/de/'
de_from_disk = dict()
for de_name in ['IFNs', 'all']:
    pkl_path = pkl_dir + de_name + '.pkl'
    if not os.path.exists(pkl_path):
        # First run: `de` must still hold the ADT tables parsed from the CSV files.
        with open(pkl_path, 'wb') as file:
            pkl.dump(de[de_name], file)
    # NOTE: unpickling can execute arbitrary code; only load files written by this notebook.
    with open(pkl_path, 'rb') as file:
        de_from_disk[de_name] = pkl.load(file)
de = de_from_disk