# Process and soma reads enrichment

In [None]:
# Import Packages

%load_ext autoreload
%autoreload 2

import os
import warnings 
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# test()

## IO

In [None]:
# Set path
# Root folder of the dataset; the input, output and figure folders are
# resolved beneath it.
base_path = 'path/to/dataset/folder'

input_path = os.path.join(base_path, 'input')

# makedirs(..., exist_ok=True) keeps this cell safe to re-run and, unlike
# os.mkdir, also creates missing parent folders instead of raising
# FileNotFoundError when base_path does not exist yet.
out_path = os.path.join(base_path, 'output')
os.makedirs(out_path, exist_ok=True)

fig_path = os.path.join(base_path, 'figures')
os.makedirs(fig_path, exist_ok=True)

# Point scanpy's figure directory setting at the figures folder.
sc.settings.figdir = fig_path

## Reads distribution (RIBO combined)

### RIBO - rep1

In [None]:
# load reads
# Replicate 1 amplicon tables (assigned and unassigned), read with the first
# CSV column as the row index.
ribo_rep1_assigned_reads_df = pd.read_csv(
    "path/to/assigned_amplicons_RIBOmap_rep1.csv",
    index_col=0,
)
ribo_rep1_unassigned_reads_df = pd.read_csv(
    "path/to/unassigned_amplicons_RIBOmap_rep1.csv",
    index_col=0,
)

In [None]:
# counts
# Number of reads per gene in the assigned table, labelled 'soma reads'.
ribo_rep1_assigned_counts_df = pd.DataFrame(ribo_rep1_assigned_reads_df['gene_name'].value_counts())
ribo_rep1_assigned_counts_df.columns = ['soma reads']

# Number of reads per gene in the unassigned table, labelled 'non-soma reads'.
ribo_rep1_unassigned_counts_df = pd.DataFrame(ribo_rep1_unassigned_reads_df['gene_name'].value_counts())
ribo_rep1_unassigned_counts_df.columns = ['non-soma reads']

# The column-wise concat is an outer join on gene name, so a gene present in
# only one of the two tables gets NaN in the other column. A missing count
# means zero reads; fill it so later arithmetic (replicate sums, fractions)
# does not turn into NaN for such genes.
ribo_rep1_reads_df = pd.concat(
    [ribo_rep1_assigned_counts_df, ribo_rep1_unassigned_counts_df], axis=1
).fillna(0)
ribo_rep1_reads_df['all reads'] = ribo_rep1_reads_df.sum(axis=1)

In [None]:
# Bare expression: shows the replicate 1 per-gene read-count table as the cell output.
ribo_rep1_reads_df

### RIBO - rep2

In [None]:
# reads (ribo - rep2)
# Replicate 2 amplicon tables (assigned and unassigned), read with the first
# CSV column as the row index.
ribo_rep2_assigned_reads_df = pd.read_csv(
    "path/to/assigned_amplicons_RIBOmap_rep2.csv",
    index_col=0,
)
ribo_rep2_unassigned_reads_df = pd.read_csv(
    "path/to/unassigned_amplicons_RIBOmap_rep2.csv",
    index_col=0,
)

In [None]:
# counts
# Number of reads per gene in the assigned table, labelled 'soma reads'.
ribo_rep2_assigned_counts_df = pd.DataFrame(ribo_rep2_assigned_reads_df['gene_name'].value_counts())
ribo_rep2_assigned_counts_df.columns = ['soma reads']

# Number of reads per gene in the unassigned table, labelled 'non-soma reads'.
ribo_rep2_unassigned_counts_df = pd.DataFrame(ribo_rep2_unassigned_reads_df['gene_name'].value_counts())
ribo_rep2_unassigned_counts_df.columns = ['non-soma reads']

# The column-wise concat is an outer join on gene name, so a gene present in
# only one of the two tables gets NaN in the other column. A missing count
# means zero reads; fill it so later arithmetic (replicate sums, fractions)
# does not turn into NaN for such genes.
ribo_rep2_reads_df = pd.concat(
    [ribo_rep2_assigned_counts_df, ribo_rep2_unassigned_counts_df], axis=1
).fillna(0)
ribo_rep2_reads_df['all reads'] = ribo_rep2_reads_df.sum(axis=1)

In [None]:
# Bare expression: shows the replicate 2 per-gene read-count table as the cell output.
ribo_rep2_reads_df

### Combined

In [None]:
ribo_rep1_reads_df = ribo_rep1_reads_df.sort_index()
ribo_rep2_reads_df = ribo_rep2_reads_df.sort_index()
# Sum the two replicates gene by gene. A plain `+` returns NaN for any gene
# that is absent from one replicate (or has a NaN count in either one), which
# would silently drop that gene from the analysis. add(fill_value=0) treats a
# count missing on one side as zero; the trailing fillna(0) covers cells that
# are missing on both sides.
ribo_combined_df = ribo_rep1_reads_df.add(ribo_rep2_reads_df, fill_value=0).fillna(0)
ribo_combined_df

In [None]:
# Share of each gene's reads found in soma / outside soma. Despite the
# 'percentage' column names these are fractions of 'all reads', not values
# scaled to 0-100.
total_reads = ribo_combined_df['all reads']
ribo_combined_df['soma reads percentage'] = ribo_combined_df['soma reads'].div(total_reads)
ribo_combined_df['non-soma reads percentage'] = ribo_combined_df['non-soma reads'].div(total_reads)

In [None]:
# plot distribution
# One histogram per read fraction, side by side, each with a red vertical
# line at the median.
sns.set_style('white')
fig, axs = plt.subplots(figsize=(10, 4), ncols=2)
for panel, column in zip(axs, ['soma reads percentage', 'non-soma reads percentage']):
    fractions = ribo_combined_df[column]
    sns.histplot(fractions, ax=panel)
    panel.axvline(x=fractions.median(), color='r', linestyle='-')
    panel.set_title(column)
# plt.savefig(os.path.join(fig_path, 'reads-distribution.pdf'))
plt.show()

In [None]:
# determine threshold
# Cutoffs are the 90th percentile of each read fraction; a gene strictly
# above the cutoff is flagged as a top gene for that compartment.
s_thres = ribo_combined_df['soma reads percentage'].quantile(0.90) # compare w/ list_4
ns_thres = ribo_combined_df['non-soma reads percentage'].quantile(0.90) # compare w/ list_1

ribo_combined_df['top soma gene'] = ribo_combined_df['soma reads percentage'].gt(s_thres)
ribo_combined_df['top non-soma gene'] = ribo_combined_df['non-soma reads percentage'].gt(ns_thres)

In [None]:
# set class
# 0 = neither flag set, 1 = top soma gene, 2 = top non-soma gene (written
# last, so it wins if both flags are set).
ribo_combined_df['class'] = 0
ribo_combined_df.loc[ribo_combined_df['top soma gene'], 'class'] = 1
ribo_combined_df.loc[ribo_combined_df['top non-soma gene'], 'class'] = 2
ribo_combined_df['class'] = ribo_combined_df['class'].astype('category')

# Move the gene names from the index into a 'gene' column, record each gene's
# rank by ascending soma fraction in 'order', then return to alphabetical
# gene order with a fresh 0..n-1 index.
ribo_combined_df = (
    ribo_combined_df
    .sort_values('soma reads percentage')
    .assign(gene=lambda df: df.index.values)
    .reset_index(drop=True)
    .assign(order=lambda df: df.index.values)
    .sort_values('gene')
    .reset_index(drop=True)
)
ribo_combined_df

In [None]:
# Three-colour palette (grey, blue, red) used for the 'class' hue.
cpl_colors = ['#bfbfbf', '#1d43cf', '#cf1d1d']
cpl = sns.color_palette(cpl_colors)
cmap = ListedColormap(cpl.as_hex())

sns.set_style("ticks")
fig, ax = plt.subplots(figsize=(6,5))

# Non-soma read fraction of every gene against its rank by soma fraction.
sns.scatterplot(
    data=ribo_combined_df,
    x='order',
    y='non-soma reads percentage',
    hue='class',
    palette=cpl,
    s=12,
    edgecolor=None,
    legend=False,
    ax=ax,
)

# plt.savefig(os.path.join(fig_path, 'neuropil_reads.pdf'))

plt.show()

In [None]:
sns.set_style("ticks")
fig, ax = plt.subplots(figsize=(6,5))

# Background layer: every gene, coloured by class.
sns.scatterplot(y='non-soma reads percentage', x='order', hue='class', data=ribo_combined_df, s=18, edgecolor=None, palette=cpl, legend=False, ax=ax)

annotate_genes = ['Shank1', 'Eef2', 'Kif5a', 'Calm1', 'Gfap', 'Mbp', 'App', 'Atp1a1', 'Mal']

# Position of each gene's text label in data coordinates
# (x = 'order', y = 'non-soma reads percentage').
label_positions = {
    'Shank1': (1000, 0.37),
    'Mbp': (1000, 0.34),
    'Eef2': (1000, 0.32),
    'Kif5a': (1000, 0.30),
    'Calm1': (1000, 0.28),
    'Gfap': (1000, 0.26),
    'App': (4000, 0.11),
    'Atp1a1': (4000, 0.09),
    'Mal': (4000, 0.07),
}

# Look up each gene's plotted position from the data instead of hard-coding
# it, so the arrows keep pointing at the right dots when the counts,
# thresholds or ordering change.
gene_points = {}
for gene in annotate_genes:
    gene_rows = ribo_combined_df.loc[ribo_combined_df['gene'] == gene]
    if gene_rows.empty:
        # A gene missing from the table is skipped rather than aborting the figure.
        print(gene, 'not found in ribo_combined_df; skipping annotation')
        continue
    x = gene_rows['order'].values[0]
    y = gene_rows['non-soma reads percentage'].values[0]
    print(gene, x, y)
    gene_points[gene] = (x, y)

# Highlight layer: the annotated genes redrawn with a black outline.
sns.scatterplot(y='non-soma reads percentage', x='order', hue='class', data=ribo_combined_df.loc[ribo_combined_df['gene'].isin(annotate_genes), :], s=12, edgecolor='k', linewidth=1, palette=cpl, legend=False, ax=ax)

# Rounded label box connected to the data point by a plain line.
for gene, text_xy in label_positions.items():
    if gene not in gene_points:
        continue
    ax.annotate(gene, gene_points[gene], xytext=text_xy, size=7,
                bbox=dict(boxstyle="round", alpha=0.1),
                arrowprops=dict(arrowstyle='-', connectionstyle="arc3", facecolor='black', edgecolor='black', lw=1)
               )

# plt.savefig(os.path.join(fig_path, 'neuropil_reads.pdf'))

plt.show()