In [None]:
import sys, os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
# Add the shared script directory to the module search path so that the
# project-local helper module imported on the next line can be resolved
# (presumably functions_common.py lives in that directory — confirm).
# The path is relative, so it is resolved against the current working directory.
sys.path.append("../python-scripts/")
import functions_common

In [None]:
# Load the analysis parameters from the base parameter file and the local one.
# How the two files are combined is decided inside
# functions_common.read_parameters (presumably local overrides base — confirm).
parameters = functions_common.read_parameters(
    base_params="../python-scripts/base/parameters.yml",
    local_params="../python-scripts/local/parameters.yml",
)
parameters

In [None]:
# Load the key file via the project helper; the cells below treat it as a
# pandas DataFrame with (among others) the columns 'cancer_cells', 'dpi',
# 't_start', 't_end', 'dt_min' and 'short_name'.
key_file = functions_common.read_key_file(parameters)
# Print the available column names, then show the first rows as the cell output.
print(key_file.columns)
key_file.head()

In [None]:
# Overview figure: number of key-file rows per cancer-cell type, split by dpi.
#
# sns.set() re-applies seaborn's default "darkgrid" style, so calling
# sns.set_style("white") before it has no effect. Passing the style in the
# same call makes the "white" style actually take hold.
sns.set(style="white", font_scale=2)
fig, ax = plt.subplots(figsize=(20, 10))
sns.countplot(x="cancer_cells", hue="dpi", data=key_file, ax=ax, palette="Set2")
ax.set_xlabel("cancer cells")
ax.set_ylabel("#samples")
plt.xticks(rotation=45)
# bbox_inches="tight" keeps the rotated tick labels inside the saved files.
fig.savefig("data_abundance.png", bbox_inches="tight")
fig.savefig("data_abundance.pdf", bbox_inches="tight")

In [None]:
# Per-dpi time offsets from the parameter file, keyed by labels such as
# '1dpi' and '5dpi'.
start_time_points = parameters["start_time"]
end_time_points = parameters["end_time"]
print(start_time_points)

start_1dpi = start_time_points['1dpi']
start_5dpi = start_time_points['5dpi']

# Offset of every row, looked up through its dpi value ("<dpi>dpi").
# Mapping over the 'dpi' column (instead of iterating rows with iterrows)
# keeps the original dtype of the dpi values, so an integer 1 is formatted
# as '1dpi' even when all other columns are floats. An unknown label still
# raises KeyError, exactly like a plain dictionary lookup.
start_offset = key_file['dpi'].map(lambda dpi: start_time_points["%sdpi" % dpi])

# Absolute start/end of each recording: t_start / t_end scaled by dt_min and
# shifted by the dpi offset (t_start / t_end appear to be frame indices and
# dt_min the frame interval in minutes — confirm). Column-wise arithmetic
# replaces the per-row .at writes; it is faster and also works when the
# key-file index is not unique. The cast keeps the columns float64.
key_file['t_start_min'] = (key_file['t_start'] * key_file['dt_min'] + start_offset).astype(float)
key_file['t_end_min'] = (key_file['t_end'] * key_file['dt_min'] + start_offset).astype(float)

# The same time points expressed in hours.
key_file['t_start_h'] = key_file['t_start_min'] / 60.0
key_file['t_end_h'] = key_file['t_end_min'] / 60.0

In [None]:
# Largest 't_end_min' value among the rows of each dpi group.
key_file_1dpi = key_file.loc[key_file['dpi'].eq(1)]
end_1dpi = key_file_1dpi.loc[:, 't_end_min'].max()
print(end_1dpi)

key_file_5dpi = key_file.loc[key_file['dpi'].eq(5)]
end_5dpi = key_file_5dpi.loc[:, 't_end_min'].max()
print(end_5dpi)

In [None]:
# Side-by-side view of the raw and the derived (minute-based) time columns.
key_file.loc[:, ['t_start', 't_start_min', 't_end', 't_end_min']]

In [None]:
# Observation grid: one point every 60 (minutes) from the 1dpi start up to,
# but not including, the latest 5dpi end.
obs_time_points = np.arange(start_1dpi, end_5dpi, 60)
print(obs_time_points)

# Collect one record per (time point, cancer-cell type) and build the frame
# once at the end; growing a DataFrame cell by cell with .at re-allocates on
# every new row.
# NOTE(review): a row counts as observed at a time point whenever its
# 't_end_min' lies after that point; 't_start_min' is not checked — confirm
# that this is intended.
count_records = []
for time_min in obs_time_points:
    # Time points from the 5dpi start onwards belong to the 5dpi group.
    dpi = 5 if time_min >= start_5dpi else 1

    still_running = key_file[(key_file['t_end_min'] > time_min) & (key_file['dpi'] == dpi)]

    for cancer_cells in still_running['cancer_cells'].unique():
        of_this_type = still_running[still_running['cancer_cells'] == cancer_cells]
        count_records.append({
            'time_in_min': time_min,
            'time_in_h': time_min / 60.0,
            'cancer_cells': cancer_cells,
            # number of distinct samples (by short_name) of this cell type
            'sample_nr': len(of_this_type['short_name'].unique()),
            'dpi': dpi,
        })

# Explicit columns keep the frame well-formed even when no record was collected.
count_df = pd.DataFrame(
    count_records,
    columns=['time_in_min', 'time_in_h', 'cancer_cells', 'sample_nr', 'dpi'],
)

In [None]:
# Display the per-time-point sample counts (one row per time point and
# cancer-cell type) as the cell output.
count_df

In [None]:
# Number of samples over time for the 1dpi group, one line per cancer-cell type.
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(10, 10))

# Keep only 1dpi records up to the latest 1dpi end.
count_1dpi_df = count_df[(count_df['dpi'] == 1) & (count_df['time_in_min'] <= end_1dpi)]

# Axis limits are given in hours (x) and samples (y), with some head room.
ax.set_xlim(start_1dpi / 60.0, end_1dpi / 60.0 + 1.0)
ax.set_ylim(0.0, count_df['sample_nr'].max() + 3)
ax.set_ylabel("#samples")
ax.set_xlabel("time in h")
sns.lineplot(x="time_in_h", y="sample_nr", hue="cancer_cells", data=count_1dpi_df, ax=ax, linewidth=5.0)

# The legend is anchored outside the axes (to the right). Without
# bbox_inches="tight" the saved PDF is cropped to the figure canvas and the
# legend is cut off.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
fig.savefig("data_abundance_1dpi_over_time.pdf", bbox_inches="tight")

In [None]:
# Number of samples over time for the 5dpi group, one line per cancer-cell type.
fig, ax = plt.subplots(figsize=(20, 20))

# Keep only 5dpi records from the 5dpi start onwards.
count_5dpi_df = count_df[(count_df['dpi'] == 5) & (count_df['time_in_min'] >= start_5dpi)]

ax.set_ylim(0.0, count_df['sample_nr'].max() + 5)
sns.lineplot(x="time_in_h", y="sample_nr", hue="cancer_cells", data=count_5dpi_df, ax=ax, linewidth=5.0)
ax.set_ylabel("#samples")
ax.set_xlabel("time in h")

# The legend is anchored outside the axes (to the right). Without
# bbox_inches="tight" the saved PDF is cropped to the figure canvas and the
# legend is cut off.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
fig.savefig("data_abundance_5dpi_over_time.pdf", bbox_inches="tight")

In [None]:
# Two stacked count plots of the key file: cancer-cell type split by 'dpf'
# (top) and 'dpf' split by cancer-cell type (bottom).
# NOTE(review): this cell reads a 'dpf' column while the other cells use
# 'dpi' — confirm that the key file really provides 'dpf'.
fig, ax = plt.subplots(2, 1, figsize=(20, 10))
panel_layout = [("cancer_cells", "dpf"), ("dpf", "cancer_cells")]
for panel, (x_column, hue_column) in zip(ax, panel_layout):
    sns.countplot(x=x_column, hue=hue_column, data=key_file, ax=panel)