In [None]:
import sys, os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

sys.path.append("../../python-scripts/")
import functions_common

# Add per-time-point fish counts to the macrophage and tumor data

In [None]:
# Read the project parameters from the base and the local YAML file.
base_params_path = "../../python-scripts/base/parameters.yml"
local_params_path = "../../python-scripts/local/parameters.yml"
parameters = functions_common.read_parameters(
    base_params = base_params_path,
    local_params = local_params_path,
)
# Display the loaded parameters as the cell output.
parameters

In [None]:
# All input tables are read from the raw-data folder.
# Previously used file names, kept for reference:
#   tumor_volumes_and_distances_1dpi.csv
#   tumor_volumes_and_distances_5dpi.csv
#   macrophage_count_1dpi_and_5dpi.csv
#   2023-01-23_macrophage_props_1dpi_and_5dpi.csv
raw_data_dir = "../data_raw/"

filename_tumor_1dpi = "2023-02-01_tumor_volumes_1dpi.csv"
filename_tumor_5dpi = "2023-02-01_tumor_volumes_5dpi.csv"
filename_macrophages = "2023-02-03_macrophage_props_1dpi_and_5dpi.csv"

data_tumor_1dpi = pd.read_csv(raw_data_dir + filename_tumor_1dpi)
data_tumor_5dpi = pd.read_csv(raw_data_dir + filename_tumor_5dpi)
data_macrophages = pd.read_csv(raw_data_dir + filename_macrophages)



In [None]:
def time_frame_to_min(macrophage_properties_, dpi, start_times=None):
    """Return a copy of the table with absolute time columns and a dpi label.

    Parameters
    ----------
    macrophage_properties_ : pandas.DataFrame
        Must provide the columns ``time_point`` and ``dt_min``
        (presumably frame index and frame interval in minutes - confirm
        against the data export).
    dpi : hashable
        Key into the start-time mapping (e.g. ``"1dpi"``); the same value is
        written to the new ``dpi`` column.
    start_times : mapping, optional
        Maps ``dpi`` to the offset that is added to ``time_point * dt_min``.
        When omitted, the notebook-level ``start_time_points`` is used, so
        existing two-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        A copy of the input with the added columns ``time_in_min``,
        ``time_in_h`` and ``dpi``. The input frame is not modified.

    Raises
    ------
    KeyError
        If ``dpi`` is not a key of the start-time mapping.
    """
    if start_times is None:
        # Fall back to the global defined in a later notebook cell; passing
        # start_times explicitly removes this hidden execution-order dependency.
        start_times = start_time_points

    macrophage_properties = macrophage_properties_.copy()

    elapsed = macrophage_properties["time_point"]*macrophage_properties["dt_min"]
    macrophage_properties["time_in_min"] = elapsed + start_times[dpi]
    macrophage_properties["time_in_h"] = macrophage_properties["time_in_min"]/60.0
    macrophage_properties["dpi"] = dpi

    return macrophage_properties

In [None]:
# Per-dpi start and end entries from the parameter file. The start values are
# added to minute-based times elsewhere in this notebook.
start_time_points = parameters["start_time"]
end_time_points = parameters["end_time"]
print(start_time_points)

# Shorthands for the two imaging sessions.
start_1dpi, start_5dpi = start_time_points['1dpi'], start_time_points['5dpi']

In [None]:
#data_macrophages_1dpi = data_macrophages[data_macrophages["dpi"] == "1dpi"]
#data_macrophages_5dpi = data_macrophages[data_macrophages["dpi"] == "5dpi"]
#data_macrophages_1dpi = time_frame_to_min(data_macrophages_1dpi,"1dpi")
#data_macrophages_5dpi = time_frame_to_min(data_macrophages_5dpi,"5dpi")
#data_macrophages = pd.concat([data_macrophages_1dpi, data_macrophages_5dpi], ignore_index = True)

In [None]:
def add_fish_nr(data, verbose=False):
    """Add an ``nr_of_fish`` column with the number of distinct fish per group.

    Every row receives the number of distinct ``fish_id`` values among all
    rows that share its ``time_in_min`` and ``cancer_cells``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``time_in_min``, ``cancer_cells`` and
        ``fish_id``. The frame is modified in place.
    verbose : bool, optional
        If True, print the distinct fish ids once per group. Off by default,
        because per-row debug output floods the notebook.

    Returns
    -------
    pandas.DataFrame
        The same object as ``data``, with the float column ``nr_of_fish``
        added. An empty frame is returned unchanged.
    """
    if data.empty:
        return data

    group_keys = ["time_in_min", "cancer_cells"]
    fish_ids = data.groupby(group_keys, sort=False)["fish_id"]

    if verbose:
        for group_key, ids in fish_ids:
            print(group_key, ids.unique())

    # One pass over the groups instead of re-filtering the whole frame for
    # every row. len(unique()) also counts a missing fish id as its own value.
    fish_counts = fish_ids.transform(lambda ids: len(ids.unique()))

    # Rows whose group key is missing match no group and get a count of 0.
    # The float dtype keeps the values written to CSV formatted as before.
    data["nr_of_fish"] = fish_counts.fillna(0).astype(float)

    return data


In [None]:
# Annotate all three tables with the per-time-point fish count ...
data_macrophages, data_tumor_1dpi, data_tumor_5dpi = (
    add_fish_nr(table)
    for table in (data_macrophages, data_tumor_1dpi, data_tumor_5dpi)
)

# ... and write them to the current working directory under the same file
# names as the raw inputs.
annotated_tables = (
    (data_macrophages, filename_macrophages),
    (data_tumor_1dpi, filename_tumor_1dpi),
    (data_tumor_5dpi, filename_tumor_5dpi),
)
for table, output_name in annotated_tables:
    table.to_csv(output_name, index = False)

# Compute count data from the key file

In [None]:
# Load the key file via the shared helper, using the parameters read above.
key_file = functions_common.read_key_file(parameters)
# List the available columns, then show the first rows as the cell output.
print(key_file.columns)
key_file.head()

In [None]:
# Convert the frame-based start/end of every key-file entry into absolute
# times. The numeric dpi of each row (e.g. 1) is mapped to the matching
# "<n>dpi" entry of start_time_points, which is added as an offset.
# Vectorised column arithmetic replaces the former row-by-row iterrows/.at
# loop; the columns are cast to float to keep the dtype that loop produced.
start_offsets = key_file['dpi'].map(
    lambda dpi_value: start_time_points["%sdpi" % dpi_value]
)
key_file['t_start_min'] = (key_file['t_start']*key_file['dt_min'] + start_offsets).astype(float)
key_file['t_end_min'] = (key_file['t_end']*key_file['dt_min'] + start_offsets).astype(float)
key_file['t_start_h'] = key_file['t_start_min']/60.0
key_file['t_end_h'] = key_file['t_end_min']/60.0

# Latest end time per imaging session; used below for the observation grid
# and for the axis limits.
key_file_1dpi = key_file[key_file['dpi']==1]
end_1dpi = key_file_1dpi['t_end_min'].max()
print(end_1dpi)

key_file_5dpi = key_file[key_file['dpi']==5]
end_5dpi = key_file_5dpi['t_end_min'].max()
print(end_5dpi)

In [None]:
#data_macrophages_1dpi["dpi"]

In [None]:
# Observation grid: one time point every 60 minutes from the 1dpi start up to
# (excluding) the latest 5dpi end time.
obs_time_points = np.arange(start_1dpi,end_5dpi,60)
print(obs_time_points)

# Collect one record per (time point, cancer cell line) and build the frame
# once, instead of growing it cell by cell with .at.
count_columns = ['time_in_min', 'time_in_h', 'cancer_cells', 'nr_of_fish', 'dpi']
count_records = []

for time in obs_time_points:

    # Time points from the 5dpi start onwards belong to the 5dpi session.
    dpi = 5 if time >= start_5dpi else 1

    # Key-file entries of this session that end after the time point.
    # NOTE(review): only t_end_min is checked, so entries whose t_start_min lies
    # after the time point are counted as well - confirm this is intended.
    sub_df = key_file[(key_file['t_end_min'] > time) & (key_file['dpi'] == dpi)]

    for cancer_cells in sub_df['cancer_cells'].unique():
        n = len(sub_df[sub_df["cancer_cells"] == cancer_cells]["short_name"].unique())
        count_records.append((time, time/60.0, cancer_cells, n, dpi))

# The explicit columns and float dtypes match what the incremental
# construction produced, and the columns exist even when no record was
# collected.
count_df = pd.DataFrame(count_records, columns=count_columns).astype(
    {'time_in_min': float, 'time_in_h': float, 'nr_of_fish': float, 'dpi': float}
)

In [None]:
# Two stacked panels: sample counts from the key file (top) versus sample
# counts found in the 1dpi tumor volume table (bottom).
sns.set_style("whitegrid")
fig, ax = plt.subplots(2, figsize=(10,20))

# Key-file counts of the 1dpi session, up to the latest 1dpi end time.
count_1dpi_df = count_df[(count_df['dpi']==1) & (count_df['time_in_min'] <= end_1dpi)]
# Tumor rows that fall on one of the observation time points.
data_tumor_1dpi_ = data_tumor_1dpi[data_tumor_1dpi["time_in_min"].isin(obs_time_points)]

# Shared axis ranges for both panels.
max_time = max([data_tumor_1dpi["time_in_h"].max(), end_1dpi/60.0])
y_upper = count_df['nr_of_fish'].max() + 3

panel_titles = ("abundance from key file (1dpi)", "abundance tumor volumes (1dpi)")
for panel, panel_title in zip(ax, panel_titles):
    panel.set_xlim(start_1dpi/60.0, max_time)
    panel.set_ylim(0.0, y_upper)
    panel.set_ylabel("#samples")
    panel.set_xlabel("time in h")
    panel.set_title(panel_title)

for panel, panel_data in zip(ax, (count_1dpi_df, data_tumor_1dpi_)):
    sns.lineplot(x = "time_in_h", y = "nr_of_fish", hue = "cancer_cells",
                 data = panel_data, ax = panel, linewidth = 5.0)

plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.savefig("data_abundance_tumor_1dpi_over_time.pdf")

In [None]:
# Find out which LBT003 sample drops out of the 1dpi tumor data between
# 30 h and 36 h.
cell_line = "LBT003"

# Samples that still have tumor data at 36 h. Despite its name, `missing`
# holds the samples that are present at this later time point.
data_tumor_1dpi_missing_sample = data_tumor_1dpi_[data_tumor_1dpi_["time_in_h"] == 36.0]
data_tumor_1dpi_missing_sample = data_tumor_1dpi_missing_sample[data_tumor_1dpi_missing_sample["cancer_cells"] == cell_line]
missing = data_tumor_1dpi_missing_sample["short_name"].unique()
print(missing)

# Reference set: samples that have tumor data at 30 h.
all_samples = data_tumor_1dpi_[data_tumor_1dpi_["time_in_h"] == 30.0]
all_ = all_samples[all_samples["cancer_cells"] == cell_line]["short_name"].unique()
print(all_)

# Report the samples present at 30 h but absent at 36 h. The membership test
# used to compare all_ with itself and therefore never printed anything.
for sample in all_:
    if sample not in missing:
        print(sample)
    

In [None]:
# Print every sample of the reference set that does not occur in `missing`.
for sample in all_:
    if sample in missing:
        continue
    print(sample)

In [None]:
# Two stacked panels: sample counts from the key file (top) versus sample
# counts found in the 5dpi tumor volume table (bottom).
sns.set_style("whitegrid")
fig, ax = plt.subplots(2, figsize=(10,20))

# Key-file counts of the 5dpi session, from the 5dpi start time onwards.
count_5dpi_df = count_df[(count_df['dpi']==5) & (count_df['time_in_min'] >= start_5dpi)]
# Tumor rows that fall on one of the observation time points.
data_tumor_5dpi_ = data_tumor_5dpi[data_tumor_5dpi["time_in_min"].isin(obs_time_points)]

# Shared axis ranges for both panels.
max_time = max([data_tumor_5dpi["time_in_h"].max(), end_5dpi/60.0])
y_upper = count_df['nr_of_fish'].max() + 3

panel_titles = ("abundance from key file (5dpi)", "abundance tumor volumes (5dpi)")
for panel, panel_title in zip(ax, panel_titles):
    panel.set_xlim(start_5dpi/60.0, max_time)
    panel.set_ylim(0.0, y_upper)
    panel.set_ylabel("#samples")
    panel.set_xlabel("time in h")
    panel.set_title(panel_title)

for panel, panel_data in zip(ax, (count_5dpi_df, data_tumor_5dpi_)):
    sns.lineplot(x = "time_in_h", y = "nr_of_fish", hue = "cancer_cells",
                 data = panel_data, ax = panel, linewidth = 5.0)

plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.savefig("data_abundance_tumor_5dpi_over_time.pdf")

In [None]:
# Two stacked panels: sample counts from the key file (top) versus sample
# counts found in the 5dpi macrophage table (bottom).
sns.set_style("whitegrid")
fig, ax = plt.subplots(2, figsize=(10,20))

# Key-file counts of the 5dpi session, from the 5dpi start time onwards.
count_5dpi_df = count_df[count_df['dpi']==5]
count_5dpi_df = count_5dpi_df[count_5dpi_df['time_in_min'] >= start_5dpi]
# Macrophage rows of the 5dpi session that fall on an observation time point.
data_macrophages_5dpi_  = data_macrophages[data_macrophages ['dpi']=="5dpi"]
data_macrophages_5dpi_ = data_macrophages_5dpi_[data_macrophages_5dpi_["time_in_min"].isin(obs_time_points)]

# Shared axis ranges for both panels.
max_time = max([data_macrophages_5dpi_["time_in_h"].max(), end_5dpi/60.0])
y_upper = count_df['nr_of_fish'].max() + 3

ax[0].set_xlim(start_5dpi/60.0, max_time)
ax[0].set_ylim(0.0, y_upper)
ax[0].set_ylabel("#samples")
ax[0].set_xlabel("time in h")
ax[0].set_title("abundance from key file (5dpi)")
# The x-limits used to be applied to ax[0] a second time, which left the
# macrophage panel on a different time axis than the key-file panel.
ax[1].set_xlim(start_5dpi/60.0, max_time)
ax[1].set_ylim(0.0, y_upper)
ax[1].set_ylabel("#samples")
ax[1].set_xlabel("time in h")
ax[1].set_title("abundance macrophages (5dpi)")
sns.lineplot(x = "time_in_h",  y= "nr_of_fish", hue= "cancer_cells", data= count_5dpi_df, ax=ax[0], linewidth = 5.0)
sns.lineplot(x = "time_in_h",  y= "nr_of_fish", hue= "cancer_cells", data= data_macrophages_5dpi_, ax=ax[1], linewidth = 5.0)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.savefig("data_abundance_macrophages_5dpi_over_time.pdf")

In [None]:
# Two stacked panels: sample counts from the key file (top) versus sample
# counts found in the 1dpi macrophage table (bottom).
sns.set_style("whitegrid")
fig, ax = plt.subplots(2, figsize=(10,20))

# Key-file counts of the 1dpi session, up to the latest 1dpi end time.
count_1dpi_df = count_df[(count_df['dpi']==1) & (count_df['time_in_min'] <= end_1dpi)]
# Macrophage rows of the 1dpi session that fall on an observation time point.
data_macrophages_1dpi_ = data_macrophages[data_macrophages['dpi']=="1dpi"]
data_macrophages_1dpi_ = data_macrophages_1dpi_[data_macrophages_1dpi_["time_in_min"].isin(obs_time_points)]

# Shared axis ranges for both panels.
max_time = max([data_macrophages_1dpi_["time_in_h"].max(), end_1dpi/60.0])
y_upper = count_df['nr_of_fish'].max() + 3

panel_titles = ("abundance from key file (1dpi)", "abundance macrophages (1dpi)")
for panel, panel_title in zip(ax, panel_titles):
    panel.set_xlim(start_1dpi/60.0, max_time)
    panel.set_ylim(0.0, y_upper)
    panel.set_ylabel("#samples")
    panel.set_xlabel("time in h")
    panel.set_title(panel_title)

for panel, panel_data in zip(ax, (count_1dpi_df, data_macrophages_1dpi_)):
    sns.lineplot(x = "time_in_h", y = "nr_of_fish", hue = "cancer_cells",
                 data = panel_data, ax = panel, linewidth = 5.0)

plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.savefig("data_abundance_macrophages_1dpi_over_time.pdf")