In [1]:
import os
import pandas as pd
import numpy as np
import preprocess_data as prep
from pandas.api.types import CategoricalDtype
import seaborn as sns
from matplotlib import pyplot as plt

%matplotlib inline

# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*"
# and 3.8 removed the old names, so use whichever name this install provides.
SEABORN_STYLE = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(SEABORN_STYLE)
# Notebook-wide figure defaults.
plt.rcParams['figure.figsize'] = (15, 5)
plt.rcParams['font.size'] = 18

# Relative path of the directory holding the input CSV files;
# passed as `data_path` to prep.load_data below.
DATA_PATH = '../datasets/'

## Load the data

### Metadata

###### Keratin

In [None]:
# Load the keratin metadata, lower-case every column name, then run
# prep.create_label on it (presumably adds the 'label' column used later
# in the merge step -- TODO confirm against preprocess_data).
ker_meta_raw = prep.load_data("Ker_metadata.csv", data_path=DATA_PATH)
ker_meta = prep.create_label(ker_meta_raw.rename(columns=str.lower),
                             per_cell=False)
print(ker_meta.shape)
ker_meta.head()

###### Vimentin

In [None]:
# Load the vimentin metadata, lower-case every column name, then run
# prep.create_label on it (presumably adds the 'label' column used later
# in the merge step -- TODO confirm against preprocess_data).
vim_meta_raw = prep.load_data("Vim_metadata.csv", data_path=DATA_PATH)
vim_meta = prep.create_label(vim_meta_raw.rename(columns=str.lower),
                             per_cell=False)
print(vim_meta.shape)
vim_meta.head()

### Background signal

###### Keratin

In [None]:
# Load the keratin background measurements and rename the 'mean' column
# to 'bg' before labelling the rows with prep.create_label.
ker_bg_raw = prep.load_data("Ker_background.csv", data_path=DATA_PATH)
ker_bg = prep.create_label(ker_bg_raw.rename(columns={'mean': 'bg'}),
                           per_cell=False)
print(ker_bg.shape)
ker_bg.head()

###### Vimentin

In [None]:
# Load the vimentin background measurements and rename the 'mean' column
# to 'bg' before labelling the rows with prep.create_label.
vim_bg_raw = prep.load_data("Vim_background.csv", data_path=DATA_PATH)
vim_bg = prep.create_label(vim_bg_raw.rename(columns={'mean': 'bg'}),
                           per_cell=False)
print(vim_bg.shape)
vim_bg.head()

## Merge datasets

###### Keratin

In [None]:
# Join metadata onto the background table. No keys are given, so pandas
# performs its default inner join on every column name the two frames share.
# Note that `ker_bg` is overwritten with the merged result.
ker_merged = ker_meta.merge(ker_bg)
ker_bg = prep.move_column(ker_merged, 'label')
print(ker_bg.shape)
ker_bg.head()

###### Vimentin

In [None]:
# Join metadata onto the background table. No keys are given, so pandas
# performs its default inner join on every column name the two frames share.
# Note that `vim_bg` is overwritten with the merged result.
vim_merged = vim_meta.merge(vim_bg)
vim_bg = prep.move_column(vim_merged, 'label')
print(vim_bg.shape)
vim_bg.head()

## Explore the data

### Plot background signal

In [None]:
# Mean raw keratin background per stiffness; error bars show the standard deviation.
# NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of `errorbar='sd'` --
# switch once the environment's seaborn version is confirmed.
ax = sns.barplot(x=ker_bg.stiffness, y=ker_bg.bg,
                 ci='sd', capsize=.1)
ax.set_xlabel("Stiffness")
ax.set_ylabel("Intensity")
ax.set_title("Background in Cytokeratin channel")
ax.figure.savefig('../results/Keratin Background.png', bbox_inches='tight', dpi=300);

In [None]:
# Mean raw vimentin background per stiffness; error bars show the standard deviation.
# NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of `errorbar='sd'` --
# switch once the environment's seaborn version is confirmed.
ax = sns.barplot(x=vim_bg.stiffness, y=vim_bg.bg,
                 ci='sd', capsize=.1)
ax.set_xlabel("Stiffness")
ax.set_ylabel("Intensity")
ax.set_title("Background in Vimentin channel")
ax.figure.savefig('../results/Vimentin Background.png', bbox_inches='tight', dpi=300);

In [None]:
# Summary statistics of the raw keratin background for each stiffness group.
ker_bg.groupby("stiffness")["bg"].describe()

In [None]:
# Summary statistics of the raw vimentin background for each stiffness group.
vim_bg.groupby("stiffness")["bg"].describe()

### Normalise by gain

In [None]:
# Rescale each background reading linearly to a common reference gain of 700:
# norm_bg = bg / gain * 700. Adds a 'norm_bg' column to both frames in place.
for channel_bg in (ker_bg, vim_bg):
    channel_bg['norm_bg'] = channel_bg['bg'] / channel_bg['gain'] * 700

In [None]:
# Mean gain-normalised keratin background per stiffness; error bars show the standard deviation.
# NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of `errorbar='sd'` --
# switch once the environment's seaborn version is confirmed.
ax = sns.barplot(x=ker_bg.stiffness, y=ker_bg.norm_bg,
                 ci='sd', capsize=.1)
ax.set_xlabel("Stiffness")
ax.set_ylabel("Intensity")
ax.set_title("Background in Cytokeratin channel normalised by gain")
ax.figure.savefig('../results/Keratin Background by gain.png', bbox_inches='tight', dpi=300);

In [None]:
# Mean gain-normalised vimentin background per stiffness; error bars show the standard deviation.
# NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of `errorbar='sd'` --
# switch once the environment's seaborn version is confirmed.
ax = sns.barplot(x=vim_bg.stiffness, y=vim_bg.norm_bg,
                 ci='sd', capsize=.1)
ax.set_xlabel("Stiffness")
ax.set_ylabel("Intensity")
ax.set_title("Background in Vimentin channel normalised by gain")
ax.figure.savefig('../results/Vimentin Background by gain.png', bbox_inches='tight', dpi=300);

In [None]:
# Normalised vimentin background of the "64.0" stiffness rows, plotted per user.
# The comparison assumes `stiffness` holds string labels -- TODO confirm.
vim_bg.loc[vim_bg["stiffness"] == "64.0"].plot.scatter(x="user", y="norm_bg", alpha=0.5);

In [None]:
# Number of vimentin background rows for each distinct `pcf` value.
vim_bg["pcf"].value_counts()

In [None]:
# Keep only the vimentin rows whose stiffness label is "64.0" for closer inspection.
is_64kpa = vim_bg["stiffness"] == "64.0"
stiff64 = vim_bg.loc[is_64kpa]
stiff64.head()

In [None]:
# Pairwise relationships between the columns at positions 7..18 of `stiff64`.
# NOTE(review): positional slicing depends on the current column order --
# confirm these are the intended variables.
stiff64_subset = stiff64.iloc[:, 7:19]
sns.pairplot(stiff64_subset);

In [None]:
# Per-user summary statistics of the normalised background in the "64.0" stiffness rows.
vim_bg.loc[vim_bg["stiffness"] == "64.0"].groupby("user")["norm_bg"].describe()

In [None]:
# Show the "64.0" stiffness rows recorded for user 44709390.
rows_64kpa = vim_bg["stiffness"] == "64.0"
rows_user = vim_bg["user"] == 44709390
vim_bg.loc[rows_64kpa & rows_user]

### Statistical testing

#### Check parametric assumptions

In [None]:
# One histogram of normalised keratin background per stiffness group,
# used to eyeball the distribution shape before testing.
ker_bg.hist(by='stiffness', column='norm_bg');

In [None]:
# Summary statistics of the normalised keratin background for each stiffness group.
ker_bg.groupby("stiffness")["norm_bg"].describe()

In [None]:
# One histogram of normalised vimentin background per stiffness group,
# used to eyeball the distribution shape before testing.
vim_bg.hist(by='stiffness', column='norm_bg');

In [None]:
# Summary statistics of the normalised vimentin background for each stiffness group.
vim_bg.groupby("stiffness")["norm_bg"].describe()

#### Two-sample t-test

###### Keratin

In [None]:
from scipy.stats import mannwhitneyu, ttest_ind

print("Background in Cytokeratin channel:\n")

# Compare each stiffness level with the next stiffer one using Welch's
# t-test (equal_var=False, i.e. no equal-variance assumption).
# The p-values are reported as-is; no multiple-comparison correction is applied.
# Stiffness labels are compared as strings, matching the filters used above.
stiffness_levels = ["0.2", "2.0", "8.0", "16.0", "32.0", "64.0"]

for softer, stiffer in zip(stiffness_levels, stiffness_levels[1:]):
    t, p = ttest_ind(ker_bg[ker_bg.stiffness == softer].norm_bg,
                     ker_bg[ker_bg.stiffness == stiffer].norm_bg,
                     equal_var=False)
    print(f"{softer} kPa vs {stiffer} kPa \t p-value =", p)

###### Vimentin

In [None]:
print("Background in vimentin channel:\n")

# Compare each stiffness level with the next stiffer one using Welch's
# t-test (equal_var=False, i.e. no equal-variance assumption).
# The p-values are reported as-is; no multiple-comparison correction is applied.
# Stiffness labels are compared as strings, matching the filters used above.
stiffness_levels = ["0.2", "2.0", "8.0", "16.0", "32.0", "64.0"]

for softer, stiffer in zip(stiffness_levels, stiffness_levels[1:]):
    t, p = ttest_ind(vim_bg[vim_bg.stiffness == softer].norm_bg,
                     vim_bg[vim_bg.stiffness == stiffer].norm_bg,
                     equal_var=False)
    print(f"{softer} kPa vs {stiffer} kPa \t p-value =", p)