In [1]:
import os
import os.path
import glob
import pandas as pd
import numpy as np
from PIL import Image
import preprocessing as proc
from pandas.api.types import CategoricalDtype
import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the old names; use whichever one is available.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Figure defaults shared by every plot in the notebook
plt.rcParams.update({
    'figure.figsize': (15, 5),
    'axes.titlesize': 20,
    'axes.labelsize': 16,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
})

# Show (practically) all columns when displaying wide dataframes
pd.options.display.max_columns = 1000

# Folder with the csv datasets, relative to the notebook's working directory
DATA_PATH = '../datasets/'
# Root folder of the raw TIFF images. Set the TIFF_PATH environment variable
# to run the notebook on another machine; the default is the original location.
TIFF_PATH = os.environ.get('TIFF_PATH',
                           '/Users/vladarozova/Dropbox/New experiment/Images/tiff/')

# TensorFlow C++ log level (no TensorFlow import is visible in this part of
# the notebook; presumably used by an imported module - TODO confirm)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'



## Metadata

### Load metadata

In [2]:
# Read the metadata table from DATA_PATH, report its size and preview it
meta = proc.load_data("Metadata.csv", data_path=DATA_PATH)
n_rows, n_cols = meta.shape
print((n_rows, n_cols))
meta.head()

(930, 14)


Unnamed: 0,Stiffness,Combination,Well,Site,Channel,Fluorophore,Gain,Offset,Power,Attenuation,PCF,zPosition,zOffset,USER
0,0.2,B,A1,1,1,AF647,700.0,0,0.005,0.95,7679.330711,795.356,0.003333,44709390
1,0.2,B,A1,1,2,AF568,680.0,0,0.02,0.95,7679.330711,795.356,0.003333,44709390
2,0.2,B,A1,1,3,FITC,730.0,0,0.03,0.98,7679.330711,795.356,0.003333,44709390
3,0.2,B,A1,1,4,DAPI,550.0,0,0.024,0.98,7679.330711,795.356,0.003333,44709390
4,0.2,B,A1,2,1,AF647,700.0,0,0.005,0.95,7679.330711,795.272,0.003332,44709390


In [3]:
# Lower-case every column name, then let proc.create_label add the
# per-image 'label' column (per_cell=False)
meta = meta.rename(columns=str.lower)
meta = proc.create_label(meta, per_cell=False)
meta.head()

Unnamed: 0,stiffness,combination,well,site,channel,fluorophore,gain,offset,power,attenuation,pcf,zposition,zoffset,user,label
0,0.2,B,A1,1,1,AF647,700.0,0,0.005,0.95,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1
1,0.2,B,A1,1,2,AF568,680.0,0,0.02,0.95,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1
2,0.2,B,A1,1,3,FITC,730.0,0,0.03,0.98,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1
3,0.2,B,A1,1,4,DAPI,550.0,0,0.024,0.98,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1
4,0.2,B,A1,2,1,AF647,700.0,0,0.005,0.95,7679.330711,795.272,0.003332,44709390,0.2-B-A1-2


In [4]:
# Map fluorophore to the biomarker based on the combination.
# The same fluorophore stands for a different biomarker in each combination,
# so the lookup is keyed by combination first and fluorophore second.
# Fluorophores without an entry (e.g. AF568 in combinations A and C) map to NaN.
BIOMARKER_BY_FLUOROPHORE = {
    'A': {'AF647': 'Pax', 'FITC': 'WGA', 'DAPI': 'DAPI'},
    'B': {'AF647': 'Ker', 'AF568': 'Vim', 'FITC': 'WGA', 'DAPI': 'DAPI'},
    'C': {'AF647': 'Ecad', 'FITC': 'WGA', 'DAPI': 'DAPI'},
    'D': {'AF647': 'Ecad', 'AF568': 'Vim', 'FITC': 'Ker', 'DAPI': 'DAPI'},
}

# 'channel' is loaded as a numeric column; cast it to object first so that
# writing biomarker names into it does not rely on implicit dtype upcasting
# (deprecated in recent pandas versions).
meta['channel'] = meta['channel'].astype(object)

for combination, biomarkers in BIOMARKER_BY_FLUOROPHORE.items():
    in_combination = meta.combination == combination
    meta.loc[in_combination, 'channel'] = meta.loc[in_combination, 'fluorophore'].map(biomarkers)

meta.head()

Unnamed: 0,stiffness,combination,well,site,channel,fluorophore,gain,offset,power,attenuation,pcf,zposition,zoffset,user,label
0,0.2,B,A1,1,Ker,AF647,700.0,0,0.005,0.95,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1
1,0.2,B,A1,1,Vim,AF568,680.0,0,0.02,0.95,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1
2,0.2,B,A1,1,WGA,FITC,730.0,0,0.03,0.98,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1
3,0.2,B,A1,1,DAPI,DAPI,550.0,0,0.024,0.98,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1
4,0.2,B,A1,2,Ker,AF647,700.0,0,0.005,0.95,7679.330711,795.272,0.003332,44709390,0.2-B-A1-2


In [5]:
# Report rows whose fluorophore had no biomarker in the mapping (channel is NaN)
has_missing_channel = meta.channel.isnull().any()
if has_missing_channel:
    print('Some channels are empty!')

# Combination C has no biomarker assigned to AF568, so remove those rows
# and renumber the index
unmapped_rows = meta.index[(meta.combination == 'C') & (meta.fluorophore == 'AF568')]
meta = meta.drop(unmapped_rows).reset_index(drop=True)

Some channels are empty!


In [6]:
# An image set groups all images of one biomarker stained with one
# fluorophore; its name is "<biomarker>-<fluorophore>", e.g. "Ker-AF647".

meta['imageset'] = meta['channel'] + '-' + meta['fluorophore']
meta.head()

Unnamed: 0,stiffness,combination,well,site,channel,fluorophore,gain,offset,power,attenuation,pcf,zposition,zoffset,user,label,imageset
0,0.2,B,A1,1,Ker,AF647,700.0,0,0.005,0.95,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1,Ker-AF647
1,0.2,B,A1,1,Vim,AF568,680.0,0,0.02,0.95,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1,Vim-AF568
2,0.2,B,A1,1,WGA,FITC,730.0,0,0.03,0.98,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1,WGA-FITC
3,0.2,B,A1,1,DAPI,DAPI,550.0,0,0.024,0.98,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1,DAPI-DAPI
4,0.2,B,A1,2,Ker,AF647,700.0,0,0.005,0.95,7679.330711,795.272,0.003332,44709390,0.2-B-A1-2,Ker-AF647


In [7]:
# Distinct gain settings used for the Ecad images at each stiffness
meta.loc[meta['channel'] == "Ecad"].groupby('stiffness')['gain'].unique()

stiffness
0.2                   [700.0]
0.5                        []
2.0                   [700.0]
8.0                   [950.0]
16.0    [800.0, 780.0, 730.0]
32.0                  [780.0]
64.0    [700.0, 730.0, 690.0]
Name: gain, dtype: object

In [8]:
# Gain of every DAPI image at stiffness 0.5 (stiffness is matched against the string "0.5")
meta.loc[(meta['channel'] == "DAPI") & (meta['stiffness'] == "0.5"), 'gain']

165    630.0
169    630.0
173    630.0
177    630.0
181    630.0
185    630.0
189    630.0
193    630.0
197    630.0
201    630.0
205    630.0
209    630.0
213    630.0
217    630.0
221    630.0
Name: gain, dtype: float64

### Calculate normalising coefficient 

In [9]:
# Normalising coefficient of an image:
#   coeff = (lowest gain within its image set) / (gain of the image)
# This cell collects the lowest gain of every image set.

lowest_gain_per_set = meta.groupby('imageset')['gain'].min()
min_gain = dict(lowest_gain_per_set)
min_gain

{'DAPI-DAPI': 524.2961320466021,
 'Ecad-AF647': 690.0,
 'Ker-AF647': 690.0,
 'Ker-FITC': 650.0,
 'Vim-AF568': 650.0,
 'WGA-FITC': 650.0}

In [10]:
# coeff = min gain of the image set / gain of the image, computed for all
# rows at once instead of iterating over the dataframe row by row.
# Rows whose image set is not a key of min_gain get NaN.
meta['coeff'] = meta['imageset'].map(min_gain) / meta['gain']
meta.head()

Unnamed: 0,stiffness,combination,well,site,channel,fluorophore,gain,offset,power,attenuation,pcf,zposition,zoffset,user,label,imageset,coeff
0,0.2,B,A1,1,Ker,AF647,700.0,0,0.005,0.95,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1,Ker-AF647,0.985714
1,0.2,B,A1,1,Vim,AF568,680.0,0,0.02,0.95,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1,Vim-AF568,0.955882
2,0.2,B,A1,1,WGA,FITC,730.0,0,0.03,0.98,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1,WGA-FITC,0.890411
3,0.2,B,A1,1,DAPI,DAPI,550.0,0,0.024,0.98,7679.330711,795.356,0.003333,44709390,0.2-B-A1-1,DAPI-DAPI,0.953266
4,0.2,B,A1,2,Ker,AF647,700.0,0,0.005,0.95,7679.330711,795.272,0.003332,44709390,0.2-B-A1-2,Ker-AF647,0.985714


In [None]:
# norm_to = {
#     'DAPI' : 600,
#     'FITC' : 730,
#     'AF568' : 680,
#     'AF647' : 700
# }
# coeff = []
# for ind, row in df.iterrows():
#     coeff.append(norm_to[row.fluorophore] / row.gain)
# df['coeff'] = coeff

In [None]:
# cap_dapi = (df[df.fluorophore == "DAPI"].coeff.unique() * 65535).min()
# cap_wga = (df[df.fluorophore == "FITC"].coeff.unique() * 65535).min()
# cap_vim = (df[df.fluorophore == "AF568"].coeff.unique() * 65535).min()
# cap_ker = (df[df.fluorophore == "AF647"].coeff.unique() * 65535).min()

# print(cap_dapi, cap_wga, cap_vim, cap_ker)

# cap_values = {'DAPI' : cap_dapi, 'FITC' : cap_wga, 'AF568' : cap_vim, 'AF647' : cap_ker}

## Background measurements

### Load background measurements

In [None]:
# Biomarker -> fluorophore lookup for each combination
_BG_FLUOROPHORE_BY_CHANNEL = {
    'A': {'Pax': 'AF647', 'WGA': 'FITC', 'DAPI': 'DAPI'},
    'B': {'Ker': 'AF647', 'Vim': 'AF568', 'WGA': 'FITC', 'DAPI': 'DAPI'},
    'C': {'Ecad': 'AF647', 'WGA': 'FITC', 'DAPI': 'DAPI'},
    'D': {'Ecad': 'AF647', 'Vim': 'AF568', 'Ker': 'FITC', 'DAPI': 'DAPI'},
}


def load_bg_data(comb, stiffnesses=("0.2", "0.5", "2", "8", "16", "32", "64")):
    """Load the background measurements of one combination.

    Reads "<stiffness>-<comb>-background.csv" from DATA_PATH for every
    requested stiffness, stacks the tables and derives the well, site,
    channel and fluorophore columns from the image name. The image label is
    added by proc.create_label(per_cell=False).

    Parameters
    ----------
    comb : str
        Combination letter ("A", "B", "C" or "D"). For any other value the
        fluorophore column is filled with the placeholder 0.
    stiffnesses : iterable of str, optional
        Stiffness values exactly as they appear in the file names.
        Defaults to all seven stiffnesses.

    Returns
    -------
    pandas.DataFrame
        The dataframe returned by proc.create_label, built from the columns
        channel, bg_mean, bg_median, bg_std, stiffness, combination, well,
        site and fluorophore.
    """
    # Load each dataset and add the stiffness column
    frames = []
    for s in stiffnesses:
        tmp = pd.read_csv(os.path.join(DATA_PATH, s + "-" + comb + "-background.csv"),
                          index_col=0)
        # The np.float alias was removed in NumPy 1.24; it was the builtin float
        tmp['stiffness'] = float(s)
        frames.append(tmp)

    # Concatenate once with a fresh index. The columns are sorted by name
    # because the positional rename below relies on that order.
    df = pd.concat(frames, ignore_index=True).sort_index(axis=1)

    # Rename columns
    df.columns = ["channel", "bg_mean", "bg_median", "bg_std", "stiffness"]

    # The first column holds the image name "<well>-<site>-<channel>.<ext>";
    # split it once and reuse the parts
    name_parts = df.channel.str.split(pat="-", expand=True)
    df['combination'] = comb
    df['well'] = name_parts[0]
    df['site'] = name_parts[1]
    df['channel'] = name_parts[2].str.split(pat=".", expand=True)[0]

    # Map the biomarker to the fluorophore based on the combination;
    # biomarkers missing from the lookup become NaN
    channel_to_fluorophore = _BG_FLUOROPHORE_BY_CHANNEL.get(comb)
    if channel_to_fluorophore is None:
        df['fluorophore'] = 0
    else:
        df['fluorophore'] = df['channel'].map(channel_to_fluorophore)

    df = proc.create_label(df, per_cell=False)
    return df

In [None]:
# Background measurements of combination B
comb = 'B'
bg = load_bg_data(comb)
bg.head()

### Normalise background measurements

In [None]:
# Sizes of the two tables that are merged in the next cell
(meta.shape, bg.shape)

In [None]:
# Attach each image's coefficient to its background statistics. The inner
# join on label and channel keeps only images present in both tables.
bg_stats = ['bg_mean', 'bg_median', 'bg_std']
bg_new = pd.merge(meta, bg, how='inner', on=['label', 'channel'])[['label', 'channel', 'coeff'] + bg_stats]

# Scale every background statistic by the coefficient of its image
for stat in bg_stats:
    bg_new[stat] = bg_new[stat] * bg_new['coeff']

bg_new.head()

In [None]:
# Store the gain-normalised background statistics as "gain_Background.csv" via proc.save_data
proc.save_data(bg_new, "gain_Background.csv", data_path=DATA_PATH)

## Normalise images

In [11]:
# For every original image of the selected combination:
#   1. build the image label and look up its coeff in the meta dataframe
#   2. multiply all pixels by coeff
#   3. convert to unsigned 16-bit int
#   4. save as "<well>-<site>-<channel>-gain.tif" next to the original,
#      without any other transformation

for s in ("0.2", "2", "16", "32", "64"): #("0.2", "0.5", "2", "8", "16", "32", "64"):
    #for c in ("A", "B", "C", "D"):
    c = 'C'

    # Folder with the images of this stiffness and combination
    image_path = os.path.join(TIFF_PATH, 'Cytosoft ' + s + ' kPa/Combination ' + c)

    # Use full paths instead of os.chdir(): changing the working directory
    # would break the relative DATA_PATH for every cell run afterwards.
    filenames = sorted(os.path.basename(path)
                       for path in glob.glob(os.path.join(glob.escape(image_path), "*.tif")))

    # Only the original images, recognised by their short file name; the
    # "-gain" files written below have longer names.
    # NOTE(review): a length cut-off also skips originals whose well, site or
    # channel part makes the name 14+ characters long - confirm this is intended.
    originals = [name for name in filenames if len(name) < 14]

    # The label uses the float form of the stiffness, e.g. "2" -> "2.0"
    stiffness = str(float(s))
    for original in originals:
        # Create label
        w, t, channel = original.split('.')[0].split('-')
        label = "-".join([stiffness, c, w, t])

        # Normalising coefficient: exactly one metadata row must match
        matches = meta.loc[(meta.label == label) & (meta.channel == channel), 'coeff'].values
        if len(matches) != 1:
            raise ValueError("Expected exactly one coeff for image %r (label %r, channel %r), "
                             "found %d" % (original, label, channel, len(matches)))
        coeff = matches[0]

        # Read image
        image = plt.imread(os.path.join(image_path, original))

        # Normalise pixel values
        image_corr = image * coeff

        # coeff is min gain / image gain (at most 1 for positive gains), so
        # the scaled values do not exceed the input range; the cast drops the
        # fractional part.
        image_corr = np.uint16(image_corr)

        # Save image
        im = Image.fromarray(image_corr)
        im.save(os.path.join(image_path, "-".join([w, t, channel, "gain"]) + ".tif"))