**I. Data upload, preparation and normalization**

1. Importing packages and libraries.

In [None]:
import math
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
from pybaselines import Baseline
from scipy.optimize import curve_fit
from scipy.signal import argrelextrema
from scipy.signal import find_peaks
from sklearn.metrics import auc
#tuckeys test

2. Uploading the data of all lanes.

In [None]:
# Input files for this gel: per-lane grey-value tables (Excel) and the scanned images (TIFF).
# NOTE(review): the Excel paths are under "data/" while the image paths are not —
# confirm the images really live in the notebook's working directory.
cy3_excel = "data/1 T19G + U19U CY3.xlsx"
cy5_excel = "data/1 T19G + U19U CY5.xlsx"
cy3_image = "1 T19G+U19U 09-17-[Cy3].tif"
cy5_image = "1 T19G+U19U 09-17-[Cy5].tif"

2.1 CY3

In [None]:
# Viewing the CY3 image.

tiff_cy3 = cv2.imread(cy3_image)
# cv2.imread reports a missing/unreadable file by returning None instead of raising,
# so stop here with a clear message rather than failing later inside plt.imshow.
if tiff_cy3 is None:
    raise FileNotFoundError(f"Could not read image file: {cy3_image}")
plt.imshow(tiff_cy3, aspect='auto')

In [None]:
# Load the CY3 lane profiles and tag every grey-value column with its channel.

grey_values_list = ["Grey_Values_T0", "Grey_Values_T10s", "Grey_Values_T30s", "Grey_Values_T1M", "Grey_Values_T3M", "Grey_Values_T6M",
                   "Grey_Values_T30M", "Grey_Values_T100M", "Grey_Values_L1", "Grey_Values_L2"]

#CY3
data_cy3 = pd.read_excel(cy3_excel)

# Only columns whose name is in grey_values_list get the suffix; any other column
# (e.g. "Distance") keeps its name, and listed names missing from the sheet are ignored.
data_cy3 = data_cy3.rename(columns={lane: lane + '_CY3' for lane in grey_values_list})

In [None]:
# Display the CY3 table after the columns were renamed.
data_cy3

In [None]:
# Plotting the initial CY3 data: one stacked panel per lane.

name_list = ["T0s", "T10s", "T30s", "T1m", "T3m", "T6m", "T30m", "T100m", "Ladder1", "Ladder2"]
figure, axis = plt.subplots(nrows=10, figsize=(12, 22), constrained_layout=True)

for di, d in enumerate(grey_values_list):
    panel = axis[di]
    panel.plot(data_cy3["Distance"], data_cy3[d + "_CY3"])
    panel.set_title(str(name_list[di]))
    panel.set(xlabel="Distance, cm", ylabel="Grey Values")

2.2 CY5

In [None]:
# Viewing the CY5 image.

tiff_cy5 = cv2.imread(cy5_image)
# cv2.imread reports a missing/unreadable file by returning None instead of raising,
# so stop here with a clear message rather than failing later inside plt.imshow.
if tiff_cy5 is None:
    raise FileNotFoundError(f"Could not read image file: {cy5_image}")
plt.imshow(tiff_cy5, aspect='auto')

In [None]:
# Load the CY5 lane profiles and tag every grey-value column with its channel.

#CY5
data_cy5 = pd.read_excel(cy5_excel)

# Only columns whose name is in grey_values_list get the suffix; any other column
# (e.g. "Distance") keeps its name, and listed names missing from the sheet are ignored.
data_cy5 = data_cy5.rename(columns={lane: lane + '_CY5' for lane in grey_values_list})

In [None]:
# Drop the last row of the CY5 table in place, then display the table.
# NOTE(review): this mutates data_cy5, so every re-run of this cell removes one more row.
# Presumably the intent is to match the CY3 row count before the two tables are
# concatenated — confirm, and prefer re-running the load cell before re-running this one.
data_cy5.drop(data_cy5.tail(1).index,inplace=True)###
data_cy5

In [None]:
# Plotting the initial CY5 data: one stacked panel per lane.

name_list = ["T0s", "T10s", "T30s", "T1m", "T3m", "T6m", "T30m", "T100m", "Ladder1", "Ladder2"]
figure, axis = plt.subplots(nrows=10, figsize=(12, 22), constrained_layout=True)

for di, d in enumerate(grey_values_list):
    panel = axis[di]
    panel.plot(data_cy5["Distance"], data_cy5[d + "_CY5"])
    panel.set_title(str(name_list[di]))
    panel.set(xlabel="Distance, cm", ylabel="Grey Values")

3. Combining the datasets.

In [None]:
# Put both channels side by side. The CY5 table carries its own "Distance" column,
# which is removed first so that only the CY3 copy ends up in the combined table.
cy5_without_distance = data_cy5.drop(columns=["Distance"])
data = pd.concat([data_cy3, cy5_without_distance], axis=1)
data

4. Removing the background (baseline adjustment).

In [None]:
# Channel-specific column names, derived from the base lane names in grey_values_list.
grey_values_list_cy3 = [lane + "_CY3" for lane in grey_values_list]
grey_values_list_cy5 = [lane + "_CY5" for lane in grey_values_list]

grey_values_list_all = grey_values_list_cy3 + grey_values_list_cy5 

In [None]:
# Subtract a fitted baseline from every lane profile and store the result
# in a new column named "<original column>_baseline".

# The fitter depends only on the x axis, so it is created once rather than once per column.
baseline_fitter = Baseline(x_data=data["Distance"])
lane_columns = set(grey_values_list_all)

# Iterate over a snapshot of the labels because columns are added inside the loop.
for d in list(data.columns):
    if d not in lane_columns:
        continue
    imodpoly = baseline_fitter.imodpoly(data[d], poly_order=3, num_std=0.8)
    # Element 0 of the result is the fitted baseline; residuals below it are clamped to 0.
    data[d + "_baseline"] = (data[d] - imodpoly[0]).clip(lower=0)
       

In [None]:
# Display the combined table, now including the "_baseline" columns.
data

In [None]:
# Names of the baseline-corrected columns, per channel and combined.
grey_values_list_cy3_baseline = [column + "_baseline" for column in grey_values_list_cy3]
grey_values_list_cy5_baseline = [column + "_baseline" for column in grey_values_list_cy5]

grey_values_baseline_adj_list = grey_values_list_cy3_baseline + grey_values_list_cy5_baseline

In [None]:
# Plotting the data with adjusted baseline and initial data:
# one figure per lane, CY3 on top and CY5 below; the lighter trace is the raw signal.

name_list = ["T0s", "T10s", "T30s", "T1m", "T3m", "T6m", "T30m", "T100m", "Ladder1", "Ladder2"]

for di, d in enumerate(grey_values_list_cy3):
    fig, axes = plt.subplots(2, 1, figsize=(14, 4))
    distance = data["Distance"]

    ax = axes[0]
    ax.set_title(name_list[di])
    ax.plot(distance, data[grey_values_list_cy3_baseline[di]], color = "green")
    ax.plot(distance, data[d], color = "lightgreen")
    ax.set_ylabel('CY3', rotation = 0, ha='left')
    ax.yaxis.set_label_position("right")

    ax = axes[1]
    ax.plot(distance, data[grey_values_list_cy5_baseline[di]])
    ax.plot(distance, data[grey_values_list_cy5[di]], color = "lightblue")
    ax.set_ylabel('CY5', rotation = 0, ha='left')
    ax.yaxis.set_label_position("right")
    ax.set_xlabel("Distance, cm")

5. Normalizing the distance in each lane.

In [None]:
#Finding peak values:

def _peak_distances(frame, columns, min_height):
    """Return, for each column, the list of "Distance" values at its detected peaks.

    Peaks are found with scipy.signal.find_peaks using ``height=min_height``.
    The distance column is selected by name rather than by position, so the
    result does not depend on the column order of ``frame``.
    """
    distances_per_lane = []
    for column in columns:
        peak_idx, _ = find_peaks(frame[column], height=min_height)
        distances_per_lane.append(list(frame["Distance"].iloc[peak_idx]))
    return distances_per_lane


# The two channels use different minimum peak heights.
peaks_list_cy3 = _peak_distances(data, grey_values_list_cy3_baseline, 3000)
peaks_list_cy5 = _peak_distances(data, grey_values_list_cy5_baseline, 600)

peaks_list = peaks_list_cy3 + peaks_list_cy5    
print(peaks_list)

In [None]:
# Markers (min and max values of peaks of each lane) are taken only from CY5 - CY3 tends to be more inconsistent.

min_max_list = []

for lane_index, lane_peaks in enumerate(peaks_list_cy5):
    # The normalization divides by (last peak - first peak), so each lane needs at
    # least two peaks; otherwise the span is missing (no peaks) or zero (one peak).
    if len(lane_peaks) < 2:
        raise ValueError(
            f"CY5 lane {lane_index} has {len(lane_peaks)} peak(s) above the height threshold; "
            "at least 2 are required to normalize the distance."
        )
    min_max_list.append([lane_peaks[0], lane_peaks[-1]])
# Duplicate the list so that the CY3 lanes are normalized with the same CY5 markers.
min_max_list = min_max_list+min_max_list

In [None]:
# Normalizing the distance

name_list_cy3 = ["T0s_CY3", "T10s_CY3", "T30s_CY3", "T1m_CY3", "T3m_CY3", "T6m_CY3", "T30m_CY3", "T100m_CY3", "Ladder1_CY3", "Ladder2_CY3"]
name_list_cy5 = ["T0s_CY5", "T10s_CY5", "T30s_CY5", "T1m_CY5", "T3m_CY5", "T6m_CY5", "T30m_CY5", "T100m_CY5", "Ladder1_CY5", "Ladder2_CY5"]
name_list = name_list_cy3 + name_list_cy5

# Rescale the distance per lane so that the lane's first marker maps to 0 and its last to 1.
norm_list = [
    (data["Distance"] - first_marker) / (last_marker - first_marker)
    for first_marker, last_marker in min_max_list
]

# One "<lane>_norm_distance" column per entry, in the same order as name_list.
for position, norm_distance in enumerate(norm_list):
    data[name_list[position] + "_norm_distance"] = norm_distance

data

In [None]:
# Export the ladder-1 CY5 profile (normalized distance and baseline-corrected signal)
# to a CSV file in the current working directory.
df = data[['Ladder1_CY5_norm_distance', 'Grey_Values_L1_CY5_baseline']]
df.to_csv('Ladder1_CY5_norm_distance.csv', encoding='utf-8', index = False)

In [None]:
# Names of the normalized-distance columns, per channel.
distance_norm_list_cy3 = [lane + "_norm_distance" for lane in name_list_cy3]
distance_norm_list_cy5 = [lane + "_norm_distance" for lane in name_list_cy5]

6. Plotting the 0-1 normalized data.

In [None]:
# Plotting the data with normalized distance (full range): CY3 on top, CY5 below.

name_list = ["T0s", "T10s", "T30s", "T1m", "T3m", "T6m", "T30m", "T100m", "Ladder1", "Ladder2"]

for di, d in enumerate(grey_values_list_cy3):
    fig, axes = plt.subplots(2, 1, figsize=(14, 4))

    ax = axes[0]
    ax.set_title(name_list[di])
    ax.plot(data[distance_norm_list_cy3[di]], data[grey_values_list_cy3_baseline[di]], color = "green")
    ax.set_ylabel('CY3', rotation = 0, ha='left')
    ax.yaxis.set_label_position("right")

    ax = axes[1]
    ax.plot(data[distance_norm_list_cy5[di]], data[grey_values_list_cy5_baseline[di]])
    ax.set_ylabel('CY5', rotation = 0, ha='left')
    ax.yaxis.set_label_position("right")
    # The x values are the rescaled (unitless) positions, not centimetres.
    ax.set_xlabel("Normalized distance")

In [None]:
# Plotting the data with normalized distance, restricted to the 0-1 window
# between the first and last marker: CY3 on top, CY5 below.

name_list = ["T0s", "T10s", "T30s", "T1m", "T3m", "T6m", "T30m", "T100m", "Ladder1", "Ladder2"]

for di, d in enumerate(grey_values_list_cy3):
    fig, axes = plt.subplots(2, 1, figsize=(14, 4))

    ax = axes[0]
    ax.set_title(name_list[di])
    ax.plot(data[distance_norm_list_cy3[di]], data[grey_values_list_cy3_baseline[di]], color = "green")
    ax.set_ylabel('CY3', rotation = 0, ha='left')
    ax.yaxis.set_label_position("right")
    ax.set_xlim(0, 1)

    ax = axes[1]
    ax.plot(data[distance_norm_list_cy5[di]], data[grey_values_list_cy5_baseline[di]])
    ax.set_ylabel('CY5', rotation = 0, ha='left')
    ax.yaxis.set_label_position("right")
    # The x values are the rescaled (unitless) positions, not centimetres.
    ax.set_xlabel("Normalized distance")
    ax.set_xlim(0, 1)

**II. Ladder**

1. Finding the peak values. 

In [None]:
#Ladder lane 1 (CY5)

plt.figure(figsize=(15,6))
plot = plt.plot(data['Ladder1_CY5_norm_distance'], data['Grey_Values_L1_CY5_baseline'])
# The x values are the rescaled (unitless) positions, not centimetres.
plt.xlabel("Normalized distance")
plt.ylabel("Grey Values")

plt.show()

In [None]:
#Finding ladder 1 peak values:

peaks_l1, values_l1 = find_peaks(data['Grey_Values_L1_CY5_baseline'], height=600)
# Select the ladder's normalized-distance column by name: a positional index (-2)
# silently points at a different column as soon as any column is appended to `data`.
peak_values_l1 = data['Ladder1_CY5_norm_distance'].iloc[peaks_l1]

In [None]:
# Convert the selected peak positions to a plain list and display them.
peak_values_l1 = list(peak_values_l1)
peak_values_l1

In [None]:
# Ladder lane 1 with the detected peaks marked in red.
plt.figure(figsize=(15,6))
plot = plt.plot(data['Ladder1_CY5_norm_distance'], data['Grey_Values_L1_CY5_baseline'])
# Read the heights by key: passing values_l1.values() only works while the
# properties dict returned by find_peaks holds exactly one entry.
plt.scatter(peak_values_l1, values_l1["peak_heights"], color='red')
# The x values are the rescaled (unitless) positions, not centimetres.
plt.xlabel("Normalized distance")
plt.ylabel("Grey Values")
plt.show()

In [None]:
# Crop the ladder's normalized distance to the rows from the first to the last detected peak (inclusive).
first_peak_row, last_peak_row = peaks_l1[0], peaks_l1[-1]
cut_x_data = data['Ladder1_CY5_norm_distance'].iloc[first_peak_row:last_peak_row + 1]
cut_x_data

In [None]:
# Crop the ladder's baseline-corrected signal to the same first-to-last-peak row window.
first_peak_row, last_peak_row = peaks_l1[0], peaks_l1[-1]
cut_y_data = data['Grey_Values_L1_CY5_baseline'].iloc[first_peak_row:last_peak_row + 1]
cut_y_data

In [None]:
# Cropped ladder lane 1 with the detected peaks marked in red.
plt.figure(figsize=(15,6))
plot = plt.plot(cut_x_data, cut_y_data)
# Read the heights by key: passing values_l1.values() only works while the
# properties dict returned by find_peaks holds exactly one entry.
plt.scatter(peak_values_l1, values_l1["peak_heights"], color='red')
# The x values are the rescaled (unitless) positions, not centimetres.
plt.xlabel("Normalized distance")
plt.ylabel("Grey Values")
plt.show()

2.1 Converting distance to nt. A single exponential curve is fitted through all ladder peaks.

In [None]:
# Reference values for the ladder lane; they are paired positionally with the detected ladder peaks.
# NOTE(review): presumably the band sizes in nt, listed in the order the peaks are detected — confirm.
ladder_lane = [141, 116, 81, 71, 66, 60, 55, 51, 46, 41, 35, 28]

In [None]:
# Pair every ladder value with the normalized distance of the detected peak at the same position.
# The pairing is purely positional, so both lists must have the same length; otherwise
# pd.concat would silently pad the shorter column with NaN.
if len(peak_values_l1) != len(ladder_lane):
    raise ValueError(
        f"Expected {len(ladder_lane)} ladder peaks but detected {len(peak_values_l1)}; "
        "adjust the find_peaks height threshold or ladder_lane."
    )

ladder_df = pd.DataFrame(ladder_lane, columns=["Ladder"])
values_df = pd.DataFrame(peak_values_l1, columns=["Norm. distance"])
curve_data = pd.concat([ladder_df, values_df], axis=1)
curve_data 

In [None]:
def fitting_function(xs, ys):
    """Fit ``ys = m * exp(-t * xs) + b`` and report the result.

    Prints R² and the fitted equation, and draws the data points together with
    the fitted curve on a new figure.

    Parameters
    ----------
    xs, ys : array-like
        Independent and dependent values; they must support element-wise
        arithmetic (the notebook passes pandas Series or NumPy arrays).

    Returns
    -------
    list
        The fitted parameters ``[m, t, b]``.
    """
    # perform the fit
    p0 = (60, 1, 30) # start with values near those we expect
    params, cv = scipy.optimize.curve_fit(monoExp, xs, ys, p0, maxfev=5000)
    m, t, b = params

    # determine quality of the fit
    squaredDiffs = np.square(ys - monoExp(xs, m, t, b))
    squaredDiffsFromMean = np.square(ys - np.mean(ys))
    rSquared = 1 - np.sum(squaredDiffs) / np.sum(squaredDiffsFromMean)
    print(f"R² = {rSquared}")

    # plot the results
    plt.figure(figsize=(10,6))
    plt.plot(xs, ys, '.', label="data")
    # A fixed number of samples covers [min, max] including the end point,
    # regardless of how narrow the x range is (a fixed 0.01 step does not).
    model_x = np.linspace(np.min(xs), np.max(xs), 200)
    plt.plot(model_x, monoExp(model_x, m, t, b), label="fitted")
    # Show the "data"/"fitted" labels assigned above.
    plt.legend()

    # inspect the parameters
    print(f"Y = {m} * e^(-{t} * x) + {b}")

    return [m, t, b]

def monoExp(x, m, t, b):
    """Mono-exponential model: ``m * exp(-t * x) + b``."""
    decay = np.exp(-t * x)
    return m * decay + b
    

In [None]:
# Fit inputs: x is the normalized peak distance, y is the ladder value.
xs = curve_data["Norm. distance"]
ys = curve_data["Ladder"]

plt.figure(figsize=(10,6))
plt.plot(xs, ys, '.')
plt.title("Original Data")

In [None]:
# Fit one exponential through all ladder peaks; const_values holds the fitted [m, t, b].
const_values = fitting_function(xs, ys)

In [None]:
# Apply the single fitted exponential to every normalized distance of the cropped ladder trace.
fit_m, fit_t, fit_b = const_values
exp_x_list = [fit_m * math.exp(-fit_t * position) + fit_b for position in cut_x_data]

In [None]:
# Cropped ladder signal against the converted x values, with the x axis reversed.
plt.figure(figsize=(15,6))
plot = plt.plot(exp_x_list, cut_y_data)
# NOTE(review): `ax` receives the return value of invert_xaxis(), not the Axes object —
# confirm nothing downstream expects an Axes here.
ax = plt.gca().invert_xaxis()
plt.xlabel("Size in nt")
plt.ylabel("Grey Values")
plt.show()

In [None]:
# Converted value at each ladder peak. The cropped data starts at row peaks_l1[0],
# so every peak row is shifted by that offset to index into exp_x_list.
exp_ladder_list = [exp_x_list[peak_row - peaks_l1[0]] for peak_row in peaks_l1]
exp_ladder_list

In [None]:
# Append the single-fit result at each peak as a new column next to the ladder values.
values_exp_ladder = pd.DataFrame(exp_ladder_list, columns=["Exp. all peaks"])
curve_data2 = pd.concat([curve_data, values_exp_ladder], axis=1)
curve_data2

2.2 Converting distance to nt. Piecewise exponential fits over sliding windows of 4 ladder peaks (neighbouring windows overlap by 3 peaks).

In [None]:
# Rows of curve_data as plain lists: [ladder value, normalized distance].
curve_data_list = curve_data.values.tolist()
curve_data_list

In [None]:
# Split the [ladder value, normalized distance] rows into two parallel lists.
curve_data_list_nt = [ladder_value for ladder_value, _ in curve_data_list]
curve_data_list_dist = [norm_distance for _, norm_distance in curve_data_list]

In [None]:
# Sliding windows of `size` consecutive ladder peaks; neighbouring windows share `overlap` peaks.
overlap = 3
size = 4

window_starts = range(0, len(curve_data_list_nt) - overlap, size - overlap)

overlap_list_nt = [curve_data_list_nt[start:start + size] for start in window_starts]
overlap_list_dist = [curve_data_list_dist[start:start + size] for start in window_starts]

overlap_list_nt

In [None]:
# Fit one exponential per window; values_list[k] holds the [m, t, b] of window k.
values_list = []
for window_dist, window_nt in zip(overlap_list_dist, overlap_list_nt):
    xs = np.array(window_dist)
    ys = np.array(window_nt)
    values_list.append(fitting_function(xs, ys))

In [None]:
# Row ranges between consecutive ladder peaks: [peak k, peak k+1].
ranges_list = [[start_row, stop_row] for start_row, stop_row in zip(peaks_l1[:-1], peaks_l1[1:])]

# Normalized distances inside each range, both end points included. The cropped data
# starts at row peaks_l1[0], hence the shift when slicing.
data_ranges_list = [
    list(cut_x_data[start_row - peaks_l1[0]:stop_row - peaks_l1[0] + 1])
    for start_row, stop_row in ranges_list
]

In [None]:
# Dividing data into ranges and removing overlapping values

# Regroup the peak-to-peak ranges into one group per fitted window: the first group
# merges the first two ranges, the last group merges the final two, and every range in
# between forms its own group. dict.fromkeys drops the point shared by merged ranges.
# Taking the ends with negative indices avoids hard-coding the number of ladder peaks
# (at least 5 peaks are needed for the two merged groups not to overlap).
first_group = list(dict.fromkeys(data_ranges_list[0] + data_ranges_list[1]))
last_group = list(dict.fromkeys(data_ranges_list[-2] + data_ranges_list[-1]))
middle_groups = data_ranges_list[2:-2]

# Neighbouring groups share their boundary point, so it is dropped from every group
# except the last. Slicing builds new lists instead of deleting from the lists that
# data_ranges_list still references, which keeps this cell safe to re-run.
new_data_ranges_list = [group[:-1] for group in [first_group, *middle_groups]] + [last_group]

In [None]:
# Convert each group of normalized distances with the exponential fitted for its window.
exp_x_list_4p = []

for group_index, group in enumerate(new_data_ranges_list):
    for position in group:
        fit_m, fit_t, fit_b = values_list[group_index]
        exp_x_list_4p.append(fit_m * math.exp(-fit_t * position) + fit_b)

exp_x_list_4p

In [None]:
# Cropped ladder signal against the piecewise (4-peak window) conversion.
# Plot exp_x_list_4p here: plotting exp_x_list would just repeat the single-fit figure of section 2.1.
plt.figure(figsize=(15,6))
plot = plt.plot(exp_x_list_4p, cut_y_data)
plt.gca().invert_xaxis()
plt.xlabel("Size in nt")
plt.ylabel("Grey Values")
plt.show()

In [None]:
# Converted value at each ladder peak for the piecewise fit.
# The cropped data starts at row peaks_l1[0], so peak rows are shifted by that offset
# (taken from the data instead of a hard-coded 18, which is only valid for one gel).
peak_positions = {int(row) for row in peaks_l1 - peaks_l1[0]}
exp_ladder_list_4p = [
    value for position, value in enumerate(exp_x_list_4p) if position in peak_positions
]
exp_ladder_list_4p

In [None]:
# Append the piecewise-fit result at each peak as a further column for comparison.
values_exp_ladder_4p = pd.DataFrame(exp_ladder_list_4p, columns=["Exp. all peaks, 4 points"])
curve_data3 = pd.concat([curve_data2, values_exp_ladder_4p], axis=1)
curve_data3

In [None]:
# Crop the T0 CY5 signal to the row window spanned by the ladder peaks (first to last, inclusive).
first_peak_row, last_peak_row = peaks_l1[0], peaks_l1[-1]
cut_y0_data = data['Grey_Values_T0_CY5_baseline'].iloc[first_peak_row:last_peak_row + 1]
cut_y0_data

**III. AUC**

1. Applying conversion to nt.

In [None]:
# One two-column frame per time point: the lane's normalized distance (CY5 markers)
# and its baseline-corrected CY5 signal.
data_t0s = data[['T0s_CY5_norm_distance', "Grey_Values_T0_CY5_baseline"]]
data_t10s = data[['T10s_CY5_norm_distance', "Grey_Values_T10s_CY5_baseline"]]
data_t30s = data [['T30s_CY5_norm_distance', "Grey_Values_T30s_CY5_baseline"]]
data_t1m = data[['T1m_CY5_norm_distance', "Grey_Values_T1M_CY5_baseline"]]
data_t3m = data[['T3m_CY5_norm_distance', "Grey_Values_T3M_CY5_baseline"]]
data_t6m = data[['T6m_CY5_norm_distance', "Grey_Values_T6M_CY5_baseline"]]
data_t30m = data[['T30m_CY5_norm_distance', "Grey_Values_T30M_CY5_baseline"]]
data_t100m = data[['T100m_CY5_norm_distance', "Grey_Values_T100M_CY5_baseline"]]

In [None]:
# Lane frames in time order (T0s ... T100m); the ladder lanes are not included.
data_list = [data_t0s, data_t10s, data_t30s, data_t1m, data_t3m, data_t6m, data_t30m, data_t100m]

2. Selecting substrate area based on t0s.

In [None]:
# Selecting area for the analysis: rows 50 to 299 of every lane.
data_cutpeak_list1 = [lane[50:300] for lane in data_list]
 

In [None]:
# Plotting the selected area of every lane against the normalized distance.

name_list = ["T0s", "T10s", "T30s", "T1m", "T3m", "T6m", "T30m", "T100m"]
figure, axis = plt.subplots(nrows=8, figsize=(18, 30), constrained_layout=True)

for di, d in enumerate(data_cutpeak_list1):
    axis[di].plot(d.iloc[:, 0], d.iloc[:, 1])
    axis[di].set_title(str(name_list[di]))
    # Column 0 of each lane frame is the rescaled (unitless) position, not centimetres.
    axis[di].set(xlabel="Normalized distance", ylabel="Grey Values")

In [None]:
# Getting the total area under the curve in each lane
# (x: normalized distance, y: baseline-corrected signal).
auc_total_area_list = [auc(lane.iloc[:, 0], lane.iloc[:, 1]) for lane in data_list]

auc_total_area_list 

In [None]:
# Getting the area under the curve of substrate in each lane
# (same row window, 50 to 299, as the selected area above).
auc_substrate_list = [auc(lane.iloc[50:300, 0], lane.iloc[50:300, 1]) for lane in data_list]

auc_substrate_list

In [None]:
# Getting the fraction of substrate in each lane compared to the lane's total area
auc_percent_of_total_list = [
    substrate_area / total_area
    for substrate_area, total_area in zip(auc_substrate_list, auc_total_area_list)
]

auc_percent_of_total_list

In [None]:
# Getting the substrate fraction of each lane relative to the first lane (T0s)
reference_fraction = auc_percent_of_total_list[0]
auc_percent_substrate_list = [fraction / reference_fraction for fraction in auc_percent_of_total_list]

auc_percent_substrate_list

In [None]:
# Build the decay table: one row per time point with the relative substrate fraction.
# NOTE(review): the last measured value (T100m) is overwritten with 0 by hand before the
# table is built — confirm this is intended, since the exported result no longer
# reflects the computed value for that lane.
auc_percent_substrate_list[-1] = 0
# NOTE(review): presumably seconds (10 s ... 100 min = 6000 s), matching the lane order of data_list — confirm.
time_points = [0, 10, 30, 60, 180, 360, 1800, 6000]
time = pd.DataFrame(time_points, columns=["Time_Points"])
auc_sub = pd.DataFrame(auc_percent_substrate_list, columns=["Decay"])
df_sub = pd.concat([time, auc_sub], axis=1)
df_sub

In [None]:
# Export the decay table without index and header.
# to_csv does not create missing folders, so make sure the results folder exists first.
substrate_decay_path = Path('data/results_20240110/substrate_decay.csv')
substrate_decay_path.parent.mkdir(parents=True, exist_ok=True)
df_sub.to_csv(substrate_decay_path, encoding='utf-8', index = False, header = False)