In [301]:
import scipy.stats as stats
import numpy as np
import pandas as pd
from tkinter import filedialog
import matplotlib.pyplot as plt
import seaborn as sns
import os
from math import ceil
from rich.progress import track

# Disable chained assignment warning
pd.options.mode.chained_assignment = None

In [302]:
um_per_pixel = 0.117
s_per_frame = 2
window_size = 5
dtype_dict = {
    "t": "float64",
    "x": "float64",
    "y": "float64",
    "trackID": "Int64",
}

In [303]:
# File selection dialog (make sure to run this in an environment that supports dialogs, or replace with a direct file path)
csv_file_path = filedialog.askopenfilename(
    title="Select CSV File",
    filetypes=(("CSV files", "*.csv"), ("All files", "*.*")),
)

In [304]:
# Read the CSV file with predefined data types
df = pd.read_csv(csv_file_path, dtype=dtype_dict)

# Data cleaning and type conversion
df = df.dropna(subset=["t", "x", "y"])
df["trackID"] = pd.to_numeric(df["trackID"], errors="coerce").astype("Int64")
df = df.dropna(subset=["trackID"])

# Data scaling
df["t"] *= s_per_frame
df["x"] *= um_per_pixel
df["y"] *= um_per_pixel

df[["trackID", "x", "y", "t"]]

Unnamed: 0,trackID,x,y,t
0,0,11.865987,17.556255,0.0
1,0,11.850599,17.554147,2.0
2,0,11.825462,17.509511,4.0
3,0,11.847452,17.497576,6.0
4,0,11.854185,17.515370,8.0
...,...,...,...,...
36784,478,20.382938,31.687488,390.0
36785,478,20.479945,31.677403,392.0
36786,478,20.472566,31.656533,394.0
36787,478,20.521312,31.722895,396.0


In [305]:
# groupby and sort operation:

grouped_tracks = df.groupby("trackID", group_keys=True).apply(lambda x: x.sort_values("t"))

for track_id, track_data in grouped_tracks.groupby(level=0):
    print(f"trackID: {track_id}")
    print(track_data)
    break

trackID: 0
                spotID  trackID      QUALITY          x          y      t  \
trackID                                                                     
0       0    1068926.0        0  1045.519165  11.865987  17.556255    0.0   
        1    1071420.0        0  1216.241089  11.850599  17.554147    2.0   
        2    1077846.0        0  1288.747925  11.825462  17.509511    4.0   
        3    1078668.0        0  1223.735596  11.847452  17.497576    6.0   
        4    1070107.0        0  1217.275391  11.854185  17.515370    8.0   
...                ...      ...          ...        ...        ...    ...   
        195  1150522.0        0  1052.832642  11.737797  17.458084  390.0   
        196  1149706.0        0  1030.824707  11.747215  17.512622  392.0   
        197  1150925.0        0   964.472107  11.762474  17.496453  394.0   
        198  1151327.0        0  1148.490967  11.687325  17.524704  396.0   
        199  1151736.0        0  1037.414795  11.329165  17.61623

In [306]:
def calc_MSD_NonPhysUnit(track_data, lags):

    Xs = track_data["x"].to_numpy()
    Ys = track_data["y"].to_numpy()

    MSDs = []
    for lag in lags:
        displacements = (Xs[:-lag] - Xs[lag:]) ** 2 + (Ys[:-lag] - Ys[lag:]) ** 2
        valid_displacements = displacements[~np.isnan(displacements)]
        MSD = np.nanmean(valid_displacements)
        MSDs.append(MSD)

    return np.array(MSDs, dtype=float)

In [307]:
for track_id, track_data in grouped_tracks.groupby(level=0):

    lags = np.arange(1, (window_size + 1))
    MSDs = calc_MSD_NonPhysUnit(track_data, lags)

    print(f"trackID : {track_id}")
    print(f"MSD: {MSDs}\n")

    break

trackID : 0
MSD: [0.00273752 0.00421892 0.00536604 0.00639606 0.00706112]



In [308]:
def calc_alpha(MSDs, lags):

    valid_indices = ~np.isnan(MSDs)
    valid_MSDs = MSDs[valid_indices]
    valid_lags = lags[valid_indices]

    log_lags = np.log10(valid_lags)
    log_MSDs = np.log10(valid_MSDs)

    slope, intercept, r_value, p_value, std_error = stats.linregress(log_lags, log_MSDs)

    alpha = slope
    diffusion_coefficient = (1 / 4) * (10**intercept)
    r_squared = r_value**2

    return alpha, r_squared, diffusion_coefficient

In [309]:
for track_id, track_data in grouped_tracks.groupby(level=0):
    lags = np.arange(1, (window_size + 1))
    MSDs = calc_MSD_NonPhysUnit(track_data, lags)

    if not np.any(np.isnan(MSDs)):
        alpha, r_squared, diffusion_coefficient = calc_alpha(MSDs, lags)

        print(f"trackID: {track_id}")
        print(f"Alpha: {alpha}")
        print(f"R^2: {r_squared}")
        print(f"Diffusion Coefficient: {diffusion_coefficient}\n")

    break

trackID: 0
Alpha: 0.5956534724012006
R^2: 0.9984677728974671
Diffusion Coefficient: 0.0006912662377722752



In [310]:
def calculate_alpha_and_D_for_track(df_track, um_per_pixel, s_per_frame, window_size):

    df_track["R2"] = np.nan
    df_track["alpha"] = np.nan
    df_track["D"] = np.nan

    window_info = []

    # Iterate through windows and update alpha for the middle frame
    step_size = 1
    for start in range(0, len(df_track) - window_size + 1, step_size):
        end = start + window_size
        df_window = df_track.iloc[start:end]

        number_lag = ceil(window_size / 2)
        if number_lag < 3:
            number_lag = 3
        window_msd = calc_MSD_NonPhysUnit(
            df_track.iloc[start:end], np.arange(1, number_lag + 1)
        )
        if np.sum(window_msd <= 0) > 0:
            # Skip this window since it contains invalid MSD values
            continue

        alpha, r_squared, D = calc_alpha(
            window_msd, np.arange(1, number_lag + 1) * s_per_frame
        )
        # r_squared = r_value**2
        if not np.isnan(alpha):
            middle_frame_index = start + ceil(window_size / 2)
            df_track.at[df_track.index[middle_frame_index], "R2"] = r_squared
            df_track.at[df_track.index[middle_frame_index], "alpha"] = alpha
            df_track.at[df_track.index[middle_frame_index], "D"] = D

            window_info.append(
                {
                    "window_start_frame": df_window.iloc[0]["t"],
                    "window_end_frame": df_window.iloc[-1]["t"],
                    "alpha": alpha,
                    "R2": r_squared,
                    "D": D,
                    "lags": np.arange(1, number_lag + 1).tolist(),
                }
            )

    return df_track, window_info

In [311]:
for track_id, df_track in grouped_tracks.groupby(level=0):

    result_df_track, window_info = calculate_alpha_and_D_for_track(
        df_track, um_per_pixel, s_per_frame, window_size
    )
    calculated = result_df_track.dropna(subset=["alpha", "R2", "D"])

    print(f"trackID: {track_id}")

    if not calculated.empty:
        print(
            calculated[["x", "y", "t", "R2", "alpha", "D"]]
        )
        for info in window_info:
            print(
                f"Window from {info['window_start_frame']} to {info['window_end_frame']}s: lags = {info['lags']}"
            )
            print(f"Alpha: {info['alpha']}, R2: {info['R2']}, D: {info['D']}\n")

    print("\n")

    break

trackID: 0
                     x          y      t        R2     alpha         D
trackID                                                               
0       3    11.847452  17.497576    6.0  0.871088  0.978549  0.000133
        4    11.854185  17.515370    8.0  0.782439  0.180230  0.000469
        5    11.831834  17.447612   10.0  0.951582  0.925610  0.000250
        6    11.854118  17.421677   12.0  0.994143  1.232036  0.000254
        7    11.906279  17.418952   14.0  0.998915  1.193610  0.000316
...                ...        ...    ...       ...       ...       ...
        194  11.723378  17.505315  388.0  0.332334 -0.350581  0.000882
        195  11.737797  17.458084  390.0  0.058538 -0.402320  0.000530
        196  11.747215  17.512622  392.0  0.013123 -0.104408  0.000481
        197  11.762474  17.496453  394.0  0.106998  0.204640  0.000588
        198  11.687325  17.524704  396.0  0.998952  0.882763  0.005004

[196 rows x 6 columns]
Window from 0.0 to 8.0s: lags = [1, 2, 3]


In [312]:
def process_csv_and_add_alpha_and_D(
    csv_file_path, window_size, um_per_pixel, s_per_frame
):

    df = pd.read_csv(csv_file_path, dtype=dtype_dict)
    # df["t"] *= s_per_frame
    # df["x"] *= um_per_pixel
    # df["y"] *= um_per_pixel

    processed_track_list = []

    grouped_tracks = df.groupby("trackID")
    for track_id, df_track in grouped_tracks:
        processed_track, window_info = calculate_alpha_and_D_for_track(
            df_track, um_per_pixel, s_per_frame, window_size
        )
        processed_track_list.append(
            processed_track[
                [
                    "trackID",
                    "x",
                    "y",
                    "t",
                    "R2",
                    "alpha",
                    "D",
                ]
            ]
        )

    processed_df = pd.concat(processed_track_list).reset_index(drop=True)

    save_path = os.path.dirname(csv_file_path)
    base_name = os.path.basename(csv_file_path)
    name, ext = os.path.splitext(base_name)

    output_file_name = f"{name}_processed_alpha_and_D_w{window_size}{ext}"
    output_file_path = os.path.join(save_path, output_file_name)

    processed_df.to_csv(output_file_path, index=False)
    print(f"Processed CSV file saved: {output_file_path}")

    return output_file_path


process_csv_and_add_alpha_and_D(csv_file_path, window_size, um_per_pixel, s_per_frame)

Processed CSV file saved: Z:/Bisal_Halder_turbo/PROCESSED_DATA/Impact_of_cytoskeleton_on_HOPS_condensates/LatrunculinA/5uM/2x/Analysed Data/60min/Reformatted\20240321_UGD-2x-2s-5uM-LatrunculinA-60min-replicate2-FOV-3_reformatted_processed_alpha_and_D_w5.csv


'Z:/Bisal_Halder_turbo/PROCESSED_DATA/Impact_of_cytoskeleton_on_HOPS_condensates/LatrunculinA/5uM/2x/Analysed Data/60min/Reformatted\\20240321_UGD-2x-2s-5uM-LatrunculinA-60min-replicate2-FOV-3_reformatted_processed_alpha_and_D_w5.csv'