In [112]:
import scipy.stats as stats
import numpy as np
import pandas as pd
from tkinter import filedialog
import matplotlib.pyplot as plt
import seaborn as sns
import os
from math import ceil
from rich.progress import track

# Disable chained assignment warning
pd.options.mode.chained_assignment = None

In [113]:
um_per_pixel = 0.117
s_per_frame = 2
window_size = 5
dtype_dict = {
    "t": "float64",
    "x": "float64",
    "y": "float64",
    "trackID": "Int64",
}

In [114]:
# File selection dialog (make sure to run this in an environment that supports dialogs, or replace with a direct file path)
csv_file_path = filedialog.askopenfilename(
    title="Select CSV File",
    filetypes=(("CSV files", "*.csv"), ("All files", "*.*")),
)

In [115]:
# Read the CSV file with predefined data types
df = pd.read_csv(csv_file_path, dtype=dtype_dict)

# Data cleaning and type conversion
df = df.dropna(subset=["t", "x", "y"])
df["trackID"] = pd.to_numeric(df["trackID"], errors="coerce").astype("Int64")
df = df.dropna(subset=["trackID"])

# Data scaling
df["t"] *= s_per_frame
df["x"] *= um_per_pixel
df["y"] *= um_per_pixel

df[["trackID", "x", "y", "t"]]

Unnamed: 0,trackID,x,y,t
0,0,32.373397,15.309703,0.0
1,0,32.374962,15.323184,2.0
2,0,32.366657,15.340336,4.0
3,0,32.341169,15.341604,6.0
4,0,32.418121,15.315826,8.0
...,...,...,...,...
173960,2254,36.944881,42.855212,378.0
173961,2254,36.936062,42.838893,380.0
173962,2254,36.966318,42.864858,382.0
173963,2254,36.923269,42.867848,384.0


In [116]:
# groupby and sort operation:

grouped_tracks = df.groupby("trackID", group_keys=True).apply(lambda x: x.sort_values("t"))

for track_id, track_data in grouped_tracks.groupby(level=0):
    print(f"trackID: {track_id}")
    print(track_data)
    break

trackID: 0
                spotID  trackID     QUALITY          x          y      t    R  \
trackID                                                                         
0       0    1862725.0        0  388.725006  32.373397  15.309703    0.0  2.0   
        1    1853589.0        0  488.622528  32.374962  15.323184    2.0  2.0   
        2    1872483.0        0  531.795776  32.366657  15.340336    4.0  2.0   
        3    1872510.0        0  533.085266  32.341169  15.341604    6.0  2.0   
        4    1852639.0        0  472.626099  32.418121  15.315826    8.0  2.0   
...                ...      ...         ...        ...        ...    ...  ...   
        108  1954319.0        0  307.770844  32.010223  15.426631  216.0  2.0   
        109  1952751.0        0  301.908356  31.973639  15.429954  218.0  2.0   
        110  1950119.0        0  317.623627  31.969367  15.435809  220.0  2.0   
        111  1959725.0        0  303.694855  31.987043  15.431787  222.0  2.0   
        112  1953

In [117]:
def calc_MSD_NonPhysUnit(track_data, lags):

    Xs = track_data["x"].to_numpy()
    Ys = track_data["y"].to_numpy()

    MSDs = []
    for lag in lags:
        displacements = (Xs[:-lag] - Xs[lag:]) ** 2 + (Ys[:-lag] - Ys[lag:]) ** 2
        valid_displacements = displacements[~np.isnan(displacements)]
        MSD = np.nanmean(valid_displacements)
        MSDs.append(MSD)

    return np.array(MSDs, dtype=float)

In [118]:
for track_id, track_data in grouped_tracks.groupby(level=0):

    lags = np.arange(1, (window_size + 1))
    MSDs = calc_MSD_NonPhysUnit(track_data, lags)

    print(f"trackID : {track_id}")
    print(f"MSD: {MSDs}\n")

    break

trackID : 0
MSD: [0.00155973 0.00274899 0.00379296 0.0053183  0.00629954]



In [119]:
def calc_alpha(MSDs, lags):

    valid_indices = ~np.isnan(MSDs)
    valid_MSDs = MSDs[valid_indices]
    valid_lags = lags[valid_indices]

    log_lags = np.log10(valid_lags)
    log_MSDs = np.log10(valid_MSDs)

    slope, intercept, r_value, p_value, std_error = stats.linregress(log_lags, log_MSDs)

    alpha = slope
    diffusion_coefficient = (1 / 4) * (10**intercept)
    r_squared = r_value**2

    return alpha, r_squared, diffusion_coefficient

In [120]:
for track_id, track_data in grouped_tracks.groupby(level=0):
    lags = np.arange(1, (window_size + 1))
    MSDs = calc_MSD_NonPhysUnit(track_data, lags)

    if not np.any(np.isnan(MSDs)):
        alpha, r_squared, diffusion_coefficient = calc_alpha(MSDs, lags)

        print(f"trackID: {track_id}")
        print(f"Alpha: {alpha}")
        print(f"R^2: {r_squared}")
        print(f"Diffusion Coefficient: {diffusion_coefficient}\n")

    break

trackID: 0
Alpha: 0.8738200585316298
R^2: 0.9961822432507514
Diffusion Coefficient: 0.0003817953672760974



In [121]:
def calculate_alpha_and_D_for_track(df_track, um_per_pixel, s_per_frame, window_size):

    df_track["R2"] = np.nan
    df_track["alpha"] = np.nan
    df_track["D"] = np.nan

    window_info = []

    # Iterate through windows and update alpha for the middle frame
    step_size = 1
    for start in range(0, len(df_track) - window_size + 1, step_size):
        end = start + window_size
        df_window = df_track.iloc[start:end]

        number_lag = ceil(window_size / 2)
        if number_lag < 3:
            number_lag = 3
        window_msd = calc_MSD_NonPhysUnit(
            df_track.iloc[start:end], np.arange(1, number_lag + 1)
        )
        if np.sum(window_msd <= 0) > 0:
            # Skip this window since it contains invalid MSD values
            continue

        alpha, r_squared, D = calc_alpha(
            window_msd, np.arange(1, number_lag + 1) * s_per_frame
        )
        # r_squared = r_value**2
        if not np.isnan(alpha):
            middle_frame_index = start + ceil(window_size / 2)
            df_track.at[df_track.index[middle_frame_index], "R2"] = r_squared
            df_track.at[df_track.index[middle_frame_index], "alpha"] = alpha
            df_track.at[df_track.index[middle_frame_index], "D"] = D

            window_info.append(
                {
                    "window_start_frame": df_window.iloc[0]["t"],
                    "window_end_frame": df_window.iloc[-1]["t"],
                    "alpha": alpha,
                    "R2": r_squared,
                    "D": D,
                    "lags": np.arange(1, number_lag + 1).tolist(),
                }
            )

    return df_track, window_info

In [122]:
for track_id, df_track in grouped_tracks.groupby(level=0):

    result_df_track, window_info = calculate_alpha_and_D_for_track(
        df_track, um_per_pixel, s_per_frame, window_size
    )
    calculated = result_df_track.dropna(subset=["alpha", "R2", "D"])

    print(f"trackID: {track_id}")

    if not calculated.empty:
        print(
            calculated[["x", "y", "t", "R2", "alpha", "D"]]
        )
        for info in window_info:
            print(
                f"Window from {info['window_start_frame']} to {info['window_end_frame']}s: lags = {info['lags']}"
            )
            print(f"Alpha: {info['alpha']}, R2: {info['R2']}, D: {info['D']}\n")

    print("\n")

    break

trackID: 0
                     x          y      t        R2     alpha         D
trackID                                                               
0       3    32.341169  15.341604    6.0  0.124229  0.013339  0.000478
        4    32.418121  15.315826    8.0  0.618865  0.649770  0.000362
        5    32.439079  15.320363   10.0  0.999226  1.233312  0.000210
        6    32.433555  15.304612   12.0  0.932343  0.857705  0.000477
        7    32.514711  15.330764   14.0  0.996743  1.341323  0.000218
...                ...        ...    ...       ...       ...       ...
        107  32.015904  15.405097  214.0  0.999297  1.369219  0.000061
        108  32.010223  15.426631  216.0  0.996840  1.720862  0.000058
        109  31.973639  15.429954  218.0  0.999997  1.731016  0.000048
        110  31.969367  15.435809  220.0  0.966628  1.117927  0.000067
        111  31.987043  15.431787  222.0  0.855358  0.707887  0.000141

[109 rows x 6 columns]
Window from 0.0 to 8.0s: lags = [1, 2, 3]


In [123]:
def process_csv_and_add_alpha_and_D(
    csv_file_path, window_size, um_per_pixel, s_per_frame
):

    df = pd.read_csv(csv_file_path, dtype=dtype_dict)
    # df["t"] *= s_per_frame
    # df["x"] *= um_per_pixel
    # df["y"] *= um_per_pixel

    processed_track_list = []

    grouped_tracks = df.groupby("trackID")
    for track_id, df_track in grouped_tracks:
        processed_track, window_info = calculate_alpha_and_D_for_track(
            df_track, um_per_pixel, s_per_frame, window_size
        )
        processed_track_list.append(
            processed_track[
                [
                    "trackID",
                    "x",
                    "y",
                    "t",
                    "R2",
                    "alpha",
                    "D",
                ]
            ]
        )

    processed_df = pd.concat(processed_track_list).reset_index(drop=True)

    save_path = os.path.dirname(csv_file_path)
    base_name = os.path.basename(csv_file_path)
    name, ext = os.path.splitext(base_name)

    output_file_name = f"{name}_processed_alpha_and_D_w{window_size}{ext}"
    output_file_path = os.path.join(save_path, output_file_name)

    processed_df.to_csv(output_file_path, index=False)
    print(f"Processed CSV file saved: {output_file_path}")

    return output_file_path


process_csv_and_add_alpha_and_D(csv_file_path, window_size, um_per_pixel, s_per_frame)

Processed CSV file saved: Z:/Bisal_Halder_turbo/PROCESSED_DATA/Impact_of_cytoskeleton_on_HOPS_condensates/no_drug/Analysed Data/2x/Reformatted\20240116_UGD-2x-2s-replicate1-FOV-1_reformatted_processed_alpha_and_D_w5.csv


'Z:/Bisal_Halder_turbo/PROCESSED_DATA/Impact_of_cytoskeleton_on_HOPS_condensates/no_drug/Analysed Data/2x/Reformatted\\20240116_UGD-2x-2s-replicate1-FOV-1_reformatted_processed_alpha_and_D_w5.csv'