In [1]:
from flim_analysis.feature_extraction.extract_features import *
import config.params as params
import config.const as const
from utils.auxiliary_func import *
# Explicit imports stay after the star imports so `os` and `pd` always refer to
# the real modules, whatever names the star imports happen to export.
import os
import pandas as pd

# FLIM Data Feature Extraction

> NOTE: Run this only after preprocessing and segmentation are complete.

This notebook extracts features from FLIM data at two levels:

- Extracts and compiles single-nucleus features from full-tissue core samples.

- Extracts FLIM features for patch-based analysis using the specified `patch_size` and `overlap`.  
  These patch-level features enable spatial resolution and support localized analysis within tissue samples.


### Configuration

#### Patch Extraction Parameters 
Define the size of each patch used for processing large FLIM images.

Patches are extracted with a defined overlap.

In [2]:
# Patch geometry shared by every patch-wise cell in this notebook: it is passed to
# create_all_feature_patches_df, read_patches_files and build_lifetime_distribution_patch,
# and is embedded in the patch output directory / file names (size_<patch_size>_overlap_<overlap>).
patch_size = 1500        # Size of each square patch in pixels
overlap = 0.75           # Fractional overlap between adjacent patches (e.g., 0.75 = 75% overlap)

#### Lifetime Distribution Parameters
These parameters define the binning used to build the lifetime histograms (distributions): the maximum lifetime value and the width of each bin.


In [3]:
# Binning for the per-nucleus lifetime histograms, taken from the shared project configuration.
max_val = params.lifetime_distribution_max_val      # Maximum lifetime value to consider (upper limit)
bin_range = params.lifetime_distribution_bin_range  # Width of each bin in the histogram (resolution)

# round() picks the nearest whole bin count, so the last bin edge
# (bins_amount * bin_range) need not equal max_val: 13 / 0.73 -> 18 bins ending at 13.14.
# bins_amount is also embedded in the distribution CSV file names read further down,
# so this rounding rule has to stay in step with whatever wrote those files.
bins_amount = round(max_val / bin_range)            # Total number of bins

# Report the edge the bins actually reach as well as the configured maximum, so the
# message cannot disagree with the histogram column labels (e.g. "12.41-13.14").
print(
    f"Lifetime histogram will use {bins_amount} bins of size {bin_range} ns, "
    f"covering 0 to {bins_amount * bin_range:g} ns (configured maximum: {max_val} ns)."
)


Lifetime histogram will use 18 bins of size 0.73 ns, up to 13 ns.


# Core sample

## Feature extraction

Following segmentation of individual nuclei in FLIM images, this step extracts per-nucleus quantitative features that describe:
- **Morphological properties** (e.g., nuclear area, eccentricity)
- **Local nuclear density**: number of neighboring nuclei within a defined radius, divided by the area of the region (i.e., density = neighbors / πr²)
- **Fluorescence lifetime metrics** (mean lifetime per nucleus)


### Tissue wise

In [2]:
# Run single-nucleus feature extraction for the core samples. Per the log below, one
# LEAP<id>_single_cell_features.csv is saved per sample, and samples are started
# before earlier ones finish (so log lines interleave).
# Presumably this also writes the compiled FLIM_features_full_tissue.csv that the next
# cell reads — TODO confirm against create_all_feature_core_full_tissue_df.
# NOTE(review): the meaning of `with_mae` is not visible in this notebook — confirm what it toggles.
create_all_feature_core_full_tissue_df(with_mae=False)

Core Responder Count: 8
Core Non-Responder Count: 8
Resection Count: 0
Start with leap 015
Start with leap 023
Start with leap 036
Start with leap 019
Start with leap 017
Start with leap 021
Start with leap 032
Start with leap 034
Start with leap 028
Start with leap 038
DataFrame for leap 032 saved as LEAP032_single_cell_features.csv
Start with leap 039
DataFrame for leap 028 saved as LEAP028_single_cell_features.csv
Start with leap 041
DataFrame for leap 015 saved as LEAP015_single_cell_features.csv
Start with leap 042
DataFrame for leap 039 saved as LEAP039_single_cell_features.csv
Start with leap 043
DataFrame for leap 036 saved as LEAP036_single_cell_features.csv
Start with leap 044
DataFrame for leap 042 saved as LEAP042_single_cell_features.csv
Start with leap 066
DataFrame for leap 021 saved as LEAP021_single_cell_features.csv
DataFrame for leap 034 saved as LEAP034_single_cell_features.csv
DataFrame for leap 044 saved as LEAP044_single_cell_features.csv
DataFrame for leap 023 s

In [5]:
# Load the compiled per-nucleus feature table for the core samples.
df_file_path = os.path.join(
    const.FULL_TISSUE_DIR,
    "core",
    "FLIM_features_full_tissue.csv",
)
# leap_ID is forced to text instead of letting pandas infer a numeric dtype.
full_tissue_features_df = pd.read_csv(df_file_path, dtype={"leap_ID": str})
print(full_tissue_features_df.shape)  # (rows, columns)
full_tissue_features_df.head()

(7603779, 22)


Unnamed: 0,leap_ID,nucleus_label,X coordinate,Y coordinate,lifetime_mean,area,extent,solidity,perimeter,diameter_area,...,perimeter_crofton,major_axis_length,orientation,diameter_max,eccentricity,density_radius_20,density_radius_40,density_radius_60,density_radius_80,category
0,32,1,1335.169231,2723.292308,2.720446,65.0,0.722222,0.928571,28.142136,9.097284,...,29.361935,9.406879,0.89189,10.440307,0.318169,0.008754,0.011539,0.009991,0.008555,non responder
1,32,2,1117.571429,3347.375,2.923679,56.0,0.777778,0.949153,25.313708,8.444016,...,26.680418,8.962899,-0.572409,9.848858,0.46767,0.022282,0.019695,0.017861,0.015766,non responder
2,32,3,1234.854167,3566.583333,1.771021,48.0,0.888889,1.0,23.071068,7.81764,...,24.554261,9.510037,1.475704,9.486833,0.741052,0.015915,0.017507,0.01733,0.016015,non responder
3,32,4,1043.071429,2489.428571,3.386071,14.0,0.7,0.875,11.656854,4.222008,...,13.732908,4.762643,-1.209092,5.09902,0.608034,0.010345,0.007958,0.00672,0.006565,non responder
4,32,5,670.118644,3978.237288,2.304644,59.0,0.7375,0.967213,26.142136,8.667245,...,27.465816,9.526111,-1.185587,10.440307,0.55321,0.006366,0.010743,0.010522,0.010693,non responder


### Patch wise

> **NOTE:** Run this only AFTER tissue-wise feature extraction is complete.

In [6]:
# Patch-wise feature extraction using the patch_size / overlap set in the configuration
# cell. Per the log below, one LEAP<id>_<patch_ID>_single_cell_features.csv is saved per
# patch, where patch_ID looks like four underscore-separated pixel coordinates
# (e.g. 0_0_1500_1500 — presumably the patch's bounding box; confirm in the helper).
create_all_feature_patches_df(patch_size, overlap)

Core Responder Count: 8
Core Non-Responder Count: 8
Resection Count: 0
start extract features for patch size 1500, overlap 0.75
start with leap number 017
start with leap number 028
start with leap number 021
start with leap number 019
start with leap number 023
start with leap number 015
DataFrame for patch 1500_296_3000_1796 saved as LEAP019_1500_296_3000_1796_single_cell_features.csv
DataFrame for patch 0_0_1500_1500 saved as LEAP028_0_0_1500_1500_single_cell_features.csv
DataFrame for patch 9_3_1509_1503 saved as LEAP023_9_3_1509_1503_single_cell_features.csv
DataFrame for patch 78_127_1578_1627 saved as LEAP021_78_127_1578_1627_single_cell_features.csv
DataFrame for patch 384_3_1884_1503 saved as LEAP023_384_3_1884_1503_single_cell_features.csv
DataFrame for patch 3_2_1503_1502 saved as LEAP015_3_2_1503_1502_single_cell_features.csv
DataFrame for patch 759_3_2259_1503 saved as LEAP023_759_3_2259_1503_single_cell_features.csv
DataFrame for patch 453_127_1953_1627 saved as LEAP021_4

In [4]:
# Load the patch-level single-nucleus table for the configured patch geometry.
# Presumably this gathers the per-patch CSVs written by the patch-wise extraction
# step — confirm against read_patches_files.
patches_df = read_patches_files(
    patch_size=patch_size,
    patch_overlap=overlap,
)
print(patches_df.shape)  # (rows, columns)
patches_df.head()  # first five rows (the default)

(107933477, 7)


Unnamed: 0,leap_ID,patch_ID,nucleus_label,lifetime_mean,category,X coordinate,Y coordinate
0,39,0_73_1500_1573,4,4.302937,non responder,1490.5,831.0
1,39,0_73_1500_1573,13,1.02,non responder,1503.988506,869.712644
2,39,0_73_1500_1573,14,3.939235,non responder,858.588235,628.882353
3,39,0_73_1500_1573,15,3.830133,non responder,1322.4,450.4
4,39,0_73_1500_1573,29,4.325032,non responder,276.451613,1535.290323


## FLIM Data Aggregation — Lifetime Distribution & Median Features

In this step of the TNBC FLIM tissue analysis pipeline, we summarize lifetime data from single-cell features across full tissue samples and patches.

Per-nucleus features are aggregated into:
- **Median Feature DataFrames**: the median value of each feature per sample (`leap_ID`).
- **Lifetime Distributions**: binned histograms of per-nucleus lifetime values within each sample.



### Create median feature data frame 

In [2]:
# Aggregate the core per-nucleus features to median values per leap_ID. Per the log
# below, this reads FLIM_features_full_tissue.csv from <FULL_TISSUE_DIR>/core, drops
# some columns, and saves the result as features_median_data.csv in the same directory.
# NOTE: the name `median_df` is reused for the resection table later in this notebook.
median_df = aggregate_median_features_by_leap(const.FULL_TISSUE_DIR, 'core')

Reading FLIM features from: /sise/assafzar-group/assafzar/reut/Test_FLIM/from_FLIM_dir/full_tissue/core/FLIM_features_full_tissue.csv
Data loaded successfully.
Dropped unwanted columns. Remaining columns: ['leap_ID', 'lifetime_mean', 'area', 'extent', 'solidity', 'perimeter', 'diameter_area', 'convex_hull_area', 'minor_axis_length', 'perimeter_crofton', 'major_axis_length', 'orientation', 'diameter_max', 'eccentricity', 'density_radius_20', 'density_radius_40', 'density_radius_60', 'density_radius_80', 'category']
Aggregation complete. Resulting shape: (53, 19)
Saving aggregated DataFrame to: /sise/assafzar-group/assafzar/reut/Test_FLIM/from_FLIM_dir/full_tissue/core/features_median_data.csv
File saved successfully.


In [3]:
# Preview the first rows of the median-feature table built in the previous cell (core samples).
median_df.head()

Unnamed: 0,leap_ID,lifetime_mean,area,extent,solidity,perimeter,diameter_area,convex_hull_area,minor_axis_length,perimeter_crofton,major_axis_length,orientation,diameter_max,eccentricity,density_radius_20,density_radius_40,density_radius_60,density_radius_80,category
0,15,3.915147,16.0,0.75,0.954545,12.242641,4.513517,17.0,3.669612,14.518306,5.449354,0.0,5.385165,0.71318,0.007958,0.005769,0.00504,0.004526,non responder
1,17,4.1773,18.0,0.75,0.944444,13.656854,4.787307,20.0,3.991464,16.184387,6.06544,0.084918,6.324555,0.722862,0.019099,0.018104,0.016977,0.016015,non responder
2,19,4.465636,17.0,0.755102,0.958333,13.071068,4.652426,18.0,3.894303,15.073666,5.602338,0.0,5.830952,0.694588,0.016711,0.01691,0.0168,0.016662,non responder
3,21,4.353615,14.0,0.75,0.96,11.071068,4.222008,14.0,3.337716,13.732908,5.149449,0.0,5.385165,0.729661,0.012732,0.011738,0.011406,0.011091,responder
4,23,3.970538,11.0,0.75,1.0,9.414214,3.74241,11.0,2.970079,11.836789,4.418072,0.0,4.472136,0.720941,0.015915,0.014125,0.013528,0.013081,responder


### Create lifetime distribution

#### Tissue wise

In [5]:
# Build the per-sample lifetime histogram for the core samples, using the
# max_val / bin_range binning defined in the configuration section.
sample_type = "core"
feature_file_name = "FLIM_features_full_tissue.csv"

lifetime_distribution_full_tissue_df = build_lifetime_distribution_full_tissue(
    sample_type,
    max_val,
    bin_range,
    feature_file_name,
)

Starting lifetime distribution processing...
Sample type         : core
Maximum value       : 13
Bin range (width)   : 0.73

Reading FLIM feature file from: /sise/assafzar-group/assafzar/reut/Test_FLIM/from_FLIM_dir/full_tissue/core/FLIM_features_full_tissue.csv


In [6]:
# Re-load the core lifetime distribution from disk. The file name encodes the binning
# (max_val, bins_amount, bin_range), so it must match the values used when it was written.
file_name_lifetime_dist_full_tissue = (
    "features_lifetime_distribution_data"
    f"_max_val_{max_val}"
    f"_bins_amount_{bins_amount}"
    f"_bin_range_{bin_range}.csv"
)
df_file_path = os.path.join(
    const.FULL_TISSUE_DIR,
    "core",
    file_name_lifetime_dist_full_tissue,
)
# leap_ID is forced to text instead of letting pandas infer a numeric dtype.
lifetime_distribution_full_tissue_df = pd.read_csv(df_file_path, dtype={"leap_ID": str})
lifetime_distribution_full_tissue_df.head()

Unnamed: 0,lifetime_mean_0.0-0.73,lifetime_mean_0.73-1.46,lifetime_mean_1.46-2.19,lifetime_mean_2.19-2.92,lifetime_mean_2.92-3.65,lifetime_mean_3.65-4.38,lifetime_mean_4.38-5.109999999999999,lifetime_mean_5.109999999999999-5.84,lifetime_mean_5.84-6.57,lifetime_mean_6.57-7.3,lifetime_mean_7.3-8.03,lifetime_mean_8.03-8.76,lifetime_mean_8.76-9.49,lifetime_mean_9.49-10.219999999999999,lifetime_mean_10.219999999999999-10.95,lifetime_mean_10.95-11.68,lifetime_mean_11.68-12.41,lifetime_mean_12.41-13.14,leap_ID,categories
0,0.0,0.00037,0.006297,0.01845,0.145427,0.820404,0.00889,9.3e-05,4.6e-05,2.3e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15,0
1,0.0,2.9e-05,4.8e-05,9.2e-05,0.013817,0.883741,0.102244,2.4e-05,0.0,0.0,5e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17,0
2,0.0,0.0,0.0,6.6e-05,0.000976,0.352698,0.646233,2.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19,0
3,0.0,0.0,6.3e-05,4.2e-05,0.00189,0.553116,0.444477,0.000384,7e-06,7e-06,0.0,0.0,0.0,0.0,0.0,1.4e-05,0.0,0.0,21,1
4,0.0,0.001145,0.001335,0.007055,0.156732,0.762043,0.06825,0.002747,0.000433,0.000131,6.5e-05,2.4e-05,1.2e-05,6e-06,6e-06,0.0,1.2e-05,6e-06,23,1


#### Patch wise

In [9]:
# Build the lifetime histogram per patch. Per the log below, this reads the
# "..._after_filter.csv" patch feature file and saves the distribution CSV into the
# size_<patch_size>_overlap_<overlap> directory.
df_lifetime_dist_patch = build_lifetime_distribution_patch(
    patch_size=patch_size,
    patch_overlap=overlap,
    max_range=max_val,
    bin_range=bin_range,
)
print(df_lifetime_dist_patch.shape)  # (rows, columns)
df_lifetime_dist_patch.head(2)

Reading filtered patch data from: /sise/assafzar-group/assafzar/reut/Test_FLIM/from_FLIM_dir/patches_tissue/size_1500_overlap_0.75/FLIM_features_patches_size_1500_overlap_0.75_after_filter.csv
Filtered data loaded.

Building patch-level lifetime distribution...
Distribution parameters: ['lifetime_mean', 13, 0.73]
Start with feature lifetime_mean
Finish with feature ['lifetime_mean', 13, 0.73]
Distribution built. Total bins created: 18

Lifetime distribution saved to: /sise/assafzar-group/assafzar/reut/Test_FLIM/from_FLIM_dir/patches_tissue/size_1500_overlap_0.75/features_lifetime_distribution_data_patches_size_1500_overlap_0.75_max_val_13_bins_amount_18_bin_range_0.73.csv
(13893, 21)


bin,lifetime_mean_0.0-0.73,lifetime_mean_0.73-1.46,lifetime_mean_1.46-2.19,lifetime_mean_2.19-2.92,lifetime_mean_2.92-3.65,lifetime_mean_3.65-4.38,lifetime_mean_4.38-5.109999999999999,lifetime_mean_5.109999999999999-5.84,lifetime_mean_5.84-6.57,lifetime_mean_6.57-7.3,...,lifetime_mean_8.03-8.76,lifetime_mean_8.76-9.49,lifetime_mean_9.49-10.219999999999999,lifetime_mean_10.219999999999999-10.95,lifetime_mean_10.95-11.68,lifetime_mean_11.68-12.41,lifetime_mean_12.41-13.14,leap_ID,patch_ID,categories
0,0.0,0.005474,0.001825,0.021898,0.317518,0.65146,0.001825,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15,1128_1127_2628_2627,0
1,0.0,0.002439,0.0,0.014634,0.412195,0.570732,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15,1128_1502_2628_3002,0


In [7]:
# Re-load the patch-level lifetime distribution from disk. Both the directory and the
# file name encode the patch geometry; the file name additionally encodes the binning.
file_name_lifetime_dist_patch = (
    "features_lifetime_distribution_data_patches"
    f"_size_{patch_size}"
    f"_overlap_{overlap}"
    f"_max_val_{max_val}"
    f"_bins_amount_{bins_amount}"
    f"_bin_range_{bin_range}.csv"
)
specific_patch_dir = os.path.join(
    const.PATCH_DIR,
    f"size_{patch_size}_overlap_{overlap}",
)

df_file_path = os.path.join(specific_patch_dir, file_name_lifetime_dist_patch)
# leap_ID is forced to text instead of letting pandas infer a numeric dtype.
df_lifetime_dist_patch = pd.read_csv(df_file_path, dtype={"leap_ID": str})
df_lifetime_dist_patch.head()

Unnamed: 0,lifetime_mean_0.0-0.73,lifetime_mean_0.73-1.46,lifetime_mean_1.46-2.19,lifetime_mean_2.19-2.92,lifetime_mean_2.92-3.65,lifetime_mean_3.65-4.38,lifetime_mean_4.38-5.109999999999999,lifetime_mean_5.109999999999999-5.84,lifetime_mean_5.84-6.57,lifetime_mean_6.57-7.3,...,lifetime_mean_8.03-8.76,lifetime_mean_8.76-9.49,lifetime_mean_9.49-10.219999999999999,lifetime_mean_10.219999999999999-10.95,lifetime_mean_10.95-11.68,lifetime_mean_11.68-12.41,lifetime_mean_12.41-13.14,leap_ID,patch_ID,categories
0,0.0,0.005474,0.001825,0.021898,0.317518,0.65146,0.001825,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15,1128_1127_2628_2627,0
1,0.0,0.002439,0.0,0.014634,0.412195,0.570732,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15,1128_1502_2628_3002,0
2,0.0,0.006452,0.0,0.012903,0.409677,0.570968,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15,1128_1877_2628_3377,0
3,0.0,0.008475,0.0,0.016949,0.516949,0.457627,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15,1128_2252_2628_3752,0
4,0.0,0.000387,0.000193,0.005027,0.119876,0.869103,0.005414,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15,1128_2_2628_1502,0


# Resection sample

## Feature extraction

Following segmentation of individual nuclei in FLIM images, this step extracts per-nucleus quantitative features that describe:
- **Morphological properties** (e.g., nuclear area, eccentricity)
- **Local nuclear density**: number of neighboring nuclei within a defined radius, divided by the area of the region (i.e., density = neighbors / πr²)
- **Fluorescence lifetime metrics** (mean lifetime per nucleus)

In [None]:
# Run single-nucleus feature extraction for the resection samples.
# NOTE(review): this cell has no stored output (it was not executed in the saved run),
# and unlike the core call it passes no `with_mae` argument — confirm the default is intended.
create_all_feature_resection_full_tissue_df()

In [None]:
# Load the compiled per-nucleus feature table for the resection samples.
df_file_path = os.path.join(
    const.FULL_TISSUE_DIR,
    "resection",
    "FLIM_features_full_tissue.csv",
)
# leap_ID is forced to text instead of letting pandas infer a numeric dtype.
resection_features_df = pd.read_csv(df_file_path, dtype={"leap_ID": str})
print(resection_features_df.shape)  # (rows, columns)
resection_features_df.head()

In [4]:
# Aggregate the resection per-nucleus features to median values per leap_ID. Per the log
# below, this reads FLIM_features_full_tissue.csv from <FULL_TISSUE_DIR>/resection and
# saves features_median_data.csv in the same directory.
# NOTE: this overwrites the `median_df` created for the core samples earlier in the notebook.
median_df = aggregate_median_features_by_leap(const.FULL_TISSUE_DIR, 'resection')

Reading FLIM features from: /sise/assafzar-group/assafzar/reut/Test_FLIM/from_FLIM_dir/full_tissue/resection/FLIM_features_full_tissue.csv
Data loaded successfully.
Dropped unwanted columns. Remaining columns: ['leap_ID', 'lifetime_mean', 'area', 'extent', 'solidity', 'perimeter', 'diameter_area', 'convex_hull_area', 'minor_axis_length', 'perimeter_crofton', 'major_axis_length', 'orientation', 'diameter_max', 'eccentricity', 'density_radius_20', 'density_radius_40', 'density_radius_60', 'density_radius_80', 'category']
Aggregation complete. Resulting shape: (16, 19)
Saving aggregated DataFrame to: /sise/assafzar-group/assafzar/reut/Test_FLIM/from_FLIM_dir/full_tissue/resection/features_median_data.csv
File saved successfully.


In [5]:
# Preview the first rows of the median-feature table built in the previous cell (resection samples).
median_df.head()

Unnamed: 0,leap_ID,lifetime_mean,area,extent,solidity,perimeter,diameter_area,convex_hull_area,minor_axis_length,perimeter_crofton,major_axis_length,orientation,diameter_max,eccentricity,density_radius_20,density_radius_40,density_radius_60,density_radius_80,category
0,16,3.975238,17.0,0.76,0.954545,13.071068,4.652426,18.0,3.993285,15.073666,5.602338,0.0,5.830952,0.68313,0.011141,0.009748,0.00893,0.008356,non responder
1,18,4.234513,16.0,0.75,0.947368,12.828427,4.513517,18.0,3.679465,15.073666,5.782733,0.0,6.0,0.739975,0.007958,0.006764,0.006101,0.005769,non responder
2,20,4.22754,29.0,0.75,0.947368,17.899495,6.076508,31.0,5.212951,19.976625,7.276737,0.0,7.615773,0.673317,0.011937,0.012136,0.012113,0.012086,non responder
3,29,2.878154,14.0,0.75,0.952381,11.656854,4.222008,15.0,3.289022,13.732908,5.326819,0.0,5.385165,0.752023,0.007958,0.00756,0.00725,0.007063,non responder
4,33,3.444826,19.0,0.75,0.947368,14.242641,4.918491,21.0,3.938749,16.414425,6.286994,0.050623,6.324555,0.74455,0.010345,0.009748,0.009372,0.009052,non responder


In [5]:
# Build the per-sample lifetime histogram for the resection samples, using the same
# max_val / bin_range binning as the core samples.
# NOTE: this rebinds sample_type and lifetime_distribution_full_tissue_df from the core section.
sample_type = "resection"
feature_file_name = "FLIM_features_full_tissue.csv"

lifetime_distribution_full_tissue_df = build_lifetime_distribution_full_tissue(
    sample_type,
    max_val,
    bin_range,
    feature_file_name,
)

Starting lifetime distribution processing...
Sample type         : resection
Maximum value       : 13
Bin range (width)   : 0.73

Reading FLIM feature file from: /sise/assafzar-group/assafzar/reut/Test_FLIM/from_FLIM_dir/full_tissue/resection/FLIM_features_full_tissue.csv
Data loaded successfully.

Building lifetime distribution...
Distribution parameters: ['lifetime_mean', 13, 0.73]
Start with feature lifetime_mean
Finish with feature ['lifetime_mean', 13, 0.73]
Distribution built. Total bins created: 18

Saving bin distribution to: /sise/assafzar-group/assafzar/reut/Test_FLIM/from_FLIM_dir/full_tissue/resection/features_lifetime_distribution_data_max_val_13_bins_amount_18_bin_range_0.73.csv
Bin distribution CSV saved successfully.

