In [12]:
import os
import gc
import glob
import numpy as np
import pandas as pd
import json
import torch
from torchinfo import summary

from skimage.measure import label, regionprops

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from src.utils import *
from src.dataset import get_data_from_file

In [None]:
save_path = 'C:/7_felev/szakdoga'

runs = os.listdir(save_path)
# remove wandb folder
runs.remove('wandb')

dataframes = {}
aggregated_data = {}

for run in runs:
    run_path = os.path.join(save_path, run)
    results = glob.glob(os.path.join(run_path, 'test_result*.json'))
    
    # List to hold all dictionaries for the current run
    run_data = []
    
    for result in results:
        with open(result, 'r') as f:
            data = json.load(f)
            run_data.append(data)
    
    # Create a DataFrame for the current run
    run_df = pd.DataFrame(run_data)

    mean_df = run_df.mean().to_frame(name='mean')
    std_df = run_df.std().to_frame(name='std')
    
    # Combine mean and std into a single DataFrame
    aggregated_df = pd.concat([mean_df, std_df], axis=1)
    
    dataframes[run] = run_df
    aggregated_data[run] = aggregated_df

# Now dataframes contains the aggregated DataFrame for each run
for run in runs:
    print(run)
    print(aggregated_data[run])
    # print(dataframes[run].head())
    print()

UNet4_len1_singleconv
                           mean         std
dice_score             0.361941    0.008812
detection_rate         0.711793    0.028222
avg_pred_cell_size  1422.704910  199.209316
std_pred_cell_size   854.140881  130.015269
avg_mask_cell_size   687.055321    0.000000
std_mask_cell_size   603.114496    0.000000

UNet4_len8_doubleconv
                           mean         std
dice_score             0.361604    0.019674
detection_rate         0.734969    0.040802
avg_pred_cell_size  1517.317151  203.670426
std_pred_cell_size   878.032632  141.310499
avg_mask_cell_size   687.055321    0.000000
std_mask_cell_size   603.114496    0.000000

UNet4_len8_singleconv
                           mean         std
dice_score             0.358528    0.008931
detection_rate         0.705794    0.023740
avg_pred_cell_size  1571.522097  177.630265
std_pred_cell_size  1059.144348  257.037198
avg_mask_cell_size   687.055321    0.000000
std_mask_cell_size   603.114496    0.000000

UNet8_l

In [None]:
# test if the test set is always the same?
# can it change between runs?

In [10]:
# calculating the high resoulution test cell sizes

def calculate_test_cell_sizes():
    path = 'C:/7_felev/data'
    files = []
    for root, dirs, files_ in os.walk(path):
        for file in files_:
            file_path = os.path.join(root, file)
            if file.endswith('.npz'):
                files.append(file_path)

    assert len(files) == 163, "Dataset size should be 163"

    np.random.seed(42)
    files = np.random.permutation(files)

    test_files = files[:33]
    # print(test_files)
    # return test_files

    cell_sizes_label = []
    for file in test_files:
        bio, mask = get_data_from_file(file, 8)
        print(file, bio.shape, mask.shape)

        # calculating the high resoulution test cell sizes
        true_mask_int = mask.astype(np.int32)
        labeled_label, num_cells_label = label(true_mask_int, return_num=True)

        cell_sizes_label.extend([region.area for region in regionprops(labeled_label)])

    scale = mask.shape[0] / 80
    cell_sizes_label = pixel_to_micrometer(cell_sizes_label, scale)

    avg_cell_size_label = np.mean(cell_sizes_label) if cell_sizes_label else 0
    std_cell_size_label = np.std(cell_sizes_label) if cell_sizes_label else 0

    return avg_cell_size_label, std_cell_size_label


In [13]:
mean, std = calculate_test_cell_sizes()

C:/7_felev/data\noncoated\20210604_Hela_nonc\C2_seg.npz (8, 80, 80) (4155, 4155)
C:/7_felev/data\noncoated\20210528_MCF7_nonc\A2_seg.npz (8, 80, 80) (4182, 4182)
C:/7_felev/data\noncoated\20210604_Hela_nonc\B2_seg.npz (8, 80, 80) (4129, 4129)
C:/7_felev/data\fibronectin\20210527_HepG2_fn\A4_seg.npz (8, 80, 80) (4179, 4179)
C:/7_felev/data\noncoated\20210407_LCLC_H838_nonc\B2_seg.npz (8, 80, 80) (4161, 4161)
C:/7_felev/data\fibronectin\20210526_MDAMB231_fn\A2_seg.npz (8, 80, 80) (4188, 4188)
C:/7_felev/data\noncoated\20210608_MDAMB231_MCF7_nonc\C3_seg.npz (8, 80, 80) (4245, 4245)
C:/7_felev/data\fibronectin\20210527_H838_fn\C4_seg.npz (8, 80, 80) (4143, 4143)
C:/7_felev/data\noncoated\20210407_LCLC_H838_nonc\C4_seg.npz (8, 80, 80) (4158, 4158)
C:/7_felev/data\noncoated\20210607_LCLC_H838_nonc\C1_seg.npz (8, 80, 80) (4191, 4191)
C:/7_felev/data\fibronectin\20210526_MCF7_fn\A4_seg.npz (8, 80, 80) (4192, 4192)
C:/7_felev/data\fibronectin\20210609_HepG2_fn\A1_seg.npz (8, 80, 80) (4158, 4158

In [14]:
print(mean, std)

649.2377342347785 512.7229437467809
