In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import plotly.express as px

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
raw_data_path = '/content/drive/MyDrive/img-label-correction-SAM/data/raw/'
output_metadata_folder_path = '/content/drive/MyDrive/img-label-correction-SAM/output/metadata/'
manifest_path = output_metadata_folder_path+ 'rts_auto_segmentation_manifest.csv'

df = pd.read_csv(manifest_path)

In [22]:
df = pd.read_csv(manifest_path)
df['iou'] = pd.to_numeric(df['iou'], errors='coerce')
df = df.dropna(subset=['iou'])

df['complexity_group'] = df['complexity'].str[0]
df['level'] = df['complexity'].str[1:].astype(int)

max_iou_per_complexity = df.groupby('complexity')['iou'].transform('max')
best = df[df['iou'] == max_iou_per_complexity]

grouped = (
    best.groupby(['level', 'complexity_group', 'iou'])
    .agg({
        'model_name': lambda x: ', '.join(sorted(set(x))),
        'prompt': lambda x: ', '.join(sorted(set(x)))
    })
    .reset_index()
)
grouped['level_label'] = 'Level ' + grouped['level'].astype(str)
color_map = {
    'A': '#90e0ef',
    'B': '#00b4d8',
    'C': '#0077b6'
}

fig = px.bar(
    grouped,
    x='level_label',
    y='iou',
    color='complexity_group',
    color_discrete_map=color_map,
    barmode='group',
    labels={'iou': 'Max IOU', 'level_label': 'Level', 'complexity_group': 'Group'},
    title='Max IOU by Level and Complexity Group',
    text='model_name',
    hover_data={'prompt': True, 'iou': True}
)

fig.update_traces(
    textposition='outside',
    textfont=dict(family="Arial Black", size=12)
)
fig.update_layout(
    yaxis=dict(
      dtick=0.02,
      range=[0.8, df['iou'].max() + 0.01]
  ),
  xaxis={'categoryorder':'category ascending'},
  width=1000,
  height=500,
  bargap=0.3,
  bargroupgap=0.1
)

fig.show()


In [28]:
df['level_label'] = 'Level ' + df['level'].astype(str)

fig = px.box(
    df,
    x='level_label',
    y='iou',
    color='complexity_group',
    color_discrete_map=color_map,
    #points='outliers',  # 'all' shows all data points; 'outliers' shows only outliers
    hover_data=['model_name', 'prompt'],
    title='IOU Distribution by Level and Complexity Group'
)

fig.update_layout(
    yaxis=dict(dtick=0.1, range=[0.0, df['iou'].max() + 0.1]),
    xaxis={'categoryorder':'category ascending'},
    width=1000,
    height=500,
    xaxis_title='Level',
    yaxis_title='IOU'
)

fig.show()


In [29]:
df = pd.read_csv(manifest_path)
df['complexity'] = df['complexity'].str[0]
df['level'] = df['level'].astype(str)

df['iou'] = pd.to_numeric(df['iou'])
df['Level-Complexity'] = df['complexity'] + df['level']
color_map = {
  'box': '#0077b6',
  'mask': '#90e0ef',
  'point': '#00b4d8'
}

fig = px.box(
    df,
    x='Level-Complexity',
    y='iou',
    color='prompt',
    color_discrete_map=color_map,
    hover_data=['model_name', 'base_tiff'],
    title='IOU Distribution per Level-Complexity and Prompt Type'
)

fig.update_layout(
    yaxis=dict(
        dtick=0.1,
        range=[0.0, 1.0],
        title='IOU'
    ),
    xaxis_title='Level & Complexity',
    height=500,
    bargap=0.2,
    legend_title_text='Prompt Type'
)

fig.show()


In [36]:
df = pd.read_csv(manifest_path)
df['complexity'] = df['complexity'].str[0]
df['level'] = df['level'].astype(str)
prompt_order = ['mask', 'point', 'box']
color_map = {
  'mask': '#90e0ef',
  'point': '#00b4d8',
  'box': '#0077b6'
}
max_iou_per_group = df.groupby(['level', 'complexity', 'prompt'])['iou'].transform('max')
max_iou_rows = df[df['iou'] == max_iou_per_group]
summary_df = (
  max_iou_rows
  .groupby(['level', 'complexity', 'prompt'])
  .agg(
      iou=('iou', 'max'),
      models=('model_name', lambda x: ', '.join(sorted(x.unique())))
  )
  .reset_index()
)

summary_df['prompt'] = pd.Categorical(summary_df['prompt'], categories=prompt_order, ordered=True)
summary_df['Level-Complexity'] = summary_df['complexity'] + summary_df['level']
summary_df['level_int'] = summary_df['level'].astype(int)
summary_df = summary_df.sort_values(by=['complexity', 'level_int', 'prompt'])
fig = px.bar(
  summary_df,
  x='Level-Complexity',
  y='iou',
  color='prompt',
  color_discrete_map=color_map,
  category_orders={'prompt': prompt_order},  # force prompt order in legend and bar grouping
  barmode='group',
  text='iou',
  title='Max IOU per Level-Complexity and Prompt (with Model Names)',
  labels={'iou': 'Max IOU', 'Level-Complexity': 'Level & Complexity'},
  hover_data={'models': True, 'iou': ':.2f'}
)

fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')
fig.update_layout(
    yaxis=dict(dtick=0.04),
    xaxis_tickangle=0,
    yaxis_range=[0.6, 1.0],
    bargap=0.2,
    height=500,
    legend_title_text='Prompt Type'
)

fig.show()


In [None]:


level_list = ['1', '2', '3']
complexity_list = ['A', 'B', 'C']
columns = [f'Level {lvl}' for lvl in level_list]
summary_df = pd.DataFrame(0, index=complexity_list, columns=columns)
unique_tif_files = set()
for level in level_list:
  for complexity in complexity_list:
    level_path = f'/Level {level}/{complexity}{level}'
    tif_path = raw_data_path + level_path

    if os.path.exists(tif_path):
      tif_files = [f for f in os.listdir(tif_path) if f.endswith('.tif')]
      summary_df.at[complexity, f'Level {level}'] = len(tif_files)
      unique_tif_files.update(tif_files)

summary_df['Row Total'] = summary_df.sum(axis=1)
summary_df.loc['Col Total'] = summary_df.sum(numeric_only=True)
summary_df_transposed = summary_df.T
print("\n TIFF File Summary:")
print("----------------------")
print(f"\n Total unique .tif files across all folders: {len(unique_tif_files)}")
print()
print(summary_df_transposed)
print()

df = pd.read_csv(manifest_path)
df['complexity'] = df['complexity'].str[0]
df['level'] = df['level'].astype(str)
summary = df.groupby(['level', 'complexity'])['uid'].nunique().unstack(fill_value=0)
summary = summary.reindex(index=sorted(summary.index, key=int), columns=['A', 'B', 'C'], fill_value=0)
summary['Row Total'] = summary.sum(axis=1)
summary.loc['Col Total'] = summary.sum()
unique_uid_count = df['uid'].nunique()
print("\n Manifest File Summary:")
print("----------------------")
print(f" Total unique UIDs in the manifest file: {unique_uid_count}")
print()
print(summary)


 TIFF File Summary:
----------------------

 Total unique .tif files across all folders: 300

            A    B    C  Col Total
Level 1    16   46   39        101
Level 2     6   59   36        101
Level 3     4   42   54        100
Row Total  26  147  129        302


 Manifest File Summary:
----------------------
 Total unique UIDs in the manifest file: 300

complexity   A    B    C  Row Total
level                              
1           16   46   39        101
2            6   59   36        101
3            4   42   54        100
Col Total   26  147  129        302


In [None]:
# --- UID count per model_name and prompt ---
df = pd.read_csv(manifest_path)
print(f"\n Total entries in manifest file: {len(df)}")
summary = (
  df.groupby(['model_name', 'prompt'])['uid']
  .agg(total_rows='count', UIDs='nunique')
  .sort_values(by='prompt')
  .reset_index()
)
print("\n Summary")
print("----------------------------------------------------------------")
print(summary)


 Total entries in manifest file: 19026

 Summary
----------------------------------------------------------------
       model_name prompt  total_rows  UIDs
0   sam2_baseplus    box         906   300
1       sam_large    box         906   300
2      sam2_large    box         906   300
3        sam_huge    box         906   300
4      sam2_small    box         906   300
5        sam_base    box         906   300
6       sam2_tiny    box         906   300
7        sam_huge   mask         906   300
8        sam_base   mask         906   300
9       sam_large   mask         906   300
10      sam2_tiny   mask         906   300
11     sam2_small   mask         906   300
12     sam2_large   mask         906   300
13  sam2_baseplus   mask         906   300
14      sam2_tiny  point         906   300
15     sam2_large  point         906   300
16       sam_base  point         906   300
17  sam2_baseplus  point         906   300
18       sam_huge  point         906   300
19     sam2_small  point 