In [2]:
!pip install seaborn

Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2


In [4]:
import re
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys

# --- Configuration ---
# Full path of the run log to analyse: log directory + "<run id>.log".
log_file_path = "/home/yoavh/code/research/caption_reconstruction/logs/"+"66a4d370059d4f0f85f57cf01e9be197.log"

# --- Data Loading and Parsing ---
# Read the whole log into one string (`log_data`).
# The encoding is stated explicitly so decoding does not depend on the locale
# of the machine running the kernel; undecodable bytes are replaced rather than
# raising, because only ASCII fields of the log are parsed afterwards.
try:
    with open(log_file_path, 'r', encoding="utf-8", errors="replace") as f:
        log_data = f.read()
except FileNotFoundError:
    # Report the missing file and stop this cell (sys.exit raises SystemExit).
    print(f"❌ Error: The file '{log_file_path}' was not found.")
    sys.exit(1)

# --- Log-line pattern ---
# Matches a log line of the form
#   <StrategyName>__partition(num_partitions=N, start_partition=N, num_parts_to_mask=N) ... Mean F1: <number>
# The three keys must appear in exactly this order, separated by ", ".
# All named groups are captured as strings.
regex = re.compile(
    r"(?P<strategy_name>\w+?)__partition\("
    r"num_partitions=(?P<num_partitions>\d+), "
    r"start_partition=(?P<start_partition>\d+), "
    r"num_parts_to_mask=(?P<num_parts_to_mask>\d+)"
    # Capture exactly one decimal number ("0.98", ".98" or "1"). A bare
    # character class such as [\d.]+ would also swallow a sentence-ending
    # period ("0.9805.") or accept "1.2.3", which cannot be converted to a
    # number afterwards.
    r"\).*Mean F1: (?P<mean_f1>\d*\.?\d+)"
)

# Run the pattern over every log line and keep the named groups of each hit.
results = []
for line in log_data.splitlines():
    match = regex.search(line)
    if match:
        results.append(match.groupdict())
df = pd.DataFrame(results)

# Nothing matched: report it and stop this cell instead of continuing with an
# empty frame.
if df.empty:
    print("❌ Error: No valid log entries found to plot with the new regex.")
    sys.exit(1)

# Regex groups are captured as strings; convert the numeric columns.
numeric_cols = ['start_partition', 'num_parts_to_mask', 'mean_f1', 'num_partitions']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric)

print("Successfully parsed the following data:")
print(df)



Successfully parsed the following data:
            strategy_name  num_partitions  start_partition  num_parts_to_mask  \
0  BaselineRepeatStrategy               5                0                  1   
1  BaselineRepeatStrategy               5                1                  1   
2  BaselineRepeatStrategy               5                2                  1   

   mean_f1  
0   0.9805  
1   0.9805  
2   0.9806  


In [None]:
# --- Plotting ---
sns.set_theme(style="whitegrid", palette="viridis")

# Graph 1: F1 vs. Mask Start Position
# One line per strategy (colour); the marker shape encodes how many parts were masked.
fig1, ax1 = plt.subplots(figsize=(12, 7))
g1 = sns.lineplot(
    data=df,
    x="start_partition",
    y="mean_f1",
    hue="strategy_name",
    style="num_parts_to_mask",
    markers=True,
    dashes=False,
    ax=ax1,
)
g1.set_title("Mean F1 Score vs. Mask Start Partition", fontsize=16)
g1.set_xlabel("Start Partition Index", fontsize=12)
g1.set_ylabel("Mean F1 Score", fontsize=12)
# Partition indices are integers: pin the ticks to the observed values so the
# axis does not show fractional positions (the mask-size graph does the same).
g1.set_xticks(sorted(df["start_partition"].unique()))
g1.legend(title="Strategy & Mask Size")
plt.show()

# Graph 2: F1 vs. Mask Size
# One line per strategy, with the number of masked parts on the x axis.
fig2, ax2 = plt.subplots(figsize=(12, 7))
g2 = sns.lineplot(
    data=df,
    x="num_parts_to_mask",
    y="mean_f1",
    hue="strategy_name",
    marker="o",
    ax=ax2,
)
g2.set_title("Mean F1 Score vs. Number of Masked Parts", fontsize=16)
g2.set_xlabel("Number of Parts Masked", fontsize=12)
g2.set_ylabel("Mean F1 Score", fontsize=12)
g2.legend(title="Strategy")
# Tick only at the mask sizes that actually occur in the data.
mask_sizes = df['num_parts_to_mask'].unique()
g2.set_xticks(mask_sizes)
plt.show()