In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import polars as pl

# Temperature

In [3]:
# Directory passed as `data_path` to the ablation helpers, which read
# `{data_path}/{task}_ablation.txt` from it.
data_path = "data.05_23_22_37"
# Task name used by the cells that pass `task` through unchanged.
task = "text_summarization"

# Per-task model string; the helpers compare `test_config.model_str` against
# `default_model[task]` when the model is not the quantity being varied.
default_model = dict(
    text_summarization="philschmid/bart-large-cnn-samsum",
    machine_translation="facebook/mbart-large-en-ro",
)
def temperature_ablation(data_path,task,wp_type="Delta"):
    """Build a lazy summary of `best_score` grouped by test temperature.

    Scans `{data_path}/{task}_ablation.txt` as NDJSON and keeps only rows where
    - `watermark_processor` matches the regex `wp_type`, anything, then ", False)",
    - `test_config.wp_str` matches the regex `wp_type`, anything, then ", True)",
    - `test_config.top_k` is 50, `test_config.no_input` is False and
      `test_config.model_str` equals `default_model[task]`.
    `best_score` is then exploded (so it is presumably a list per row -- TODO
    confirm) and aggregated per `test_config.temperature`.

    Args:
        data_path: Directory containing the ablation file.
        task: Task name; selects the file and the entry of `default_model`.
        wp_type: Text interpolated, unescaped, into both regex filters.

    Returns:
        An un-collected polars query with columns `test_temperature`,
        `token_count` (row count after the explode), `score_per_token` (mean)
        and `score_per_token_std` (std), sorted by `test_temperature`.
    """
    def cfg(field):
        # One field of the `test_config` struct column.
        return pl.col("test_config").struct[field]

    query = pl.scan_ndjson(f"{data_path}/{task}_ablation.txt")

    # Applied one after another, in this order; a row must pass all of them.
    row_filters = (
        pl.col("watermark_processor").str.contains(rf"{wp_type}.*, False\)"),
        cfg("wp_str").str.contains(rf"{wp_type}.*, True\)"),
        cfg("top_k") == 50,
        # `== False` builds a polars expression; `not`/`is` would not.
        cfg("no_input") == False,
        cfg("model_str") == default_model[task],
    )
    for keep in row_filters:
        query = query.filter(keep)

    per_group_stats = [
        pl.count().alias("token_count"),
        pl.col("best_score").mean().alias("score_per_token"),
        pl.col("best_score").std().alias("score_per_token_std"),
    ]

    query = query.with_columns([cfg("temperature").alias("test_temperature")])
    query = query.select(["test_temperature", "best_score"])
    query = query.explode(pl.col("best_score"))
    query = query.groupby("test_temperature").agg(per_group_stats)
    return query.sort("test_temperature")

In [4]:
# Temperature ablation on `task` with wp_type="Delta"; run the lazy query and show the table.
ldf = temperature_ablation(
    data_path=data_path,
    task=task,
    wp_type="Delta",
).collect()
ldf

test_temperature,token_count,score_per_token,score_per_token_std
f64,u32,f64,f64
0.5,882487,0.049404,0.407079
1.0,882487,0.878379,1.435374
1.5,882487,0.036002,0.498856


In [5]:
# Temperature ablation on `task` with wp_type="Gamma"; run the lazy query and show the table.
ldf = temperature_ablation(
    data_path=data_path,
    task=task,
    wp_type="Gamma",
).collect()
ldf

test_temperature,token_count,score_per_token,score_per_token_std
f64,u32,f64,f64
0.5,878117,0.132985,0.309065
1.0,878117,0.220681,0.36776
1.5,878117,0.166003,0.455464


In [6]:
# Temperature ablation on the machine_translation file with wp_type="Delta".
ldf = temperature_ablation(
    data_path=data_path,
    task="machine_translation",
    wp_type="Delta",
).collect()
ldf

test_temperature,token_count,score_per_token,score_per_token_std
f64,u32,f64,f64
0.5,69790,0.041363,0.303473
1.0,69790,0.420137,1.13556
1.5,69790,0.019048,0.32428


In [7]:
# Temperature ablation on the machine_translation file with wp_type="Gamma".
ldf = temperature_ablation(
    data_path=data_path,
    task="machine_translation",
    wp_type="Gamma",
).collect()
ldf

test_temperature,token_count,score_per_token,score_per_token_std
f64,u32,f64,f64
0.5,69807,0.084925,0.241549
1.0,69807,0.105879,0.291684
1.5,69807,0.087959,0.335659


# top_k

In [8]:
def top_k_ablation(data_path,task,wp_type="Delta"):
    """Build a lazy summary of `best_score` grouped by test `top_k`.

    Scans `{data_path}/{task}_ablation.txt` as NDJSON and keeps only rows where
    - `watermark_processor` matches the regex `wp_type`, anything, then ", False)",
    - `test_config.wp_str` matches the regex `wp_type`, anything, then ", True)",
    - `test_config.temperature` is 1.0, `test_config.no_input` is False and
      `test_config.model_str` equals `default_model[task]`.
    `best_score` is then exploded (so it is presumably a list per row -- TODO
    confirm) and aggregated per `test_config.top_k`.

    Args:
        data_path: Directory containing the ablation file.
        task: Task name; selects the file and the entry of `default_model`.
        wp_type: Text interpolated, unescaped, into both regex filters.

    Returns:
        An un-collected polars query with columns `test_top_k`, `token_count`
        (row count after the explode), `score_per_token` (mean) and
        `score_per_token_std` (std), sorted by `test_top_k`.
    """
    def cfg(field):
        # One field of the `test_config` struct column.
        return pl.col("test_config").struct[field]

    query = pl.scan_ndjson(f"{data_path}/{task}_ablation.txt")

    # Applied one after another, in this order; a row must pass all of them.
    row_filters = (
        pl.col("watermark_processor").str.contains(rf"{wp_type}.*, False\)"),
        cfg("wp_str").str.contains(rf"{wp_type}.*, True\)"),
        cfg("temperature") == 1.0,
        # `== False` builds a polars expression; `not`/`is` would not.
        cfg("no_input") == False,
        cfg("model_str") == default_model[task],
    )
    for keep in row_filters:
        query = query.filter(keep)

    per_group_stats = [
        pl.count().alias("token_count"),
        pl.col("best_score").mean().alias("score_per_token"),
        pl.col("best_score").std().alias("score_per_token_std"),
    ]

    query = query.with_columns([cfg("top_k").alias("test_top_k")])
    query = query.select(["test_top_k", "best_score"])
    query = query.explode(pl.col("best_score"))
    query = query.groupby("test_top_k").agg(per_group_stats)
    return query.sort("test_top_k")

In [9]:
# top_k ablation on `task` with wp_type="Delta"; run the lazy query and show the table.
ldf = top_k_ablation(
    data_path=data_path,
    task=task,
    wp_type="Delta",
).collect()
ldf

test_top_k,token_count,score_per_token,score_per_token_std
i64,u32,f64,f64
0,882487,0.377826,1.124531
20,882487,0.520355,1.144058
50,882487,0.878379,1.435374
100,882487,0.582203,1.262202


In [10]:
# top_k ablation on `task` with wp_type="Gamma"; run the lazy query and show the table.
ldf = top_k_ablation(
    data_path=data_path,
    task=task,
    wp_type="Gamma",
).collect()
ldf

test_top_k,token_count,score_per_token,score_per_token_std
i64,u32,f64,f64
0,878117,0.216535,0.373048
20,878117,0.211919,0.362486
50,878117,0.220681,0.36776
100,878117,0.219959,0.369314


In [11]:
# top_k ablation on the machine_translation file with wp_type="Delta".
ldf = top_k_ablation(
    data_path=data_path,
    task="machine_translation",
    wp_type="Delta",
).collect()
ldf

test_top_k,token_count,score_per_token,score_per_token_std
i64,u32,f64,f64
0,69790,0.02195,0.34957
20,69790,0.274142,0.859782
50,69790,0.420137,1.13556
100,69790,0.288118,0.93018


In [12]:
# top_k ablation on the machine_translation file with wp_type="Gamma".
ldf = top_k_ablation(
    data_path=data_path,
    task="machine_translation",
    wp_type="Gamma",
).collect()
ldf

test_top_k,token_count,score_per_token,score_per_token_std
i64,u32,f64,f64
0,69807,0.097126,0.324025
20,69807,0.10194,0.284995
50,69807,0.105879,0.291684
100,69807,0.105606,0.292268


# Without input (`no_input`)

In [13]:
def no_input_ablation(data_path,task,wp_type="Delta"):
    """Build a lazy summary of `best_score` grouped by the test `no_input` flag.

    Scans `{data_path}/{task}_ablation.txt` as NDJSON and keeps only rows where
    - `watermark_processor` matches the regex `wp_type`, anything, then ", False)",
    - `test_config.wp_str` matches the regex `wp_type`, anything, then ", True)",
    - `test_config.temperature` is 1.0, `test_config.top_k` is 50 and
      `test_config.model_str` equals `default_model[task]`.
    `best_score` is then exploded (so it is presumably a list per row -- TODO
    confirm) and aggregated per `test_config.no_input`.

    Args:
        data_path: Directory containing the ablation file.
        task: Task name; selects the file and the entry of `default_model`.
        wp_type: Text interpolated, unescaped, into both regex filters.

    Returns:
        An un-collected polars query with columns `test_no_input`,
        `token_count` (row count after the explode), `score_per_token` (mean)
        and `score_per_token_std` (std), sorted by `test_no_input`.
    """
    def cfg(field):
        # One field of the `test_config` struct column.
        return pl.col("test_config").struct[field]

    query = pl.scan_ndjson(f"{data_path}/{task}_ablation.txt")

    # Applied one after another, in this order; a row must pass all of them.
    row_filters = (
        pl.col("watermark_processor").str.contains(rf"{wp_type}.*, False\)"),
        cfg("wp_str").str.contains(rf"{wp_type}.*, True\)"),
        cfg("temperature") == 1.0,
        cfg("top_k") == 50,
        cfg("model_str") == default_model[task],
    )
    for keep in row_filters:
        query = query.filter(keep)

    per_group_stats = [
        pl.count().alias("token_count"),
        pl.col("best_score").mean().alias("score_per_token"),
        pl.col("best_score").std().alias("score_per_token_std"),
    ]

    query = query.with_columns([cfg("no_input").alias("test_no_input")])
    query = query.select(["test_no_input", "best_score"])
    query = query.explode(pl.col("best_score"))
    query = query.groupby("test_no_input").agg(per_group_stats)
    return query.sort("test_no_input")

In [14]:
# no_input ablation on `task` with wp_type="Delta"; run the lazy query and show the table.
ldf = no_input_ablation(
    data_path=data_path,
    task=task,
    wp_type="Delta",
).collect()
ldf

test_no_input,token_count,score_per_token,score_per_token_std
bool,u32,f64,f64
False,882487,0.878379,1.435374
True,882487,0.010843,0.217019


In [15]:
# no_input ablation on `task` with wp_type="Gamma"; run the lazy query and show the table.
ldf = no_input_ablation(
    data_path=data_path,
    task=task,
    wp_type="Gamma",
).collect()
ldf

test_no_input,token_count,score_per_token,score_per_token_std
bool,u32,f64,f64
False,878117,0.220681,0.36776
True,878117,0.024405,0.241708


In [16]:
# no_input ablation on the machine_translation file with wp_type="Delta".
ldf = no_input_ablation(
    data_path=data_path,
    task="machine_translation",
    wp_type="Delta",
).collect()
ldf

test_no_input,token_count,score_per_token,score_per_token_std
bool,u32,f64,f64
False,69790,0.420137,1.13556
True,69790,0.009619,0.2004


In [17]:
# no_input ablation on the machine_translation file with wp_type="Gamma".
ldf = no_input_ablation(
    data_path=data_path,
    task="machine_translation",
    wp_type="Gamma",
).collect()
ldf

test_no_input,token_count,score_per_token,score_per_token_std
bool,u32,f64,f64
False,69807,0.105879,0.291684
True,69807,0.018599,0.190461


# Different model (`model_str`)

In [19]:
def model_ablation(data_path,task,wp_type="Delta"):
    """Build a lazy summary of `best_score` grouped by test model string.

    Scans `{data_path}/{task}_ablation.txt` as NDJSON and keeps only rows where
    - `watermark_processor` matches the regex `wp_type`, anything, then ", False)",
    - `test_config.wp_str` matches the regex `wp_type`, anything, then ", True)",
    - `test_config.temperature` is 1.0, `test_config.top_k` is 50 and
      `test_config.no_input` is False.
    `best_score` is then exploded (so it is presumably a list per row -- TODO
    confirm) and aggregated per `test_config.model_str`.

    Args:
        data_path: Directory containing the ablation file.
        task: Task name; selects which ablation file is read.
        wp_type: Text interpolated, unescaped, into both regex filters.

    Returns:
        An un-collected polars query with columns `test_model_str`,
        `token_count` (row count after the explode), `score_per_token` (mean)
        and `score_per_token_std` (std), sorted by `test_model_str`.
    """
    def cfg(field):
        # One field of the `test_config` struct column.
        return pl.col("test_config").struct[field]

    query = pl.scan_ndjson(f"{data_path}/{task}_ablation.txt")

    # Applied one after another, in this order; a row must pass all of them.
    row_filters = (
        pl.col("watermark_processor").str.contains(rf"{wp_type}.*, False\)"),
        cfg("wp_str").str.contains(rf"{wp_type}.*, True\)"),
        cfg("temperature") == 1.0,
        cfg("top_k") == 50,
        # `== False` builds a polars expression; `not`/`is` would not.
        cfg("no_input") == False,
    )
    for keep in row_filters:
        query = query.filter(keep)

    per_group_stats = [
        pl.count().alias("token_count"),
        pl.col("best_score").mean().alias("score_per_token"),
        pl.col("best_score").std().alias("score_per_token_std"),
    ]

    query = query.with_columns([cfg("model_str").alias("test_model_str")])
    query = query.select(["test_model_str", "best_score"])
    query = query.explode(pl.col("best_score"))
    query = query.groupby("test_model_str").agg(per_group_stats)
    return query.sort("test_model_str")

In [20]:
# Model ablation on `task` with wp_type="Delta"; run the lazy query and show the table.
ldf = model_ablation(
    data_path=data_path,
    task=task,
    wp_type="Delta",
).collect()
ldf

test_model_str,token_count,score_per_token,score_per_token_std
str,u32,f64,f64
"""facebook/bart-…",882487,0.040898,0.446834
"""philschmid/bar…",882487,0.878379,1.435374


In [21]:
# Model ablation on `task` with wp_type="Gamma"; run the lazy query and show the table.
ldf = model_ablation(
    data_path=data_path,
    task=task,
    wp_type="Gamma",
).collect()
ldf

test_model_str,token_count,score_per_token,score_per_token_std
str,u32,f64,f64
"""facebook/bart-…",878117,0.091067,0.412726
"""philschmid/bar…",878117,0.220681,0.36776
