In [2]:
import polars as pl
import glob


def load_json_to_dataframe(json_folder_path):
    """Read every ``*.json`` file directly inside a folder into one DataFrame.

    Args:
        json_folder_path: Directory to scan (non-recursively) for ``*.json`` files.

    Returns:
        A single polars DataFrame with the rows of all files. Frames are joined
        with ``how="diagonal"``, so the result has the union of all columns.

    Raises:
        ValueError: If the folder contains no ``*.json`` files (or does not exist).
    """
    # glob yields files in arbitrary, filesystem-dependent order; sort so the
    # row order of the combined frame is reproducible across runs and machines.
    json_files = sorted(glob.glob(f"{json_folder_path}/*.json"))
    if not json_files:
        # Name the folder instead of surfacing an opaque empty-concat error.
        raise ValueError(f"No JSON files found in {json_folder_path!r}")
    df_list = [pl.read_json(file) for file in json_files]
    return pl.concat(df_list, how="diagonal")

In [7]:
# Load all election JSON files into one frame and report (rows, columns).
df = load_json_to_dataframe("bert_generated_text/elections")
print((df.height, df.width))

(227, 11)


In [4]:
# Rewrite the numeric bias codes as the text labels used by the llama columns:
# 0 -> left, 1 -> center, 2 -> right.
df = df.with_columns(
    original_bias=pl.col("original_bias")
    .cast(pl.Utf8)  # codes become "0" / "1" / "2" so they can be mapped as strings
    .replace({"0": "left", "1": "center", "2": "right"})
)

In [8]:
# how often does llama get bias tagging right? [comparing original bias to llama tagging the original text]

# Compare on text labels rather than raw codes. The cell above rewrites
# `original_bias` from 0/1/2 to left/center/right, so filtering on the integers
# only worked when this cell ran against a freshly reloaded frame. Normalising
# both columns here gives the same numbers whichever state `df` is in.
# NOTE(review): assumes each column holds integer codes or text labels — confirm.
bias_code_labels = {"0": "left", "1": "center", "2": "right"}
tagging = df.select(
    [
        pl.col(name).cast(pl.Utf8).replace(bias_code_labels)
        for name in ("original_bias", "llama_bias_with_sample")
    ]
)

total_rows = tagging.height
print("num rows = ", total_rows)
count_same_original = (
    tagging["original_bias"] == tagging["llama_bias_with_sample"]
).sum()
print(
    "total accurate original bias tagging by llama = ",
    # NaN instead of ZeroDivisionError when the frame is empty.
    count_same_original / total_rows if total_rows else float("nan"),
)

# Per-label accuracy: of the rows whose original bias is `label`, the share
# that llama tagged with the same label. The order (center, left, right)
# matches the order the three separate stanzas used to print in.
for label in ("center", "left", "right"):
    originally_label = tagging.filter(pl.col("original_bias") == label)
    also_tagged_label = originally_label.filter(
        pl.col("llama_bias_with_sample") == label
    )
    print(
        f"accurate original bias tagging by llama for original value {label.upper()} = ",
        # A label with no rows would otherwise raise ZeroDivisionError.
        also_tagged_label.height / originally_label.height
        if originally_label.height
        else float("nan"),
    )

num rows =  227
total accurate original bias tagging by llama =  0.5418502202643172
accurate original bias tagging by llama for original value CENTER =  0.5166666666666667
accurate original bias tagging by llama for original value LEFT =  0.4845360824742268
accurate original bias tagging by llama for original value RIGHT =  0.6428571428571429


In [81]:
# how often does the generated text's bias (calculated by llama) match the original (human annotated)

total_rows = df.height
count_same_original = (df["original_bias"] == df["llama_generated_bias"]).sum()
print(
    "total match between original bias and generated llama text bias= ",
    # NaN instead of ZeroDivisionError when the frame is empty.
    count_same_original / total_rows if total_rows else float("nan"),
)

# Per-label agreement: of the rows whose original bias is `label`, the share
# whose generated-text bias is the same label. One loop replaces three
# copy-pasted stanzas; the order (center, left, right) is unchanged.
for label in ("center", "left", "right"):
    originally_label = df.filter(pl.col("original_bias") == label)
    also_generated_label = originally_label.filter(
        pl.col("llama_generated_bias") == label
    )
    print(
        f"how often does generated llama text bias match original bias for original value {label.upper()} = ",
        # A label with no rows would otherwise raise ZeroDivisionError.
        also_generated_label.height / originally_label.height
        if originally_label.height
        else float("nan"),
    )

total match between original bias and generated llama text bias=  0.49901046084252193
how often does generated llama text bias match original bias for original value CENTER =  0.577028258887876
how often does generated llama text bias match original bias for original value LEFT =  0.529657477025898
how often does generated llama text bias match original bias for original value RIGHT =  0.4006436041834272


In [82]:
# how often does the generated text's bias (calculated by llama) match the original (llama generated)

total_rows = df.height
count_same_original = (df["llama_original_bias"] == df["llama_generated_bias"]).sum()
print(
    # Label fixed: this compares the *llama* original bias. The old text was
    # copied from the human-annotated comparison and read the same as its output.
    "total match between llama original bias and generated llama text bias= ",
    # NaN instead of ZeroDivisionError when the frame is empty.
    count_same_original / total_rows if total_rows else float("nan"),
)

# Per-label agreement: of the rows llama tagged as `label` on the original text,
# the share whose generated text llama also tagged as `label`. One loop replaces
# three copy-pasted stanzas; the order (center, left, right) is unchanged.
for label in ("center", "left", "right"):
    originally_label = df.filter(pl.col("llama_original_bias") == label)
    also_generated_label = originally_label.filter(
        pl.col("llama_generated_bias") == label
    )
    print(
        f"how often does generated llama text bias match llama original bias for original value {label.upper()} = ",
        # A label with no rows would otherwise raise ZeroDivisionError.
        also_generated_label.height / originally_label.height
        if originally_label.height
        else float("nan"),
    )

total match between original bias and generated llama text bias=  0.7537461125247384
how often does generated llama text bias match llama original bias for original value CENTER =  0.8216098622189992
how often does generated llama text bias match llama original bias for original value LEFT =  0.7821939586645469
how often does generated llama text bias match llama original bias for original value RIGHT =  0.61


In [80]:
# are the topics the same? using llama
# Tally each llama_same_topic value, then attach its share of all rows
# (stored as a fraction in the "percentage" column).
topic_tally = df["llama_same_topic"].value_counts()
value_counts = topic_tally.with_columns(percentage=pl.col("count") / df.height)

print(value_counts)

shape: (2, 3)
┌──────────────────┬───────┬────────────┐
│ llama_same_topic ┆ count ┆ percentage │
│ ---              ┆ ---   ┆ ---        │
│ bool             ┆ u32   ┆ f64        │
╞══════════════════╪═══════╪════════════╡
│ true             ┆ 3532  ┆ 0.998586   │
│ false            ┆ 5     ┆ 0.001414   │
└──────────────────┴───────┴────────────┘
