In [1]:
%load_ext autoreload
%autoreload 2

### Define Variables 

In [3]:
from utils import * 

In [4]:
# These json files are the outputs from the classifier when was feeded with articles 

# Mistral Articles 
mistral_outputs_1 = "/home/t/tzelilai/Desktop/Thesis/Diagramms/mistral_outputs_1.json" # Simple Prompt 
mistral_outputs_2 = "/home/t/tzelilai/Desktop/Thesis/Diagramms/mistral_outputs_2.json" # Advanced Prompt 

# Llama Articles
llama_outputs_1 = "/home/t/tzelilai/Desktop/Thesis/Diagramms/llama3.1_outputs_1.json" # Simple Prompt 
llama_outputs_2 = "/home/t/tzelilai/Desktop/Thesis/Diagramms/llama3.1_outputs_2.json" # Advanced Prompt 

# Original Article 
original_article_outputs = "/home/t/tzelilai/Desktop/Thesis/Diagramms/original_article_outputs.json"

In [4]:
mistral_2_df = load_file(mistral_outputs_2)
llama_1_df = load_file(llama_outputs_1)
original_article_df = load_file(original_article_outputs)

### Transformed Data Distribution (All Dataset)
#### f(x) = len(x/(1-x))

In [5]:
# Transform Left Classified Content 
mistral_2_df["Left_"] = logit_transform(mistral_2_df["Left"], epsilon=1e-6)
llama_1_df["Left_"] = logit_transform(llama_1_df["Left"], epsilon=1e-6)
original_article_df["Left_"] = logit_transform(original_article_df["Left"], epsilon=1e-6)

# Transform Center Classified Content 
mistral_2_df["Center_"] = logit_transform(mistral_2_df["Center"], epsilon=1e-6)
llama_1_df["Center_"] = logit_transform(llama_1_df["Center"], epsilon=1e-6)
original_article_df["Center_"] = logit_transform(original_article_df["Center"], epsilon=1e-6)

# Transform Right Classified Content 
mistral_2_df["Right_"] = logit_transform(mistral_2_df["Right"], epsilon=1e-6)
llama_1_df["Right_"] = logit_transform(llama_1_df["Right"], epsilon=1e-6)
original_article_df["Right_"] = logit_transform(original_article_df["Right"], epsilon=1e-6)

In [6]:
figures = show_distribution({"mistral_articles":mistral_2_df, "llama_articles":llama_1_df, 
                             "original_articles":original_article_df},
                             columns=["Left_", "Center_", "Right_"]) 

In [7]:
figures[0].show()

In [8]:
figures[1]

In [9]:
figures[2]

### Transformed Data Distribution (Seperate based on class)
#### f(x) = len(x/(1-x))

In [10]:
# Take only Left classified content 
mistral_2_df_left = mistral_2_df[(mistral_2_df["Left"] > mistral_2_df["Right"]) & (mistral_2_df["Left"] > mistral_2_df["Center"])][["Left"]]
llama_1_df_left = llama_1_df[(llama_1_df["Left"] > llama_1_df["Right"]) & (llama_1_df["Left"] > llama_1_df["Center"])][["Left"]]
original_article_df_left = original_article_df[(original_article_df["Left"] > original_article_df["Right"]) & (original_article_df["Left"] > original_article_df["Center"])][["Left"]]
# Take only Center classified content 
mistral_2_df_center = mistral_2_df[(mistral_2_df["Center"] > mistral_2_df["Right"]) & (mistral_2_df["Center"] > mistral_2_df["Left"])][["Center"]]
llama_1_df_center = llama_1_df[(llama_1_df["Center"] > llama_1_df["Right"]) & (llama_1_df["Center"] > llama_1_df["Left"])][["Center"]]
original_article_df_center = original_article_df[(original_article_df["Center"] > original_article_df["Right"]) & (original_article_df["Center"] > original_article_df["Left"])][["Center"]]
# Take only Right classified content 
mistral_2_df_right = mistral_2_df[(mistral_2_df["Right"] > mistral_2_df["Left"]) & (mistral_2_df["Right"] > mistral_2_df["Center"])][["Right"]]
llama_1_df_right = llama_1_df[(llama_1_df["Right"] > llama_1_df["Left"]) & (llama_1_df["Right"] > llama_1_df["Center"])][["Right"]]
original_article_df_right = original_article_df[(original_article_df["Right"] > original_article_df["Left"]) & (original_article_df["Right"] > original_article_df["Center"])][["Right"]]


In [11]:
# Transform Left Classified Content 
mistral_2_df_left["Left_"] = logit_transform(mistral_2_df_left["Left"], epsilon=1e-6)
llama_1_df_left["Left_"] = logit_transform(llama_1_df_left["Left"], epsilon=1e-6)
original_article_df_left["Left_"] = logit_transform(original_article_df_left["Left"], epsilon=1e-6)

# Transform Center Classified Content 
mistral_2_df_center["Center_"] = logit_transform(mistral_2_df_center["Center"], epsilon=1e-6)
llama_1_df_center["Center_"] = logit_transform(llama_1_df_center["Center"], epsilon=1e-6)
original_article_df_center["Center_"] = logit_transform(original_article_df_center["Center"], epsilon=1e-6)

# Transform Right Classified Content 
mistral_2_df_right["Right_"] = logit_transform(mistral_2_df_right["Right"], epsilon=1e-6)
llama_1_df_right["Right_"] = logit_transform(llama_1_df_right["Right"], epsilon=1e-6)
original_article_df_right["Right_"] = logit_transform(original_article_df_right["Right"], epsilon=1e-6)

In [12]:
figures1 = show_distribution({"mistral_articles":mistral_2_df_left, "llama_articles":llama_1_df_left, 
                             "original_articles":original_article_df_left},
                             columns=["Left_"])

figures2 = show_distribution({"mistral_articles":mistral_2_df_center, "llama_articles":llama_1_df_center, 
                             "original_articles":original_article_df_center},
                             columns=["Center_"])

figures3 = show_distribution({"mistral_articles":mistral_2_df_right, "llama_articles":llama_1_df_right, 
                             "original_articles":original_article_df_right},
                             columns=["Right_"])

In [13]:
figures1[0]

In [14]:
figures2[0]

In [15]:
figures3[0]

### Kolmogorov–Smirnov test

In [16]:
x1 = mistral_2_df_left["Left_"].to_numpy()
y1 = original_article_df_left["Left_"].to_numpy()

x2 = mistral_2_df_center["Center_"].to_numpy()
y2 = original_article_df_center["Center_"].to_numpy()

x3 = mistral_2_df_right["Right_"].to_numpy()
y3 = original_article_df_right["Right_"].to_numpy()

In [17]:
from scipy.stats import wasserstein_distance

wd1 = wasserstein_distance(x1, y1)
wd2 = wasserstein_distance(x2, y2)
wd3 = wasserstein_distance(x3, y3)

print("Center: Wasserstein distance =", wd1)
print("Left  : Wasserstein distance =", wd2)
print("Rigth : Wasserstein distance =", wd3)

Center: Wasserstein distance = 8.038697830687546
Left  : Wasserstein distance = 5.937037583177133
Rigth : Wasserstein distance = 5.287204055504348
