##a. LIBRARY INSTALL AND IMPORT

In [None]:
!pip install pandas numpy scikit-learn torch transformers tqdm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
# This cell will authenticate you and mount your Drive in the Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#Import all necessary libraries
import re
import nltk
import os
import json
import pandas as pd
import torch
import numpy as np
from sklearn.utils import resample
from transformers import BertTokenizer, BertModel
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [None]:
# check some numpy version
print(f"NumPy version: {np.__version__}")

NumPy version: 2.0.2


#b.DATA

###b1.DATA LOADING

In [None]:
import json
import pandas as pd

# Function to read a JSON file
def load_json_file(file_path):
    """Load a JSON file"""
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)

# Function to process FakeNewsNet data
def process_FakeNewsNet_data(file_paths):
    """Read multiple JSON files and organize them into a DataFrame"""
    data_list = []  # Store all data

    for category, file_path in file_paths.items():
        data = load_json_file(file_path)  # Load JSON data

        for news_id, news_content in data.items():
            if 'text' in news_content:  # Ensure the text field exists
                # Determine the news source
                if "gossipcop" in category:
                    source = "gossipcop"
                elif "politifact" in category:
                    source = "politifact"
                else:
                    source = "unknown"

                # Determine the news type
                if "R" in category:
                    if "HR" in category:
                        label = "human_real" # Human-written real news
                    else:
                        label = "gpt_real"  # AI-generated real news
                elif "F" in category:
                    if "MF" in category:
                        label = "gpt_fake"  # AI-generated fake news
                    else:
                        label = "human_fake"  # Human-written fake news
                else:
                    label = "unknown"

                data_list.append({
                    "id": news_id,
                    "text": news_content["text"],
                    "title": news_content.get("title", ""),  # Some data may not have a title
                    "source": source,  # News source
                    "label": label,  # Real/fake news category
                })

    # Convert to a Pandas DataFrame
    df = pd.DataFrame(data_list)
    return df

# Define paths for the dataset
fake_news_paths = {
    "gossipcop_HF": "/content/drive/MyDrive/DS266/HF.json",
    "gossipcop_HR": "/content/drive/MyDrive/DS266/HR.json",
    "gossipcop_MF": "/content/drive/MyDrive/DS266/MF.json",
    "gossipcop_MR": "/content/drive/MyDrive/DS266/MR.json",
}

# Load and process the data
FakeNewsNet_df = process_FakeNewsNet_data(fake_news_paths)

# Display the first 5 rows
FakeNewsNet_df.head()


Unnamed: 0,id,text,title,source,label
0,0,✕ Close Meghan Markle and Prince Harry have an...,As it happened: Prince Harry and Meghan Markle...,gossipcop,human_fake
1,1,Kim Kardashian and Kanye West are pulling out ...,Kim & Kanye Install At-Home Panic Room After P...,gossipcop,human_fake
2,2,Prince Harry and Meghan currently live at Kens...,£1.4million spent renovating Prince Harry and ...,gossipcop,human_fake
3,3,They can't get enough of the Biebs on this sho...,Photos from Dancing With the Stars: Special Gu...,gossipcop,human_fake
4,4,Ben Affleck is keeping life with his three kid...,Jennifer Garner ‘Doesn’t Want’ Her Kids Around...,gossipcop,human_fake


##b2.DATA CLEANING

In [None]:
# check each label category, there exisT in-balance data
print(FakeNewsNet_df['label'].value_counts())

target_count = FakeNewsNet_df['label'].value_counts().min()
print(target_count)


label
human_real    8168
gpt_real      4169
human_fake    4084
gpt_fake      4084
Name: count, dtype: int64
4084


In [None]:
#Balance the data

MR =  FakeNewsNet_df[FakeNewsNet_df['label'] == "gpt_real"]
MF =  FakeNewsNet_df[FakeNewsNet_df['label'] == "gpt_fake"]
HR =  FakeNewsNet_df[FakeNewsNet_df['label'] == "human_real"]
HF =  FakeNewsNet_df[FakeNewsNet_df['label'] == "human_fake"]

MR = resample(MR,replace=True,n_samples=target_count,random_state=42)
HR = resample(HR,replace=True,n_samples=target_count,random_state=42)


Bal_FNN_df = pd.concat([MR,MF,HR,HF])

print(Bal_FNN_df['label'].value_counts())

label
gpt_real      4084
gpt_fake      4084
human_real    4084
human_fake    4084
Name: count, dtype: int64


In [None]:
# combine text and title for training
Bal_FNN_df['combined_text'] = Bal_FNN_df['text'] + ' ' + Bal_FNN_df['title']

# Convert labels to numerical values (0=real, 1=fake)
Bal_FNN_df['label_new'] = Bal_FNN_df['label'].map({'human_fake': 0, 'human_real': 1.0,'gpt_fake': 2,'gpt_real':3})

# Show sample data
print("Sample data:")
print(Bal_FNN_df[['text','combined_text', 'label','label_new']].head())

Sample data:
                                                    text  \
17196  When I walked into the Astros' offices on June...   
20108  Prince Jackson, the 20-year-old son of the lat...   
19428  If you're tearing up just thinking about Game ...   
16802  Paramount Network has decided to postpone the ...   
19780  The much-anticipated return of the beloved Emm...   

                                           combined_text     label  label_new  
17196  When I walked into the Astros' offices on June...  gpt_real        3.0  
20108  Prince Jackson, the 20-year-old son of the lat...  gpt_real        3.0  
19428  If you're tearing up just thinking about Game ...  gpt_real        3.0  
16802  Paramount Network has decided to postpone the ...  gpt_real        3.0  
19780  The much-anticipated return of the beloved Emm...  gpt_real        3.0  


In [None]:
# calcalate word count distributon of news text
Bal_FNN_df["word_count"] = Bal_FNN_df["text"].apply(lambda x: len(x.split()))
Bal_FNN_df.groupby("label")["word_count"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
gpt_fake,4084.0,225.148874,51.698819,8.0,192.0,222.0,255.0,520.0
gpt_real,4084.0,368.942214,252.584697,1.0,262.0,331.0,413.0,4292.0
human_fake,4084.0,467.411117,714.384583,5.0,182.0,339.0,483.0,16373.0
human_real,4084.0,529.747796,795.61284,4.0,218.0,355.0,578.0,17224.0


In [None]:
def simple_tokenizer(text):
    return re.findall(r'\b\w+\b', text.lower())

text = "Hello! This is a test."
print(simple_tokenizer(text))

['hello', 'this', 'is', 'a', 'test']


In [None]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
import string

# Remove unwanted characters, links, and HTML tags
def clean_text(text):
    # Remove HTML tags
    text = re.sub(r'<.*?>', '', text)
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text)
    # Remove special characters and numbers, keeping only letters
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Remove extra spaces
    text = re.sub(r'\s+', ' ', text).strip()
    # Convert to lowercase
    text = text.lower()
    words = simple_tokenizer(text)
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word not in stop_words]
    return ' '.join(filtered_words)

# Apply cleaning to datasets
Bal_FNN_df['cleaned_combined_text'] = Bal_FNN_df['combined_text'].apply(clean_text)
# Print some cleaned samples

print(Bal_FNN_df['cleaned_combined_text'].head())


17196    walked astros offices june team seen laughings...
20108    prince jackson yearold son late pop icon micha...
19428    youre tearing thinking game thrones coming end...
16802    paramount network decided postpone premiere hi...
19780    muchanticipated return beloved emmywinning com...
Name: cleaned_combined_text, dtype: object


##b3.TRAIN, TEST AND VALIDATION SPLIT

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_temp, y_train, y_temp = train_test_split(
    Bal_FNN_df['combined_text'],
    Bal_FNN_df['label_new'],
    test_size=0.4,
    random_state=42,
    stratify=Bal_FNN_df['label_new']
)


X_test, X_val, y_test, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp
)


print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")
print(f"Valid samples: {len(X_val)}")

Training samples: 9801
Test samples: 3267
Valid samples: 3268


## c.Baseline Model -  Text Classification.
####  pre-train model used:
#####   1. "roberta-base"
#####   2. "bert-base"
#####   3. "roberta-base-fakenews"
#####   4. "microsoft/deberta-v3-base"


In [None]:
#@title install
!pip install -q transformers
!pip install -q torchinfo
!pip install -q datasets
!pip install -q evaluate

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/491.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/183.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/143.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
#@title Imports

import numpy as np

import transformers
import evaluate

from datasets import load_dataset
from torchinfo import summary

from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer

import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

import nltk
nltk.download('stopwords')
nltk.download('punkt')

from datasets import Dataset
import pandas as pd

from sklearn.metrics import confusion_matrix, classification_report
import numpy as np


from transformers import Trainer, TrainingArguments
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
from sklearn.model_selection import train_test_split

# Split the data while keeping the DataFrame structure ( for hugging face dataset use)

train_df, tmp_df = train_test_split(
    Bal_FNN_df[['cleaned_combined_text', 'label_new']],
    test_size=0.4,
    random_state=42,
    stratify=Bal_FNN_df['label_new']
)

val_df, test_df = train_test_split(
    tmp_df,
    test_size=0.5,
    random_state=42,
    stratify=tmp_df['label_new']
)
# Reset the index for both DataFrames
train_df = train_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)

print(f"Training samples: {len(train_df)}")
print(f"Test samples: {len(test_df)}")
print(f"Val samples: {len(test_df)}")
print("\nTrain DataFrame:")
print(train_df.head())
print("\nTest DataFrame:")
print(test_df.head())
print("\nVal DataFrame:")
print(val_df.head())

Training samples: 9801
Test samples: 3268
Val samples: 3268

Train DataFrame:
                               cleaned_combined_text  label_new
0  kim kardashian falsely accused attacked los an...        2.0
1  difference year makes though lots recent rumor...        1.0
2  recent interview hollywood reporter jennifer a...        2.0
3  nina dobrev glen powell slowing things source ...        1.0
4  maroon led frontman adam levine delivered unfo...        3.0

Test DataFrame:
                               cleaned_combined_text  label_new
0  millie bobby brown shared photo sharing sweet ...        3.0
1  image credit john salangsangjohn milneshutters...        0.0
2  parentshaming mainstay social media occasion a...        3.0
3  amber rose exposes crotch halfnude photo racy ...        1.0
4  alexander clooney arrived hollywoods favorite ...        2.0

Val DataFrame:
                               cleaned_combined_text  label_new
0  botched returning month second half fourth sea...     

In [None]:
# Convert to Hugging Face Dataset for training
formatted_df = train_df.rename(columns={
    'cleaned_combined_text': 'text',
    'label_new': 'label'
})

fake_news_dataset_train = Dataset.from_pandas(formatted_df)


In [None]:
# Convert to Hugging Face Dataset for testimg
formatted_df = test_df.rename(columns={
    'cleaned_combined_text': 'text',
    'label_new': 'label'
})

fake_news_dataset_test = Dataset.from_pandas(formatted_df)

In [None]:
# Convert to Hugging Face Dataset for validating
formatted_df = val_df.rename(columns={
    'cleaned_combined_text': 'text',
    'label_new': 'label'
})

fake_news_dataset_val = Dataset.from_pandas(formatted_df)

In [None]:
print("Test: \n",fake_news_dataset_test)
print("Train: \n",fake_news_dataset_train)
print("Val: \n",fake_news_dataset_val)

Test: 
 Dataset({
    features: ['text', 'label'],
    num_rows: 3268
})
Train: 
 Dataset({
    features: ['text', 'label'],
    num_rows: 9801
})
Val: 
 Dataset({
    features: ['text', 'label'],
    num_rows: 3267
})


In [None]:
MAX_SEQUENCE_LENGTH = 500
# to get 75% for all 4 class, choose max_sequence small than longest class.


# |label|count|mean|std|min|25%|50%|75%|max|
# |---|---|---|---|---|---|---|---|---|
# |gpt\_fake|4084\.0|225\.1488736532811|51\.69881948866794|8\.0|192\.0|222\.0|255\.0|520\.0|
# |gpt\_real|4084\.0|368\.9422135161606|252\.5846971492773|1\.0|262\.0|331\.0|413\.0|4292\.0|
# |human\_fake|4084\.0|467\.4111165523996|714\.3845834167945|5\.0|182\.0|339\.0|483\.0|16373\.0|
# |human\_real|4084\.0|529\.7477962781587|795\.6128398262819|4\.0|218\.0|355\.0|578\.0|17224\.0|


In [None]:
# Encode data

def preprocess_data(data, tokenizer):
    review_text = data['text']

    encoded = tokenizer.batch_encode_plus(
            review_text,
            max_length=MAX_SEQUENCE_LENGTH,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=True,
            return_tensors="pt"
        )

    return encoded

In [None]:
#Creating  compute metrics function

metric = evaluate.load('accuracy')

# FakeNewsNet_df['label_new'] = FakeNewsNet_df['label'].map({'human_fake': 0, 'human_real': 1.0,'gpt_fake': 2,'gpt_real': 3})
def compute_metrics(eval_pred):
  predictions, labels = eval_pred
  predictions = np.argmax(predictions, axis=1)
  precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted')
  acc = accuracy_score(labels, predictions)

  class_report = classification_report(
        labels, predictions,
        target_names=['human_fake', 'human_real', 'gpt_fake','gpt_real'],
        digits=4
    )
  print("\nClassification Report:")
  print(class_report)

  return {
      'accuracy': acc,
      'f1': f1,
      'precision': precision,
      'recall': recall
  }


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [None]:
## Fine tuning the "bert-base-cased" model

def fine_tune_classification_model(classification_model,
                                   tokenizer,
                                   train_data,
                                   val_data,
                                   batch_size = 16,
                                   num_epochs = 3):

    preprocessed_train_data = train_data.map(preprocess_data, batched=True, fn_kwargs={'tokenizer': tokenizer})
    preprocessed_val_data = val_data.map(preprocess_data, batched=True, fn_kwargs={'tokenizer': tokenizer})

    preprocessed_train_data = preprocessed_train_data.map(
        lambda x: {'labels': torch.tensor(x['label'], dtype=torch.long)}  # Ensure labels are int64
    )
    preprocessed_val_data  = preprocessed_val_data .map(
        lambda x: {'labels': torch.tensor(x['label'], dtype=torch.long)}
    )



    training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/DS266/model_output",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    logging_dir="/content/drive/MyDrive/DS266/logs",
    eval_strategy="epoch",
    save_strategy="epoch",
    report_to='none'
)
    trainer = Trainer(
        model=classification_model,
        args=training_args,
        train_dataset=preprocessed_train_data,
        eval_dataset=preprocessed_val_data,
        compute_metrics=compute_metrics
    )


    trainer.train()



### c1: bert-base model

In [None]:
model_checkpoint_name = "bert-base-cased"
bert_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_name)
# bert_classification_model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint_name)

bert_classification_model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint_name,
    num_labels=4  # number of classes = 4
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
fine_tune_classification_model(bert_classification_model, bert_tokenizer, fake_news_dataset_train, fake_news_dataset_val)

Map:   0%|          | 0/9801 [00:00<?, ? examples/s]

Map:   0%|          | 0/3267 [00:00<?, ? examples/s]

Map:   0%|          | 0/9801 [00:00<?, ? examples/s]

Map:   0%|          | 0/3267 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7697,0.513308,0.812978,0.812186,0.81186,0.812978
2,0.4577,0.483522,0.832568,0.83262,0.843619,0.832568
3,0.3435,0.474232,0.84573,0.846332,0.848629,0.84573



Classification Report:
              precision    recall  f1-score   support

  human_fake     0.7697    0.7405    0.7548       817
  human_real     0.7032    0.7083    0.7057       816
    gpt_fake     0.9068    0.9523    0.9290       817
    gpt_real     0.8677    0.8507    0.8591       817

    accuracy                         0.8130      3267
   macro avg     0.8118    0.8129    0.8122      3267
weighted avg     0.8119    0.8130    0.8122      3267


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.8477    0.6879    0.7595       817
  human_real     0.6846    0.8431    0.7556       816
    gpt_fake     0.9050    0.9792    0.9406       817
    gpt_real     0.9371    0.8201    0.8747       817

    accuracy                         0.8326      3267
   macro avg     0.8436    0.8326    0.8326      3267
weighted avg     0.8436    0.8326    0.8326      3267


Classification Report:
              precision    recall  f1-score   support

  h

### c2: roberta-base model

In [None]:
model_checkpoint_name = "roberta-base"
roberta_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_name)
roberta_classification_model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint_name,
    num_labels=4  # number of classes = 4
)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
fine_tune_classification_model(roberta_classification_model, roberta_tokenizer, fake_news_dataset_train, fake_news_dataset_val)

Map:   0%|          | 0/9801 [00:00<?, ? examples/s]

Map:   0%|          | 0/3267 [00:00<?, ? examples/s]

Map:   0%|          | 0/9801 [00:00<?, ? examples/s]

Map:   0%|          | 0/3267 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7062,0.651159,0.782369,0.77629,0.778831,0.782369
2,0.4718,0.438687,0.844506,0.845893,0.85355,0.844506
3,0.3474,0.473497,0.85124,0.85119,0.853264,0.85124



Classification Report:
              precision    recall  f1-score   support

  human_fake     0.7617    0.5789    0.6579       817
  human_real     0.6878    0.7047    0.6961       816
    gpt_fake     0.8559    0.9596    0.9048       817
    gpt_real     0.8098    0.8862    0.8463       817

    accuracy                         0.7824      3267
   macro avg     0.7788    0.7823    0.7763      3267
weighted avg     0.7788    0.7824    0.7763      3267


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.8217    0.7503    0.7844       817
  human_real     0.7004    0.8309    0.7601       816
    gpt_fake     0.9388    0.9767    0.9574       817
    gpt_real     0.9531    0.8201    0.8816       817

    accuracy                         0.8445      3267
   macro avg     0.8535    0.8445    0.8459      3267
weighted avg     0.8535    0.8445    0.8459      3267


Classification Report:
              precision    recall  f1-score   support

  h

In [None]:
# too time consuming,

# model_checkpoint_name = "roberta-large"
# roberta2_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_name)
# roberta2_classification_model = AutoModelForSequenceClassification.from_pretrained(
#     model_checkpoint_name,
#     num_labels=4  # number of classes = 4
# )

# fine_tune_classification_model(roberta2_classification_model, roberta2_tokenizer, fake_news_dataset_train, fake_news_dataset_val)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9801 [00:00<?, ? examples/s]

Map:   0%|          | 0/3267 [00:00<?, ? examples/s]

Map:   0%|          | 0/9801 [00:00<?, ? examples/s]

Map:   0%|          | 0/3267 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

### c3: distilbert model

In [None]:
model_checkpoint_name = "distilbert-base-uncased"
distilbert_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_name)
distilbert_classification_model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint_name,
    num_labels=4  # number of classes = 4
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
fine_tune_classification_model(distilbert_classification_model, distilbert_tokenizer, fake_news_dataset_train, fake_news_dataset_val)

Map:   0%|          | 0/9801 [00:00<?, ? examples/s]

Map:   0%|          | 0/3267 [00:00<?, ? examples/s]

Map:   0%|          | 0/9801 [00:00<?, ? examples/s]

Map:   0%|          | 0/3267 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6921,0.494627,0.81206,0.814451,0.818485,0.81206
2,0.4215,0.443193,0.835629,0.836486,0.850757,0.835629
3,0.2944,0.477598,0.848791,0.848579,0.848988,0.848791



Classification Report:
              precision    recall  f1-score   support

  human_fake     0.7585    0.7650    0.7617       817
  human_real     0.6886    0.7451    0.7157       816
    gpt_fake     0.9771    0.8874    0.9301       817
    gpt_real     0.8496    0.8507    0.8502       817

    accuracy                         0.8121      3267
   macro avg     0.8184    0.8120    0.8144      3267
weighted avg     0.8185    0.8121    0.8145      3267


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.8549    0.6707    0.7517       817
  human_real     0.6711    0.8701    0.7577       816
    gpt_fake     0.9438    0.9670    0.9553       817
    gpt_real     0.9330    0.8348    0.8811       817

    accuracy                         0.8356      3267
   macro avg     0.8507    0.8356    0.8365      3267
weighted avg     0.8508    0.8356    0.8365      3267


Classification Report:
              precision    recall  f1-score   support

  h

In [None]:
from transformers import BertTokenizer, BertForSequenceClassification

In [None]:
# save bert model for later evaluation
base_classifier_checkpoint_filepath = '/content/drive/MyDrive/DS266/model_checkpoint/base_classifier'
bert_classification_model.save_pretrained(base_classifier_checkpoint_filepath, from_pt=True)

# bert_classification_model = BertForSequenceClassification.from_pretrained(base_classifier_checkpoint_filepath)

In [None]:
# save roberta model for later evaluation
base_classifier_checkpoint_filepath_roberta = '/content/drive/MyDrive/DS266/model_checkpoint/base_classifier_roberta'
roberta_classification_model.save_pretrained(base_classifier_checkpoint_filepath_roberta, from_pt=True)

# roberta_classification_model = robertaForSequenceClassification.from_pretrained(base_classifier_checkpoint_filepath_roberta)

In [None]:
# save distilbert model for later evaluation
base_classifier_checkpoint_filepath_distilbert= '/content/drive/MyDrive/DS266/model_checkpoint/base_classifier_distilbert'
distilbert_classification_model.save_pretrained(base_classifier_checkpoint_filepath_distilbert, from_pt=True)

# roberta_classification_model = robertaForSequenceClassification.from_pretrained(base_classifier_checkpoint_filepath_roberta)

##d. Base Line Model Evaluation

Current base line model proceed with bert, distilbert, and roberta finetunning having bias on GPT generated text. There exist overfitting on both MF and MR label.

Question for this section: Why overfitting?

### d1.Feature-Level Diagnostics -  feature contribution



Useful Resource: https://pmc.ncbi.nlm.nih.gov/articles/PMC11513550/

In [None]:
# Load Model saved
# save_path2 = '/content/drive/MyDrive/DS266/model_checkpoint/base_classifier_wthclean'
save_path_b = '/content/drive/MyDrive/DS266/model_checkpoint/base_classifier'
save_path_r = '/content/drive/MyDrive/DS266/model_checkpoint/base_classifier_roberta'
save_path_d = '/content/drive/MyDrive/DS266/model_checkpoint/base_classifier_distilbert'

In [None]:

from transformers import AutoTokenizer, AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(save_path_b)
model_name = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

In [None]:
import shap
from transformers import pipeline
# Load trained model
classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    device=0
)
explainer = shap.Explainer(classifier)

Device set to use cuda:0


###### d.1.0: test with sample data

In [None]:
#test with one gpt fake result.
# LABEL: ({'human_fake': 0, 'human_real': 1.0,'gpt_fake': 2,'gpt_real': 3})

test_text = "Excitement and anticipation are in the air as the Royal Family gets ready to welcome their newest member. Prince Harry has finally proposed to his girlfriend Meghan Markle and the couple is set to tie the knot in the spring of 2018. \n\nSpeculations have been swirling for months that the couple was planning to get engaged, and today the news has finally been confirmed. The public has fallen in love with the American actress and her fashion sense, down-to-earth personality, and charitable works. The Royal Family, too, is reportedly ecstatic about the prospect of having such a \"modern\" bride.\n\nMany pundits have praised Prince Harry for choosing someone outside of the typical royal circles, and believe that Meghan Markle brings much-needed diversity to the monarchy. Markle is of mixed race, divorced, and a successful actress in her own right. Her engagement to Prince Harry is already being hailed as a critical moment in royal history.\n\nRumor has it that the wedding preparations are already underway, and that the couple is determined to make their big day a memorable one. There are already some reports that they plan to buck tradition when it comes to everything from the guest list to the style of dress. However, the Royal Family has been tight-lipped about any details.\n\nOne thing is for certain: the world will be watching as Prince Harry and Meghan Markle exchange their vows in spring 2018. The couple has already won the hearts of millions around the globe, and their wedding is shaping up to be a \"can't miss\" event. Stay tuned for more updates as the big day approaches!."

result = classifier(test_text)
print(f"{result[0]['label']},  {result[0]['score']:.2f}")

print( classifier.tokenizer.tokenize(test_text))

LABEL_2,  0.59
['Ex', '##cite', '##ment', 'and', 'anticipation', 'are', 'in', 'the', 'air', 'as', 'the', 'Royal', 'Family', 'gets', 'ready', 'to', 'welcome', 'their', 'newest', 'member', '.', 'Prince', 'Harry', 'has', 'finally', 'proposed', 'to', 'his', 'girlfriend', 'Meg', '##han', 'Mark', '##le', 'and', 'the', 'couple', 'is', 'set', 'to', 'tie', 'the', 'knot', 'in', 'the', 'spring', 'of', '2018', '.', 'S', '##pec', '##ulation', '##s', 'have', 'been', 'swirling', 'for', 'months', 'that', 'the', 'couple', 'was', 'planning', 'to', 'get', 'engaged', ',', 'and', 'today', 'the', 'news', 'has', 'finally', 'been', 'confirmed', '.', 'The', 'public', 'has', 'fallen', 'in', 'love', 'with', 'the', 'American', 'actress', 'and', 'her', 'fashion', 'sense', ',', 'down', '-', 'to', '-', 'earth', 'personality', ',', 'and', 'charitable', 'works', '.', 'The', 'Royal', 'Family', ',', 'too', ',', 'is', 'reportedly', 'e', '##cs', '##tat', '##ic', 'about', 'the', 'prospect', 'of', 'having', 'such', 'a', '"'

In [None]:

shap_values_test = explainer([test_text])
shap_values_test.values.shape

#(4 is label count)

  0%|          | 0/498 [00:00<?, ?it/s]

(1, 334, 4)

In [None]:
tokens = classifier.tokenizer.tokenize(test_text)
ai_class_index = classifier.model.config.label2id["LABEL_2"] ## gpt fake is the most overfitting label
shap_scores = shap_values_test[0, :len(tokens), ai_class_index].values

word_contributions = {
    token: score for token, score in zip(tokens, shap_scores)
}

sorted_contributions = sorted(
    word_contributions.items(),
    key=lambda x: -abs(x[1])
)

print("Important word lead to LABEL2 : \n")
for token, score in sorted_contributions[:10]:
    print(f"{token}: contribution score: {score:+.4f} ")

Important word lead to LABEL2 : 

air: contribution score: +0.0075 
have: contribution score: +0.0073 
pu: contribution score: +0.0073 
##ndi: contribution score: +0.0073 
##ts: contribution score: +0.0073 
praised: contribution score: +0.0073 
plan: contribution score: +0.0056 
b: contribution score: +0.0056 
that: contribution score: +0.0056 
they: contribution score: +0.0056 


In [None]:
#test with clean, 100 samples
filtered_df = test_df[test_df['label_new'] == 2.0]
## model not seen data

sampled_df = filtered_df.sample(n=100, random_state=42)

text_list = sampled_df['cleaned_combined_text'].tolist()

print(text_list)

['fans kelly clarkson shocked hear pop star recently disastrous accident recently purchased victorian mansion according sources close singer incident occurred charity event hosting home eyewitnesses report throughout night toilets kellys house heavily used guests point evening something went wrong toilet master bathroom became clogged despite efforts event staff discreetly fix issue blockage persisted panic kelly decided take matters hands attempt unclog toilet unfortunately efforts made situation worse followed scene chaos confusion water began overflow bathroom onto plush carpet bedroom cleanup operation got underway guests forced evacuate area stench sewage filled room meanwhile kelly reportedly retreated dressing room tears mortified transpired although incident undoubtedly embarrassing star many praised showing downtoearth side face mishap one guest remarked anything shows kelly relatable doesnt take seriously despite setback kelly seems taking things stride statement released eve

In [None]:
#test with clean, 100 samples
filtered_df_r = test_df[
    (test_df['label_new'] == 3.0) &
    (test_df['cleaned_combined_text'].str.split().str.len() <= 400)
]
## model not seen data

sampled_df_r = filtered_df_r.sample(n=100, random_state=42)

text_list_r = sampled_df_r['cleaned_combined_text'].tolist()

print(text_list_r )



###### d.1.1: Bert

In [None]:
shap_values= explainer(text_list)

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
PartitionExplainer explainer:  52%|█████▏    | 52/100 [04:11<03:52,  4.85s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  55%|█████▌    | 55/100 [04:26<03:40,  4.89s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  69%|██████▉   | 69/100 [05:34<02:29,  4.82s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  79%|███████▉  | 79/100 [06:24<01:41,  4.83s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 101it [08:12,  4.98s/it]


In [None]:
import numpy as np
from collections import defaultdict


global_avg = defaultdict(list)
global_max = defaultdict(float)


for sample_idx, text in enumerate(text_list):
    tokens = classifier.tokenizer.tokenize(text)

    ai_class_index = classifier.model.config.label2id["LABEL_2"]
    shap_scores = shap_values[sample_idx, :len(tokens), ai_class_index].values

    for token, score in zip(tokens, shap_scores):
        global_avg[token].append(score)
        if score > global_max[token]:
            global_max[token] = score

def print_top_stats(stats_dict, title, value_format="+.4f"):
    filtered_items = [
        (token, score)
        for token, score in stats_dict.items()
    ]
    sorted_items = sorted(filtered_items, key=lambda x: -x[1])

    print(f"\n{title}:")
    for token, value in sorted_items[:10]:
        print(f"  {token} : {value:{value_format}}")

avg_score = {token: np.mean(scores) for token, scores in global_avg.items()}
print_top_stats(avg_score, "Average Score")

print_top_stats(global_max, "Max Score")





Average Score:
  humble : +0.1857
  confused : +0.1232
  gratitude : +0.0858
  rubbing : +0.0761
  sympathetic : +0.0652
  uncomfortable : +0.0610
  positive : +0.0542
  blind : +0.0537
  bad : +0.0527
  succeed : +0.0435

Max Score:
  interview : +0.2285
  humble : +0.1857
  close : +0.1769
  believe : +0.1475
  busy : +0.1459
  an : +0.1376
  suggest : +0.1287
  remains : +0.1237
  seen : +0.1237
  others : +0.1232


In [None]:
shap.plots.text(shap_values[50, :, ai_class_index])

In [None]:
text_list_r_filtered = [
    text for text in text_list_r
    if len(tokenizer.tokenize(text)) <= 512
]

shap_values= explainer(text_list_r_filtered)


global_avg = defaultdict(list)
global_max = defaultdict(float)


for sample_idx, text in enumerate(text_list_r_filtered):
    tokens = classifier.tokenizer.tokenize(text)

    ai_class_index = classifier.model.config.label2id["LABEL_2"]
    shap_scores = shap_values[sample_idx, :len(tokens), ai_class_index].values

    for token, score in zip(tokens, shap_scores):
        global_avg[token].append(score)
        if score > global_max[token]:
            global_max[token] = score

def print_top_stats(stats_dict, title, value_format="+.4f"):
    filtered_items = [
        (token, score)
        for token, score in stats_dict.items()
    ]
    sorted_items = sorted(filtered_items, key=lambda x: -x[1])

    print(f"\n{title}:")
    for token, value in sorted_items[:10]:
        print(f"  {token} : {value:{value_format}}")

avg_score = {token: np.mean(scores) for token, scores in global_avg.items()}
print_top_stats(avg_score, "Average Score")

print_top_stats(global_max, "Max Score")




  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   2%|▏         | 2/96 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   4%|▍         | 4/96 [00:16<04:37,  3.02s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   5%|▌         | 5/96 [00:22<06:30,  4.29s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   6%|▋         | 6/96 [00:27<06:56,  4.63s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   8%|▊         | 8/96 [00:37<07:07,  4.86s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   9%|▉         | 9/96 [00:43<07:33,  5.21s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  10%|█         | 10/96 [00:50<08:08,  5.69s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  12%|█▎        | 12/96 [01:00<07:25,  5.30s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  18%|█▊        | 17/96 [01:25<06:39,  5.06s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  22%|██▏       | 21/96 [01:45<06:13,  4.97s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  23%|██▎       | 22/96 [01:51<06:42,  5.44s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  24%|██▍       | 23/96 [01:58<07:02,  5.79s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  27%|██▋       | 26/96 [02:13<06:13,  5.34s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  28%|██▊       | 27/96 [02:19<06:05,  5.30s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  29%|██▉       | 28/96 [02:24<05:56,  5.24s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  30%|███       | 29/96 [02:30<06:06,  5.47s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  33%|███▎      | 32/96 [02:45<05:33,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  35%|███▌      | 34/96 [02:56<05:29,  5.32s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  38%|███▊      | 36/96 [03:07<05:13,  5.23s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  39%|███▊      | 37/96 [03:12<05:10,  5.27s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  40%|███▉      | 38/96 [03:19<05:30,  5.71s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  44%|████▍     | 42/96 [03:40<04:43,  5.24s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  45%|████▍     | 43/96 [03:46<04:50,  5.49s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  48%|████▊     | 46/96 [04:01<04:23,  5.27s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  50%|█████     | 48/96 [04:12<04:07,  5.16s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  51%|█████     | 49/96 [04:17<04:02,  5.17s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  52%|█████▏    | 50/96 [04:22<03:57,  5.16s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  53%|█████▎    | 51/96 [04:28<04:02,  5.40s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  54%|█████▍    | 52/96 [04:33<03:57,  5.40s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  55%|█████▌    | 53/96 [04:39<03:58,  5.55s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  56%|█████▋    | 54/96 [04:45<03:58,  5.68s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  60%|██████    | 58/96 [05:06<03:18,  5.23s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  62%|██████▎   | 60/96 [05:16<03:07,  5.21s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  64%|██████▎   | 61/96 [05:22<03:05,  5.31s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  65%|██████▍   | 62/96 [05:28<03:13,  5.69s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  67%|██████▋   | 64/96 [05:39<02:58,  5.58s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  70%|██████▉   | 67/96 [05:54<02:30,  5.18s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  71%|███████   | 68/96 [06:00<02:24,  5.17s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  72%|███████▏  | 69/96 [06:06<02:26,  5.41s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  75%|███████▌  | 72/96 [06:21<02:05,  5.25s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  76%|███████▌  | 73/96 [06:27<02:01,  5.29s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  77%|███████▋  | 74/96 [06:32<01:56,  5.28s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  78%|███████▊  | 75/96 [06:38<01:55,  5.50s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  84%|████████▍ | 81/96 [07:09<01:16,  5.10s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  85%|████████▌ | 82/96 [07:16<01:17,  5.52s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  86%|████████▋ | 83/96 [07:21<01:10,  5.41s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  88%|████████▊ | 84/96 [07:27<01:07,  5.58s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  89%|████████▊ | 85/96 [07:32<01:01,  5.55s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  91%|█████████ | 87/96 [07:43<00:49,  5.48s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  92%|█████████▏| 88/96 [07:49<00:45,  5.63s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  93%|█████████▎| 89/96 [07:54<00:38,  5.50s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  95%|█████████▍| 91/96 [08:05<00:26,  5.35s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  96%|█████████▌| 92/96 [08:11<00:22,  5.57s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  97%|█████████▋| 93/96 [08:16<00:16,  5.45s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  99%|█████████▉| 95/96 [08:26<00:05,  5.24s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 100%|██████████| 96/96 [08:32<00:00,  5.45s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 97it [08:38,  5.45s/it]



Average Score:
  snapped : +0.0429
  sad : +0.0325
  ##thin : +0.0222
  welcomed : +0.0213
  involving : +0.0211
  rests : +0.0205
  control : +0.0179
  ##bloid : +0.0169
  boy : +0.0140
  anguish : +0.0139

Max Score:
  sad : +0.0649
  scandal : +0.0632
  involving : +0.0632
  may : +0.0594
  second : +0.0520
  arrival : +0.0520
  go : +0.0490
  fan : +0.0490
  self : +0.0429
  snapped : +0.0429


###### d.1.2: Roberta

In [None]:
model = AutoModelForSequenceClassification.from_pretrained(save_path_r)
model_name = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load trained model
classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    device=0
)


shap_values = explainer(text_list)


Device set to use cuda:0


  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   2%|▏         | 2/100 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   4%|▍         | 4/100 [00:15<04:13,  2.64s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   5%|▌         | 5/100 [00:21<05:57,  3.77s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   6%|▌         | 6/100 [00:26<06:44,  4.30s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   7%|▋         | 7/100 [00:31<07:09,  4.62s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   8%|▊         | 8/100 [00:36<07:20,  4.78s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   9%|▉         | 9/100 [00:41<07:27,  4.92s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  10%|█         | 10/100 [00:47<07:27,  4.97s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  11%|█         | 11/100 [00:52<07:28,  5.04s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  12%|█▏        | 12/100 [00:57<07:38,  5.21s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  13%|█▎        | 13/100 [01:03<07:35,  5.24s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  14%|█▍        | 14/100 [01:08<07:38,  5.33s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  15%|█▌        | 15/100 [01:13<07:28,  5.28s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  16%|█▌        | 16/100 [01:19<07:25,  5.30s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  17%|█▋        | 17/100 [01:24<07:20,  5.31s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  18%|█▊        | 18/100 [01:29<07:15,  5.31s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  19%|█▉        | 19/100 [01:35<07:12,  5.34s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  20%|██        | 20/100 [01:40<07:05,  5.31s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  21%|██        | 21/100 [01:45<07:02,  5.35s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  22%|██▏       | 22/100 [01:51<07:01,  5.41s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  23%|██▎       | 23/100 [01:56<06:58,  5.44s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  24%|██▍       | 24/100 [02:02<06:53,  5.44s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  25%|██▌       | 25/100 [02:07<06:47,  5.44s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  26%|██▌       | 26/100 [02:13<06:41,  5.42s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  27%|██▋       | 27/100 [02:18<06:34,  5.41s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  28%|██▊       | 28/100 [02:23<06:26,  5.37s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  29%|██▉       | 29/100 [02:29<06:18,  5.33s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  30%|███       | 30/100 [02:34<06:11,  5.31s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  31%|███       | 31/100 [02:39<06:04,  5.28s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  32%|███▏      | 32/100 [02:44<05:55,  5.23s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  33%|███▎      | 33/100 [02:49<05:49,  5.22s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  34%|███▍      | 34/100 [02:55<05:43,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  35%|███▌      | 35/100 [03:00<05:37,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  36%|███▌      | 36/100 [03:05<05:33,  5.21s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  37%|███▋      | 37/100 [03:10<05:27,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  38%|███▊      | 38/100 [03:15<05:20,  5.17s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  39%|███▉      | 39/100 [03:21<05:16,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  40%|████      | 40/100 [03:26<05:11,  5.18s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  41%|████      | 41/100 [03:31<05:04,  5.16s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  42%|████▏     | 42/100 [03:36<05:04,  5.25s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  43%|████▎     | 43/100 [03:41<04:56,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  44%|████▍     | 44/100 [03:47<04:52,  5.22s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  45%|████▌     | 45/100 [03:52<04:46,  5.22s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  46%|████▌     | 46/100 [03:57<04:40,  5.19s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  47%|████▋     | 47/100 [04:02<04:35,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  48%|████▊     | 48/100 [04:07<04:30,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  49%|████▉     | 49/100 [04:13<04:25,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  50%|█████     | 50/100 [04:18<04:21,  5.22s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  51%|█████     | 51/100 [04:23<04:14,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  52%|█████▏    | 52/100 [04:28<04:09,  5.21s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  53%|█████▎    | 53/100 [04:34<04:11,  5.34s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  54%|█████▍    | 54/100 [04:39<04:05,  5.33s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  55%|█████▌    | 55/100 [04:45<04:00,  5.34s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  56%|█████▌    | 56/100 [04:50<03:58,  5.41s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  57%|█████▋    | 57/100 [04:55<03:51,  5.39s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  58%|█████▊    | 58/100 [05:01<03:44,  5.35s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  59%|█████▉    | 59/100 [05:06<03:39,  5.35s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  60%|██████    | 60/100 [05:12<03:36,  5.42s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  61%|██████    | 61/100 [05:17<03:31,  5.41s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  62%|██████▏   | 62/100 [05:22<03:24,  5.39s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  63%|██████▎   | 63/100 [05:28<03:23,  5.50s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  64%|██████▍   | 64/100 [05:33<03:14,  5.41s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  65%|██████▌   | 65/100 [05:39<03:07,  5.36s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  66%|██████▌   | 66/100 [05:44<03:01,  5.34s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  68%|██████▊   | 68/100 [05:54<02:46,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  69%|██████▉   | 69/100 [05:59<02:41,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  70%|███████   | 70/100 [06:05<02:42,  5.43s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  72%|███████▏  | 72/100 [06:16<02:28,  5.29s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  73%|███████▎  | 73/100 [06:21<02:21,  5.25s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  74%|███████▍  | 74/100 [06:26<02:15,  5.21s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  75%|███████▌  | 75/100 [06:31<02:09,  5.17s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  77%|███████▋  | 77/100 [06:41<01:57,  5.11s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  79%|███████▉  | 79/100 [06:51<01:46,  5.09s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  80%|████████  | 80/100 [06:57<01:47,  5.36s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  84%|████████▍ | 84/100 [07:17<01:21,  5.08s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  85%|████████▌ | 85/100 [07:23<01:16,  5.12s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  86%|████████▌ | 86/100 [07:28<01:12,  5.16s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  87%|████████▋ | 87/100 [07:33<01:06,  5.14s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  89%|████████▉ | 89/100 [07:43<00:56,  5.10s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  90%|█████████ | 90/100 [07:48<00:51,  5.11s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  91%|█████████ | 91/100 [07:53<00:46,  5.11s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  92%|█████████▏| 92/100 [07:59<00:41,  5.15s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  93%|█████████▎| 93/100 [08:04<00:35,  5.14s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  94%|█████████▍| 94/100 [08:09<00:30,  5.13s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  95%|█████████▌| 95/100 [08:14<00:25,  5.18s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  96%|█████████▌| 96/100 [08:19<00:20,  5.16s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  97%|█████████▋| 97/100 [08:24<00:15,  5.14s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  98%|█████████▊| 98/100 [08:29<00:10,  5.13s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  99%|█████████▉| 99/100 [08:34<00:05,  5.12s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 101it [08:45,  5.30s/it]


In [None]:
import numpy as np
from collections import defaultdict


global_avg = defaultdict(list)
global_max = defaultdict(float)


for sample_idx, text in enumerate(text_list):
    tokens = classifier.tokenizer.tokenize(text)

    ai_class_index = classifier.model.config.label2id["LABEL_2"]
    shap_scores = shap_values[sample_idx, :len(tokens), ai_class_index].values

    for token, score in zip(tokens, shap_scores):
        global_avg[token].append(score)
        if score > global_max[token]:
            global_max[token] = score

def print_top_stats(stats_dict, title, value_format="+.4f"):
    filtered_items = [
        (token, score)
        for token, score in stats_dict.items()
    ]
    sorted_items = sorted(filtered_items, key=lambda x: -x[1])

    print(f"\n{title}:")
    for token, value in sorted_items[:10]:
        print(f"  {token} : {value:{value_format}}")

avg_score = {token: np.mean(scores) for token, scores in global_avg.items()}
print_top_stats(avg_score, "Average Score")

print_top_stats(global_max, "Max Score")



Average Score:
  Ġmaj : +0.1857
  Ġtrick : +0.0851
  Ġconflicting : +0.0818
  Ġobjects : +0.0640
  Ġbought : +0.0640
  Ġcandidacy : +0.0617
  Ġembracing : +0.0612
  filled : +0.0596
  Ġgrown : +0.0586
  immer : +0.0562

Max Score:
  Ġinterview : +0.2285
  Ġmaj : +0.1857
  Ġcouple : +0.1769
  Ġincident : +0.1545
  Ġprivacy : +0.1475
  Ġcareers : +0.1459
  Ġsay : +0.1376
  Ġp : +0.1311
  Ġrevealed : +0.1289
  Ġexclusive : +0.1287


###### d.1.3: Distilbert

In [None]:
model = AutoModelForSequenceClassification.from_pretrained(save_path_d)
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load trained model
classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    device=0
)


shap_values = explainer(text_list)

Device set to use cuda:0


  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   2%|▏         | 2/100 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   4%|▍         | 4/100 [00:15<04:04,  2.54s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   5%|▌         | 5/100 [00:20<05:45,  3.63s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   8%|▊         | 8/100 [00:35<07:05,  4.62s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   9%|▉         | 9/100 [00:40<07:16,  4.79s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  10%|█         | 10/100 [00:45<07:19,  4.88s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  11%|█         | 11/100 [00:51<07:22,  4.97s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  12%|█▏        | 12/100 [00:56<07:22,  5.03s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  13%|█▎        | 13/100 [01:01<07:19,  5.05s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  14%|█▍        | 14/100 [01:06<07:19,  5.11s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  15%|█▌        | 15/100 [01:11<07:13,  5.10s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  17%|█▋        | 17/100 [01:21<07:03,  5.10s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  18%|█▊        | 18/100 [01:27<07:00,  5.13s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  19%|█▉        | 19/100 [01:32<06:56,  5.14s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  20%|██        | 20/100 [01:37<06:52,  5.15s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  21%|██        | 21/100 [01:42<06:46,  5.15s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  22%|██▏       | 22/100 [01:47<06:46,  5.21s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  23%|██▎       | 23/100 [01:53<06:39,  5.19s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  24%|██▍       | 24/100 [01:58<06:35,  5.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  25%|██▌       | 25/100 [02:03<06:28,  5.19s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  26%|██▌       | 26/100 [02:08<06:23,  5.18s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  28%|██▊       | 28/100 [02:18<06:10,  5.15s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  29%|██▉       | 29/100 [02:24<06:06,  5.16s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  30%|███       | 30/100 [02:29<06:01,  5.17s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  32%|███▏      | 32/100 [02:39<05:47,  5.12s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  33%|███▎      | 33/100 [02:44<05:43,  5.13s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  34%|███▍      | 34/100 [02:49<05:37,  5.11s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  35%|███▌      | 35/100 [02:54<05:32,  5.11s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  36%|███▌      | 36/100 [02:59<05:28,  5.13s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  38%|███▊      | 38/100 [03:10<05:16,  5.10s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  39%|███▉      | 39/100 [03:15<05:11,  5.10s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  40%|████      | 40/100 [03:20<05:06,  5.11s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  41%|████      | 41/100 [03:25<05:00,  5.09s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  43%|████▎     | 43/100 [03:35<04:49,  5.07s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  44%|████▍     | 44/100 [03:40<04:44,  5.07s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  46%|████▌     | 46/100 [03:50<04:32,  5.05s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  47%|████▋     | 47/100 [03:55<04:29,  5.09s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  48%|████▊     | 48/100 [04:00<04:24,  5.08s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  49%|████▉     | 49/100 [04:05<04:19,  5.09s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  51%|█████     | 51/100 [04:16<04:09,  5.09s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  52%|█████▏    | 52/100 [04:21<04:04,  5.10s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  54%|█████▍    | 54/100 [04:31<03:56,  5.14s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  55%|█████▌    | 55/100 [04:36<03:50,  5.13s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  56%|█████▌    | 56/100 [04:42<03:50,  5.24s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  57%|█████▋    | 57/100 [04:47<03:44,  5.23s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  58%|█████▊    | 58/100 [04:52<03:37,  5.19s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  59%|█████▉    | 59/100 [04:57<03:31,  5.15s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  61%|██████    | 61/100 [05:07<03:19,  5.11s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  62%|██████▏   | 62/100 [05:13<03:14,  5.12s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  64%|██████▍   | 64/100 [05:23<03:05,  5.14s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  65%|██████▌   | 65/100 [05:28<03:00,  5.15s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  68%|██████▊   | 68/100 [05:43<02:42,  5.09s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  69%|██████▉   | 69/100 [05:48<02:37,  5.08s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  70%|███████   | 70/100 [05:54<02:40,  5.35s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  72%|███████▏  | 72/100 [06:05<02:26,  5.24s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  73%|███████▎  | 73/100 [06:10<02:20,  5.22s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  74%|███████▍  | 74/100 [06:15<02:15,  5.21s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  75%|███████▌  | 75/100 [06:20<02:09,  5.19s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  79%|███████▉  | 79/100 [06:40<01:46,  5.07s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  80%|████████  | 80/100 [06:46<01:46,  5.35s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  84%|████████▍ | 84/100 [07:06<01:21,  5.09s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  85%|████████▌ | 85/100 [07:12<01:16,  5.13s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  87%|████████▋ | 87/100 [07:22<01:06,  5.13s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  89%|████████▉ | 89/100 [07:32<00:55,  5.08s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  90%|█████████ | 90/100 [07:37<00:50,  5.09s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  91%|█████████ | 91/100 [07:42<00:45,  5.10s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  92%|█████████▏| 92/100 [07:47<00:41,  5.13s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  94%|█████████▍| 94/100 [07:58<00:30,  5.10s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  95%|█████████▌| 95/100 [08:03<00:25,  5.15s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  96%|█████████▌| 96/100 [08:08<00:20,  5.16s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  97%|█████████▋| 97/100 [08:13<00:15,  5.14s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  98%|█████████▊| 98/100 [08:18<00:10,  5.14s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  99%|█████████▉| 99/100 [08:23<00:05,  5.13s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 101it [08:34,  5.19s/it]


In [None]:
import numpy as np
from collections import defaultdict


global_avg = defaultdict(list)
global_max = defaultdict(float)


for sample_idx, text in enumerate(text_list):
    tokens = classifier.tokenizer.tokenize(text)

    ai_class_index = classifier.model.config.label2id["LABEL_2"]
    shap_scores = shap_values[sample_idx, :len(tokens), ai_class_index].values

    for token, score in zip(tokens, shap_scores):
        global_avg[token].append(score)
        if score > global_max[token]:
            global_max[token] = score

def print_top_stats(stats_dict, title, value_format="+.4f"):
    filtered_items = [
        (token, score)
        for token, score in stats_dict.items()
    ]
    sorted_items = sorted(filtered_items, key=lambda x: -x[1])

    print(f"\n{title}:")
    for token, value in sorted_items[:10]:
        print(f"  {token} : {value:{value_format}}")

avg_score = {token: np.mean(scores) for token, scores in global_avg.items()}
print_top_stats(avg_score, "Average Score")

print_top_stats(global_max, "Max Score")



Average Score:
  service : +0.0928
  smash : +0.0871
  trick : +0.0851
  grown : +0.0851
  conflicting : +0.0818
  available : +0.0738
  attitude : +0.0729
  convenience : +0.0640
  mutual : +0.0596
  reflection : +0.0543

Max Score:
  interview : +0.2285
  service : +0.1857
  wanted : +0.1769
  home : +0.1545
  read : +0.1475
  two : +0.1459
  always : +0.1376
  recently : +0.1311
  public : +0.1289
  love : +0.1287


### d2.Feature-Level Diagnostics -  n_grams frequency


In [None]:
df = Bal_FNN_df[["cleaned_combined_text","label"]]

ai_texts = df[df["label"] == "gpt_fake"]["cleaned_combined_text"].tolist()

In [None]:
len(ai_texts)

4084

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd


# Extract n-grams from AI-generated texts
vectorizer = CountVectorizer(ngram_range=(2,3), max_features=100)
X = vectorizer.fit_transform(ai_texts)

# Get top 20 trigrams
ngram_freq = pd.DataFrame(X.sum(axis=0), columns=vectorizer.get_feature_names_out())
top_ngrams = ngram_freq.T.sort_values(0, ascending=False).head(20)
print(top_ngrams)

                            0
sources close            1448
social media             1360
close couple              685
brad pitt                 682
according sources         678
sources close couple      552
time tell                 489
according sources close   488
kim kardashian            485
jennifer aniston          474
angelina jolie            465
remains seen              451
one thing                 411
selena gomez              402
royal family              356
many fans                 351
meghan markle             347
fake news                 344
source close              336
recent interview          330


### d3.Model-Level Diagnostics - robustness analysis

In [None]:
# https://arxiv.org/pdf/1901.08644  cite paper.

# https://github.com/marcotcr/checklist github

# https://homes.cs.washington.edu/~marcotcr/acl20_checklist.pdf cite paper.


In [None]:
pip cache purge

Files removed: 116


In [None]:
!pip install checklist



In [None]:
import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb
import spacy
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd
from transformers import pipeline

In [None]:
editor = Editor()

In [None]:
nlp = spacy.load('en_core_web_sm')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:

test_df['spacy_docs'] = test_df['cleaned_combined_text'].apply(nlp)
test_df['text_perturbed'] = test_df['spacy_docs'].apply(
    lambda doc: Perturb.strip_punctuation(doc)
)

test_df['text_add_typos'] = test_df['cleaned_combined_text'].apply(
    lambda doc: Perturb.add_typos(doc)
)

In [None]:
from datasets import Dataset
import pandas as pd

formatted_df = test_df[['text_add_typos','label_new']].rename(columns={
    'text_add_typos': 'text',
    'label_new': 'label'
})

# 2. Convert to Hugging Face Dataset
fake_news_dataset_test_withtypo = Dataset.from_pandas(formatted_df)

In [None]:
from datasets import Dataset
import pandas as pd

formatted_df = test_df[['text_perturbed','label_new']].rename(columns={
    'text_perturbed': 'text',
    'label_new': 'label'
})

# 2. Convert to Hugging Face Dataset
fake_news_dataset_test_wiopun = Dataset.from_pandas(formatted_df)

In [None]:
def eval_model(model, tokenizer,MAX_SEQUENCE_LENGTH,ds):
  preprocessed_val_data = ds.map(preprocess_data, batched=True, fn_kwargs={'tokenizer': tokenizer})
  preprocessed_val_data  = preprocessed_val_data .map(
          lambda x: {'labels': torch.tensor(x['label'], dtype=torch.long)}
      )

  trainer = Trainer(
      model=model,
      eval_dataset=preprocessed_val_data,
      compute_metrics=compute_metrics,)

  results = trainer.evaluate()

  print(f"\nEvaluation Results:")
  for k, v in results.items():
      print(f"{k}: {v:.4f}")


#####d.3.1 BERT

In [None]:
MAX_SEQUENCE_LENGTH = 500
model = AutoModelForSequenceClassification.from_pretrained(save_path_b)
model_name = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

print("After Applying typo:\n")
eval_model(model, tokenizer,MAX_SEQUENCE_LENGTH,fake_news_dataset_test_withtypo)
print("\n")
print("After Applying remove pun:\n")
eval_model(model, tokenizer,MAX_SEQUENCE_LENGTH,fake_news_dataset_test_wiopun)
print("\n")


After Applying typo:



Map:   0%|          | 0/3268 [00:00<?, ? examples/s]

Map:   0%|          | 0/3268 [00:00<?, ? examples/s]


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.7729    0.7834    0.7781       817
  human_real     0.7093    0.7405    0.7246       817
    gpt_fake     0.9464    0.9718    0.9589       817
    gpt_real     0.9251    0.8470    0.8843       817

    accuracy                         0.8357      3268
   macro avg     0.8384    0.8357    0.8365      3268
weighted avg     0.8384    0.8357    0.8365      3268



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mnancytengjianyi[0m ([33mnancytengjianyi-university-of-california-berkeley[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin



Evaluation Results:
eval_loss: 0.4859
eval_model_preparation_time: 0.0031
eval_accuracy: 0.8357
eval_f1: 0.8365
eval_precision: 0.8384
eval_recall: 0.8357
eval_runtime: 48.7124
eval_samples_per_second: 67.0880
eval_steps_per_second: 8.3960


After Applying remove pun:



Map:   0%|          | 0/3268 [00:00<?, ? examples/s]

Map:   0%|          | 0/3268 [00:00<?, ? examples/s]


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.7764    0.7821    0.7793       817
  human_real     0.7106    0.7393    0.7247       817
    gpt_fake     0.9431    0.9743    0.9585       817
    gpt_real     0.9201    0.8458    0.8814       817

    accuracy                         0.8354      3268
   macro avg     0.8376    0.8354    0.8359      3268
weighted avg     0.8376    0.8354    0.8359      3268


Evaluation Results:
eval_loss: 0.4894
eval_model_preparation_time: 0.0033
eval_accuracy: 0.8354
eval_f1: 0.8359
eval_precision: 0.8376
eval_recall: 0.8354
eval_runtime: 47.1724
eval_samples_per_second: 69.2780
eval_steps_per_second: 8.6700




#####d.3.2 roberta-base

In [None]:
MAX_SEQUENCE_LENGTH = 500
model = AutoModelForSequenceClassification.from_pretrained(save_path_r)
model_name = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

print("After Applying typo:\n")
eval_model(model, tokenizer,MAX_SEQUENCE_LENGTH,fake_news_dataset_test_withtypo)
print("\n")
print("After Applying remove pun:\n")
eval_model(model, tokenizer,MAX_SEQUENCE_LENGTH,fake_news_dataset_test_wiopun)
print("\n")

After Applying typo:



Map:   0%|          | 0/3268 [00:00<?, ? examples/s]

Map:   0%|          | 0/3268 [00:00<?, ? examples/s]


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.7969    0.7540    0.7748       817
  human_real     0.7092    0.7821    0.7439       817
    gpt_fake     0.9490    0.9792    0.9639       817
    gpt_real     0.9281    0.8531    0.8890       817

    accuracy                         0.8421      3268
   macro avg     0.8458    0.8421    0.8429      3268
weighted avg     0.8458    0.8421    0.8429      3268


Evaluation Results:
eval_loss: 0.4578
eval_model_preparation_time: 0.0035
eval_accuracy: 0.8421
eval_f1: 0.8429
eval_precision: 0.8458
eval_recall: 0.8421
eval_runtime: 46.2635
eval_samples_per_second: 70.6390
eval_steps_per_second: 8.8410


After Applying remove pun:



Map:   0%|          | 0/3268 [00:00<?, ? examples/s]

Map:   0%|          | 0/3268 [00:00<?, ? examples/s]


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.7995    0.7613    0.7799       817
  human_real     0.7170    0.7723    0.7437       817
    gpt_fake     0.9446    0.9816    0.9628       817
    gpt_real     0.9198    0.8568    0.8872       817

    accuracy                         0.8430      3268
   macro avg     0.8453    0.8430    0.8434      3268
weighted avg     0.8453    0.8430    0.8434      3268


Evaluation Results:
eval_loss: 0.4676
eval_model_preparation_time: 0.0030
eval_accuracy: 0.8430
eval_f1: 0.8434
eval_precision: 0.8453
eval_recall: 0.8430
eval_runtime: 45.2079
eval_samples_per_second: 72.2880
eval_steps_per_second: 9.0470




#####d.3.3 distilbert-base-uncased

In [None]:
MAX_SEQUENCE_LENGTH = 500
model = AutoModelForSequenceClassification.from_pretrained(save_path_d)
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

print("After Applying typo:\n")
eval_model(model, tokenizer,MAX_SEQUENCE_LENGTH,fake_news_dataset_test_withtypo)
print("\n")
print("After Applying remove pun:\n")
eval_model(model, tokenizer,MAX_SEQUENCE_LENGTH,fake_news_dataset_test_wiopun)
print("\n")

After Applying typo:



Map:   0%|          | 0/3268 [00:00<?, ? examples/s]

Map:   0%|          | 0/3268 [00:00<?, ? examples/s]


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.7937    0.7723    0.7829       817
  human_real     0.7214    0.7797    0.7494       817
    gpt_fake     0.9600    0.9706    0.9653       817
    gpt_real     0.9202    0.8605    0.8893       817

    accuracy                         0.8458      3268
   macro avg     0.8488    0.8458    0.8467      3268
weighted avg     0.8488    0.8458    0.8467      3268


Evaluation Results:
eval_loss: 0.4653
eval_model_preparation_time: 0.0016
eval_accuracy: 0.8458
eval_f1: 0.8467
eval_precision: 0.8488
eval_recall: 0.8458
eval_runtime: 24.2478
eval_samples_per_second: 134.7750
eval_steps_per_second: 16.8680


After Applying remove pun:



Map:   0%|          | 0/3268 [00:00<?, ? examples/s]

Map:   0%|          | 0/3268 [00:00<?, ? examples/s]


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.8031    0.7687    0.7855       817
  human_real     0.7227    0.7846    0.7523       817
    gpt_fake     0.9566    0.9718    0.9642       817
    gpt_real     0.9142    0.8605    0.8865       817

    accuracy                         0.8464      3268
   macro avg     0.8491    0.8464    0.8471      3268
weighted avg     0.8491    0.8464    0.8471      3268


Evaluation Results:
eval_loss: 0.4709
eval_model_preparation_time: 0.0015
eval_accuracy: 0.8464
eval_f1: 0.8471
eval_precision: 0.8491
eval_recall: 0.8464
eval_runtime: 24.4814
eval_samples_per_second: 133.4890
eval_steps_per_second: 16.7070




##CODE SANDBOX


In [None]:
# import json
# import pandas as pd

# # Function to read a JSON file
# def load_json_file(file_path):
#     """Load a JSON file"""
#     with open(file_path, "r", encoding="utf-8") as file:
#         return json.load(file)

# # Function to process FakeNewsNet data
# def process_FakeNewsNet_data(file_paths):
#     """Read multiple JSON files and organize them into a DataFrame"""
#     data_list = []  # Store all data

#     for category, file_path in file_paths.items():
#         data = load_json_file(file_path)  # Load JSON data

#         for news_id, news_content in data.items():
#             if 'text' in news_content:  # Ensure the text field exists
#                 # Determine the news source
#                 if "gossipcop" in category:
#                     source = "gossipcop"
#                 elif "politifact" in category:
#                     source = "politifact"
#                 else:
#                     source = "unknown"

#                 # Determine the news type
#                 if "HR" in category:
#                     label = "human_real"  # Human-written real news
#                 elif "F" in category:
#                     if "MF" in category:
#                         label = "gpt_fake"  # AI-generated fake news
#                     else:
#                         label = "human_fake"  # Human-written fake news
#                 else:
#                     label = "unknown"

#                 data_list.append({
#                     "id": news_id,
#                     "text": news_content["text"],
#                     "title": news_content.get("title", ""),  # Some data may not have a title
#                     "source": source,  # News source
#                     "label": label,  # Real/fake news category
#                 })

#     # Convert to a Pandas DataFrame
#     df = pd.DataFrame(data_list)
#     return df

# # Define paths for the dataset
# fake_news_paths = {
#     "gossipcop_HF": "/content/drive/MyDrive/DS266/HF.json",
#     "gossipcop_HR": "/content/drive/MyDrive/DS266/HR.json",
#     "gossipcop_MF": "/content/drive/MyDrive/DS266/MF.json",
# }

# # Load and process the data
# FakeNewsNet_df = process_FakeNewsNet_data(fake_news_paths)

# # Display the first 5 rows
# FakeNewsNet_df.head()


Unnamed: 0,id,text,title,source,label
0,0,✕ Close Meghan Markle and Prince Harry have an...,As it happened: Prince Harry and Meghan Markle...,gossipcop,human_fake
1,1,Kim Kardashian and Kanye West are pulling out ...,Kim & Kanye Install At-Home Panic Room After P...,gossipcop,human_fake
2,2,Prince Harry and Meghan currently live at Kens...,£1.4million spent renovating Prince Harry and ...,gossipcop,human_fake
3,3,They can't get enough of the Biebs on this sho...,Photos from Dancing With the Stars: Special Gu...,gossipcop,human_fake
4,4,Ben Affleck is keeping life with his three kid...,Jennifer Garner ‘Doesn’t Want’ Her Kids Around...,gossipcop,human_fake


In [None]:
# import random
# import pandas as pd
# from typing import Dict, List

# class GPTTextHumanizer:
#     def __init__(self, error_prob: float = 0.18):
#         self.error_prob = error_prob
#         self.error_config = {
#             # Tiered error probabilities (more likely errors first)
#             'common_typos': {
#                 'prob': 0.6,
#                 'map': {
#                     'the': ['teh', 'th'],
#                     'and': ['adn', 'an'],
#                     'that': ['taht'],
#                     'with': ['wit'],
#                     'this': ['tis']
#                 }
#             },
#             'case_errors': {
#                 'prob': 0.4,
#                 'apply': lambda word: ''.join(
#                     c.upper() if random.random() < 0.3 else c.lower()
#                     for c in word
#                 )
#             },
#             'filler_words': {
#                 'prob': 0.3,
#                 'words': ['like', 'you know', 'I mean', 'sorta', 'kinda']
#             },
#         }

#     def _apply_typos(self, word: str) -> str:
#         if random.random() < self.error_config['common_typos']['prob']:
#             lower_word = word.lower()
#             if lower_word in self.error_config['common_typos']['map']:
#                 return random.choice(
#                     self.error_config['common_typos']['map'][lower_word]
#                 )
#         return word

#     def _apply_case_errors(self, word: str) -> str:
#         if random.random() < self.error_config['case_errors']['prob']:
#             return self.error_config['case_errors']['apply'](word)
#         return word

#     def _insert_fillers(self, text: str) -> str:
#         if random.random() < self.error_config['filler_words']['prob']:
#             words = text.split()
#             if len(words) > 4:
#                 insert_pos = random.randint(1, len(words)-1)
#                 filler = random.choice(
#                     self.error_config['filler_words']['words']
#                 )
#                 words.insert(insert_pos, filler)
#                 return ' '.join(words)
#         return text


#     def humanize(self, text: str) -> str:
#         if not isinstance(text, str):
#             return text

#         # Apply transformations in order of most to least disruptive
#         words = text.split()
#         words = [self._apply_case_errors(self._apply_typos(word))
#                 for word in words]
#         text = ' '.join(words)
#         text = self._insert_fillers(text)

#         return text

# def replace_gpt_fake_text(
#     df: pd.DataFrame,
#     target_label: str = "gpt_fake",
#     text_col: str = "text"
# ) -> pd.DataFrame:

#     humanizer = GPTTextHumanizer()

#     # Create mask for target rows
#     target_mask = df['label'] == target_label

#     # Apply humanization
#     df.loc[target_mask, text_col] = df.loc[target_mask, text_col].apply(
#         lambda x: humanizer.humanize(x)
#     )

#     return df

# # Example usage:
# # df = pd.read_csv('your_data.csv')
# # print("Before:", df[df['label'] == "gpt_fake"]['text'].iloc[0])
# # replace_gpt_fake_text(df, target_label="gpt_fake")
# # print("After:", df[df['label'] == "gpt_fake"]['text'].iloc[0])
# # df.to_csv('humanized_output.csv', index=False)

In [None]:
# replace_gpt_fake_text(FakeNewsNet_df, target_label="gpt_fake")

# # FakeNewsNet_df_with_human_error = augment_gpt_fake_data(FakeNewsNet_df, target_label="gpt_fake")


Unnamed: 0,id,text,title,source,label,combined_text,label_new
0,0,✕ Close Meghan Markle and Prince Harry have an...,As it happened: Prince Harry and Meghan Markle...,gossipcop,human_fake,✕ Close Meghan Markle and Prince Harry have an...,0.0
1,1,Kim Kardashian and Kanye West are pulling out ...,Kim & Kanye Install At-Home Panic Room After P...,gossipcop,human_fake,Kim Kardashian and Kanye West are pulling out ...,0.0
2,2,Prince Harry and Meghan currently live at Kens...,£1.4million spent renovating Prince Harry and ...,gossipcop,human_fake,Prince Harry and Meghan currently live at Kens...,0.0
3,3,They can't get enough of the Biebs on this sho...,Photos from Dancing With the Stars: Special Gu...,gossipcop,human_fake,They can't get enough of the Biebs on this sho...,0.0
4,4,Ben Affleck is keeping life with his three kid...,Jennifer Garner ‘Doesn’t Want’ Her Kids Around...,gossipcop,human_fake,Ben Affleck is keeping life with his three kid...,0.0
...,...,...,...,...,...,...,...
16331,4079,As wE BiD farewell to Th drama-filled YeAr tha...,The Most Anticipated Celebrity Feuds of 2018,gossipcop,gpt_fake,As we bid farewell to the drama-filled year th...,2.0
16332,4080,Reality television star kIm Kardashian is faci...,Kim Kardashian Accused of Hypocrisy After Crit...,gossipcop,gpt_fake,Reality television star Kim Kardashian is faci...,2.0
16333,4081,"chRisSy TEigen, thE PopuLar model AdN social m...",Chrissy Teigen Reveals Secret Struggle with Po...,gossipcop,gpt_fake,"Chrissy Teigen, the popular model and social m...",2.0
16334,4082,Kylie Jenner an Travis scOtT'S relationship ma...,Kylie Jenner and Travis Scott's Relationship o...,gossipcop,gpt_fake,Kylie Jenner and Travis Scott's relationship m...,2.0


In [None]:
# # Load the dataset (update path to your file)
# FakeNewsNet_df['combined_text'] = FakeNewsNet_df['text'] + ' ' + FakeNewsNet_df['title']

# # Convert labels to numerical values (0=real, 1=fake)
# FakeNewsNet_df['label_new'] = FakeNewsNet_df['label'].map({'human_fake': 0, 'human_real': 1.0,'gpt_fake': 2})

# # Show sample data
# print("Sample data:")
# print(FakeNewsNet_df[['text','combined_text', 'label','label_new']].head())

Sample data:
                                                text  \
0  ✕ Close Meghan Markle and Prince Harry have an...   
1  Kim Kardashian and Kanye West are pulling out ...   
2  Prince Harry and Meghan currently live at Kens...   
3  They can't get enough of the Biebs on this sho...   
4  Ben Affleck is keeping life with his three kid...   

                                       combined_text       label  label_new  
0  ✕ Close Meghan Markle and Prince Harry have an...  human_fake        0.0  
1  Kim Kardashian and Kanye West are pulling out ...  human_fake        0.0  
2  Prince Harry and Meghan currently live at Kens...  human_fake        0.0  
3  They can't get enough of the Biebs on this sho...  human_fake        0.0  
4  Ben Affleck is keeping life with his three kid...  human_fake        0.0  


In [None]:
# FakeNewsNet_df

Unnamed: 0,id,text,title,source,label,combined_text,label_new
0,0,✕ Close Meghan Markle and Prince Harry have an...,As it happened: Prince Harry and Meghan Markle...,gossipcop,human_fake,✕ Close Meghan Markle and Prince Harry have an...,0.0
1,1,Kim Kardashian and Kanye West are pulling out ...,Kim & Kanye Install At-Home Panic Room After P...,gossipcop,human_fake,Kim Kardashian and Kanye West are pulling out ...,0.0
2,2,Prince Harry and Meghan currently live at Kens...,£1.4million spent renovating Prince Harry and ...,gossipcop,human_fake,Prince Harry and Meghan currently live at Kens...,0.0
3,3,They can't get enough of the Biebs on this sho...,Photos from Dancing With the Stars: Special Gu...,gossipcop,human_fake,They can't get enough of the Biebs on this sho...,0.0
4,4,Ben Affleck is keeping life with his three kid...,Jennifer Garner ‘Doesn’t Want’ Her Kids Around...,gossipcop,human_fake,Ben Affleck is keeping life with his three kid...,0.0
...,...,...,...,...,...,...,...
16331,4079,As wE BiD farewell to Th drama-filled YeAr tha...,The Most Anticipated Celebrity Feuds of 2018,gossipcop,gpt_fake,As wE BiD farewell to Th drama-filled YeAr tha...,2.0
16332,4080,Reality television star kIm Kardashian is faci...,Kim Kardashian Accused of Hypocrisy After Crit...,gossipcop,gpt_fake,Reality television star kIm Kardashian is faci...,2.0
16333,4081,"chRisSy TEigen, thE PopuLar model AdN social m...",Chrissy Teigen Reveals Secret Struggle with Po...,gossipcop,gpt_fake,"chRisSy TEigen, thE PopuLar model AdN social m...",2.0
16334,4082,Kylie Jenner an Travis scOtT'S relationship ma...,Kylie Jenner and Travis Scott's Relationship o...,gossipcop,gpt_fake,Kylie Jenner an Travis scOtT'S relationship ma...,2.0


In [None]:
# import string

# # Remove unwanted characters, links, and HTML tags
# def clean_text(text):
#     # Remove HTML tags
#     text = re.sub(r'<.*?>', '', text)
#     # Remove URLs
#     text = re.sub(r'http\S+|www\S+|https\S+', '', text)
#     # Remove special characters and numbers, keeping only letters
#     text = re.sub(r'[^a-zA-Z\s]', '', text)
#     # Remove extra spaces
#     text = re.sub(r'\s+', ' ', text).strip()
#     # Convert to lowercase
#     text = text.lower()
#     return text

# # Apply cleaning to datasets
# FakeNewsNet_df['cleaned_combined_text'] = FakeNewsNet_df['combined_text'].apply(clean_text)
# # Print some cleaned samples

# print(FakeNewsNet_df['cleaned_combined_text'].head())


0    close meghan markle and prince harry have anno...
1    kim kardashian and kanye west are pulling out ...
2    prince harry and meghan currently live at kens...
3    they cant get enough of the biebs on this show...
4    ben affleck is keeping life with his three kid...
Name: cleaned_combined_text, dtype: object


In [None]:
# from sklearn.model_selection import train_test_split

# # Split the data while keeping the DataFrame structure
# train_df, test_df = train_test_split(
#     FakeNewsNet_df[['cleaned_combined_text', 'label_new']],  # Select both columns
#     test_size=0.3,
#     random_state=42,
#     stratify=FakeNewsNet_df['label_new']
# )

# # Reset the index for both DataFrames
# train_df = train_df.reset_index(drop=True)
# test_df = test_df.reset_index(drop=True)

# print(f"Training samples: {len(train_df)}")
# print(f"Test samples: {len(test_df)}")
# print("\nTrain DataFrame:")
# print(train_df.head())
# print("\nTest DataFrame:")
# print(test_df.head())

# from datasets import Dataset
# import pandas as pd
# formatted_df = train_df.rename(columns={
#     'cleaned_combined_text': 'text',
#     'label_new': 'label'
# })

# # 2. Convert to Hugging Face Dataset
# fake_news_dataset_train = Dataset.from_pandas(formatted_df)

# from datasets import Dataset
# import pandas as pd

# formatted_df = test_df.rename(columns={
#     'cleaned_combined_text': 'text',
#     'label_new': 'label'
# })

# # 2. Convert to Hugging Face Dataset
# fake_news_dataset_test = Dataset.from_pandas(formatted_df)


Training samples: 11435
Test samples: 4901

Train DataFrame:
                               cleaned_combined_text  label_new
0  minutes after winning his second consecutive e...        1.0
1  chrissy teigen celebrates all the people who h...        1.0
2  former the voice judge jennifer hudson is faci...        2.0
3  in a shocking turn of events celine dion alleg...        2.0
4  you learn something new every day like the fac...        0.0

Test DataFrame:
                               cleaned_combined_text  label_new
0  this side of ariana grande was always there it...        1.0
1  daniel bryan takes brie bella to a landfill to...        1.0
2  drake is going back to school and bringing alo...        1.0
3  the oscars ceremony was dominated by discussio...        0.0
4  our list of the best quotes from the fifty sha...        1.0


In [None]:
# fake_news_dataset_test


Dataset({
    features: ['text', 'label'],
    num_rows: 4901
})

In [None]:
# fake_news_dataset_train

Dataset({
    features: ['text', 'label'],
    num_rows: 11435
})

In [None]:

# def preprocess_imdb(data, tokenizer):
#     review_text = data['text']

#     encoded = tokenizer.batch_encode_plus(
#             review_text,
#             max_length=MAX_SEQUENCE_LENGTH,
#             padding='max_length',
#             truncation=True,
#             return_attention_mask=True,
#             return_token_type_ids=True,
#             return_tensors="pt"
#         )

#     return encoded



# def fine_tune_classification_model(classification_model,
#                                    tokenizer,
#                                    train_data,
#                                    dev_data,
#                                    batch_size = 16,
#                                    num_epochs = 10):

#     preprocessed_train_data = train_data.map(preprocess_imdb, batched=True, fn_kwargs={'tokenizer': tokenizer})
#     preprocessed_dev_data = dev_data.map(preprocess_imdb, batched=True, fn_kwargs={'tokenizer': tokenizer})

#     preprocessed_train_data = preprocessed_train_data.map(
#         lambda x: {'labels': torch.tensor(x['label'], dtype=torch.long)}  # Ensure labels are int64
#     )
#     preprocessed_dev_data  = preprocessed_dev_data .map(
#         lambda x: {'labels': torch.tensor(x['label'], dtype=torch.long)}
#     )


#     ### YOUR CODE HERE


#     training_args = TrainingArguments(
#     output_dir="/content/drive/MyDrive/DS266/model_output",
#     per_device_train_batch_size=batch_size,
#     per_device_eval_batch_size=batch_size,
#     num_train_epochs=num_epochs,
#     logging_dir="/content/drive/MyDrive/DS266/logs",
#     eval_strategy="epoch",
#     save_strategy="epoch",
#     report_to='none'
# )
#     trainer = Trainer(
#         model=classification_model,
#         args=training_args,
#         train_dataset=preprocessed_train_data,
#         eval_dataset=preprocessed_dev_data,
#         compute_metrics=compute_metrics
#     )




#     trainer.train()



In [None]:
# from transformers import Trainer, TrainingArguments
# import torch
# import numpy as np
# from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [None]:
# model_checkpoint_name = "bert-base-cased"
# bert_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_name)
# # bert_classification_model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint_name)
# bert_classification_model = AutoModelForSequenceClassification.from_pretrained(
#     model_checkpoint_name,
#     num_labels=3  # number of classes = 3
# )


# MAX_SEQUENCE_LENGTH = 400

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# fine_tune_classification_model(bert_classification_model, bert_tokenizer, fake_news_dataset_train, fake_news_dataset_test)

Map:   0%|          | 0/11435 [00:00<?, ? examples/s]

Map:   0%|          | 0/4901 [00:00<?, ? examples/s]

Map:   0%|          | 0/11435 [00:00<?, ? examples/s]

Map:   0%|          | 0/4901 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2902,0.438081,0.854724,0.856874,0.861818,0.854724
2,0.2304,0.363135,0.87574,0.874957,0.874529,0.87574



Classification Report:
              precision    recall  f1-score   support

  human_fake     0.6836    0.7812    0.7291      1225
  human_real     0.8822    0.8193    0.8496      2451
    gpt_fake     0.9992    0.9992    0.9992      1225

    accuracy                         0.8547      4901
   macro avg     0.8550    0.8666    0.8593      4901
weighted avg     0.8618    0.8547    0.8569      4901


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.7642    0.7273    0.7453      1225
  human_real     0.8670    0.8882    0.8775      2451
    gpt_fake     1.0000    0.9992    0.9996      1225

    accuracy                         0.8757      4901
   macro avg     0.8770    0.8716    0.8741      4901
weighted avg     0.8745    0.8757    0.8750      4901



In [None]:
# fine_tune_classification_model(bert_classification_model, bert_tokenizer, fake_news_dataset_train, fake_news_dataset_test)

Map:   0%|          | 0/11435 [00:00<?, ? examples/s]

Map:   0%|          | 0/4901 [00:00<?, ? examples/s]

Map:   0%|          | 0/11435 [00:00<?, ? examples/s]

Map:   0%|          | 0/4901 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4144,0.355748,0.862477,0.853036,0.866399,0.862477
2,0.335,0.303261,0.879412,0.876944,0.877614,0.879412
3,0.2378,0.339124,0.879208,0.875102,0.878424,0.879208
4,0.1948,0.42009,0.874107,0.871185,0.872102,0.874107
5,0.1129,0.544718,0.875128,0.871406,0.873661,0.875128
6,0.0793,0.607419,0.864517,0.86352,0.863002,0.864517
7,0.0558,0.585819,0.861661,0.861787,0.861917,0.861661
8,0.05,0.696487,0.857784,0.858266,0.858832,0.857784
9,0.0376,0.840768,0.854111,0.8555,0.857816,0.854111
10,0.0261,0.791923,0.864926,0.864316,0.863903,0.864926



Classification Report:
              precision    recall  f1-score   support

  human_fake     0.8520    0.5453    0.6650      1225
  human_real     0.8080    0.9531    0.8746      2451
    gpt_fake     0.9976    0.9984    0.9980      1225

    accuracy                         0.8625      4901
   macro avg     0.8859    0.8323    0.8458      4901
weighted avg     0.8664    0.8625    0.8530      4901


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.8008    0.6890    0.7407      1225
  human_real     0.8549    0.9155    0.8842      2451
    gpt_fake     1.0000    0.9976    0.9988      1225

    accuracy                         0.8794      4901
   macro avg     0.8852    0.8674    0.8745      4901
weighted avg     0.8776    0.8794    0.8769      4901


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.8282    0.6531    0.7303      1225
  human_real     0.8436    0.9331    0.8861      2451
   

In [None]:

# from checklist.perturb import Perturb

# def safe_add_typos(text, min_length=3, max_typos=2):
#     """Add typos only to sufficiently long text"""
#     if pd.isna(text) or len(text) < min_length:
#         return text  # Return original if too short

#     try:
#         # Calculate safe number of typos
#         n_typos = min(max_typos, len(text)//2)  # Max 1 typo per 2 characters
#         return Perturb.add_typos(text)
#     except ValueError:
#         return text  # Fallback for edge cases

# def conditional_add_typos(row):
#     """Add typos only to samples with label=2 (AI-generated)"""
#     if row['label_new'] == 2.0 or row['label_new'] == 3.0:
#         return safe_add_typos(row['cleaned_combined_text'])
#     else:
#         return row['cleaned_combined_text']  # Return original for other labels



# train_df['text_add_typos'] = train_df.apply(conditional_add_typos, axis=1)

# test_df['text_add_typos'] = test_df.apply(conditional_add_typos, axis=1)


# # train_df['text_add_typos'] = train_df['cleaned_combined_text'].apply(safe_add_typos)

# # # Verify
# # empty_count = train_df['cleaned_combined_text'].apply(lambda x: len(x) < 3).sum()
# # print(f"Fixed {empty_count} short/empty texts")

In [None]:
# from datasets import Dataset
# import pandas as pd

# formatted_df = test_df[['text_add_typos','label_new']].rename(columns={
#     'text_add_typos': 'text',
#     'label_new': 'label'
# })

# # 2. Convert to Hugging Face Dataset
# fake_news_dataset_test_withtypo = Dataset.from_pandas(formatted_df)

In [None]:
# from datasets import Dataset
# import pandas as pd

# formatted_df = train_df[['text_add_typos','label_new']].rename(columns={
#     'text_add_typos': 'text',
#     'label_new': 'label'
# })

# # 2. Convert to Hugging Face Dataset
# fake_news_dataset_train_withtypo = Dataset.from_pandas(formatted_df)

In [None]:
# fine_tune_classification_model(bert_classification_model, bert_tokenizer, fake_news_dataset_train_withtypo, fake_news_dataset_test_withtypo)

Map:   0%|          | 0/14353 [00:00<?, ? examples/s]

Map:   0%|          | 0/6152 [00:00<?, ? examples/s]

Map:   0%|          | 0/14353 [00:00<?, ? examples/s]

Map:   0%|          | 0/6152 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.3029,0.564614,0.84054,0.837044,0.850242,0.84054
2,0.1999,0.760155,0.829974,0.831481,0.835213,0.829974
3,0.1101,0.842899,0.828999,0.829428,0.829924,0.828999



Classification Report:
              precision    recall  f1-score   support

  human_fake     0.8278    0.5927    0.6908      1225
  human_real     0.7601    0.9319    0.8372      2451
    gpt_fake     0.9740    0.9469    0.9603      1225
    gpt_real     0.9277    0.8002    0.8592      1251

    accuracy                         0.8405      6152
   macro avg     0.8724    0.8179    0.8369      6152
weighted avg     0.8502    0.8405    0.8370      6152


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.6743    0.7706    0.7192      1225
  human_real     0.8178    0.7891    0.8032      2451
    gpt_fake     0.9566    0.9714    0.9640      1225
    gpt_real     0.9081    0.8297    0.8672      1251

    accuracy                         0.8300      6152
   macro avg     0.8392    0.8402    0.8384      6152
weighted avg     0.8352    0.8300    0.8315      6152


Classification Report:
              precision    recall  f1-score   support

  h

In [None]:
fine_tune_classification_model(bert_classification_model, bert_tokenizer, fake_news_dataset_train_withtypo, fake_news_dataset_test_withtypo)

Map:   0%|          | 0/11435 [00:00<?, ? examples/s]

Map:   0%|          | 0/4901 [00:00<?, ? examples/s]

Map:   0%|          | 0/11435 [00:00<?, ? examples/s]

Map:   0%|          | 0/4901 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.648832,0.841461,0.841677,0.842783,0.841461
2,0.102700,0.577785,0.85656,0.856124,0.856012,0.85656
3,0.083300,0.670189,0.866354,0.861769,0.863982,0.866354
4,0.083300,0.799781,0.854315,0.854883,0.855624,0.854315
5,0.051000,0.98476,0.845746,0.844339,0.843869,0.845746
6,0.032700,0.970212,0.868598,0.866496,0.866548,0.868598
7,0.023100,0.928938,0.862273,0.860141,0.859277,0.862273
8,0.023100,1.11383,0.861049,0.8604,0.859844,0.861049
9,0.016600,1.09,0.855132,0.855148,0.855352,0.855132
10,0.011200,1.089724,0.861457,0.860432,0.859643,0.861457



Classification Report:
              precision    recall  f1-score   support

  human_fake     0.6879    0.7216    0.7044      1225
  human_real     0.8674    0.8274    0.8469      2451
    gpt_fake     0.9484    0.9894    0.9684      1225

    accuracy                         0.8415      4901
   macro avg     0.8346    0.8461    0.8399      4901
weighted avg     0.8428    0.8415    0.8417      4901


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.7288    0.7020    0.7152      1225
  human_real     0.8539    0.8772    0.8654      2451
    gpt_fake     0.9875    0.9698    0.9786      1225

    accuracy                         0.8566      4901
   macro avg     0.8567    0.8497    0.8530      4901
weighted avg     0.8560    0.8566    0.8561      4901


Classification Report:
              precision    recall  f1-score   support

  human_fake     0.8089    0.6392    0.7141      1225
  human_real     0.8411    0.9200    0.8788      2451
   

In [None]:
### import pandas as pd
# import numpy as np
# import re
# import nltk
# from nltk.corpus import stopwords
# from nltk.stem import WordNetLemmatizer
# from sklearn.model_selection import train_test_split
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import classification_report, accuracy_score
# from sklearn.pipeline import Pipeline
# from sklearn.preprocessing import LabelEncoder
# import xgboost as xgb
# import matplotlib.pyplot as plt

# nltk.download('stopwords')
# nltk.download('wordnet')
# nltk.download('omw-1.4')

In [None]:
# from sklearn.feature_extraction.text import TfidfVectorizer

# # Cell 6: Baseline Model 1 - TF-IDF + Logistic Regression
# # Create pipeline
# lr_pipeline = Pipeline([
#     ('tfidf', TfidfVectorizer(
#         max_features=10000,
#         ngram_range=(1, 2),
#         stop_words='english'
#     )),
#     ('clf', LogisticRegression(
#         max_iter=1000,
#         class_weight='balanced',
#         multi_class='multinomial'
#     ))
# ])

# # Train model
# lr_pipeline.fit(X_train, y_train)

# # Evaluate
# y_pred = lr_pipeline.predict(X_test)

# print("Logistic Regression Results:")
# print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
# print("\nClassification Report:")
# print(classification_report(y_test, y_pred))