In [8]:
pip install transformers datasets torch scikit-learn

Collecting datasets
  Obtaining dependency information for datasets from https://files.pythonhosted.org/packages/d7/84/0df6c5981f5fc722381662ff8cfbdf8aad64bec875f75d80b55bfef394ce/datasets-3.2.0-py3-none-any.whl.metadata
  Using cached datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting huggingface-hub<1.0,>=0.23.2 (from transformers)
  Obtaining dependency information for huggingface-hub<1.0,>=0.23.2 from https://files.pythonhosted.org/packages/ea/da/6c2bea5327b640920267d3bf2c9fc114cfbd0a5de234d81cda80cc9e33c8/huggingface_hub-0.28.1-py3-none-any.whl.metadata
  Downloading huggingface_hub-0.28.1-py3-none-any.whl.metadata (13 kB)
Using cached datasets-3.2.0-py3-none-any.whl (480 kB)
Downloading huggingface_hub-0.28.1-py3-none-any.whl (464 kB)
   ---------------------------------------- 0.0/464.1 kB ? eta -:--:--
    --------------------------------------- 10.2/464.1 kB ? eta -:--:--
   -- ------------------------------------ 30.7/464.1 kB 330.3 kB/s eta 0:00:02
   --- ----------

In [7]:
!pip install fsspec==2023.5.0



Collecting fsspec==2023.5.0
  Obtaining dependency information for fsspec==2023.5.0 from https://files.pythonhosted.org/packages/ec/4e/397b234a369df06ec782666fcdf9791d125ca6de48729814b381af8c6c03/fsspec-2023.5.0-py3-none-any.whl.metadata
  Using cached fsspec-2023.5.0-py3-none-any.whl.metadata (6.7 kB)
Using cached fsspec-2023.5.0-py3-none-any.whl (160 kB)
Installing collected packages: fsspec
Successfully installed fsspec-2023.5.0


In [5]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset


In [6]:
# Load Dataset
df = pd.read_csv("final_data.csv")


In [7]:
# Ensure correct column names
df.columns = ["text", "label"]

In [8]:
# Convert text to string and label to integer
df["text"] = df["text"].astype(str)  # Convert all text to string
df["label"] = df["label"].astype(int)  # Convert labels to integers

In [9]:
# Train-Test Split
train_texts, val_texts, train_labels, val_labels = train_test_split(df["text"].tolist(), df["label"].tolist(), test_size=0.2, random_state=42)


In [10]:
# Load Tokenizer for XLM-RoBERTa
model_name = "xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)


In [11]:
# Tokenization Function
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

In [12]:
# тЬЕ Convert Data to Hugging Face Dataset Format (Fixed)
train_data = Dataset.from_dict({"text": list(map(str, train_texts)), "label": list(map(int, train_labels))})
val_data = Dataset.from_dict({"text": list(map(str, val_texts)), "label": list(map(int, val_labels))})

In [13]:
# Tokenize Dataset
train_data = train_data.map(tokenize_function, batched=True)
val_data = val_data.map(tokenize_function, batched=True)

Map:   0%|          | 0/1581 [00:00<?, ? examples/s]

Map:   0%|          | 0/396 [00:00<?, ? examples/s]

In [15]:
from transformers import XLMRobertaForSequenceClassification, Trainer, TrainingArguments

# Load pre-trained model
model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=2)

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [18]:
pip install accelerate>=0.26.0


Note: you may need to restart the kernel to use updated packages.


In [16]:
# Training Arguments
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
)




In [17]:
# Trainer Object
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
    tokenizer=tokenizer,
)

  trainer = Trainer(


In [18]:
# Train Model
trainer.train()

Epoch,Training Loss,Validation Loss
1,No log,0.280634
2,No log,0.307996
3,0.303400,0.255545


TrainOutput(global_step=594, training_loss=0.2914544757367786, metrics={'train_runtime': 51536.6669, 'train_samples_per_second': 0.092, 'train_steps_per_second': 0.012, 'total_flos': 1247935735572480.0, 'train_loss': 0.2914544757367786, 'epoch': 3.0})

In [19]:
# Save Model
model.save_pretrained("./fake-news-multilingual")
tokenizer.save_pretrained("./fake-news-multilingual")

('./fake-news-multilingual\\tokenizer_config.json',
 './fake-news-multilingual\\special_tokens_map.json',
 './fake-news-multilingual\\sentencepiece.bpe.model',
 './fake-news-multilingual\\added_tokens.json',
 './fake-news-multilingual\\tokenizer.json')

In [20]:
# Evaluate Model
metrics = trainer.evaluate()
print(metrics)

{'eval_loss': 0.2555449903011322, 'eval_runtime': 230.2998, 'eval_samples_per_second': 1.719, 'eval_steps_per_second': 0.217, 'epoch': 3.0}


In [26]:
from sklearn.metrics import accuracy_score
import numpy as np

# Evaluate Model
eval_results = trainer.evaluate()

# Get predictions
predictions = trainer.predict(val_data)

# Extract logits and labels
logits, labels, _ = predictions

# Get predicted classes
preds = np.argmax(logits, axis=1)

# Calculate accuracy
accuracy = accuracy_score(labels, preds)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.9217


In [7]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the model and tokenizer
model_name = "./fake-news-multilingual
"  # Change this to your fine-tuned model path if needed
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Function to predict the label
def predict(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=1).item()
    label = "Fake News" if predicted_class == 1 else "Real News"
    return label

# Get user input and predict
while True:
    text = input("Enter text to classify (or type 'exit' to quit): ")
    if text.lower() == "exit":
        break
    prediction = predict(text)
    print(f"Prediction: {prediction}")

Enter text to classify (or type 'exit' to quit):  	text 641	рокро╛ро░ро╛ро│рпБрооройрпНро▒родрпНродро┐ройрпН ро╡рпЖро▓рпНро╖рпН роЙро▒рпБрокрпНрокро┐ройро░рпНроХро│ро┐ройрпН роХрпВроЯрпНроЯродрпНродро┐ройрпН рокрпЛродрпБ, тАЛтАЛрокро┐ро░родрооро░рпН роирпИроХро▓рпН роОро╡ройрпНро╕рпН роЯрпНро░роорпНрокрпН роОродро┐ро░рпНрокрпНрокрпБ роЙро▒рпБрокрпНрокро┐ройро░рпНроХро│рпИ ро╡рпЖроЯрпНроХро┐ройро╛ро░рпН, роЕро╡ро░рпНроХро│рпН родроЩрпНроХро│рпН роЪрпКроирпНрод роЬройроиро╛ропроХ рокродро┐рокрпНрокрпИ роорпБроЯро┐ро╡рпБ роЪрпЖропрпНродрпБро│рпНро│ройро░рпН роОройрпНрокродрпИропрпБроорпН, 61 рооро┐ро▓рпНро▓ро┐ропройрпН роЕроорпЖро░ро┐роХрпНроХро░рпНроХро│рпН роЯрпКройро╛ро▓рпНроЯрпН роЯро┐ро░роорпНрокрпИ ро╡ро╛роХрпНроХрпБрокрпН рокрпЖроЯрпНроЯро┐ропро┐ро▓рпН роЖродро░ро┐родрпНродро╛ро░рпНроХро│рпН роОройрпНрокродрпИропрпБроорпН роиро┐ройрпИро╡рпВроЯрпНроЯрпБроХро┐ро▒родрпБ.роЕро╡ро░рпН роЕро╡ро░рпНроХро│ро┐роЯроорпН роЪрпКро▓рпНро╡родройрпН роорпВро▓роорпН родрпКроЯроЩрпНроХро┐ройро╛ро░р

Prediction: Fake News


Enter text to classify (or type 'exit' to quit):  	text 638	роЕроЩрпНроХро╛ро░ро╛ (ро░ро╛ропрпНроЯрпНроЯро░рпНро╕рпН) - ро░ро╖рпНропро╛ро╡ро┐ройрпН роЪрпЛроЪрпНроЪро┐роХрпНроХрпБ роЙродрпНродро┐ропрпЛроХрокрпВро░рпНро╡ ро╡ро┐роЬропродрпНродро┐ройрпН рокрпЛродрпБ ро░ро╖рпНроп роЬройро╛родро┐рокродро┐ ро╡ро┐ро│ро╛роЯро┐рооро┐ро░рпН рокрпБроЯро┐ройрпБроЯройрпН родро┐роЯрпНроЯрооро┐роЯрпНроЯ рооро╛ро╕рпНроХрпЛ роЖродро░ро╡рпБ роЪро┐ро░ро┐роп роХро╛роЩрпНроХро┐ро░ро╕рпИ ро╡ро┐ро╡ро╛родро┐рокрпНрокродро╛роХ родрпБро░рпБроХрпНроХро┐роп роЬройро╛родро┐рокродро┐ родропро┐рокрпН роОро░рпНроЯрпЛроХройрпН родро┐роЩрпНроХро│ройрпНро▒рпБ родрпЖро░ро┐ро╡ро┐родрпНродро╛ро░рпН.ро░ро╖рпНропро╛, роХрпБро╡рпИродрпН рооро▒рпНро▒рпБроорпН роХродрпНродро╛ро░рпН роЖроХро┐ропрпЛро░рпБроХрпНроХрпБ рокрпБро▒рокрпНрокроЯрпБро╡родро▒рпНроХрпБ роорпБройрпНрокрпБ роЪрпЖропрпНродро┐ропро╛ро│ро░рпНроХро│ро┐роЯроорпН рокрпЗроЪро┐роп роОро░рпНроЯрпЛроХройрпН, ро░ро╖рпНропро╛ро╡рпБроЯройрпН ро╡ро┐роЪро╛ роЗро▓рпНро▓ро╛ро

Prediction: Real News


Enter text to classify (or type 'exit' to quit):  	text 664	р░ор░╛ р░Зр░ор░╛р░ор▒Н р░пр▒Кр░Хр▒Нр░Х р░Ър▒Ар░лр▒Н 2007 р░▓р▒Л р░Ер░др░ир▒Нр░ир░┐ р░Ор░ир▒Нр░ир▒Бр░Хр▒Лр░╡р░╛р░▓р░ир▒Бр░Хр▒Бр░ир▒Нр░ир░кр▒Нр░кр▒Бр░бр▒Б р░Ер░др░ир▒Б р░Хр▒Нр░░р▒Ир░╕р▒Нр░др░╡р▒Бр░бр░ир░┐ р░пр▒Бр░ир▒Ир░Яр▒Жр░бр▒Н р░╕р▒Нр░Яр▒Зр░Яр▒Нр░╕р▒Нр░Хр▒Б р░Ър▒Жр░кр▒Нр░кр░╛р░бр▒Б.р░Ер░др░ир░┐ р░ир░▓р▒Нр░▓р░Ьр░╛р░др░┐ р░Йр░жр░╛р░░р░╡р░╛р░ж р░мр▒Лр░зр░Хр▒Бр░бр▒Б р░мр░░р░╛р░Хр▒Н р░Тр░Х р░╕р░╛р░ор░╛р░Ьр░┐р░Х р░ир░┐р░░р▒Нр░╡р░╛р░╣р░Хр▒Бр░бр░┐р░Чр░╛ р░др░и р░╕р▒Нр░ер░┐р░др░┐р░ир░┐ р░кр▒Жр░Вр░Ър▒З р░ор░╛р░░р▒Нр░Чр░Вр░Чр░╛ р░ор░╛р░др▒Нр░░р░ор▒З р░Ър░░р▒Нр░Ър░┐р░ир░┐ р░Йр░кр░пр▒Лр░Чр░┐р░Вр░Ър░╛р░бр▒Б. р░Ер░ор▒Жр░░р░┐р░Хр░ир▒Нр░▓р░Хр▒Б р░╡р▒Нр░пр░др░┐р░░р▒Зр░Хр░Вр░Чр░╛ р░Ър░╛р░▓р░╛ р░жр░╛р░░р▒Бр░гр░В. р░Тр░Х р░жр░╢р░╛р░мр▒Нр░жр░╛р░ир░┐р░Хр░┐ р░кр▒Ир░Чр░╛, р░ор░░р░┐р░пр▒Б р░Хр▒Кр░ир▒Нр░ир░┐ р░кр░░р░┐р░╕р▒Нр░ер░┐р░др▒Бр░▓р░▓р▒Л р░Хр░ир▒Бр░Чр▒Кр░ир░Яр░╛р░ир░┐р░Хр░┐ р░╕р░╣р░╛р░пр░кр░бр░┐р░Вр░жр░┐.р░Ер░зр▒Нр░пр░Хр▒Нр░╖р▒Бр░бр▒Б р░мр░╛р░▓р

Prediction: Fake News


Enter text to classify (or type 'exit' to quit):  	text 679	р░╡р░╛р░░р▒Нр░╕р░╛ (р░░р░╛р░пр░┐р░Яр░░р▒Нр░╕р▒Н) - р░░р░╖р▒Нр░пр░ир▒Н р░╕р▒Ир░ир░┐р░Х р░ир░┐р░╢р▒Нр░Ър░пр░д р░кр▒Жр░░р░┐р░Чр░┐р░и р░╕р░ор░пр░Вр░▓р▒Л р░╡р░▓р░╕р░▓р▒Б р░╡р░Вр░Яр░┐ р░╕р░ор░╕р▒Нр░пр░▓р░кр▒И р░Хр▒Кр░ир▒Нр░ир░┐ р░др▒Вр░░р▒Нр░кр▒Б р░ор░░р░┐р░пр▒Б р░кр░╛р░╢р▒Нр░Ър░╛р░др▒Нр░п р░пр▒Вр░░р▒Лр░кр░┐р░пр░ир▒Н р░пр▒Вр░ир░┐р░пр░ир▒Н р░░р░╛р░╖р▒Нр░Яр▒Нр░░р░╛р░▓ р░ор░зр▒Нр░п р░кр▒Жр░░р▒Бр░Чр▒Бр░др▒Бр░ир▒Нр░и р░╡р░┐р░нр░Ьр░и.р░Рр░░р▒Лр░кр░╛ р░др▒Вр░░р▒Нр░кр▒Б р░ор░░р░┐р░пр▒Б р░╕р░Вр░кр░ир▒Нр░и р░кр░╢р▒Нр░Ър░┐р░о р░жр▒Зр░╢р░╛р░▓р░▓р▒Л р░ор░╛р░Ьр▒А р░Хр░ор▒Нр░пр▒Вр░ир░┐р░╕р▒Нр░Яр▒Н р░░р░╛р░╖р▒Нр░Яр▒Нр░░р░╛р░▓ р░ор░зр▒Нр░п р░Шр░░р▒Нр░╖р░гр░▓р▒Б 2015 р░╡р░▓р░╕ р░╕р░Вр░Хр▒Нр░╖р▒Лр░нр░В р░ор░░р░┐р░пр▒Б р░Хр▒Вр░Яр░ор░┐р░ир░┐ р░╡р░┐р░бр░┐р░Ър░┐р░кр▒Жр░Яр▒Нр░Яр░бр░╛р░ир░┐р░Хр░┐ р░мр▒Нр░░р░┐р░Яр░ир▒Н р░др▒Ар░╕р▒Бр░Хр▒Бр░ир▒Нр░и р░ир░┐р░░р▒Нр░гр░пр░В р░ир▒Бр░Вр░бр░┐ р░кр▒Жр░░р░┐р░Чр░╛р░пр░┐, р░Ор░Вр░жр▒Бр░Хр░Вр░Яр▒З р░ир░╛р░пр░Хр▒Бр░▓р▒Б EU

Prediction: Real News


Enter text to classify (or type 'exit' to quit):  	text 660	vienna (reuters) - at least 3,000 people formed a chain of light in vienna on wednesday to protest against the formation of a government that includes the far-right freedom party. demonstrators holding flickering candles, torches and bicycle lamps encircled the capital s government district.   our republic s most powerful political offices should be exclusively reserved for trustworthy people who are not in the slightest connected to right-wing extremists, said alexander pollak, spokesman for sos mitmensch, one of several human rights groups which organized the demonstration. it was the biggest protest in austria since coalition talks between the conservative people s party (ovp) and the freedom party (fpo) started two weeks ago. organizers estimated the number of people taking part at 8,000 to 10,000, the police at around 3,000. we are here because they (the fpo) feed hatred and want to divide people, said brigitte griesser, 

Prediction: Real News


Enter text to classify (or type 'exit' to quit):  	text 681	donald trump spent months on the campaign trail bashing nato, the cornerstone of global security after world war ii. however, president obama said monday that trump is now committed to nato, the alliance he once referred to as obsolete. trump told president obama that he plans to stick with nato, according to the hill. he expressed a great interest in maintaining our core strategic relationships, obama said. and so, one of the messages i will be able to deliver is his commitment to nato and the transatlantic alliance. i think that s one of the most important functions i can serve at this stage during this trip. is to let them know that there is no weakening of resolve when it comes to america s commitment to maintaining a strong and robust nato relationship and a recognition that those alliances aren t just good for europe, they re good for the united states, he continued. and they re vital for the world. so, why the change of

Prediction: Fake News


Enter text to classify (or type 'exit' to quit):  	text 10399	рдиреНрдпреВ рдпреЙрд░реНрдХ (рд░рд╛рдпрдЯрд░) - рдиреНрдпреВ рдЬрд░реНрд╕реА рдХреЗ рдЧрд╡рд░реНрдирд░ рдХреНрд░рд┐рд╕ рдХреНрд░рд┐рд╕реНрдЯреА рдиреЗ рдПрдХ рдмреЗрд╕рдмреЙрд▓ рдкреНрд░рд╢рдВрд╕рдХ рдХрд╛ рд╕рд╛рдордирд╛ рдХрд┐рдпрд╛, рдЬрд┐рд╕рдиреЗ рдорд┐рд▓реНрд╡реМрдХреА рдореЗрдВ рд░рд╡рд┐рд╡рд╛рд░ рд░рд╛рдд рдХреЗ рдЦреЗрд▓ рдХреЗ рджреМрд░рд╛рди рдЙрд╕реЗ рд╕реНрдерд╛рдиреАрдп рдореАрдбрд┐рдпрд╛ рджреНрд╡рд╛рд░рд╛ рдкреЛрд╕реНрдЯ рдХрд┐рдП рдЧрдП рдПрдХ рд╡реАрдбрд┐рдпреЛ рдХреЗ рдЕрдиреБрд╕рд╛рд░, рдЕрд▓реЛрдХрдкреНрд░рд┐рдп рдЧрд╡рд░реНрдирд░ рдХреЛ рдСрдирд▓рд╛рдЗрди рдЪреБрдЯрдХреБрд▓реЛрдВ рдХреЗ рдПрдХ рдФрд░ рджреМрд░ рдХрд╛ рд▓рдХреНрд╖реНрдп рдмрдирд╛ рджрд┐рдпрд╛редрдШрдЯрдирд╛ рдХреЗ рдПрдХ рд╡реАрдбрд┐рдпреЛ рдХреЗ рдЕрдиреБрд╕рд╛рд░, рджреВрд╕рд░реЗ-рдЕрд╡рдзрд┐ рдХреЗ рд░рд┐рдкрдмреНрд▓рд┐рдХрди рдХреЛ рдирд╛рдЪреЛрд╕ рдХреЗ рдПрдХ рдХрдЯреЛрд░реЗ рдХреЛ рдкрдХрдбрд╝рдХрд░ рдмреНрд░реИрдб рдЬреЛрд╕реЗр

Prediction: Real News


Enter text to classify (or type 'exit' to quit):  	text 10420	рдпрджрд┐ рдЕрдореЗрд░рд┐рдХреА рд░рд╛рдЬреНрдп рд╡рд┐рднрд╛рдЧ 16 рдорд╛рд░реНрдЪ рд╕реЗ рд╢реБрд░реВ рд╣реЛрдиреЗ рдХреЗ рд▓рд┐рдП 120-рджрд┐рди рдХреЗ рдард╣рд░рд╛рд╡ рдХреА рддреИрдпрд╛рд░реА рдХрд░ рд░рд╣рд╛ рдерд╛, рддреЛ рдпрд╣ рдХреИрд╕реЗ рд╣реИ рдХрд┐ рдпрд╣ рдмрдбрд╝реА рд╕рдВрдЦреНрдпрд╛ рдореЗрдВ рд╢рд░рдгрд╛рд░реНрдереА рд╡рд┐рдорд╛рдиреЛрдВ рдХреЗ рд▓рд┐рдП рддреИрдпрд╛рд░ рдереЗ?рдХреНрдпрд╛ рдХреЛрдИ рдкреНрд░рднрд╛рд░реА (рдХреИрд░рд┐рдпрд░ рдХреЗ рд▓реЛрдЧреЛрдВ рдХреЗ рдЕрд▓рд╛рд╡рд╛) рдЬрдирд╕рдВрдЦреНрдпрд╛, рд╢рд░рдгрд╛рд░реНрдерд┐рдпреЛрдВ рдФрд░ рдкреНрд░рд╡рд╛рд╕ рдмреНрдпреВрд░реЛ рдореЗрдВ рд╣реИ?рдХреНрдпрд╛ рд╡реЗ рдЕрднреА рднреА рд╕рднреА рд╢реЙрдЯреНрд╕ рдХрд╣ рд░рд╣реЗ рд╣реИрдВ?рдпрд╛, рдХреНрдпрд╛ рдпрд╣ рд╕рдВрднрд╡ рд╣реИ рдХрд┐ рд╡реНрд╣рд╛рдЗрдЯ рд╣рд╛рдЙрд╕ рдиреЗ рдХрд╛рд░реНрдпрдХрд╛рд░реА рдЖрджреЗрд╢ рдХреЗ рдЗрд╕ рд╣рд┐рд╕реНрд╕реЗ рдкрд░ рд▓рдбрд╝рдиреЗ рдХреЗ рд▓рд┐рдП рддреИрд

Prediction: Fake News


Enter text to classify (or type 'exit' to quit):  quit


Prediction: Fake News


Enter text to classify (or type 'exit' to quit):  exit
