In [1]:
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load XLM-RoBERTa and mBERT models & tokenizers
xlmr_model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=2)
mbert_model = AutoModelForSequenceClassification.from_pretrained("mbbert_fake_news", num_labels=2)

xlmr_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
mbert_tokenizer = AutoTokenizer.from_pretrained("mbbert_fake_news")

def predict(text):
    """Predict using both models and ensemble the results"""
    
    # Tokenize the input
    xlmr_inputs = xlmr_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    mbert_inputs = mbert_tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # Get model outputs
    with torch.no_grad():
        xlmr_logits = xlmr_model(**xlmr_inputs).logits
        mbert_logits = mbert_model(**mbert_inputs).logits

    # Convert logits to probabilities using softmax
    xlmr_probs = torch.nn.functional.softmax(xlmr_logits, dim=-1)
    mbert_probs = torch.nn.functional.softmax(mbert_logits, dim=-1)

    # Ensemble: Averaging Probabilities
    avg_probs = (xlmr_probs + mbert_probs) / 2

    # Get final prediction
    final_prediction = torch.argmax(avg_probs, dim=-1).item()

    return final_prediction, avg_probs.numpy()

# Example Usage
text = "This is a test sentence."
prediction, probs = predict(text)
print(f"Predicted Label: {prediction}, Probabilities: {probs}")


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Predicted Label: 1, Probabilities: [[0.47537547 0.5246245 ]]


In [5]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load models and tokenizers
xlm_model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base")
mb_model = AutoModelForSequenceClassification.from_pretrained("mbbert_fake_news")

xlm_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
mb_tokenizer = AutoTokenizer.from_pretrained("mbbert_fake_news")

def predict_ensemble(text, max_length=512):
    # Tokenize input with truncation and max length
    xlm_inputs = xlm_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=max_length)
    mb_inputs = mb_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=max_length)

    # Get model outputs
    with torch.no_grad():
        xlm_outputs = xlm_model(**xlm_inputs).logits
        mb_outputs = mb_model(**mb_inputs).logits

    # Average the predictions
    avg_logits = (xlm_outputs + mb_outputs) / 2
    predicted_class = torch.argmax(avg_logits, dim=1).item()

    return predicted_class

# Example usage
text = "Breaking: Fake news spreading about elections!"
prediction = predict_ensemble(text)
print("Predicted Class:", prediction)



Predicted Class: 1


In [7]:
import pandas as pd

# Load dataset
dff = pd.read_csv("final_data.csv")

# Select first 5000 rows
first_5k = dff.iloc[:5000]

# Select last 15000 rows
last_15k = dff.iloc[5000:]

# Take 500 random rows from the first 5k
subset_1 = first_5k.sample(n=500, random_state=42)

# Take 1500 random rows from the last 15k
subset_2 = last_15k.sample(n=1500, random_state=42)

# Combine both subsets
df = pd.concat([subset_1, subset_2])

# Shuffle final dataset (optional)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Print dataset size
print(df.shape)  # Should be (2000, columns)

(2000, 2)


In [11]:
import pandas as pd
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from tqdm import tqdm

# Load models and tokenizers
xlm_model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base")
mb_model = AutoModelForSequenceClassification.from_pretrained("mbbert_fake_news")

xlm_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
mb_tokenizer = AutoTokenizer.from_pretrained("mbbert_fake_news")

# Function to get prediction
def predict_ensemble_softmax(text, max_length=512):
    if not isinstance(text, str) or text.strip() == "":  # Skip empty or non-string values
        return None  

    xlm_inputs = xlm_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=max_length)
    mb_inputs = mb_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=max_length)

    with torch.no_grad():
        xlm_outputs = xlm_model(**xlm_inputs).logits
        mb_outputs = mb_model(**mb_inputs).logits

    # Convert logits to probabilities
    xlm_probs = F.softmax(xlm_outputs, dim=1)
    mb_probs = F.softmax(mb_outputs, dim=1)

    # Average the probabilities
    avg_probs = (xlm_probs + mb_probs) / 2
    predicted_class = torch.argmax(avg_probs, dim=1).item()

    return predicted_class

# Function to evaluate accuracy on your dataset
def evaluate_accuracy(df):
    correct = 0
    total = len(df)

    # Ensure 'text' column is a string and drop NaN values
    df = df.dropna(subset=["text"]).copy()
    df["text"] = df["text"].astype(str)  # Convert all text to string

    for _, row in tqdm(df.iterrows(), total=total):
        text, label = row["text"], row["label"]
        pred = predict_ensemble_softmax(text)
        if pred is not None and pred == label:
            correct += 1

    accuracy = (correct / total) * 100
    print(f"Accuracy: {accuracy:.2f}%")

# Run accuracy test
evaluate_accuracy(df)


100%|█████████████████████████████████████████████████████████████████████████████▊| 1994/2000 [42:59<00:07,  1.29s/it]

Accuracy: 92.70%





In [15]:
def predict_news():
    while True:
        news_text = input("\nEnter the news article (or type 'exit' to quit): ")
        if news_text.lower() == "exit":
            break  # Stop when the user types 'exit'

        prediction = predict_ensemble_softmax(news_text)

        if prediction is None:
            print("Invalid input. Please enter valid text.")
        elif prediction == 1:
            print("Prediction: FAKE News ❌")
        else:
            print("Prediction: REAL News ✅")
# Start prediction
predict_news()


Enter the news article (or type 'exit' to quit):  	text 12	washington (reuters) - a u.s. navy destroyer sailed near islands claimed by china in the south china sea on tuesday, three u.s. officials told reuters, prompting anger in beijing, even as president donald trump s administration seeks chinese cooperation in reining in north korea s missile and nuclear programs. the operation was the latest attempt to counter what washington sees as beijing s efforts to limit freedom of navigation in the strategic waters. but it was not as provocative as previous ones carried out since trump took office in january. the officials, speaking on condition of anonymity, said the chafee, a guided-missile destroyer, carried out normal maneuvering operations that challenged excessive maritime claims near the paracel islands, among a string of islets, reefs and shoals over which china has territorial disputes with its neighbors. china s defense ministry said on wednesday that a warship, two fighter jets 

Prediction: REAL News ✅



Enter the news article (or type 'exit' to quit):  	text 10	a black cincinnati police officer is currently receiving backlash over a controversial facebook post where he warned his african-american friends and family about white officers that may be targeting them because of their race.officer freddie vincent had shared the message earlier this month, which instructed black people on what to do if they happen to get stopped by police. the post read: a message to all my afro america friends and family. when you are encountered by a white officer make sure that you are in a public place, and comply to all of their commands, because they are looking for a reason to kill a black man. and always keep your hands in the air, and never resist. i m so tired of cops using these famous words i was in fear of my life. i m praying for louisiana, that could have been my nephew in b.r. facebookthis post was written after the high profile police brutality incidents in louisiana and minnesota, so it is

Prediction: FAKE News ❌



Enter the news article (or type 'exit' to quit):  exit


In [17]:
import pandas as pd
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

# Load models and tokenizers
xlm_model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base")
mb_model = AutoModelForSequenceClassification.from_pretrained("mbbert_fake_news")

xlm_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
mb_tokenizer = AutoTokenizer.from_pretrained("mbbert_fake_news")

# Function to get prediction
def predict_ensemble_softmax(text, max_length=512):
    if not isinstance(text, str) or text.strip() == "":  # Skip empty or non-string values
        return None  

    xlm_inputs = xlm_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=max_length)
    mb_inputs = mb_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=max_length)

    with torch.no_grad():
        xlm_outputs = xlm_model(**xlm_inputs).logits
        mb_outputs = mb_model(**mb_inputs).logits

    # Convert logits to probabilities
    xlm_probs = F.softmax(xlm_outputs, dim=1)
    mb_probs = F.softmax(mb_outputs, dim=1)

    # Average the probabilities
    avg_probs = (xlm_probs + mb_probs) / 2
    predicted_class = torch.argmax(avg_probs, dim=1).item()

    return predicted_class

# Function to compute full evaluation metrics
def evaluate_metrics(df):
    df = df.dropna(subset=["text"]).copy()  # Remove empty values
    df["text"] = df["text"].astype(str)  # Convert text to string

    y_true = []
    y_pred = []

    for _, row in tqdm(df.iterrows(), total=len(df)):
        text, label = row["text"], row["label"]
        pred = predict_ensemble_softmax(text)
        if pred is not None:
            y_true.append(label)
            y_pred.append(pred)

    # Compute Accuracy
    accuracy = accuracy_score(y_true, y_pred)

    # Compute Precision, Recall, F1-score
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary", pos_label=1)

    # Compute Confusion Matrix
    conf_matrix = confusion_matrix(y_true, y_pred)

    # Print results
    print(f"Accuracy: {accuracy:.2%}")
    print(f"Precision: {precision:.2%}")
    print(f"Recall: {recall:.2%}")
    print(f"F1-score: {f1:.2%}")
    print("\nConfusion Matrix:")
    print(conf_matrix)

# Run full evaluation
evaluate_metrics(df)


100%|██████████████████████████████████████████████████████████████████████████████| 1994/1994 [39:39<00:00,  1.19s/it]


Accuracy: 92.98%
Precision: 89.52%
Recall: 98.72%
F1-score: 93.89%

Confusion Matrix:
[[ 778  126]
 [  14 1076]]


In [21]:
import pandas as pd
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from tqdm import tqdm

# Load models and tokenizers
xlm_model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base")
mb_model = AutoModelForSequenceClassification.from_pretrained("mbbert_fake_news")

xlm_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
mb_tokenizer = AutoTokenizer.from_pretrained("mbbert_fake_news")

# Function to get prediction
def predict_ensemble_softmax(text, max_length=512):
    if not isinstance(text, str) or text.strip() == "":  # Skip empty or non-string values
        return None  

    xlm_inputs = xlm_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=max_length)
    mb_inputs = mb_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=max_length)

    with torch.no_grad():
        xlm_outputs = xlm_model(**xlm_inputs).logits
        mb_outputs = mb_model(**mb_inputs).logits

    # Convert logits to probabilities
    xlm_probs = F.softmax(xlm_outputs, dim=1)
    mb_probs = F.softmax(mb_outputs, dim=1)

    # Average the probabilities
    avg_probs = (xlm_probs + mb_probs) / 2
    predicted_class = torch.argmax(avg_probs, dim=1).item()

    return predicted_class

In [23]:
def predict_news():
    while True:
        news_text = input("\nEnter the news article (or type 'exit' to quit): ")
        if news_text.lower() == "exit":
            break  # Stop when the user types 'exit'

        prediction = predict_ensemble_softmax(news_text)

        if prediction is None:
            print("Invalid input. Please enter valid text.")
        elif prediction == 1:
            print("Prediction: FAKE News ❌")
        else:
            print("Prediction: REAL News ✅")
# Start prediction
predict_news()


Enter the news article (or type 'exit' to quit):  	text 12111	ஜார்ஜியா ஒரு குழந்தைக்கு அதிக மாநில பணத்தை மாநில பட்டயப் பள்ளிகளுக்கு அனுப்பும் என்றும், அந்த பட்டயப் பள்ளிகளுக்கு பட்ஜெட் வெட்டுக்கள் பயன்படுத்தப்படாது என்றும் கூறுகிறார்.


Prediction: FAKE News ❌



Enter the news article (or type 'exit' to quit):  	text 12112	कहते हैं कि जॉर्जिया प्रति बच्चे को राज्य के चार्टर स्कूलों में अधिक राज्य पैसा भेजेगा और उन चार्टर स्कूलों में बजट में कटौती नहीं की जाती है।


Prediction: FAKE News ❌



Enter the news article (or type 'exit' to quit):  	text 10	a black cincinnati police officer is currently receiving backlash over a controversial facebook post where he warned his african-american friends and family about white officers that may be targeting them because of their race.officer freddie vincent had shared the message earlier this month, which instructed black people on what to do if they happen to get stopped by police. the post read: a message to all my afro america friends and family. when you are encountered by a white officer make sure that you are in a public place, and comply to all of their commands, because they are looking for a reason to kill a black man. and always keep your hands in the air, and never resist. i m so tired of cops using these famous words i was in fear of my life. i m praying for louisiana, that could have been my nephew in b.r. facebookthis post was written after the high profile police brutality incidents in louisiana and minnesota, so it is

Prediction: FAKE News ❌



Enter the news article (or type 'exit' to quit):  	text 12	washington (reuters) - a u.s. navy destroyer sailed near islands claimed by china in the south china sea on tuesday, three u.s. officials told reuters, prompting anger in beijing, even as president donald trump s administration seeks chinese cooperation in reining in north korea s missile and nuclear programs. the operation was the latest attempt to counter what washington sees as beijing s efforts to limit freedom of navigation in the strategic waters. but it was not as provocative as previous ones carried out since trump took office in january. the officials, speaking on condition of anonymity, said the chafee, a guided-missile destroyer, carried out normal maneuvering operations that challenged excessive maritime claims near the paracel islands, among a string of islets, reefs and shoals over which china has territorial disputes with its neighbors. china s defense ministry said on wednesday that a warship, two fighter jets 

Prediction: REAL News ✅



Enter the news article (or type 'exit' to quit):  	text 13	washington (reuters) - the top democrats on the u.s. senate and house of representatives intelligence committees accused russia on thursday of trying to influence the nov. 8 u.s. election via computer hacking, and called on president vladimir putin to order a halt. “based on briefings we have received, we have concluded that the russian intelligence agencies are making a serious and concerted effort to influence the u.s. election,” senator dianne feinstein and representative adam schiff said in an unusually strongly worded joint statement. officials in the administration of president barack obama have said they concluded that russia or its proxies were responsible for hacking political party organizations. the government has not publicly blamed russia. russia has denied it sponsors or encourages any hacking activity. the lawmakers said the russian effort was intended to sow doubt about the security of the u.s. election, and th

Prediction: REAL News ✅



Enter the news article (or type 'exit' to quit):  	text 20	(reuters) - highlights of the day for u.s. president donald trump’s administration on wednesday: a crisis over the relationship between trump’s aides and russia deepens as a growing number of trump’s fellow republicans demand expanded congressional inquiries into the matter. the trump administration offers the job of national security adviser to u.s. vice admiral robert harward, sources say, but it is not clear if he accepted. the kremlin says u.s. media reports about russian intelligence connections to trump’s presidential campaign are groundless. russia also says it will not return crimea to ukraine or discuss the matter with foreign partners after the white house says trump expects the annexed black sea peninsula to be returned. trump’s nominee for labor secretary, andrew puzder, withdraws his name from consideration amid concerns that he could not garner enough senate votes to be confirmed.   trump’s choice of representati

Prediction: REAL News ✅



Enter the news article (or type 'exit' to quit):  	text 5702	బ్యూనస్ ఎయిర్స్ (రాయిటర్స్) - బ్యూనస్ ఎయిర్స్ యొక్క ముఖ్య ప్రావిన్స్‌లో సెనేట్ కోసం అర్జెంటీనా యొక్క పాలక పార్టీ అభ్యర్థి ప్రతిపక్ష అభ్యర్థి కంటే 4.8 శాతం పాయింట్ల ముందు ఉందని కన్సల్టెన్సీ మేనేజ్‌మెంట్ & ఫిట్ చేసిన ఒక పోల్ ప్రకారం, శుక్రవారం రాయిటర్స్‌కు ప్రత్యేకంగా పంపబడింది.దేశం యొక్క మధ్యంతర కాంగ్రెస్ ఎన్నికలు ఈ ఆదివారం జరుగుతాయి మరియు అధ్యక్షుడు మారిసియో మాక్రీ తన 2019 తిరిగి ఎన్నికల ప్రచారానికి వెళుతున్న ఎన్నికల శక్తి యొక్క ప్రారంభ సూచికగా దీనిని చూడవచ్చు.మాక్రీ యొక్క కాంబిమోస్ లేదా లెట్ ఎస్ మార్చే పార్టీకి చెందిన ఎస్టెబాన్ బుల్రిచ్, సెనేట్ కోసం తన బిడ్‌లో 35.3 శాతం ఓట్లను గెలుచుకుంటాడని అంచనా.బ్యూనస్ ఎయిర్స్ ప్రావిన్స్ అర్జెంటీనా జనాభాలో మూడింట ఒక వంతు ఉంది.అతని ప్రత్యర్థి, మాజీ అధ్యక్షుడు క్రిస్టినా ఫెర్నాండెజ్, 30.5 శాతం ఓటరు ఉద్దేశాలను కలిగి ఉన్నారు, 2 వేల మంది ఓటర్ల నిర్వహణ & ఫిట్ సర్వే ప్రకారం.పోల్‌లో 3.3 నుండి 3.5 శాతం పాయింట్ల లోపం ఉంది.ఫెర్నాండెజ్ ఆమె ఉదారమైన సామాజిక వ్యయం కోసం చాలా తక్కువ-ఆదాయ అర్జెంటీనా చేత ఇ

Prediction: REAL News ✅



Enter the news article (or type 'exit' to quit):  	text 5701	ब्यूनस आयर्स (रायटर) - ब्यूनस आयर्स के प्रमुख प्रांत में सीनेट के लिए अर्जेंटीना के सत्तारूढ़ पार्टी के उम्मीदवार विपक्षी उम्मीदवार से 4.8 प्रतिशत अंक आगे है, कंसल्टेंसी मैनेजमेंट और फिट के एक पोल के अनुसार शुक्रवार को रॉयटर्स को विशेष रूप से भेजा गया।देश का मध्यावधि कांग्रेस का चुनाव इस रविवार को आयोजित किया जाएगा, और चुनावी शक्ति के शुरुआती संकेतक के रूप में देखा जाएगा कि राष्ट्रपति मौरिसियो मैकरी अपने 2019 के चुनाव अभियान में जा रहे हैं।पोल के अनुसार, मैक्री के कंबीमोस या लेट एस चेंज पार्टी के एस्टेबन बुलरिच ने सीनेट के लिए अपनी बोली में 35.3 प्रतिशत वोट जीतने का अनुमान लगाया है।ब्यूनस आयर्स प्रांत अर्जेंटीना की आबादी के एक तिहाई का घर है।उनके प्रतिद्वंद्वी, पूर्व राष्ट्रपति क्रिस्टीना फर्नांडीज, 2,000 मतदाताओं के प्रबंधन और फिट सर्वेक्षण के अनुसार, 30.5 प्रतिशत मतदाता इरादे हैं।पोल में 3.3 से 3.5 प्रतिशत अंक की त्रुटि थी।फर्नांडीज को अपने उदार सामाजिक खर्च के लिए कई कम आय वाले अर्जेंटीना द्वारा प्यार किया जाता है।मैकरी, 

Prediction: REAL News ✅



Enter the news article (or type 'exit' to quit):  	text 5718	மில்வாக்கி சிக்கல்களில் 52 சதவீதம் கருப்பு ஆண் வேலையின்மை அடங்கும்.


Prediction: FAKE News ❌



Enter the news article (or type 'exit' to quit):  	text 5723	టోక్యో (రాయిటర్స్) - జపాన్ కోర్టు మాజీ యు.ఎస్.దక్షిణ ద్వీపమైన ఒకినావాలో ఒక మహిళపై అత్యాచారం మరియు హత్య చేసినందుకు సైనిక స్థావర కార్మికుడు శుక్రవారం జైలు శిక్ష అనుభవిస్తున్నట్లు పబ్లిక్ బ్రాడ్‌కాస్టర్ ఎన్‌హెచ్‌కె నివేదించింది.గత ఏడాది ఏప్రిల్‌లో 20 ఏళ్ల రినా షిమాబుకురోను చంపినందుకు కెన్నెత్ ఫ్రాంక్లిన్ షిన్జాటో (33) ను నహా జిల్లా కోర్టులో కనుగొన్నట్లు ఎన్‌హెచ్‌కె తెలిపింది.కోర్టు ప్రతినిధి రాయిటర్స్‌తో మాట్లాడుతూ, ఈ నిర్ణయాన్ని తాను వెంటనే ధృవీకరించలేకపోయాడు.ఈ కేసు ద్వీపంలో కోపాన్ని రేకెత్తించింది, ఇక్కడ స్థానికులు యు.ఎస్.వారు చెప్పే సైనిక స్థావరాలు ఒకినావాపై భారీ భారాన్ని విధిస్తాయి.ఒకినావా సుమారు 50,000 యు.ఎస్.30,000 మంది సైనిక సిబ్బంది మరియు స్థావరాలలో పనిచేస్తున్న పౌరులతో సహా జాతీయులు.స్థానికులను అంచనా వేసే ప్రయత్నంలో, గత సంవత్సరం యునైటెడ్ స్టేట్స్ కొన్ని యు.ఎస్. కు చట్టపరమైన రక్షణ మరియు ప్రయోజనాలను పరిమితం చేయడానికి అంగీకరించింది.1960 నాటి స్థితిలో ఉన్న ఫోర్సెస్ అగ్రిమెంట్ (సోఫా) కింద జపాన్‌లో మిలటరీ కోసం పనిచేస్తున్న పౌ

Prediction: REAL News ✅



Enter the news article (or type 'exit' to quit):  exit


In [25]:
def predict_news():
    while True:
        news_text = input("\nEnter the news article (or type 'exit' to quit): ")
        if news_text.lower() == "exit":
            break  # Stop when the user types 'exit'

        prediction = predict_ensemble_softmax(news_text)

        if prediction is None:
            print("Invalid input. Please enter valid text.")
        elif prediction == 1:
            print("Prediction: FAKE News ❌")
        else:
            print("Prediction: REAL News ✅")
# Start prediction
predict_news()


Enter the news article (or type 'exit' to quit):  Woman sentenced in case that sparked Springfield cat-eating rumours


Prediction: FAKE News ❌



Enter the news article (or type 'exit' to quit):  Whirlwind of misinformation sows distrust ahead of US election day


Prediction: FAKE News ❌



Enter the news article (or type 'exit' to quit):  Singer kalpana attempted sucide yesterday


Prediction: FAKE News ❌



Enter the news article (or type 'exit' to quit):  Cissy Houston: The Grammy award-winning singer and mother of Whitney Houston died at age 91.


Prediction: FAKE News ❌



Enter the news article (or type 'exit' to quit):  Cissy Houston: The Grammy award-winning singer and mother of Whitney Houston died at age 91


Prediction: FAKE News ❌



Enter the news article (or type 'exit' to quit):  exit
