<a href="https://colab.research.google.com/github/yesvitha/Fake-News-Detection/blob/main/FakeNewsDetection(Real_Time).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ✅ Import required libraries
import pandas as pd
import numpy as np
import re
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings("ignore")

# ✅ Download NLTK stopwords
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Text-cleaning helpers shared by the training and prediction cells
from functools import lru_cache

# Matches every character that is neither a word character nor whitespace.
_PUNCT_RE = re.compile(r'[^\w\s]')

stop_words = set(stopwords.words('english'))
# clean_text strips punctuation *before* the stop-word lookup, so any stop word that
# contains punctuation (e.g. a contraction such as "don't") reaches the lookup as "dont"
# and would never match. Add the punctuation-free spellings so those words are filtered too.
stop_words |= {_PUNCT_RE.sub('', word) for word in stop_words}
stemmer = PorterStemmer()


@lru_cache(maxsize=None)
def _stem(word):
    """Return the Porter stem of ``word``.

    Memoised: a news corpus repeats the same words constantly, so caching avoids
    re-running the stemmer for every occurrence.
    """
    return stemmer.stem(word)


def clean_text(text):
    """Normalise a piece of text for TF-IDF vectorisation.

    The input is converted with ``str()``, lower-cased, stripped of punctuation,
    split on whitespace, filtered against ``stop_words`` and stemmed.

    Args:
        text: The raw text; any object is accepted because it is passed through ``str()``.

    Returns:
        The remaining stems joined by single spaces (an empty string if nothing remains).
    """
    text = _PUNCT_RE.sub('', str(text).lower())
    return ' '.join(_stem(word) for word in text.split() if word not in stop_words)

In [None]:
# Read the two source files (fake articles first, then real ones) from the working directory
fake, real = (pd.read_csv(csv_name) for csv_name in ("Fake.csv", "True.csv"))

In [None]:
# Attach the target column: 0 marks fake articles, 1 marks real ones
fake['label'], real['label'] = 0, 1



In [None]:
# Combine both sources and build the text column used by the TF-IDF model.
# (No shuffling happens here; train_test_split shuffles when the data is split.)
df = pd.concat([fake, real], ignore_index=True)  # fresh 0..n-1 index instead of repeated row labels
# Keep only the useful columns, drop incomplete rows up front, and take an explicit copy so the
# column assignment below writes to an independent frame rather than to a slice of the concat result.
df = df[['title', 'text', 'label']].dropna().copy()
df['combined'] = df['title'] + " " + df['text']

In [None]:
# Run clean_text over every combined title+body string (one call per row)
df['cleaned_text'] = df['combined'].map(clean_text)

KeyboardInterrupt: 

In [None]:
# TF-IDF features with the vocabulary capped at 5000 terms.
# NOTE(review): the vectorizer is fitted on the whole frame before the train/test split,
# so the held-out rows contribute to the vocabulary and IDF weights.
corpus = df['cleaned_text']
vectorizer = TfidfVectorizer(max_features=5000)
X, y = vectorizer.fit_transform(corpus), df['label']

KeyError: 'cleaned_text'

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


NameError: name 'X' is not defined

In [None]:
# Logistic-regression classifier with default settings, fitted on the training split
model = LogisticRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Score the held-out split and print the per-class precision/recall/F1 report
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print("Model Evaluation:\n", report)

In [None]:
# Classify a single user-supplied article
def predict_news(news_title, news_text):
    """Label one article as real or fake.

    The title and body are joined with a space, cleaned with ``clean_text``,
    vectorised with the module-level ``vectorizer`` and classified by ``model``.

    Args:
        news_title: Headline of the article (str).
        news_text: Body of the article (str).

    Returns:
        The "Real News" message when the predicted label is 1, otherwise the
        "Fake News" message.
    """
    document = clean_text(" ".join((news_title, news_text)))
    label = model.predict(vectorizer.transform([document]))[0]
    if label == 1:
        return "Real News ‚úÖ"
    return "Fake News ‚ùå"

In [None]:
# Example: prompt for a title and a body, then show the classifier's verdict
title = input("Enter the news title: ")
body = input("Enter the news body: ")
verdict = predict_news(title, body)
print("Prediction:", verdict)

In [None]:
!pip install transformers datasets scikit-learn


In [None]:
import torch
from datasets import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [None]:
# Combine both sources, drop incomplete rows, and shuffle.
# - Rows with a missing title or text are removed so the concatenation below cannot
#   produce NaN values that would later be handed to the tokenizer.
# - random_state makes the shuffle repeatable across kernel restarts.
df = (
    pd.concat([fake, real], ignore_index=True)
    .dropna(subset=['title', 'text'])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Combine title and body into one column for BERT
df['text'] = df['title'] + " " + df['text']
df = df[['text', 'label']]  # Keep only necessary columns
print(df['label'].value_counts())


In [None]:
# Tokenizer matching the 'bert-base-uncased' checkpoint
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def tokenize(batch):
    """Tokenize the 'text' entries of a batch with padding and truncation at 512 tokens."""
    return tokenizer(batch['text'], padding=True, truncation=True, max_length=512)


def _to_torch_dataset(texts, labels):
    """Build a tokenized Dataset from parallel text/label Series, formatted as torch tensors."""
    dataset = Dataset.from_dict({'text': texts.tolist(), 'label': labels.tolist()})
    dataset = dataset.map(tokenize, batched=True)
    dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
    return dataset


# 80/20 split with a fixed seed, then one tokenized dataset per split
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

train_dataset = _to_torch_dataset(train_texts, train_labels)
test_dataset = _to_torch_dataset(test_texts, test_labels)


In [None]:
# Load the 'bert-base-uncased' checkpoint as a two-label sequence classifier
# (labels follow the frames above: 0 = fake, 1 = real).
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
)


In [None]:
# Fine-tuning configuration: 2 epochs, batch size 16, evaluate and checkpoint once per epoch
import inspect

# transformers renamed the `evaluation_strategy` argument to `eval_strategy`, and recent
# releases reject the old spelling. The notebook installs transformers unpinned, so pick
# whichever keyword the installed version actually accepts.
_eval_strategy_arg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
    **{_eval_strategy_arg: "epoch"},
)

def compute_metrics(eval_pred):
    """Compute accuracy, F1, precision and recall for a (logits, labels) pair.

    Args:
        eval_pred: A pair ``(logits, labels)``; the predicted class for each row is
            the index of its largest logit along axis 1.

    Returns:
        dict with the keys "accuracy", "f1", "precision" and "recall"
        (precision/recall/F1 use ``average='binary'``).
    """
    logits, labels = eval_pred
    preds = torch.tensor(logits).argmax(dim=1)
    scores = precision_recall_fscore_support(labels, preds, average='binary')
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": scores[2],
        "precision": scores[0],
        "recall": scores[1],
    }


In [None]:
import inspect
import os

# Turn off Weights & Biases reporting so training does not prompt for a login
os.environ["WANDB_DISABLED"] = "true"

# transformers renamed the `evaluation_strategy` argument to `eval_strategy`, and recent
# releases reject the old spelling. The notebook installs transformers unpinned, so pick
# whichever keyword the installed version actually accepts.
_eval_strategy_arg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
    **{_eval_strategy_arg: "epoch"},
)

# Actually fine-tune the classifier. Without this step the model used for prediction
# is never trained on the fake/real data, and the Trainer import, the tokenized
# datasets and compute_metrics would all go unused.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# Final metrics on the held-out split (displayed as the cell output)
trainer.evaluate()



In [None]:
def predict_news_bert(text):
    """Classify a piece of text with the BERT sequence classifier.

    Args:
        text: The article text (title and body already joined) to classify.

    Returns:
        The "Real News" message when the highest-scoring class index is 1,
        otherwise the "Fake News" message.
    """
    # Inference must run with dropout disabled, whatever mode training left the model in.
    model.eval()
    tokens = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    # Put the inputs on the same device as the weights; the model may have been moved to a
    # GPU during training, while the tokenizer always produces CPU tensors.
    tokens = tokens.to(model.device)
    with torch.no_grad():
        output = model(**tokens)
    prediction = torch.argmax(output.logits, dim=1).item()
    return "Real News ‚úÖ" if prediction == 1 else "Fake News ‚ùå"

# Example: classify one hand-written headline plus summary
sample_title = "Waqf Bill Passed: Battle Moves To SC, All You Need To Know"
sample_body = "Hours after Parliament passed the bill, at least two pleas have challenged the constitutionality of Waqf (Amendment) Bill before the Supreme Court.."
combined_text = " ".join((sample_title, sample_body))

bert_verdict = predict_news_bert(combined_text)
print("Prediction:", bert_verdict)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# Fit a TF-IDF + logistic-regression pair on the whole of df['text'] (no hold-out split).
# NOTE(review): this rebinds `vectorizer` and `model`, replacing the objects that cells
# further up assigned to the same names.
y = df['label']
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X = vectorizer.fit_transform(df['text'])  # tf-idf features

# Train the model
model = LogisticRegression()
model.fit(X=X, y=y)


In [None]:
# üîß Step 1: Install NewsAPI wrapper (run once)
!pip install newsapi-python

# üß† Step 2: Import libraries
from newsapi import NewsApiClient

# üîë Step 3: Initialize NewsAPI
newsapi = NewsApiClient(api_key='651a04db57724cd6851dc20d3ae0afbc')  # Replace with your key

# üì∞ Step 4: Get real-time news headlines
headlines = newsapi.get_top_headlines(language='en', page_size=5)

# Step 5: Predict using the trained model
def predict_news_from_api(article):
    """Classify one NewsAPI article dict.

    The article's 'title' and 'description' values (an empty string is substituted
    when either is falsy, e.g. None) are joined with a space, vectorised with the
    module-level ``vectorizer`` and classified by ``model``.

    Args:
        article: Mapping with 'title' and 'description' keys.

    Returns:
        The "Real News" message when the predicted label is 1, otherwise the
        "Fake News" message.
    """
    pieces = [article[field] or '' for field in ('title', 'description')]
    features = vectorizer.transform([" ".join(pieces)])
    label = model.predict(features)[0]
    if label == 1:
        return "üü¢ Real News"
    return "üî¥ Fake News"

# Step 6: Show each fetched headline together with the classifier's verdict
separator = "-" * 60
for position, article in enumerate(headlines['articles'], start=1):
    print(f"\nüóûÔ∏è News {position}:")
    print("Title:", article['title'])
    print("Description:", article['description'])
    verdict = predict_news_from_api(article)
    print("Prediction:", verdict)
    print(separator)



üóûÔ∏è News 1:
Title: Phones, other electronics exempt from new tariffs, Trump administration says - The Washington Post
Description: The exemptions come days after the U.S. trade representative said ‚Äúthe president has been clear‚Äù that he does not intend to give exemptions to the tariffs.
Prediction: üî¥ Fake News
------------------------------------------------------------

üóûÔ∏è News 2:
Title: Main Card Results | UFC 314: Volkanovski vs Lopes - UFC.com
Description: See The Fight Results, Watch Post-Fight Interviews With The Main Card Winners And More From UFC 314: Volkanovski vs Lopes, Live From Kaseya, Center In Miami
Prediction: üî¥ Fake News
------------------------------------------------------------

üóûÔ∏è News 3:
Title: SpaceX launches 21 Starlink satellites on Falcon 9 rocket, lands booster on ship at sea - Space.com
Description: It was SpaceX's 41st Falcon 9 mission of the year.
Prediction: üî¥ Fake News
-----------------------------------------------------------