# In [None]:
# 📰 Fake News Detection Project
# By: ZARA FAHAD KHAN

# Step 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

# Step 2: Load the dataset
# Reads "fake_news.csv" from the current working directory; change the path
# if the file lives elsewhere.
df = pd.read_csv("fake_news.csv")  # e.g., from Kaggle dataset
print("📊 Dataset Shape:", df.shape)
print(df.head())

# Step 3: Data cleaning
# Only the 'text' and 'label' columns are used below, so drop a row only when
# one of those two is missing. A bare dropna() would also throw away usable
# rows whose only gap is in some unrelated column.
df = df.dropna(subset=['text', 'label'])

# Step 4: Split data
# Features are the raw article text; the target is the label column
# (0 = real, 1 = fake).
X, y = df['text'], df['label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: Convert text to numeric form (TF-IDF)
# English stop words are removed and terms whose document frequency exceeds
# 70% of the training documents are ignored.
vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')
# Fit on the training text only, then reuse the fitted vectorizer for the
# test text.
tfidf_train = vectorizer.fit_transform(X_train)
tfidf_test = vectorizer.transform(X_test)

# Step 6: Train model
# The classifier shuffles the training data between passes, so it is seeded
# the same way as the train/test split; without a seed the reported accuracy
# changes from run to run.
# NOTE(review): recent scikit-learn releases appear to deprecate
# PassiveAggressiveClassifier in favour of SGDClassifier - confirm against
# the installed version before upgrading.
model = PassiveAggressiveClassifier(max_iter=50, random_state=42)
model.fit(tfidf_train, y_train)

# Step 7: Make predictions
y_pred = model.predict(tfidf_test)

# Step 8: Check accuracy
# accuracy_score returns a fraction; it is printed as a percentage rounded
# to two decimals.
score = accuracy_score(y_test, y_pred)
print(f"✅ Model Accuracy: {round(score*100, 2)}%")

# Step 9: Confusion matrix
# Pin the row/column order to the classes the model learned and derive the
# tick labels from that same order, so the axis names cannot drift out of
# sync with the matrix (for example when the labels are not encoded as 0/1,
# or when a class is absent from the test split).
class_order = list(model.classes_)
display_names = {0: 'Real', 1: 'Fake'}  # encoding stated in Step 4
# Any label outside the 0/1 encoding is shown as its own string value.
tick_labels = [display_names.get(label, str(label)) for label in class_order]

cm = confusion_matrix(y_test, y_pred, labels=class_order)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=tick_labels,
    yticklabels=tick_labels,
)
plt.title("📰 Fake News Detection Confusion Matrix")
plt.xlabel("Predicted")  # columns are predicted labels
plt.ylabel("Actual")     # rows are true labels
plt.show()

# Step 10: Try it manually
# Run one hand-written headline through the fitted vectorizer and the
# trained model; the printed value is the raw array of predicted labels.
sample_text = ["Government launches new scheme for students."]
sample_tfidf = vectorizer.transform(sample_text)
sample_prediction = model.predict(sample_tfidf)
print("🧾 Prediction for sample:", sample_prediction)