In [1]:
!pip install pandas scikit-learn nltk joblib



In [2]:
# Colab-only step: ask the user to upload the dataset archive through the browser.
# The result is kept in `uploaded` for later cells.
from google.colab import files
uploaded = files.upload()

Saving archive (9).zip to archive (9).zip


In [4]:
import os
import zipfile

zip_path = "news.zip"   # change name if needed

# The recorded upload saved the archive under a different name than the one
# hard-coded above, so a fresh run would fail here. When the default name is
# missing, fall back to the first .zip file from the upload cell (if it ran).
if not os.path.exists(zip_path):
    uploaded_zips = [
        name for name in (globals().get("uploaded") or {})
        if name.lower().endswith(".zip")
    ]
    if not uploaded_zips:
        raise FileNotFoundError(
            f"{zip_path!r} not found and no uploaded .zip archive is available"
        )
    zip_path = uploaded_zips[0]

# Extract everything into the current working directory.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall()

print("Unzipped successfully")


Unzipped successfully


In [5]:
# Show the contents of the working directory so the extracted files can be checked.
import os
os.listdir()


['.config',
 'fake.csv',
 'true.csv',
 'news.zip',
 '.ipynb_checkpoints',
 'sample_data']

In [8]:
import pandas as pd
import re
import nltk
import joblib

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Download the NLTK "stopwords" corpus, then import its reader for use below.
nltk.download('stopwords')
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [9]:
# Read the two halves of the dataset: one CSV per class.
fake, true = (pd.read_csv(csv_name) for csv_name in ("fake.csv", "true.csv"))

# Quick sanity check: row/column counts for both frames, then a preview of the fake set.
print(fake.shape, true.shape)
fake.head()

(23481, 4) (21417, 4)


Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [10]:
# Tag every row with its class: 0 for the fake set, 1 for the true set.
fake["label"] = 0
true["label"] = 1

# Stack both sets. ignore_index=True gives the combined frame a fresh 0..n-1 index
# instead of carrying over each source frame's own row labels, which would collide.
df = pd.concat([fake, true], axis=0, ignore_index=True)

# Shuffle reproducibly (fixed seed), then renumber so the index matches the new row
# order. The shuffle is positional, so the resulting row order is unchanged.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

print(df.shape)

(44898, 5)


In [11]:
# English stop words from NLTK, stored as a set for the membership test in clean_text.
stop_words = set(stopwords.words("english"))

def clean_text(text, stopword_set=None):
    """Normalise raw text into a space-separated string of content words.

    The text is lower-cased, every character that is neither an ASCII letter
    nor whitespace is dropped, and stop words are removed.

    Args:
        text: Value to clean. Non-string values are converted with ``str``;
            ``None`` and NaN (a missing cell) are treated as empty text.
        stopword_set: Optional collection of words to discard. When omitted,
            the module-level ``stop_words`` is used.

    Returns:
        The remaining words joined by single spaces ("" if nothing is left).
    """
    # Missing values: NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if stopword_set is None:
        stopword_set = stop_words

    # Convert before lower-casing so non-string input cannot raise AttributeError.
    lowered = str(text).lower()
    # Keep all whitespace (not only ' ') so newlines and tabs still separate
    # words instead of gluing neighbours together; split() collapses the runs.
    letters_only = re.sub(r'[^a-z\s]', '', lowered)
    return " ".join(w for w in letters_only.split() if w not in stopword_set)

# Run clean_text over every value in the "text" column and store the result in a new column.
df["clean_text"] = df["text"].apply(clean_text)

In [12]:
# Features are the cleaned text; targets are the 0/1 class labels.
X, y = df["clean_text"], df["label"]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

print(len(X_train), len(X_test))

35918 8980


In [13]:
# TF-IDF features, capped at 5000 terms.
vectorizer = TfidfVectorizer(max_features=5000)

# Fit on the training split only, then reuse the fitted vectorizer on the test split
# (the tuple is evaluated left to right, so fitting happens first).
X_train_vec, X_test_vec = (
    vectorizer.fit_transform(X_train),
    vectorizer.transform(X_test),
)

In [14]:
# Logistic-regression classifier; max_iter is raised to 1000 iterations.
model = LogisticRegression(max_iter=1000)
# Train on the TF-IDF matrix of the training split.
model.fit(X=X_train_vec, y=y_train)

In [15]:
# Predict labels for the held-out split.
pred = model.predict(X_test_vec)

# Overall accuracy, followed by the per-class precision/recall/F1 report.
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=pred))
print(classification_report(y_true=y_test, y_pred=pred))

Accuracy: 0.9866369710467706
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      4710
           1       0.98      0.99      0.99      4270

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [16]:
def predict_news(text):
    """Classify a piece of text with the trained model and print the verdict.

    The text is cleaned with ``clean_text``, vectorised with the fitted
    ``vectorizer`` and scored by ``model``. The function prints the predicted
    class (label 1 is reported as real, anything else as fake) and the
    highest class probability as a percentage.

    Args:
        text: Raw text to classify.

    Returns:
        None. Results are printed.
    """
    cleaned = clean_text(text)
    vec = vectorizer.transform([cleaned])

    # An all-zero row means none of the words are in the fitted vocabulary
    # (or nothing survived cleaning). A linear model would then answer from
    # its intercept alone, so say so instead of printing a misleading verdict.
    if vec.nnz == 0:
        print("⚠️ Cannot classify: the text contains no words known to the model")
        return

    result = model.predict(vec)[0]
    prob = model.predict_proba(vec)[0].max()

    print("✅ REAL NEWS" if result==1 else "❌ FAKE NEWS")
    print("Confidence:", round(prob*100,2), "%")

In [17]:
# Smoke-test the helper on a short headline.
# NOTE(review): the model was trained on the full "text" column, so headline-length
# input may be out of distribution — treat this verdict with caution and confirm on
# realistic inputs.
predict_news("Government announces new national AI research program")

❌ FAKE NEWS
Confidence: 73.28 %


In [18]:
# Persist both fitted objects; the vectorizer is needed at inference time as well as the model.
joblib.dump(value=model, filename="fake_news_model.pkl")
joblib.dump(value=vectorizer, filename="vectorizer.pkl")

['vectorizer.pkl']

In [19]:
# List the working directory again to confirm that both .pkl files were written.
import os
os.listdir()

['.config',
 'vectorizer.pkl',
 'fake_news_model.pkl',
 'fake.csv',
 'true.csv',
 'news.zip',
 '.ipynb_checkpoints',
 'sample_data']

In [20]:
from google.colab import files

# Send each saved artifact to the browser as a download (Colab only), model first.
for artifact_name in ("fake_news_model.pkl", "vectorizer.pkl"):
    files.download(artifact_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>