In [2]:
import pandas as pd
import spacy
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

# --- Load and label the data --------------------------------------------------
df = pd.read_csv("Fake_Real_Data.csv")
df['label_num'] = df['label'].map({'Fake' : 0, 'Real': 1})

# Series.map() yields NaN for any label outside the mapping. Fail fast here,
# before the expensive embedding step, instead of hitting an error at fit().
if df['label_num'].isna().any():
    unexpected_labels = df.loc[df['label_num'].isna(), 'label'].unique().tolist()
    raise ValueError(f"Unexpected values in 'label' column: {unexpected_labels}")

# --- Embed every document with spaCy ------------------------------------------
nlp = spacy.load("en_core_web_lg")
# nlp.pipe streams the texts through the pipeline in batches, which is the
# recommended way to process many documents (vs. calling nlp() once per row).
# One document vector is stored per row, in the original row order.
df['vector'] = [doc.vector for doc in nlp.pipe(df['Text'])]

# --- Train / test split -------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    df.vector.values,
    df.label_num,
    test_size=0.2,
    random_state=2022
)

# The split returns 1-D object arrays whose elements are vectors; stack them
# into regular 2-D (n_samples, n_features) arrays for scikit-learn.
X_train_2d = np.stack(X_train)
X_test_2d = np.stack(X_test)

# --- Model 1: Multinomial Naive Bayes on min-max scaled embeddings -------------
# MultinomialNB refuses negative feature values when fitting, so the embeddings
# are rescaled first. The scaler is fitted on the training split only and then
# applied to the test split.
scaler = MinMaxScaler()
scaled_train_embed = scaler.fit_transform(X_train_2d)
scaled_test_embed = scaler.transform(X_test_2d)

nb_clf = MultinomialNB()
nb_clf.fit(scaled_train_embed, y_train)
y_pred_nb = nb_clf.predict(scaled_test_embed)
print("\n=== MultinomialNB Results ===")
print(classification_report(y_test, y_pred_nb))

# --- Model 2: k-nearest neighbours on the raw (unscaled) embeddings ------------
knn_clf = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
knn_clf.fit(X_train_2d, y_train)
y_pred_knn = knn_clf.predict(X_test_2d)
print("\n=== KNN Results ===")
print(classification_report(y_test, y_pred_knn))

# The confusion matrix below describes the KNN predictions only.
cm = confusion_matrix(y_test, y_pred_knn)
print("\nConfusion Matrix:")
print(cm)

# Backward-compatible aliases: other cells refer to `clf` / `y_pred` and expect
# the KNN model and its test-set predictions. Binding them explicitly means that
# no longer depends on which model happened to be assigned last.
clf = knn_clf
y_pred = y_pred_knn



=== MultinomialNB Results ===
              precision    recall  f1-score   support

           0       0.95      0.96      0.96      1024
           1       0.95      0.95      0.95       956

    accuracy                           0.95      1980
   macro avg       0.95      0.95      0.95      1980
weighted avg       0.95      0.95      0.95      1980


=== KNN Results ===
              precision    recall  f1-score   support

           0       0.99      0.97      0.98      1024
           1       0.97      0.99      0.98       956

    accuracy                           0.98      1980
   macro avg       0.98      0.98      0.98      1980
weighted avg       0.98      0.98      0.98      1980


Confusion Matrix:
[[990  34]
 [ 13 943]]


In [3]:
# Smoke test: classify a handful of hand-written headlines.
new_texts = [
    "Prime Minister launches new scheme to boost rural economy.",
    "Breaking: Aliens have landed in New York City!",
    "Stock markets rally as inflation fears ease.",
    "Shocking cure for cancer found in just 2 days!",
]

# Embed each headline as its spaCy document vector and stack the vectors into
# a single 2-D array, one row per headline.
new_vectors = np.stack([nlp(headline).vector for headline in new_texts])

# `clf` is the KNN classifier fitted in the training cell; it was trained on
# unscaled embeddings, so the raw vectors are passed straight in.
new_preds = clf.predict(new_vectors)

# Translate numeric classes back to the dataset's label names and report.
label_map = {0: "Fake", 1: "Real"}
for text, pred in zip(new_texts, new_preds):
    verdict = label_map[pred]
    print(f"\nText: {text}\nPrediction: {verdict}")




Text: Prime Minister launches new scheme to boost rural economy.
Prediction: Real

Text: Breaking: Aliens have landed in New York City!
Prediction: Real

Text: Stock markets rally as inflation fears ease.
Prediction: Real

Text: Shocking cure for cancer found in just 2 days!
Prediction: Fake
