In [1]:
import numpy as np
import pandas as pd

**Dataset source** = https://www.kaggle.com/datasets/algord/fake-news


In [3]:
df=pd.read_csv('/content/FakeNewsNet.csv')

In [4]:
df.head()

Unnamed: 0,title,news_url,source_domain,tweet_num,real
0,Kandi Burruss Explodes Over Rape Accusation on...,http://toofab.com/2017/05/08/real-housewives-a...,toofab.com,42,1
1,People's Choice Awards 2018: The best red carp...,https://www.today.com/style/see-people-s-choic...,www.today.com,0,1
2,Sophia Bush Sends Sweet Birthday Message to 'O...,https://www.etonline.com/news/220806_sophia_bu...,www.etonline.com,63,1
3,Colombian singer Maluma sparks rumours of inap...,https://www.dailymail.co.uk/news/article-33655...,www.dailymail.co.uk,20,1
4,Gossip Girl 10 Years Later: How Upper East Sid...,https://www.zerchoo.com/entertainment/gossip-g...,www.zerchoo.com,38,1


In [5]:
df.drop(columns=["news_url","source_domain"], inplace=True)

In [7]:
df['title']


Unnamed: 0,title
0,Kandi Burruss Explodes Over Rape Accusation on...
1,People's Choice Awards 2018: The best red carp...
2,Sophia Bush Sends Sweet Birthday Message to 'O...
3,Colombian singer Maluma sparks rumours of inap...
4,Gossip Girl 10 Years Later: How Upper East Sid...
...,...
23191,Pippa Middleton wedding: In case you missed it...
23192,Zayn Malik & Gigi Hadid’s Shocking Split: Why ...
23193,Jessica Chastain Recalls the Moment Her Mother...
23194,"Tristan Thompson Feels ""Dumped"" After Khloé Ka..."


In [8]:
import re

In [17]:
def wordopt(text):

  text=text.lower()

  text=re.sub(r'\[.*?\]','',text)
  text=re.sub(r"\\W"," ",text)
  text=re.sub(r'https?://\S+|www\.\S+','',text)
  text=re.sub(r'<.*?>+','',text)
  text=re.sub(r'[^\w\s]','',text)
  text=re.sub(r'\n','',text)
  text=re.sub(r'\d','',text)
  text=re.sub(r'\w*\d\w*','',text)
  return text



In [18]:
df['title']=df['title'].apply(wordopt)

In [20]:
df['title'][1]

'peoples choice awards  the best red carpet looks'

In [62]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score ,classification_report
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier



In [22]:
X_train, X_test, y_train, y_test = train_test_split(df['title'], df['real'], test_size=0.2, random_state=42)

# Use TF-IDF Vectorizer to convert text to numerical form
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [77]:
#LogisticRegression:
LR_model = LogisticRegression()
# SVM:
svm_model = SVC(kernel='linear')
# Naive Bayes:
nb_model = MultinomialNB()
# Random Forest:
rf_model = RandomForestClassifier(n_estimators=300)
# Gradient Boosting:
gb_model = GradientBoostingClassifier(n_estimators=300, learning_rate=0.01)

In [83]:
models = {
    'LogisticRegression': LR_model,
    'SVM': svm_model,
    'Naive Bayes': nb_model,
    'Random Forest': rf_model,
    'Gradient Boosting': gb_model
}



In [84]:
for name, model in models.items():
    # Train the model
    model.fit(X_train_tfidf, y_train)

    # Predict on the test set
    y_pred = model.predict(X_test_tfidf)

    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model: {name}, Accuracy: {accuracy}")

    # Print detailed classification report (optional)
    print(f"Classification Report for {name}:\n", classification_report(y_test, y_pred))

Model: LogisticRegression, Accuracy: 0.8310344827586207
Classification Report for LogisticRegression:
               precision    recall  f1-score   support

           0       0.77      0.43      0.56      1131
           1       0.84      0.96      0.90      3509

    accuracy                           0.83      4640
   macro avg       0.81      0.70      0.73      4640
weighted avg       0.82      0.83      0.81      4640

Model: SVM, Accuracy: 0.8394396551724138
Classification Report for SVM:
               precision    recall  f1-score   support

           0       0.75      0.51      0.61      1131
           1       0.86      0.95      0.90      3509

    accuracy                           0.84      4640
   macro avg       0.80      0.73      0.75      4640
weighted avg       0.83      0.84      0.83      4640

Model: Naive Bayes, Accuracy: 0.8092672413793104
Classification Report for Naive Bayes:
               precision    recall  f1-score   support

           0       0.85   