In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [2]:
df_fake = pd.read_csv("Fake.csv")
df_true = pd.read_csv("True.csv")

df_fake['label'] = 0   # Fake
df_true['label'] = 1   # Real

df = pd.concat([df_fake, df_true], axis=0)
df = df.sample(frac=1).reset_index(drop=True)

df.head()


Unnamed: 0,title,text,subject,date,label
0,China top anti-graft watchdog says anti-corrup...,BEIJING (Reuters) - China s top anti-graft wat...,worldnews,"October 24, 2017",1
1,Sanders hits Clinton on campaign finance hours...,NEW YORK (Reuters) - U.S. Democratic president...,politicsNews,"April 18, 2016",1
2,BREAKING BOMBSHELL: All Dem Congressmen Phone ...,Wow! This just keeps getting better and better...,politics,"Aug 12, 2016",0
3,Hateful Letter To Food Bank PROVES We Need Fo...,"A few days before Thanksgiving, a food bank in...",News,"November 25, 2016",0
4,Top Conservative Tries To Define ‘Alt-Right’ ...,One of the problems that Republicans have is w...,News,"February 23, 2017",0


In [3]:
df = df[['text', 'label']]
df.dropna(inplace=True)
df.head()


Unnamed: 0,text,label
0,BEIJING (Reuters) - China s top anti-graft wat...,1
1,NEW YORK (Reuters) - U.S. Democratic president...,1
2,Wow! This just keeps getting better and better...,0
3,"A few days before Thanksgiving, a food bank in...",0
4,One of the problems that Republicans have is w...,0


In [4]:
X = df['text']
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [5]:
tfidf = TfidfVectorizer(stop_words='english', max_df=0.7)

X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)


In [6]:
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, y_train)


In [7]:
y_pred = model.predict(X_test_tfidf)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.9863028953229399

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      4735
           1       0.98      0.99      0.99      4245

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980


Confusion Matrix:
 [[4664   71]
 [  52 4193]]
