### Fake News Detection

#### Using CountVectorizer and MultinomialNB

In [5]:
import pandas as pd
import numpy as np 
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split 
from sklearn.naive_bayes import MultinomialNB

In [38]:
# Load the news dataset from the CSV file and preview its first rows.
data = pd.read_csv("fake_or_real_news.csv")
data.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


In [9]:
# Count missing values per column; a column of zeros means nothing is missing.
data.isna().sum()

Unnamed: 0    0
title         0
text          0
label         0
dtype: int64

In [12]:
# Raw inputs: headline strings as features, label strings as targets.
titles = data["title"].to_numpy()
y = data["label"].to_numpy()

# Turn each headline into a sparse vector of word counts.
# NOTE(review): the vocabulary is learned from every headline, including the
# ones later held out for testing; fitting on the training split only would
# avoid that leak.
cv = CountVectorizer()
x = cv.fit_transform(titles)

In [15]:
# Hold out 20% of the headlines for testing; the fixed seed keeps the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
# Fit a multinomial Naive Bayes classifier on the training word counts.
model = MultinomialNB().fit(xtrain, ytrain)
# Mean accuracy on the held-out headlines.
model.score(xtest, ytest)


0.8074191002367798

In [26]:
# Classify a single headline with the trained bag-of-words model.
def fake_detect(headline):
    """Predict the label of one news headline.

    Parameters
    ----------
    headline : str
        Raw headline text. It is vectorized with the already-fitted
        notebook-level ``cv``.

    Returns
    -------
    The first (and only) element returned by ``model.predict``; in this
    notebook the labels are strings such as 'FAKE' or 'REAL'.
    """
    # Wrap the headline in a list: the vectorizer works on a collection of documents.
    features = cv.transform([headline])
    # Hand the sparse matrix straight to the model; densifying it with
    # .toarray() is not needed for prediction.
    return model.predict(features)[0]

In [27]:
# Try the classifier on a headline taken from a real news article
# (source: https://www.bbc.com/news/business-59353194).
fake_detect("US to release oil reserves in bid to lower prices")

'REAL'

In [28]:
# Try the classifier on a headline used here as a fake-news example
# (fact-check: https://www.snopes.com/fact-check/pope-francis-donald-trump-endorsement/).
fake_detect("Pope Francis shocks world, endorses Donald Trump for president")

'REAL'

In [29]:
# Try the classifier on a second headline used here as a fake-news example
# (source: https://www.foxnews.com/food-drink/us-bacon-reserves-hit-50-year-low).
fake_detect("US Bacon Reserves Hit 50 Year Low")

'FAKE'

#### Using TfidfVectorizer and PassiveAggressiveClassifier

In [33]:
import numpy as np
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [41]:
# Target column holding the label strings (e.g. FAKE / REAL); preview the first few.
labels = data["label"]
labels.head()

0    FAKE
1    FAKE
2    REAL
3    FAKE
4    REAL
Name: label, dtype: object

In [55]:
# Hold out 20% of the article bodies for testing (fixed seed for a repeatable split).
x_train2, x_test2, y_train2, y_test2 = train_test_split(
    data['text'],
    labels,
    test_size=0.2,
    random_state=7,
)

In [56]:
# TF-IDF features: drop English stop words and ignore any term that appears
# in more than 70% of the documents.
tf_v = TfidfVectorizer(max_df=0.7, stop_words='english')
# Learn the vocabulary and IDF weights from the training split only,
# then apply the same mapping to the test split.
tf_v_train = tf_v.fit_transform(x_train2)
tf_v_test = tf_v.transform(x_test2)

In [59]:
# Initialize and train the Passive-Aggressive classifier on the TF-IDF features.
# The estimator shuffles the training data between epochs by default, so
# random_state is pinned to make the fit (and the accuracy computed from it)
# reproducible across runs.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)
pac.fit(tf_v_train, y_train2)

PassiveAggressiveClassifier(max_iter=50)

In [63]:
# Predict labels for the held-out articles.
y_pred = pac.predict(tf_v_test)
# Score against y_test2, the test labels produced by the train/test split of
# this section; a bare `y_test` is never defined in this notebook and raises
# NameError on a fresh kernel.
score = accuracy_score(y_test2, y_pred)
print(f'Accuracy:{round(score*100,2)}%')

Accuracy:92.98%


#### Source
https://towardsdatascience.com/basics-of-countvectorizer-e26677900f9c

https://thecleverprogrammer.com/2021/06/30/fake-news-detection-with-machine-learning/

https://data-flair.training/blogs/advanced-python-project-detecting-fake-news/



https://thecleverprogrammer.com/2021/07/09/end-to-end-fake-news-detection-with-python/
    