Dataset link: https://github.com/vikaschauhan734/fake_news_classifier/blob/main/news.csv

### Importing necessary libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.corpus import stopwords

In [2]:
# Fetch the NLTK stop-word corpus read by stopwords.words("english") during text preprocessing.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [3]:
# Fetch the WordNet corpus, which the WordNetLemmatizer used in preprocessing looks words up in.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

### Importing CSV file

In [5]:
# Load the dataset; the path is relative, so news.csv must sit in the working directory.
df = pd.read_csv("news.csv")
# Preview the first rows to check the columns that were parsed.
df.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


### Shape of Dataset

In [6]:
# (rows, columns) of the frame as loaded from the CSV.
df.shape

(6335, 4)

### Dropping the 'Unnamed: 0' and 'title' columns

In [7]:
# Keep only the article body and its label.
# .copy() makes this an independent frame, so later column assignments do not write
# into a slice of the originally loaded data (which raises SettingWithCopyWarning).
df = df[["text", "label"]].copy()
df.head()

Unnamed: 0,text,label
0,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,It's primary day in New York and front-runners...,REAL


### Mapping FAKE to 0 and REAL to 1 in the 'label' column

In [8]:
# List the distinct label values so the integer encoding below can cover all of them.
df['label'].unique()

array(['FAKE', 'REAL'], dtype=object)

In [9]:
# Encode the target as integers: FAKE -> 0, REAL -> 1.
# assign() returns a new frame rather than writing a column into what may be a slice
# of the loaded data, which avoids pandas' SettingWithCopyWarning.
df = df.assign(label=df['label'].map({"FAKE": 0, "REAL": 1}))
# map() produces NaN for any value missing from the dictionary, which includes a second
# run of this cell on already-encoded labels. Fail loudly instead of carrying NaN forward.
assert df['label'].notna().all(), "unexpected value in 'label' (or this cell was run twice)"
df.head()

Unnamed: 0,text,label
0,"Daniel Greenfield, a Shillman Journalism Fello...",0
1,Google Pinterest Digg Linkedin Reddit Stumbleu...,0
2,U.S. Secretary of State John F. Kerry said Mon...,1
3,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",0
4,It's primary day in New York and front-runners...,1


### Text Preprocessing

In [11]:
# Replace every character that is not an ASCII letter with a single space.
# The result is assigned back instead of calling replace(..., inplace=True) on df['text']:
# that form mutates an intermediate Series, which pandas warns about and which leaves df
# unchanged when Copy-on-Write is active (the default behaviour from pandas 3.0).
df = df.assign(text=df['text'].replace("[^a-zA-Z]", " ", regex=True))
df.head()

Unnamed: 0,text,label
0,Daniel Greenfield a Shillman Journalism Fello...,0
1,Google Pinterest Digg Linkedin Reddit Stumbleu...,0
2,U S Secretary of State John F Kerry said Mon...,1
3,Kaydee King KaydeeKing November T...,0
4,It s primary day in New York and front runners...,1


In [13]:
from nltk.stem import WordNetLemmatizer
# One lemmatizer instance, created once and reused for every word in the preprocessing step.
lemmatizer = WordNetLemmatizer()

In [15]:
# Build the stop-word set once. The original rebuilt set(stopwords.words("english"))
# inside the comprehension, i.e. once for every token of every article, which dominates
# the runtime of this cell.
stop_words = set(stopwords.words("english"))


def preprocess_text(text):
    """Normalise one document for vectorisation.

    Lower-cases the text, splits it on whitespace, drops English stop words,
    lemmatizes the remaining words and joins them back with single spaces.

    Parameters
    ----------
    text : str
        Raw document text.

    Returns
    -------
    str
        The cleaned, space-separated document.
    """
    tokens = text.lower().split()
    # Filter first, then lemmatize, so the stop-word check sees the same
    # (un-lemmatized) tokens as before.
    kept = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return " ".join(kept)


# Apply to the whole column by name instead of writing row by row through
# positional df.iloc[i, 0]; assign() returns a new frame rather than mutating a slice.
df = df.assign(text=df["text"].map(preprocess_text))

In [16]:
# Preview the text column after lower-casing, stop-word removal and lemmatization.
df.head()

Unnamed: 0,text,label
0,daniel greenfield shillman journalism fellow f...,0
1,google pinterest digg linkedin reddit stumbleu...,0
2,u secretary state john f kerry said monday sto...,1
3,kaydee king kaydeeking november lesson tonight...,0
4,primary day new york front runner hillary clin...,1


### Converting Words into Vectors