# Fake news Classifier Using LSTM

In [1]:
import pandas as pd
import numpy as np
import nltk
import re
from nltk.corpus import stopwords 
from nltk.stem.porter import PorterStemmer

In [2]:
df = pd.read_csv('fake_news_classifier/train.csv')
df.head()

Unnamed: 0,id,title,author,text,label
0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1
3,3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1
4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1


In [3]:
df = df.dropna()

## Get Independent Features 
X = df.drop('label', axis = 1) 
##Get Dependent Features 
y = df['label']

X.shape

(18285, 4)

## Preprocessing 

In [4]:
message = X.copy()
message.reset_index(inplace=True)


In [5]:
ps = PorterStemmer()
corpus = []
for i in range(0, len(message)):
    #print(i)
    review = re.sub('[^a-zA-Z]',' ',message['title'][i]) #replace blank for all symbols besides a-z and A-Z
    review = review.lower()
    review = review.split()
    review = [ps.stem(word) for word in review if not word in stopwords.words('english')]
    review = ' '.join(review)
    corpus.append(review)
#corpus

## One hot Representation 

In [6]:
import tensorflow as tf
tf.__version__

2024-09-13 20:17:26.820128: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


'2.12.0'

In [7]:
from tensorflow.keras.layers import Embedding 
from tensorflow.keras.preprocessing.sequence import pad_sequences #For inout fed into the embedding layers, need to be same length
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot 
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense

In [8]:
## Vocabulary size 
voc_size = 5000

In [9]:
onehot_repr = [one_hot(words,voc_size) for words in corpus]
onehot_repr[:3] #Note not fixed length

[[4640, 199, 1357, 129, 2300, 2405, 2942, 1548, 4063, 2022],
 [1146, 4338, 2486, 4994, 2105, 797, 1216],
 [2636, 172, 3502, 1261]]

## Embedding Representation 

In [10]:
## Make input same length before fed to embedding layer 
sent_length = 20
embedded_docs = pad_sequences(onehot_repr, padding = 'pre', maxlen=sent_length) #pad 0s before the actual values
embedded_docs[:3]

array([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0, 4640,
         199, 1357,  129, 2300, 2405, 2942, 1548, 4063, 2022],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0, 1146, 4338, 2486, 4994, 2105,  797, 1216],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0, 2636,  172, 3502, 1261]],
      dtype=int32)

In [11]:
len(embedded_docs)

18285

In [12]:
## Creating model 
embedding_vector_features = 40
model =  Sequential()
model.add(Embedding(voc_size, embedding_vector_features, input_length=sent_length))
model.add(LSTM(100)) #pass result from embedding layers to LSTM layer with 100 neurons
model.add(Dense(1, activation = 'sigmoid'))
model.compile(loss = 'binary_crossentropy', optimizer='adam', metrics = ['accuracy'])
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 20, 40)            200000    
                                                                 
 lstm (LSTM)                 (None, 100)               56400     
                                                                 
 dense (Dense)               (None, 1)                 101       
                                                                 
Total params: 256,501
Trainable params: 256,501
Non-trainable params: 0
_________________________________________________________________
None


2024-09-13 20:17:30.610122: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-09-13 20:17:30.611139: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-09-13 20:17:30.611960: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [13]:
X_final = np.array(embedded_docs)
y_final = np.array(y)
X_final.shape

(18285, 20)

## Model Training 

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size = 0.3, random_state = 42)

In [15]:
model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, batch_size=64)

Epoch 1/10


2024-09-13 20:18:45.701328: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-09-13 20:18:45.702827: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-09-13 20:18:45.703578: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



2024-09-13 20:18:49.202503: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-09-13 20:18:49.203596: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-09-13 20:18:49.204192: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7ff3997e0e50>

## Performance Metrics and Accuracy 

In [18]:
from sklearn.metrics import confusion_matrix
y_pred_prob = model.predict(X_test) #Get the class probabilities from the model
y_pred = (y_pred_prob > 0.5).astype(int)
confusion_matrix(y_test,y_pred)



array([[2867,  240],
       [ 293, 2086]])

In [19]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_pred)

0.9028436018957346

## Adding Dropout

In [21]:
from tensorflow.keras.layers import Dropout 
## Creating model 
embedding_vector_features = 40
model =  Sequential()
model.add(Embedding(voc_size, embedding_vector_features, input_length=sent_length))
model.add(Dropout(0.2))
model.add(LSTM(100)) #pass result from embedding layers to LSTM layer with 100 neurons
model.add(Dropout(0.2))
model.add(Dense(1, activation = 'sigmoid'))
model.compile(loss = 'binary_crossentropy', optimizer='adam', metrics = ['accuracy'])

model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, batch_size=64)


Epoch 1/10


2024-09-13 20:28:24.880753: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-09-13 20:28:24.881540: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-09-13 20:28:24.882337: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



2024-09-13 20:28:28.274685: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-09-13 20:28:28.275881: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-09-13 20:28:28.276526: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7ff35aeeb4f0>