[1] Package import

In [1]:
import os


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline


import re
import nltk
from sklearn.datasets import load_files
nltk.download('stopwords')
nltk.download('wordnet')
import pickle
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split


from sklearn.metrics import classification_report, confusion_matrix, accuracy_score



[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/wooseungjin/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/wooseungjin/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


[2] functions   

In [2]:
# Activation function: logistic sigmoid.
def sigmoid(x, derivative=False):
    """Logistic sigmoid, or its derivative written in terms of the output.

    Parameters
    ----------
    x : scalar or ndarray
        With ``derivative=False`` this is the pre-activation value.
        With ``derivative=True`` the formula ``x * (1 - x)`` is applied
        directly, so ``x`` must already be a sigmoid *output*.
    derivative : bool, default False
        Select the derivative form instead of the activation itself.

    Returns
    -------
    Same shape as ``x``.
    """
    if derivative:
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))

#
# normalize the values of the input data to be [0, 1]
#
def normalize(data):
    """Min-max scale ``data`` so that its values span [0, 1].

    The minimum and maximum are taken over *all* elements (``np.min`` /
    ``np.max``), so multi-dimensional arrays are supported; the builtin
    ``min``/``max`` used previously raised on 2-D input.

    Parameters
    ----------
    data : array-like
        Numeric values. Lists and tuples are converted to a float ndarray;
        other array types are used as they are.

    Returns
    -------
    Array of the same shape as ``data``. Empty input is returned as an empty
    float array, and constant input (max == min) yields all zeros instead of
    the NaNs a 0/0 division would produce.
    """
    if isinstance(data, (list, tuple)):
        data = np.asarray(data, dtype=float)

    if np.size(data) == 0:
        return np.asarray(data, dtype=float)

    low = np.min(data)
    span = np.max(data) - low

    if span == 0:
        # Every value is identical: map them all to 0 rather than dividing by zero.
        return (data - low) * 0.0

    return (data - low) / span

#
# squared-difference distance between two vectors x and y
#
def distance(x, y):
    """Return the sum of squared element-wise differences between x and y.

    No square root is applied, so the result is the *squared* Euclidean
    distance.
    """
    return np.sum(np.square(x - y))


[3] read data

In [3]:
# Load the labelled reviews from the "movie_review" directory.
review_data = load_files(r"movie_review")
X, y = review_data.data, review_data.target

documents = []

# NOTE: despite its name this object is a lemmatizer, not a stemmer.
stemmer = WordNetLemmatizer()

for raw_review in X:
    # Decode byte strings properly. Calling str() on bytes yields the repr
    # ("b'...'" with literal "\n" escapes), which leaks junk tokens into the text.
    if isinstance(raw_review, bytes):
        text = raw_review.decode("utf-8", errors="replace")
    else:
        text = str(raw_review)

    # Replace every non-word character with a space
    document = re.sub(r'\W', ' ', text)

    # Remove single letters surrounded by whitespace
    document = re.sub(r'\s+[a-zA-Z]\s+', ' ', document)

    # Remove a single letter at the start of the text. The caret must be an
    # anchor; the previous pattern escaped it (r'\^') and so never matched.
    document = re.sub(r'^[a-zA-Z]\s+', ' ', document)

    # Collapse runs of whitespace into one space
    document = re.sub(r'\s+', ' ', document)

    # Lowercase, then lemmatize word by word
    document = document.lower()
    document = ' '.join(stemmer.lemmatize(word) for word in document.split())

    documents.append(document)

# Bag-of-words counts restricted to the 1500 most frequent terms that occur in
# at least 5 documents and in at most 70% of them, English stop words removed.
# NOTE(review): the vocabulary and IDF weights are fitted on ALL documents
# before the split below, so test data influences the features - confirm this
# is acceptable for the experiment.
vectorizer = CountVectorizer(max_features=1500, min_df=5, max_df=0.7, stop_words=stopwords.words('english'))
X = vectorizer.fit_transform(documents).toarray()

tfidfconverter = TfidfTransformer()
X = tfidfconverter.fit_transform(X).toarray()

# 70/30 split in the order the data was loaded (no additional shuffling).
trainX,testX,trainY, testY = train_test_split(X, y, test_size=0.3, shuffle=False)

# Transpose so that columns are samples: feature matrices become (features, samples).
trainX,testX,trainY, testY = trainX.T,testX.T,trainY.T, testY.T

# Sample counts, read from the data instead of being hard-coded
trDataNum=trainX.shape[1]
tDataNum=testX.shape[1]


[4] initialize values

In [4]:
# Number of epochs; the training loop runs NUM_EPOCH + 1 iterations (0..NUM_EPOCH)
NUM_EPOCH=10000

# Per-epoch history of loss and accuracy (train / test)
trLossArray=np.zeros((NUM_EPOCH+1,1))
tLossArray=np.zeros((NUM_EPOCH+1,1))
trAcArray=np.zeros((NUM_EPOCH+1,1))
tAcArray=np.zeros((NUM_EPOCH+1,1))

lr=0.05#learning rate

# Fix the seed so the random weight initialisation (and therefore the whole
# run) is reproducible on Restart & Run All.
SEED = 0
np.random.seed(SEED)

# Number of weight layers; used to scale the L2 penalty in the loss
layer_num = 3

# Layer widths. The input width is read from the feature matrix (features are
# rows of trainX) so it always matches the vectorizer's actual vocabulary size.
INPUT_DIM = trainX.shape[0]
HIDDEN1_DIM = 150
HIDDEN2_DIM = 15

# weights, drawn from a standard normal distribution
u=np.random.randn(HIDDEN1_DIM,INPUT_DIM)#layer 1 weight (input: input features)

v=np.random.randn(HIDDEN2_DIM,HIDDEN1_DIM)#layer 2 weight (input: layer 1 output)

w=np.random.randn(1,HIDDEN2_DIM)#layer 3 weight (input: layer 2 output)

# biases: one per unit, as column vectors. They were previously created as
# (1,1) and only took their per-unit shape via broadcasting in the first update.
a=np.zeros((HIDDEN1_DIM,1))#layer 1 bias

b=np.zeros((HIDDEN2_DIM,1))#layer 2 bias

c=np.zeros((1,1))#layer 3 bias

# x axis (epoch index) for the loss / accuracy plots
x_=np.arange(NUM_EPOCH+1)

# lambda: L2 regularisation strength
ramda = 0.002

[5] training (gradient descent)

In [None]:
def _forward(features, w1, b1, w2, b2, w3, b3):
    """Run the three sigmoid layers on ``features`` (features x samples).

    Returns the activations of layer 1, layer 2 and the output layer.
    """
    act1 = sigmoid(np.dot(w1, features) + b1)
    act2 = sigmoid(np.dot(w2, act1) + b2)
    act3 = sigmoid(np.dot(w3, act2) + b3)
    return act1, act2, act3


def _binary_cross_entropy(labels, probs, eps=1e-12):
    """Mean binary cross-entropy of ``probs`` against 0/1 ``labels``.

    Probabilities are clipped to [eps, 1 - eps] so that a saturated sigmoid
    (exactly 0 or 1) cannot produce log(0) = -inf / NaN in the loss.
    """
    probs = np.clip(probs, eps, 1 - eps)
    per_sample = -(np.multiply(labels, np.log(probs)) + np.multiply(1 - labels, np.log(1 - probs)))
    return np.sum(per_sample) / probs.shape[-1]


# Sample counts are read from the data (columns are samples) rather than
# relying on separately maintained constants.
n_train = trainX.shape[1]
n_test = testX.shape[1]

for epoch in range(NUM_EPOCH+1):

    # L2 penalty on the three weight matrices; the same value is added to both
    # the train and the test loss, so compute it once per epoch.
    penalty = ramda/(2*layer_num)*(np.power(u,2).sum() + np.power(v,2).sum() + np.power(w,2).sum())

    #forward propagation(train) and train loss
    trA1, trA2, trA3 = _forward(trainX, u, a, v, b, w, c)
    trloss = _binary_cross_entropy(trainY, trA3) + penalty
    trLossArray[epoch]=trloss

    #forward propagation(test) and test loss
    tA1, tA2, tA3 = _forward(testX, u, a, v, b, w, c)
    tloss = _binary_cross_entropy(testY, tA3) + penalty
    tLossArray[epoch]=tloss

    #get Accuracy (predictions are thresholded at 0.5)
    trainPY=np.where(trA3>=0.5,1.,0.)
    trAccuracy=((trainPY == trainY).sum())/n_train
    trAcArray[epoch]=trAccuracy

    testPY=np.where(tA3>=0.5,1.,0.)
    tAccuracy=((testPY == testY).sum())/n_test
    tAcArray[epoch]=tAccuracy

    #backward propagation
    # Output layer: error term of sigmoid + cross-entropy is (prediction - label).
    dz3=trA3-trainY
    dw=np.dot(dz3,trA2.T)/n_train
    dc=np.sum(dz3,axis=1,keepdims=True)/n_train

    # Hidden layers: propagate the error through the weights and multiply by
    # the sigmoid derivative expressed via the layer's activations.
    dz2=np.multiply(np.dot(w.T,dz3),sigmoid(trA2, derivative=True))
    dv=np.dot(dz2,trA1.T)/n_train
    db=np.sum(dz2,axis=1,keepdims=True)/n_train

    dz1=np.multiply(np.dot(v.T,dz2),sigmoid(trA1, derivative=True))
    du=np.dot(dz1,trainX.T)/n_train
    da=np.sum(dz1,axis=1,keepdims=True)/n_train

    #update weight and bias (gradient step plus multiplicative weight decay)
    # NOTE(review): the decay factor (1 - ramda*lr) corresponds to a penalty of
    # ramda/2 * sum(theta^2) over weights AND biases, whereas the reported loss
    # uses ramda/(2*layer_num) over the weights only - confirm which is intended.
    decay = 1-ramda*lr

    u=decay*u-lr*du
    a=decay*a-lr*da

    v=decay*v-lr*dv
    b=decay*b-lr*db

    w=decay*w-lr*dw
    c=decay*c-lr*dc

    #report progress every 500 epochs
    if epoch%500==0:
        print("epoch :" + str(epoch+1))
        print("train loss :  " +np.array2string(trloss))
        print("test loss :  " +np.array2string(tloss))
        print("train accuracy :  " +np.array2string(trAccuracy))
        print("test accuracy :  " +np.array2string(tAccuracy))


epoch :1
train loss :  77.00969601
test loss :  76.97172162
train accuracy :  0.49250535
test accuracy :  0.52412646


[6] predict train and test

In [None]:
# Forward pass on the training set with the current parameters
trZ1 = u.dot(trainX) + a   # layer 1 pre-activation
trA1 = sigmoid(trZ1)
trZ2 = v.dot(trA1) + b     # layer 2 pre-activation
trA2 = sigmoid(trZ2)
trZ3 = w.dot(trA2) + c     # output layer pre-activation
trA3 = sigmoid(trZ3)

# Forward pass on the test set with the same parameters
tZ1 = u.dot(testX) + a
tA1 = sigmoid(tZ1)
tZ2 = v.dot(tA1) + b
tA2 = sigmoid(tZ2)
tZ3 = w.dot(tA2) + c
tA3 = sigmoid(tZ3)

# Threshold the output probabilities at 0.5 to obtain 0./1. class predictions
trainPY = (trA3 >= 0.5).astype(float)
testPY = (tA3 >= 0.5).astype(float)


[7] Plot the loss curve

In [None]:
# Loss per epoch. Axis labels and a legend are included so the figure can be
# read on its own (blue = train, red = test).
fig, ax = plt.subplots()
ax.scatter(x_, np.ravel(trLossArray), c="b", s=1, label="train loss")
ax.scatter(x_, np.ravel(tLossArray), c="r", s=1, label="test loss")
ax.set_title("train loss and test loss")
ax.set_xlabel("epoch")
ax.set_ylabel("loss")
ax.legend(markerscale=8)  # enlarge the 1-pt markers so the legend is legible
plt.show()

[8] Plot the accuracy curve

In [None]:
# Accuracy per epoch. Axis labels and a legend are included so the figure can
# be read on its own (blue = train, red = test).
fig, ax = plt.subplots()
ax.scatter(x_, np.ravel(trAcArray), c="b", s=1, label="train accuracy")
ax.scatter(x_, np.ravel(tAcArray), c="r", s=1, label="test accuracy")
ax.set_title("train Ac and test Ac")
ax.set_xlabel("epoch")
ax.set_ylabel("accuracy")
ax.legend(markerscale=8)  # enlarge the 1-pt markers so the legend is legible
plt.show()

[9] Plot the quantitative results

In [None]:
print("training")
# The network output has shape (1, n_samples); scikit-learn metrics expect 1-D
# label arrays and would otherwise reject it as an inconsistent sample count.
train_pred = np.ravel(trainPY).astype(int)
print(confusion_matrix(trainY, train_pred))
print(classification_report(trainY, train_pred))
print(accuracy_score(trainY, train_pred))

In [None]:
print("testing")
# The network output has shape (1, n_samples); scikit-learn metrics expect 1-D
# label arrays and would otherwise reject it as an inconsistent sample count.
test_pred = np.ravel(testPY).astype(int)
print(confusion_matrix(testY, test_pred))
print(classification_report(testY, test_pred))
# The closing parenthesis was missing here, which made the cell a SyntaxError.
print(accuracy_score(testY, test_pred))

[10] Plot testing accuracy

In [None]:
# Report the test accuracy recorded at the last epoch (index NUM_EPOCH).
final_label = "Final test accuracy :  "
print(final_label + np.array2string(tAcArray[NUM_EPOCH]))