In [1]:
#importing the required libraries
import numpy as np 
import pandas as pd
import torch 
import torch.nn as nn 
import torch.nn.parallel
import torch.optim as optim 
from torch.autograd import Variable

In [4]:
#importing the dataset
test_set=pd.read_csv('ml-1m/test_set.csv')
training_set=pd.read_csv('ml-1m/training_set.csv')

In [7]:
#checking the info of the datasets
test_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250088 entries, 0 to 250087
Data columns (total 4 columns):
User         250088 non-null int64
Movie        250088 non-null int64
Rating       250088 non-null int64
Timestamp    250088 non-null int64
dtypes: int64(4)
memory usage: 7.6 MB


In [8]:
training_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750121 entries, 0 to 750120
Data columns (total 4 columns):
User         750121 non-null int64
Movie        750121 non-null int64
Rating       750121 non-null int64
Timestamp    750121 non-null int64
dtypes: int64(4)
memory usage: 22.9 MB


In [10]:
#Converting the dataframe to numpy array
training_set=np.array(training_set,dtype=int)
test_set=np.array(test_set,dtype=int)

In [22]:
#Getting the max value number of movie and number of user
nf_users=max(max(training_set[:,0]),max(test_set[:,0]))
nf_movies=max(max(training_set[:,1]),max(test_set[:,1]))

In [27]:
#Convertingthe array to the list of list where one rows corresponds to 
#the reviews of all the movies that were given by one single users
def Converter(data):
    dataset=[]
    for user in range(1,nf_users+1):
        movies=data[:,1][data[:,0]==user]
        ratings=data[:,2][data[:,0]==user]
        new_ratings=np.zeros(nf_movies)
        new_ratings[movies-1]=ratings
        dataset.append(list(new_ratings))
    return dataset

In [28]:
training_set=Converter(training_set)


In [34]:
test_set=Converter(test_set)

In [35]:
#Now Convertng these list of list structure to the pytorch tensors
training_set=torch.FloatTensor(training_set)
test_set=torch.FloatTensor(test_set)

In [36]:
type(training_set)

torch.Tensor

In [38]:
#Now building the Architecture of the Auto Encoder Model
#We will use inheritance to build the architecture from the nn Module class
#we are bulding six layer 3 layer for encoding and 3 layers for the decoding
#we can add more layers depending upon the requirements.
class AutoEncoder(nn.Module):
    def __init__(self,):
        super(AutoEncoder,self).__init__()
        self.full_con1=nn.Linear(nf_movies,40)#encoding
        self.full_con2=nn.Linear(40,20)#encoding
        self.full_con3=nn.Linear(20,10)#encoding
        self.full_con4=nn.Linear(10,20)#decoding
        self.full_con5=nn.Linear(20,40)#decoding
        self.full_con6=nn.Linear(40,nf_movies)#output layer
        self.activation=nn.Sigmoid()
    def forward(self,x):
        x=self.activation(self.full_con1(x))
        x=self.activation(self.full_con2(x))
        x=self.activation(self.full_con3(x))
        x=self.activation(self.full_con4(x))
        x=self.activation(self.full_con5(x))
        x=self.full_con6(x)
        
        return x

In [39]:
#making the object of the class
#making the creterion to calculate the loss
#making the optimization for changing the values of thne weights
auto_encoder=AutoEncoder()
criterion=nn.MSELoss()
optimizer=optim.RMSprop(auto_encoder.parameters(),lr=0.01,weight_decay=0.5)

In [45]:
#Training the Auto encoder
n_epochs=200
for epochs in range(1,n_epochs+1):
    train_loss=0
    s=0.
    for user in range(nf_users):
        input=Variable(training_set[user]).unsqueeze(0)
        target=input.clone()
        if torch.sum(target.data>0)>0:
            output=auto_encoder(input)
            target.require_grad=False
            output[target==0]=0
            loss=criterion(output,target)
            mean_corrector=nf_movies/float(torch.sum(target.data > 0) + 1e-10)
            #decide the direction updation of weights 
            loss.backward()
            train_loss+=np.sqrt(loss.data.item()*mean_corrector)
            s += 1.
            #decide the intensity by which the weights will be updated
            optimizer.step()
    print(f'epoch - {epochs}  loss - {str(train_loss/s)}')

epoch - 1  loss - 1.0069671844389132
epoch - 2  loss - 0.9996829715624717
epoch - 3  loss - 0.9970854003560181
epoch - 4  loss - 0.9959575547251359
epoch - 5  loss - 0.9949112781384611
epoch - 6  loss - 0.99442320017025
epoch - 7  loss - 0.9933930486573685
epoch - 8  loss - 0.9927753002560413
epoch - 9  loss - 0.9924917333713893
epoch - 10  loss - 0.9919021124346419
epoch - 11  loss - 0.9909534261818992
epoch - 12  loss - 0.9900191745968521
epoch - 13  loss - 0.9892806040447778
epoch - 14  loss - 0.9887828504900665
epoch - 15  loss - 0.9880919544732267
epoch - 16  loss - 0.9874697779327835
epoch - 17  loss - 0.9867508568870808
epoch - 18  loss - 0.9859018356466428
epoch - 19  loss - 0.9852832498148297
epoch - 20  loss - 0.9845996631805589
epoch - 21  loss - 0.9837418740376969
epoch - 22  loss - 0.9835745564206186
epoch - 23  loss - 0.9828617708610328
epoch - 24  loss - 0.9824956669000612
epoch - 25  loss - 0.9816252873058987
epoch - 26  loss - 0.9808865032793712
epoch - 27  loss - 0.98

In [48]:
#testing the model
test_loss = 0
s = 0.
for user in range(nf_users):
    input = Variable(training_set[user]).unsqueeze(0)
    target = Variable(test_set[user])
    if torch.sum(target.data > 0) > 0:
        output = auto_encoder(input)
        target.require_grad = False
        output[target.reshape(1,-1) == 0] = 0
        loss = criterion(output, target)
        mean_corrector = nf_movies/float(torch.sum(target.data > 0) + 1e-10)
        test_loss += np.sqrt(loss.data.item()*mean_corrector)
        s += 1.
print(f'Test Loss - {(test_loss/s)}')

Test Loss - 0.9157940134087428
