In [39]:
import os
import pandas as pd
import numpy as np
import scipy.sparse as sp
import argparse

from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [40]:
path = '../Data/Movie_Lens_100k'

In [41]:
train_df = pd.read_csv(os.path.join(path, 'ua.base'), sep = '\t', names = ['user_id', 'movie_id', 'rating', 'timestamp'])
test_df = pd.read_csv(os.path.join(path, 'ua.test'), sep = '\t', names = ['user_id', 'movie_id', 'rating', 'timestamp'])

In [42]:
test_only_movie = list(set(test_df['movie_id'].unique().flatten()) - set(train_df['movie_id'].unique().flatten()))
test_df = test_df[~test_df['movie_id'].isin(test_only_movie)]

In [43]:
genre_df = pd.read_csv(os.path.join(path, 'u.genre'), sep = '|', names = ['genre', 'idx'])

In [44]:
movie_df = pd.read_csv(os.path.join(path, 'u.item'), sep = '|', encoding = 'latin1', header = None)

In [45]:
def user_plus(x):
    x = 'user' + str(x)
    return x

def movie_plus(x):
    x = 'movie' + str(x)
    return x

In [46]:
user_list = list(map (user_plus, train_df['user_id'].unique().tolist()))

In [47]:
movie_list = list(map (movie_plus, train_df['movie_id'].unique().tolist()))

In [48]:
genre_list = genre_df['genre'].unique().tolist()[1:]

In [49]:
label_list = user_list + movie_list + genre_list

In [50]:
encoder = LabelEncoder()

encoder = encoder.fit(label_list)

In [51]:
train_df['user_id'] = train_df['user_id'].apply(lambda x : user_plus(x))
train_df['movie_id'] = train_df['movie_id'].apply(lambda x : movie_plus(x))

test_df['user_id'] = test_df['user_id'].apply(lambda x : user_plus(x))
test_df['movie_id'] = test_df['movie_id'].apply(lambda x : movie_plus(x))

user_le = LabelEncoder().fit(train_df['user_id'])
movie_le = LabelEncoder().fit(train_df['movie_id'])

train_df['user_le'] = user_le.transform(train_df['user_id'])
train_df['movie_le'] = movie_le.transform(train_df['movie_id'])

test_df['user_le'] = user_le.transform(test_df['user_id'])
test_df['movie_le'] = movie_le.transform(test_df['movie_id'])

train_df['user_id'] = encoder.transform(train_df['user_id'])
train_df['movie_id'] = encoder.transform(train_df['movie_id'])

test_df['user_id'] = encoder.transform(test_df['user_id'])
test_df['movie_id'] = encoder.transform(test_df['movie_id'])

In [54]:
train = train_df[['user_le', 'movie_le', 'rating']]
test = test_df[['user_le', 'movie_le', 'rating']]

In [57]:
knowledge_graph = train_df[['user_id', 'movie_id']]
knowledge_graph['relation'] = 1

knowledge_graph = knowledge_graph[['user_id', 'relation', 'movie_id']]
knowledge_graph.columns = ['h', 'r', 't']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  knowledge_graph['relation'] = 1


In [59]:
knowledge_graph.head()

Unnamed: 0,h,r,t
0,1698,1,18
1,1698,1,810
2,1698,1,921
3,1698,1,1032
4,1698,1,1143


In [68]:
train_df['movie_le'].nunique()

1680

In [69]:
movie_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [None]:
movie_df['0']

In [66]:
movie_tmp = movie_df.iloc[:, 5: ]
movie_tmp.columns = genre_df['genre'].unique().tolist()

In [67]:
movie_tmp

Unnamed: 0,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1678,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0
1679,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
1680,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
