In [4]:
import numpy as np
import pandas as pd
from torch import nn
import torch
import pickle
import datetime

In [5]:
path = "./data/ml-1m.txt"
data = pd.read_csv(path, sep = '\t', names = ['user', 'item', 'rating', 'time']) #[:100] #slice for testing

In [3]:
data.head()

Unnamed: 0,user,item,rating,time
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [9]:
data["time_new"] = data["time"].map(datetime.datetime.fromtimestamp)

In [11]:
data.head(50)

Unnamed: 0,user,item,rating,time,time_new
0,1,1193,5,978300760,2000-12-31 22:12:40
1,1,661,3,978302109,2000-12-31 22:35:09
2,1,914,3,978301968,2000-12-31 22:32:48
3,1,3408,4,978300275,2000-12-31 22:04:35
4,1,2355,5,978824291,2001-01-06 23:38:11
5,1,1197,3,978302268,2000-12-31 22:37:48
6,1,1287,5,978302039,2000-12-31 22:33:59
7,1,2804,5,978300719,2000-12-31 22:11:59
8,1,594,4,978302268,2000-12-31 22:37:48
9,1,919,4,978301368,2000-12-31 22:22:48


In [14]:
user_n = len(pd.unique(data["user"]))
item_n = len(pd.unique(data["item"]))

replacement_users = {val : i+1 for i, val in enumerate(pd.unique(data["user"]))}
replacement_items = {val : i+1 for i, val in enumerate(pd.unique(data["item"]))}

In [15]:
data.replace({"user": replacement_users, "item": replacement_items}, inplace = True) #consistent unique ids for users and items 

Unnamed: 0,user,item,rating,time,time_new
0,1,1,5,978300760,2000-12-31 22:12:40
1,1,2,3,978302109,2000-12-31 22:35:09
2,1,3,3,978301968,2000-12-31 22:32:48
3,1,4,4,978300275,2000-12-31 22:04:35
4,1,5,5,978824291,2001-01-06 23:38:11


In [16]:
data = data.sort_values(['user', 'time']).reset_index(drop = True) #arrange according to user and increasing time in user 
data.head()

Unnamed: 0,user,item,rating,time,time_new
0,1,32,4,978300019,2000-12-31 22:00:19
1,1,23,5,978300055,2000-12-31 22:00:55
2,1,28,4,978300055,2000-12-31 22:00:55
3,1,38,5,978300055,2000-12-31 22:00:55
4,1,25,3,978300103,2000-12-31 22:01:43


In [126]:
data[data["user"] == 2634]

Unnamed: 0,user,item,rating,time,time_new
432006,2634,2068,3,973610574,2000-11-07 15:22:54
432007,2634,421,5,973610574,2000-11-07 15:22:54
432008,2634,561,5,973610623,2000-11-07 15:23:43
432009,2634,61,4,973610623,2000-11-07 15:23:43
432010,2634,1265,4,973610623,2000-11-07 15:23:43
...,...,...,...,...,...
432279,2634,1255,3,980342937,2001-01-24 13:28:57
432280,2634,1987,3,980342972,2001-01-24 13:29:32
432281,2634,517,4,980343002,2001-01-24 13:30:02
432282,2634,179,3,980343035,2001-01-24 13:30:35


In [19]:
td_1 = data["time_new"][5] - data["time_new"][4]
td_2 = data["time_new"][50] - data["time_new"][49]
td_2

Timedelta('0 days 00:00:39')

In [20]:
g = data.groupby('user')

In [28]:
time_ranges = g.apply(lambda x: x.time_new.max() - x.time_new.min())

In [29]:
time_ranges.mean()

Timedelta('94 days 23:54:37.274999999')

In [33]:
time_ranges_days = time_ranges.apply(lambda x: x.days)

In [124]:
time_ranges[22]

Timedelta('150 days 16:31:28')

In [61]:
time_ranges[time_ranges_days < 90]

user
1      6 days 01:38:52
2      0 days 00:34:10
3      0 days 00:24:46
4      0 days 00:05:58
5      0 days 01:31:53
             ...      
6035   0 days 00:46:34
6036   0 days 12:44:07
6037   1 days 01:47:23
6038   0 days 02:52:57
6039   0 days 14:41:11
Length: 4911, dtype: timedelta64[ns]

In [35]:
time_ranges

user
1        6 days 01:38:52
2        0 days 00:34:10
3        0 days 00:24:46
4        0 days 00:05:58
5        0 days 01:31:53
              ...       
6036     0 days 12:44:07
6037     1 days 01:47:23
6038     0 days 02:52:57
6039     0 days 14:41:11
6040   481 days 14:38:43
Length: 6040, dtype: timedelta64[ns]

In [64]:
time_diffs = g.apply(lambda x: x.time_new.max() - x.time_new)

In [69]:
user = time_diffs[6040].reset_index(drop=True)
user.index[(user > datetime.timedelta(days = 14)) & (user > datetime.timedelta(days = 7))].tolist()

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [47]:
np.array([1, 2, 3])[0]

1

In [54]:
torch.stack([torch.tensor([[1,2], [3, 4]]), torch.tensor([[5, 6], [7,8]])])

tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])

In [72]:
relation_matrices_self = pickle.load(open('./data/relation_matrix_ml_50_2048.pickle','rb'))
relation_matrices_org = pickle.load(open('./data/relation_matrix_ml-1m_50_2048.pickle','rb'))
relation_matrices = list(relation_matrices_org.values())

In [73]:
print(relation_matrices_org[1])

[[   0   36   36 ... 2048 2048 2048]
 [  36    0    0 ... 2048 2048 2048]
 [  36    0    0 ... 2048 2048 2048]
 ...
 [2048 2048 2048 ...    0    0    0]
 [2048 2048 2048 ...    0    0    0]
 [2048 2048 2048 ...    0    0    0]]


In [76]:
print(relation_matrices_self[1])

tensor([[  0,   0,  20,  ..., 457, 477, 508],
        [  0,   0,  20,  ..., 457, 477, 508],
        [ 20,  20,   0,  ..., 437, 457, 488],
        ...,
        [457, 457, 437,  ...,   0,  20,  51],
        [477, 477, 457,  ...,  20,   0,  31],
        [508, 508, 488,  ...,  51,  31,   0]], dtype=torch.int32)


In [77]:
print(relation_matrices[1])

[[  0   0  20 ... 457 477 508]
 [  0   0  20 ... 457 477 508]
 [ 20  20   0 ... 437 457 488]
 ...
 [457 457 437 ...   0  20  51]
 [477 477 457 ...  20   0  31]
 [508 508 488 ...  51  31   0]]


In [70]:
np.random.choice([1, 3, 3, 4, 5], 4, replace = False)

array([4, 3, 5, 3])

In [84]:
set(x.reshape(-1).clone().detach().tolist())

{1, 2, 3, 4}

In [107]:
x = torch.tensor([[1.0 ,2], [3, 4]])
y = torch.tensor([[5, 6], [7, 8]])
z = torch.hstack([x,y])

In [110]:
z

tensor([[1., 2., 5., 6.],
        [3., 4., 7., 8.]])

In [112]:
softmax = nn.Softmax(dim = 1) 
softmax(z)

tensor([[0.0048, 0.0131, 0.2641, 0.7179],
        [0.0048, 0.0131, 0.2641, 0.7179]])

In [89]:
x[:, 1] = 1

In [129]:
x.unsqueeze(-1).shape

torch.Size([2, 2, 1])

In [97]:
x = [1, 2, 3, 4]
[x[i] if i not in ([1] + [3])]

SyntaxError: expected 'else' after 'if' expression (3073003593.py, line 2)

In [98]:
set(x) - set([1, 2])

{3, 4}

In [100]:
np.random.choice(list(set([1,2,3])), 1)

array([2])

In [113]:
torch.randn(1)

tensor([-0.9467])

In [130]:
u = torch.tensor([[[1, 2], [3, 4], [5,6]],[[1,2], [3,4], [5,6]]])
t = x

In [131]:
u.shape

torch.Size([3, 2, 2])