In [None]:
import pickle
import torch

In [None]:
def load_obj(name):
    """Unpickle and return the object stored in ``<name>.pkl``.

    Args:
        name: Path of the pickle file *without* its ``.pkl`` extension;
            the extension is appended before the file is opened.

    Returns:
        The object that was pickled into the file.

    Note:
        Unpickling can execute arbitrary code, so only use this on files
        that come from a trusted source.
    """
    pkl_path = name + '.pkl'
    with open(pkl_path, 'rb') as pkl_file:
        obj = pickle.load(pkl_file)
    return obj

In [None]:
# Load the pickled `classes` object from classes.pkl. Per the original note it covers
# roughly 160k unique user ids and 60k unique movie names; its 'title' entry is read below.
classes = load_obj('classes')

In [None]:
# Materialise the movie names as a plain Python list; a movie's position in this
# list is used as its id in the cells below.
titles = list(classes['title'])

# SEARCH

In [None]:
# Case-insensitive substring search over the movie titles.
target = "matrix"
# Lower-case the query as well as each title: previously only the title was
# lower-cased, so a query such as "Matrix" silently matched nothing.
[title for title in titles if target.lower() in title.lower()]

['Animatrix, The (2003)',
 'Armitage: Dual Matrix (2002)',
 'Matrix Reloaded, The (2003)',
 'Matrix Revolutions, The (2003)',
 'Matrix of Evil (2003)',
 'Matrix, The (1999)',
 'Return to Source: The Philosophy of The Matrix (2004)',
 'The Living Matrix (2009)',
 'The Matrix Revisited (2001)']

# EMBEDDING OPERATIONS

In [None]:
# Load the movie embeddings tensor from movie_factors_tensor.pkl
# (one row per movie, ~60k movies; rows are indexed by position in `titles`).
movie_factors = load_obj('movie_factors_tensor')

In [None]:
# INPUTS

# Movies to blend together; each entry is looked up in `titles` by exact match.
selectedMovies = [
    'Animatrix, The (2003)',
    'Matrix, The (1999)',
]
# Weight of each selected movie in the blend, in the same order as selectedMovies.
ratios = [0.5, 0.5]

In [None]:
# Resolve every selected title to its position in `titles` (the movie id).
# list.index raises ValueError when a title is not present.
idxs = list(map(titles.index, selectedMovies))
idxs

[3433, 30478]

In [None]:
# Build the "cocktail embedding": the ratio-weighted sum of the selected movies' embeddings.
if len(ratios) != len(idxs):
    # A mismatch would otherwise silently drop ratios or fail with an opaque IndexError.
    raise ValueError(
        "ratios must have one entry per selected movie: got "
        + str(len(ratios)) + " ratios for " + str(len(idxs)) + " movies"
    )
# Take shape (1, dim), dtype and device from movie_factors instead of hard-coding
# a 100-dimensional default tensor, so other embedding sizes work too.
cocktailEmbd = torch.zeros_like(movie_factors[:1])
for idx, ratio in zip(idxs, ratios):
    rawEmbd = movie_factors[idx][None]  # embedding of this movie as a (1, dim) row
    embd = ratio * rawEmbd  # this movie's contribution, scaled by its ratio
    cocktailEmbd = torch.add(cocktailEmbd, embd)  # accumulate into the cocktail

In [None]:
# L2 (Euclidean) distance from every movie embedding to the cocktail embedding.
dists = torch.cdist(movie_factors, cocktailEmbd, p=2.0)

In [None]:
# Sanity check: one distance per movie, i.e. (number of movies, 1).
dists.size()

torch.Size([58959, 1])

In [None]:
# Flatten the (num_movies, 1) distance tensor into a 1-D Python list of floats,
# keeping the original movie order.
distsList = dists.reshape(-1).tolist()

In [None]:
# The distances are already in movie-id order, so pair each one with its id
# and sort ascending: closest movie to the cocktail first, as (id, distance).
indexedDistsList = list(enumerate(distsList))
sortedDistances = sorted(indexedDistsList, key=lambda tup: tup[1])
# Preview only the closest entries; displaying the full list would dump one
# tuple per movie (tens of thousands of lines) into the cell output.
sortedDistances[:20]

[(56344, 1.1437277793884277),
 (54866, 1.1994479894638062),
 (11988, 1.252663016319275),
 (13657, 1.2783175706863403),
 (15523, 1.2864508628845215),
 (3433, 1.301152229309082),
 (30478, 1.301152229309082),
 (18562, 1.3359848260879517),
 (55311, 1.342953085899353),
 (1754, 1.3432291746139526),
 (16740, 1.3515874147415161),
 (29332, 1.3539785146713257),
 (31275, 1.4002093076705933),
 (42681, 1.4142314195632935),
 (55411, 1.429933786392212),
 (33740, 1.4557809829711914),
 (21305, 1.4573869705200195),
 (24749, 1.4578700065612793),
 (7948, 1.475415825843811),
 (11431, 1.4792667627334595),
 (31817, 1.482145071029663),
 (54512, 1.4859721660614014),
 (24078, 1.490982174873352),
 (44890, 1.4977396726608276),
 (18425, 1.4978724718093872),
 (16839, 1.5007072687149048),
 (29901, 1.510788083076477),
 (11530, 1.5128862857818604),
 (37666, 1.518521785736084),
 (14605, 1.5193941593170166),
 (33008, 1.5209671258926392),
 (14272, 1.5265780687332153),
 (58091, 1.5297833681106567),
 (21502, 1.532571792602

In [None]:
# Mean L2 distance from the cocktail embedding to all movies, rounded to 2 decimals.
avgDistFromCocktail = round(
    torch.cdist(movie_factors, cocktailEmbd).mean().item(),
    2,
)
avgDistFromCocktail

2.21

In [None]:
# Print the 10 movies closest to the cocktail together with their distance to it.
# These distances are expected to be below the average computed above, i.e. the
# movies listed here are closer to the cocktail than a typical movie is.
for rank in range(1, 11):
    movie_id, distance = sortedDistances[rank - 1]
    # \033[1m ... \033[0m renders the title in bold.
    print("{}. \033[1m{}\033[0m  |  Distance: {}".format(
        rank, titles[movie_id], round(distance, 2)))

1. [1mWatchmen (2009)[0m  |  Distance: 1.14
2. [1mUnbreakable (2000)[0m  |  Distance: 1.2
3. [1mDark City (1998)[0m  |  Distance: 1.25
4. [1mDistrict 9 (2009)[0m  |  Distance: 1.28
5. [1mEquilibrium (2002)[0m  |  Distance: 1.29
6. [1mAnimatrix, The (2003)[0m  |  Distance: 1.3
7. [1mMatrix, The (1999)[0m  |  Distance: 1.3
8. [1mGattaca (1997)[0m  |  Distance: 1.34
9. [1mV for Vendetta (2006)[0m  |  Distance: 1.34
10. [1mAbyss, The (1989)[0m  |  Distance: 1.34


In [None]:
# For each of the 10 movies listed above, compute its mean L2 distance to all
# ~60k movies, for comparison with its distance to the cocktail.
print("\033[1mAverage distances of the movies to other movies:\033[0m")
for rank in range(1, 11):
    movieEmbd = movie_factors[sortedDistances[rank - 1][0]][None]  # (1, dim) row for this movie
    avgDist = round(torch.cdist(movie_factors, movieEmbd).mean().item(), 2)
    # Keep everything on one line: bold rank, then the rounded average.
    print("\033[1m{}: \033[0m{}  ".format(rank, avgDist), end=" ")

[1mAverage distances of the movies to other movies:[0m
[1m1: [0m1.85   [1m2: [0m1.53   [1m3: [0m1.78   [1m4: [0m1.96   [1m5: [0m1.79   [1m6: [0m1.51   [1m7: [0m3.3   [1m8: [0m1.83   [1m9: [0m2.13   [1m10: [0m1.82   