In [None]:
# !pip install nltk pandas numpy scipy keras tensorflow sklearn
# nltk.download('stopwords')
# nltk.download('wordnet')

In [1]:
import json
import nltk
import string
import copy
import pandas as pd
import numpy as np
import keras.backend as K

from keras import regularizers
from keras.models import Model
from numpy import linalg as LA
from nltk.corpus import stopwords
from scipy.special import gammaln
from keras.models import Sequential
from scipy.sparse import csr_matrix
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import TfidfVectorizer
from keras.layers import Dense, Activation, Embedding, LSTM

Using TensorFlow backend.


In [2]:
def preprocess(pd):
    pd = pd.str.lower()
    pd = pd.str.replace('[{}]'.format(string.punctuation), ' ')
    pd = pd.apply(lambda x: [lemmatizer.lemmatize(w) for w in w_tokenizer.tokenize(x)])
    pd = pd.apply(lambda x: [item for item in x if item not in stop_words])
    return pd.str.join(' ')

In [3]:
def get_x_lstm(max_vocab, vocab):
    tokenizer = Tokenizer(nb_words=max_vocab, lower=True, split=' ')
    tokenizer.fit_on_texts(vocab)
    X = tokenizer.texts_to_sequences(vocab)
    return pad_sequences(X)

In [4]:
def sampleFromDirichlet(alpha):
    return np.random.dirichlet(alpha)

def sampleFromCategorical(theta):
    theta = theta/np.sum(theta)
    return np.random.multinomial(1, theta).argmax()

def word_indices(wordOccuranceVec):
    for idx in wordOccuranceVec.nonzero()[0]:
        for i in range(int(wordOccuranceVec[idx])):
            yield idx

In [28]:
class SentimentLDAGibbsSampler:
    def __init__(self, numTopics, alpha, beta):
        self.alpha = alpha
        self.beta = beta
        self.numTopics = numTopics
        self.probabilities_t = {}

    def processReviews(self, reviews):
        self.vectorizer = CountVectorizer(analyzer="word",tokenizer=None,preprocessor=None,
                                          stop_words="english",max_features=MAX_VOCAB_SIZE,max_df=.5,min_df=5)
        train_data_features = self.vectorizer.fit_transform(reviews)
        words = self.vectorizer.get_feature_names()
        self.vocabulary = dict(zip(words,np.arange(len(words))))
        self.inv_vocabulary = dict(zip(np.arange(len(words)),words))
        wordOccurenceMatrix = train_data_features.toarray()
        return wordOccurenceMatrix

    def _initialize_(self, reviews, V_weights):
        self.wordOccuranceMatrix = self.processReviews(reviews)
        numDocs, vocabSize = self.wordOccuranceMatrix.shape
        
        # Pseudocounts
        self.n_dt = np.zeros((numDocs, self.numTopics))
        self.n_t = np.zeros((self.numTopics))
        self.n_d = np.zeros((numDocs))
        self.n_vt = np.zeros((vocabSize, self.numTopics))
        self.dt_distribution = np.zeros((numDocs, self.numTopics))
#         self.t_distribution = np.zeros((self.numTopics))
        self.topics = {}

        self.alphaVec = self.alpha.copy()

        for d in range(numDocs):            
#             topicDistribution = sampleFromDirichlet(self.alphaVec)
            topicDistribution = V_weights[d]
            print(topicDistribution.shape)
            
            for i, w in enumerate(word_indices(self.wordOccuranceMatrix[d, :])):
                t = sampleFromCategorical(topicDistribution)

                self.topics[(d, i)] = t
                self.n_dt[d, t] += 1
                self.n_d[d] += 1
                self.n_vt[w, t] += 1
                self.n_t[t] += 1

            self.dt_distribution[d,:] = (self.n_dt[d] + self.alphaVec) / (self.n_d[d] + np.sum(self.alphaVec))
            
    def conditionalDistribution(self, d, v):
        probabilities_t = np.ones((self.numTopics))
        
        firstFactor = (self.n_dt[d,:] + self.alphaVec) / (self.n_d[d] + np.sum(self.alphaVec))
        thirdFactor = (self.n_vt[v, :] + self.beta) / (self.n_t + self.n_vt.shape[0] * self.beta)

        probabilities_t *= firstFactor[:]
        probabilities_t *= thirdFactor
        probabilities_t /= np.sum(probabilities_t)
        return probabilities_t

    def run(self, reviews, V_weights, maxIters=100):
        self._initialize_(reviews, V_weights)
        self.loglikelihoods = np.zeros(maxIters)
        numDocs, vocabSize = self.wordOccuranceMatrix.shape

        for iteration in range(maxIters):
            print ("Starting iteration %d of %d" % (iteration + 1, maxIters))
            loglikelihood = 0
            for d in range(numDocs):
                for i, v in enumerate(word_indices(self.wordOccuranceMatrix[d, :])):
                    t = self.topics[(d, i)]
                
                    self.n_dt[d, t] -= 1
                    self.n_d[d] -= 1
                    self.n_t[t] -= 1
                    self.n_vt[v, t] -= 1

                    probabilities_t = self.conditionalDistribution(d, v)
                    t = sampleFromCategorical(probabilities_t)
                    self.probabilities_t[(d,v)] = probabilities_t[t]
                    
                    self.topics[(d, i)] = t
                    self.n_dt[d, t] += 1
                    self.n_d[d] += 1
                    self.n_vt[v, t] += 1
                    self.n_t[t] += 1
                
                if iteration == maxIters - 1:
                    self.dt_distribution[d,:] = (self.n_dt[d] + self.alphaVec) / (self.n_d[d] + np.sum(self.alphaVec))
                    self.dt_distribution = self.dt_distribution/np.sum(self.dt_distribution, axis=1)[:,np.newaxis]

                loglikelihood += np.sum(gammaln((self.n_dt[d] + self.alphaVec)
                                               )) - gammaln(np.sum((self.n_dt[d] + self.alphaVec))) 
                loglikelihood -= np.sum(gammaln(self.alphaVec)) - gammaln(np.sum(self.alphaVec))

            for k in range(self.numTopics):
                loglikelihood += (np.sum(gammaln((self.n_vt[:, k] + self.beta)
                                                )) - gammaln(np.sum((self.n_vt[:, k] + self.beta))))
                loglikelihood -= (vocabSize * gammaln(self.beta) - gammaln(vocabSize * self.beta))

            self.loglikelihoods[iteration] = loglikelihood        
            print ("Total loglikelihood is {}".format(loglikelihood))

In [29]:
lemmatizer = nltk.stem.WordNetLemmatizer()
stemmer = nltk.stem.PorterStemmer()
w_tokenizer = nltk.tokenize.WhitespaceTokenizer()
stop_words = stopwords.words('english')

In [7]:
dataset = []
with open("Automotive_5.json", 'r') as f:
    p = f.readlines()
    for i in p:
        dataset.append(json.loads(i))
dataset = pd.DataFrame(dataset)

In [8]:
U_user_ids = list(dataset.reviewerID.unique())
V_item_ids = list(dataset.asin.unique())

In [9]:
M_users_count = len(U_user_ids)
N_items_count = len(V_item_ids)

In [10]:
M_users_count, N_items_count

(2928, 1835)

In [11]:
R_rating = np.zeros((M_users_count, N_items_count))

for idx, i in enumerate(dataset[["reviewerID", "overall", "asin"]].values):
    R_rating[U_user_ids.index(i[0])][V_item_ids.index(i[2])] = i[1]
        
R_rating = R_rating/5

In [12]:
D_combined_reviews = [""] * N_items_count

for i in dataset[["asin", "reviewText"]].values:
    D_combined_reviews[V_item_ids.index(i[0])] += i[1]
D_combined_reviews = pd.DataFrame(D_combined_reviews)
D_combined_reviews = preprocess(D_combined_reviews[0])
D_combined_reviews.shape

(1835,)

In [13]:
R_rating

array([[1. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0.8, 0. , 0. , ..., 0. , 0. , 0. ],
       [1. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ]])

## PMF

In [14]:
numtopics = 5
MAX_VOCAB_SIZE = 100
learning_rate_pmf = lambda_u = lambda_v = 0.1

In [15]:
U_user_weights = np.random.rand(numtopics, M_users_count)
V_item_weights = np.random.rand(numtopics, N_items_count)

In [16]:
U_user_weights.shape

(5, 2928)

In [17]:
V_item_weights.shape

(5, 1835)

#### Updating Gradients

In [18]:
# U_user_weights -= learning_rate_pmf * np.array(diff_lu).T
# V_item_weights -= learning_rate_pmf * np.array(diff_lv).T

## HFT

In [53]:
beta = 0.01
maxiter_hft = 10
alpha = 10/numtopics * np.ones(numtopics)

In [31]:
sampler = SentimentLDAGibbsSampler(numtopics, alpha, beta)

In [32]:
sampler.run(D_combined_reviews.values, V_item_weights.T, maxiter_hft)

(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)


(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
(5,)
Starting iteration 1 of 10
Total loglikelihood is -878041.4677901841
Starting iteration 2 of 10
Total loglikelihood is -867982.6154312992
Starting iteration 3 of 10
Total loglikelihood is -843441.1499876172
Starting iteration 4 of 10
Total logli

In [33]:
sampler.n_vt.shape, sampler.dt_distribution.shape

((100, 5), (1835, 5))

## LSTM

In [34]:
lstm_out = 128
batch_size = 8
p_embedding_lstm = 200

In [35]:
X = get_x_lstm(MAX_VOCAB_SIZE, D_combined_reviews.values)



In [36]:
model = Sequential()
model.add(Embedding(MAX_VOCAB_SIZE, p_embedding_lstm, input_length=X.shape[1]))
model.add(LSTM(lstm_out, dropout = 0.2))
model.add(Dense(5, activation='tanh', name ="doc_latent_vector", kernel_regularizer=regularizers.l2()))
model.compile(loss = 'mean_squared_error', optimizer='rmsprop', metrics = ['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [37]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 2103, 200)         20000     
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               168448    
_________________________________________________________________
doc_latent_vector (Dense)    (None, 5)                 645       
Total params: 189,093
Trainable params: 189,093
Non-trainable params: 0
_________________________________________________________________


In [38]:
model.fit(X, V_item_weights.T, epochs=1, batch_size=128)

Instructions for updating:
Use tf.cast instead.
Epoch 1/1


<keras.callbacks.History at 0x7f5c8360ab00>

In [39]:
def get_last_layer_op():
    intermediate_layer_model = Model(inputs=model.input,
                                     outputs=model.get_layer('doc_latent_vector').output)
    return intermediate_layer_model.predict(X)

In [40]:
def get_l1():
    l1 = 0
    for i in range(M_users_count):
        for j in range(N_items_count):
            if R_rating[i][j]>0:
                l1 += (R_rating[i][j] - np.dot(U_user_weights.T[i], V_item_weights.T[j]))**2
    return l1

In [41]:
def get_l3():
    return LA.norm(U_user_weights, 'fro')

In [42]:
def get_l4():
    return LA.norm(V_item_weights.T - get_last_layer_op(), 'fro')

In [43]:
# sum([get_l1(), get_l3(), get_l4()])

In [44]:
lstm_last_layer = get_last_layer_op()

In [45]:
lstm_last_layer.shape

(1835, 5)

## Gradient V

In [46]:
param_k = 0.1
param_Nj = sampler.wordOccuranceMatrix.sum(axis=1)
peakiness = 1
lambda_t = 0.1
param_njk = sampler.n_dt.copy()
dt_distribution = sampler.dt_distribution

In [47]:
diff_lv = []
for j in range(N_items_count):
    temp_sums = [0]*5
    for i in range(M_users_count):
        if R_rating[i][j]>0:
            temp_sums += (R_rating[i][j] - np.dot(U_user_weights.T[i], V_item_weights.T[j]))*U_user_weights.T[i]
    temp_sums += 2 * lambda_v * (V_item_weights.T[j] - lstm_last_layer[j])
    temp_sums -= lambda_t*peakiness*(param_njk[j] - param_Nj[j]*dt_distribution[j]).sum()
    diff_lv.append(list(temp_sums))
diff_lv = np.array(diff_lv)

In [48]:
# diff_lv -= lambda_t*peakiness*(param_njk - param_Nj*sampler.dt_distribution)

In [49]:
diff_lv[0]

array([-3.43893958, -4.81344252, -5.76009553, -5.09524792, -3.90819416])

In [50]:
diff_lv.shape

(1835, 5)

## Gradinet U

In [51]:
diff_lu = []

for i in range(M_users_count):
    temp_sums = [0]*5
    for j in range(N_items_count):
        if R_rating[i][j]>0:
            temp_sums += (R_rating[i][j] - np.dot(U_user_weights.T[i], V_item_weights.T[j]))*V_item_weights.T[j]
    temp_sums += 2 * lambda_u * U_user_weights.T[i]
    diff_lu.append(list(temp_sums))
diff_lu = np.array(diff_lu)

In [52]:
diff_lu.shape

(2928, 5)

## Gradinet phi

In [54]:
param_nkw = sampler.n_vt

In [55]:
param_Nk = sampler.n_vt.T.sum(axis=1)

In [56]:
Phi_weights = np.random.rand(MAX_VOCAB_SIZE, numtopics)

In [65]:
Phi_weights.shape

(100, 5)

In [57]:
param_nkw.shape

(100, 5)

In [71]:
diff_phi = []
for w in range(MAX_VOCAB_SIZE):
    param_zw = np.exp(Phi_weights[w]).sum()
    temp_phi = []
    for k in range(numtopics):
#         print(w, k, param_nkw[w, k], param_Nk[k], np.exp(Phi_weights[w, k])/param_zw )
        temp_phi.append(param_nkw[w, k] - (param_Nk[k] * np.exp(Phi_weights[w, k])/param_zw))
    diff_phi.append(temp_phi)
diff_phi = np.array(diff_phi)

In [72]:
diff_phi.shape

(100, 5)

In [None]:
diff_phi

In [None]:
# def sampleFromDirichlet(alpha):
#     return np.random.dirichlet(alpha)

# def sampleFromCategorical(theta):
#     theta = theta/np.sum(theta)
#     return np.random.multinomial(1, theta).argmax()

# def processReviews(reviews, MAX_VOCAB_SIZE):
#     vectorizer = CountVectorizer(analyzer="word",tokenizer=None,preprocessor=None,
#                                       stop_words="english",max_features=MAX_VOCAB_SIZE,max_df=.5,min_df=5)
#     train_data_features = vectorizer.fit_transform(reviews)
#     words = vectorizer.get_feature_names()
#     vocabulary = dict(zip(words,np.arange(len(words))))
#     inv_vocabulary = dict(zip(np.arange(len(words)),words))
#     wordOccurenceMatrix = train_data_features.toarray()
#     return wordOccurenceMatrix

# numtopics = 5
# alpha = 10/numtopics * np.ones(numtopics)
# beta = .01
# probabilities_t = {}

# wordOccuranceMatrix = processReviews(D_combined_reviews.values, 1000)

# numDocs, vocabSize = wordOccuranceMatrix.shape

# n_dt = np.zeros((numDocs, numTopics))
# n_t = np.zeros((numTopics))
# n_d = np.zeros((numDocs))
# n_vt = np.zeros((vocabSize, numTopics))
# dt_distribution = np.zeros((numDocs, numTopics))

# topics = {}
# alphaVec = alpha.copy()

# def word_indices(wordOccuranceVec):
#     for idx in wordOccuranceVec.nonzero()[0]:
#         for i in range(int(wordOccuranceVec[idx])):
#             yield idx

# for d in range(numDocs):            
#     topicDistribution = sampleFromDirichlet(alphaVec)

#     for i, w in enumerate(word_indices(wordOccuranceMatrix[d, :])):
#         t = sampleFromCategorical(topicDistribution)

#         topics[(d, i)] = t
#         n_dt[d, t] += 1
#         n_d[d] += 1
#         n_vt[w, t] += 1
#         n_t[t] += 1

#     dt_distribution[d,:] = (n_dt[d] + alphaVec) / (n_d[d] + np.sum(alphaVec))

# # Run

# maxIters=100

# loglikelihoods = np.zeros(maxIters)
# numDocs, vocabSize = wordOccuranceMatrix.shape

# def conditionalDistribution( d, v):
#     probabilities_t = np.ones((numTopics))

#     firstFactor = (n_dt[d,:] + alphaVec) / (n_d[d] + np.sum(alphaVec))

#     thirdFactor = (n_vt[v, :] + beta) / (n_t + n_vt.shape[0] * beta)
#     probabilities_t *= firstFactor
#     probabilities_t /= np.sum(probabilities_t)
#     return probabilities_t

# for iteration in range(maxIters):
#     print ("Starting iteration %d of %d" % (iteration + 1, maxIters))
#     loglikelihood = 0
#     for d in range(numDocs):
#         for i, v in enumerate(word_indices(wordOccuranceMatrix[d, :])):
#             t = topics[(d, i)]

#             n_dt[d, t] -= 1
#             n_d[d] -= 1
#             n_vt[v, t] -= 1
#             n_t[t] -= 1
            
#             probabilities_t = conditionalDistribution(d, v)
            
#             ind = sampleFromCategorical(probabilities_t.flatten())
#             t = np.unravel_index(ind, probabilities_t.shape)

#             topics[(d, i)] = t
#             n_dt[d, t] += 1
#             n_d[d] += 1
#             n_vt[v, t] += 1
#             n_t[t] += 1

#         if iteration == maxIters - 1:
#             dt_distribution[d,:] = (n_dt[d] + alphaVec) / (n_d[d] + np.sum(alphaVec))
#             dt_distribution = dt_distribution/np.sum(dt_distribution, axis=1)[:,np.newaxis]

In [None]:
# text_vectorizer = TfidfVectorizer(max_features=3000)
# text_vectorizer.fit(D_combined_reviews.values)
# feature_names = text_vectorizer.get_feature_names()
# # feature_names
# D_combined_reviews.shape
# p = []
# for j in D_combined_reviews:
#     a = []
#     for i in j.split(" "):
#         try:
#     #         print(feature_names.index(i))
#             a.append(feature_names.index(i))
#         except:
#             pass
#     p.append(a)

In [None]:
# text_vectorizer = CountVectorizer(max_features=300)
# D_combined_reviews_vectorised = text_vectorizer.fit_transform(D_combined_reviews.values)
# D_combined_reviews_vectorised.shape

# lda = LatentDirichletAllocation(n_components=5, random_state=0)
# lda.fit(D_combined_reviews_vectorised)

# lda.score(D_combined_reviews_vectorised[0])

# lda.transform(D_combined_reviews_vectorised[0]).sum()

# lda.components_.shape

In [None]:
# l1 = 0
# for i in range(M_users_count):
#     for j in range(N_items_count):
#         if R_rating[i][j]>0:
#             l1 += (R_rating[i][j] - np.dot(U_user_weights.T[i], V_item_weights.T[j]))**2

# l2 = LA.norm(U_user_weights, 'fro')

# l3 = LA.norm(V_item_weights, 'fro')

# L_MF = l1 + lambda_u*l2 + lambda_v*l3

# L_MF, l1, l2, l3

# #### Differentiation

# diff_lu = []

# for i in range(M_users_count):
#     temp_sums = [0]*5
#     for j in range(N_items_count):
#         if R_rating[i][j]>0:
#             temp_sums += (R_rating[i][j] - np.dot(U_user_weights.T[i], V_item_weights.T[j]))*V_item_weights.T[j]
#     temp_sums += 2 * lambda_u * U_user_weights.T[i]
#     diff_lu.append(list(temp_sums))

# diff_lv = []
# for j in range(N_items_count):
#     temp_sums = [0]*5
#     for i in range(M_users_count):
#         if R_rating[i][j]>0:
#             temp_sums += (R_rating[i][j] - np.dot(U_user_weights.T[i], V_item_weights.T[j]))*U_user_weights.T[i]
#     temp_sums += 2 * lambda_v * V_item_weights.T[j]
#     diff_lv.append(list(temp_sums))