## Algorithm

Because the dataset is sparse, we need to adapt our user-based collaborative filtering approach to meet the needs of sparse data. This can be achieved by incorporating a matrix factorization algorithm. Compared to regular approaches like collaborative or content filtering *footnote, matrix factorization allows us to uncover the latent features underlying two different factors. This is also the method used by companies such as Netflix (citation).

### Explanation of Matrix Factorization

We want to estimate a matrix $Q_{k \times k}$ that captures $k$ latent features for the users and beers. Specifically, we want to estimate the vectors $b_i$ and $u_j$ of length $k$ for each beer and each user in our data set. The vector $b_i$ represents the extent to which beer $i$ possesses the factors that users are interested in, whereas $u_j$ similarly represents the extent to which user $j$ is interested in these factors.

When we take the dot product of these two vectors, we obtain an estimate of the rating that a specific user would give to a specific beer.

\begin{align}
\hat{r}_{ji} & = b^T_{i} u_{j}
\end{align}

Assuming our initial rating matrix $R$ has dimensions $m \times n$, we can combine these vectors into two matrices and illustrate the matrix factorization.

\begin{align}
\hat{R} & = U B^T
\end{align}

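Where $U$ is an $m \times k$ matrix that represents each user's association with the latent factors and $B$ is an $n \times k$ matrix that represents each beer's association with the latent factors. We will use Stochastic Gradient Descent to estimate the matrices $U$ and $B$ (in the code below, `Q` holds $U$ and `P` holds $B^T$). Our loss/objective function can be defined as:
\begin{align}
L & = \sum_{(j,i) \in K} \left(r_{ji} - b^T_{i} u_{j}\right)^2 + \beta \left(\lVert U \rVert^2 + \lVert B \rVert^2\right)
\end{align}

where $K$ is the set of (user, beer) pairs with an observed rating and $\beta$ controls the strength of the regularization.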



*Explanation

## Data Preparation

In [47]:
import pandas as pd 
import numpy as np
import tensorflow as tf
from scipy.spatial.distance import cosine
from scipy import sparse
import numba
import pickle

In [6]:
# Load the reviews and keep only the columns the recommender needs.
df = pd.read_csv("final_data.csv")
dfc = df[["score_overall", "user_id", "beer_names", "beer_id", "brewery_name"]]

# Alternative data source (kept for reference):
##df=pd.read_csv("beer_reviews.csv")
##dfc=df[["review_overall","review_profilename","beer_name","beer_beerid","brewery_name"]]

# Keep only the beers and users that have at least 10 scored reviews.
beers = dfc.groupby('beer_id').count().query("score_overall >=10").index
users = dfc.groupby('user_id').count().query("score_overall >=10").index
# Apply both conditions as ONE boolean mask. Chaining two boolean selections
# (dfc[a][b]) makes pandas reindex the second mask and emit a UserWarning.
df_filtered = dfc[dfc.beer_id.isin(beers) & dfc.user_id.isin(users)]

# Replace the raw ids with dense integer codes (one code per review row).
users = pd.factorize(df_filtered.user_id)[0]
beers = pd.factorize(df_filtered.beer_id)[0]

# Lookup tables: integer code -> original id.
index_to_userid = dict(zip(users, df_filtered.user_id))
index_into_beerid = dict(zip(beers, df_filtered.beer_id))

# Dense rating matrix: one row per user, one column per beer, 0 = not rated.
R = np.zeros((len(index_to_userid), len(index_into_beerid)))
# Vectorised fill; replaces the row-by-row iterrows loop and the temporary
# inversion of the lookup dictionaries. For a repeated (user, beer) pair the
# last review wins, as it did with the loop.
R[users, beers] = df_filtered.score_overall.values

# Lookup table: beer code -> "<beer name> by <brewery name>".
index_into_beername = dict(zip(beers, df_filtered.beer_names + " by " + df_filtered.brewery_name))


  if __name__ == '__main__':


In [21]:
# Swap keys and values of the beer-name lookup.
# Re-running this cell swaps them back again.
index_into_beername = dict(
    (value, key) for key, value in index_into_beername.items()
)


In [10]:
# Persist the beer-name lookup to disk. The context manager closes the file
# even if pickling raises.
with open("beers.pickle", "wb") as pickle_out:
    pickle.dump(index_into_beername, pickle_out)

## Application Without TensorFlow

In [72]:
def matrix_factor(X,epochs=25000,k=4,lr=0.007,beta=0.001,seed=None):
    """Factorise a rating matrix as Q @ P with stochastic gradient descent.

    Only entries of ``X`` that are greater than zero are treated as observed
    ratings; all other entries are ignored during training.

    Parameters
    ----------
    X : 2-D numpy array of ratings.
    epochs : number of full passes over the observed entries.
    k : number of latent factors.
    lr : learning rate of the gradient step.
    beta : weight-decay factor subtracted from the factors at every update.
    seed : optional int. When given, the factors are initialised from a
        private ``RandomState(seed)`` so the run is reproducible; when None
        the global numpy random state is used, as before.

    Returns
    -------
    (r_hat, Q, P) where ``Q`` is (rows, k), ``P`` is (k, columns) and
    ``r_hat = Q @ P`` is the dense matrix of predicted ratings.
    """
    n_rows, n_cols = X.shape
    rng = np.random if seed is None else np.random.RandomState(seed)
    Q = rng.rand(n_rows, k)
    P = rng.rand(n_cols, k).T

    rows, cols = np.where(X > 0)
    observed = X[rows, cols]

    for epoch in range(epochs):
        if epoch % 1000 == 0:
            # Predict only the observed cells instead of materialising the
            # dense Q @ P on every epoch just to print the loss now and then.
            predicted = np.einsum("ik,ki->i", Q[rows], P[:, cols])
            loss = np.sum(np.square(observed - predicted))
            print("Epoch : " + str(epoch)+ ", Loss: "+str(loss))
        for i, j in zip(rows, cols):
            # Row factor first; the column update below deliberately uses the
            # freshly updated Q[i].
            Q[i] = Q[i] + 2*lr*(X[i, j] - Q[i] @ P[:, j])*P[:, j] - beta*Q[i]
            P[:, j] = P[:, j] + 2*lr*(X[i, j] - Q[i] @ P[:, j])*Q[i] - beta*P[:, j]
    return Q@P,Q,P
     

    
    

In [172]:
def add_user(Q,P,user,lr=0.007,beta=0.001,epochs=10000):
    """Predict ratings for a new user while keeping the item factors fixed.

    A fresh factor vector is fitted with stochastic gradient descent on the
    entries of ``user`` that are greater than zero; ``P`` is never modified.

    Parameters
    ----------
    Q : existing (rows, k) factor matrix; only its second dimension (k) is used.
    P : (k, items) item factor matrix.
    user : 1-D array of ratings, 0 meaning "not rated".
    lr : learning rate of the gradient step.
    beta : weight-decay factor subtracted from the vector at every update.
    epochs : number of passes over the rated items.

    Returns
    -------
    1-D array of length ``len(user)`` with the predicted rating per item.
    """
    user = np.asarray(user)
    k = Q.shape[1]
    Q_n = np.random.rand(k)
    # np.where returns a 1-tuple for 1-D input; take its only element so the
    # loop visits EVERY rated item (len() of the tuple itself is always 1).
    rated = np.where(user > 0)[0]
    for epoch in range(epochs):
        for i in rated:
            Q_n = Q_n + 2*lr*(user[i] - np.dot(Q_n, P[:, i]))*P[:, i] - beta*Q_n
    user_hat = np.zeros(len(user))
    user_hat[:P.shape[1]] = np.dot(Q_n, P)
    return user_hat

In [148]:
# The declared return type lists THREE arrays to match `return r_hat,Q,P`
# (and the three-way unpacking at the call site).
@numba.jit("Tuple((double[:,:],double[:,:],double[:,:]))(double[:,:],i8,i8,double,double)",nopython=True)
def matrix_factor_numba(X,epochs=25000,k=4,lr=0.0005,beta=0.0001):
    """Numba-compiled SGD matrix factorisation of ``X`` into Q @ P.

    Only entries of ``X`` greater than zero are used for training.

    Parameters
    ----------
    X : 2-D float64 array of ratings.
    epochs : number of full passes over the observed entries.
    k : number of latent factors.
    lr : learning rate of the gradient step.
    beta : weight-decay factor subtracted from the factors at every update.

    Returns
    -------
    (r_hat, Q, P) with ``r_hat = Q @ P``.
    """
    n_rows,n_cols=X.shape
    Q=np.random.rand(n_rows,k)
    P=np.random.rand(n_cols,k).T
    z_i,z_j=np.where(X>0)
    z_i=list(z_i)
    z_j=list(z_j)
    n_obs=len(z_i)
    for epoch in range(epochs):
        # Lightweight progress indicator.
        if epoch%100==0:
            print(epoch)
        for u in range(n_obs):
            i=z_i[u]
            j=z_j[u]
            # Row factor first; the column update uses the updated Q[i].
            Q[i]=Q[i]+2*lr*(X[i,j]-Q[i]@P[:,j])*P[:,j]-beta*Q[i]
            P[:,j]=P[:,j]+2*lr*(X[i,j]-Q[i]@P[:,j])*Q[i]-beta*P[:,j]
    r_hat=Q@P
    return r_hat,Q,P
     

    

In [149]:
%%time
r_hat2,Q2,P2=matrix_factor_numba(R,epochs=1000,k=3,lr=0.005,beta=0.0001)

0
100
200
300
400
500
600
700
800
900
Wall time: 23min 13s


In [174]:
np.save("r_hat",r_hat2)
np.save("Q",Q2)
np.save("P",P2)


In [141]:
np.load("r_hat.npy")

In [179]:
# NOTE: pickle.load can execute arbitrary code; only load files that this
# notebook wrote itself.
# The context manager closes the file handle once loading is done.
with open("beers.pickle", "rb") as pickle_in:
    beers = pickle.load(pickle_in)
# Swap keys and values of the loaded mapping.
beers = {v: k for k, v in beers.items()}


In [185]:
z=np.array([1,2,5,3,6])

In [190]:
(-z).argsort()[:2][1]

2

In [194]:
np.random.choice(z)

2

## Application Using TensorFlow

In [48]:
X=np.array([[1,0,0,0,5],[0,0,4,1,0],[0,2,1,0,0],[0,0,4,4,1],[2,3,1,0,0],[5,3,1,0,0]],dtype="float64")


In [66]:
def Matrix_Factorization(X,k=3,lr=0.001,beta=0.0005,epochs=25000,tolerance=0.001):
    """Factorise a rating matrix with TensorFlow (1.x graph API) gradient descent.

    Entries of ``X`` greater than zero are treated as observed ratings. The
    objective is the mean squared error on those entries plus
    ``beta * (norm(Q) + norm(P))``.

    Parameters
    ----------
    X : 2-D array of ratings with shape (m, n).
    k : number of latent factors.
    lr : learning rate of the gradient-descent optimizer.
    beta : weight of the norm penalty on Q and P.
    epochs : maximum number of optimisation steps.
    tolerance : training stops early once the mean squared error, checked
        every 1000 steps, falls below this value.

    Returns
    -------
    (rounded_r_hat, Q, P): the element-wise rounded product Q @ P, the (m, k)
    factor matrix Q and the (k, n) factor matrix P, all as numpy arrays.
    """
    m,n=X.shape

    # Build everything in a private graph so repeated calls do not keep
    # adding nodes and variables to the default graph.
    with tf.Graph().as_default():
        Rating = tf.placeholder(tf.float32, [m,n])
        Q = tf.Variable(tf.truncated_normal([int(m), k], stddev=0.2, mean=0), name="users")
        P = tf.Variable(tf.truncated_normal([k, int(n)], stddev=0.2, mean=0), name="beers")

        r_hat = tf.matmul(Q,P)
        # Restrict the error to the observed (non-zero) ratings.
        mask = tf.greater(Rating, 0)
        R_h_values = tf.boolean_mask(r_hat, mask)
        R_values = tf.boolean_mask(Rating, mask)

        # Defined once and reused for logging, instead of creating a new op
        # inside the training loop.
        mse = tf.reduce_mean(tf.squared_difference(R_values,R_h_values))
        loss = mse+beta*(tf.norm(Q)+tf.norm(P))

        optimizer = tf.train.GradientDescentOptimizer(lr)
        train_step = optimizer.minimize(loss)
        init = tf.global_variables_initializer()

        # The context manager releases the session's resources on exit.
        with tf.Session() as sess:
            sess.run(init)
            print("STARTING THE DESCENT")

            for i in range(epochs):
                if i % 1000 == 0:
                    l = sess.run(mse,feed_dict={Rating:X})
                    print("EPOCH " +str(i)+ ", Loss: " + str(l))
                    if l<tolerance:
                        break
                sess.run(train_step,feed_dict={Rating:X})

            final_res, q_val, p_val = sess.run([r_hat, Q, P])
    return np.round(final_res),q_val,p_val
    

In [67]:
zz,Q,P=Matrix_Factorization(X,epochs=25000)

STARTING THE DESCENT
EPOCH 0, Loss: 8.766003
EPOCH 1000, Loss: 8.172392
EPOCH 2000, Loss: 6.2677407
EPOCH 3000, Loss: 2.661744
EPOCH 4000, Loss: 1.0015736
EPOCH 5000, Loss: 0.47182128
EPOCH 6000, Loss: 0.28467563
EPOCH 7000, Loss: 0.21309137
EPOCH 8000, Loss: 0.1701608
EPOCH 9000, Loss: 0.13640362
EPOCH 10000, Loss: 0.10966242
EPOCH 11000, Loss: 0.08998828
EPOCH 12000, Loss: 0.07642423
EPOCH 13000, Loss: 0.06722689
EPOCH 14000, Loss: 0.060698647
EPOCH 15000, Loss: 0.055616673
EPOCH 16000, Loss: 0.051247403
EPOCH 17000, Loss: 0.047201093
EPOCH 18000, Loss: 0.043290023
EPOCH 19000, Loss: 0.039436676
EPOCH 20000, Loss: 0.035622418
EPOCH 21000, Loss: 0.031859685
EPOCH 22000, Loss: 0.02818217
EPOCH 23000, Loss: 0.02463113
EPOCH 24000, Loss: 0.021254275


In [69]:
new=np.array([0,0,5,0,0])

In [None]:
def add_another(X,Q,P,new):
    # Unfinished stub: builds a few TensorFlow graph nodes and returns nothing.
    # The `new` argument is not used by any line below.
    m,n=X.shape
    Rating= tf.placeholder(tf.float32, [m,n])
    # NOTE(review): `k` is neither a parameter nor a local here; presumably it
    # is expected to exist as a notebook global -- confirm.
    # The next two lines rebind the Q and P arguments to placeholders, so the
    # values passed in by the caller are discarded.
    Q= tf.placeholder(tf.float32, [m,k])
    P= tf.placeholder(tf.float32, [m,k])
    # Q is rebound once more, now to a freshly initialised variable.
    Q = tf.Variable(tf.truncated_normal([int(m), k], stddev=0.2, mean=0), name="users")

    


array([[ 1.8729577 , -1.0004058 , -0.25965554],
       [ 1.571367  ,  1.1707896 , -0.46825612],
       [-0.0993017 , -0.04132294, -1.058435  ],
       [ 0.40730035,  1.147678  , -2.0240343 ],
       [-0.02403948, -0.15282382, -1.3832246 ],
       [ 0.321137  , -1.5032159 , -1.8284739 ]], dtype=float32)

In [None]:
tf.dynamic_partition(tf.reshape(r_hat, [-1]), user_ind * tf.shape(r_hat)[1] + item_ind, 1)

In [None]:
zz=Matrix_Factorization(R_f,epochs=3)

In [None]:
r_samp.shape

In [None]:
user_ind=list(user_ind)
item_ind=list(item_ind)

In [None]:
len(item_ind)

In [None]:
tf.reset_default_graph() 

In [None]:
Z=np.zeros((30000,60000))

In [None]:
R.shape

In [None]:
m,n=R_f.shape
    
Rating= tf.placeholder(tf.float32, [m,n])
Q = tf.Variable(tf.truncated_normal([int(m), 3], stddev=0.2, mean=1), name="users")
P=tf.Variable(tf.truncated_normal([3, int(n)], stddev=0.2, mean=1), name="beers")


r_hat=tf.matmul(Q,P)
mask =  tf.greater(Rating, 0)
R_h_values = tf.boolean_mask(r_hat, mask)
R_values = tf.boolean_mask(Rating, mask)
    
loss = tf.reduce_mean(tf.squared_difference(R_h_values, R_values))


In [None]:
optimizer = tf.train.GradientDescentOptimizer(0.001)
train_step = optimizer.minimize(loss)

In [None]:
len(user_ind)

In [None]:
X

In [None]:
mask = tf.greater(X, 0)
non_zero_array = tf.boolean_mask(X, mask)

In [None]:
Matrix_Factorization(X)

In [None]:
global_step = tf.Variable(0, trainable=False)
optimizer = tf.train.GradientDescentOptimizer(0.0001)
train_step = optimizer.minimize(loss)


In [None]:
R_values

In [None]:
user_ind=user_ind.reshape(1571801,1)
item_ind=item_ind.reshape(1571801,1)

In [None]:
list(user_ind)

In [None]:
Rating.rank

In [None]:
len(user_ind)

In [None]:
Rating

In [None]:
tf.gather(Rating,[user_ind])

In [None]:

train_step = optimizer.minimize(loss)

In [None]:
tf.Variable(tf.truncated_normal([5, 3], stddev=0.2, mean=0), name="users")

In [None]:
Matrix_Factorization(R,epochs=2)

In [None]:
X

In [None]:


sess = tf.Session()
sess.run(tf.initialize_all_variables())

for i in range(10000):
    if i % 500 == 0:
        print("EPOCH " +str(i))
    sess.run(train_step,feed_dict={R:X})
    
final=tf.matmul(Q,P)
final_res = sess.run([final])


In [None]:
# Toy graph: a 5x5 rating placeholder factorised with 3 latent factors.
# NOTE(review): the toy matrix X defined earlier in this notebook has 6 rows,
# which does not match this [5,5] placeholder -- confirm the intended input.
R= tf.placeholder(tf.float32, [5,5])
Q = tf.Variable(tf.truncated_normal([5, 3], stddev=0.2, mean=0), name="users")
# A stray trailing `i` after the closing parenthesis made this line a syntax error.
P=tf.Variable(tf.truncated_normal([3, 5], stddev=0.2, mean=0), name="beers")

r_hat=tf.matmul(Q,P)

In [None]:
R_h_values = tf.gather(tf.reshape(r_hat, [-1]), user_ind * tf.shape(r_hat)[1] + item_ind, name="existing_ratings2")
R_values=tf.gather(tf.reshape(R, [-1]), user_ind * tf.shape(r_hat)[1] + item_ind, name="existing_ratings")

In [None]:
loss = tf.reduce_mean(tf.squared_difference(R_h_values, R_values))
global_step = tf.Variable(0, trainable=False)
optimizer = tf.train.GradientDescentOptimizer(0.002)
train_step = optimizer.minimize(loss, global_step=global_step)


In [None]:
sess = tf.Session()
sess.run(tf.initialize_all_variables())


In [None]:
for i in range(10000):
    if i % 500 == 0:
        print("EPOCH " +str(i))
    sess.run(train_step,feed_dict={R:X})
    
final=tf.matmul(Q,P)
final_res = sess.run([final])

In [None]:
final_res

In [None]:
Z=np.vstack((X,X))

In [None]:
n,m=R.shape
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
for k in range(len(i)):
    sess.run(grad_descent)
        
            
            

In [None]:
int(Q[0,1])

In [None]:
eis=R[0,0]-tf.tensordot(Q[0],P[:,0],1)

In [None]:
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
out=sess.run(rows,feed_dict={r:X})
print(out)

In [None]:
yy=np.where(X!=0)

In [None]:
np.sum(X!=0)

In [None]:
def p_q(k):
    """Create k pairs of zero-initialised TensorFlow variables of length k.

    Returns the two lists as ``(qs, ps)``.
    """
    # Within each pair the `p` variable is created before the `q` variable.
    pairs = [
        (tf.Variable(tf.zeros([k])), tf.Variable(tf.zeros([k])))
        for _ in range(k)
    ]
    ps = [p_var for p_var, _ in pairs]
    qs = [q_var for _, q_var in pairs]
    return qs, ps

In [None]:
sA = sparse.csr_matrix(R)

In [None]:
sA[0,5]

In [None]:
R

In [None]:
# Count the rows of R that have at most 3 entries greater than zero.
count=0
for i in R:
    if np.sum(i>0)<=3:
        count+=1
# Bare expression so the notebook displays the total.
count

In [None]:
z=np.where(R>0)

In [None]:
len(z[0])

In [None]:
k=z[0]
j=z[1]

In [None]:
Q=np.random.rand(6,3)
P=np.random.rand(6,3).T

In [None]:
for i in range (1571801):
    

In [None]:
Z=sparse.rand(3, 4, density=0.25, format="csr", random_state=42)

In [None]:
Z.todense()

### EXPERIMENTAL

In [None]:
def matrix_factor(X,epochs=15000,k=4,lr=0.007,beta=0.001):
    """Experimental SGD factorisation of ``X``; returns only the product Q @ P.

    Entries of ``X`` greater than zero are the training ratings. The epoch
    number is printed on every pass.

    NOTE: this shares its name with another ``matrix_factor`` defined earlier
    in the notebook, so running this cell replaces that definition.
    """
    n_rows, n_cols = X.shape
    Q = np.random.rand(n_rows, k)
    P = np.random.rand(n_cols, k).T
    rated_rows, rated_cols = np.where(X > 0)
    observed = list(zip(list(rated_rows), list(rated_cols)))
    for epoch in range(epochs):
        print("Epoch : " + str(epoch))
        for i, j in observed:
            # Update the row factor, then the column factor with the new row.
            row_error = X[i, j] - Q[i] @ P[:, j]
            Q[i] = Q[i] + 2*lr*row_error*P[:, j] - beta*Q[i]
            col_error = X[i, j] - Q[i] @ P[:, j]
            P[:, j] = P[:, j] + 2*lr*col_error*Q[i] - beta*P[:, j]
    return Q @ P
     

    
    

In [None]:
R= tf.placeholder(tf.float32, [5,5])
Q = tf.Variable(tf.truncated_normal([5, 3], stddev=0.2, mean=0), name="users")
P=tf.Variable(tf.truncated_normal([3, 5], stddev=0.2, mean=0), name="beers")

In [None]:
P[1]-0.002*Q[:,0]

In [None]:
Q[:,0]+2.0*0.0002*(X[0,1]-tf.tensordot(Q[:,1],P[0],axes=1))*P[1]-0.002*Q[:,0]

In [None]:
i=0
j=1
lr=0.002
beta=0.001

In [None]:
Q[:,i]=Q[:,i]+2.0*lr*(X[i,j]-tf.tensordot(Q[:,1],P[0],axes=1))*P[j]-beta*Q[:,i]

In [None]:
Q[i]=Q[i]+2*lr*(X[i,j]-tf.tensordot(Q[:,1],P[0],axes=1))*P[j]-beta*Q[:,i]

In [None]:
def grad_descent(X,Q,P,i,j):
    """Apply one SGD update for the single rating ``X[i, j]``.

    ``Q`` (rows x k) and ``P`` (k x columns) are float numpy arrays and are
    updated in place: first row ``Q[i]``, then column ``P[:, j]`` using the
    already-updated ``Q[i]``. The learning rate (0.002) and weight decay
    (0.001) are fixed.

    Returns the product ``Q @ P`` after the update.
    """
    lr=0.002
    beta=0.001
    # Plain numpy dot products: tf.matmul rejects the rank-1 slices used here.
    Q[i]=Q[i]+2*lr*(X[i,j]-Q[i]@P[:,j])*P[:,j]-beta*Q[i]
    P[:,j]=P[:,j]+2*lr*(X[i,j]-Q[i]@P[:,j])*Q[i]-beta*P[:,j]
    return Q@P

In [None]:
Q=np.random.rand(6,3)
P=np.random.rand(6,3).T

In [None]:
grad_descent(X,Q,P,0,1)

In [None]:
np.save("den.np",R_f)