In [2]:
import sys
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
import random

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
import implicit

In [3]:
def def_train_test_split(ratings, split_count, fraction=None):
    """
    Split recommendation data into train and test sets.

    For every selected user, ``split_count`` of that user's stored
    interactions are drawn at random (using NumPy's global random state),
    zeroed in the training matrix and copied into the test matrix.

    Params
    ------
    ratings : scipy.sparse matrix
        Interactions between users (rows) and items (columns).
        Any sparse format is accepted; it is converted to CSR internally.
    split_count : int
        Number of user-item-interactions per user to move
        from training to test set.
    fraction : float, optional
        Fraction of users to split off some of their
        interactions into test set. Only users with at least
        ``2 * split_count`` stored interactions are eligible.
        If None (or 0), then all users are considered.

    Returns
    -------
    train : scipy.sparse.csr_matrix
        Copy of ``ratings`` with the held-out interactions set to zero.
    test : scipy.sparse.csr_matrix
        Same shape as ``ratings``; holds only the held-out interactions.
    user_index : list of int or range
        Row indices of the users that had interactions moved to ``test``.

    Raises
    ------
    ValueError
        If ``fraction`` asks for more users than are eligible, or (when all
        users are considered) if some user has fewer than ``split_count``
        stored interactions.
    """
    # Row-oriented view: getrow() and [row, cols] indexing below need a
    # subscriptable format, and CSR makes both cheap.
    ratings = ratings.tocsr()
    n_users = ratings.shape[0]

    train = ratings.copy().tocoo()
    test = sparse.lil_matrix(train.shape)

    if fraction:
        min_interactions = 2 * split_count
        # Users keeping at least split_count interactions after the split.
        eligible = np.where(np.bincount(train.row) >= min_interactions)[0]
        n_selected = int(np.floor(fraction * n_users))
        try:
            user_index = np.random.choice(
                eligible, size=n_selected, replace=False
            ).tolist()
        except ValueError as err:
            # Report the threshold that was actually applied.
            raise ValueError(
                'Not enough users with >= {} interactions for fraction of {}'
                .format(min_interactions, fraction)
            ) from err
    else:
        user_index = range(n_users)

    # LIL supports efficient per-element assignment.
    train = train.tolil()

    for user in user_index:
        held_out = np.random.choice(ratings.getrow(user).indices,
                                    size=split_count,
                                    replace=False)
        train[user, held_out] = 0.
        test[user, held_out] = ratings[user, held_out]

    # Test and training are truly disjoint
    assert train.multiply(test).nnz == 0
    return train.tocsr(), test.tocsr(), user_index

In [4]:
# Location of the interaction export; change this one constant to run the
# notebook against a copy of the file stored elsewhere.
DATA_PATH = 'C:/Users/Yingting L/Desktop/recommender/SVD implicit/356365.csv'
# Seed for NumPy's global RNG so the random train/test split is reproducible.
SPLIT_SEED = 42

raw_data = pd.read_csv(DATA_PATH)

# Sparse interaction matrix: rows are user_id, columns are service_type_id,
# values are the srcount column cast to float.
original = sparse.csr_matrix(
    (raw_data['srcount'].astype(float),
     (raw_data['user_id'], raw_data['service_type_id']))
)

# Hold out one interaction for a random 10% of users.
np.random.seed(SPLIT_SEED)
user_train, user_test, user = def_train_test_split(original, 1, fraction=0.1)

In [5]:
#rotate matrix need to work on (transpose)
# Transpose both splits so rows are service types and columns are users;
# the model below is fit on the transposed training matrix.
rotate_train = user_train.T
rotate_test = user_test.T

In [10]:
# Display the transposed training matrix to check its shape and stored-entry count.
rotate_train

<670x459368 sparse matrix of type '<class 'numpy.float64'>'
	with 264878 stored elements in Compressed Sparse Column format>

In [11]:
# ALS model: 20 latent factors, regularization 0.1, 20 iterations.
model = implicit.als.AlternatingLeastSquares(
    factors=20,
    regularization=0.1,
    iterations=20,
)

# Scale the training matrix by a constant weight and make sure it is float64.
alpha_val = 15
data_conf = (alpha_val * rotate_train).astype(np.float64)



In [12]:
# Fit the ALS model on the alpha-scaled, transposed training matrix.
model.fit(data_conf)

100%|██████████| 20.0/20 [00:05<00:00,  2.55it/s]


In [13]:
# Users that had an interaction held out by the train/test split.
u = user

rec_list = []   # raw model.recommend() output, one entry per user
rec_st = []     # recommended service-type ids only, one list per user
st = []         # every service_type_id recorded for the user in raw_data
user_hit = []   # reserved for per-user hit counts (not populated here)

# Group the raw rows once instead of filtering the whole frame per user;
# row order within each user is preserved.
services_by_user = (
    raw_data.groupby("user_id")["service_type_id"].apply(list).to_dict()
)

for item in u:
    # Pass the TRAINING interactions: recommend() uses this matrix to drop
    # items the user already has. Passing user_test here would filter out
    # exactly the held-out items and let training items be recommended back.
    rec = model.recommend(item, user_train, N=3)
    rec_st.append([i[0] for i in rec])
    rec_list.append(rec)
    st.append(services_by_user.get(item, []))
    

In [13]:
# i=0
# while i<=len(rec_st)-1:
#     item1=rec_st[i]

#     for item2 in rec_st:
#         if item1==item2:
#             hit_count=hit_count+1
#     i=i+1
# user_hit.append(u,hit_count)
    
    

In [21]:
# Number of users returned by def_train_test_split as having held-out interactions.
len(user)

45936

In [None]:
# u was assigned from `user` in the recommendation cell, so this is the same count.
len(u)