In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
import scipy.sparse as sparse
import random
import implicit
from implicit.als import AlternatingLeastSquares as ALS

In [3]:
########### 원본
# drop columns 전처리
def drop_columns(delete_columns:list,df:pd.DataFrame) -> pd.DataFrame:
    # 회의 때 얘기한 컬럼 전처리
    # None or 필요 없는 컬럼
    df = df.drop(columns=delete_columns)
    return df

def dict_to_column(columns:list,df:pd.DataFrame) -> pd.DataFrame:
    for col in columns:
        key_set = dict_to_set(col,df)
        df = set_to_column(col,key_set,df)
    return df

def dict_to_set(column:str,df:pd.DataFrame) -> set:
    key_set = set()
    for i in tqdm(df[column]):
        
        # column 내용이 dict일 경우
        if isinstance(i, dict):
            key_set |= set(i.keys())
        
        # column 내용이 None type 일 경우
        elif i == None:
            continue
        
        # column 내용이 [dict]로 감싸져있는 경우
        elif isinstance(i, list)&len(i)>0:
            if isinstance(i[0], str):
                continue
            key_set |= set(i[0].keys())
        
    return key_set

def set_to_column(column:str,key_set:set,df:pd.DataFrame) -> pd.DataFrame:
    for key in key_set:
        
        #중복인 경우 컬럼_중복컬럼으로 추가
        if key in df.columns:
            df[column+'_'+key] = df[column].apply(lambda x: x.get(key, None) if isinstance(x, dict) else None if x==None
                                       else ( None \
                                       if len(x)==0 else ( x[0].get(key, None) \
                                       if isinstance(x[0],dict)  else \
                                           None)) \
                                      )
        #중복이 아닌 경우
        else:
            df[key] = df[column].apply(lambda x: x.get(key, None) if isinstance(x, dict) else None if x==None
                                       else ( None \
                                       if len(x)==0 else ( x[0].get(key, None) \
                                       if isinstance(x[0],dict)  else \
                                           None)) \
                                       
                                      )

    df = df.drop(columns=[column])
    return df

# 컬럼안 key 값이 한 개일 경우 딕셔너리만 풉니다!
def key_to_element(element_columns:list,df:pd.DataFrame)->pd.DataFrame:
    for col in element_columns:
        key_set = dict_to_set(col, df)
        assert len(key_set)==1, f'{col}: key가 2개 이상이므로 dict_to_column 함수 이용하세요'
        for key in key_set:
            df[col] = df[col].apply(lambda x: x.get(key,None) if isinstance(x,dict) else None)
    return df

In [4]:
df_json = pd.read_json('/fastcampus-data/select_column_version_3.json')

In [5]:
df = pd.DataFrame(df_json)

In [6]:
df2 = df.copy()

# 전처리

## 날짜

In [7]:
df2['date_paid'] = pd.to_datetime(df['date_paid'])
df2 = df2[~df2['date_paid'].isna()]
df2 = df2[df2['date_paid'] > "2022-05-12"]

## paid == Ture and cancelled ==False

In [8]:
df2 = df2[(df2['paid'] == True) & (df2['cancelled']==False)]
df2.reset_index(drop=True,inplace=True)

## 한의사, 사업자 여부

In [9]:
# 응답이 없으니 일단 학생으로 넣는다.
df2['한의사 여부'] = df2['한의사 여부'].fillna(0)
df2['사업자 여부'] = df2['사업자 여부'].fillna(0)

## age_group

우선 null값을 etc로 넣는다. 나중에 사업자 여부 학생여부로 연령대를 나눌수도 있겠지만 일단은 미응답자로 넣는다.

In [10]:
df2['age_group'] = df2['age_group'].fillna('etc')

In [11]:
df2.isnull().sum()

_id              0
date_paid        0
customer_id      0
paid             0
name_x           0
category_id_y    0
product_ids      0
quantity         0
price            0
price_total      0
age_group        0
한의사 여부           0
사업자 여부           0
cancelled        0
name             0
slug             0
dtype: int64

## 도서 필터링

In [12]:
df3 = df2[df2['name'] == '도서']

# EDA 유저별 segment
학생, 페이한의사, 창업가, 병원장

In [13]:
df3

Unnamed: 0,_id,date_paid,customer_id,paid,name_x,category_id_y,product_ids,quantity,price,price_total,age_group,한의사 여부,사업자 여부,cancelled,name,slug
24,627a68ad9d93880024062b39,2022-05-12 02:16:04.401000+00:00,5dbd44572bb59605ca3cf52b,True,섭혜민 명의경방험안,5cf8bbba0098b2225c5dfaa3,626a4b8ae1579900234bd4b0,1.0,117000.0,117000.0,20-29,1.0,0.0,False,도서,book
28,627c50279d9388002406326a,2022-05-12 00:10:00.012000+00:00,5d838c7f3f0e6805c4706894,True,장골의 PI 변위는 없다,5cf8bbba0098b2225c5dfaa3,623bc8886f766b0024668eb5,1.0,38800.0,38800.0,30-39,1.0,0.0,False,도서,book
30,627c519c9d93880024063275,2022-05-12 00:15:59.124000+00:00,5ebc8bee09982e0735b2d835,True,초음파 가이드 근골격계 통증 치료의 정석,5cf8bbba0098b2225c5dfaa3,626a4b89e1579900234bd4af,1.0,199500.0,199500.0,30-39,1.0,1.0,False,도서,book
31,627c51a69d93880024063278,2022-05-12 00:21:00+00:00,5dae9a8d0dabe405b156efd0,True,초음파 가이드 근골격계 통증 치료의 정석,5cf8bbba0098b2225c5dfaa3,626a4b89e1579900234bd4af,1.0,199500.0,199500.0,30-39,1.0,1.0,False,도서,book
32,627c51c69d9388002406327e,2022-05-12 00:17:17.201000+00:00,5f6be49c48060c52caf1528e,True,초음파 가이드 근골격계 통증 치료의 정석,5cf8bbba0098b2225c5dfaa3,626a4b89e1579900234bd4af,1.0,199500.0,199500.0,50-59,1.0,1.0,False,도서,book
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57702,632024716c2e37002369f008,2022-09-13 08:51:40+00:00,5e5f01c7bfe4260944782d10,True,한방피부진료레슨,5cf8bbba0098b2225c5dfaa3,5dd397592bb59605ca3d01ea,1.0,43200.0,43200.0,30-39,1.0,1.0,False,도서,book
57710,632029a46c2e37002369f03b,2022-09-13 06:59:14+00:00,6131f79097686100197d7860,True,윤상훈·권병조의 알짜 근육학,5cf8bbba0098b2225c5dfaa3,60d018b7d26af90681b18980,1.0,18000.0,18000.0,etc,0.0,0.0,False,도서,book
57719,63202f736c2e37002369f06e,2022-09-13 07:22:18.847000+00:00,5dee019d4267e105dffef02d,True,트리거포인트 침치료,5cf8bbba0098b2225c5dfaa3,617a3aacbc63410023471ab6,1.0,58500.0,58500.0,20-29,1.0,0.0,False,도서,book
57731,632033be6c2e37002369f09a,2022-09-13 07:40:23.148000+00:00,617e903bbc6341002347262a,True,스파이랄 및 키네지오 테이핑,5cf8bbba0098b2225c5dfaa3,62551d67cb28e6002469d8fb,1.0,28500.0,28500.0,etc,1.0,1.0,False,도서,book


# 베스트 도서의 총 도서 구매 비율을 확인해보자!

In [30]:
best_seller_20 = pd.DataFrame(df3[['product_ids','name_x']].value_counts()[:20])
best_seller_20

Unnamed: 0_level_0,Unnamed: 1_level_0,0
product_ids,name_x,Unnamed: 2_level_1
629860599d93880024071acc,비만문답,594
62bbee3a08e04900234e36c8,플로차트 정형외과 진단,463
62ff2f43ca740b0024397ca4,초음파 유도하 침 시술 가이드북,354
6182113bbc63410023473754,흔히보는 정형외과 외래진료 가이드북,324
60d018b7d26af90681b18980,윤상훈·권병조의 알짜 근육학,251
63046e9358e1680033a580d6,영어 진료 가이드북,227
629860589d93880024071acb,황황교수의 임상의를 위한 근거기반 상한금궤 처방 매뉴얼,215
629860569d93880024071aca,황황교수의 개원 한의사를 위한 상한금궤 처방 강의록,201
62da5dd81b3c480022189fdf,외래에서 꼭 알아야 할 통증증후군 137가지,149
62392e146f766b0024667fe2,카이로프랙틱 기본테크닉론,137


In [96]:
best = df3.groupby('product_ids').sum()['quantity'].sort_values(ascending=False)[:20].reset_index()
# best
best_seller_20 = pd.merge(best,df3[df3['product_ids'].isin(best['product_ids'])][['product_ids','name_x']],how='inner',on='product_ids').drop_duplicates()

In [97]:
best_seller_20

Unnamed: 0,product_ids,quantity,name_x
0,629860599d93880024071acc,597.0,비만문답
594,62bbee3a08e04900234e36c8,465.0,플로차트 정형외과 진단
1057,62ff2f43ca740b0024397ca4,383.0,초음파 유도하 침 시술 가이드북
1411,6182113bbc63410023473754,328.0,흔히보는 정형외과 외래진료 가이드북
1735,60d018b7d26af90681b18980,259.0,윤상훈·권병조의 알짜 근육학
1986,63046e9358e1680033a580d6,229.0,영어 진료 가이드북
2213,629860589d93880024071acb,216.0,황황교수의 임상의를 위한 근거기반 상한금궤 처방 매뉴얼
2428,629860569d93880024071aca,201.0,황황교수의 개원 한의사를 위한 상한금궤 처방 강의록
2629,62da5dd81b3c480022189fdf,149.0,외래에서 꼭 알아야 할 통증증후군 137가지
2778,62392e146f766b0024667fe2,139.0,카이로프랙틱 기본테크닉론


In [59]:
best_seller_ratio = round(sum(best_seller_20['quantity']) / sum(df3['quantity']) * 100, 2)

In [60]:
best_seller_ratio

62.37

In [78]:
student = df3[(df3['한의사 여부'] == 0) &(df3['사업자 여부'] == 0) ]
ceo = df3[(df3['한의사 여부'] == 0) &(df3['사업자 여부'] == 1) ]
paydoc = df3[(df3['한의사 여부'] == 1) &(df3['사업자 여부'] == 0) ]
docceo = df3[(df3['한의사 여부'] == 1) &(df3['사업자 여부'] == 1) ]

# nDCG 평가지표를 만들어서 추천이 잘 되었는지 여부를 파악해야 한다.

## 고객군별 베스트 20 도서

In [483]:
student_top20 = pd.DataFrame(student[['product_ids','name_x','name']].value_counts()[:20])
ceo_top20 = pd.DataFrame(ceo[['product_ids','name_x','name']].value_counts()[:20])
paydoc_top20 = pd.DataFrame(paydoc[['product_ids','name_x','name']].value_counts()[:20])
docceo_top20 = pd.DataFrame(docceo[['product_ids','name_x','name']].value_counts()[:20])

In [484]:
display(student_top20,ceo_top20,paydoc_top20,docceo_top20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
product_ids,name_x,name,Unnamed: 3_level_1
60d018b7d26af90681b18980,윤상훈·권병조의 알짜 근육학,도서,105
629860599d93880024071acc,비만문답,도서,70
62ff2f43ca740b0024397ca4,초음파 유도하 침 시술 가이드북,도서,42
62bbee3a08e04900234e36c8,플로차트 정형외과 진단,도서,40
6182113bbc63410023473754,흔히보는 정형외과 외래진료 가이드북,도서,33
610b6f9ad26af90681b1e699,임상 한의사를 위한 기본 한약처방 강의 2판,도서,29
628f154b9d9388002406f447,"선생님, 이제 그만 저 좀 포기해 주세요",도서,25
617f9228bc63410023472aa4,숲을 보는 요통치료,도서,24
5d78491b19efa30eb29143cc,플로차트 한약치료,도서,23
629860589d93880024071acb,황황교수의 임상의를 위한 근거기반 상한금궤 처방 매뉴얼,도서,23


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
product_ids,name_x,name,Unnamed: 3_level_1
629860599d93880024071acc,비만문답,도서,26
62bbee3a08e04900234e36c8,플로차트 정형외과 진단,도서,24
62ff2f43ca740b0024397ca4,초음파 유도하 침 시술 가이드북,도서,24
63046e9358e1680033a580d6,영어 진료 가이드북,도서,13
6182113bbc63410023473754,흔히보는 정형외과 외래진료 가이드북,도서,12
629860589d93880024071acb,황황교수의 임상의를 위한 근거기반 상한금궤 처방 매뉴얼,도서,9
62cd2dbb3fc97a002bec851b,사암침의 해석과 임상,도서,9
629860569d93880024071aca,황황교수의 개원 한의사를 위한 상한금궤 처방 강의록,도서,9
62da5dd81b3c480022189fdf,외래에서 꼭 알아야 할 통증증후군 137가지,도서,8
628df3509d9388002406ed08,趙紹琴(조소금) 내과학,도서,5


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
product_ids,name_x,name,Unnamed: 3_level_1
629860599d93880024071acc,비만문답,도서,249
62bbee3a08e04900234e36c8,플로차트 정형외과 진단,도서,198
62ff2f43ca740b0024397ca4,초음파 유도하 침 시술 가이드북,도서,140
6182113bbc63410023473754,흔히보는 정형외과 외래진료 가이드북,도서,135
60d018b7d26af90681b18980,윤상훈·권병조의 알짜 근육학,도서,93
629860589d93880024071acb,황황교수의 임상의를 위한 근거기반 상한금궤 처방 매뉴얼,도서,76
63046e9358e1680033a580d6,영어 진료 가이드북,도서,75
629860569d93880024071aca,황황교수의 개원 한의사를 위한 상한금궤 처방 강의록,도서,68
62392e146f766b0024667fe2,카이로프랙틱 기본테크닉론,도서,56
610b6f9ad26af90681b1e699,임상 한의사를 위한 기본 한약처방 강의 2판,도서,55


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
product_ids,name_x,name,Unnamed: 3_level_1
629860599d93880024071acc,비만문답,도서,249
62bbee3a08e04900234e36c8,플로차트 정형외과 진단,도서,201
62ff2f43ca740b0024397ca4,초음파 유도하 침 시술 가이드북,도서,148
6182113bbc63410023473754,흔히보는 정형외과 외래진료 가이드북,도서,144
63046e9358e1680033a580d6,영어 진료 가이드북,도서,124
629860589d93880024071acb,황황교수의 임상의를 위한 근거기반 상한금궤 처방 매뉴얼,도서,107
629860569d93880024071aca,황황교수의 개원 한의사를 위한 상한금궤 처방 강의록,도서,102
62da5dd81b3c480022189fdf,외래에서 꼭 알아야 할 통증증후군 137가지,도서,80
62392e146f766b0024667fe2,카이로프랙틱 기본테크닉론,도서,60
628ecd6b9d9388002406f05b,약침의 정석 –통증편,도서,56


# ALS

In [61]:
products_json = pd.read_json("/fastcampus-data/products/products.json")
products_df = pd.DataFrame(products_json)
products_df = key_to_element(['_id'],products_df)

100%|██████████| 5141/5141 [00:00<00:00, 704072.25it/s]


In [62]:
def recommended_product(products, result):
    products_id = products[result[0][0]]
    return products_df[products_df['_id'] == products_id].loc[:,'meta_title']

In [63]:
def recommend(df, als_model,purchase_sparse, products:list, customers:list,customerId:str):
    id_index = customers.index(customerId)
    result = als_model.recommend(id_index, purchase_sparse[id_index], N=10)
    products = [ df[df['product_ids'] == products[i]]['name_x'].values[0] for i in result[0]]
    return pd.DataFrame(products)

## ALS 전체 고객

In [64]:
# 고객과 아이템 얼마나 구매했는가
grouped_purchased = df3.groupby(['customer_id','product_ids']).sum().reset_index()

In [65]:
customers = list(np.sort(grouped_purchased['customer_id'].unique()))
products = list (grouped_purchased['product_ids'].unique())
quantity = list(grouped_purchased['quantity'])

rows = grouped_purchased['customer_id'].astype('category').cat.codes
cols = grouped_purchased['product_ids'].astype('category').cat.codes
print(len(customers)) # 2663
print(len(products))  # 223
purchase_sparse = sparse.csr_matrix((quantity, (rows, cols)), shape = (len(customers),len(products)))
purchase_sparse #2663 * 252 행렬

2663
252


<2663x252 sparse matrix of type '<class 'numpy.float64'>'
	with 5910 stored elements in Compressed Sparse Row format>

In [66]:
# Sparsity: 얼마나 비어있나?
matrix_size = purchase_sparse.shape[0]* purchase_sparse.shape[1]
num_purchases = len(purchase_sparse.nonzero()[0])
sparsity = 100 * (1 - (num_purchases / matrix_size))
sparsity

99.11932478586628

In [67]:
als_model = ALS(factors=20, regularization=0.01, iterations = 100)
als_model.fit(purchase_sparse)

  0%|          | 0/100 [00:00<?, ?it/s]

In [68]:
result = als_model.recommend(0, purchase_sparse[0], N=10)
result

(array([160,   2, 135, 162,  73, 198,   8,  23, 177,  15], dtype=int32),
 array([0.21755786, 0.15902099, 0.10876625, 0.09228627, 0.08397196,
        0.07077934, 0.06653977, 0.05999781, 0.05383343, 0.05367574],
       dtype=float32))

In [69]:
recommended_product(products, result)

234    사진으로 공부하는 이비인후과학
Name: meta_title, dtype: object

In [70]:
customers[0]

'5d60cab24e77525ec5ca13d5'

In [71]:
recommend(df3,als_model,purchase_sparse,products,customers,customers[0])

Unnamed: 0,0
0,사진으로 공부하는 이비인후과학
1,약침의 정석 –통증편
2,주문봉 진단학강의
3,쉽게 배우는 근근막통증증후군 TPI치료법
4,알기 쉬운 이비인후과 한약처방가이드
5,하루10분운동으로 100세 건강지키기
6,초음파 유도하 침 시술 가이드북
7,사상임상약물대전
8,귤창서영
9,침구과 진료매뉴얼


In [88]:
customers = pd.Series(customers)
student_series = customers[customers.isin(student['customer_id'])]
paydoc_series = customers[customers.isin(paydoc['customer_id'])]
ceo_series = customers[customers.isin(ceo['customer_id'])]
docceo_series = customers[customers.isin(docceo['customer_id'])]

학생

In [89]:
student_series

20      5d60d45a4e77525ec5ca1465
23      5d60d5f14e77525ec5ca1475
40      5d610caa4e77525ec5ca1518
41      5d610ff44e77525ec5ca151a
42      5d6119754e77525ec5ca151e
                  ...           
2647    62c64fa9b99f01002300a886
2654    62f1acfba670760022a3eaa0
2658    63155514c640eb0021af84fc
2661    63195d5e6c2e37002369dd30
2662    63199ec06c2e37002369df0f
Length: 461, dtype: object

# 고객군별 추천

### 학생

In [502]:
grouped_purchased = student.groupby(['customer_id','product_ids']).sum().reset_index()

In [503]:
customers = list(np.sort(grouped_purchased['customer_id'].unique()))
products = list (grouped_purchased['product_ids'].unique())
quantity = list(grouped_purchased['quantity'])

rows = grouped_purchased['customer_id'].astype('category').cat.codes
cols = grouped_purchased['product_ids'].astype('category').cat.codes
print(len(customers)) # 461
print(len(products))  # 147
purchase_sparse = sparse.csr_matrix((quantity, (rows, cols)), shape = (len(customers),len(products)))
purchase_sparse #461 * 147 행렬

461
147


<461x147 sparse matrix of type '<class 'numpy.float64'>'
	with 930 stored elements in Compressed Sparse Row format>

In [504]:
# Sparsity: 얼마나 비어있나?
matrix_size = purchase_sparse.shape[0]* purchase_sparse.shape[1]
num_purchases = len(purchase_sparse.nonzero()[0])
sparsity = 100 * (1 - (num_purchases / matrix_size))
sparsity

98.6276506264111

In [505]:
als_model = ALS(factors=20, regularization=0.01, iterations = 100)
als_model.fit(purchase_sparse)

  0%|          | 0/100 [00:00<?, ?it/s]

In [506]:
result = als_model.recommend(0, purchase_sparse[0], N=10)
result

(array([129, 124,  31, 127,  96, 106, 140,  76, 142,  18], dtype=int32),
 array([0.12192045, 0.11228589, 0.10810536, 0.1052188 , 0.08762927,
        0.07781832, 0.07385367, 0.0696134 , 0.05553171, 0.0544217 ],
       dtype=float32))

In [507]:
recommended_product(products,result)

2776    현동의감
Name: meta_title, dtype: object

In [539]:
recommend(df, als_model,purchase_sparse, products, customers,customerId=customers[4])

Unnamed: 0,0
0,약처방의 정석
1,면역학강의
2,경계 너머의 한방처방
3,섭혜민 명의경방험안
4,Cyriax 정형의학 3판
5,중국 왕문원 평형침구학
6,한방피부진료레슨
7,"[세트] 말초신경 약침의학, 근골격계 약침의학"
8,경락도해
9,플로차트 정형외과 진단


### 페이한의사

In [509]:
grouped_purchased = paydoc.groupby(['customer_id','product_ids']).sum().reset_index()
customers = list(np.sort(grouped_purchased['customer_id'].unique()))
products = list (grouped_purchased['product_ids'].unique())
quantity = list(grouped_purchased['quantity'])

rows = grouped_purchased['customer_id'].astype('category').cat.codes
cols = grouped_purchased['product_ids'].astype('category').cat.codes
print(len(customers)) # 1112
print(len(products))  # 205
purchase_sparse = sparse.csr_matrix((quantity, (rows, cols)), shape = (len(customers),len(products)))
purchase_sparse #461 * 147 행렬

1112
205


<1112x205 sparse matrix of type '<class 'numpy.float64'>'
	with 2380 stored elements in Compressed Sparse Row format>

In [510]:
# Sparsity: 얼마나 비어있나?
matrix_size = purchase_sparse.shape[0]* purchase_sparse.shape[1]
num_purchases = len(purchase_sparse.nonzero()[0])
sparsity = 100 * (1 - (num_purchases / matrix_size))
sparsity

98.95595718547114

In [511]:
als_model = ALS(factors=20, regularization=0.01, iterations = 100)
als_model.fit(purchase_sparse)

  0%|          | 0/100 [00:00<?, ?it/s]

In [512]:
result = als_model.recommend(2, purchase_sparse[2], N=10)
result

(array([180, 202, 190, 140, 185,  16,  98, 156, 195,  91], dtype=int32),
 array([0.02086133, 0.01420359, 0.01266685, 0.01011939, 0.00657014,
        0.00473182, 0.00451789, 0.00428839, 0.0041318 , 0.00407216],
       dtype=float32))

In [513]:
recommended_product(products,result)

49    통증치료를 위한 알기 쉬운 근골격계 이학적 검사법
Name: meta_title, dtype: object

In [514]:
recommend(df, als_model,purchase_sparse, products, customers,customerId=customers[0])

Unnamed: 0,0
0,통증치료를 위한 알기 쉬운 근골격계 이학적 검사법
1,증보운곡본초학
2,나가노식 치료
3,알기 쉬운 이비인후과 한약처방가이드
4,실전한약가이드
5,그림으로 보는 뇌와 신경
6,근골격계 약침의학
7,오국통 온병명방
8,외래진료 달인되기
9,QBook: Case based Review


### 창업가

In [515]:
grouped_purchased = ceo.groupby(['customer_id','product_ids']).sum().reset_index()
customers = list(np.sort(grouped_purchased['customer_id'].unique()))
products = list (grouped_purchased['product_ids'].unique())
quantity = list(grouped_purchased['quantity'])

rows = grouped_purchased['customer_id'].astype('category').cat.codes
cols = grouped_purchased['product_ids'].astype('category').cat.codes
print(len(customers)) # 90
print(len(products))  # 90
purchase_sparse = sparse.csr_matrix((quantity, (rows, cols)), shape = (len(customers),len(products)))
purchase_sparse #90 * 90 행렬

90
90


<90x90 sparse matrix of type '<class 'numpy.float64'>'
	with 266 stored elements in Compressed Sparse Row format>

In [516]:
# Sparsity: 얼마나 비어있나?
matrix_size = purchase_sparse.shape[0]* purchase_sparse.shape[1]
num_purchases = len(purchase_sparse.nonzero()[0])
sparsity = 100 * (1 - (num_purchases / matrix_size))
sparsity

96.71604938271605

In [517]:
als_model = ALS(factors=20, regularization=0.01, iterations = 100)
als_model.fit(purchase_sparse)

  0%|          | 0/100 [00:00<?, ?it/s]

In [518]:
result = als_model.recommend(0, purchase_sparse[0], N=10)
result

(array([41,  7,  5, 31,  6, 70, 82, 28, 14, 57], dtype=int32),
 array([0.14130181, 0.14130181, 0.1413018 , 0.12762347, 0.12762345,
        0.12250176, 0.10146499, 0.07488124, 0.06875424, 0.0675987 ],
       dtype=float32))

In [519]:
recommended_product(products,result)

4341    증례와 함께 하는 한약처방
Name: meta_title, dtype: object

In [520]:
recommend(df, als_model,purchase_sparse, products, customers,customerId=customers[0])

Unnamed: 0,0
0,증례와 함께 하는 한약처방
1,플로차트 정형외과 진단
2,초음파 가이드 근골격계 통증 치료의 정석
3,"실전, 임상한의학 알레르기질환"
4,비만문답
5,한의사를 위한 통증치료 매뉴얼 Part 3
6,복증기람(익)
7,플로차트 정신질환의 한방치료
8,초음파 유도하 침 시술 가이드북
9,나의 갑상선 진료


### 병원장

In [521]:
grouped_purchased = docceo.groupby(['customer_id','product_ids']).sum().reset_index()
customers = list(np.sort(grouped_purchased['customer_id'].unique()))
products = list (grouped_purchased['product_ids'].unique())
quantity = list(grouped_purchased['quantity'])

rows = grouped_purchased['customer_id'].astype('category').cat.codes
cols = grouped_purchased['product_ids'].astype('category').cat.codes
print(len(customers)) # 1000
print(len(products))  # 189
purchase_sparse = sparse.csr_matrix((quantity, (rows, cols)), shape = (len(customers),len(products)))
purchase_sparse #1000 * 189 행렬

1000
189


<1000x189 sparse matrix of type '<class 'numpy.float64'>'
	with 2334 stored elements in Compressed Sparse Row format>

In [522]:
# Sparsity: 얼마나 비어있나?
matrix_size = purchase_sparse.shape[0]* purchase_sparse.shape[1]
num_purchases = len(purchase_sparse.nonzero()[0])
sparsity = 100 * (1 - (num_purchases / matrix_size))
sparsity

98.76507936507937

In [523]:
als_model = ALS(factors=20, regularization=0.01, iterations = 100)
als_model.fit(purchase_sparse)

  0%|          | 0/100 [00:00<?, ?it/s]

In [524]:
result = als_model.recommend(0, purchase_sparse[0], N=10)
result

(array([117, 106, 139, 128,  35,  87, 150,  94,   8, 141], dtype=int32),
 array([0.03631108, 0.01743528, 0.01114338, 0.00790505, 0.00634375,
        0.0060242 , 0.00547767, 0.0050641 , 0.00491204, 0.00424065],
       dtype=float32))

In [525]:
recommended_product(products,result)

572    약처방의 정석 (1, 2권 세트) 
Name: meta_title, dtype: object

In [535]:
recommend(df, als_model,purchase_sparse, products, customers,customerId=customers[0])

Unnamed: 0,0
0,약처방의 정석
1,임상에서 자주 쓰이는 근골격계 검진법
2,척추통증 - 움직임 문제 -(Back Pain: A Movement Problem-...
3,호희서 상한론강의
4,면역학강의
5,주사영양치료
6,경계 너머의 한방처방
7,실전한약가이드
8,플로차트 정형외과 진단
9,불면 장애 -INSOMNIA DISORDERS
