# ExMatrix
### Como usar o sistema de recomendação.

- Author: Israel Oliveira [\[e-mail\]](mailto:prof.israel@gmail.com)

In [1]:
import sys
sys.path.insert(1, 'src/')
from train import *
from recommender import Recommender 

from tqdm import tqdm
import pandas as pd
from time import time

In [2]:
%load_ext watermark

In [3]:
# Run this cell before closing the notebook: it records the environment
# (date, imported package versions, git state, machine) next to the results.
%watermark -d --iversion -b -r -g -m -v
# Print the CPU model name (Linux only: reads /proc/cpuinfo).
!grep 'model name' /proc/cpuinfo |head -n 1 |sed -e 's/model\ name/CPU/'
# Print the total memory and swap sizes.
!free -h |cut -d'i' -f1  |grep -v total

numpy  1.19.0
pandas 1.0.5
2020-08-02 

CPython 3.7.8
IPython 7.16.1

compiler   : GCC 8.3.0
system     : Linux
release    : 5.4.0-7634-generic
machine    : x86_64
processor  : 
CPU cores  : 8
interpreter: 64bit
Git hash   : 4388bdb94299a7610794eaf87efd42e45e9dea8b
Git repo   : https://github.com/ysraell/aceleradev_private.git
Git branch : master
CPU	: Intel(R) Xeon(R) CPU E3-1241 v3 @ 3.50GHz
Mem:           31G
Swap:          19G


# Treinando o modelo.
Dataset em `./data/`, salvando o modelo em `./model/`.

In [4]:
# Train with the dataset directory 'data/' and the model directory 'model/',
# measuring the wall-clock duration of the whole call.
t = time()
train('data/','model/')
dt = time() - t
# dt is in seconds; report it in minutes.
print("Tempo total de treino: {:.2f} min.".format(dt/60))

2020-08-02 01:22:40.897 | INFO     | utils:load_dataset:82 - Carregando dataset de treino...
2020-08-02 01:22:54.534 | INFO     | utils:load_dataset:91 - ...pronto!
2020-08-02 01:22:54.535 | INFO     | utils:feat_proc:128 - Processando as features...
2020-08-02 01:23:39.555 | INFO     | utils:feat_proc:163 - ...pronto!
2020-08-02 01:23:40.004 | INFO     | train:train:35 - Treinando e modelo...
2020-08-02 01:23:40.251 | INFO     | model:fit:69 - Processando valores.
2020-08-02 01:23:41.319 | INFO     | model:fit:73 - Fatorizando.
2020-08-02 01:25:08.362 | INFO     | model:fit:78 - Matriz pronta.
2020-08-02 01:25:08.364 | INFO     | train:train:37 - ...pronto.
2020-08-02 01:25:11.605 | INFO     | train:train:42 - Model salvo: "model/model_default.pkl"..


Tempo total de treino: 2.51 min.


# Carrega o dataset para demonstração.

In [5]:
# Load the demo data: `portfolios` is the table (columns `id` and `P`) used by
# every evaluation below. With train_test_merged=True the log shows both the
# train and the test datasets being read -- presumably they are combined into
# `data`; TODO confirm against utils.load_dataset.
data, portfolios = load_dataset(path_data = 'data/', train_test_merged = True)

2020-08-02 01:25:11.659 | INFO     | utils:load_dataset:82 - Carregando dataset de treino...
2020-08-02 01:25:25.437 | INFO     | utils:load_dataset:91 - ...pronto!
2020-08-02 01:25:25.438 | INFO     | utils:load_dataset:95 - Carregando dataset de teste...
2020-08-02 01:25:25.450 | INFO     | utils:load_dataset:104 - ...pronto!


# Carrega o modelo.

In [6]:
# Build the recommender from the 'model/' directory (where the training cell
# reported saving the model).
model = Recommender('model/')

2020-08-02 01:25:26.292 | INFO     | recommender:__init__:33 - Modelo carregado


#### Exemplo de um ID:

In [7]:
portfolios.loc[0]

id    09e95c1a84049001d086470a8f320a19b076f955a89122...
P                                                     1
Name: 0, dtype: object

In [8]:
# Ask the model for recommendations for the ID stored at row 0, take the first
# one, and show the row of `portfolios` that has that recommended ID.
portfolios.loc[portfolios.id == model.For([portfolios.id.loc[0]])[0]]

Unnamed: 0,id,P
658,bbe355fb2bc8cba02b400b8a67baa29a6e234bde019a80...,2


## Recomendações 1-K
- Entra `1` ID e saem `K` recomendações.

In [10]:
def one_N(N,K):
    """Measure the 1-K hit rate: one query ID in, ``K`` recommendations out.

    A query counts as a hit when at least one ID returned by ``model.For``
    belongs to the same portfolio (column ``P``) as the query ID.

    Reads the notebook globals ``portfolios`` and ``model``.

    Parameters
    ----------
    N : int
        Number of rows drawn with ``portfolios.sample(N, random_state=2)``.
        Any value ``<= 0`` evaluates every row of ``portfolios`` instead.
    K : int
        Forwarded to ``model.For`` as ``k``.

    Returns
    -------
    dict
        ``{portfolio: (n_queries, n_hits, hit_rate_percent)}`` for the
        portfolio labels 1 and 2; the percentage is rounded to two decimals.
    """
    # Both modes share the same loop; only the set of query rows differs.
    queries = portfolios.sample(N, random_state=2) if N > 0 else portfolios

    # The IDs of each portfolio do not change during the evaluation, so build
    # them once (as sets, for O(1) membership) instead of once per
    # recommendation inside the loop.
    ids_by_portfolio = {
        label: set(portfolios.loc[portfolios.P == label, 'id'])
        for label in portfolios.P.unique()
    }

    hits = {1: [], 2: []}
    for _, row in tqdm(queries.iterrows(), total=queries.shape[0]):
        label = row['P']
        recs = model.For([row['id']], k=K)
        same_portfolio = ids_by_portfolio[label]
        hits[label].append(int(any(rec in same_portfolio for rec in recs)))

    # max(1, ...) avoids a division by zero for a portfolio with no queries.
    return {
        label: (len(flags), sum(flags), round(100*sum(flags)/max(1,len(flags)),2))
        for label, flags in hits.items()
    }

In [11]:
# Evaluate the 1-K scenario for K = 1, 5 and 10. N=0 makes one_N iterate over
# every row of `portfolios` instead of a sample. `results` maps K to the
# per-portfolio summary returned by one_N.
results = {}
for K in [1, 5, 10]:
    results[K] = one_N(0,K)

100%|██████████| 831/831 [13:35<00:00,  1.02it/s]
100%|██████████| 831/831 [14:15<00:00,  1.03s/it]
100%|██████████| 831/831 [13:32<00:00,  1.02it/s]


 Considerando apenas os portfólios 2 e 3, numerados abaixo como 1 e 2 (resp.).

In [12]:
def show_results(results):
    """Tabulate evaluation summaries, one row per value of K.

    ``results`` maps each K to a dict of per-portfolio tuples. For every K
    the tuples are converted to lists, flattened with ``flat`` and prefixed
    with K, producing the seven columns named below.
    """
    header = [
        'K',
        'Total_1', 'Acertos_1', 'Percentual_1 (%)',
        'Total_2', 'Acertos_2', 'Percentual_2 (%)',
    ]
    table = []
    for k_value, per_portfolio in results.items():
        stats = [list(entry) for entry in per_portfolio.values()]
        table.append([k_value] + flat(stats))
    return pd.DataFrame(table, columns=header)

In [13]:
show_results(results)

Unnamed: 0,K,Total_1,Acertos_1,Percentual_1 (%),Total_2,Acertos_2,Percentual_2 (%)
0,1,566,180,31.8,265,80,30.19
1,5,566,447,78.98,265,188,70.94
2,10,566,519,91.7,265,221,83.4


## Recomendações S-K
- Entra `S` IDs e saem `K` recomendações.

In [14]:
from tqdm import tqdm

def more_N(N,S,K,random_state=None):
    """Measure the S-K hit rate: ``S`` query IDs in, ``K`` recommendations out.

    In each of ``N`` rounds, and for every portfolio label found in
    ``portfolios.P``, ``S`` IDs are sampled from that portfolio and passed
    together to ``model.For``. The round is a hit for that portfolio when at
    least one returned ID belongs to the same portfolio.

    Reads the notebook globals ``portfolios`` and ``model``.

    Parameters
    ----------
    N : int
        Number of sampling rounds per portfolio.
    S : int
        Number of IDs sampled per query.
    K : int
        Forwarded to ``model.For`` as ``k``.
    random_state : int or numpy.random.RandomState, optional
        Seed (or generator) for the sampling, so a run can be reproduced.
        The default ``None`` keeps the original unseeded behaviour.

    Returns
    -------
    dict
        ``{portfolio: (n_rounds, n_hits, hit_rate_percent)}`` for the
        portfolio labels 1 and 2; the percentage is rounded to two decimals.
    """
    rng = None
    if random_state is not None:
        import numpy as np
        # A single generator shared by all draws: passing the same integer to
        # every .sample() call would return the same S rows in every round.
        if isinstance(random_state, np.random.RandomState):
            rng = random_state
        else:
            rng = np.random.RandomState(random_state)

    # Per-portfolio rows and ID sets are loop-invariant: build them once
    # rather than in every round and for every recommendation.
    groups = []
    for label in portfolios.P.unique():
        members = portfolios.loc[portfolios.P == label]
        groups.append((label, members, set(members['id'])))

    hits = {1: [], 2: []}
    for _ in tqdm(range(N)):
        for label, members, member_ids in groups:
            queries = members.sample(n=S, random_state=rng)
            recs = model.For(queries['id'].to_list(), k=K)
            hits[label].append(int(any(rec in member_ids for rec in recs)))

    # max(1, ...) avoids a division by zero when N == 0.
    return {
        label: (len(flags), sum(flags), round(100*sum(flags)/max(1,len(flags)),2))
        for label, flags in hits.items()
    }

### 5-K

In [15]:
# Evaluate the S-K scenario with S=5 input IDs per query: 100 rounds for each
# of K = 1, 5 and 10. `results2` maps K to the summary returned by more_N.
results2 = {}
for K in [1, 5, 10]:
    results2[K] = more_N(100,5,K)

100%|██████████| 100/100 [16:31<00:00,  9.91s/it]
100%|██████████| 100/100 [16:55<00:00, 10.15s/it]
100%|██████████| 100/100 [17:06<00:00, 10.26s/it]


In [16]:
show_results(results2)

Unnamed: 0,K,Total_1,Acertos_1,Percentual_1 (%),Total_2,Acertos_2,Percentual_2 (%)
0,1,100,38,38.0,100,33,33.0
1,5,100,85,85.0,100,89,89.0
2,10,100,100,100.0,100,94,94.0


### 10-K

In [17]:
# Evaluate the S-K scenario with S=10 input IDs per query: 100 rounds for each
# of K = 1, 5 and 10. `results3` maps K to the summary returned by more_N.
results3 = {}
for K in [1, 5, 10]:
    results3[K] = more_N(100,10,K)

100%|██████████| 100/100 [34:21<00:00, 20.62s/it]
100%|██████████| 100/100 [33:58<00:00, 20.38s/it]
100%|██████████| 100/100 [35:19<00:00, 21.20s/it]


In [18]:
show_results(results3)

Unnamed: 0,K,Total_1,Acertos_1,Percentual_1 (%),Total_2,Acertos_2,Percentual_2 (%)
0,1,100,30,30.0,100,30,30.0
1,5,100,86,86.0,100,84,84.0
2,10,100,98,98.0,100,96,96.0


In [19]:
model.model.M.shape

(462298, 39)