In [1]:
!pip install fastai Flask SQLAlchemy PyMySQL pandas

Defaulting to user installation because normal site-packages is not writeable


### Imports

In [1]:
import sqlalchemy as db
import pandas as pd
from fastai.tabular.all import *
from fastai.collab import *

### Setup mysql Connection

In [3]:
config = {
    'host': 'localhost',
    'port': 3002,
    'user': 'root',
    'password': 'root_password',
    'database': 'anime_rec'
}
connection_str = 'mysql+pymysql://{user}:{password}@{host}:{port}/{database}'.format(**config)
engine = db.create_engine(connection_str)
connection = engine.connect()

### Dataframe Creation

In [7]:
QUERY = """
SELECT * FROM notas_de_animes_concluidos;
"""

In [8]:
notas = connection.execute(db.text(QUERY))

In [9]:
df = pd.DataFrame(data=notas.fetchall(), columns=['usuario', 'anime', 'nota', 'nome'])

In [10]:
df

Unnamed: 0,usuario,anime,nota,nome
0,0,68,6,Black Cat (TV)
1,0,1689,6,Byousoku 5 Centimeter
2,0,2913,6,Daisougen no Chiisana Tenshi: Bush Baby
3,0,1250,7,Erementar Gerad
4,0,356,9,Fate/stay night
...,...,...,...,...
999995,5601,1735,10,Naruto: Shippuuden
999996,5601,13667,8,Naruto: Shippuuden Movie 6 - Road to Ninja
999997,5601,18897,9,Nisekoi
999998,5601,24227,7,Nisekoi OVA


### DataLoaders

In [11]:
dls = CollabDataLoaders.from_df(df, item_name='nome')

In [12]:
dls.show_batch()

Unnamed: 0,usuario,nome,nota
0,3645,Neon Genesis Evangelion,10
1,3647,Kimi no Na wa.,10
2,2312,"Elfen Lied: Tooriame nite Arui wa, Shoujo wa Ikani Shite Sono Shinjou ni Itatta ka? - Regenschauer",7
3,4837,D.Gray-man,9
4,1235,Morita-san wa Mukuchi.,7
5,5260,Darker than Black: Kuro no Keiyakusha - Sakura no Hana no Mankai no Shita,7
6,4072,One Punch Man 2nd Season,9
7,781,Non Non Biyori: Okinawa e Ikukoto ni Natta,7
8,474,Mahou Shoujo Madokaâ˜…Magica Movie 2: Eien no Monogatari,9
9,5550,Mousou Dairinin,7


In [13]:
dls.classes

{'usuario': ['#na#', 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, 82, 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 94, 95, 97, 98, 99, 101, 102, 103, 104, 105, 107, 108, 110, 111, 112, 113, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 133, 134, 135, 136, 137, 138, 139, 141, 143, 145, 146, 147, 148, 149, 150, 152, 153, 154, 155, 156, 157, 158, 160, 162, 163, 164, 165, 166, 167, 169, 170, 171, 172, 173, 174, 177, 178, 179, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 201, 202, 204, 205, 207, 208, 209, 210, 211, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 235, 237, 238, 239, 240, 241, 243, 244, 245, 246, 247

### O que é um Collaborative Filter

Um Filtro Colaborativo é uma técnica de Machine Learning para recomendação. Nela, são usadas as notas dos usuários para os itens que farão parte da recomendação.

![Collab Filter](./example_collab_filter.png)

[`fonte: fastai book`](https://colab.research.google.com/github/fastai/fastbook/blob/master/08_collab.ipynb)

Nesta técnica, são utilizados os chamados `fatores latentes`, que são valores iniciados aleatoriamente e ajustados durante o treino. Após o treino, estes fatores apresentam co-relações entre os itens que são aprendidas pelo modelo de Machine Learning. Uma desvantagem desta técnica é a dificuldade da análise de quais exatamente são as co-relações , devido a aleatoriedade. 

### Training the model

In [16]:
learner = collab_learner(dls, n_factors=50, y_range=(0, 10))

In [17]:
learner.fit_one_cycle(10, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,4.576494,4.47224,03:30
1,4.626329,4.563557,04:27
2,4.364635,4.536044,04:36
3,4.558651,4.480124,04:30
4,4.427332,4.378161,04:25
5,4.167296,4.218033,04:24
6,4.062884,4.109869,04:17
7,4.02148,3.987482,04:17
8,3.837336,3.923435,04:14
9,3.68225,3.91363,04:18


In [18]:
anime_bias = learner.model.i_bias.weight.squeeze()

In [19]:
idxs = anime_bias.argsort(descending=True)[:5]
[dls.classes['nome'][i] for i in idxs]

['Fullmetal Alchemist: Brotherhood',
 'Steins;Gate',
 'Hunter x Hunter (2011)',
 'Death Note',
 'Code Geass: Hangyaku no Lelouch']

In [21]:
anime_factors = learner.model.i_weight.weight
idx = dls.classes['nome'].o2i['Naruto']
distances = nn.CosineSimilarity(dim=1)(anime_factors, anime_factors[idx][None])
idx = distances.argsort(descending=True)[1:10]
dls.classes['nome'][idx]

(#9) ['Code Geass: Hangyaku no Lelouch R2','Dragon Ball Z','Death Note','Code Geass: Hangyaku no Lelouch','Bleach','Planet:Valkyrie','Jungle Taitei: Yuuki ga Mirai wo Kaeru','T7S Longing for Summer Again and Again: Harukaze','Dokonjou Gaeru']