In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

import torch
from transformers import BertTokenizer, BertForSequenceClassification

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Note Embedding

In [None]:
# NEM (presumably "note embedding model" -- confirm): a bert-base-chinese
# sequence classifier whose fine-tuned weights are restored from a local
# checkpoint. Only the `nem.bert` encoder is used for embeddings further down;
# the 21-label head is still declared here so the model matches the checkpoint.
note_labels_num = 21
note_tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
nem = BertForSequenceClassification.from_pretrained(
    "bert-base-chinese",
    num_labels=note_labels_num,
)
nem = nem.to(device)
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source.
state_dict = torch.load("models/NEM_v1.pth", map_location=device)
nem.load_state_dict(state_dict)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-chinese and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<All keys matched successfully>

In [None]:
# Note embedding dictionary: one row per known note. Column 0 is read as the
# note's name elsewhere in this notebook; every remaining column is treated as
# an embedding component and converted to float.
df_ne = pd.read_csv("data/note_embedding_v1.csv")
ne_list = df_ne.iloc[:, 1:].to_numpy().astype(float)

In [5]:
def note2vec(note):
    """Embed a note with the NEM encoder and find its closest known note.

    The text is tokenized (truncated/padded to 64 tokens) and passed through
    ``nem.bert``; the hidden state at sequence position 0 is taken as the
    note embedding. That embedding is compared by cosine similarity against
    every row of ``ne_list``.

    Args:
        note: Note text to embed.

    Returns:
        A tuple ``(rc_vec, rc_name, rc_similarity)``: the best-matching row of
        ``ne_list``, the first-column value of ``df_ne`` for that row, and the
        cosine similarity of the match.
    """
    # Convert the note text into a vector.
    nem.eval()
    with torch.no_grad():
        tokens = note_tokenizer(
            note,
            truncation=True,
            padding='max_length',
            max_length=64,
            return_tensors='pt',
        )
        bert_out = nem.bert(
            input_ids=tokens['input_ids'].to(device),
            attention_mask=tokens['attention_mask'].to(device),
        )
        first_token_state = bert_out.last_hidden_state[:, 0, :]
        note_vec = first_token_state.squeeze().cpu().numpy()

    # Recommend the most similar known note.
    query = note_vec.reshape(1, -1)
    scores = cosine_similarity(query, ne_list)[0]
    best = scores.argmax()
    return ne_list[best], df_ne.iloc[best, 0], scores[best]

In [6]:
def get_note_list_vec(note_list):
    """Map each note in ``note_list`` to the vector of its closest known note.

    Every note is resolved through ``note2vec``; the matched name and its
    similarity are printed so the caller can see how each input was
    interpreted.

    Args:
        note_list: Iterable of note texts.

    Returns:
        A list with one matched vector per input note, in input order.
    """
    vectors = []
    for raw_note in note_list:
        matched_vec, matched_name, score = note2vec(raw_note)
        print(f"Note: {raw_note} => {matched_name} (similarity: {score:.4f})")
        vectors.append(matched_vec)
    return vectors

# Perfume Vector

In [7]:
# Get perfume vector
def perfume2vec(top_notes, mid_notes, base_notes):
    """Build a perfume vector as a weighted average of its three note layers.

    The note vectors of each layer are averaged first, so a layer may hold
    any number of notes. The layer means are then combined with weights
    0.3 (top), 0.4 (middle) and 0.3 (base). Layers with no notes are skipped
    and the remaining weights are renormalised to sum to 1.

    Args:
        top_notes: List of top-note texts (may be empty).
        mid_notes: List of middle-note texts (may be empty).
        base_notes: List of base-note texts (may be empty).

    Returns:
        A numpy array of shape ``(1, dim)`` holding the perfume vector.

    Raises:
        ValueError: If all three layers are empty.
    """
    layers = ((top_notes, 0.3), (mid_notes, 0.4), (base_notes, 0.3))

    weighted_sum = None
    used_weight = 0.0
    for notes, weight in layers:
        if notes is None or len(notes) == 0:
            continue
        # Pool the layer to one row; keepdims keeps the (1, dim) shape that
        # the single-note case has always produced.
        layer_vec = np.array(get_note_list_vec(notes)).mean(axis=0, keepdims=True)
        contribution = layer_vec * weight
        weighted_sum = contribution if weighted_sum is None else weighted_sum + contribution
        used_weight += weight

    if weighted_sum is None:
        raise ValueError("at least one note is required to build a perfume vector")

    # With all three layers present used_weight is exactly 1.0, so this
    # division changes nothing; it only rescales when a layer was skipped.
    return weighted_sum / used_weight

# Recommend Perfumes

In [8]:
# Load the perfume vector table: the first column is skipped and every
# remaining column is treated as an embedding component and converted to float.
df_pe = pd.read_csv("data/perfume_embedding_wa.csv")
pe_list = df_pe.iloc[:, 1:].to_numpy().astype(float)

In [9]:
def get_recommended_perfumes(top_notes, mid_notes, base_notes, top_k=5):
    """Rank the known perfumes by similarity to a set of preferred notes.

    The preferred notes are turned into a single perfume vector, which is
    compared by cosine similarity against every row of ``pe_list``.

    Args:
        top_notes: List of preferred top-note texts.
        mid_notes: List of preferred middle-note texts.
        base_notes: List of preferred base-note texts.
        top_k: Number of perfumes to return.

    Returns:
        A list of ``[name, similarity]`` pairs for the ``top_k`` most similar
        perfumes, most similar first.
    """
    query = perfume2vec(top_notes, mid_notes, base_notes)
    query = query.reshape(1, -1)
    scores = cosine_similarity(query, pe_list)[0]

    scored = df_pe.assign(similarity=scores)
    best = scored.nlargest(top_k, 'similarity')
    return best[['name', 'similarity']].to_numpy().tolist()

In [10]:
# Example query: one preferred note per layer, asking for the ten closest perfumes.
like_top_notes = ["橙子"]
like_mid_notes = ["咖啡"]
like_base_notes = ["巧克力"]
top_k = 10

rc_perfumes = get_recommended_perfumes(
    top_notes=like_top_notes,
    mid_notes=like_mid_notes,
    base_notes=like_base_notes,
    top_k=top_k,
)
display(rc_perfumes)

Note: 橙子 => 橙子 (similarity: 1.0000)
Note: 咖啡 => 咖啡 (similarity: 1.0000)
Note: 巧克力 => 巧克力 (similarity: 1.0000)


[['Burberry Goddess 缪斯女神淡香精迷你瓶', 0.8663206008013937],
 ['Burberry Goddess 缪斯女神淡香精', 0.8611618673029654],
 ['Ariana Grande MOD Vanilla 摩德香草女性淡香精', 0.8530533604036163],
 ['Burberry Brit 风格女性淡香精版本', 0.845229121526251],
 ['Burberry Brit 风格女性淡香精版本TESTER', 0.845229121526251],
 ['Thierry Mugler Angel 天使女性淡香精', 0.8383740321815104],
 ['Burberry Brit 风格女性淡香水', 0.8342656784883614],
 ['Calvin Klein CK Everyone 中性淡香精', 0.8251400590292636],
 ['Calvin Klein CK 中性淡香精版本迷你瓶', 0.8251400590292636],
 ['Armaf Odyssey Dubai Chocolat 杜拜巧克力女性淡香精', 0.816680051465698]]

# Perfume Detail

In [None]:
# Show perfume details for the recommendations, in recommendation order.
df_perfume = pd.read_csv("data/1976_clean.csv")
rc_names = [item[0] for item in rc_perfumes]
# A left merge keeps the order of rc_names and puts 'name' first, like the
# previous set_index/.loc/reset_index chain. Unlike .loc, a recommended name
# missing from the detail table gives a row of NaNs instead of a KeyError.
df_rc_perfume = pd.DataFrame({'name': rc_names}).merge(df_perfume, on='name', how='left')
display(df_rc_perfume)

Unnamed: 0,name,description,original_fragrance,top_notes,middle_notes,base_notes,detail_url,gender,fragrance
0,Burberry Goddess 缪斯女神淡香精迷你瓶,当初冲著是Cara跟Kate Moss代言的，喷上去会有一种进入不同时区的感觉，一种知性、温...,诱人美食调,香草、薰衣草、可可和姜,香草鱼子酱,纯香草,https://www.1976.com.tw/prod/21410,female,美食调
1,Burberry Goddess 缪斯女神淡香精,当初冲著是Cara跟Kate Moss代言的，喷上去会有一种进入不同时区的感觉，一种知性、温...,诱人美食调,香草、薰衣草、可可、姜,香草鱼子酱,纯香草,https://www.1976.com.tw/prod/21155,female,美食调
2,Ariana Grande MOD Vanilla 摩德香草女性淡香精,Ariana Grande 打造原创香水MOD系列，Mod Vanilla 和 Mod Bl...,诱人美食调,粉红小苍兰、李子、奶油,鸢尾草奶油、白果仁糖、茉莉,香草精、可可脂,https://www.1976.com.tw/prod/21381,female,美食调
3,Burberry Brit 风格女性淡香精版本,十分有BURBERRY经典风格，和我之前收藏的BURBERRY比较，这款香水的调性比较甜一些...,东方花香调,杏仁、义大利莱姆、梨,甜杏仁、白牡丹,桃花心木、琥珀、香草,https://www.1976.com.tw/prod/18175,female,花香调
4,Burberry Brit 风格女性淡香精版本TESTER,BURBERRY风格女香推出淡香精版本TESTER，味道更持久喔！,东方花香调,杏仁、义大利莱姆、梨,甜杏仁、白牡丹,桃花心木、琥珀、香草,https://www.1976.com.tw/prod/2526,female,花香调
5,Thierry Mugler Angel 天使女性淡香精,开头是甜甜腻腻的食物味道，后面开始出现浓厚的巧克力香，好像整个人都在巧克力城堡一样，慢慢转为...,诱人美食调,柠檬、茉莉,红浆果、露莓、蜂蜜,巧克力、焦糖、广藿香、香草,https://www.1976.com.tw/prod/2703,female,美食调
6,Burberry Brit 风格女性淡香水,既舒服又很让人上瘾的味道，甜甜的，很让人家放松。是一款很百撘场合的女香,东方花香调,义大利莱姆、杏仁、青绿扁桃仁,甜杏仁、白牡丹,桃花心木、琥珀、香草、顿加豆,https://www.1976.com.tw/prod/2524,female,花香调
7,Calvin Klein CK Everyone 中性淡香精,这瓶香水的气味清新，明亮，微微的甜味，没有粉味，男女生用似乎都没有违和感，觉得很适合大学生二...,茶香清新调,有机橘子,斯里兰卡红茶,麝香、香根草,https://www.1976.com.tw/prod/19847,neutral,清新调
8,Calvin Klein CK 中性淡香精版本迷你瓶,这瓶香水的气味清新，明亮，微微的甜味，没有粉味，男女生用似乎都没有违和感，觉得很适合大学生二...,茶香清新调,有机橘子,斯里兰卡红茶,麝香、香根草,https://www.1976.com.tw/prod/22438,neutral,清新调
9,Armaf Odyssey Dubai Chocolat 杜拜巧克力女性淡香精,Armaf Odyssey Dubai Chocolat 杜拜巧克力女性淡香精 的瓶身设计非...,诱人美食调,咖啡、开心果、克纳夫、果仁糖、榛果,巧克力、香草、小荳蔻,焦糖、琥珀木、零陵香豆,https://www.1976.com.tw/prod/23161,female,美食调
