## Flavor Graph pairing

In [1]:
import pandas as pd
from gensim.models import Word2Vec

from src.data_preprocessing.flavor_graph_preprocessing import (
    load_embedding,
    split_unanonimize_nodes,
    pair_item_with_category,
)
from src.pairing_rules.generate_pairings import generate_single_pairing

In [2]:
FLAVOR_GRAPH_PATH = '../data/flavor_graph/FlavorGraph+CSP-embedding_WP-metapath_300-dim_0.0025-initial_lr_3-window_size_10-iterations_5-min_count-_False-isCSP_0.0001-CSPcoef.pickle'
NODES_PATH = '../data/flavor_graph/nodes_with_wine.csv'

In [3]:
fg_embed_dict_ids = load_embedding(FLAVOR_GRAPH_PATH)
fg_embed_dict_ids.keys()

dict_keys(['7198', '960', '7518', '1089', '7243', '5582', '7560', '4600', '7556', '1895', '7205', '4620', '7678', '86', '8193', '7228', '1664', '7523', '7223', '4444', '7471', '4635', '7513', '7307', '5526', '8223', '4150', '7559', '429', '8328', '8378', '5173', '8380', '2947', '7360', '4733', '7576', '7281', '4928', '7773', '328', '7683', '3840', '7142', '7493', '962', '8437', '4885', '994', '4021', '7315', '5579', '4835', '1138', '7423', '3565', '22', '6787', '1429', '5834', '1504', '8334', '6004', '5747', '7439', '423', '374', '5345', '7366', '175', '4370', '5205', '8315', '1129', '5556', '6091', '8677', '1005', '6109', '8332', '3860', '5968', '2167', '6079', '3743', '5929', '5121', '4052', '8782', '6631', '286', '2198', '58', '7830', '4331', '8312', '4744', '2945', '2188', '7399', '1453', '3587', '8723', '88', '93', '4494', '8405', '640', '3720', '8329', '1263', '5770', '7644', '345', '3092', '3572', '7726', '5190', '1479', '3782', '7453', '972', '4309', '7286', '6367', '1670', '67

In [4]:
fg_embed_dict_ids['7198'].shape

(300,)

In [5]:
nodes_df = pd.read_csv(NODES_PATH)
nodes_df.head()

Unnamed: 0,node_id,name,id,node_type,is_hub
0,0,1%_fat_buttermilk,,ingredient,no_hub
1,1,1%_fat_cottage_cheese,,ingredient,no_hub
2,3,10%_cream,,ingredient,no_hub
3,4,100%_bran,,ingredient,no_hub
4,5,10_inch_flour_tortilla,,ingredient,no_hub


In [6]:
fg_embed_dict = split_unanonimize_nodes(nodes_df, fg_embed_dict_ids)
fg_embed_dict.keys()

dict_keys(['compound', 'ingredient', 'wine'])

In [7]:
for key in fg_embed_dict.keys():
    print(f"Category: {key.upper()}, number of items: {len(fg_embed_dict[key])}")
    print(f"Exemplary items: {list(fg_embed_dict[key].keys())[:5]}\n")

Category: COMPOUND, number of items: 1633
Exemplary items: ['eugenol', '3-methylthiopropanol', 'Linalool', 'Beta-Phellandrene', 'Methyl_Isobutyrate']

Category: INGREDIENT, number of items: 6653
Exemplary items: ['canola_oil', 'cheese', 'sage', 'papaya', 'dill']

Category: WINE, number of items: 107
Exemplary items: ['Cabernet Sauvignon, Barossa,  South Australia,  Australia', 'Red Blends, Red Blends,  Southern Oregon,  Oregon,  US', 'Bordeaux-style Red Blend, Listrac-Médoc,  Bordeaux,  France', 'Grillo, Italian White, Sicilia,  Sicily & Sardinia,  Italy', 'White Blend, Etna,  Sicily & Sardinia,  Italy']



In [8]:
# EXEMPLARY EMBEDDING
category = 'ingredient'
name = 'mango'

fg_embed_dict[category][name]

array([-0.2981921 , -0.02563435, -0.2505978 ,  0.10616672, -0.02561186,
        0.06016891, -0.07432453, -0.18450247,  0.16229858,  0.08904628,
       -0.04630957,  0.05405298,  0.1683048 , -0.06777766,  0.05493566,
       -0.03493505, -0.0533022 , -0.2643112 ,  0.08056065,  0.03207704,
        0.11187352, -0.1739578 , -0.12155879,  0.07098675,  0.16595621,
       -0.06562718,  0.19446406, -0.06749196, -0.11292143,  0.05721229,
       -0.10526533, -0.09164085,  0.09912632, -0.02498809,  0.3281356 ,
        0.20079322, -0.10567888,  0.12356447, -0.09444073,  0.07963233,
        0.13190752,  0.06271315,  0.10611369, -0.07464299,  0.02290803,
       -0.19762658,  0.0491326 , -0.31233507, -0.14628816, -0.16454068,
        0.09274351,  0.3000756 , -0.05545472,  0.04565948, -0.0009815 ,
       -0.0800032 , -0.2817043 ,  0.3964886 , -0.18808995,  0.14148882,
        0.2190107 ,  0.02620921,  0.13576405, -0.02985151, -0.19656987,
        0.0196165 , -0.11587851, -0.20072562,  0.06812003,  0.14

### Pair food with wine

In [9]:
item = "burrito"
# item = 'White Blend, Etna,  Sicily & Sardinia,  Italy'
# item = 'Phlorizin'
top_n = 5

#### Regular pairings

In [20]:
wine_vectors = pd.read_csv("../data/wine_aromas_nonaromas.csv", index_col="Unnamed: 0")
descriptor_frequencies = pd.read_csv("../data/wine_variety_descriptors.csv", index_col="index")
wine_word2vec_model = Word2Vec.load("../data/word2vec.bin")
word_vectors = wine_word2vec_model.wv
food_nonaroma_infos = pd.read_csv("../data/food_nonaroma_info.csv", index_col="Unnamed: 0")

generate_single_pairing(item, wine_vectors, word_vectors, food_nonaroma_infos, top_n)

['Bordeaux-style Red Blend, , Stellenbosch,  South Africa',
 'Bordeaux-style Red Blend, Lussac Saint-Émilion,  Bordeaux,  France',
 'Malbec-Cabernet Sauvignon, Bordeaux-style Red Blend, Mendoza,  Mendoza Province,  Argentina',
 'Cabernet Sauvignon, , Judean Hills,  Israel',
 'Bordeaux-style Red Blend, Margaux,  Bordeaux,  France']

#### FlavorGraph pairings

In [10]:
pair_item_with_category(item, fg_embed_dict, top_n = top_n)

['Bordeaux-style Red Blend, Listrac-Médoc,  Bordeaux,  France',
 'Grillo, Italian White, Sicilia,  Sicily & Sardinia,  Italy',
 'Red Blends, Red Blends,  Southern Oregon,  Oregon,  US',
 'Bordeaux-style Red Blend, Mendoza,  Mendoza Province,  Argentina',
 'Viognier,  Sonoma,  California,  US']

In [11]:
pair_item_with_category(item, fg_embed_dict, 'ingredient', top_n = top_n)

['salsa', 'black_olive', 'sour_cream', 'meatloaf', 'lingonberry']

In [12]:
pair_item_with_category(item, fg_embed_dict, 'compound', top_n = top_n)

['CID_644104',
 'thiamine',
 'Heptanoic_Acid',
 'betaine',
 'Thiamine_Hydrochloride']

Multi-ingredeint pairings

In [46]:
multiple_items = 'Ginger +cinnamon'
top_n = 5

In [47]:
pair_item_with_category(multiple_items, fg_embed_dict, top_n = top_n)

['Riesling, Niagara Peninsula,  Ontario,  Canada',
 'Nebbiolo, Piedmont,  Piedmont,  Italy',
 'Cabernet Sauvignon, , Aconcagua Valley,  Chile',
 'Riesling,  Long Island,  New York,  US',
 'Bordeaux-style Red Blend, Castillon Côtes de Bordeaux,  Bordeaux,  France']

In [48]:
pair_item_with_category(multiple_items, fg_embed_dict, 'ingredient', top_n = top_n)

['nutmeg',
 'dried_orange_peel',
 'black_tea_bag',
 'goji_berry',
 'light_molasses']

In [49]:
pair_item_with_category(multiple_items, fg_embed_dict, 'compound', top_n = top_n)

['gingerol',
 'cis-Methylisoeugenol',
 'Acetyleugenol',
 '(5xi,7xi,10xi)-eudesma-4(14),11-diene',
 'carnosol']