# Pairing

In [25]:
import csv
import pandas as pd
from gensim.models import Word2Vec

from src.data_preprocessing.food_normalization import get_food_descriptors
from src.pairing_rules.elimination import eliminate_not_well_together
from src.pairing_rules.congruent_contrasting import congruent_or_contrasting
from src.pairing_rules.similarity import sort_by_aroma_similarity

## Preparation

### Loading files from previous stages

In [2]:
wine_vectors = pd.read_csv("../data/wine_aromas_nonaromas.csv", index_col="Unnamed: 0")
wine_vectors["salt"] = 0
wine_vectors.head()

Unnamed: 0,aroma,weight,sweet,acid,piquant,fat,bitter,salt
"Aglianico, Italian Red, Aglianico del Vulture, Southern Italy, Italy",[ 1.47843817e-02 -2.56521249e+00 -1.55532622e+...,0.440418,0.559137,0.081831,0.074137,0.517114,0.190725,0
"Aglianico, Italian Red, Irpinia, Southern Italy, Italy",[-4.51575547e-01 -2.69118047e+00 -1.72507966e+...,0.487605,0.474853,0.089575,0.076614,0.455151,0.277241,0
"Aglianico, Italian Red, Taurasi, Southern Italy, Italy",[-1.1400039e-01 -2.5102782e+00 -1.4832231e+00 ...,0.46362,0.724887,0.117762,0.097869,0.526619,0.183798,0
"Alsace white blend, White Blend, Alsace, Alsace, France",[-7.26654351e-01 -2.61081553e+00 -1.22374463e+...,0.54908,0.508619,0.128604,0.303001,0.499623,0.582247,0
"Arneis, Italian White, Roero, Piedmont, Italy",[ 1.2258542 -1.9858316 -1.7651796 2.699851...,0.35546,0.373485,0.163732,0.092648,0.433071,0.389474,0


In [3]:
descriptor_frequencies = pd.read_csv("../data/wine_variety_descriptors.csv", index_col="index")

wine_word2vec_model = Word2Vec.load("../data/word2vec.bin")
word_vectors = wine_word2vec_model.wv

food_nonaroma_infos = pd.read_csv(
    "../data/food_nonaroma_info.csv", index_col="Unnamed: 0"
)

### Food descriptors

In [49]:
food_nonaromas, aroma_embedding = get_food_descriptors(
    ["pasta", "tomato", "vodka", "cream"], word_vectors, food_nonaroma_infos
)

In [5]:
food_nonaromas

{'weight': 1.0,
 'sweet': 0.7695091050675696,
 'acid': 0.8501988729254087,
 'salt': 0.7323310117139278,
 'piquant': 0.5781713651677354,
 'fat': 0.6335602425243104,
 'bitter': 0.7803759017185823}

## Pairing rules

### Anti-rules
The rules that state which nonaromas don't go well together.

In [6]:
wine_recommendations = wine_vectors.copy()
wine_recommendations = eliminate_not_well_together(wine_recommendations, food_nonaromas)

In [7]:
wine_recommendations.head()

Unnamed: 0,aroma,weight,sweet,acid,piquant,fat,bitter,salt
"Aglianico, Italian Red, Aglianico del Vulture, Southern Italy, Italy",[ 1.47843817e-02 -2.56521249e+00 -1.55532622e+...,0.440418,0.559137,0.081831,0.074137,0.517114,0.190725,0
"Aglianico, Italian Red, Irpinia, Southern Italy, Italy",[-4.51575547e-01 -2.69118047e+00 -1.72507966e+...,0.487605,0.474853,0.089575,0.076614,0.455151,0.277241,0
"Aglianico, Italian Red, Taurasi, Southern Italy, Italy",[-1.1400039e-01 -2.5102782e+00 -1.4832231e+00 ...,0.46362,0.724887,0.117762,0.097869,0.526619,0.183798,0
"Arneis, Italian White, Roero, Piedmont, Italy",[ 1.2258542 -1.9858316 -1.7651796 2.699851...,0.35546,0.373485,0.163732,0.092648,0.433071,0.389474,0
"Austrian Red Blend, Red Blends, , Burgenland, Austria",[-1.47745550e-01 -2.67721891e+00 -1.59417951e+...,0.158503,0.403206,0.161893,0.396891,0.467299,0.41713,0


### Congruent or constrasting

In [8]:
wine_recommendations = congruent_or_contrasting(wine_recommendations, food_nonaromas)

In [9]:
wine_recommendations.value_counts(subset="pairing_type")

pairing_type
               441
contrasting     17
Name: count, dtype: int64

### Sorting by aroma similarity

In [10]:
wine_recommendations = sort_by_aroma_similarity(wine_recommendations, aroma_embedding)
wine_recommendations.sort_values(by="pairing_type", axis=0, ascending=False).head(10)

Unnamed: 0,aroma,weight,sweet,acid,piquant,fat,bitter,salt,pairing_type,aroma_distance
"Bordeaux-style Red Blend, Virginia, Virginia, US","[0.23608124, -2.7020996, -1.846619, 0.99922335...",0.512996,0.447638,0.380172,0.134121,0.751533,0.328143,0,contrasting,0.560412
"Bordeaux-style Red Blend, Central Coast, California, US","[0.26048073, -2.1731591, -1.9217277, 1.1726497...",0.518079,0.461081,0.145761,0.068132,0.774049,0.221401,0,contrasting,0.563833
"Nerello Mascalese, Italian Red, Terre Siciliane, Sicily & Sardinia, Italy","[-0.46742183, -2.09274006, -1.48543894, 1.7116...",0.508429,1.0,0.084962,0.171461,0.433035,0.278996,0,contrasting,0.555572
"Bordeaux-style Red Blend, Lussac Saint-Émilion, Bordeaux, France","[0.20795342, -2.270046, -2.2612236, 0.7207129,...",0.337021,0.557895,0.437668,0.2126,0.810094,0.362087,0,contrasting,0.558491
"Cabernet Franc, Sonoma, California, US","[0.08412224, -2.732492, -1.4171169, 1.8145885,...",0.518702,0.8242,0.067724,0.08914,0.563305,0.212328,0,contrasting,0.591265
"Bordeaux-style Red Blend, Listrac-Médoc, Bordeaux, France","[0.38030821, -2.0087905, -1.7155811, 1.094432,...",0.371584,0.539129,0.275918,0.071945,0.778811,0.220892,0,contrasting,0.567724
"Bordeaux-style Red Blend, Francs Côtes de Bordeaux, Bordeaux, France","[0.57819313, -2.1525059, -1.9558433, 1.2849407...",0.374776,0.372274,0.289232,0.042674,0.908679,0.214712,0,contrasting,0.577052
"Bordeaux-style Red Blend, Columbia Valley, Washington, US","[0.50772744, -2.210652, -1.8398572, 1.029296, ...",0.457466,0.481743,0.322799,0.043925,0.751439,0.253165,0,contrasting,0.560279
"Bordeaux-style Red Blend, Mendoza, Mendoza Province, Argentina","[0.45802656, -2.1395717, -1.5197405, 0.9620181...",0.501071,0.476194,0.48186,0.064561,0.761808,0.211902,0,contrasting,0.560376
"Malbec-Merlot, Bordeaux-style Red Blend, Cahors, Southwest France, France","[0.45578045, -2.1745262, -1.8124868, 0.7658596...",0.461958,0.401516,0.479763,0.064747,0.862827,0.178645,0,contrasting,0.56457


## List of k-top pairings

In [50]:
with open("../data/food_list_preprocessed.csv", "r") as f:
    csv_reader = csv.reader(f)
    food_list_preprocessed = list(csv_reader)[0]
food_df = pd.read_csv("../data/list_of_foods.csv")
food_list = list(food_df["Food"])

In [51]:
food_list = list(set().union(food_list_preprocessed, food_list))

In [54]:
K = 3
top_wine_dict = dict()

for food in food_list:
    try:
        food_nonaromas, aroma_embedding = get_food_descriptors(
            [food.lower()], word_vectors, food_nonaroma_infos
        )
        wine_recommendations = wine_vectors.copy()
        wine_recommendations = eliminate_not_well_together(
            wine_recommendations, food_nonaromas
        )
        wine_recommendations = congruent_or_contrasting(
            wine_recommendations, food_nonaromas
        )
        wine_recommendations = sort_by_aroma_similarity(
            wine_recommendations, aroma_embedding
        )
        wine_recommendations = wine_recommendations.sort_values(
            by="pairing_type", axis=0, ascending=False
        )
        if len(wine_recommendations) >= 0:
            top = wine_recommendations.head(K)
            top = list(top.index)
            top_wine_dict[food] ={}
            for i in range(min(len(top), K)):
                top_wine_dict[food][f"top{i+1}"] = top[i]
    except KeyError:
        pass
        # print(f"{food} not found")

In [55]:
df = pd.DataFrame.from_dict(top_wine_dict, orient="index")
df.head()

Unnamed: 0,top1,top2,top3
cauliflow,"Pinot Noir, Pernand-Vergelesses, Burgundy, F...","Bordeaux-style Red Blend, Long Island, New Y...","Bordeaux-style Red Blend, , Stellenbosch, Sou..."
scallop,"Bordeaux-style Red Blend, , Stellenbosch, Sou...","Bordeaux-style Red Blend, Washington Other, ...","Malbec-Cabernet Sauvignon, Bordeaux-style Red ..."
soup,"Pinot Noir, , Niederösterreich, Austria","Gamay, Morgon, Beaujolais, France","Pinot Noir, Yarra Valley, Victoria, Australia"
hungri,"Pinot Noir, Crémant dAlsace, Alsace, France","Pinot Nero, Pinot Noir, Alto Adige, Northeast...","Red Blends, Red Blends, Terra Alta, Catalonia..."
prune,"Red Blends, Red Blends, Fronton, Southwest Fr...","Red Blends, Red Blends, Jumilla, Levante, Spain","Chardonnay, Franciacorta, Lombardy, Italy"


In [56]:
df.to_csv("../data/pairing_top3.csv")