# Pairing

In [1]:
import pandas as pd
from gensim.models import Word2Vec

from src.data_preprocessing.food_normalization import get_food_descriptors
from src.pairing_rules.elimination import eliminate_not_well_together
from src.pairing_rules.congruent_contrasting import congruent_or_contrasting
from src.pairing_rules.similarity import sort_by_aroma_similarity

## Preparation

### Loading files from previous stages

In [2]:
wine_vectors = pd.read_csv("../data/wine_aromas_nonaromas.csv", index_col="Unnamed: 0")
wine_vectors.head()

Unnamed: 0,aroma,weight,sweet,acid,piquant,fat,bitter
"Aglianico, Italian Red, Aglianico del Vulture, Southern Italy, Italy",[ 1.47843817e-02 -2.56521249e+00 -1.55532622e+...,0.440418,0.559137,0.081831,0.074137,0.517114,0.190725
"Aglianico, Italian Red, Irpinia, Southern Italy, Italy",[-4.51575547e-01 -2.69118047e+00 -1.72507966e+...,0.487605,0.474853,0.089575,0.076614,0.455151,0.277241
"Aglianico, Italian Red, Taurasi, Southern Italy, Italy",[-1.1400039e-01 -2.5102782e+00 -1.4832231e+00 ...,0.46362,0.724887,0.117762,0.097869,0.526619,0.183798
"Alsace white blend, White Blend, Alsace, Alsace, France",[-7.26654351e-01 -2.61081553e+00 -1.22374463e+...,0.54908,0.508619,0.128604,0.303001,0.499623,0.582247
"Arneis, Italian White, Roero, Piedmont, Italy",[ 1.2258542 -1.9858316 -1.7651796 2.699851...,0.35546,0.373485,0.163732,0.092648,0.433071,0.389474


In [3]:
descriptor_frequencies = pd.read_csv("../data/wine_variety_descriptors.csv", index_col="index")

wine_word2vec_model = Word2Vec.load("../data/word2vec.bin")
word_vectors = wine_word2vec_model.wv

food_nonaroma_infos = pd.read_csv(
    "../data/food_nonaroma_info.csv", index_col="Unnamed: 0"
)

### Food descriptors

In [50]:
food_nonaromas, aroma_embedding = get_food_descriptors(
    ["strawberry", "pie"], word_vectors, food_nonaroma_infos
)

In [51]:
food_nonaromas

{'weight': 0.569527162895731,
 'sweet': 0.9427385443746554,
 'acid': 0.6033879714549318,
 'salt': 0.3940605523943651,
 'piquant': 0.3973560020885486,
 'fat': 0.697382705334717,
 'bitter': 0.5397499378754745}

## Pairing rules

### Anti-rules
The rules that state which nonaromas don't go well together.

In [52]:
wine_recommendations = wine_vectors.copy()
wine_recommendations = eliminate_not_well_together(wine_recommendations, food_nonaromas)

In [53]:
wine_recommendations.head()

Unnamed: 0,aroma,weight,sweet,acid,piquant,fat,bitter
"Aglianico, Italian Red, Aglianico del Vulture, Southern Italy, Italy",[ 1.47843817e-02 -2.56521249e+00 -1.55532622e+...,0.440418,0.559137,0.081831,0.074137,0.517114,0.190725
"Aglianico, Italian Red, Irpinia, Southern Italy, Italy",[-4.51575547e-01 -2.69118047e+00 -1.72507966e+...,0.487605,0.474853,0.089575,0.076614,0.455151,0.277241
"Aglianico, Italian Red, Taurasi, Southern Italy, Italy",[-1.1400039e-01 -2.5102782e+00 -1.4832231e+00 ...,0.46362,0.724887,0.117762,0.097869,0.526619,0.183798
"Alsace white blend, White Blend, Alsace, Alsace, France",[-7.26654351e-01 -2.61081553e+00 -1.22374463e+...,0.54908,0.508619,0.128604,0.303001,0.499623,0.582247
"Arneis, Italian White, Roero, Piedmont, Italy",[ 1.2258542 -1.9858316 -1.7651796 2.699851...,0.35546,0.373485,0.163732,0.092648,0.433071,0.389474


### Congruent or constrasting

In [54]:
wine_recommendations = congruent_or_contrasting(wine_recommendations, food_nonaromas)

In [55]:
wine_recommendations.value_counts(subset="pairing_type")

pairing_type
               493
contrasting     28
congruent        1
Name: count, dtype: int64

### Sorting by aroma similarity

In [60]:
wine_recommendations = sort_by_aroma_similarity(wine_recommendations, aroma_embedding)
wine_recommendations.sort_values(by="pairing_type", axis=0, ascending=False).head(10)

Unnamed: 0,aroma,weight,sweet,acid,piquant,fat,bitter,pairing_type,aroma_distance
"Malbec-Cabernet Sauvignon, Bordeaux-style Red Blend, Mendoza, Mendoza Province, Argentina","[0.639653146, -2.16264701, -1.55487049, 1.0170...",0.448049,0.524253,0.507579,0.106075,1.0,0.172611,contrasting,0.365053
"Bordeaux-style Red Blend, , Stellenbosch, South Africa","[0.72378337, -2.0855939, -1.6394734, 0.9853870...",0.558099,0.578521,0.681478,0.069242,0.763244,0.294445,contrasting,0.376864
"Viognier, Sierra Foothills, California, US","[-1.4385442, -2.8331845, -0.4694867, 2.0911038...",0.506727,0.266762,0.103976,0.685105,0.453151,0.822124,contrasting,0.417421
"Pinot Nero, Pinot Noir, Alto Adige, Northeastern Italy, Italy","[-0.220507994, -2.96203423, -1.14020312, 2.124...",0.259995,0.264625,0.116278,0.790187,0.489861,0.596007,contrasting,0.428113
"Pinot Noir, Pernand-Vergelesses, Burgundy, France","[-0.65434355, -2.9512217, -0.7577858, 2.828970...",0.33352,0.323743,1.0,0.37422,0.570442,0.545163,contrasting,0.451777
"Viognier, Sonoma, California, US","[-1.2124338, -2.8940117, -0.32265207, 2.116998...",0.376925,0.173233,0.12249,0.702609,0.455431,1.0,contrasting,0.43057
"Bordeaux-style Red Blend, Mendoza, Mendoza Province, Argentina","[0.45802656, -2.1395717, -1.5197405, 0.9620181...",0.501071,0.476194,0.48186,0.064561,0.761808,0.211902,contrasting,0.363996
"Bordeaux-style Red Blend, Moulis-en-Médoc, Bordeaux, France","[0.619335651, -1.99680328, -1.8290695, 1.19856...",0.508549,0.518176,0.189213,0.100964,0.800727,0.237536,contrasting,0.363348
"Bordeaux-style Red Blend, Côtes de Bourg, Bordeaux, France","[0.40067133, -2.3302102, -1.8793099, 1.1648152...",0.493367,0.436013,0.408312,0.072697,0.767734,0.252688,contrasting,0.35552
"Bordeaux-style Red Blend, Central Coast, California, US","[0.26048073, -2.1731591, -1.9217277, 1.1726497...",0.518079,0.461081,0.145761,0.068132,0.774049,0.221401,contrasting,0.363301
