# Pairing

In [2]:
import pandas as pd
from gensim.models import Word2Vec

from src.data_preprocessing.food_normalization import get_food_descriptors
from src.pairing_rules import eliminate_not_well_together

## Preparation

### Loading files from previous stages

In [3]:
# Per-wine table written by an earlier stage: an `aroma` column plus the
# non-aroma columns (weight, sweet, acid, piquant, fat, bitter).
wine_vectors = pd.read_csv(
    "../data/wine_aromas_nonaromas.csv",
    index_col="Unnamed: 0",
)
# Preview only the first rows instead of rendering the whole frame.
wine_vectors.head()

Unnamed: 0,aroma,weight,sweet,acid,piquant,fat,bitter
"Aglianico, Italian Red, Aglianico del Vulture, Southern Italy, Italy",[ 1.47843817e-02 -2.56521249e+00 -1.55532622e+...,0.440418,0.559137,0.081831,0.074137,0.517114,0.190725
"Aglianico, Italian Red, Irpinia, Southern Italy, Italy",[-4.51575547e-01 -2.69118047e+00 -1.72507966e+...,0.487605,0.474853,0.089575,0.076614,0.455151,0.277241
"Aglianico, Italian Red, Taurasi, Southern Italy, Italy",[-1.1400039e-01 -2.5102782e+00 -1.4832231e+00 ...,0.46362,0.724887,0.117762,0.097869,0.526619,0.183798
"Alsace white blend, White Blend, Alsace, Alsace, France",[-7.26654351e-01 -2.61081553e+00 -1.22374463e+...,0.54908,0.508619,0.128604,0.303001,0.499623,0.582247
"Arneis, Italian White, Roero, Piedmont, Italy",[ 1.2258542 -1.9858316 -1.7651796 2.699851...,0.35546,0.373485,0.163732,0.092648,0.433071,0.389474


In [4]:
# Tabular artefacts from previous stages.
descriptor_frequencies = pd.read_csv(
    "../data/wine_variety_descriptors.csv",
    index_col="index",
)
food_nonaroma_infos = pd.read_csv("../data/food_nonaroma_info.csv", index_col="Unnamed: 0")

# Saved Word2Vec model; its `.wv` attribute is what is handed to
# `get_food_descriptors` further down.
wine_word2vec_model = Word2Vec.load("../data/word2vec.bin")
word_vectors = wine_word2vec_model.wv

### Food descriptors

In [5]:
# Example dish to pair, given as a list of food words.
food_items = ["peach", "pie"]

# The helper returns two values, unpacked here as the food's non-aroma
# descriptors and its aroma embedding (meaning taken from the target names;
# the helper's implementation lives in src.data_preprocessing).
food_nonaromas, aroma_embedding = get_food_descriptors(food_items, word_vectors, food_nonaroma_infos)

## Pairing rules

In [6]:
# Hand the rule a copy so that `wine_vectors` itself is never passed in;
# re-running this cell therefore always starts from the full wine table.
wine_recommendations = eliminate_not_well_together(
    wine_vectors.copy(),
    food_nonaromas,
)

In [7]:
# Display the frame returned by `eliminate_not_well_together` for the example food.
wine_recommendations

Unnamed: 0,aroma,weight,sweet,acid,piquant,fat,bitter
"Aglianico, Italian Red, Aglianico del Vulture, Southern Italy, Italy",[ 1.47843817e-02 -2.56521249e+00 -1.55532622e+...,0.440418,0.559137,0.081831,0.074137,0.517114,0.190725
"Aglianico, Italian Red, Irpinia, Southern Italy, Italy",[-4.51575547e-01 -2.69118047e+00 -1.72507966e+...,0.487605,0.474853,0.089575,0.076614,0.455151,0.277241
"Aglianico, Italian Red, Taurasi, Southern Italy, Italy",[-1.1400039e-01 -2.5102782e+00 -1.4832231e+00 ...,0.463620,0.724887,0.117762,0.097869,0.526619,0.183798
"Arneis, Italian White, Roero, Piedmont, Italy",[ 1.2258542 -1.9858316 -1.7651796 2.699851...,0.355460,0.373485,0.163732,0.092648,0.433071,0.389474
"Austrian Red Blend, Red Blends, , Burgenland, Austria",[-1.47745550e-01 -2.67721891e+00 -1.59417951e+...,0.158503,0.403206,0.161893,0.396891,0.467299,0.417130
...,...,...,...,...,...,...,...
"Zinfandel, Lake County, California, US",[-1.46520153e-01 -2.78838158e+00 -1.03163505e+...,0.421493,0.496041,0.191437,0.492402,0.433035,0.654826
"Zinfandel, Mendocino County, California, US",[-0.6121524 -2.9142952 -1.1299366 2.116801...,0.416774,0.217698,0.365325,0.274627,0.445641,0.514468
"Zinfandel, Napa, California, US",[-0.4464032 -2.8052273 -1.2141738 2.192418...,0.306265,0.284703,0.247261,0.352830,0.463343,0.517119
"Zinfandel, Sierra Foothills, California, US",[-0.47484103 -3.063965 -1.1808469 2.333048...,0.376317,0.299497,0.376456,0.328392,0.466701,0.557818
