## 1. Setup

In [None]:
#!pip install --quiet cornac==1.14.2

In [12]:
import os
import sys
from collections import defaultdict

import numpy as np
import pandas as pd; pd.set_option("max_colwidth", 0);
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import re
import cornac
from cornac.utils import cache
from cornac.datasets import filmtrust, amazon_clothing
from cornac.eval_methods import RatioSplit,StratifiedSplit
from cornac.models import PMF, SoRec, WMF, CTR, BPR, VBPR
from cornac.data import TextModality,ReviewModality
from cornac.data.text import BaseTokenizer

#%tensorflow_version 1.x
import tensorflow as tf

print(f"System version: {sys.version}")
print(f"Cornac version: {cornac.__version__}")
print(f"Tensorflow version: {tf.__version__}")

SEED = 42
VERBOSE = True

System version: 3.7.13 (default, Mar 28 2022, 08:03:21) [MSC v.1916 64 bit (AMD64)]
Cornac version: 1.14.2
Tensorflow version: 1.15.0
ERROR! Session/line number was not unique in database. History logging moved to new session 1917


In [2]:
#import cornac
from cornac.models import MF, BPR, WMF, MostPop
from cornac.eval_methods import BaseMethod,RatioSplit
from cornac.metrics import MAE, RMSE
rmse = cornac.metrics.RMSE()
K=20
rec_20 = cornac.metrics.Recall(k=K)
pre_20 = cornac.metrics.Precision(k=K)
ndcg_20 = cornac.metrics.NDCG(k=K)
f1_20=cornac.metrics.FMeasure(k=K)
K=50
rec_50 = cornac.metrics.Recall(k=K)
pre_50 = cornac.metrics.Precision(k=K)
ndcg_50 = cornac.metrics.NDCG(k=K)
f1_50=cornac.metrics.FMeasure(k=K)
K=100
rec_100 = cornac.metrics.Recall(k=K)
pre_100 = cornac.metrics.Precision(k=K)
ndcg_100 = cornac.metrics.NDCG(k=K)
f1_100=cornac.metrics.FMeasure(k=K)
#cornac.metrics.AUC()

In [3]:
def value_counts_df(df, col):
    df = pd.DataFrame(df[col].value_counts())
    df.index.name = col
    df.columns = ['count']
    return df
def sparsity(df):
    l=df.columns 
    a,b=df[l[0]].nunique(),df[l[1]].nunique()
    x=len(df)/a/b
    return x
def to_obj(df):
    return  cornac.data.Dataset.from_uir(df.itertuples(index=False))


In [13]:
def process_text(text: str):
    processed = text.strip()
    processed = re.sub('["]', '', processed)
    processed = re.sub('[\s]', ' ', processed)
    return processed.lower()

# processed_df = review_df.dropna()
# processed_df["processed_text"] = processed_df["review_text"].apply(lambda x: process_text(x))
# processed_df = processed_df[processed_df["processed_text"] != ""]
# processed_df = processed_df[~processed_df["processed_text"].str.contains("^[^A-Za-z0-9\s,.?!@#$%^&*]+$", na=False)]
# processed_df = processed_df[["processed_text", "item_id"]]

## 2. Data Preparation and Exploration

In [5]:
mode='local'  #chanege to 'local' to read from same local directory as this notebook
# url='https://raw.githubusercontent.com/hestella/CS608/main/cs608_ip_train_v3.csv'
# url2='https://raw.githubusercontent.com/hestella/CS608/main/cs608_ip_probe_v3.csv'
if mode=='local':
    url,url2,url3='data/train_ratings_seen.csv','data/test_ratings_unseen.csv','data/review_item_all.csv'
train_df = pd.read_csv(url,sep=",",header=0, names=["UserID", "ItemID", "Rating"])
train_df["Rating"]= pd.to_numeric(train_df["Rating"], errors='coerce')
#traindata = cornac.data.Dataset.from_uir(train_df.itertuples(index=False))
test_df = pd.read_csv(url2,sep=",",header=0, names=["UserID", "ItemID", "Rating"])
test_df["Rating"]= pd.to_numeric(test_df["Rating"], errors='coerce')
#testdata = cornac.data.Dataset.from_uir(test_df.itertuples(index=False))
UC=value_counts_df(train_df ,"UserID")
IC=value_counts_df(train_df,"ItemID")
NUSERS=len(UC)
NITEMS=len(IC)
print(f"Training data contains {NUSERS} users and {NITEMS} items")
percentage="{:.4%}".format(sparsity(train_df))
print(f"Spasity of training rating matrix is {percentage}")
# percentage="{:.4%}".format(spstest)
# print(f"Spasity of training rating matrix is {percentage}")

Training data contains 5066 users and 39520 items
Spasity of training rating matrix is 0.0599%


In [6]:
train_df.tail(10)

Unnamed: 0,UserID,ItemID,Rating
119912,32474245,75772,5.0
119913,32474245,1180252,3.0
119914,32474245,1255035,4.5
119915,32474245,1099407,2.5
119916,32474245,14362,5.0
119917,58534725,24056,3.5
119918,58534725,10241561,4.0
119919,58534725,8584824,4.0
119920,58534725,18978,4.0
119921,58534725,3071535,3.5


In [7]:
default=IC.reset_index().copy()
default["Review"]="This is a wine"
default=default[["ItemID","Review"]]

In [8]:
#10 reviews per item
review_df=pd.read_csv("data/reviews_10_all.csv", header=0)
# More reviews
review_df_v2=pd.read_csv("data/reviews_10_all_v2.csv", header=0)
# use wine info as content 
review_df_v3=pd.read_csv("data/reviews_10_all_v2.csv", header=0)
name_df=pd.read_csv("data/wine_info_all.csv", header=0)
name_df['Wine']=name_df['Wine'].astype(str)
review_df['Review']=review_df['Review'].astype(str)
review_df_v2['Review']=review_df_v2['Review'].astype(str)
review_df=pd.concat([default,review_df])
review_df_v2=pd.concat([default,review_df_v2])

In [9]:
print(len(name_df),len(review_df_v2))

44191 857586


In [10]:
review_df

Unnamed: 0,ItemID,Review
0,1652,This is a wine
1,1135843,This is a wine
2,7103,This is a wine
3,18978,This is a wine
4,14362,This is a wine
...,...,...
424529,2204182,"Caramel, apples and butter.. A really nice wine. Typical aged bourgundy Chardonnay.. Sad to say that my boss have no taste at all... thought that he had a palate worth his income. I was wrong!"
424530,2204182,"This 1er Cru seizes the senses by confirming its terroir provenance but with in a modern kind of approach.Indeed,even though producers affirm the use of 20% new oak,typical Vergelesses aromas of berry fruit,plum, geranium,violet,liquorice and earthiness are rapped up by the wine's toast and vanillin notes,but in a subtle way.Med bodied,the wine seats on a fleshy structure with dry tannins and a limey acidity that prolongs the fresh and mineral finish.Hold for couple of years to allow tannin integration."
424531,2204182,"Excellent. The makings of bankruptcy court. Beautiful perfumed nose that is intense and very aromatic with definite hints of oak. Very long length with excellent structure and a tingle of acidity. Minerally palate with hints of stone fruit and salted butter. Extremely well balanced wine, a work of art."
424532,2204182,(89-92/100). Light but intense ! The wine shows lichee and pear. A nice fresh fruit touch. But the minerality and the touch of oak is what it makes special. A light mix inbetween dagueneau and coche. Not that niveau... But the art of wine. Great value ! Www.rullivins.ch


In [11]:
#A dictionary of already selected items

def selected_items(df):
    selected={}
    for (x,y,_) in df.itertuples(index=False):
        selected[x]=[]
    for (x,y,_) in df.itertuples(index=False):    
        selected[x].append(y)
    return selected
train_seleted=selected_items(train_df)
test_seleted=selected_items(test_df)


### Preprocessing: combine all the review of one item; remove items with black review


In [14]:
review_df["Review"] = review_df["Review"].apply(lambda x: process_text(x))
review_df_v2["Review"] = review_df_v2["Review"].apply(lambda x: process_text(x))

reviews= review_df.groupby(["ItemID"])["Review"].apply(lambda x: ', '.join(x)).reset_index()
reviews_v2= review_df_v2.groupby(["ItemID"])["Review"].apply(lambda x: ', '.join(x)).reset_index()

In [15]:
rating_data= [(x,y,z) for idx,x,y,z in train_df.itertuples()]
#reviews= review_df.groupby(["ItemID"])["Review"].apply(lambda x: ','.join(x)).reset_index()

combine_name=False
#if combine_name :
#******************************
    #name_df.columns=["ItemID","Review"]
#     review_df_2=pd.concat([name_df,review_df])
#     reviews= review_df_2.groupby(["ItemID"])["Review"].apply(lambda x: ','.join(x)).reset_index()
#******************************


review_data= [(y,r) for _,y,r in reviews_v2.itertuples() ]
#itemlist=list(review_df['ItemID'])


ratings = rating_data #amazon_clothing.load_feedback()
item_ids ,docs= [v[0] for v in review_data] , [v[1] for v in review_data]   #amazon_clothing.load_text()

docs[100]

'pleasant without being outstanding. suitable bbq wine, especially with atmosphere of a warm evening on the beach in st.lucia., oak, blackberry, rich., very good pencil shavings taste. full bodied but not fruity. oaky and slightly tart., good value, easy tasting, seams pure alcahol, appears purple, but actually deep ruby. black fruit on the nose. lovely, soft, inky texture. full- body, medium tannins, high dryness, med+ acidity. bold. fruit forward, blackberry, chilli, black pepper. a great wine and great qpr if you are looking for a big wine at a relatively small price point., i can’t agree with the site ranking, because this wine is very good. complex, harmonic and well structured. 4.0  cheers  🍷🍷🍷, smooth - with some structure - good choice for the price, excellent  perfect timing for this 2013 vintage !!!!!, purple red color, woody and fruity, nicely structured but lacks some character..., very nice wine for this price point., nice earthy syrah. good with my mushroom pizza! brasili

In [21]:
#reviews.to_csv("data/reviews_dataframe_UID.csv")

In [11]:
# rating_data= [(x,y,z) for idx,x,y,z in train_df.itertuples()]

# default=IC.reset_index().copy()
# default["Review"]="This is a wine"
# default=default[["ItemID","Review"]]
                 
# review_df=pd.read_csv(url3,sep=",",header=0, names=["Review", "ItemID"],na_values="")
# #print(len(review_df))
# review_df.dropna(inplace=True)
# review_df.to_csv("data/reviews_dataframe.csv")

In [16]:
len(item_ids)

44612

In [17]:

item_text_modality = TextModality(
    corpus=docs,
    ids=item_ids,
    tokenizer=BaseTokenizer(sep=" ", stop_words="english"),
    max_vocab=3000,
    max_doc_freq=0.5,
    use_idf=False
)


stratified_split=StratifiedSplit(data=ratings,group_by='user',fmt ="UIR",
                                 test_size=0.2,
                                 #val_size =0.1,
                                 rating_threshold=3.5,
                                 exclude_unknowns=True,
                                 item_text=item_text_modality,
                                 verbose=VERBOSE,
                                 seed=SEED,
                                 )

rating_threshold = 3.5
exclude_unknowns = True
---
Training data:
Number of users = 5066
Number of items = 34911
Number of ratings = 97965
Max rating = 5.0
Min rating = 1.0
Global mean = 3.9
---
Test data:
Number of users = 4829
Number of items = 8431
Number of ratings = 18146
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 5066
Total items = 34911


In [18]:

most_pop = cornac.models.MostPop()

K =20
ctr1 = CTR(k=K, max_iter=60, a=1.0, b=0.01, lambda_u=0.001, lambda_v=0.001, verbose=VERBOSE, seed=SEED)
#wmf1 = WMF(k=K, max_iter=100, a=1.0, b=0.01, learning_rate=0.005, lambda_u=0.01, lambda_v=0.01, verbose=VERBOSE, seed=SEED)
K =50
ctr2 = CTR(k=K, max_iter=60, a=1.0, b=0.01, lambda_u=0.001, lambda_v=0.001, verbose=VERBOSE, seed=SEED)
#wmf1 = WMF(k=K, max_iter=100, a=1.0, b=0.01, learning_rate=0.005, lambda_u=0.01, lambda_v=0.01, verbose=VERBOSE, seed=SEED)

K= 80
ctr3 = CTR(k=K, max_iter=60, a=1.0, b=0.01, lambda_u=0.001, lambda_v=0.001, verbose=VERBOSE, seed=SEED)
#wmf2 = WMF(k=K, max_iter=100, a=1.0, b=0.01, learning_rate=0.005, lambda_u=0.01, lambda_v=0.01, verbose=VERBOSE, seed=SEED)

del K

models= [most_pop,ctr1,ctr2,ctr3]
metrics=[rec_20,rec_50,rec_100,ndcg_20,ndcg_50,ndcg_100]

cornac.Experiment(eval_method=stratified_split, models=models, metrics=metrics).run()


[MostPop] Training started!

[MostPop] Evaluation started!


Ranking: 100%|████████████████████████████████████████████████████████████████████| 4829/4829 [00:07<00:00, 659.43it/s]



[CTR] Training started!


100%|████████████████████████████████████████| 60/60 [09:14<00:00,  9.24s/it, cf_loss=3.44e+5, lda_likelihood=-1.22e+7]


Learning completed!

[CTR] Evaluation started!


Ranking: 100%|████████████████████████████████████████████████████████████████████| 4829/4829 [00:16<00:00, 301.59it/s]



[CTR] Training started!


100%|████████████████████████████████████████| 60/60 [12:23<00:00, 12.40s/it, cf_loss=2.12e+5, lda_likelihood=-1.71e+7]


Learning completed!

[CTR] Evaluation started!


Ranking: 100%|████████████████████████████████████████████████████████████████████| 4829/4829 [00:16<00:00, 300.07it/s]



[CTR] Training started!


100%|████████████████████████████████████████| 60/60 [14:34<00:00, 14.57s/it, cf_loss=1.54e+5, lda_likelihood=-1.94e+7]


Learning completed!

[CTR] Evaluation started!


Ranking: 100%|████████████████████████████████████████████████████████████████████| 4829/4829 [00:17<00:00, 277.82it/s]


TEST:
...
        | NDCG@100 | NDCG@20 | NDCG@50 | Recall@100 | Recall@20 | Recall@50 | Train (s) | Test (s)
------- + -------- + ------- + ------- + ---------- + --------- + --------- + --------- + --------
MostPop |   0.0390 |  0.0208 |  0.0302 |     0.1103 |    0.0373 |    0.0721 |    0.0010 |   7.3528
CTR     |   0.0567 |  0.0375 |  0.0470 |     0.1318 |    0.0545 |    0.0888 |  554.3733 |  16.0155
CTR     |   0.0695 |  0.0522 |  0.0608 |     0.1369 |    0.0671 |    0.0990 |  744.0146 |  16.0977
CTR     |   0.0746 |  0.0573 |  0.0663 |     0.1395 |    0.0703 |    0.1042 |  874.4752 |  17.3870






In [32]:
ctr1.save("CTR_20_60_001_001")
ctr2.save("CTR_50_60_01_001")
ctr3.save("CTR_80_60_001_001")
wmf3.save("WMF_80_60_001_001")

ERROR! Session/line number was not unique in database. History logging moved to new session 1920
CTR model is saved to CTR_20_60_001_001\CTR\2022-06-16_11-10-15-999203.pkl
CTR model is saved to CTR_50_60_01_001\CTR\2022-06-16_11-10-16-013203.pkl
CTR model is saved to CTR_80_60_001_001\CTR\2022-06-16_11-10-16-047236.pkl
WMF model is saved to WMF_80_60_001_001\WMF\2022-06-16_11-10-16-106816.pkl


'WMF_80_60_001_001\\WMF\\2022-06-16_11-10-16-106816.pkl'

In [20]:
K = 80

wmf3 = WMF(k=K, max_iter=80, a=1.0, b=0.01, learning_rate=0.005, lambda_u=0.001, lambda_v=0.001, verbose=VERBOSE, seed=SEED)


models= [wmf3]
metrics=[rec_20,rec_50,rec_100,ndcg_20,ndcg_50,ndcg_100]

cornac.Experiment(eval_method=stratified_split, models=models, metrics=metrics).run()


[WMF] Training started!


100%|███████████████████████████████████████████████████████████████████████| 80/80 [10:44<00:00,  8.06s/it, loss=10.3]


Learning completed!

[WMF] Evaluation started!


Ranking: 100%|████████████████████████████████████████████████████████████████████| 4829/4829 [00:16<00:00, 294.61it/s]


TEST:
...
    | NDCG@100 | NDCG@20 | NDCG@50 | Recall@100 | Recall@20 | Recall@50 | Train (s) | Test (s)
--- + -------- + ------- + ------- + ---------- + --------- + --------- + --------- + --------
WMF |   0.0722 |  0.0557 |  0.0639 |     0.1363 |    0.0705 |    0.1002 |  645.5677 |  16.3940






### https://github.com/PreferredAI/cornac/blob/master/tutorials/param_search_vaecf.ipynb

In [58]:
# vocab = ctr3.train_set.item_text.vocab
# topic_word_dist = ctr3.model.beta.T[:, -ctr3.train_set.item_text.max_vocab:] 
# top_word_inds = np.argsort(topic_word_dist, axis=1) + 4  # ingore 4 special tokens

# topic_words = {}
# topic_df = defaultdict(list)
# print("WORD TOPICS:")
# for t in range(len(topic_word_dist)):
#   top_words = vocab.to_text(top_word_inds[t][-10:][::-1], sep=", ")
#   topic_words[t+1] = top_words
#   topic_df["Topic"].append(t + 1)
#   topic_df["Top words"].append(top_words)
# topic_df = pd.DataFrame(topic_df)
# topic_df[:20]

In [59]:
# UIDX = 5065
# TOPK =5

# item_id2idx = ctr3.train_set.iid_map
# item_idx2id = list(ctr3.train_set.item_ids)

# print(f"USER {UIDX} TOP-{TOPK} TOPICS:")
# topic_df.loc[np.argsort(ctr3.U[UIDX])[-TOPK:][::-1]]

In [93]:
# recommendations, scores = ctr1.rank(UIDX)
# print(f"\nTOP {TOPK} RECOMMENDATIONS FOR USER {UIDX}:")
# rec_df = defaultdict(list)
# for i in recommendations[:TOPK]:
#   rec_df["URL"].append(f"https://www.vivino.com/SG/en/w/{item_idx2id[i]}")
#   rec_df["Description"].append(ctr1.train_set.item_text.corpus[i])
# pd.DataFrame(rec_df)

### Output and evaluation fuctions

In [27]:
train_seleted=selected_items(train_df)
test_seleted=selected_items(test_df)

ERROR! Session/line number was not unique in database. History logging moved to new session 1919


In [28]:
def get_top_N(model,N, userList=None, verbose=False):
    #fname=model.name+ "_result{}.txt".format(datetime.now().strftime('%d%H%M') )
    output={}
    users=list(model.train_set.user_ids)
    items=list(model.train_set.item_ids)
    if userList==None:
        userList=users
    for user in userList:
        if verbose:
            if user%100==0: print(user)
        uid=users.index(user)
        score_all=(model.rank(uid))[0][:N+50]
        lst0=[items[a] for a in score_all]
        lst= [x for x in lst0 if x not in train_seleted[user]][:N]
        #output[user]=str(user)+' : '+' '.join(lst)
        output[user]=lst#' '.join(lst)
            
    assert (len(output)==len(userList))
    return output


In [33]:
def print_top_N(model,N, user, verbose=False):
    output={}
    users=list(model.train_set.user_ids)
    items=list(model.train_set.item_ids)
    uid=users.index(user)
    score_all=(model.rank(uid))[0][:K+50]
    lst0=[items[a] for a in score_all]
    lst= [x for x in lst0 if x not in train_seleted[user]][:N]
    #output[user]=str(user)+' : '+' '.join(lst)
    output[user]=lst#' '.join(lst)
    return(output[user])        


In [34]:

def evaluate_model(model, N,test_seleted=None,test_df=None):
    to_select=get_top_N(model,N+50)
    if test_seleted==None:
        test_seleted=selected_items(test_df)
    recalls=[]
    for user in test_seleted:
        all_list= test_seleted[user]
        recall_temp= len([x for x in to_select[user] if x in all_list])/len(all_list)
        recalls.append(recall_temp)
        
    return recalls, np.mean(np.array(recalls))
    

In [35]:
N=20

## What if we recommed only unseen items?

In [46]:
model_list=[ctr1,ctr2,ctr3,wmf3]

print([mdl.name +':'+'{:.3f}'.format(evaluate_model(mdl,20,test_seleted)[1]) for mdl in model_list])

print([mdl.name +':'+'{:.3f}'.format(evaluate_model(mdl,50,test_seleted)[1]) for mdl in model_list])


['CTR:0.084', 'CTR:0.075', 'CTR:0.073', 'WMF:0.071']
['CTR:0.105', 'CTR:0.094', 'CTR:0.089', 'WMF:0.085']


## Print the wine informations to assess models strengths

In [38]:
wine_info=pd.read_csv("data/wine_info_all.csv", index_col="Wine ID")

### User's currenct selection

In [61]:
my_id=58534725
topN=10

wine_info.loc[train_seleted[my_id]] 

Unnamed: 0_level_0,Wine,region,country,winery
Wine ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
24056,Bin 389 Cabernet - Shiraz,South Australia,Australia,Penfolds
10241561,Le Tradition Châteauneuf-du-Pape,Châteauneuf-du-Pape,France,Clos du Caillou
8584824,Neumagener Rosengartchen Riesling Auslese,Mosel,Germany,Ansgar Clüsserath
18978,Sauvignon Blanc,Marlborough,New Zealand,Cloudy Bay
3071535,Private Selection Old Vines Malbec,Mendoza,Argentina,La Linda


ERROR! Session/line number was not unique in database. History logging moved to new session 1929


### Model recommendations

In [51]:
wine_info.loc[print_top_N(most_pop,topN,my_id)] 

Unnamed: 0_level_0,Wine,region,country,winery
Wine ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1135843,Réserve (de la Comtesse) Pauillac,Pauillac,France,Château Pichon Longueville Comtesse de Lalande
1652,Tignanello,Toscana,Italy,Antinori
7103,Amarone della Valpolicella Classico,Amarone della Valpolicella Classico,Italy,Tommasi
14362,Pauillac (Grand Cru Classé),Pauillac,France,Château Pontet-Canet
75190,Rouge (Gaston Hochar),Bekaa Valley,Lebanon,Château Musar
5078,Sassicaia,Bolgheri Sassicaia,Italy,Tenuta San Guido
75626,Amarone della Valpolicella,Amarone della Valpolicella,Italy,Montresor
7972,Rioja Gran Reserva 904,Rioja,Spain,La Rioja Alta
86684,Brut Champagne,Champagne,France,Dom Pérignon
5080,Guidalberto,Toscana,Italy,Tenuta San Guido


In [52]:
wine_info.loc[print_top_N(model_list[0],topN,my_id)] 

Unnamed: 0_level_0,Wine,region,country,winery
Wine ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
9530,Sauvignon Blanc,Marlborough,New Zealand,Oyster Bay
1128385,Brut (Carte Jaune) Champagne,Champagne,France,Veuve Clicquot
1244,Bin 28 Shiraz,Barossa Valley,Australia,Penfolds
1652,Tignanello,Toscana,Italy,Antinori
4275488,Max's Shiraz,South Australia,Australia,Penfolds
2316699,Red Blend,Victoria,Australia,19 Crimes
1104824,Whispering Angel Rosé,Côtes de Provence,France,Château d'Esclans
2968897,Primitivo di Manduria,Primitivo di Manduria,Italy,Borgo del Mandorlo
63564,Sauvignon Blanc,Marlborough,New Zealand,Matua
1654,Tenuta Tignanello 'Marchese Antinori' Chianti Classico Riserva,Chianti Classico,Italy,Antinori


In [53]:
wine_info.loc[print_top_N(model_list[1],topN,my_id)] 

Unnamed: 0_level_0,Wine,region,country,winery
Wine ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
24232,Old Vines Grenache - Shiraz - Mourvedre,Barossa Valley,Australia,Torbreck
1306,Koonunga Hill Shiraz - Cabernet,South Australia,Australia,Penfolds
73757,Founders' Estate Cabernet Sauvignon,California,United States,Beringer
1995,Koonunga Hill Cabernet Sauvignon,Barossa,Australia,Penfolds
1143834,Sauvignon Blanc,Marlborough,New Zealand,Clearwater Cove
5067374,Koonunga Hill Seventy Six Shiraz - Cabernet,South Australia,Australia,Penfolds
5310954,Barossa Ink Shiraz,Barossa,Australia,Grant Burge
1101590,Toscana,Toscana,Italy,Caiarossa
1759011,Crianza,Rioja,Spain,Altos Ibéricos
63564,Sauvignon Blanc,Marlborough,New Zealand,Matua


In [54]:
wine_info.loc[print_top_N(model_list[2],topN,my_id)] 

Unnamed: 0_level_0,Wine,region,country,winery
Wine ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1306,Koonunga Hill Shiraz - Cabernet,South Australia,Australia,Penfolds
73757,Founders' Estate Cabernet Sauvignon,California,United States,Beringer
20751,Chardonnay,Margaret River,Australia,Cape Mentelle
61134,Barolo,Barolo,Italy,Prunotto
1995,Koonunga Hill Cabernet Sauvignon,Barossa,Australia,Penfolds
5739996,Sauvignon Blanc,Marlborough,New Zealand,Wild Rock
1162764,Koonunga Hill Autumn Riesling,Barossa,Australia,Penfolds
1130073,Gold Label Barossa Shiraz,Barossa,Australia,Wolf Blass
5067374,Koonunga Hill Seventy Six Shiraz - Cabernet,South Australia,Australia,Penfolds
1173085,Il Frappato Sicilia,Terre Siciliane,Italy,Occhipinti


In [62]:
wine_info.loc[print_top_N(model_list[3],topN,my_id)] 

Unnamed: 0_level_0,Wine,region,country,winery
Wine ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
9530,Sauvignon Blanc,Marlborough,New Zealand,Oyster Bay
1162764,Koonunga Hill Autumn Riesling,Barossa,Australia,Penfolds
94968,Brunello di Montalcino,Brunello di Montalcino,Italy,Collosorbo
20751,Chardonnay,Margaret River,Australia,Cape Mentelle
1953311,Stock's Hill Cabernet Sauvignon,McLaren Vale,Australia,Pirramimma
1461717,Sauvignon Blanc,Marlborough,New Zealand,Babydoll
1202838,Papale Linea Oro Primitivo di Manduria,Primitivo di Manduria,Italy,Varvaglione
1157167,Bin 23 Pinot Noir,Adelaide Hills,Australia,Penfolds
2316699,Red Blend,Victoria,Australia,19 Crimes
8623,Coto de Imaz Rioja Gran Reserva,Rioja,Spain,El Coto
