# Build Classification Models

In [2]:
import pandas as pd

cuisines_df = pd.read_csv('../data/transformed_cuisines.csv')
cuisines_df.head()

Unnamed: 0.1,Unnamed: 0,cuisine,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,...,whiskey,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini
0,0,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,indian,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve
from sklearn.svm import SVC
import numpy as np

In [6]:
# Divide X and y
cuisines_label_df = cuisines_df['cuisine']
cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)

### try a logistic regression

In [8]:
lr = LogisticRegression(multi_class='ovr',solver='liblinear')
model = lr.fit(X_train, np.ravel(y_train))

accuracy = model.score(X_test, y_test)
print ("Accuracy is {}".format(accuracy))

Accuracy is 0.8056713928273561




In [9]:
lr = LogisticRegression(multi_class='ovr',solver='lbfgs')
model = lr.fit(X_train, np.ravel(y_train))

accuracy = model.score(X_test, y_test)
print ("Accuracy is {}".format(accuracy))

Accuracy is 0.804837364470392




In [17]:
# Display results for one test
line_index = 40

print(f'ingredients: {X_test.iloc[line_index][X_test.iloc[line_index]!=0].keys()}')
print(f'cuisine: {y_test.iloc[line_index]}')
test= X_test.iloc[line_index].values.reshape(-1, 1).T
proba = model.predict_proba(test)
classes = model.classes_
resultdf = pd.DataFrame(data=proba, columns=classes)

topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])
topPrediction.head()

ingredients: Index(['grape_juice', 'honey', 'scallion', 'sesame_oil', 'soy_sauce',
       'soybean', 'starch', 'wheat'],
      dtype='object')
cuisine: japanese




Unnamed: 0,0
chinese,0.659023
korean,0.293599
japanese,0.041086
thai,0.006139
indian,0.000152


In [11]:
y_pred = model.predict(X_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

     chinese       0.80      0.73      0.77       247
      indian       0.89      0.92      0.90       225
    japanese       0.68      0.83      0.75       240
      korean       0.84      0.81      0.83       245
        thai       0.85      0.74      0.79       242

    accuracy                           0.80      1199
   macro avg       0.81      0.81      0.81      1199
weighted avg       0.81      0.80      0.81      1199

