In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [48]:
df = pd.read_csv("FastFoodNutritionMenuV3.csv")
df = df.drop(["Calories from Fat", "Weight Watchers Pnts"], axis=1)
df.head()

Unnamed: 0,Company,Item,Calories,Total Fat (g),Saturated Fat (g),Trans Fat (g),Cholesterol (mg),Sodium (mg),Carbs (g),Fiber (g),Sugars (g),Protein (g)
0,McDonald’s,Hamburger,250,9,3.5,0.5,25,520,31,2,6,12
1,McDonald’s,Cheeseburger,300,12,6.0,0.5,40,750,33,2,6,15
2,McDonald’s,Double Cheeseburger,440,23,11.0,1.5,80,1150,34,2,7,25
3,McDonald’s,McDouble,390,19,8.0,1.0,65,920,33,2,7,22
4,McDonald’s,Quarter Pounder® with Cheese,510,26,12.0,1.5,90,1190,40,3,9,29


In [49]:
for col in df.columns[2:]:
    temp_list = []
    for val in df[col]:
        try:
            temp_list.append(float(val))
        except ValueError:
            temp_list.append(np.nan)
    df[col] = temp_list

In [50]:
df = df.dropna()
df.head()

Unnamed: 0,Company,Item,Calories,Total Fat (g),Saturated Fat (g),Trans Fat (g),Cholesterol (mg),Sodium (mg),Carbs (g),Fiber (g),Sugars (g),Protein (g)
0,McDonald’s,Hamburger,250.0,9.0,3.5,0.5,25.0,520.0,31.0,2.0,6.0,12.0
1,McDonald’s,Cheeseburger,300.0,12.0,6.0,0.5,40.0,750.0,33.0,2.0,6.0,15.0
2,McDonald’s,Double Cheeseburger,440.0,23.0,11.0,1.5,80.0,1150.0,34.0,2.0,7.0,25.0
3,McDonald’s,McDouble,390.0,19.0,8.0,1.0,65.0,920.0,33.0,2.0,7.0,22.0
4,McDonald’s,Quarter Pounder® with Cheese,510.0,26.0,12.0,1.5,90.0,1190.0,40.0,3.0,9.0,29.0


### Categorize Items into Type of Meat and Food Type

In [56]:
def categorize_meat(row):
    food_dict = {
        "beef": ["burger", "beef", "rib", "pounder", "angus", "steak", "double", "single", "triple", "mac", "meat"], 
        "chicken": ["chicken", "nuggets", "breast", "thigh", "snack wrap"], 
        "pork": ["bacon", "sausage", "pork", "pepperoni"], 
        "vegetarian": ["vegetarian", "impossible", "salad", "fruit", "veggie", "fries", "potato", "potatoes", "cheese", "hash", "bean", "chili", "mcmuffin", "egg"], 
        "seafood": ["fish", "seafood"],
        "drink": ["tea", "coca", "pepsi", "shake", "dr pepper", "lemonade", "sprite", "coffee", "latte", "frosty", "diet", "tropicana", "freeze", "water"]
    }
    for key, value in food_dict.items():
        for type in value:
            if type in row["Item"].strip().lower():
                return key
    
    return "Other"
    

df["Food Type"] = df.apply(categorize_meat, axis=1)

In [57]:
df

Unnamed: 0,Company,Item,Calories,Total Fat (g),Saturated Fat (g),Trans Fat (g),Cholesterol (mg),Sodium (mg),Carbs (g),Fiber (g),Sugars (g),Protein (g),Food Type
0,McDonald’s,Hamburger,250.0,9.0,3.5,0.5,25.0,520.0,31.0,2.0,6.0,12.0,beef
1,McDonald’s,Cheeseburger,300.0,12.0,6.0,0.5,40.0,750.0,33.0,2.0,6.0,15.0,beef
2,McDonald’s,Double Cheeseburger,440.0,23.0,11.0,1.5,80.0,1150.0,34.0,2.0,7.0,25.0,beef
3,McDonald’s,McDouble,390.0,19.0,8.0,1.0,65.0,920.0,33.0,2.0,7.0,22.0,beef
4,McDonald’s,Quarter Pounder® with Cheese,510.0,26.0,12.0,1.5,90.0,1190.0,40.0,3.0,9.0,29.0,beef
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1142,Pizza Hut,Hand Tossed Slices Ultimate Cheese Lover’s® Me...,230.0,10.0,4.5,0.0,25.0,390.0,25.0,2.0,1.0,11.0,vegetarian
1143,Pizza Hut,Hand Tossed Slices Ultimate Cheese Lover’s® Large,310.0,13.0,6.0,0.0,35.0,540.0,33.0,2.0,1.0,15.0,vegetarian
1144,Pizza Hut,Hand Tossed Slices Veggie Lover’s® Small,120.0,4.0,1.5,0.0,10.0,230.0,17.0,1.0,1.0,5.0,vegetarian
1145,Pizza Hut,Hand Tossed Slices Veggie Lover’s® Medium,200.0,6.0,2.5,0.0,15.0,370.0,27.0,2.0,2.0,8.0,vegetarian


In [6]:
df.iloc[:,2:].corr()

Unnamed: 0,Calories,Total Fat (g),Saturated Fat (g),Trans Fat (g),Cholesterol (mg),Sodium (mg),Carbs (g),Fiber (g),Sugars (g),Protein (g)
Calories,1.0,0.841092,0.808437,0.558733,0.609389,0.73043,0.67134,0.474178,0.252874,0.742114
Total Fat (g),0.841092,1.0,0.887319,0.555164,0.699141,0.821768,0.184623,0.46456,-0.238389,0.807907
Saturated Fat (g),0.808437,0.887319,1.0,0.675898,0.71529,0.688962,0.253274,0.324577,-0.09439,0.773096
Trans Fat (g),0.558733,0.555164,0.675898,1.0,0.364635,0.320358,0.221435,0.070879,0.080362,0.591104
Cholesterol (mg),0.609389,0.699141,0.71529,0.364635,1.0,0.664398,0.124196,0.220025,-0.159341,0.689326
Sodium (mg),0.73043,0.821768,0.688962,0.320358,0.664398,1.0,0.16678,0.604759,-0.311609,0.816994
Carbs (g),0.67134,0.184623,0.253274,0.221435,0.124196,0.16678,1.0,0.203405,0.825194,0.13673
Fiber (g),0.474178,0.46456,0.324577,0.070879,0.220025,0.604759,0.203405,1.0,-0.253565,0.441854
Sugars (g),0.252874,-0.238389,-0.09439,0.080362,-0.159341,-0.311609,0.825194,-0.253565,1.0,-0.238671
Protein (g),0.742114,0.807907,0.773096,0.591104,0.689326,0.816994,0.13673,0.441854,-0.238671,1.0


In [108]:
col_order = ["Company", "Item", "Food Type"] + list(df.columns[2:-1])
df[col_order].to_csv("FastFoodNutritionMenuV3_Cleaned.csv", index=False)

df_relabeled = pd.read_csv("FastFoodNutritionMenuV3_Cleaned_relabeled.csv")

In [113]:
df_relabeled

Unnamed: 0,Company,Item,Food Type,Calories,Total Fat (g),Saturated Fat (g),Trans Fat (g),Cholesterol (mg),Sodium (mg),Carbs (g),Fiber (g),Sugars (g),Protein (g)
0,McDonald’s,Hamburger,beef,250,9.0,3.5,0.5,25,520,31,2,6,12
1,McDonald’s,Cheeseburger,beef,300,12.0,6.0,0.5,40,750,33,2,6,15
2,McDonald’s,Double Cheeseburger,beef,440,23.0,11.0,1.5,80,1150,34,2,7,25
3,McDonald’s,McDouble,beef,390,19.0,8.0,1.0,65,920,33,2,7,22
4,McDonald’s,Quarter Pounder® with Cheese,beef,510,26.0,12.0,1.5,90,1190,40,3,9,29
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1030,Pizza Hut,Hand Tossed Slices Ultimate Cheese Lover’s® Me...,vegetarian,230,10.0,4.5,0.0,25,390,25,2,1,11
1031,Pizza Hut,Hand Tossed Slices Ultimate Cheese Lover’s® Large,vegetarian,310,13.0,6.0,0.0,35,540,33,2,1,15
1032,Pizza Hut,Hand Tossed Slices Veggie Lover’s® Small,vegetarian,120,4.0,1.5,0.0,10,230,17,1,1,5
1033,Pizza Hut,Hand Tossed Slices Veggie Lover’s® Medium,vegetarian,200,6.0,2.5,0.0,15,370,27,2,2,8


In [114]:
scaler = StandardScaler()
data = df_relabeled.loc[df_relabeled["Food Type"] != "Other", :]
scaler.fit(data.loc[:,"Calories":].values)
new_data = scaler.transform(data.loc[:,"Calories":].values)
new_data

array([[-0.29624794, -0.31412983, -0.23027457, ...,  0.10650538,
        -0.43636494,  0.02654266],
       [-0.0870427 , -0.11453101,  0.233361  , ...,  0.10650538,
        -0.43636494,  0.27328228],
       [ 0.49873197,  0.61733131,  1.16063214, ...,  0.10650538,
        -0.40351157,  1.09574769],
       ...,
       [-0.84018156, -0.64679452, -0.60118302, ..., -0.27880991,
        -0.60063176, -0.54918313],
       [-0.50545318, -0.51372865, -0.4157288 , ...,  0.10650538,
        -0.56777839, -0.3024435 ],
       [-0.25440689, -0.31412983, -0.13754746, ...,  0.49182068,
        -0.53492503, -0.05570388]])

In [115]:
X = new_data
y = data.loc[:,"Food Type"].values

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create SVM classifier
clf = SVC(kernel='rbf')
# clf = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7553956834532374


In [116]:
# Make predictions
y_pred_all = clf.predict(X)

# Evaluate the classifier
accuracy = accuracy_score(y, y_pred_all)
print("Accuracy:", accuracy)

Accuracy: 0.8219971056439942


In [117]:
data["Predicted Food Type"] = y_pred_all
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["Predicted Food Type"] = y_pred_all


Unnamed: 0,Company,Item,Food Type,Calories,Total Fat (g),Saturated Fat (g),Trans Fat (g),Cholesterol (mg),Sodium (mg),Carbs (g),Fiber (g),Sugars (g),Protein (g),Predicted Food Type
0,McDonald’s,Hamburger,beef,250,9.0,3.5,0.5,25,520,31,2,6,12,beef
1,McDonald’s,Cheeseburger,beef,300,12.0,6.0,0.5,40,750,33,2,6,15,beef
2,McDonald’s,Double Cheeseburger,beef,440,23.0,11.0,1.5,80,1150,34,2,7,25,beef
3,McDonald’s,McDouble,beef,390,19.0,8.0,1.0,65,920,33,2,7,22,beef
4,McDonald’s,Quarter Pounder® with Cheese,beef,510,26.0,12.0,1.5,90,1190,40,3,9,29,beef
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1030,Pizza Hut,Hand Tossed Slices Ultimate Cheese Lover’s® Me...,vegetarian,230,10.0,4.5,0.0,25,390,25,2,1,11,chicken
1031,Pizza Hut,Hand Tossed Slices Ultimate Cheese Lover’s® Large,vegetarian,310,13.0,6.0,0.0,35,540,33,2,1,15,chicken
1032,Pizza Hut,Hand Tossed Slices Veggie Lover’s® Small,vegetarian,120,4.0,1.5,0.0,10,230,17,1,1,5,drink
1033,Pizza Hut,Hand Tossed Slices Veggie Lover’s® Medium,vegetarian,200,6.0,2.5,0.0,15,370,27,2,2,8,vegetarian


In [118]:
col_order = ["Company", "Item", "Food Type", "Predicted Food Type"] + list(data.columns[3:-1])
data[col_order].to_csv("FastFoodNutritionMenuV3_cleaned_predicted.csv")