# Addictive Model

## Libraries

In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
from imblearn.over_sampling import SMOTE
import pandas as pd
import numpy as np
from collections import Counter

## Import Dataset

In [2]:
file_path = '../data/food_data_cleaned.xlsx'
food_data = pd.ExcelFile(file_path)

In [3]:
data = food_data.parse('Sheet1')
data.head()

Unnamed: 0,Food Name,Energy (KJ/100g),Sugars (g/100g),Saturated Fat (g/100g),Salt (g/100g),Proteins (g/100g),Fiber (g/100g),Fruit/Veg (%),Nutri-Score,Ecoscore
0,Nutella,2252.0,56.3,10.6,0.107,6.3,0.0,0.0,e,21
1,Sésame,1961.0,17.0,2.0,0.38,10.0,4.6,0.0,b,57
2,Almonds,2567.0,4.8,4.3,0.01,24.5,12.1,100.0,a,24
3,Alvalle Gazpacho l'original,168.0,3.3,0.4,0.62,0.9,1.2,0.0,a,82
4,70% Cacao noir intense,2350.0,30.0,24.0,0.1,9.5,0.0,0.0,e,29


In [4]:
def AnotherMethodNutriScore(data, nutri_score_mapping, test_size=0.8, random_state=1):
    data['Nutri-Score'] = data['Nutri-Score'].map(nutri_score_mapping)
    data = data.dropna(subset=['Nutri-Score'])
    data['Nutri-Score'] -= 1

    features = ['Energy (KJ/100g)', 'Proteins (g/100g)', 'Salt (g/100g)', 
                'Fiber (g/100g)', 'Saturated Fat (g/100g)', 'Sugars (g/100g)', 
                'Fruit/Veg (%)', 'Ecoscore']
    X = data[features]
    y = data['Nutri-Score']
    
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)
    
    poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
    X = poly.fit_transform(X)
    
    print("Class Distribution Before Resampling:", Counter(y))
    
    smote = SMOTE(random_state=random_state)
    X, y = smote.fit_resample(X, y)
    
    print("Class Distribution After Resampling:", Counter(y))
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    
    models = {
        "Random Forest": RandomForestClassifier(random_state=random_state, class_weight="balanced"),
        "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
        "Gaussian Naive Bayes": GaussianNB(),
        "XGBoost": XGBClassifier(random_state=random_state, use_label_encoder=False, eval_metric='mlogloss')
    }
    
    performance = []
    
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_pred += 1
        y_test_orig = y_test + 1
        accuracy = accuracy_score(y_test_orig, y_pred)
        precision = precision_score(y_test_orig, y_pred, average='weighted')
        recall = recall_score(y_test_orig, y_pred, average='weighted')
        f1 = f1_score(y_test_orig, y_pred, average='weighted')
        performance.append({
            "Model": name,
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1-Score": f1
        })
    
    performance_df = pd.DataFrame(performance)
    return performance_df

# Example Usage


In [5]:
if __name__ == "__main__":
    nutri_score_mapping = {'a': 5, 'b': 4, 'c': 3, 'd': 2, 'e': 1}
    
    results = AnotherMethodNutriScore(data, nutri_score_mapping)
    print(results)

Class Distribution Before Resampling: Counter({1: 193, 4: 177, 2: 144, 0: 109, 3: 97})
Class Distribution After Resampling: Counter({0: 193, 3: 193, 4: 193, 1: 193, 2: 193})
                  Model  Accuracy  Precision    Recall  F1-Score
0         Random Forest  0.705959   0.707617  0.705959  0.705794
1   K-Nearest Neighbors  0.588083   0.591620  0.588083  0.583740
2  Gaussian Naive Bayes  0.446891   0.463481  0.446891  0.357750
3               XGBoost  0.722798   0.724381  0.722798  0.722906


Parameters: { "use_label_encoder" } are not used.

