In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

# Load datasets
genetic_df = pd.read_csv("C:\\Users\\trejan\\Desktop\\Sem 2\\Machine Learning\\model\\genetic_profiles.csv")
meal_df = pd.read_csv("C:\\Users\\trejan\\Desktop\\Sem 2\\Machine Learning\\model\\train.csv")

# Handle missing values
numeric_cols = genetic_df.select_dtypes(include=['float64', 'int64']).columns
genetic_df[numeric_cols] = genetic_df[numeric_cols].fillna(genetic_df[numeric_cols].mean())

categorical_cols = genetic_df.select_dtypes(include=['object']).columns
genetic_df[categorical_cols] = genetic_df[categorical_cols].fillna('Unknown')

# Convert Obesity_Risk_Score into categories
genetic_df['Obesity_Risk_Category'] = pd.cut(
    genetic_df['Obesity_Risk_Score'],
    bins=[0, 0.3, 0.6, 1],
    labels=['Low', 'Medium', 'High']
)

# Select relevant features
features = ['BMI', 'Physical_Activity', 'Diet_Type', 'MC4R_Variant', 'PPARG_Variant', 'FTO_Variant', 'LEPR_Variant', 'Obesity_Risk_Score']
genetic_df = genetic_df[features + ['Obesity_Risk_Category']]

genetic_df = pd.get_dummies(genetic_df, columns=['Diet_Type', 'MC4R_Variant', 'PPARG_Variant', 'FTO_Variant', 'LEPR_Variant'], drop_first=True)

# Prepare features and target
X = genetic_df.drop(['Obesity_Risk_Category'], axis=1)
y = genetic_df['Obesity_Risk_Category']

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y_encoded)

# Split data
X_train, X_temp, y_train, y_temp = train_test_split(X_resampled, y_resampled, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Hyperparameter tuning
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['rbf', 'linear'],
    'gamma': ['scale', 'auto', 0.1, 0.01],
    'class_weight': ['balanced', None]
}

svm = SVC(random_state=42)
grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Train best model
best_svm = grid_search.best_estimator_
best_svm.fit(X_train, y_train)

# Save the model, encoder, and scaler
with open("svm_model.pkl", 'wb') as f:
    pickle.dump(best_svm, f)
with open("label_encoder.pkl", 'wb') as f:
    pickle.dump(label_encoder, f)
with open("scaler.pkl", 'wb') as f:
    pickle.dump(scaler, f)

def recommend_meals(user_profile, meal_df, model_path, encoder_path, scaler_path, num_meals=5):
    with open(model_path, 'rb') as f:
        model = pickle.load(f)
    with open(encoder_path, 'rb') as f:
        label_encoder = pickle.load(f)
    with open(scaler_path, 'rb') as f:
        scaler = pickle.load(f)
    
    user_profile_df = pd.DataFrame([user_profile])
    user_profile_df = pd.get_dummies(user_profile_df, columns=['Diet_Type', 'MC4R_Variant', 'PPARG_Variant', 'FTO_Variant', 'LEPR_Variant'], drop_first=True)
    missing_cols = set(X_train.columns) - set(user_profile_df.columns)
    for col in missing_cols:
        user_profile_df[col] = 0
    user_profile_df = user_profile_df[X_train.columns]
    user_profile_scaled = scaler.transform(user_profile_df)
    predicted_risk_category = model.predict(user_profile_scaled)
    predicted_risk_category_label = label_encoder.inverse_transform(predicted_risk_category)[0]
    print(f'Predicted Obesity Risk Category: {predicted_risk_category_label}')

# Example usage
new_profile = {
    'BMI': 28.5,
    'Physical_Activity': 2,
    'Diet_Type': 'High-Fat',
    'MC4R_Variant': 'rs17782313_CT',
    'PPARG_Variant': 'rs1801282_CG',
    'FTO_Variant': 'rs9939609_AT',
    'LEPR_Variant': 'rs1137101_AG',
    'Obesity_Risk_Score': 0.45
}

recommend_meals(new_profile, meal_df, "svm_model.pkl", "label_encoder.pkl", "scaler.pkl")

ValueError: could not convert string to float: 'High'