In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.cluster import KMeans
import random  # For introducing randomness in recommendations
import os  # To set environment variables

# Suppress joblib warning about physical cores
os.environ['LOKY_MAX_CPU_COUNT'] = '4'

#############################
# Pipeline 1: SVM for Obesity Risk Prediction
#############################

# Load genetic dataset (assumes comma-delimited)
genetic_file_path = r"C:\Users\trejan\Desktop\Sem 2\Machine Learning\model\new_genetic_profiles.csv"
genetic_df = pd.read_csv(genetic_file_path)
genetic_df.columns = genetic_df.columns.str.strip()

# Text-valued columns that get label-encoded below
categorical_columns = ["Diet_Type", "Physical_Activity"]
variant_columns = ["MC4R_Variant", "PPARG_Variant", "FTO_Variant", "LEPR_Variant"]

# Replace missing values with the explicit category "None" in the text columns
# ONLY. Filling the whole frame would also turn missing numeric values
# (Age, BMI, Obesity_Risk_Score, ...) into the string "None", which silently
# converts those columns to object dtype and breaks binning and scaling.
text_columns = categorical_columns + variant_columns
genetic_df[text_columns] = genetic_df[text_columns].fillna("None")

# Convert Obesity_Risk_Score into categories (Low, Medium, High).
# include_lowest=True keeps a score of exactly 0 in the 'Low' bin; without it
# the first interval is (0, 0.5] and such rows would get a NaN category.
genetic_df['Obesity_Risk_Category'] = pd.cut(
    genetic_df['Obesity_Risk_Score'],
    bins=[0, 0.5, 0.8, 1],
    labels=['Low', 'Medium', 'High'],
    include_lowest=True,
)

# Rows with a missing or out-of-range score have no category; if kept, the
# target encoder would treat NaN as an extra (fourth) class.
unlabelled_rows = genetic_df['Obesity_Risk_Category'].isna()
if unlabelled_rows.any():
    print(
        f"Dropping {int(unlabelled_rows.sum())} rows whose Obesity_Risk_Score "
        "is missing or outside [0, 1]"
    )
    genetic_df = genetic_df.loc[~unlabelled_rows].copy()

# Dictionary of fitted LabelEncoders, keyed by column name; reused at
# prediction time so new profiles are encoded exactly like the training data
label_encoders = {}

# Encode categorical variables (Diet_Type, Physical_Activity)
for col in categorical_columns:
    le = LabelEncoder()
    genetic_df[col] = le.fit_transform(genetic_df[col])
    label_encoders[col] = le

# Encode gene variant columns as strings (so that "None" is encoded too)
for col in variant_columns:
    genetic_df[col] = genetic_df[col].astype(str)
    le = LabelEncoder()
    genetic_df[col] = le.fit_transform(genetic_df[col])
    label_encoders[col] = le

# Define features and target for the genetic model
features = [
    "Age", "BMI", "Physical_Activity", "Diet_Type",
    "MC4R_Present", "MC4R_Variant",
    "PPARG_Present", "PPARG_Variant",
    "FTO_Present", "FTO_Variant",
    "LEPR_Present", "LEPR_Variant"
]
target = "Obesity_Risk_Category"

X_gen = genetic_df[features]
y_gen = genetic_df[target]

# Encode target labels (Low, Medium, High) as integers; target_le is kept so
# predictions can be mapped back to their text labels
target_le = LabelEncoder()
y_encoded = target_le.fit_transform(y_gen)

# Split the data into training (80%) and test (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X_gen, y_encoded, test_size=0.2, random_state=42)

# Scale features (important for SVM); the scaler is fitted on the training
# split only and then applied unchanged to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train an SVM model with RBF kernel
svm_model = SVC(kernel='rbf', random_state=42)  # RBF kernel
svm_model.fit(X_train_scaled, y_train)

# Evaluate the model on the held-out test split
y_pred_svm = svm_model.predict(X_test_scaled)
print("SVM Test Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Classification Report:")
print(classification_report(y_test, y_pred_svm, target_names=target_le.classes_))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_svm))

#############################
# Pipeline 2: Meal Recommendation (Same as Before)
#############################

# Read the meal table from disk (the file is expected to be comma-delimited)
meal_file_path = r"C:\Users\trejan\Desktop\Sem 2\Machine Learning\model\train.csv"
meal_df = pd.read_csv(meal_file_path)

# Standardise the four nutrient columns before clustering; these columns
# must be present in the meal dataset
nutrient_columns = ['Energy_kcal', 'Protein_g', 'Fat_g', 'Carb_g']
nutritional_features = meal_df[nutrient_columns]
scaler_meal = StandardScaler()
nutritional_features_scaled = scaler_meal.fit_transform(nutritional_features)

# Partition the meals into 10 KMeans clusters and record each meal's cluster
# id in a new 'Meal_Cluster' column
num_clusters = 10
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
cluster_ids = kmeans.fit_predict(nutritional_features_scaled)
meal_df['Meal_Cluster'] = cluster_ids

# Define a meal recommendation function that uses the predicted obesity risk category
def recommend_meals(user_profile, meal_df, svm_model, target_le, scaler, num_meals=5,
                    *, encoders=None, feature_names=None, variant_cols=None):
    """
    Predict a user's obesity risk category and recommend meals for it.

    The profile is encoded and scaled like the training data, the SVM predicts
    a risk category, and meals from a risk-specific set of clusters are
    shuffled, sorted by a randomly chosen nutrient and printed.

    user_profile: dict with genetic feature values (original, unencoded)
    meal_df: DataFrame of meals; must contain 'Meal_Cluster', 'Descrip',
        'Energy_kcal', 'Protein_g', 'Fat_g' and 'Carb_g'
    svm_model: trained SVM model
    target_le: LabelEncoder for the target risk category
    scaler: StandardScaler fitted on genetic features
    num_meals: maximum number of meals to print and return
    encoders: optional dict of column name -> fitted LabelEncoder; defaults
        to the notebook-level `label_encoders`
    feature_names: optional ordered list of model features; defaults to the
        notebook-level `features`
    variant_cols: optional list of gene-variant columns; defaults to the
        notebook-level `variant_columns`

    Returns: DataFrame with the top `num_meals` recommended meals (all
        columns of meal_df); the same rows that are printed.
    Raises: ValueError if a categorical value cannot be encoded (e.g. it was
        never seen when the encoders were fitted).
    """
    # Fall back to the notebook-level objects so existing calls keep working,
    # while still allowing a reloaded pipeline to be passed in explicitly
    encoders = label_encoders if encoders is None else encoders
    feature_names = features if feature_names is None else feature_names
    variant_cols = variant_columns if variant_cols is None else variant_cols

    # Convert user_profile into a one-row DataFrame
    user_profile_df = pd.DataFrame([user_profile])

    # Encode categorical features using the stored encoders
    for col in ["Diet_Type", "Physical_Activity", *variant_cols]:
        if col not in user_profile_df.columns:
            continue
        raw_values = user_profile_df[col]
        if col in variant_cols:
            # Variant columns were cast to str before fitting; do the same here
            raw_values = raw_values.astype(str)
        try:
            user_profile_df[col] = encoders[col].transform(raw_values)
        except ValueError as err:
            # Name the offending feature; the encoder's own message does not
            raise ValueError(
                f"Cannot encode {col}={user_profile[col]!r}: {err}"
            ) from err

    # Ensure the profile contains all required features; fill missing with 0.
    # NOTE(review): for encoded columns 0 is simply the first encoded class,
    # not a dedicated "unknown" value.
    for col in feature_names:
        if col not in user_profile_df.columns:
            user_profile_df[col] = 0
    user_profile_df = user_profile_df[feature_names]

    # Scale the user profile using the same scaler as training
    user_profile_scaled = scaler.transform(user_profile_df)

    # Predict obesity risk using the SVM model and map it back to its label
    predicted_category = svm_model.predict(user_profile_scaled)[0]
    predicted_label = target_le.inverse_transform([predicted_category])[0]
    print(f"\nPredicted Obesity Risk Category: {predicted_label}")

    # Cluster preferences and candidate sort columns per risk (example logic).
    # NOTE(review): the cluster ids are hard-coded and are not derived from
    # the nutritional content of the clusters - confirm they are meaningful.
    risk_rules = {
        'Low': ([0, 1, 2, 3], ['Protein_g', 'Energy_kcal']),
        'Medium': ([4, 5, 6, 7], ['Energy_kcal', 'Fat_g']),
    }
    # Any label other than Low/Medium is handled as high risk
    high_risk_rule = ([8, 9, 0, 1], ['Energy_kcal', 'Carb_g'])
    preferred_clusters, sort_options = risk_rules.get(predicted_label, high_risk_rule)
    sort_by = random.choice(sort_options)  # Randomize sorting
    ascending = random.choice([True, False])  # Randomize order

    # Filter meals to the preferred clusters, shuffle them so the order of
    # equal-valued meals varies between calls, then sort
    recommended_meals = meal_df[meal_df['Meal_Cluster'].isin(preferred_clusters)]
    recommended_meals = recommended_meals.sample(frac=1).reset_index(drop=True)
    recommended_meals = recommended_meals.sort_values(by=sort_by, ascending=ascending)
    top_meals = recommended_meals.head(num_meals)

    print("\nRecommended Meals:")
    print(top_meals[['Descrip', 'Energy_kcal', 'Protein_g', 'Fat_g', 'Carb_g']])
    return top_meals

# Sample profile used to demonstrate the recommender. All values are given in
# their original, unencoded form; the text fields are encoded inside
# recommend_meals.
lifestyle_fields = {
    "Age": 35,
    "BMI": 28.5,
    "Physical_Activity": "Low",
    "Diet_Type": "High-Fat",
}
gene_fields = {
    "MC4R_Present": 0,
    "MC4R_Variant": "rs17782313_TT",
    "PPARG_Present": 0,
    "PPARG_Variant": "rs1801282_CG",
    "FTO_Present": 1,
    "FTO_Variant": "rs9939609_AT",
    "LEPR_Present": 1,
    "LEPR_Variant": "rs1137101_AG",
}
new_profile = {**lifestyle_fields, **gene_fields}

# Run the SVM-based recommender on the sample profile and show five meals
recommend_meals(
    user_profile=new_profile,
    meal_df=meal_df,
    svm_model=svm_model,
    target_le=target_le,
    scaler=scaler,
    num_meals=5,
)

SVM Test Accuracy: 0.815
Classification Report:
              precision    recall  f1-score   support

        High       0.88      1.00      0.94        88
         Low       0.75      1.00      0.85       182
      Medium       1.00      0.43      0.60       130

    accuracy                           0.81       400
   macro avg       0.88      0.81      0.80       400
weighted avg       0.86      0.81      0.79       400

Confusion Matrix:
[[ 88   0   0]
 [  0 182   0]
 [ 12  62  56]]

Predicted Obesity Risk Category: Low

Recommended Meals:
                                             Descrip  Energy_kcal  Protein_g  \
13                        Fish, pollock, Alaska, raw         56.0      12.19   
1041              Sea cucumber, yane (Alaska Native)         56.0      13.00   
1425                         Octopus (Alaska Native)         56.0      12.30   
1541  Oopah (tunicate), whole animal (Alaska Native)         67.0      11.70   
119            Mollusks, scallop, mixed species, 

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.cluster import KMeans
import random  # For introducing randomness in recommendations
import os  # To set environment variables
import pickle  # For saving models
import joblib  # For saving large models

# Create directory for saved models if it doesn't exist
save_dir = r"C:\Users\trejan\Desktop\GNN\Saved models"
os.makedirs(save_dir, exist_ok=True)

# Suppress joblib warning about physical cores
os.environ['LOKY_MAX_CPU_COUNT'] = '4'

#############################
# Pipeline 1: SVM for Obesity Risk Prediction
#############################

# Load genetic dataset (assumes comma-delimited)
genetic_file_path = r"C:\Users\trejan\Desktop\Sem 2\Machine Learning\model\new_genetic_profiles.csv"
genetic_df = pd.read_csv(genetic_file_path)
genetic_df.columns = genetic_df.columns.str.strip()

# Text-valued columns that get label-encoded below
categorical_columns = ["Diet_Type", "Physical_Activity"]
variant_columns = ["MC4R_Variant", "PPARG_Variant", "FTO_Variant", "LEPR_Variant"]

# Replace missing values with the explicit category "None" in the text columns
# ONLY. Filling the whole frame would also turn missing numeric values
# (Age, BMI, Obesity_Risk_Score, ...) into the string "None", which silently
# converts those columns to object dtype and breaks binning and scaling.
text_columns = categorical_columns + variant_columns
genetic_df[text_columns] = genetic_df[text_columns].fillna("None")

# Convert Obesity_Risk_Score into categories (Low, Medium, High).
# include_lowest=True keeps a score of exactly 0 in the 'Low' bin; without it
# the first interval is (0, 0.5] and such rows would get a NaN category.
genetic_df['Obesity_Risk_Category'] = pd.cut(
    genetic_df['Obesity_Risk_Score'],
    bins=[0, 0.5, 0.8, 1],
    labels=['Low', 'Medium', 'High'],
    include_lowest=True,
)

# Rows with a missing or out-of-range score have no category; if kept, the
# target encoder would treat NaN as an extra (fourth) class.
unlabelled_rows = genetic_df['Obesity_Risk_Category'].isna()
if unlabelled_rows.any():
    print(
        f"Dropping {int(unlabelled_rows.sum())} rows whose Obesity_Risk_Score "
        "is missing or outside [0, 1]"
    )
    genetic_df = genetic_df.loc[~unlabelled_rows].copy()

# Dictionary of fitted LabelEncoders, keyed by column name; reused at
# prediction time so new profiles are encoded exactly like the training data
label_encoders = {}

# Encode categorical variables (Diet_Type, Physical_Activity)
for col in categorical_columns:
    le = LabelEncoder()
    genetic_df[col] = le.fit_transform(genetic_df[col])
    label_encoders[col] = le

# Encode gene variant columns as strings (so that "None" is encoded too)
for col in variant_columns:
    genetic_df[col] = genetic_df[col].astype(str)
    le = LabelEncoder()
    genetic_df[col] = le.fit_transform(genetic_df[col])
    label_encoders[col] = le

# Define features and target for the genetic model
features = [
    "Age", "BMI", "Physical_Activity", "Diet_Type",
    "MC4R_Present", "MC4R_Variant",
    "PPARG_Present", "PPARG_Variant",
    "FTO_Present", "FTO_Variant",
    "LEPR_Present", "LEPR_Variant"
]
target = "Obesity_Risk_Category"

X_gen = genetic_df[features]
y_gen = genetic_df[target]

# Encode target labels (Low, Medium, High) as integers; target_le is kept so
# predictions can be mapped back to their text labels
target_le = LabelEncoder()
y_encoded = target_le.fit_transform(y_gen)

# Split the data into training (80%) and test (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X_gen, y_encoded, test_size=0.2, random_state=42)

# Scale features (important for SVM); the scaler is fitted on the training
# split only and then applied unchanged to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train an SVM model with RBF kernel
svm_model = SVC(kernel='rbf', random_state=42)  # RBF kernel
svm_model.fit(X_train_scaled, y_train)

# Evaluate the model on the held-out test split
y_pred_svm = svm_model.predict(X_test_scaled)
print("SVM Test Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Classification Report:")
print(classification_report(y_test, y_pred_svm, target_names=target_le.classes_))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_svm))

# Destination files for the fitted objects of pipeline 1, all inside save_dir
svm_model_path = os.path.join(save_dir, "svm_obesity_model.pkl")
scaler_path = os.path.join(save_dir, "genetic_features_scaler.pkl")
encoders_path = os.path.join(save_dir, "svm_label_encoders.pkl")
target_le_path = os.path.join(save_dir, "svm_target_encoder.pkl")
features_path = os.path.join(save_dir, "svm_features.pkl")

# (description, destination, object) for every artifact, in the order saved:
# SVM model, genetic feature scaler, label encoders, target encoder, feature list
artifacts_to_save = [
    ("SVM model", svm_model_path, svm_model),
    ("Genetic features scaler", scaler_path, scaler),
    ("Label encoders", encoders_path, label_encoders),
    ("Target label encoder", target_le_path, target_le),
    ("Feature list", features_path, features),
]

# Pickle each object to its file and report where it was written
for description, destination, artifact in artifacts_to_save:
    with open(destination, 'wb') as f:
        pickle.dump(artifact, f)
    print(f"{description} saved to {destination}")

#############################
# Pipeline 2: Meal Recommendation (Same as Before)
#############################

# Read the meal table from disk (the file is expected to be comma-delimited)
meal_file_path = r"C:\Users\trejan\Desktop\Sem 2\Machine Learning\model\train.csv"
meal_df = pd.read_csv(meal_file_path)

# Standardise the four nutrient columns before clustering; these columns
# must be present in the meal dataset
nutrient_columns = ['Energy_kcal', 'Protein_g', 'Fat_g', 'Carb_g']
nutritional_features = meal_df[nutrient_columns]
scaler_meal = StandardScaler()
nutritional_features_scaled = scaler_meal.fit_transform(nutritional_features)

# Persist the fitted meal scaler
scaler_meal_path = os.path.join(save_dir, "meal_features_scaler.pkl")
with open(scaler_meal_path, 'wb') as f:
    pickle.dump(scaler_meal, f)
print(f"Meal features scaler saved to {scaler_meal_path}")

# Partition the meals into 10 KMeans clusters and record each meal's cluster
# id in a new 'Meal_Cluster' column
num_clusters = 10
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
cluster_ids = kmeans.fit_predict(nutritional_features_scaled)
meal_df['Meal_Cluster'] = cluster_ids

# Persist the fitted KMeans model
kmeans_path = os.path.join(save_dir, "svm_meal_clusters.pkl")
with open(kmeans_path, 'wb') as f:
    pickle.dump(kmeans, f)
print(f"KMeans model saved to {kmeans_path}")

# Write the meal table, now including 'Meal_Cluster', to CSV without the index
meal_df_path = os.path.join(save_dir, "svm_clustered_meals.csv")
meal_df.to_csv(meal_df_path, index=False)
print(f"Clustered meal data saved to {meal_df_path}")

# Define a meal recommendation function that uses the predicted obesity risk category
def recommend_meals(user_profile, meal_df, svm_model, target_le, scaler, num_meals=5,
                    *, encoders=None, feature_names=None, variant_cols=None):
    """
    Predict a user's obesity risk category and recommend meals for it.

    The profile is encoded and scaled like the training data, the SVM predicts
    a risk category, and meals from a risk-specific set of clusters are
    shuffled, sorted by a randomly chosen nutrient and printed.

    user_profile: dict with genetic feature values (original, unencoded)
    meal_df: DataFrame of meals; must contain 'Meal_Cluster', 'Descrip',
        'Energy_kcal', 'Protein_g', 'Fat_g' and 'Carb_g'
    svm_model: trained SVM model
    target_le: LabelEncoder for the target risk category
    scaler: StandardScaler fitted on genetic features
    num_meals: maximum number of meals to print and return
    encoders: optional dict of column name -> fitted LabelEncoder; defaults
        to the notebook-level `label_encoders`
    feature_names: optional ordered list of model features; defaults to the
        notebook-level `features`
    variant_cols: optional list of gene-variant columns; defaults to the
        notebook-level `variant_columns`

    Returns: DataFrame with the top `num_meals` recommended meals (all
        columns of meal_df); the same rows that are printed.
    Raises: ValueError if a categorical value cannot be encoded (e.g. it was
        never seen when the encoders were fitted).
    """
    # Fall back to the notebook-level objects so existing calls keep working,
    # while still allowing a reloaded pipeline to be passed in explicitly
    encoders = label_encoders if encoders is None else encoders
    feature_names = features if feature_names is None else feature_names
    variant_cols = variant_columns if variant_cols is None else variant_cols

    # Convert user_profile into a one-row DataFrame
    user_profile_df = pd.DataFrame([user_profile])

    # Encode categorical features using the stored encoders
    for col in ["Diet_Type", "Physical_Activity", *variant_cols]:
        if col not in user_profile_df.columns:
            continue
        raw_values = user_profile_df[col]
        if col in variant_cols:
            # Variant columns were cast to str before fitting; do the same here
            raw_values = raw_values.astype(str)
        try:
            user_profile_df[col] = encoders[col].transform(raw_values)
        except ValueError as err:
            # Name the offending feature; the encoder's own message does not
            raise ValueError(
                f"Cannot encode {col}={user_profile[col]!r}: {err}"
            ) from err

    # Ensure the profile contains all required features; fill missing with 0.
    # NOTE(review): for encoded columns 0 is simply the first encoded class,
    # not a dedicated "unknown" value.
    for col in feature_names:
        if col not in user_profile_df.columns:
            user_profile_df[col] = 0
    user_profile_df = user_profile_df[feature_names]

    # Scale the user profile using the same scaler as training
    user_profile_scaled = scaler.transform(user_profile_df)

    # Predict obesity risk using the SVM model and map it back to its label
    predicted_category = svm_model.predict(user_profile_scaled)[0]
    predicted_label = target_le.inverse_transform([predicted_category])[0]
    print(f"\nPredicted Obesity Risk Category: {predicted_label}")

    # Cluster preferences and candidate sort columns per risk (example logic).
    # NOTE(review): the cluster ids are hard-coded and are not derived from
    # the nutritional content of the clusters - confirm they are meaningful.
    risk_rules = {
        'Low': ([0, 1, 2, 3], ['Protein_g', 'Energy_kcal']),
        'Medium': ([4, 5, 6, 7], ['Energy_kcal', 'Fat_g']),
    }
    # Any label other than Low/Medium is handled as high risk
    high_risk_rule = ([8, 9, 0, 1], ['Energy_kcal', 'Carb_g'])
    preferred_clusters, sort_options = risk_rules.get(predicted_label, high_risk_rule)
    sort_by = random.choice(sort_options)  # Randomize sorting
    ascending = random.choice([True, False])  # Randomize order

    # Filter meals to the preferred clusters, shuffle them so the order of
    # equal-valued meals varies between calls, then sort
    recommended_meals = meal_df[meal_df['Meal_Cluster'].isin(preferred_clusters)]
    recommended_meals = recommended_meals.sample(frac=1).reset_index(drop=True)
    recommended_meals = recommended_meals.sort_values(by=sort_by, ascending=ascending)
    top_meals = recommended_meals.head(num_meals)

    print("\nRecommended Meals:")
    print(top_meals[['Descrip', 'Energy_kcal', 'Protein_g', 'Fat_g', 'Carb_g']])
    return top_meals

# Bundle the fitted components needed to reproduce predictions into a single
# pickle. NOTE: the recommend_meals function itself is NOT stored in this
# bundle; code that loads it must define or import the function separately.
pipeline_components = {
    "svm_model": svm_model,
    "label_encoders": label_encoders,
    "target_encoder": target_le,
    "features": features,
    "scaler": scaler,
    "kmeans": kmeans,
    "meal_scaler": scaler_meal,
    # recommend_meals casts these columns to str before encoding, so the
    # column list has to travel with the encoders
    "variant_columns": variant_columns,
}

pipeline_path = os.path.join(save_dir, "svm_obesity_recommendation_pipeline.pkl")
with open(pipeline_path, 'wb') as f:
    pickle.dump(pipeline_components, f)
print(f"Complete SVM pipeline saved to {pipeline_path}")

# Example new genetic profile for meal recommendation (using original, unencoded values)
new_profile = {
    "Age": 35,
    "BMI": 28.5,
    "Physical_Activity": "Low",      # Original string (will be encoded)
    "Diet_Type": "High-Fat",           # Original string (will be encoded)
    "MC4R_Present": 0,
    "MC4R_Variant": "rs17782313_TT",
    "PPARG_Present": 0,
    "PPARG_Variant": "rs1801282_CG",
    "FTO_Present": 1,
    "FTO_Variant": "rs9939609_AT",
    "LEPR_Present": 1,
    "LEPR_Variant": "rs1137101_AG"
}

# Get meal recommendations using the SVM model
recommend_meals(new_profile, meal_df, svm_model, target_le, scaler, num_meals=5)

SVM Test Accuracy: 0.815
Classification Report:
              precision    recall  f1-score   support

        High       0.88      1.00      0.94        88
         Low       0.75      1.00      0.85       182
      Medium       1.00      0.43      0.60       130

    accuracy                           0.81       400
   macro avg       0.88      0.81      0.80       400
weighted avg       0.86      0.81      0.79       400

Confusion Matrix:
[[ 88   0   0]
 [  0 182   0]
 [ 12  62  56]]
SVM model saved to C:\Users\trejan\Desktop\GNN\Saved models\svm_obesity_model.pkl
Genetic features scaler saved to C:\Users\trejan\Desktop\GNN\Saved models\genetic_features_scaler.pkl
Label encoders saved to C:\Users\trejan\Desktop\GNN\Saved models\svm_label_encoders.pkl
Target label encoder saved to C:\Users\trejan\Desktop\GNN\Saved models\svm_target_encoder.pkl
Feature list saved to C:\Users\trejan\Desktop\GNN\Saved models\svm_features.pkl
Meal features scaler saved to C:\Users\trejan\Desktop\GNN\Sav