In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.cluster import KMeans
import random  # For introducing randomness in recommendations
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from matplotlib.colors import ListedColormap

# Suppress joblib warning about physical cores
os.environ['LOKY_MAX_CPU_COUNT'] = '4'

# Define visualization save directory 
visualization_dir = r"C:\Users\trejan\Desktop\GNN"
if not os.path.exists(visualization_dir):
    os.makedirs(visualization_dir)

#############################
# Pipeline 1: Deep Learning for Obesity Risk Prediction
#############################

# Load genetic dataset (assumes comma-delimited)
genetic_file_path = r"C:\Users\trejan\Desktop\Sem 2\Machine Learning\model\new_genetic_profiles.csv"
genetic_df = pd.read_csv(genetic_file_path)
genetic_df.columns = genetic_df.columns.str.strip()
genetic_df.fillna("None", inplace=True)

# Convert Obesity_Risk_Score into categories (Low, Medium, High)
genetic_df['Obesity_Risk_Category'] = pd.cut(
    genetic_df['Obesity_Risk_Score'],
    bins=[0, 0.5, 0.8, 1],
    labels=['Low', 'Medium', 'High']
)

# Initialize dictionary to store LabelEncoders
label_encoders = {}

# Encode categorical variables (Diet_Type, Physical_Activity)
for col in ["Diet_Type", "Physical_Activity"]:
    le = LabelEncoder()
    genetic_df[col] = le.fit_transform(genetic_df[col])
    label_encoders[col] = le

# Encode gene variant columns as strings (so that "None" is encoded too)
variant_columns = ["MC4R_Variant", "PPARG_Variant", "FTO_Variant", "LEPR_Variant"]
for col in variant_columns:
    genetic_df[col] = genetic_df[col].astype(str)
    le = LabelEncoder()
    genetic_df[col] = le.fit_transform(genetic_df[col])
    label_encoders[col] = le

# Define features and target for the genetic model
features = [
    "Age", "BMI", "Physical_Activity", "Diet_Type",
    "MC4R_Present", "MC4R_Variant",
    "PPARG_Present", "PPARG_Variant",
    "FTO_Present", "FTO_Variant",
    "LEPR_Present", "LEPR_Variant"
]
target = "Obesity_Risk_Category"

X_gen = genetic_df[features]
y_gen = genetic_df[target]

# Create a visual for feature distributions
plt.figure(figsize=(15, 10))
for i, feature in enumerate(["Age", "BMI", "Physical_Activity", "Diet_Type"]):
    plt.subplot(2, 2, i+1)
    if feature in ["Physical_Activity", "Diet_Type"]:
        counts = genetic_df[feature].value_counts()
        sns.barplot(x=counts.index, y=counts.values)
        plt.title(f'Distribution of {feature}')
        if feature == "Physical_Activity":
            plt.xlabel(f'{feature} (0=Low, 1=Moderate, 2=High)')
        elif feature == "Diet_Type":
            plt.xlabel(f'{feature} (Encoded)')
    else:
        sns.histplot(genetic_df[feature], kde=True)
        plt.title(f'Distribution of {feature}')
        plt.xlabel(feature)
plt.tight_layout()
plt.savefig(os.path.join(visualization_dir, "feature_distributions.png"))
plt.close()

# Visual for risk category distribution
plt.figure(figsize=(10, 6))
risk_counts = genetic_df['Obesity_Risk_Category'].value_counts()
sns.barplot(x=risk_counts.index, y=risk_counts.values)
plt.title('Distribution of Obesity Risk Categories')
plt.xlabel('Risk Category')
plt.ylabel('Count')
plt.savefig(os.path.join(visualization_dir, "risk_category_distribution.png"))
plt.close()

# Encode target labels (Low, Medium, High)
target_le = LabelEncoder()
y_encoded = target_le.fit_transform(y_gen)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_gen, y_encoded, test_size=0.2, random_state=42)

# Scale features (important for neural networks)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Visualize the scaled features using PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_train_scaled)

# Create a scatter plot of the PCA components colored by risk category
plt.figure(figsize=(10, 8))
colors = ['green', 'orange', 'red']
for i, risk_level in enumerate(['Low', 'Medium', 'High']):
    indices = np.where(y_train == i)
    plt.scatter(X_pca[indices, 0], X_pca[indices, 1], c=colors[i], label=risk_level, alpha=0.7)
plt.title('PCA of Genetic Features by Obesity Risk Category')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.grid(True, linestyle='--', alpha=0.7)
plt.savefig(os.path.join(visualization_dir, "pca_visualization.png"))
plt.close()

# Build a deep learning model using Keras
num_features = X_train_scaled.shape[1]
nn_model = Sequential([
    Dense(64, activation='relu', input_shape=(num_features,)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dropout(0.3),
    Dense(3, activation='softmax')  # 3 output nodes for 3 risk categories
])

# Compile the model
nn_model.compile(optimizer='adam',
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'])

# Early stopping to prevent overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
history = nn_model.fit(
    X_train_scaled, y_train,
    epochs=100,
    batch_size=16,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1
)

# Visualize the model training history
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='lower right')

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.tight_layout()
plt.savefig(os.path.join(visualization_dir, "model_training_history.png"))
plt.close()

# Evaluate the model
test_loss, test_acc = nn_model.evaluate(X_test_scaled, y_test, verbose=0)
print("Neural Network Test Accuracy:", test_acc)

y_pred_probs = nn_model.predict(X_test_scaled)
y_pred = np.argmax(y_pred_probs, axis=1)

print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=target_le.classes_))
print("Confusion Matrix:")
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)

# Visualize confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=target_le.classes_,
            yticklabels=target_le.classes_)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.savefig(os.path.join(visualization_dir, "confusion_matrix.png"))
plt.close()

# Create feature importance visualization using a simpler model
from sklearn.ensemble import RandomForestClassifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Plot feature importances
plt.figure(figsize=(12, 8))
feature_importance = pd.Series(rf_model.feature_importances_, index=features)
feature_importance = feature_importance.sort_values(ascending=False)
sns.barplot(x=feature_importance.values, y=feature_importance.index)
plt.title('Feature Importance for Obesity Risk Prediction')
plt.xlabel('Importance Score')
plt.tight_layout()
plt.savefig(os.path.join(visualization_dir, "feature_importance.png"))
plt.close()

#############################
# Pipeline 2: Meal Recommendation
#############################

# Load the meal dataset (assumed to be comma-delimited)
meal_file_path = r"C:\Users\trejan\Desktop\Sem 2\Machine Learning\model\train.csv"
meal_df = pd.read_csv(meal_file_path)

# Visualize distributions of nutritional values
plt.figure(figsize=(15, 10))
for i, col in enumerate(['Energy_kcal', 'Protein_g', 'Fat_g', 'Carb_g']):
    plt.subplot(2, 2, i+1)
    sns.histplot(meal_df[col], kde=True)
    plt.title(f'Distribution of {col}')
    plt.xlabel(col)
plt.tight_layout()
plt.savefig(os.path.join(visualization_dir, "nutrition_distributions.png"))
plt.close()

# Preprocess nutritional features; these columns should exist in your meal dataset
nutritional_features = meal_df[['Energy_kcal', 'Protein_g', 'Fat_g', 'Carb_g']]
scaler_meal = StandardScaler()
nutritional_features_scaled = scaler_meal.fit_transform(nutritional_features)

# Cluster meals using KMeans (e.g., 10 clusters)
num_clusters = 10
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
meal_df['Meal_Cluster'] = kmeans.fit_predict(nutritional_features_scaled)

# Visualize the clusters using PCA
pca_meal = PCA(n_components=2)
meal_pca = pca_meal.fit_transform(nutritional_features_scaled)

plt.figure(figsize=(12, 10))
colors = plt.cm.tab10(np.linspace(0, 1, num_clusters))
for i in range(num_clusters):
    plt.scatter(meal_pca[meal_df['Meal_Cluster'] == i, 0],
                meal_pca[meal_df['Meal_Cluster'] == i, 1],
                c=[colors[i]], label=f'Cluster {i}', alpha=0.7)
plt.title('PCA of Meal Clusters')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.grid(True, linestyle='--', alpha=0.5)
plt.savefig(os.path.join(visualization_dir, "meal_clusters_pca.png"))
plt.close()

# Create radar chart to visualize nutrition profile of each cluster
# Calculate mean nutrition values for each cluster
cluster_means = meal_df.groupby('Meal_Cluster')[['Energy_kcal', 'Protein_g', 'Fat_g', 'Carb_g']].mean()

# Normalize the cluster means for radar chart
cluster_means_normalized = (cluster_means - cluster_means.min()) / (cluster_means.max() - cluster_means.min())

# Create radar charts for each cluster
categories = ['Energy', 'Protein', 'Fat', 'Carbs']
fig = plt.figure(figsize=(20, 15))

# Maximum clusters to show in one figure (e.g., 5 clusters)
clusters_per_fig = 5
for fig_num in range(2):  # Create 2 figures for all 10 clusters
    start_cluster = fig_num * clusters_per_fig
    end_cluster = min(start_cluster + clusters_per_fig, num_clusters)
    
    fig = plt.figure(figsize=(15, 12))
    for i in range(start_cluster, end_cluster):
        # Number of variables
        N = len(categories)
        
        # What will be the angle of each axis in the plot
        angles = [n / float(N) * 2 * np.pi for n in range(N)]
        angles += angles[:1]  # Close the loop
        
        # Create subplot
        ax = plt.subplot(2, 3, i - start_cluster + 1, polar=True)
        
        # Draw one axis per variable and add labels
        plt.xticks(angles[:-1], categories, size=10)
        
        # Draw the values for this cluster
        values = cluster_means_normalized.iloc[i].values.flatten().tolist()
        values += values[:1]  # Close the loop
        
        # Plot values
        ax.plot(angles, values, linewidth=2, linestyle='solid', label=f'Cluster {i}')
        ax.fill(angles, values, alpha=0.25)
        
        # Add title
        plt.title(f'Cluster {i} Nutrition Profile', size=11, y=1.1)
    
    plt.tight_layout()
    plt.savefig(os.path.join(visualization_dir, f"cluster_nutrition_radar_{fig_num+1}.png"))
    plt.close(fig)

# Define a meal recommendation function that uses the predicted obesity risk category
def recommend_meals(user_profile, meal_df, nn_model, target_le, scaler, visualization_dir, num_meals=5):
    """
    user_profile: dict with genetic feature values (original, unencoded)
    nn_model: trained neural network model
    target_le: LabelEncoder for the target risk category
    scaler: StandardScaler fitted on genetic features
    """
    # Convert user_profile into a DataFrame
    user_profile_df = pd.DataFrame([user_profile])
    
    # Encode categorical features using stored encoders
    for col in ["Diet_Type", "Physical_Activity"]:
        if col in user_profile_df.columns:
            le = label_encoders[col]
            user_profile_df[col] = le.transform(user_profile_df[col])
    for col in variant_columns:
        if col in user_profile_df.columns:
            le = label_encoders[col]
            user_profile_df[col] = le.transform(user_profile_df[col].astype(str))
    
    # Ensure the user profile contains all required features; fill missing with 0
    missing_cols = set(features) - set(user_profile_df.columns)
    for col in missing_cols:
        user_profile_df[col] = 0
    user_profile_df = user_profile_df[features]
    
    # Scale the user profile using the same scaler as training
    user_profile_scaled = scaler.transform(user_profile_df)
    
    # Predict obesity risk using the neural network model
    pred_probs = nn_model.predict(user_profile_scaled)
    predicted_category = np.argmax(pred_probs, axis=1)[0]
    predicted_label = target_le.inverse_transform([predicted_category])[0]
    print(f"\nPredicted Obesity Risk Category: {predicted_label}")
    
    # Define cluster preferences based on predicted risk (example logic)
    if predicted_label == 'Low':
        preferred_clusters = [0, 1, 2, 3]  # Broaden the cluster selection
        sort_by = random.choice(['Protein_g', 'Energy_kcal'])  # Randomize sorting
        ascending = random.choice([True, False])  # Randomize order
    elif predicted_label == 'Medium':
        preferred_clusters = [4, 5, 6, 7]  # Broaden the cluster selection
        sort_by = random.choice(['Energy_kcal', 'Fat_g'])  # Randomize sorting
        ascending = random.choice([True, False])  # Randomize order
    else:
        preferred_clusters = [8, 9, 0, 1]  # Broaden the cluster selection
        sort_by = random.choice(['Energy_kcal', 'Carb_g'])  # Randomize sorting
        ascending = random.choice([True, False])  # Randomize order
    
    # Filter and sort meals from the preferred clusters
    recommended_meals = meal_df[meal_df['Meal_Cluster'].isin(preferred_clusters)]
    recommended_meals = recommended_meals.sample(frac=1).reset_index(drop=True)  # Shuffle the meals
    recommended_meals = recommended_meals.sort_values(by=sort_by, ascending=ascending)
    
    print("\nRecommended Meals:")
    top_meals = recommended_meals[['Descrip', 'Energy_kcal', 'Protein_g', 'Fat_g', 'Carb_g']].head(num_meals)
    print(top_meals)
    
    # Create a visualization of the recommended meals
    plt.figure(figsize=(15, 8))
    top_meals_for_viz = top_meals.reset_index(drop=True)
    
    # Create a subplot for each nutritional component
    for i, nutrient in enumerate(['Energy_kcal', 'Protein_g', 'Fat_g', 'Carb_g']):
        plt.subplot(2, 2, i+1)
        bars = plt.bar(top_meals_for_viz.index, top_meals_for_viz[nutrient], color='skyblue')
        plt.title(f'{nutrient} in Recommended Meals')
        plt.xticks(top_meals_for_viz.index, [f"Meal {i+1}" for i in range(len(top_meals_for_viz))], rotation=45)
        plt.ylabel(nutrient)
        
        # Add the meal names as text above each bar
        for idx, bar in enumerate(bars):
            meal_name = top_meals_for_viz['Descrip'].iloc[idx]
            if len(meal_name) > 15:
                meal_name = meal_name[:15] + "..."
            plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1, 
                    meal_name, ha='center', va='bottom', rotation=45, fontsize=8)
    
    plt.tight_layout()
    plt.savefig(os.path.join(visualization_dir, "recommended_meals.png"))
    plt.close()
    
    # Create a parallel coordinates plot to compare nutritional values
    plt.figure(figsize=(14, 6))
    
    # Normalize data for better visualization
    meal_data = top_meals[['Energy_kcal', 'Protein_g', 'Fat_g', 'Carb_g']]
    meal_data_norm = (meal_data - meal_data.min()) / (meal_data.max() - meal_data.min())
    meal_data_norm['Meal'] = [f"Meal {i+1}" for i in range(len(meal_data_norm))]
    
    # Create parallel coordinates
    pd.plotting.parallel_coordinates(meal_data_norm, 'Meal', colormap=plt.cm.tab10)
    plt.title('Nutritional Comparison of Recommended Meals')
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.savefig(os.path.join(visualization_dir, "meal_nutrition_comparison.png"))
    plt.close()
    
    return top_meals

# Example new genetic profile for meal recommendation (using original, unencoded values)
new_profile = {
    "Age": 35,
    "BMI": 28.5,
    "Physical_Activity": "Low",      # Original string (will be encoded)
    "Diet_Type": "High-Fat",           # Original string (will be encoded)
    "MC4R_Present": 1,
    "MC4R_Variant": "rs17782313_TT",
    "PPARG_Present": 0,
    "PPARG_Variant": "rs1801282_CG",
    "FTO_Present": 0,
    "FTO_Variant": "rs9939609_AT",
    "LEPR_Present": 1,
    "LEPR_Variant": "rs1137101_AG"
}

# Get meal recommendations using the neural network model
recommended_meals = recommend_meals(new_profile, meal_df, nn_model, target_le, scaler, visualization_dir, num_meals=5)

# Create a visualization of the user profile
plt.figure(figsize=(12, 8))
user_data = pd.Series({
    'Age': new_profile['Age'],
    'BMI': new_profile['BMI'],
    'Activity Level': 'Low',
    'Diet Type': 'High-Fat',
    'MC4R Gene': 'Present',
    'LEPR Gene': 'Present',
    'Predicted Risk': target_le.inverse_transform([np.argmax(nn_model.predict(scaler.transform(pd.DataFrame([new_profile])[features])), axis=1)[0]])[0]
})

colors = ['green', 'orange', 'red']
risk_color = colors[list(target_le.classes_).index(user_data['Predicted Risk'])]

# Create a horizontal bar chart for the user profile
categorical_data = user_data.drop(['Age', 'BMI'])
plt.subplot(2, 1, 1)
bars = plt.barh(range(len(categorical_data)), [1] * len(categorical_data), color='lightgray')
for i, (key, value) in enumerate(categorical_data.items()):
    plt.text(0.5, i, f"{key}: {value}", ha='center', va='center', fontsize=12)
plt.yticks([])
plt.xticks([])
plt.title('User Profile Summary', fontsize=14)

# Add numerical data as text
plt.subplot(2, 1, 2)
plt.axis('off')
plt.text(0.5, 0.7, f"Age: {user_data['Age']} years", ha='center', fontsize=14)
plt.text(0.5, 0.5, f"BMI: {user_data['BMI']} (Overweight)", ha='center', fontsize=14)
plt.text(0.5, 0.3, f"Predicted Risk: {user_data['Predicted Risk']}", ha='center', fontsize=16, color=risk_color, weight='bold')

plt.tight_layout()
plt.savefig(os.path.join(visualization_dir, "user_profile_summary.png"))
plt.close()

#############################
# Save the Trained Deep Learning Model
#############################

# Define the save directory and filename based on the implementation method used
save_model_dir = r"C:\Users\trejan\Desktop\GNN\Saved models"
if not os.path.exists(save_model_dir):
    os.makedirs(save_model_dir)

# Filename includes the method name: here "DeepLearning_Keras_EarlyStopping.h5"
model_filename = os.path.join(save_model_dir, "DeepLearning_Keras_EarlyStopping.h5")

# Save the model using Keras' save method
nn_model.save(model_filename)
print(f"\nTrained deep learning model saved at: {model_filename}")

# Save a model architecture visualization
from tensorflow.keras.utils import plot_model
try:
    plot_model(nn_model, to_file=os.path.join(visualization_dir, "model_architecture.png"), show_shapes=True, show_layer_names=True)
    print(f"Model architecture visualization saved to {visualization_dir}")
except Exception as e:
    print(f"Could not generate model architecture visualization. Error: {e}")
    print("Note: This requires pydot and graphviz to be installed.")

print("\nAll visualizations have been saved to:", visualization_dir)

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.3868 - loss: 1.0933 - val_accuracy: 0.7156 - val_loss: 0.7680
Epoch 2/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.6232 - loss: 0.7964 - val_accuracy: 0.7875 - val_loss: 0.5662
Epoch 3/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6865 - loss: 0.6546 - val_accuracy: 0.7906 - val_loss: 0.5032
Epoch 4/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7359 - loss: 0.5941 - val_accuracy: 0.8125 - val_loss: 0.4695
Epoch 5/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7761 - loss: 0.5326 - val_accuracy: 0.8031 - val_loss: 0.4497
Epoch 6/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7642 - loss: 0.5269 - val_accuracy: 0.8156 - val_loss: 0.4380
Epoch 7/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━

[WinError 2] The system cannot find the file specified
  File "C:\Users\trejan\tf_env\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\Users\trejan\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 546, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\trejan\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 1022, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\trejan\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 1491, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step

Predicted Obesity Risk Category: Medium

Recommended Meals:
                                                Descrip  Energy_kcal  \
455      Egg, whole, dried, stabilized, glucose reduced        615.0   
3528  Snacks, popcorn, oil-popped, microwave, regula...        583.0   
3506                                  Egg, whole, dried        605.0   
3149          Puff pastry, frozen, ready-to-bake, baked        558.0   
3337          Candies, HERSHEY'S POT OF GOLD Almond Bar        577.0   

      Protein_g  Fat_g  Carb_g  
455       48.17  43.95    2.38  
3528       7.29  43.55   45.06  
3506      48.37  43.04    1.53  
3149       7.40  38.50   45.70  
3337      12.82  38.46   46.15  


ValueError: could not convert string to float: 'Low'

<Figure size 2000x1500 with 0 Axes>

<Figure size 1200x800 with 0 Axes>

In [2]:
# Create a visualization of the user profile - Fixed version
plt.figure(figsize=(12, 8))

# First, properly process the user profile for prediction
user_profile_df = pd.DataFrame([new_profile])
    
# Encode categorical features using stored encoders
for col in ["Diet_Type", "Physical_Activity"]:
    if col in user_profile_df.columns:
        le = label_encoders[col]
        user_profile_df[col] = le.transform(user_profile_df[col])
for col in variant_columns:
    if col in user_profile_df.columns:
        le = label_encoders[col]
        user_profile_df[col] = le.transform(user_profile_df[col].astype(str))

# Ensure the user profile contains all required features
missing_cols = set(features) - set(user_profile_df.columns)
for col in missing_cols:
    user_profile_df[col] = 0
user_profile_df = user_profile_df[features]

# Scale the user profile and predict
user_profile_scaled = scaler.transform(user_profile_df)
pred_probs = nn_model.predict(user_profile_scaled)
predicted_category = np.argmax(pred_probs, axis=1)[0]
predicted_label = target_le.inverse_transform([predicted_category])[0]

# Now create the visualization with the prediction result
user_data = pd.Series({
    'Age': new_profile['Age'],
    'BMI': new_profile['BMI'],
    'Activity Level': new_profile['Physical_Activity'],  # Use the original string
    'Diet Type': new_profile['Diet_Type'],               # Use the original string
    'MC4R Gene': 'Present' if new_profile['MC4R_Present'] == 1 else 'Absent',
    'LEPR Gene': 'Present' if new_profile['LEPR_Present'] == 1 else 'Absent',
    'Predicted Risk': predicted_label
})

colors = ['green', 'orange', 'red']
risk_color = colors[list(target_le.classes_).index(user_data['Predicted Risk'])]

# Create a horizontal bar chart for the user profile
categorical_data = user_data.drop(['Age', 'BMI'])
plt.subplot(2, 1, 1)
bars = plt.barh(range(len(categorical_data)), [1] * len(categorical_data), color='lightgray')
for i, (key, value) in enumerate(categorical_data.items()):
    plt.text(0.5, i, f"{key}: {value}", ha='center', va='center', fontsize=12)
plt.yticks([])
plt.xticks([])
plt.title('User Profile Summary', fontsize=14)

# Add numerical data as text
plt.subplot(2, 1, 2)
plt.axis('off')
plt.text(0.5, 0.7, f"Age: {user_data['Age']} years", ha='center', fontsize=14)
plt.text(0.5, 0.5, f"BMI: {user_data['BMI']} (Overweight)", ha='center', fontsize=14)
plt.text(0.5, 0.3, f"Predicted Risk: {user_data['Predicted Risk']}", ha='center', fontsize=16, color=risk_color, weight='bold')

plt.tight_layout()
plt.savefig(os.path.join(visualization_dir, "user_profile_summary.png"))
plt.close()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
