In [8]:
import json

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [9]:
# Read the training data into a DataFrame.
# Later cells expect a "Calories" target column and an "id" column next to the features.
data = pd.read_csv(filepath_or_buffer="train.csv")

In [10]:

# Target: the "Calories" column the model learns to predict.
y = data["Calories"]

# Features: every column except the target and the row identifier "id"
# (e.g. Sex, Age, Height, Weight, Duration, Heart_Rate, Body_Temp).
X = data.drop(columns=["Calories", "id"])

In [11]:
# Encode 'Sex' as 1 (male) / 0 (female).
# The mapping reads from the untouched `data` frame rather than from X itself, so
# re-running this cell is idempotent; mapping X['Sex'] onto itself would turn the
# already-encoded 0/1 values into NaN on a second run.
X['Sex'] = data['Sex'].map({'male': 1, 'female': 0})

In [20]:
scaler = StandardScaler()

# Preprocess a copy so X itself is left untouched and this cell can be re-run safely.
X_to_process = X.copy()

# Defensive step 1: drop the 'id' identifier if it is still present.
X_to_process = X_to_process.drop(columns='id', errors='ignore')

# Defensive step 2: make sure 'Sex' is numeric.
# is_numeric_dtype is used instead of `dtype == 'object'` because string columns are not
# always stored as 'object' (e.g. pandas' dedicated string dtype); an unencoded 'Sex'
# column would otherwise slip through and fail later with
# "could not convert string to float: 'male'".
if 'Sex' in X_to_process.columns:
    if not pd.api.types.is_numeric_dtype(X_to_process['Sex']):
        X_to_process['Sex'] = X_to_process['Sex'].map({'male': 1, 'female': 0})
    # Values outside the mapping (or an accidental double mapping) show up as NaN;
    # fail loudly here instead of feeding NaN to the model.
    if X_to_process['Sex'].isna().any():
        raise ValueError("'Sex' contains missing or unmapped values after encoding.")

# Scale every column except the binary 'Sex' indicator.
cols_to_scale = [col for col in X_to_process.columns if col != 'Sex']

# NOTE(review): the scaler is fitted on all rows, before the train/test split performed in a
# later cell, so test-set statistics leak into the scaling. Consider fitting on the training
# rows only.
if cols_to_scale:
    X_to_process[cols_to_scale] = scaler.fit_transform(X_to_process[cols_to_scale])

# X_scaled is the name the following cells consume.
X_scaled = X_to_process

In [30]:
# Preview the first five rows of the processed feature frame.
X_scaled.head(5)

Unnamed: 0,Sex,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,1,-0.357192,1.115235,0.490201,1.266324,0.583714,1.235772
1,0,1.487943,-0.912137,-1.083172,-0.888309,-1.109436,-0.431163
2,0,0.631273,-1.068088,-0.797104,-1.008011,-1.215258,-0.302938
3,1,-1.411555,1.349162,1.062337,1.146622,1.007002,0.851095
4,0,-0.225397,-0.678209,-1.011655,1.146622,0.689536,0.722869


In [31]:
# Persist the fitted scaler statistics together with the columns they belong to, so the same
# standardisation can be reproduced outside this notebook.
# StandardScaler only has mean_/scale_ after fitting, and the scaling cell skips fitting when
# there are no columns to scale; store empty lists in that case instead of raising.
scaler_is_fitted = hasattr(scaler, 'mean_')
scaler_params = {
    'mean': scaler.mean_.tolist() if scaler_is_fitted else [],
    'scale': scaler.scale_.tolist() if scaler_is_fitted else [],
    'columns': cols_to_scale,  # order matches the entries of 'mean' and 'scale'
}
with open('scaler_config.json', 'w', encoding='utf-8') as f:
    json.dump(scaler_params, f)
print("Scaler configuration saved to scaler_config.json")

Scaler configuration saved to scaler_config.json


In [17]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

In [27]:
# Record the TensorFlow version this notebook was run with.
print(tf.version.VERSION)

2.19.0


In [29]:
# Record the Keras version exposed through tf.keras.
print(f"{tf.keras.__version__}")

3.9.2


In [18]:
# Build the regression network: two hidden ReLU layers followed by one linear output unit.
# The input width is declared with an explicit Input object; passing `input_shape` to the
# first Dense layer is discouraged for Sequential models in Keras 3 and emits a UserWarning.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)  # Single output for regression
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# 'Sex' is already numeric by the time the data reaches this cell (it is encoded in the
# preprocessing cells above), so no further encoding of X_train / X_test is needed here.

# Configure training: Adam optimiser, mean-squared-error loss, MAE reported as a metric.
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mae'],
)

# Fit for 100 epochs in mini-batches of 32; validation_split=0.2 reserves 20% of the
# training data for validation.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
)


Epoch 1/100
[1m15000/15000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1ms/step - loss: 619.4028 - mae: 9.0912 - val_loss: 13.6996 - val_mae: 2.2659
Epoch 2/100
[1m15000/15000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1ms/step - loss: 619.4028 - mae: 9.0912 - val_loss: 13.6996 - val_mae: 2.2659
Epoch 2/100
[1m15000/15000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 1ms/step - loss: 14.4574 - mae: 2.3090 - val_loss: 13.4546 - val_mae: 2.2381
Epoch 3/100
[1m15000/15000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 1ms/step - loss: 14.4574 - mae: 2.3090 - val_loss: 13.4546 - val_mae: 2.2381
Epoch 3/100
[1m15000/15000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1ms/step - loss: 14.1773 - mae: 2.2777 - val_loss: 13.1578 - val_mae: 2.1795
Epoch 4/100
[1m15000/15000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1ms/step - loss: 14.1773 - mae: 2.2777 - val_loss: 13.1578 - val_mae: 2.1795
Epoch 4/100
[1m15000/15000[0m [32m━

In [23]:
# Score the trained model on the held-out test split; evaluate returns the loss followed by
# the metric configured at compile time (MAE).
loss, mae = model.evaluate(X_test, y_test)
print("Test MAE: {}".format(mae))

[1m4688/4688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 817us/step - loss: 13.3623 - mae: 2.1620
Test MAE: 2.1591124534606934
[1m4688/4688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 817us/step - loss: 13.3623 - mae: 2.1620
Test MAE: 2.1591124534606934


In [24]:
# Run the model on the first five test rows and print the predictions as a flat array.
predictions = model.predict(X_test.iloc[:5])
print("Sample Predictions:", predictions.reshape(-1))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Sample Predictions: [197.12555   64.187584  40.519905 103.34368   54.717712]
Sample Predictions: [197.12555   64.187584  40.519905 103.34368   54.717712]


In [26]:
# Save the trained model for later use.
# The HDF5 format is inferred from the ".h5" suffix; the `save_format` argument is deprecated
# in Keras 3 and has been dropped.
# NOTE(review): Keras recommends its native ".keras" format; the ".h5" file name is kept here
# so anything already loading calories_model.h5 keeps working.
model.save("calories_model.h5")

