In [1]:
## ML Model

In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [17]:
# Load the dataset 
df = pd.read_csv('data.csv')

In [18]:
# Label Encoding for categorical columns
label_cols = ['Gender', 'Occupation', 'BMI Category', 'Sleep Disorder']
label_encoders = {}

In [19]:
for col in label_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [20]:
# Split Blood Pressure into 'Systolic' and 'Diastolic'
df[['Systolic', 'Diastolic']] = df['Blood Pressure'].str.split('/', expand=True)
df['Systolic'] = pd.to_numeric(df['Systolic'], errors='coerce')
df['Diastolic'] = pd.to_numeric(df['Diastolic'], errors='coerce')
df.drop('Blood Pressure', axis=1, inplace=True)

In [21]:
# Filling missing values
df.ffill(inplace=True)

In [22]:
# Splitting the dataset into features (X) and target (y)
X = df.drop('Heart Rate', axis=1)  # Assuming Heart Rate as the target for classification
y = df['Heart Rate']

In [23]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [24]:
# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [30]:
# Training the Random Forest Classifier model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [31]:
# Predictions
y_pred = model.predict(X_test)

In [32]:
# Evaluate the model with zero_division to handle undefined metrics
print("Accuracy: ", accuracy_score(y_test, y_pred))

# Use the zero_division parameter to avoid warnings
print(classification_report(y_test, y_pred, zero_division=1))

Accuracy:  0.96
              precision    recall  f1-score   support

          65       0.92      1.00      0.96        11
          68       1.00      1.00      1.00        19
          70       1.00      1.00      1.00        14
          72       1.00      1.00      1.00        20
          75       0.83      1.00      0.91         5
          76       0.00      1.00      0.00         0
          77       1.00      0.00      0.00         1
          78       1.00      1.00      1.00         1
          84       1.00      0.00      0.00         2
          85       1.00      1.00      1.00         2

    accuracy                           0.96        75
   macro avg       0.88      0.80      0.69        75
weighted avg       0.98      0.96      0.95        75



In [33]:
# Feature Importance
importances = model.feature_importances_
feature_names = X.columns
feature_importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': importances}).sort_values(by='Importance', ascending=False)
print("Feature Importances:\n", feature_importance_df)

Feature Importances:
                     Feature  Importance
9               Daily Steps    0.169166
6   Physical Activity Level    0.145528
4            Sleep Duration    0.117874
7              Stress Level    0.115027
5          Quality of Sleep    0.083657
0                 Person ID    0.081035
2                       Age    0.061127
11                 Systolic    0.060095
1                    Gender    0.045379
12                Diastolic    0.043130
3                Occupation    0.040702
8              BMI Category    0.028502
10           Sleep Disorder    0.008777


In [34]:
# Health Recommendation Function
def health_recommendation(heart_rate):
    if heart_rate > 100:
        return "Alert: Soldier needs immediate medical attention!"
    elif 60 <= heart_rate <= 100:
        return "Soldier's health is stable."
    else:
        return "Recommendation: Soldier should rest and rehydrate."

In [37]:
# Example health recommendation
soldier_health = 110  # Sample heart rate value
print(health_recommendation(soldier_health))

Alert: Soldier needs immediate medical attention!
