In [1]:
## Model Type 2

In [54]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder, PolynomialFeatures
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
from xgboost import XGBClassifier
from scipy.stats import randint

In [55]:
# Load the dataset
df = pd.read_csv('data.csv')

In [56]:
# Additional feature engineering with polynomial features
df['Temperature'] = np.random.uniform(36.0, 40.0, size=len(df))  # Body temperature in °C
df['Oxygen Level'] = np.random.uniform(90, 100, size=len(df))  # Oxygen saturation in %
df['Stress Level'] = np.random.randint(1, 10, size=len(df))  # Stress scale 1-10
df['Hydration Status'] = np.random.choice(['Low', 'Normal', 'High'], size=len(df))
df['Activity Level'] = np.random.choice(['Rest', 'Moderate', 'High'], size=len(df))

In [57]:
# Label Encoding for categorical columns
label_cols = ['Gender', 'Occupation', 'BMI Category', 'Sleep Disorder', 'Hydration Status', 'Activity Level']
label_encoders = {}
for col in label_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [58]:
# Split Blood Pressure into 'Systolic' and 'Diastolic'
df[['Systolic', 'Diastolic']] = df['Blood Pressure'].str.split('/', expand=True)
df['Systolic'] = pd.to_numeric(df['Systolic'], errors='coerce')
df['Diastolic'] = pd.to_numeric(df['Diastolic'], errors='coerce')
df.drop('Blood Pressure', axis=1, inplace=True)

In [59]:
# Handling missing values with SimpleImputer
imputer = SimpleImputer(strategy="median")
df.iloc[:, :] = imputer.fit_transform(df)

In [60]:
# Define target variable and feature set
X = df.drop('Heart Rate', axis=1)  # Assuming Heart Rate as the target for classification
y = df['Heart Rate']

In [61]:
# Polynomial features to capture interactions
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
X_poly = poly.fit_transform(X)

In [62]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.2, random_state=42)

In [63]:
# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [64]:
param_dist = {
    'n_estimators': randint(100, 300),
    'max_depth': randint(5, 15),
    'min_samples_split': randint(2, 10),
    'min_samples_leaf': randint(1, 5),
    'max_features': ['auto', 'sqrt', 'log2']
}

In [65]:
# Confirm that 'auto' is not in param_dist
print("Parameter distribution:", param_dist)

Parameter distribution: {'n_estimators': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x00000156C3BB97C0>, 'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x00000156C3BB90A0>, 'min_samples_split': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x00000156C3BBB470>, 'min_samples_leaf': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x00000156C3BBAC90>, 'max_features': ['auto', 'sqrt', 'log2']}


In [66]:
# Use StratifiedKFold for better handling of class imbalances in cross-validation
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
rfc = RandomForestClassifier(class_weight='balanced', random_state=42)

In [67]:
# RandomizedSearchCV with the updated parameter grid
random_search = RandomizedSearchCV(
    rfc,
    param_distributions=param_dist,
    n_iter=20,
    cv=skf,
    random_state=42,
    n_jobs=-1,
    error_score='raise'  # Raise error to debug fitting issues
)

In [68]:
# Evaluation metrics
print("Updated Random Forest Accuracy: ", accuracy_score(y_test, y_pred_rfc))
print(classification_report(y_test, y_pred_rfc, zero_division=1))

Updated Random Forest Accuracy:  0.9466666666666667
              precision    recall  f1-score   support

          65       0.85      1.00      0.92        11
          68       1.00      1.00      1.00        19
          70       1.00      1.00      1.00        14
          72       1.00      1.00      1.00        20
          75       1.00      1.00      1.00         5
          77       1.00      0.00      0.00         1
          78       1.00      0.00      0.00         1
          80       0.00      1.00      0.00         0
          82       0.00      1.00      0.00         0
          84       1.00      0.00      0.00         2
          85       1.00      1.00      1.00         2

    accuracy                           0.95        75
   macro avg       0.80      0.73      0.54        75
weighted avg       0.98      0.95      0.93        75



In [69]:
# Health Recommendation Function considering more features
def health_recommendation(heart_rate, temperature, oxygen_level, stress_level, hydration_status, activity_level):
    if heart_rate > 100 or temperature > 38.5 or oxygen_level < 92:
        return "Alert: Soldier needs immediate medical attention!"
    elif 60 <= heart_rate <= 100 and 36 <= temperature <= 38 and oxygen_level >= 95:
        if stress_level > 7 or hydration_status == 'Low':
            return "Recommendation: Reduce stress and increase hydration."
        elif activity_level == 'High':
            return "Recommendation: Take a short rest."
        return "Soldier's health is stable."
    else:
        return "Recommendation: Soldier should rest and rehydrate."

In [71]:
# Example health recommendation
sample_soldier_health = {
    'heart_rate': 110,
    'temperature': 39,
    'oxygen_level': 91,
    'stress_level': 8,
    'hydration_status': 'Low',
    'activity_level': 'High'
}
print(health_recommendation(**sample_soldier_health))

Alert: Soldier needs immediate medical attention!
