In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import f1_score


**Load the data**

In [23]:

train_data = pd.read_csv('/content/drive/MyDrive/Train_data.csv')
test_data = pd.read_csv('/content/drive/MyDrive/test_data.csv')

**Define features and target**

In [24]:

features = ['Glucose', 'Cholesterol', 'Hemoglobin', 'Platelets', 'White Blood Cells', 'Red Blood Cells',
            'Hematocrit', 'Mean Corpuscular Volume', 'Mean Corpuscular Hemoglobin',
            'Mean Corpuscular Hemoglobin Concentration', 'Insulin', 'BMI', 'Systolic Blood Pressure',
            'Diastolic Blood Pressure', 'Triglycerides', 'HbA1c', 'LDL Cholesterol', 'HDL Cholesterol',
            'ALT', 'AST', 'Heart Rate', 'Creatinine', 'Troponin', 'C-reactive Protein']
target = 'Disease'

**Preprocessing**

In [25]:

def preprocess_data(data):
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(data)
    return scaled_data

**Split the data into train and validation sets**

In [26]:

X_train, X_val, y_train, y_val = train_test_split(train_data[features], train_data[target], test_size=0.2, random_state=42)

**Preprocess the data**

In [27]:

X_train_scaled = preprocess_data(X_train)
X_val_scaled = preprocess_data(X_val)


**Initialize models**

In [28]:

models = {
    'RandomForest': RandomForestClassifier(random_state=42),
    'GradientBoosting': GradientBoostingClassifier(random_state=42),
    'SVM': SVC(kernel='rbf', random_state=42)
}

**Train and evaluate models**

In [29]:

best_model = None
best_f1 = 0

for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_val_scaled)
    f1 = f1_score(y_val, y_pred, average='weighted')
    print(f"{name} F1 Score: {f1}")
    if f1 > best_f1:
        best_f1 = f1
        best_model = model

print(f"Best Model: {best_model}")


RandomForest F1 Score: 1.0
GradientBoosting F1 Score: 0.9788600175237674
SVM F1 Score: 1.0
Best Model: RandomForestClassifier(random_state=42)


**Preprocess the test data**

In [30]:

X_test_scaled = preprocess_data(test_data[features])

**Make predictions on the test set using the best model**

In [31]:

y_pred_test = best_model.predict(X_test_scaled)

**Evaluate the model on the test set**

In [32]:

f1_test = f1_score(test_data[target], y_pred_test, average='weighted')
print(f"Best Model F1 Score on Test Set: {f1_test}")


Best Model F1 Score on Test Set: 0.2942237416760795
