In [None]:
import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
import joblib


In [None]:
# Read the diabetes CSV into a DataFrame.
# NOTE(review): absolute path under /content — presumably a Colab runtime; confirm before running elsewhere.
data = pd.read_csv(filepath_or_buffer='/content/diabetes.csv')

In [None]:
# Separate the target column from the feature columns
y = data['Outcome']
X = data.drop(columns='Outcome')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Base LightGBM classifier with library-default settings
model = lgb.LGBMClassifier()

# Candidate values for each hyperparameter (3 x 3 x 3 x 3 = 81 combinations)
param_grid = dict(
    learning_rate=[0.01, 0.1, 1],
    n_estimators=[100, 500, 1000],
    max_depth=[3, 5, 7],
    num_leaves=[5, 10, 15],
)

# Exhaustive 5-fold cross-validated search over the grid, using all CPU cores
grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1).fit(X_train, y_train)

# Report the winning combination and its mean cross-validated score
print("Best Hyperparameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

# Take the model fitted with the best hyperparameters.
# GridSearchCV was built without overriding `refit` (default True), so after the
# search it has already retrained the winning configuration on all of
# X_train / y_train; reusing it avoids a redundant second training run.
model = grid_search.best_estimator_

# Predict on the held-out test set
y_pred = model.predict(X_test)

# Accuracy = fraction of test rows whose predicted label matches the true label
accuracy = np.mean(y_pred == y_test)
print("Accuracy:", accuracy)

# Persist the trained model to diabetes.joblib in the current working directory
with open('diabetes.joblib', 'wb') as model_file:
    joblib.dump(model, model_file)


Best Hyperparameters: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 500, 'num_leaves': 5}
Best Score: 0.7850593096094896
Accuracy: 0.7402597402597403
