In [19]:
import xgboost as xgb
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load California housing dataset
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Binarize the target variable for a classification task
y_binary = (y > y.mean()).astype(int)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)

# Convert the dataset into DMatrix format, which is required by XGBoost
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Define custom evaluation metric function
def custom_accuracy(preds, dtrain):
    labels = dtrain.get_label()
    preds_binary = (preds > 0.5).astype(int)
    accuracy = accuracy_score(labels, preds_binary)
    return 'accuracy', accuracy

# Define hyperparameters
params = {
    'objective': 'binary:logistic',  # Use binary logistic regression for binary classification
    "eval_metric": "logloss"
}

# Training the model with early stopping and custom evaluation metric
num_rounds = 1000  # Increase the number of rounds to allow for early stopping
early_stopping_rounds = 20  # Number of rounds to wait for early stopping
evals_result = {}  # Dictionary to store evaluation results
model = xgb.train(params, dtrain, num_rounds, evals=[(dtrain, 'train'), (dtest, 'test')],
                  evals_result=evals_result, verbose_eval=True, maximize=True,
                #   custom_metric=custom_accuracy, 
                  early_stopping_rounds=early_stopping_rounds)

# Predictions
y_pred_prob = model.predict(dtest)
y_pred = (y_pred_prob > 0.5).astype(int)  # Binarize predictions using a threshold of 0.5

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Plotting the training and evaluation metrics
import matplotlib.pyplot as plt

# train_metric = evals_result['train']['accuracy']
# test_metric = evals_result['test']['accuracy']

# plt.plot(train_metric, label='Train Accuracy')
# plt.plot(test_metric, label='Test Accuracy')
# plt.xlabel('Iterations')
# plt.ylabel('Accuracy')
# plt.title('Training and Evaluation Metrics')
# plt.legend()
# plt.show()

[0]	train-logloss:0.54099	test-logloss:0.54412
[1]	train-logloss:0.46285	test-logloss:0.47311
[2]	train-logloss:0.41259	test-logloss:0.42798
[3]	train-logloss:0.37720	test-logloss:0.39660


[4]	train-logloss:0.34389	test-logloss:0.36607
[5]	train-logloss:0.32561	test-logloss:0.35102
[6]	train-logloss:0.30934	test-logloss:0.33700
[7]	train-logloss:0.28943	test-logloss:0.31933
[8]	train-logloss:0.27624	test-logloss:0.30951
[9]	train-logloss:0.26393	test-logloss:0.29986
[10]	train-logloss:0.25644	test-logloss:0.29326
[11]	train-logloss:0.24978	test-logloss:0.28816
[12]	train-logloss:0.23825	test-logloss:0.27908
[13]	train-logloss:0.23055	test-logloss:0.27346
[14]	train-logloss:0.22561	test-logloss:0.26950
[15]	train-logloss:0.22239	test-logloss:0.26682
[16]	train-logloss:0.21887	test-logloss:0.26466
[17]	train-logloss:0.21195	test-logloss:0.25931
[18]	train-logloss:0.20730	test-logloss:0.25488
[19]	train-logloss:0.20405	test-logloss:0.25261
[20]	train-logloss:0.20205	test-logloss:0.25186
Accuracy: 0.8921996124031008


In [23]:
my_list = [10, 30, 20, 20, 50, 50, 40]

# Find the index of the maximum value
max_index = my_list.index(max(my_list))

print("Index of the maximum value:", max_index)

Index of the maximum value: 4
