In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import save_model
import joblib

In [2]:
# Load every CSV of the multiclass dataset and stack them into a single DataFrame.
# NOTE(review): absolute Google Drive path — presumably requires the drive to be
# mounted at /content/drive before this cell runs; confirm for your environment.
csv_pattern = "/content/drive/MyDrive/Datos_TFG_HP/Datos_finales/Multiclase/*.csv"

# glob.glob() returns matches in filesystem-dependent order. Sorting pins the row
# order of `df`, so any seeded operation applied to it later (e.g. a train/test
# split with a fixed random_state) selects the same rows on every run.
csv_files = sorted(glob.glob(csv_pattern))

# Fail early with an actionable message instead of pandas' generic
# "No objects to concatenate" error when the pattern matches nothing.
if not csv_files:
    raise FileNotFoundError(f"No CSV files matched pattern: {csv_pattern}")

# One DataFrame per file, then a single concatenation with a fresh 0..n-1 index.
df_list = [pd.read_csv(f) for f in csv_files]
df = pd.concat(df_list, ignore_index=True)

print(f"Total de muestras: {len(df)}")
print("Columnas:", df.columns)

Total de muestras: 12100
Columnas: Index(['event_id', 'clase', 'game_clock_1', 'shot_clock_1', 'x_loc_ball_1',
       'y_loc_ball_1', 'z_loc_ball_1', 'x_loc_player_1_1', 'y_loc_player_1_1',
       'x_loc_player_2_1',
       ...
       'x_loc_player_6_40', 'y_loc_player_6_40', 'x_loc_player_7_40',
       'y_loc_player_7_40', 'x_loc_player_8_40', 'y_loc_player_8_40',
       'x_loc_player_9_40', 'y_loc_player_9_40', 'x_loc_player_10_40',
       'y_loc_player_10_40'],
      dtype='object', length=1002)


In [3]:
# Separate the combined frame into the feature matrix X and the target vector y.
# Both "event_id" and "clase" are removed from the features; "clase" is the target.
non_feature_cols = ["event_id", "clase"]
X = df.drop(labels=non_feature_cols, axis=1).values
y = df.loc[:, "clase"].values

# Report the resulting array shapes (features first, then targets).
for shape_caption, arr in (("Forma de X:", X), ("Forma de y:", y)):
    print(shape_caption, arr.shape)

Forma de X: (12100, 1000)
Forma de y: (12100,)


In [None]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd

from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd
import numpy as np


# Hold out 15% of the samples as a stratified test set, tune an XGBoost classifier
# with a randomized hyper-parameter search on the remaining data, persist the best
# estimator, and report its performance on the held-out set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, stratify=y, random_state=42)

# Sorted distinct class labels of the full dataset; computed once and reused for
# the class count and for labelling the confusion matrix.
class_labels = np.unique(y)

# Candidate values sampled by the randomized search.
param_dist = {
    'n_estimators': [100, 200, 300],
    'max_depth': [4, 6, 8, 10],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'gamma': [0, 1, 5],
    'reg_lambda': [0, 1, 10],
    'reg_alpha': [0, 1, 5]
}

# NOTE(review): `use_label_encoder` is reportedly deprecated in recent xgboost
# releases — confirm against the installed version before removing it.
xgb = XGBClassifier(
    objective='multi:softmax',
    num_class=len(class_labels),
    use_label_encoder=False,
    eval_metric='mlogloss',
    verbosity=0
)

# 30 sampled configurations, each scored by accuracy with 3-fold cross-validation,
# run in parallel (n_jobs=-1); random_state fixes which configurations are drawn.
random_search = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=param_dist,
    n_iter=30,
    scoring='accuracy',
    cv=3,
    verbose=2,
    n_jobs=-1,
    random_state=42
)

random_search.fit(X_train, y_train)

best_model = random_search.best_estimator_

# Persist the tuned estimator; the relative path resolves against the current
# working directory.
joblib.dump(best_model, 'best_xgb_model.joblib')

y_pred = best_model.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred, digits=4))

print("Matriz de Confusión:")
# `labels=class_labels` forces one row/column per class of the full dataset.
# Without it, confusion_matrix only covers labels that occur in y_test or y_pred,
# so a class absent from both would make the matrix smaller than the index/columns
# built below and pd.DataFrame would raise a shape-mismatch error.
print(pd.DataFrame(confusion_matrix(y_test, y_pred, labels=class_labels),
                   index=[f"Clase {i}" for i in class_labels],
                   columns=[f"Pred {i}" for i in class_labels]))

print("\nMejores hiperparámetros:")
print(random_search.best_params_)