In [31]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras import models, layers, optimizers
from itertools import product
import tensorflow as tf

In [11]:
# Print arrays with up to 20 decimals and without scientific notation
np.set_printoptions(precision=20, suppress=True)

# Load the dataset: column 0 is skipped, columns 1-12 are the inputs,
# columns 13-15 are the three regression targets
my_data = np.genfromtxt('ML-CUP24-TR.csv', delimiter=',')
X, y = my_data[:, 1:13], my_data[:, 13:16]
print(X.shape, y.shape)

# Hold-out, step 1: 60% training / 40% temporary pool
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42
)

# Hold-out, step 2: halve the pool into validation (20%) and test (20%)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Standardise inputs and targets; the scalers are fitted on the training
# split only, then applied unchanged to validation and test
scaler_X = StandardScaler()
scaler_y = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
y_train = scaler_y.fit_transform(y_train)

X_val, X_test = scaler_X.transform(X_val), scaler_X.transform(X_test)
y_val, y_test = scaler_y.transform(y_val), scaler_y.transform(y_test)

(250, 12) (250, 3)


In [13]:
# Gaussian RBF activation implemented with TensorFlow ops
def rbf_activation(x, centers, gamma=1.0):
    """Return exp(-gamma * ||x - c||^2) for every (sample, center) pair.

    Args:
        x: input tensor; as called from build_rbf_model it is a batch of
            feature vectors, i.e. shape (batch, input_dim).
        centers: RBF centers, broadcast against ``x`` after a new axis is
            inserted at position 1 (build_rbf_model passes an
            (n_centers, input_dim) tensor).
        gamma: width coefficient multiplying the squared distance.

    Returns:
        Tensor with the RBF response of each sample to each center.
    """
    # Add a "centers" axis so the subtraction broadcasts sample-vs-center
    offsets = tf.expand_dims(x, axis=1) - centers
    # Squared Euclidean distance: sum of squared offsets over the feature axis
    squared_distances = tf.reduce_sum(tf.square(offsets), axis=-1)
    return tf.exp(-gamma * squared_distances)

# Build the RBF network
def build_rbf_model(input_dim, output_dim, n_centers=10, gamma=1.0, seed=None):
    """Build an (uncompiled) RBF network: input -> fixed RBF layer -> Dense.

    The RBF centers are drawn from a standard normal distribution and are
    NOT trained: they are captured by a Lambda layer, which Keras does not
    track as a weight. Only the Dense output layer is learned.

    Args:
        input_dim: number of input features.
        output_dim: number of regression targets (units of the Dense layer).
        n_centers: number of RBF centers (width of the hidden layer).
        gamma: width coefficient passed to ``rbf_activation``.
        seed: optional integer seed for the center initialisation. When
            ``None`` (default) the global NumPy random state is used, as
            before; pass a value to make the centers reproducible.

    Returns:
        A ``Sequential`` Keras model that still has to be compiled.
    """
    # Draw the centers; a dedicated RandomState keeps a seeded call from
    # disturbing the global NumPy random stream
    if seed is None:
        initial_centers = np.random.randn(n_centers, input_dim)
    else:
        initial_centers = np.random.RandomState(seed).randn(n_centers, input_dim)

    # A constant (not a tf.Variable) makes it explicit that the centers are
    # fixed: a variable captured by a Lambda closure is never tracked or updated
    centers = tf.constant(initial_centers, dtype=tf.float32)

    model = models.Sequential()

    # Input layer: Keras 3 expects 'shape' rather than 'input_dim'
    model.add(layers.InputLayer(shape=(input_dim,)))

    # Hidden layer: Gaussian RBF response of each sample to each center
    model.add(layers.Lambda(lambda x: rbf_activation(x, centers, gamma)))

    # Linear output layer, one unit per target
    model.add(layers.Dense(output_dim))

    return model

## Test 1:

In [27]:
# Exhaustive grid search over the RBF network hyper-parameters
def grid_search(X_train, y_train, X_val, y_val, X_test, y_test, param_grid, epochs=50,
                output_path="TestsData/grid_search_results.csv"):
    """Train one RBF model per hyper-parameter combination and log its losses.

    Args:
        X_train, y_train: training inputs / targets.
        X_val, y_val: validation inputs / targets (monitored during ``fit``).
        X_test, y_test: test inputs / targets (evaluated after training).
        param_grid: dict mapping the keys ``n_centers``, ``gamma``,
            ``optimizer`` (an optimizer class), ``learning_rate`` and
            ``batch_size`` to the list of values to try.
        epochs: number of training epochs per combination.
        output_path: CSV file the results table is written to.

    Returns:
        pandas.DataFrame with one row per combination: the parameters plus
        ``test_loss``, ``val_loss`` (last epoch) and ``test_mse``.
    """
    import os  # local import: only needed to prepare the output location

    # Create the output directory up front: otherwise a missing folder only
    # surfaces in to_csv(), after the whole search has run, and loses every result
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    param_names = list(param_grid.keys())
    results = []

    for params in product(*param_grid.values()):
        param_dict = dict(zip(param_names, params))
        print(f"Testing parameters: {param_dict}")

        # Drop the Keras global state left by the previous model so memory
        # does not grow with the number of combinations
        tf.keras.backend.clear_session()

        model = build_rbf_model(
            input_dim=X_train.shape[1],
            output_dim=y_train.shape[1],
            n_centers=param_dict["n_centers"],
            gamma=param_dict["gamma"]
        )

        # 'optimizer' holds the optimizer class; instantiate it with the learning rate
        optimizer = param_dict["optimizer"](learning_rate=param_dict["learning_rate"])

        model.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=param_dict["batch_size"],
            verbose=0
        )

        # Validation loss at the final epoch
        val_loss = history.history['val_loss'][-1]

        # evaluate() returns [loss, mse]; the metric duplicates the loss and
        # the MSE is recomputed below, so only the loss is kept here
        test_loss, _ = model.evaluate(X_test, y_test, verbose=0)
        print(f"Test Loss: {test_loss:.4f}")

        # Test MSE from explicit predictions (verbose=0: no progress bar per model)
        y_pred = model.predict(X_test, verbose=0)
        test_mse = mean_squared_error(y_test, y_pred)
        print(f"Test MSE: {test_mse:.4f}")

        results.append({
            **param_dict,
            "test_loss": test_loss,
            "val_loss": val_loss,
            "test_mse": test_mse
        })

    # Persist the results table
    results_df = pd.DataFrame(results)
    results_df.to_csv(output_path, index=False)
    print(f"Grid Search completata. Risultati salvati in {os.path.basename(output_path)}")
    return results_df

# Hyper-parameter grid for the first search
param_grid = {
    # Number of RBF centers
    "n_centers": [5, 10, 15],
    # RBF width coefficient
    "gamma": [0.1, 0.5, 1.0],
    # Optimizer classes (instantiated inside grid_search)
    "optimizer": [optimizers.Adam, optimizers.SGD],
    # Learning rates
    "learning_rate": [0.001, 0.01],
    # Mini-batch sizes
    "batch_size": [32, 64, 128],
}

# Run the grid search
if __name__ == "__main__":
    grid_search(
        X_train, y_train,
        X_val, y_val,
        X_test, y_test,
        param_grid=param_grid,
    )

Testing parameters: {'n_centers': 5, 'gamma': 0.1, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'batch_size': 32}
Test Loss: 0.9017
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Test MSE: 0.9017
Testing parameters: {'n_centers': 5, 'gamma': 0.1, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'batch_size': 64}
Test Loss: 0.8891
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Test MSE: 0.8891
Testing parameters: {'n_centers': 5, 'gamma': 0.1, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'batch_size': 128}
Test Loss: 0.8461
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Test MSE: 0.8461
Testing parameters: {'n_centers': 5, 'gamma': 0.1, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.01, 'batch_size': 32}
Test Loss: 0.6904
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/st

### Analisi results1:
Valori con cui si raggiungono i migliori risultati:
- Numero di centri alto (10-15)
- Gamma basso (0.1)
- Learning rate alto (0.01)
- Batch size: sostanzialmente indifferente (lieve preferenza per valori bassi, 32)
- Adam è il migliore in assoluto

## Test 2:

- N centri: aggiungo 20 tolgo 5
- Gamma: tolgo 0.5 e 1 e aggiungo 0.001 e 0.01

In [35]:
# Exhaustive grid search over the RBF network hyper-parameters (Test 2)
def grid_search(X_train, y_train, X_val, y_val, X_test, y_test, param_grid, epochs=50,
                output_path="TestsData/grid_search_results2.csv"):
    """Train one RBF model per hyper-parameter combination and log its losses.

    Args:
        X_train, y_train: training inputs / targets.
        X_val, y_val: validation inputs / targets (monitored during ``fit``).
        X_test, y_test: test inputs / targets (evaluated after training).
        param_grid: dict mapping the keys ``n_centers``, ``gamma``,
            ``optimizer`` (an optimizer class), ``learning_rate`` and
            ``batch_size`` to the list of values to try.
        epochs: number of training epochs per combination.
        output_path: CSV file the results table is written to.

    Returns:
        pandas.DataFrame with one row per combination: the parameters plus
        ``test_loss``, ``val_loss`` (last epoch) and ``test_mse``.
    """
    import os  # local import: only needed to prepare the output location

    # Create the output directory up front: otherwise a missing folder only
    # surfaces in to_csv(), after the whole search has run, and loses every result
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    param_names = list(param_grid.keys())
    results = []

    for params in product(*param_grid.values()):
        param_dict = dict(zip(param_names, params))
        print(f"Testing parameters: {param_dict}")

        # Drop the Keras global state left by the previous model so memory
        # does not grow with the number of combinations
        tf.keras.backend.clear_session()

        model = build_rbf_model(
            input_dim=X_train.shape[1],
            output_dim=y_train.shape[1],
            n_centers=param_dict["n_centers"],
            gamma=param_dict["gamma"]
        )

        # 'optimizer' holds the optimizer class; instantiate it with the learning rate
        optimizer = param_dict["optimizer"](learning_rate=param_dict["learning_rate"])

        model.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=param_dict["batch_size"],
            verbose=0
        )

        # Validation loss at the final epoch
        val_loss = history.history['val_loss'][-1]

        # evaluate() returns [loss, mse]; the metric duplicates the loss and
        # the MSE is recomputed below, so only the loss is kept here
        test_loss, _ = model.evaluate(X_test, y_test, verbose=0)
        print(f"Test Loss: {test_loss:.4f}")

        # Test MSE from explicit predictions (verbose=0: no progress bar per model)
        y_pred = model.predict(X_test, verbose=0)
        test_mse = mean_squared_error(y_test, y_pred)
        print(f"Test MSE: {test_mse:.4f}")

        results.append({
            **param_dict,
            "test_loss": test_loss,
            "val_loss": val_loss,
            "test_mse": test_mse
        })

    # Persist the results table; the message reports the file actually written
    # (it previously always named grid_search_results.csv)
    results_df = pd.DataFrame(results)
    results_df.to_csv(output_path, index=False)
    print(f"Grid Search completata. Risultati salvati in {os.path.basename(output_path)}")
    return results_df

# Hyper-parameter grid for the second search
param_grid = {
    # Number of RBF centers
    "n_centers": [10, 15, 20],
    # RBF width coefficient
    "gamma": [0.001, 0.01, 0.1],
    # Optimizer classes (instantiated inside grid_search)
    "optimizer": [optimizers.Adam, optimizers.SGD],
    # Learning rates
    "learning_rate": [0.001, 0.01],
    # Mini-batch sizes
    "batch_size": [32, 64, 128],
}

# Run the grid search
if __name__ == "__main__":
    grid_search(
        X_train, y_train,
        X_val, y_val,
        X_test, y_test,
        param_grid=param_grid,
    )

Testing parameters: {'n_centers': 10, 'gamma': 0.001, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'batch_size': 32}
Test Loss: 1.0619
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Test MSE: 1.0619
Testing parameters: {'n_centers': 10, 'gamma': 0.001, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'batch_size': 64}
Test Loss: 0.8700
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Test MSE: 0.8700
Testing parameters: {'n_centers': 10, 'gamma': 0.001, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'batch_size': 128}
Test Loss: 0.8893
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Test MSE: 0.8893
Testing parameters: {'n_centers': 10, 'gamma': 0.001, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.01, 'batch_size': 32}
Test Loss: 0.9028
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

### Analisi results2:
- Gamma: 0.1 è perfetto
- Adam è il migliore nei primi 16
- Centri alti sono meglio

## Test 3:

- Fissiamo Adam
- Fissiamo gamma a 0.1
- centri: tolgo 10, metto 25
- learning rate: tolgo 0.001 e metto 0.05 e 0.1

In [43]:
# Exhaustive grid search over the RBF network hyper-parameters (Test 3)
def grid_search(X_train, y_train, X_val, y_val, X_test, y_test, param_grid, epochs=50,
                output_path="TestsData/grid_search_results3.csv"):
    """Train one RBF model per hyper-parameter combination and log its losses.

    Args:
        X_train, y_train: training inputs / targets.
        X_val, y_val: validation inputs / targets (monitored during ``fit``).
        X_test, y_test: test inputs / targets (evaluated after training).
        param_grid: dict mapping the keys ``n_centers``, ``gamma``,
            ``optimizer`` (an optimizer class), ``learning_rate`` and
            ``batch_size`` to the list of values to try.
        epochs: number of training epochs per combination.
        output_path: CSV file the results table is written to.

    Returns:
        pandas.DataFrame with one row per combination: the parameters plus
        ``test_loss``, ``val_loss`` (last epoch) and ``test_mse``.
    """
    import os  # local import: only needed to prepare the output location

    # Create the output directory up front: otherwise a missing folder only
    # surfaces in to_csv(), after the whole search has run, and loses every result
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    param_names = list(param_grid.keys())
    results = []

    for params in product(*param_grid.values()):
        param_dict = dict(zip(param_names, params))
        print(f"Testing parameters: {param_dict}")

        # Drop the Keras global state left by the previous model so memory
        # does not grow with the number of combinations
        tf.keras.backend.clear_session()

        model = build_rbf_model(
            input_dim=X_train.shape[1],
            output_dim=y_train.shape[1],
            n_centers=param_dict["n_centers"],
            gamma=param_dict["gamma"]
        )

        # 'optimizer' holds the optimizer class; instantiate it with the learning rate
        optimizer = param_dict["optimizer"](learning_rate=param_dict["learning_rate"])

        model.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=param_dict["batch_size"],
            verbose=0
        )

        # Validation loss at the final epoch
        val_loss = history.history['val_loss'][-1]

        # evaluate() returns [loss, mse]; the metric duplicates the loss and
        # the MSE is recomputed below, so only the loss is kept here
        test_loss, _ = model.evaluate(X_test, y_test, verbose=0)
        print(f"Test Loss: {test_loss:.4f}")

        # Test MSE from explicit predictions (verbose=0: no progress bar per model)
        y_pred = model.predict(X_test, verbose=0)
        test_mse = mean_squared_error(y_test, y_pred)
        print(f"Test MSE: {test_mse:.4f}")

        results.append({
            **param_dict,
            "test_loss": test_loss,
            "val_loss": val_loss,
            "test_mse": test_mse
        })

    # Persist the results table; the message reports the file actually written
    # (it previously always named grid_search_results.csv)
    results_df = pd.DataFrame(results)
    results_df.to_csv(output_path, index=False)
    print(f"Grid Search completata. Risultati salvati in {os.path.basename(output_path)}")
    return results_df

# Hyper-parameter grid for the third search (Adam and gamma=0.1 are fixed)
param_grid = {
    # Number of RBF centers
    "n_centers": [15, 20, 25],
    # RBF width coefficient
    "gamma": [0.1],
    # Optimizer classes (instantiated inside grid_search)
    "optimizer": [optimizers.Adam],
    # Learning rates
    "learning_rate": [0.01, 0.05, 0.1],
    # Mini-batch sizes
    "batch_size": [32, 64, 128],
}

# Run the grid search
if __name__ == "__main__":
    grid_search(
        X_train, y_train,
        X_val, y_val,
        X_test, y_test,
        param_grid=param_grid,
    )

Testing parameters: {'n_centers': 15, 'gamma': 0.1, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.01, 'batch_size': 32}
Test Loss: 0.3531
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Test MSE: 0.3531
Testing parameters: {'n_centers': 15, 'gamma': 0.1, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.01, 'batch_size': 64}
Test Loss: 0.3354
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Test MSE: 0.3354
Testing parameters: {'n_centers': 15, 'gamma': 0.1, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.01, 'batch_size': 128}
Test Loss: 0.4953
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Test MSE: 0.4953
Testing parameters: {'n_centers': 15, 'gamma': 0.1, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.05, 'batch_size': 32}
Test Loss: 0.1824
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/s

### Analisi results3:
- Learning rate: 0.01 non più competitivo, 0.1 domina
- Centri: preferisce gli alti (20-25)
- Batch size sembra non avere correlazioni

## Test 4:

- Learning rate: togliamo 0.01, aggiungiamo 0.5 e 1.0
- centri: togliamo 15, mettiamo 30

In [1]:
import keras_tuner as kt
import numpy as np
import tensorflow as tf
from tensorflow.keras import models, layers, optimizers
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from itertools import product
import pandas as pd

In [9]:
# Gaussian RBF activation implemented with TensorFlow ops
def rbf_activation(x, centers, gamma=1.0):
    """Compute exp(-gamma * squared distance) between each sample and each center.

    Args:
        x: input tensor; the indexing below requires it to be rank 2
            (build_rbf_model feeds it batches of feature vectors).
        centers: RBF centers, broadcast against ``x`` once a new axis has
            been inserted at position 1.
        gamma: width coefficient multiplying the squared distance.

    Returns:
        Tensor holding the RBF response of every sample to every center.
    """
    # New axis at position 1 so samples broadcast against the centers
    x_expanded = x[:, np.newaxis, :]
    # Squared Euclidean distance, reduced over the feature axis
    squared_distances = tf.reduce_sum(tf.square(x_expanded - centers), axis=-1)
    return tf.exp(-gamma * squared_distances)

# Model builder usable both by Keras Tuner (hp given) and standalone (hp=None)
class _RBFLayer(layers.Layer):
    """Gaussian RBF layer whose fixed centers are stored as a non-trainable weight.

    Keeping the centers as a tracked weight (instead of a random tensor
    captured by a Lambda closure) means they are saved and restored with the
    model weights, so a model rebuilt from a checkpoint uses the same centers
    it was trained with.
    """

    def __init__(self, n_centers, gamma, **kwargs):
        super().__init__(**kwargs)
        self.n_centers = n_centers
        self.gamma = gamma

    def build(self, input_shape):
        # Standard-normal initial centers, never updated by the optimizer
        self.centers = self.add_weight(
            name="centers",
            shape=(self.n_centers, input_shape[-1]),
            initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0),
            trainable=False,
        )

    def call(self, inputs):
        return rbf_activation(inputs, tf.convert_to_tensor(self.centers), self.gamma)


def build_rbf_model(hp=None, n_centers=25, gamma=0.1, learning_rate=0.05,
                    optimizer_choice='adam'):
    """Build and compile the RBF network.

    Input and output sizes are read from the module-level ``X_train`` and
    ``y_train`` arrays, which must exist before this function is called.

    Args:
        hp: Keras Tuner ``HyperParameters`` object. When given, ``n_centers``,
            ``gamma``, ``learning_rate`` and the optimizer are taken from it
            and the keyword arguments below are ignored.
        n_centers: number of RBF centers (used only when ``hp`` is None).
        gamma: RBF width coefficient (used only when ``hp`` is None).
        learning_rate: optimizer learning rate (used only when ``hp`` is None).
        optimizer_choice: ``'adam'`` selects Adam; any other value selects SGD
            (used only when ``hp`` is None).

    Returns:
        A compiled ``Sequential`` model with MSE loss and MSE metric.
    """
    if hp is not None:
        # Search space explored by Keras Tuner
        n_centers = hp.Int('n_centers', min_value=5, max_value=30, step=5)
        gamma = hp.Float('gamma', min_value=0.1, max_value=1.0, step=0.1)
        learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')
        optimizer_choice = hp.Choice('optimizer', values=['adam', 'sgd'])

    model = models.Sequential()
    model.add(layers.InputLayer(shape=(X_train.shape[1],)))

    # Hidden layer: RBF response to randomly initialised, fixed centers
    model.add(_RBFLayer(n_centers, gamma))

    # Linear output layer, one unit per target
    model.add(layers.Dense(y_train.shape[1]))

    if optimizer_choice == 'adam':
        optimizer = optimizers.Adam(learning_rate=learning_rate)
    else:
        optimizer = optimizers.SGD(learning_rate=learning_rate)

    model.compile(optimizer=optimizer, loss='mse', metrics=['mse'])

    return model

In [19]:
# Exhaustive grid search over the RBF network hyper-parameters (Test 4)
def grid_search(X_train, y_train, X_val, y_val, X_test, y_test, param_grid, epochs=50,
                output_path="TestsData/grid_search_results4.csv"):
    """Train one RBF model per hyper-parameter combination and log its losses.

    Args:
        X_train, y_train: training inputs / targets.
        X_val, y_val: validation inputs / targets (monitored during ``fit``).
        X_test, y_test: test inputs / targets (evaluated after training).
        param_grid: dict mapping the keys ``n_centers``, ``gamma``,
            ``optimizer`` (an optimizer class), ``learning_rate`` and
            ``batch_size`` to the list of values to try.
        epochs: number of training epochs per combination.
        output_path: CSV file the results table is written to.

    Returns:
        pandas.DataFrame with one row per combination: the parameters plus
        ``test_loss``, ``val_loss`` (last epoch) and ``test_mse``.
    """
    import os  # local import: only needed to prepare the output location

    # Create the output directory up front: otherwise a missing folder only
    # surfaces in to_csv(), after the whole search has run, and loses every result
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    param_names = list(param_grid.keys())
    results = []

    for params in product(*param_grid.values()):
        param_dict = dict(zip(param_names, params))
        print(f"Testing parameters: {param_dict}")

        # Drop the Keras global state left by the previous model so memory
        # does not grow with the number of combinations
        tf.keras.backend.clear_session()

        # build_rbf_model() called without arguments always used its built-in
        # n_centers/gamma, so the grid values were logged but never applied.
        # Pre-registering them as fixed hyper-parameters makes the builder's
        # hp.Int/hp.Float lookups return the grid values instead.
        fixed_hp = kt.HyperParameters()
        fixed_hp.Fixed("n_centers", param_dict["n_centers"])
        fixed_hp.Fixed("gamma", param_dict["gamma"])
        model = build_rbf_model(fixed_hp)

        # 'optimizer' holds the optimizer class; instantiate it with the learning rate
        optimizer = param_dict["optimizer"](learning_rate=param_dict["learning_rate"])

        # Re-compile so the grid's optimizer replaces the one set by the builder
        model.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=param_dict["batch_size"],
            verbose=0
        )

        # Validation loss at the final epoch
        val_loss = history.history['val_loss'][-1]

        # evaluate() returns [loss, mse]; the metric duplicates the loss and
        # the MSE is recomputed below, so only the loss is kept here
        test_loss, _ = model.evaluate(X_test, y_test, verbose=0)
        print(f"Test Loss: {test_loss:.4f}")

        # Test MSE from explicit predictions (verbose=0: no progress bar per model)
        y_pred = model.predict(X_test, verbose=0)
        test_mse = mean_squared_error(y_test, y_pred)
        print(f"Test MSE: {test_mse:.4f}")

        results.append({
            **param_dict,
            "test_loss": test_loss,
            "val_loss": val_loss,
            "test_mse": test_mse
        })

    # Persist the results table; the message reports the file actually written
    # (it previously always named grid_search_results.csv)
    results_df = pd.DataFrame(results)
    results_df.to_csv(output_path, index=False)
    print(f"Grid Search completata. Risultati salvati in {os.path.basename(output_path)}")
    return results_df

class MyTuner(kt.tuners.RandomSearch):
    """Random-search tuner that also tunes the training batch size."""

    def run_trial(self, trial, *args, **kwargs):
        """Inject a per-trial ``batch_size`` into the fit arguments and run the trial.

        Any ``batch_size`` passed to ``tuner.search`` is overridden by the
        value chosen here.
        """
        # Choice over the same batch sizes used by the grid searches. The
        # previous Int('batch_size', 32, 64, 128) passed 128 as the *step* of
        # the range [32, 64], so 64 and 128 could never be selected.
        kwargs['batch_size'] = trial.hyperparameters.Choice('batch_size', values=[32, 64, 128])
        return super().run_trial(trial, *args, **kwargs)

# Run the hyper-parameter search with Keras Tuner
def run_kerastuner():
    """Tune the RBF model with ``MyTuner`` and evaluate the best model on the test set.

    Uses the module-level ``X_train``/``y_train``, ``X_val``/``y_val`` and
    ``X_test``/``y_test`` arrays. Tuner state is stored under
    ``my_dir/rbf_tuning``.

    Returns:
        The best model found by the search.
    """
    # The class is named MyTuner; the previous 'myTuner' raised NameError
    tuner = MyTuner(
        build_rbf_model,
        objective='val_loss',  # minimise the validation loss
        max_trials=10,  # maximum number of hyper-parameter combinations tried
        executions_per_trial=1,
        directory='my_dir',
        project_name='rbf_tuning'
    )

    # batch_size here is only a placeholder: MyTuner.run_trial overrides it per trial
    tuner.search(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val))

    best_model = tuner.get_best_models(num_models=1)[0]

    # Evaluate the best model on the held-out test set
    test_loss, test_mse = best_model.evaluate(X_test, y_test)
    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test MSE: {test_mse:.4f}")

    return best_model

# Load the dataset: column 0 is skipped, columns 1-12 are the inputs,
# columns 13-15 are the three regression targets
my_data = np.genfromtxt('ML-CUP24-TR.csv', delimiter=',')
X = my_data[:, 1:13]
y = my_data[:, 13:16]

# Hold-out split: 60% training, then the remaining 40% is halved into
# validation (20%) and test (20%)
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Standardise inputs and targets with statistics from the training split only
scaler_X = StandardScaler().fit(X_train)
scaler_y = StandardScaler().fit(y_train)
X_train, X_val, X_test = (scaler_X.transform(part) for part in (X_train, X_val, X_test))
y_train, y_val, y_test = (scaler_y.transform(part) for part in (y_train, y_val, y_test))

# Hyper-parameter grid for Test 4. It was previously not defined anywhere in
# this session, so the call below only worked with a leftover kernel variable.
# Values follow the Test 4 plan: the Test 3 grid with learning rate 0.01
# replaced by 0.5 and 1.0, and 15 centers replaced by 30.
param_grid = {
    "n_centers": [20, 25, 30],  # number of RBF centers
    "gamma": [0.1],  # RBF width coefficient
    "optimizer": [optimizers.Adam],  # optimizer classes
    "learning_rate": [0.05, 0.1, 0.5, 1.0],  # learning rates
    "batch_size": [32, 64, 128]  # mini-batch sizes
}

# Run the grid search, then the Keras Tuner search
if __name__ == "__main__":
    grid_search(X_train, y_train, X_val, y_val, X_test, y_test, param_grid)
    run_kerastuner()

Testing parameters: {'n_centers': 20, 'gamma': 0.1, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.05, 'batch_size': 32}
Test Loss: 0.0921
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Test MSE: 0.0921
Testing parameters: {'n_centers': 20, 'gamma': 0.1, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.05, 'batch_size': 64}
Test Loss: 0.1957
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Test MSE: 0.1957
Testing parameters: {'n_centers': 20, 'gamma': 0.1, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.05, 'batch_size': 128}
Test Loss: 0.1675
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Test MSE: 0.1675
Testing parameters: {'n_centers': 20, 'gamma': 0.1, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.1, 'batch_size': 32}
Test Loss: 0.1415
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/st

KeyboardInterrupt: 