In [35]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [36]:
# df = pd.read_csv('/Users/yelderiny/Projects/Dissertation/Data/processed-data.csv')
df = pd.read_csv('/Users/yelderiny/Projects/Dissertation/Data/processed-data-outliers-capped.csv')

In [37]:
features = df.drop('pr_points', axis=1)
target = df['pr_points']

In [38]:
x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=301)

In [39]:
def create_model():
    model = tf.keras.Sequential([
        keras.Input(shape=[len(x_train.keys())]),
        layers.Dense(64, activation='elu'),
        layers.Dropout(0.2),
        layers.Dense(128, activation='elu'),
        layers.Dense(1)
    ])
    
    optimizer = tf.keras.optimizers.Adam(0.001)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
    
    return model

In [41]:
# Initialize an empty list to store R2 scores
r2_scores = []

# Loop through random states from 0 to 100
for random_state in range(1000):
    print(f"Random State: {random_state}")
    
    # Split the data with the current random state
    x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=random_state)

    # Create and compile the model
    model_relu_64 = create_model()

    # Train the model
    n_epochs = 1000
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    model_relu_64.fit(x_train, y_train, epochs=n_epochs, validation_split=0.2, verbose=False, callbacks=[early_stop])

    # Make predictions on the test set
    y_pred = model_relu_64.predict(x_test)

    # Compute R2 score and append to the list
    r2 = r2_score(y_test, y_pred)
    r2_scores.append((random_state, r2))

Random State: 0
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 1
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 2
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 3
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 4
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 6
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 7
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 8
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 9
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 11
[1m8/8[0m [

In [42]:
# Sort R2 scores in descending order based on the second element (R2 score)
r2_scores.sort(key=lambda x: x[1], reverse=True)

# Print the sorted R2 scores
for random_state, r2 in r2_scores[:20]:
    print(f"Random State: {random_state}, R2 Score: {r2}")

Random State: 995, R2 Score: 0.6977465419982029
Random State: 675, R2 Score: 0.6922896300854717
Random State: 301, R2 Score: 0.6853683933015852
Random State: 55, R2 Score: 0.6851709141814826
Random State: 207, R2 Score: 0.6843006644610228
Random State: 623, R2 Score: 0.6835478186969692
Random State: 338, R2 Score: 0.6823134728045082
Random State: 285, R2 Score: 0.6713327577976739
Random State: 652, R2 Score: 0.6701976811868023
Random State: 545, R2 Score: 0.6686108471357659
Random State: 945, R2 Score: 0.6680083262196822
Random State: 330, R2 Score: 0.667558849303503
Random State: 109, R2 Score: 0.667542264235328
Random State: 227, R2 Score: 0.6663343210085739
Random State: 704, R2 Score: 0.6640363659658399
Random State: 299, R2 Score: 0.6636973605971943
Random State: 590, R2 Score: 0.661869133295165
Random State: 14, R2 Score: 0.6618096502908676
Random State: 977, R2 Score: 0.6604869447127815
Random State: 721, R2 Score: 0.6601216794128374
