In [17]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [18]:
# df = pd.read_csv('/Users/yelderiny/Projects/Dissertation/Data/processed-data.csv')
df = pd.read_csv('/Users/yelderiny/Projects/Dissertation/Data/processed-data-outliers-trimmed.csv')

In [19]:
features = df.drop('pr_points', axis=1)
target = df['pr_points']

In [20]:
x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=301)

In [21]:
def create_model():
    model = tf.keras.Sequential([
        keras.Input(shape=[len(x_train.keys())]),
        layers.Dense(32, activation='relu'),
        # layers.Dropout(0.2),
        layers.Dense(64, activation='relu'),
        layers.Dense(1)
    ])
    
    optimizer = tf.keras.optimizers.Adam(0.001)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
    
    return model

def build_model():
    inputs = keras.Input(shape=(len(x_train.keys()),))
    x = layers.Dense(64, activation='elu')(inputs)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(128, activation='elu')(x)
    outputs = layers.Dense(1)(x)
    model = keras.Model(inputs, outputs)
    
    optimizer = tf.keras.optimizers.Adam(0.001)
    
    model.summary()
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae', 'mse'])
    
    return model

In [22]:
# model_relu_64 = create_model()
# model_relu_64.summary()

model = build_model()

In [None]:
# Initialize an empty list to store R2 scores
r2_scores = []

# Loop through random states from 0 to 100
for random_state in range(1001):
    print(f"Random State: {random_state}")
    
    # Split the data with the current random state
    x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=random_state)

    # Create and compile the model
    model_relu_64 = create_model()

    # Train the model
    n_epochs = 1000
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    model_relu_64.fit(x_train, y_train, epochs=n_epochs, validation_split=0.2, verbose=False, callbacks=[early_stop])

    # Make predictions on the test set
    y_pred = model_relu_64.predict(x_test)

    # Compute R2 score and append to the list
    r2 = r2_score(y_test, y_pred)
    r2_scores.append((random_state, r2))

Random State: 0
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 1
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 2
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 3
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [None]:
# Sort R2 scores in descending order based on the second element (R2 score)
r2_scores.sort(key=lambda x: x[1], reverse=True)

# Print the sorted R2 scores
for random_state, r2 in r2_scores[:20]:
    print(f"Random State: {random_state}, R2 Score: {r2}")