In [17]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [18]:
# df = pd.read_csv('/Users/yelderiny/Projects/Dissertation/Data/processed-data3.csv')
df = pd.read_csv('/Users/yelderiny/Projects/Dissertation/Data/processed-data3-outliers-capped.csv')

In [19]:
features = df.drop(columns=['pr_points1', 'pr_points2', 'pr_points3', 'contributor_xp1', 'contributor_xp3'], axis=1)
target = df['pr_points1']

In [20]:
x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=12)

In [21]:
def create_model():
    model = tf.keras.Sequential([
        keras.Input(shape=[len(x_train.keys())]),
        layers.Dense(32, activation='elu'),
        layers.Dropout(0.2),
        layers.Dense(64, activation='elu'),
        layers.Dense(1)
    ])
    
    optimizer = tf.keras.optimizers.Adam(0.001)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
    
    return model

In [22]:
model_relu_64 = create_model()
model_relu_64.summary()

In [23]:
# n_epochs = 1000
# early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
# training_hist = model_relu_64.fit(x_train, y_train, epochs=n_epochs, validation_split=0.2, verbose=True, callbacks=[early_stop])

In [24]:
# y_pred = model_relu_64.predict(x_test).flatten()
# plt.figure(figsize=(10, 8))
# plt.scatter(y_test, y_pred, s=200, c='darkblue')
# plt.xlabel("Actual pr point values")
# plt.ylabel("Predicted pr point values")
# plt.show()

In [25]:
# r2_score(y_test, y_pred)

In [26]:
# Initialize an empty list to store R2 scores
r2_scores = []

# Loop through random states from 0 to 100
for random_state in range(1000):
    print(f"Random State: {random_state}")
    
    # Split the data with the current random state
    x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=random_state)

    # Create and compile the model
    model_relu_64 = create_model()

    # Train the model
    n_epochs = 1000
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    model_relu_64.fit(x_train, y_train, epochs=n_epochs, validation_split=0.2, verbose=False, callbacks=[early_stop])

    # Make predictions on the test set
    y_pred = model_relu_64.predict(x_test)

    # Compute R2 score and append to the list
    r2 = r2_score(y_test, y_pred)
    r2_scores.append((random_state, r2))

Random State: 0
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random State: 1
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random State: 2
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random State: 3
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random State: 4
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random State: 5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random State: 6
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random State: 7
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Random State: 8
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random State: 9
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random State: 10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random St

In [27]:
# Sort R2 scores in descending order based on the second element (R2 score)
r2_scores.sort(key=lambda x: x[1], reverse=True)

# Print the sorted R2 scores
for random_state, r2 in r2_scores[:20]:
    print(f"Random State: {random_state}, R2 Score: {r2}")

Random State: 248, R2 Score: 0.7313979603867616
Random State: 817, R2 Score: 0.7310863870760324
Random State: 465, R2 Score: 0.7270288070058686
Random State: 464, R2 Score: 0.7262378109646317
Random State: 386, R2 Score: 0.7257060358780502
Random State: 613, R2 Score: 0.7203060727837387
Random State: 126, R2 Score: 0.7191039825065533
Random State: 346, R2 Score: 0.7186766880496781
Random State: 900, R2 Score: 0.7167408945768327
Random State: 635, R2 Score: 0.7159853624474823
Random State: 862, R2 Score: 0.7125441563998389
Random State: 794, R2 Score: 0.7079732371013003
Random State: 720, R2 Score: 0.7078731318074041
Random State: 476, R2 Score: 0.7077588378618903
Random State: 460, R2 Score: 0.7076709375625754
Random State: 392, R2 Score: 0.707332215336318
Random State: 833, R2 Score: 0.7053122846643858
Random State: 885, R2 Score: 0.7050413086682901
Random State: 322, R2 Score: 0.7044921073240787
Random State: 13, R2 Score: 0.704266515676268
