In [85]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [86]:
df = pd.read_csv('/Users/yelderiny/Projects/Dissertation/Data/processed-data3.csv')

In [87]:
features = df.drop(columns=['pr_points1', 'pr_points2', 'pr_points3', 'contributor_xp1', 'contributor_xp3'], axis=1)
target = df['pr_points1']

In [88]:
x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=12)

In [89]:
def create_model():
    model = tf.keras.Sequential([
        keras.Input(shape=[len(x_train.keys())]),
        layers.Dense(32, activation='elu'),
        layers.Dropout(0.2),
        layers.Dense(64, activation='elu'),
        layers.Dense(1)
    ])
    
    optimizer = tf.keras.optimizers.Adam(0.001)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
    
    return model

In [90]:
model_relu_64 = create_model()
model_relu_64.summary()

In [91]:
# n_epochs = 1000
# early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
# training_hist = model_relu_64.fit(x_train, y_train, epochs=n_epochs, validation_split=0.2, verbose=True, callbacks=[early_stop])

In [92]:
# y_pred = model_relu_64.predict(x_test).flatten()
# plt.figure(figsize=(10, 8))
# plt.scatter(y_test, y_pred, s=200, c='darkblue')
# plt.xlabel("Actual pr point values")
# plt.ylabel("Predicted pr point values")
# plt.show()

In [93]:
# r2_score(y_test, y_pred)

In [94]:
# Initialize an empty list to store R2 scores
r2_scores = []

# Loop through random states from 0 to 100
for random_state in range(1000):
    print(f"Random State: {random_state}")
    
    # Split the data with the current random state
    x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=random_state)

    # Create and compile the model
    model_relu_64 = create_model()

    # Train the model
    n_epochs = 1000
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    model_relu_64.fit(x_train, y_train, epochs=n_epochs, validation_split=0.2, verbose=False, callbacks=[early_stop])

    # Make predictions on the test set
    y_pred = model_relu_64.predict(x_test)

    # Compute R2 score and append to the list
    r2 = r2_score(y_test, y_pred)
    r2_scores.append((random_state, r2))

Random State: 0
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 875us/step
Random State: 1
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 825us/step
Random State: 2
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 809us/step
Random State: 3
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 870us/step
Random State: 4
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 882us/step
Random State: 5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random State: 6
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 933us/step
Random State: 7
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 889us/step
Random State: 8
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random State: 9
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Random State: 10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 941us/step
R

In [95]:
# Sort R2 scores in descending order based on the second element (R2 score)
r2_scores.sort(key=lambda x: x[1], reverse=True)

# Print the sorted R2 scores
for random_state, r2 in r2_scores[:20]:
    print(f"Random State: {random_state}, R2 Score: {r2}")

Random State: 465, R2 Score: 0.7258538571943225
Random State: 248, R2 Score: 0.7181385703612997
Random State: 900, R2 Score: 0.7128189398544756
Random State: 58, R2 Score: 0.6980482586597382
Random State: 794, R2 Score: 0.6912703843670662
Random State: 230, R2 Score: 0.6873563160116813
Random State: 692, R2 Score: 0.6873442618100202
Random State: 613, R2 Score: 0.6851266680225383
Random State: 629, R2 Score: 0.6844725827099112
Random State: 532, R2 Score: 0.6829232316008658
Random State: 82, R2 Score: 0.6808134645004575
Random State: 126, R2 Score: 0.6803594510006128
Random State: 885, R2 Score: 0.6786527475664348
Random State: 447, R2 Score: 0.6781255068913968
Random State: 720, R2 Score: 0.6774690653542328
Random State: 346, R2 Score: 0.6765274095947997
Random State: 820, R2 Score: 0.6756926691775289
Random State: 322, R2 Score: 0.6746707239270415
Random State: 625, R2 Score: 0.6740318154238649
Random State: 396, R2 Score: 0.6713145042696211


In [96]:
for random_state, r2 in r2_scores:
    if random_state == 386:
        print(f"Random State: {random_state}, R2 Score: {r2}")

Random State: 386, R2 Score: 0.5988837545021104
