<a href="https://colab.research.google.com/github/zenAurelius/HRAI3/blob/main/notebooks/test_recur1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

from tensorflow.keras import layers

from google.colab import drive
# Mount Google Drive into the Colab runtime at /content/drive.
drive.mount('/content/drive')
# Switch to the project's data folder so the relative file names used below resolve there.
%cd /content/drive/MyDrive/HRAI/data

In [None]:
# Load the raw dataset from the zipped CSV (path is relative to the current directory).
df = pd.read_csv('pmu2017_os.zip')
df

In [None]:
# List every column name of the loaded frame.
df.columns.to_list()

In [None]:
# Feature and target selection.
# Earlier column selection, kept for reference:
#df = df[['rfi_prix','rfi_distance','pfi_chNbPlaces_1','pff_rapportDirect_1','pff_ord_1','pfi_chNbPlaces_2','pff_rapportDirect_2','pff_ord_2','tgf_win_1']].copy()
df = df[[
    'aid_cr',
    'pis_cheval_1', 'pff_ord_1', 'pff_ord_2', 'pis_cheval_2',
    'pff_normcote_1', 'pff_rapportDirect_1', 'pff_rapportDirect_2',
    'pff_foch_1', 'pff_normcote_2', 'pff_foch_2',
    'tgf_pwin_1',
]].copy()

# Drop every row where either pff_normcote column is missing.
df = df[~(df['pff_normcote_2'].isna() | df['pff_normcote_1'].isna())].copy()

# Share of side 1 in the pair total, for pff_normcote and for pff_foch.
df['rel_cote_1'] = df['pff_normcote_1'].div(df['pff_normcote_1'] + df['pff_normcote_2'])
df['rel_force_1'] = df['pff_foch_1'].div(df['pff_foch_1'] + df['pff_foch_2'])
#df = df.sort_values('aid_pt')
df

In [None]:
# Split the rows into train / validation / test by `aid_cr` value (roughly 80 / 10 / 10).
# The two cut-off values are read from the SORTED aid_cr values, so the proportions
# hold whatever the row order of `df` is. (Reading them positionally from the unsorted
# column only gives an 80/10/10 split when the frame happens to be ordered by aid_cr.)
# Rows sharing one aid_cr value always land in the same split.
aid_cr_sorted = df['aid_cr'].sort_values()
limit_train = aid_cr_sorted.iloc[int(0.8 * len(df))]
limit_val = aid_cr_sorted.iloc[int(0.9 * len(df))]
print(limit_train, limit_val)
train = df[df.aid_cr < limit_train].copy()
val = df[(df.aid_cr >= limit_train) & (df.aid_cr < limit_val)].copy()
test = df[df.aid_cr >= limit_val].copy()
print(len(train), 'training examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

In [None]:
# Inspect the training split.
train

In [None]:
# Column roles shared by the dataset builder and the model-building cells.
NUM_FEATURES = ['rel_cote_1', 'rel_force_1']  # numeric inputs; each gets its own Keras Input
STR_FEATURES = []  # not referenced by any other cell visible in this notebook
TARGET = ['tgf_pwin_1']  # single target column
FEATURES = NUM_FEATURES  # alias: the same list object as NUM_FEATURES, not a copy
ALL_COLS = FEATURES + TARGET  # columns df_to_dataset selects from a frame

In [None]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  """Convert a DataFrame into a batched, prefetched `tf.data.Dataset`.

  Only the columns listed in ALL_COLS are used. The column named by TARGET[0]
  becomes the label; every other column becomes one entry of the feature dict,
  reshaped to (n_rows, 1).

  Args:
    dataframe: frame containing at least the ALL_COLS columns. It is not modified.
    shuffle: when True, shuffle the examples with a buffer covering every row.
    batch_size: number of examples per batch (also used as the prefetch depth).

  Returns:
    A `tf.data.Dataset` yielding `(features_dict, labels)` batches.
  """
  frame = dataframe[ALL_COLS].copy()
  labels = frame.pop(TARGET[0])
  features = {key: np.array(value)[:, tf.newaxis] for key, value in frame.items()}
  ds = tf.data.Dataset.from_tensor_slices((features, labels))
  if shuffle:
    # The buffer must be sized from the number of ROWS. The previous code took
    # len() of the feature dict (i.e. the number of feature columns), which gave
    # a tiny buffer and left the data almost unshuffled.
    # max(..., 1) keeps the buffer size valid for an empty frame.
    ds = ds.shuffle(buffer_size=max(len(dataframe), 1))
  ds = ds.batch(batch_size)
  ds = ds.prefetch(batch_size)
  return ds

In [None]:
def get_normalization_layer(name, dataset):
  """Return a Normalization layer adapted to one feature of `dataset`.

  Args:
    name: key of the feature inside the dataset's feature dict.
    dataset: dataset yielding `(features_dict, label)` elements.

  Returns:
    A `layers.Normalization(axis=None)` layer whose statistics were learned
    from the values of feature `name`.
  """
  # Project the dataset down to the single feature of interest (labels dropped).
  feature_only = dataset.map(lambda features, _label: features[name])

  # axis=None: one scalar mean/variance for the whole feature.
  norm_layer = layers.Normalization(axis=None)
  norm_layer.adapt(feature_only)
  return norm_layer

In [None]:
# Build the three datasets with one shared batch size.
# shuffle=False keeps the dataset order equal to the DataFrame row order; later cells
# write model.predict(...) output straight back into the frames, which needs that alignment.
batch_size = 256
train_ds = df_to_dataset(train, shuffle=False, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)


In [None]:
# Collect one Keras Input per feature and its normalized counterpart.
all_inputs = []
encoded_features = []

# Numerical features.
#for header in ['rfi_prix', 'rfi_distance', 'pfi_chNbPlaces_1', 'pff_rapportDirect_1', 'pff_ord_1', 'pfi_chNbPlaces_2', 'pff_rapportDirect_2', 'pff_ord_2']:
for header in NUM_FEATURES:
  # The Input is named after the column so the feature dict keys match the model inputs.
  numeric_col = tf.keras.Input(shape=(1,), name=header)
  # Normalization statistics are learned from the training dataset only.
  normalization_layer = get_normalization_layer(header, train_ds)
  encoded_numeric_col = normalization_layer(numeric_col)
  all_inputs.append(numeric_col)
  encoded_features.append(encoded_numeric_col)

In [None]:
# Concatenate the normalized features and stack two hidden layers (32 then 64 units, ReLU).
all_features = tf.keras.layers.concatenate(encoded_features)
x = tf.keras.layers.Dense(32, activation="relu")(all_features)
x = layers.Dense(64, activation='relu')(x)
# Single linear output unit: the model regresses the target directly.
output = tf.keras.layers.Dense(1)(x)

model = tf.keras.Model(all_inputs, output)
# Mean squared error loss, Adam optimizer with learning rate 0.001.
model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(0.001))


In [None]:
# Draw the model graph with tensor shapes.
# Use `rankdir='LR'` to make the graph horizontal.
tf.keras.utils.plot_model(model, show_shapes=True, rankdir="LR")


In [None]:
# Train for 5 epochs on the training dataset, reporting the loss on the validation dataset.
model.fit(train_ds, epochs=5, validation_data=val_ds)


In [None]:
def predict(model, ds, df, pcol, threshold=0.5):
  """Run `model` on `ds` and store the results in `df` (modified in place).

  Two columns are written:
    * `pcol`       - the raw model output;
    * `{pcol}_w`   - 1 where the output is strictly greater than `threshold`, else 0.

  Args:
    model: object exposing `predict(ds)` and returning one value per row of `df`.
    ds: input passed unchanged to `model.predict`; must be in the same order as `df`.
    df: DataFrame receiving the two new columns.
    pcol: name of the column that receives the raw predictions.
    threshold: decision cut-off for the 0/1 column. Defaults to 0.5, the value
      that was previously hard-coded.

  Returns:
    None. The result is the in-place update of `df`.
  """
  predictions = model.predict(ds)
  df[pcol] = predictions
  df[f'{pcol}_w'] = (predictions > threshold).astype(int)

# The rest could live elsewhere.
def prepare_comparaison(df):
  """Add four 0/1 comparison columns to `df` in place.

  Columns written, in this order:
    * `win1`      - 1 when `tgf_pwin_1` is strictly above 0.5;
    * `win_cote`  - 1 when `pff_rapportDirect_1` is strictly below `pff_rapportDirect_2`;
    * `win_force` - 1 when `pff_foch_1` is strictly above `pff_foch_2`;
    * `win_ord`   - 1 when `pff_ord_1` is strictly above `pff_ord_2`.

  Returns:
    None. `df` is modified in place.
  """
  comparisons = (
    ('win1', lambda d: d['tgf_pwin_1'] > 0.5),
    ('win_cote', lambda d: d['pff_rapportDirect_1'] < d['pff_rapportDirect_2']),
    ('win_force', lambda d: d['pff_foch_2'] < d['pff_foch_1']),
    ('win_ord', lambda d: d['pff_ord_2'] < d['pff_ord_1']),
  )
  for column, compare in comparisons:
    df[column] = compare(df).astype(int)

In [None]:
# Score the training rows with the current model (columns 'pred4' and 'pred4_w'),
# then add the baseline comparison flags.
predict(model, train_ds, train, 'pred4')
prepare_comparaison(train)
train

In [None]:
# Compare each prediction column against the baselines on the training set.
# NOTE(review): this cell only works out of order. The notebook's own `eval` is defined
# in a later cell, and the 'pred0'..'pred3' columns are only created by the runIteration
# loop further down. On a fresh top-to-bottom run this cell fails.
prepare_comparaison(train)
eval(train, 'pred0')
eval(train, 'pred1')
eval(train, 'pred2')
eval(train, 'pred3')
eval(train, 'pred4')

In [None]:
# Score the test rows (columns 'pred' and 'pred_w') and add the baseline comparison flags.
predict(model, test_ds, test, 'pred')
prepare_comparaison(test)
test

In [None]:
def calc_force_bycr(df, forces, byc, pcol):
  """Update strength columns for one group of rows and refresh `forces`.

  Args:
    df: rows to process; new columns are written onto it.
    forces: dict of known strengths keyed by entity name. Read first (for the
      ELO_* columns), then updated in place with this group's new values.
    byc: column prefix of the entity; `{byc}_1` and `{byc}_2` must be columns.
    pcol: name of the prediction column.

  Returns:
    `df` with the added columns and a fresh 0..n-1 index.
  """
  key_1, key_2 = f'{byc}_1', f'{byc}_2'
  delta_col, next_col = f'D_ELO_{byc}', f'NEXT_ELO_{byc}'

  # Strength for the next race, computed from the prediction error.
  df[delta_col] = 100 * (df.tgf_pwin_1 - df[pcol])
  df[next_col] = df['pff_foch_1'] + df[delta_col]

  # Strength carried over from the previous race => the next 'fit' uses this value.
  # Entities not yet in `forces` fall back to their pff_foch value.
  known_1 = df[key_1].astype(str).map(forces)
  df[f'ELO_{byc}_1'] = known_1.fillna(df.pff_foch_1)
  known_2 = df[key_2].astype(str).map(forces)
  df[f'ELO_{byc}_2'] = known_2.fillna(df.pff_foch_2)

  # Only now publish this group's results: mean next strength per side-1 entity,
  # truncated to int.
  # NOTE(review): lookups above cast names to str while these keys keep the column's
  # own dtype; the two agree when names are already strings. Confirm.
  mean_next = df.groupby([key_1])[next_col].agg("mean")
  forces.update(mean_next.astype(int).to_dict())
  return df.reset_index(drop=True)

def calc_oskill(df, forces, pcol):
  """Apply calc_force_bycr to every `aid_cr` group of `df`.

  The same `forces` dict is passed to every group, so values written while
  processing one group are visible to the groups handled after it.

  Args:
    df: frame with an `aid_cr` column plus the columns calc_force_bycr needs.
    forces: dict of strengths keyed by horse name; updated in place.
    pcol: name of the prediction column.

  Returns:
    The concatenated per-group results with a fresh 0..n-1 index.
  """
  def _update_one_group(group):
    return calc_force_bycr(group, forces, 'pis_cheval', pcol)

  per_group = df.groupby('aid_cr').apply(_update_one_group)
  return per_group.reset_index(drop=True)

In [None]:
# Start from an empty strength table and recompute the strengths over the training set
# using the 'pred4' predictions. `train` is rebound to the frame returned by calc_oskill.
forces = {}
train = calc_oskill(train, forces, 'pred4')
train

In [None]:
# Side-by-side view of the strengths, odds shares, outcome and every prediction column.
# NOTE(review): 'pred0'..'pred3' and 'pred5'..'pred9' are only created by the runIteration
# loop in a later cell; selecting them before that loop has run raises a KeyError.
train[['pff_foch_1','pff_foch_2','rel_force_1', 'pff_normcote_1','pff_normcote_2','rel_cote_1','win1','pred0','pred1','pred2','pred3','pred4','pred5','pred6','pred7','pred8','pred9','tgf_pwin_1']]

In [None]:
# Replace the strength columns with the recomputed ELO values, then refresh the derived
# feature so the next fit trains on the updated strengths.
train.pff_foch_1 = train.ELO_pis_cheval_1
train.pff_foch_2 = train.ELO_pis_cheval_2
train['rel_force_1'] = train.pff_foch_1 / (train.pff_foch_1 + train.pff_foch_2)

In [None]:
def _build_model(ds):
  """Build and compile the dense regressor over NUM_FEATURES, normalizing on `ds`."""
  all_inputs = []
  encoded_features = []

  # Numerical features: one named Input per column, each followed by its own
  # Normalization layer adapted on `ds`.
  for header in NUM_FEATURES:
    numeric_col = tf.keras.Input(shape=(1,), name=header)
    normalization_layer = get_normalization_layer(header, ds)
    all_inputs.append(numeric_col)
    encoded_features.append(normalization_layer(numeric_col))

  all_features = tf.keras.layers.concatenate(encoded_features)
  x = tf.keras.layers.Dense(32, activation="relu")(all_features)
  x = layers.Dense(64, activation='relu')(x)
  output = tf.keras.layers.Dense(1)(x)

  model = tf.keras.Model(all_inputs, output)
  model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(0.001))
  return model


def runIteration(num):
  """Run one refit round on the global `train` frame.

  Steps: rebuild the dataset from the current `train`, fit a fresh model,
  write its predictions to `train['pred{num}']` / `train['pred{num}_w']`,
  recompute the strengths from those predictions starting from an empty
  strength table, and overwrite the `pff_foch_*` / `rel_force_1` columns so
  the next round trains on the updated strengths.

  Args:
    num: round number; printed and used as the suffix of the prediction columns.

  Returns:
    The model fitted during this round. Previously the model was a discarded
    local, so it could not be saved or evaluated afterwards; callers that
    ignore the return value are unaffected.

  Side effects:
    Rebinds the module-level `train` to the frame returned by calc_oskill.
  """
  global train
  print(num)
  # shuffle=False: the predictions are written back row by row below.
  ds = df_to_dataset(train, shuffle=False, batch_size=batch_size)

  model = _build_model(ds)
  model.fit(ds, epochs=5, validation_data=val_ds)
  predict(model, ds, train, f'pred{num}')

  # Each round starts again from an empty strength table.
  forces = {}
  train = calc_oskill(train, forces, f'pred{num}')
  train['pff_foch_1'] = train['ELO_pis_cheval_1']
  train['pff_foch_2'] = train['ELO_pis_cheval_2']
  train['rel_force_1'] = train['pff_foch_1'] / (train['pff_foch_1'] + train['pff_foch_2'])
  return model

In [None]:
# Run ten refit rounds; round n writes its predictions to train['pred<n>'].
# Note: the loop variable rebinds the module-level name `x` used earlier for a Keras tensor.
for x in range(10):
  runIteration(x)

In [None]:
# Save the model to disk and load it back.
# NOTE(review): `model` here is the module-level model. runIteration keeps the models it
# fits in a local variable, so the refit models are not what gets saved. Confirm intended.
model.save('classifier_test.keras')
reloaded_model = tf.keras.models.load_model('classifier_test.keras')

In [None]:
# Check that the reloaded model can score the test dataset.
predictions = reloaded_model.predict(test_ds)
predictions

In [None]:
# Show the dataset object's repr.
train_ds

In [None]:
def eval(df, pcol):
  """Print per-class hit rates for a prediction column and two baselines.

  Under 'positifs', prints the share of rows with `win1 == 1` that are also
  flagged 1 by `{pcol}_w`, by `win_cote`, and by `win_ord` (in that order).
  Under 'negatifs', prints the same for rows with `win1 == 0` and flag 0.

  When a class has no rows the rate is printed as nan instead of raising
  ZeroDivisionError.

  NOTE: this name shadows the builtin `eval`; it is kept because other cells
  call it under this name.

  Args:
    df: frame holding `win1`, `win_cote`, `win_ord` and `{pcol}_w` columns.
    pcol: prefix of the 0/1 prediction column (`{pcol}_w`).
  """
  def _rate(hits, population):
    # Share of `population` rows that are also `hits`; nan for an empty population.
    total = int(population.sum())
    if total == 0:
      return float('nan')
    return int((hits & population).sum()) / total

  flag_cols = (f'{pcol}_w', 'win_cote', 'win_ord')
  won = df.win1 == 1
  lost = df.win1 == 0

  print('positifs')
  for col in flag_cols:
    print(_rate(df[col] == 1, won))
  print('negatifs')
  for col in flag_cols:
    print(_rate(df[col] == 0, lost))

In [None]:
# Hit rates of the 'pred' column on the test set, next to the baselines.
eval(test, 'pred')

In [None]:
# Overwrite the test strengths with the recomputed ELO values.
# NOTE(review): no visible cell runs calc_oskill on `test`, so the ELO_pis_cheval_1
# column may not exist on it at this point. Confirm.
test.pff_foch_1 = test.ELO_pis_cheval_1

In [None]:
# Score the training set with the reloaded model.
# NOTE(review): train_ds was built before calc_oskill rebuilt `train`; confirm that the
# dataset rows still line up with the current rows of `train`.
train['pred'] = reloaded_model.predict(train_ds)

In [None]:
# Recompute the strengths from an empty table using the reloaded model's predictions.
# calc_oskill requires the prediction column name as its third argument (omitting it
# raises a TypeError); 'pred' is the column written from reloaded_model.predict.
train = calc_oskill(train, {}, 'pred')
train

In [None]:
# Spot-check: first 20 training rows where side 1 is the horse 'DREAMMOKO'.
train[train.pis_cheval_1 == 'DREAMMOKO'].head(20)

In [None]:
# Overwrite side 1's strength with its recomputed ELO value
# (side 2 and rel_force_1 are left untouched by this cell).
train.pff_foch_1 = train.ELO_pis_cheval_1

TEST DE REGRESSION LINEAIRE

In [None]:

# Baseline: a single Dense(1) unit on 'rel_cote_1', i.e. a linear regression of the target.
# Note: this cell rebinds the module-level names `output`, `model` and `predictions`.

# Define the input layer for 'rel_cote_1'
rel_cote_1_input = tf.keras.Input(shape=(1,), name='rel_cote_1')

# Define the linear regression layer
output = tf.keras.layers.Dense(1, name='tgf_pwin_1')(rel_cote_1_input)

# Create the model
model = tf.keras.Model(inputs=rel_cote_1_input, outputs=output)

# Compile the model
model.compile(optimizer='adam', loss='mse') # Use mean squared error for regression

# Prepare the data as (n_rows, 1) arrays
X_train = np.array(train['rel_cote_1']).reshape(-1, 1)
y_train = np.array(train['tgf_pwin_1']).reshape(-1, 1)

X_test = np.array(test['rel_cote_1']).reshape(-1, 1)
y_test = np.array(test['tgf_pwin_1']).reshape(-1, 1)

# Train the model; validation_split=0.2 holds out part of the training arrays
# instead of using the `val` frame.
model.fit(X_train, y_train, epochs=2, batch_size=256, validation_split=0.2) # Adjust epochs and batch size as needed

# Evaluate the model
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# Make predictions
predictions = model.predict(X_test)


In [None]:
# Store the linear-regression output on the test frame ('predr'), refresh the comparison
# flags, and derive the 0/1 column 'predr_w' with a 0.5 cut-off.
test['predr'] = predictions
prepare_comparaison(test)
test['predr_w'] = (predictions > 0.5).astype(int)
test

In [None]:
# Hit rates of the linear-regression predictions on the test set, next to the baselines.
eval(test, 'predr')