In [None]:
!pip install flappy_bird_gymnasium gymnasium

In [None]:
import os
import pandas as pd
import numpy as np
from keras import Sequential
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import flappy_bird_gymnasium
import gymnasium

In [None]:
# --- Input -----------------------------------------------------------------
# CSV of recorded game frames used as the training set.
CSV_PATH = '/kaggle/input/flappy-bird-gymnasium/flappy_bird.csv'

# --- Outputs ---------------------------------------------------------------
# Destination of the final trained model.
MODEL_PATH = '/kaggle/working/flappy_bird_model.h5'
# Weights file that is loaded before training when it already exists.
WEIGHTS = '/kaggle/working/weights.h5'

In [None]:
# Fixed seed so the train/test split is the same on every run.
SPLIT_SEED = 42


def drop_crash_frames(frames):
    """Remove the frames that led the bird to crash.

    A crash frame is a row with a negative ``reward``. For every crash frame,
    all rows that share its ``game`` and ``score`` and whose index label is not
    greater than the crash frame's label are removed (the crash frame itself
    included). Rows of (game, score) groups without any crash are kept.

    This is computed with a single group-wise pass instead of dropping rows
    from the frame while iterating over it.

    Args:
        frames: DataFrame with at least ``reward``, ``game`` and ``score``
            columns. Index labels are assumed to be unique and ordered
            (e.g. the default index produced by ``pd.read_csv``).

    Returns:
        A new DataFrame holding only the kept rows, with their original index
        labels. The input frame is not modified.
    """
    row_label = frames.index.to_series()
    # Index label on crash rows, NaN everywhere else.
    crash_label = row_label.where(frames['reward'] < 0)
    # Label of the last crash inside each (game, score) group; NaN if the group never crashed.
    last_crash = crash_label.groupby([frames['game'], frames['score']]).transform('max')
    # Comparisons against NaN are False, so rows of crash-free groups are never marked.
    leads_to_crash = row_label <= last_crash
    return frames[~leads_to_crash]


df = drop_crash_frames(pd.read_csv(CSV_PATH))

# The model does not need the action, score, game and reward as inputs.
# 'player_s_vertical_velocity' and 'player_s_rotation' are removed as well: according to the
# original author they make the model get "addicted" to the last action taken, which makes the
# bird crash all the time.
data = df.drop(columns=['action',
                        'player_s_vertical_velocity',
                        'player_s_rotation',
                        'score',
                        'game',
                        'reward'])

# Training target: the action recorded for each kept frame.
y_data = df['action']

# Hold out 20% of the frames for validation.
X_train, X_test, y_train, y_test = train_test_split(data, y_data,
                                                    test_size=0.2,
                                                    random_state=SPLIT_SEED)

In [None]:
# Fully connected network: 32 -> 32 -> 16 -> 8 hidden units (ReLU), dropout of 0.3 after the
# first three hidden layers, and a single sigmoid output unit.
model = Sequential()
model.add(Dense(32, input_shape=(X_train.shape[1:]), activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(units=16, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(units=8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# NOTE(review): the sigmoid output is trained with mean squared error; 'binary_crossentropy'
# is the more conventional loss for this setup. Left unchanged to keep training behaviour.
model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

# Warm start: resume from a previously checkpointed weights file when one exists.
if os.path.exists(WEIGHTS):
    model.load_weights(WEIGHTS)

# All three callbacks monitor the training loss ('loss'); the validation data is only reported.
# NOTE(review): consider monitoring 'val_loss' instead if overfitting becomes a concern.
model.fit(X_train, y_train,
          epochs=1000,
          batch_size=64,
          verbose=1,
          validation_data=(X_test, y_test),
          callbacks=[EarlyStopping(monitor='loss', min_delta=1e-10, patience=50, verbose=1),
                     ReduceLROnPlateau(monitor='loss', factor=0.2, patience=10, verbose=1),
                     # Checkpoint to the same file the warm start above reads from.
                     ModelCheckpoint(filepath=WEIGHTS, monitor='loss',
                                     save_best_only=True, verbose=1)])
# Persist the model as it is at the end of training.
model.save(MODEL_PATH)
print('Done')

In [None]:
# Model outputs at or above this value are turned into action 1, anything lower into action 0.
ACTION_THRESHOLD = 0.2

env = gymnasium.make("FlappyBird-v0", audio_on=False)

best_score = 0
game = 0
# Plays games back to back until the kernel is interrupted; the environment is closed
# on the way out so its resources are released even when the loop is stopped manually.
try:
    while True:
        state, _ = env.reset()
        done = False
        game += 1
        game_score = 0

        while not done:
            # Drop the last two observation entries so the input width matches the model.
            # Presumably these are the velocity and rotation features excluded from the
            # training data -- TODO confirm against the environment's observation layout.
            features = np.expand_dims(state[:-2], axis=0)
            prediction = model.predict(features, verbose=0)[0][0]
            action = 1 if prediction >= ACTION_THRESHOLD else 0

            # An episode is over when it is either terminated or truncated.
            state, _, terminated, truncated, info = env.step(action)
            done = terminated or truncated

            if info['score'] > game_score:
                game_score = info['score']
            if game_score > best_score:
                best_score = game_score
                print('New best score:', best_score)

        print(f'Game {game} finished with score {game_score}')
finally:
    env.close()