# Load packages

In [None]:
from pathlib import Path
from environment import Santa2022Environment
from utils import *

import pandas as pd
from stable_baselines3 import PPO

import matplotlib.pyplot as plt

# Load image of the Christmas card

In [None]:
# Read the card's pixel table from CSV and convert it with ``df_to_image``
# (helper pulled in via ``from utils import *``).
image_csv = "image.csv"
df_image = pd.read_csv(image_csv)
image = df_to_image(df_image)

In [None]:
# Display the converted image as a quick visual sanity check.
plt.imshow(image)
plt.show()

# Load first submission file

In [None]:
# Parse the configurations of a single submission file into ``all_confs``.
# Each row's ``configuration`` string holds ";"-separated links, and each link
# is a run of whitespace-separated integers, so every row becomes a list of
# integer lists.
all_confs = []
# ``glob`` yields files in arbitrary, filesystem-dependent order; taking the
# smallest path makes the chosen "first" file reproducible across machines.
# ``default=None`` keeps ``all_confs`` empty when no CSV is present.
sub_file = min(Path("./submissions").glob("*.csv"), default=None)
if sub_file is not None:
    s = pd.read_csv(sub_file)
    # Iterate the column directly; this avoids the per-row overhead of
    # ``DataFrame.apply(axis=1)``.
    all_confs.extend(
        [list(map(int, link.split())) for link in configuration.split(";")]
        for configuration in s["configuration"]
    )

# Load PPO model

In [None]:
# Build the evaluation environment and restore the trained PPO agent.
max_iter = 1000
env = Santa2022Environment(image, max_iter=max_iter)
# ``PPO.load`` is a classmethod that builds and returns a *new* model from the
# checkpoint. Calling it on an existing instance and discarding the result
# would leave that instance with its freshly initialised (untrained) weights,
# so the returned model is bound to ``model`` and attached to ``env`` here.
model = PPO.load("./models/rl_model_2000000_steps", env=env)

# Run model on env

In [None]:
# Roll out the PPO policy with deterministic actions until the environment
# reports that it is done, recording the reward of every step.
model_rewards = []
obs = env.reset()
is_done = False
while not is_done:
    prediction = model.predict(obs, deterministic=True)
    # ``predict`` returns a tuple; its first element is the chosen action.
    action = int(prediction[0])
    obs, reward, is_done, _ = env.step(action)
    model_rewards.append(reward)

# Get rewards of submission file (baseline model)

In [None]:
# Replay the submission's configurations through the environment and collect
# the reward of every step as the baseline.
obs = env.reset()
rewards = []

# The first configuration is skipped -- presumably it is the state the
# environment is already in after ``reset()``; TODO confirm.
for conf in all_confs[1:]:
    # The action is the position of the target configuration within the
    # environment's current ``new_confs`` list.
    action = env.new_confs.index(conf)
    step_result = env.step(action)
    obs, reward, done, info = step_result
    rewards.append(reward)

# Plot rewards

## Submission file rewards

In [None]:
# Plot the first 1000 submission-file rewards as individual points.
first_rewards = rewards[:1000]
plt.plot(first_rewards, '.')
plt.show()

## PPO model rewards

In [None]:
# Plot the first 1000 PPO-model rewards as individual points.
first_model_rewards = model_rewards[:1000]
plt.plot(first_model_rewards, '.')
plt.show()

## Submission file advantages

In [None]:
# For each consecutive 1000-step window of the submission rewards, plot the
# result of ``discounted_cumulative_sums`` (factor 0.99) with its last entry
# dropped. At most 20 windows are plotted, one figure per window.
window_size = 1000
max_windows = 20
stop = min(len(rewards), max_windows * window_size)
for i in range(0, stop, window_size):
    rewards_window = rewards[i:i + window_size]
    values = discounted_cumulative_sums(rewards_window, 0.99)[:-1]
    plt.plot(values)
    plt.show()

## PPO model advantages

In [None]:
# Plot the result of ``discounted_cumulative_sums`` (factor 0.99) over all PPO
# rewards, with the last entry dropped. The whole ``model_rewards`` list is
# used directly, so no window slice (and no loop index from another cell) is
# needed here.
values = discounted_cumulative_sums(model_rewards, 0.99)[:-1]
plt.plot(values)
plt.show()