In [1]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

pio.templates.default = "simple_white"



from scienceworld import ScienceWorldEnv

from sources.scienceworld import load_step_function,parse_observation
from sources.agent import BDIAgent
from sources.bdi_components.inference import NLIModel
from sources.bdi_components.belief import State

import numpy as np
from os import listdir
from os.path import isfile, join
import re


tasks = ['melt', 'find-non-living-thing']
all_data = []
plan_statistics = pd.read_csv("plan_statistics.csv")

for task in tasks:
    results_df = pd.read_csv(f"../results/results_{task}.csv")
    # TODO ajustar dps o script
    results_df.loc[results_df["plans_pct"] == 1, "plans_pct"] = 100
    results_df.loc[results_df["plans_pct"] == 2, "plans_pct"] = 25
    results_df.loc[results_df["plans_pct"] == 5, "plans_pct"] = 50
    results_df.loc[results_df["plans_pct"] == 7, "plans_pct"] = 75
    results_df['task'] = task
    results_df['rl_score'] = results_df['rl_score'] / 100
    results_df['bdi_score'] = results_df['bdi_score'] / 100
    results_df['final_score'] = results_df['final_score'] / 100
    all_data.append(results_df)

results_df = pd.concat(all_data)
results_df

Unnamed: 0,num_bdi_actions,num_rl_actions,plan_found,variation,error,bdi_score,rl_score,final_score,complete,num_plans,plan_library_size,plans_pct,eps,drrn_model_file,task
0,0,50,0,21,True,0.0,0.03,0.03,False,0,12,0,117,models/model_task1melt/model-steps8000-eps117.pt,melt
1,0,50,0,22,True,0.0,0.03,0.03,False,0,13,0,117,models/model_task1melt/model-steps8000-eps117.pt,melt
2,0,50,0,23,True,0.0,0.03,0.03,False,0,13,0,117,models/model_task1melt/model-steps8000-eps117.pt,melt
3,0,50,0,24,True,0.0,0.03,0.03,False,0,14,0,117,models/model_task1melt/model-steps8000-eps117.pt,melt
4,0,50,0,25,True,0.0,0.03,0.03,False,0,14,0,117,models/model_task1melt/model-steps8000-eps117.pt,melt
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3745,9,0,1,295,False,1.0,0.00,1.00,True,3,41,100,593,models/models_task13/model-steps80000-eps593.pt,find-non-living-thing
3746,11,0,1,296,False,1.0,0.00,1.00,True,3,41,100,593,models/models_task13/model-steps80000-eps593.pt,find-non-living-thing
3747,11,0,1,297,False,1.0,0.00,1.00,True,3,41,100,593,models/models_task13/model-steps80000-eps593.pt,find-non-living-thing
3748,11,0,1,298,False,1.0,0.00,1.00,True,3,41,100,593,models/models_task13/model-steps80000-eps593.pt,find-non-living-thing


In [2]:
results_df = pd.merge(results_df, plan_statistics, on=['plans_pct', 'task'])
# get EPS with best rl_score
eps_df = results_df.groupby(['task', 'eps']).agg({'rl_score': 'max'}).reset_index()
eps_df = eps_df.sort_values(by=['rl_score', 'eps'], ascending=[False, True]).groupby(['task']).head(1)
eps_df = eps_df.drop(columns=['rl_score'])
display(eps_df)
results_df.head()

Unnamed: 0,task,eps
3,find-non-living-thing,242
16,melt,457


Unnamed: 0,num_bdi_actions,num_rl_actions,plan_found,variation,error,bdi_score,rl_score,final_score,complete,num_plans_x,plan_library_size,plans_pct,eps,drrn_model_file,task,num_plans_y,num_common_plans,num_specific_plans
0,0,50,0,21,True,0.0,0.03,0.03,False,0,12,0,117,models/model_task1melt/model-steps8000-eps117.pt,melt,180,180,0
1,0,50,0,22,True,0.0,0.03,0.03,False,0,13,0,117,models/model_task1melt/model-steps8000-eps117.pt,melt,180,180,0
2,0,50,0,23,True,0.0,0.03,0.03,False,0,13,0,117,models/model_task1melt/model-steps8000-eps117.pt,melt,180,180,0
3,0,50,0,24,True,0.0,0.03,0.03,False,0,14,0,117,models/model_task1melt/model-steps8000-eps117.pt,melt,180,180,0
4,0,50,0,25,True,0.0,0.03,0.03,False,0,14,0,117,models/model_task1melt/model-steps8000-eps117.pt,melt,180,180,0


- Heatmap comparing plans_pct vs eps_trained
- Num variations completed by using RL x BDI

In [3]:
projected_cols = ['task', 'plans_pct', 'eps', 'num_specific_plans']
aggregations = {'variation':'count', 'final_score': 'mean', 'rl_score': 'mean', 'bdi_score': 'mean', 'num_bdi_actions': 'mean', 'num_rl_actions': 'mean'}
filtered_results_df = pd.merge(results_df, eps_df, how='inner', on=['task', 'eps'])
filtered_results_df.head()

Unnamed: 0,num_bdi_actions,num_rl_actions,plan_found,variation,error,bdi_score,rl_score,final_score,complete,num_plans_x,plan_library_size,plans_pct,eps,drrn_model_file,task,num_plans_y,num_common_plans,num_specific_plans
0,0,50,0,21,True,0.0,0.03,0.03,False,0,12,0,457,models/model_task1melt/model-steps56000-eps457.pt,melt,180,180,0
1,0,50,0,22,True,0.0,0.03,0.03,False,0,13,0,457,models/model_task1melt/model-steps56000-eps457.pt,melt,180,180,0
2,0,50,0,23,True,0.0,0.03,0.03,False,0,13,0,457,models/model_task1melt/model-steps56000-eps457.pt,melt,180,180,0
3,0,50,0,24,True,0.0,0.03,0.03,False,0,14,0,457,models/model_task1melt/model-steps56000-eps457.pt,melt,180,180,0
4,0,50,0,25,True,0.0,0.03,0.03,False,0,14,0,457,models/model_task1melt/model-steps56000-eps457.pt,melt,180,180,0


In [4]:
projected_cols = ['task', 'plans_pct', 'eps', 'num_specific_plans']
aggregations = {'variation':'count', 'final_score': 'mean', 'rl_score': 'mean', 'bdi_score': 'mean', 'num_bdi_actions': 'mean', 'num_rl_actions': 'mean'}
# TODO: mostrar nro de variações (qtd)
# TODO: user número exato de planos
grouped_df = filtered_results_df.groupby(projected_cols).agg(aggregations).reset_index()
grouped_df['dense_rank'] = (grouped_df.groupby(['plans_pct', 'task'])['final_score'].rank(method='dense', ascending=False).astype(int))

#grouped_df = grouped_df.sort_values(['plans_pct', 'dense_rank'], ascending=[True, True]).reset_index()
grouped_df = grouped_df[(grouped_df['dense_rank'] == 1)].sort_values(["task", "num_specific_plans"])
# avoiding tied rows
grouped_df

Unnamed: 0,task,plans_pct,eps,num_specific_plans,variation,final_score,rl_score,bdi_score,num_bdi_actions,num_rl_actions,dense_rank
0,find-non-living-thing,0,242,0,75,0.658533,0.658533,0.0,0.0,50.0,1
1,find-non-living-thing,25,242,8,75,0.748933,0.448933,0.3,3.333333,38.0,1
2,find-non-living-thing,50,242,15,75,0.8384,0.2584,0.58,6.253333,24.0,1
3,find-non-living-thing,75,242,23,75,0.9088,0.115467,0.793333,7.64,13.333333,1
4,find-non-living-thing,100,242,30,75,0.98,0.0,0.98,9.186667,4.0,1
5,melt,0,457,0,9,0.032222,0.032222,0.0,0.0,50.0,1
6,melt,25,457,4,9,0.14,0.025556,0.114444,5.111111,44.444444,1
7,melt,50,457,7,9,0.355556,0.018889,0.336667,10.888889,33.333333,1
8,melt,75,457,10,9,0.568889,0.01,0.558889,17.111111,22.222222,1
9,melt,100,457,13,9,0.676667,0.003333,0.673333,20.888889,16.666667,1


In [5]:
cols_to_write = ['task', 'variation', 'eps', 'num_specific_plans', 'final_score', 'bdi_score','rl_score', 'num_bdi_actions', 'num_rl_actions']
grouped_df = grouped_df[cols_to_write]
grouped_df.rename(columns={
    'num_specific_plans': 'Num Plans',
    'bdi_score': 'BDI Score',
    'rl_score': 'RL Score',
    'final_score': 'Score',
    'num_bdi_actions': 'Num BDI actions',
    'num_rl_actions': 'Num RL actions'

}, inplace=True)

grouped_df.to_csv("paper_results.csv", index=False, float_format='%.2f')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_df.rename(columns={


In [10]:
pio.templates.default = "simple_white"

for task in tasks:
    plots_df = results_df[results_df['task'] == task]
    plots_df = plots_df[['final_score', 'eps', 'num_specific_plans']]
    plots_df = plots_df.groupby(['eps', 'num_specific_plans']).agg(mean_score = ('final_score', 'mean'), max_score=('final_score', 'max'), min_score=('final_score', 'min'), stddev_score=('final_score', 'std')).reset_index()
    all_scaters = []
    for group, group_df in plots_df.groupby("num_specific_plans"):
        all_scaters.append(go.Scatter(name=f"{group} plans",
            x=group_df['eps'],
            y=group_df['mean_score'],
        ))

    fig = go.Figure(data=all_scaters)
    fig.update_layout(width=700,height=400)
    # TODO: verificar qtd de planos em 0%
    fig.show()

In [11]:
pio.write_image(fig, 'output_file2.pdf', format='pdf')


In [7]:
for task in tasks:

    curve_plot_df = results_df[results_df['task'] == task]
    curve_plot_df = curve_plot_df[['num_specific_plans', 'bdi_score', 'rl_score']]
    curve_plot_df = pd.melt(curve_plot_df, id_vars=['num_specific_plans'], value_vars=['bdi_score', 'rl_score'], var_name='agent', value_name='score')
    #curve_plot_df['plans_pct'] = curve_plot_df['plans_pct'].astype(str)
    curve_plot_df = curve_plot_df.groupby(['num_specific_plans', 'agent']).agg({'score':'mean'}).reset_index(drop=False)

    fig = px.bar(curve_plot_df, x="num_specific_plans", y="score", color="agent", title="Score Distribution")
    fig.update_layout(width=500, height=350, xaxis = {"type": "category"})
    fig.show()

pio.write_image(fig, 'output_file.pdf', format='pdf')

In [8]:
# TODO: impacto de aumento de EPS de treino vs impacto de aumento de número de planos na PL

heatmap_df = results_df[['plan_library_size', 'eps', 'final_score']]
heatmap_df = heatmap_df.groupby(['plan_library_size', 'eps']).agg({'final_score': 'mean'}).reset_index()
heatmap_df = heatmap_df.pivot(index='plan_library_size', columns='eps')['final_score'].fillna(0)

fig = px.imshow(heatmap_df, x=heatmap_df.columns, y=heatmap_df.index)
fig.update_layout(width=1000,height=700)
fig.update_xaxes(type='category')
#fig.update_yaxes(type='category')
fig.show()

In [9]:
curve_points_df = results_df[['plans_pct', 'eps', 'final_score']]
curve_points_df = curve_points_df.groupby(['plans_pct', 'eps']).agg({'final_score': 'mean'}).reset_index()
fig = px.scatter(curve_points_df, x="plans_pct", y="eps", size='final_score')
fig.update_layout(width=700,height=700)
fig.show()