In [1]:
import ast
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from opportunistic_planning import processing, visualization
from scipy.stats import friedmanchisquare, wilcoxon

In [2]:
# Load the stored RNN baseline results: the text file holds a Python literal,
# which is parsed safely with ast.literal_eval (no arbitrary code execution).
rnn_results_path = '../../model_evaluation/rnn/results/pytorch_rnn_prequential_summed_2022-03-24.txt'
with open(rnn_results_path, 'r') as results_file:
    rnn_results_text = results_file.read()
rnn_results = ast.literal_eval(rnn_results_text)

In [3]:
# Sanity check: number of entries in the parsed RNN baseline results
len(rnn_results)

190

In [4]:
# Load the stored NN baseline results (the variant with spatial info): the text
# file holds a Python literal, which is parsed safely with ast.literal_eval.
nn_results_path = '../../model_evaluation/rnn/results/nn_spatialinfo_prequential_summed_2022-03-23.txt'
with open(nn_results_path, 'r') as results_file:
    nn_results_text = results_file.read()
nn_results = ast.literal_eval(nn_results_text)

In [5]:
# Sanity check: number of entries in the parsed NN baseline results
# (should match the RNN baseline, since both are later plotted against the same x-axis)
len(nn_results)

190

In [6]:
# Read the simulation results table via the project's own reader.
simulation_results_path = 'results/results_2D_n100_2022-03-15.csv'
results_sum = processing.read_results(simulation_results_path)

In [7]:
# Display the loaded table: one column per parameter combination
# ("c: ...; k: ...; xy"), followed by the 'sequence', 'error' and 'ID' columns.
results_sum

Unnamed: 0,"c: 1.0; k: 0.0,0.1,1.1; xy","c: 1.1; k: 0.0,0.1,1.1; xy","c: 1.2; k: 0.0,0.1,1.1; xy","c: 1.3; k: 0.0,0.1,1.1; xy","c: 1.4; k: 0.0,0.1,1.1; xy","c: 1.5; k: 0.0,0.1,1.1; xy","c: 1.6; k: 0.0,0.1,1.1; xy","c: 1.7; k: 0.0,0.1,1.1; xy","c: 1.8; k: 0.0,0.1,1.1; xy","c: 1.9; k: 0.0,0.1,1.1; xy",...,"c: 1.3; k: 0.8,0.9,1.9; xy","c: 1.4; k: 0.8,0.9,1.9; xy","c: 1.5; k: 0.8,0.9,1.9; xy","c: 1.6; k: 0.8,0.9,1.9; xy","c: 1.7; k: 0.8,0.9,1.9; xy","c: 1.8; k: 0.8,0.9,1.9; xy","c: 1.9; k: 0.8,0.9,1.9; xy",sequence,error,ID
1,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,...,4.000000,4.000000,4.000000,4.000000,4.000000,4.000000,4.000000,pocgkr,0.723,a1
2,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,...,7.000000,7.000000,7.000000,7.000000,7.000000,7.000000,7.000000,cgwpcfks,0.785,a3
3,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,...,4.000000,4.000000,4.000000,4.000000,4.000000,4.000000,4.000000,kfsfkspwg,0.806,a5
4,9.000000,9.000000,9.000000,9.000000,9.000000,9.000000,9.000000,9.000000,9.000000,9.000000,...,7.000000,7.000000,7.000000,7.000000,7.000000,7.000000,7.000000,pfkswkfsococg,0.862,a11
5,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,...,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,wptgkfsoc,0.806,a13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,2.500000,2.000000,2.500000,2.000000,2.000000,3.000000,3.000000,2.000000,2.000000,3.000000,...,2.000000,2.000000,3.000000,3.000000,2.000000,2.000000,3.000000,hhsdgb,0.723,v9
188,1.000000,1.000000,1.000000,2.000000,2.000000,1.000000,2.000000,1.500000,2.000000,2.000000,...,1.500000,2.000000,2.000000,2.000000,1.000000,2.000000,2.000000,hhsgdb,0.723,v10
189,2.000000,1.000000,2.000000,1.000000,1.500000,2.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,2.000000,1.000000,2.000000,2.000000,1.000000,2.000000,hhsgdb,0.723,v11
190,3.000000,2.000000,3.000000,2.000000,3.000000,2.500000,2.500000,2.500000,3.000000,2.000000,...,2.000000,2.000000,2.000000,2.000000,3.000000,2.000000,2.000000,hhsdgb,0.723,v12


In [8]:
# Unpack the four values returned by the project helper; `lowest_mean_idx[0]` is
# used below as the column label of the best model in `results_sum`.
# NOTE(review): later cells drop the last three rows with `[:-3]` and read a
# 'mean' row via `results_sum.at['mean', col]`, so this call presumably adds
# summary rows to `results_sum` in place — confirm against
# `processing.get_lowest_error`.
lowest_mean, lowest_mean_idx, lowest_median, results_median = processing.get_lowest_error(results_sum)

### Plot best model with baselines

In [15]:
#%matplotlib inline
%matplotlib qt

IDs = results_sum['ID'][:-3]

error = results_sum['error']

seqs = results_sum['sequence'][:-3].values

res = results_sum[lowest_mean_idx[0]][:-3].values
median = [np.nanmedian(res)] * len(nn_results)

x = [x for x in range (0,len(seqs))]

#fig, ax = plt.figure(figsize=(12,8))

# plot RNN baseline (w/o spatial info)
# RNN results
plt.scatter(x, rnn_results, marker='o', s=20, c='navy', alpha=0.95, 
            label=str('RNN baseline median: ') + str(round(np.median(rnn_results),3)))
# median
plt.plot(x, [np.median(rnn_results)] * len(x), '-', c='navy', alpha=0.95, linewidth=2)
# line for RNN results
#plt.plot(x, rnn_results, '-', c='navy', alpha=0.95, linewidth=1)
# connection between RNN and simulation results
plt.plot((x,x),(rnn_results,res), '--', c='navy', alpha=0.6)
# area under RNN results
#plt.fill_between(x, rnn_results, nn_results, alpha=0.2, color='dodgerblue')

# plot NN baseline (w/ spatial info)
plt.scatter(x, nn_results, marker='o', s=20, c='darkgreen', alpha=0.95, 
            label=str('NN baseline median: ') + str(round(np.median(nn_results),3)))
plt.plot(x, [np.median(nn_results)] * len(x), '-', c='green', alpha=0.9, linewidth=2)
#plt.plot(x, nn_results, '-', c='green', alpha=0.9, linewidth=1)
plt.plot((x,x),(nn_results,res), '--', c='darkgreen', alpha=0.6)
#plt.fill_between(x, nn_results, alpha=0.3, color='green')

# plot scatter + lines for simulations
plt.scatter(x, res, marker='o', s=26, c='darkred', alpha=0.8, 
            label=str('model-generated median: ') + str(round(lowest_median,3)))
#plt.plot(x, res, c='blue', alpha=0.6)
plt.plot(x, median, c='darkred', alpha=0.95, linewidth=2)
#plt.fill_between(x, res, alpha=0.3, color='darkviolet')


plt.xticks(x, labels=IDs, rotation=90, fontsize=5)
#plt.xticklabels(IDs, rotation=90, fontsize=6)

plt.ylabel('accumulated prediction error', fontsize=22)
plt.xlabel('sequence', fontsize=22)
#plt.ylim(0.0, 0.51)
plt.title('best model: ' + str(lowest_mean_idx[0]), fontsize=24, pad=20)
plt.margins(0.01)

plt.legend(fontsize=20, framealpha=0.8, loc='upper right', markerscale=2.5)

#plt.savefig('plot_median_editdist_individualerrors_diff.png', bbox_inches='tight')
plt.show()

### Prepare data for statistical analysis

In [25]:
# Gather the 'mean'-row entry of every model column whose third ';'-separated
# field is 'xy'; the bookkeeping columns are skipped.
meta_columns = ('sequence', 'ID', 'error')
list_xy = [
    results_sum.at['mean', column]
    for column in results_sum
    if column not in meta_columns and column.split(';')[2].strip() == 'xy'
]

# Summary statistics over the collected per-model means
avg_xy, med_xy, std_xy = np.mean(list_xy), np.median(list_xy), np.std(list_xy)
summary_parts = ['Average xy: ', str(avg_xy), ', stdev: ', str(std_xy), ', median: ', str(med_xy)]
print(''.join(summary_parts))

Average xy: 4.271179337231969, stdev: 0.20150529419847138, median: 4.213157894736842


In [26]:
# model vs NN: Wilcoxon signed-rank test on the paired per-sequence errors of
# the best model (`res`) and the NN baseline (`nn_results`)
stat, p = wilcoxon(res, nn_results, zero_method='wilcox')
# Report p in scientific notation: a fixed '%.5f' rounds very small p-values
# to '0.00000', which reads as "p equals zero".
print('Wilcoxon: W = %.3f, p = %.3e' % (stat, p))

Wilcoxon: W = 1544.500, p = 0.00000


In [27]:
# Get the sequences on which the best model (OPM) reaches an error of at least 4.0.
# NOTE(review): the fixed 4.0 threshold is presumably a stand-in for the
# baseline's error level — confirm. The NN cell below uses a strict `>` with the
# same threshold, whereas `>=` is used here.
error_threshold = 4.0

# Only per-sequence rows are considered: the last three rows are dropped, as is
# done for `res` in the plotting cell, so that non-sequence rows cannot be
# counted as sequences.
model_errors = results_sum[lowest_mean_idx[0]][:-3]
worse_than_nn = model_errors.loc[model_errors >= error_threshold]
worse_than_nn_sequences = results_sum.loc[worse_than_nn.index, 'sequence'].tolist()

len(worse_than_nn_sequences)

105

In [28]:
# Collect the NN-baseline error values that are strictly greater than 4.0
# (these are the error values themselves, not the sequences they belong to).
worse_than_opm = [nn_error for nn_error in nn_results if nn_error > 4.0]

# Number of NN-baseline errors above the threshold
len(worse_than_opm)

18