# Statistical tests of differences in group means of results

In [1]:
library(MASS)

## Load evaluation data

In [2]:
# Directory holding the evaluation CSVs. The trailing slash is required
# because file names are appended to this string by plain concatenation.
results_dir <- "../flatlander/entrypoints/results/"

In [3]:
# Small-environment result files (one CSV per approach).
small.spa <- read.csv(
  paste0(results_dir, "SPA-small_1000-episodes.csv"),
  header = TRUE
)
small.spa_cpr <- read.csv(
  paste0(results_dir, "SPA-CPR-small_1000-episodes.csv"),
  header = TRUE
)
small.spa_tcpr <- read.csv(
  paste0(results_dir, "SPA-TCPR-small_1000-episodes.csv"),
  header = TRUE
)
small.ato <- read.csv(
  paste0(results_dir, "ATO-small_1000-episodes.csv"),
  header = TRUE
)
small.rlpr_tcpr <- read.csv(
  paste0(results_dir, "RLPR-TCPR-small_1000-episodes.csv"),
  header = TRUE
)
small.rlps_tcpr <- read.csv(
  paste0(results_dir, "RLPS-TCPR-small_1000-episodes.csv"),
  header = TRUE
)

# Medium-environment result files.
m.spa <- read.csv(
  paste0(results_dir, "SPA-medium_1000-episodes.csv"),
  header = TRUE
)
m.spa_cpr <- read.csv(
  paste0(results_dir, "SPA-CPR-medium_1000-episodes.csv"),
  header = TRUE
)
m.spa_tcpr <- read.csv(
  paste0(results_dir, "SPA-TCPR-medium_1000-episodes.csv"),
  header = TRUE
)
m.ato <- read.csv(
  paste0(results_dir, "ATO-medium_1000-episodes.csv"),
  header = TRUE
)
m.rlpr_tcpr <- read.csv(
  paste0(results_dir, "RLPR-TCPR-medium_1000-episodes.csv"),
  header = TRUE
)
m.rlps_tcpr <- read.csv(
  paste0(results_dir, "RLPS-TCPR-medium_1000-episodes.csv"),
  header = TRUE
)

# Large-environment result files.
l.spa <- read.csv(
  paste0(results_dir, "SPA-large_200-episodes.csv"),
  header = TRUE
)
l.spa_cpr <- read.csv(
  paste0(results_dir, "SPA-CPR-large_200-episodes.csv"),
  header = TRUE
)
l.spa_tcpr <- read.csv(
  paste0(results_dir, "SPA-TCPR-large_200-episodes.csv"),
  header = TRUE
)
l.ato <- read.csv(
  paste0(results_dir, "ATO-large_200-episodes.csv"),
  header = TRUE
)
l.rlpr_tcpr <- read.csv(
  paste0(results_dir, "RLPR-TCPR-large_200-episodes.csv"),
  header = TRUE
)
l.rlps_tcpr <- read.csv(
  paste0(results_dir, "RLPS-TCPR-large_200-episodes.csv"),
  header = TRUE
)

## Investigate sample distributions

In [4]:
# Normal Q-Q plot of the SPA-CPR returns (small environments), written to a
# 4 x 4 inch PDF.
pdf(file = "./qqplot_cpr.pdf", width = 4, height = 4)

# Close the PDF device even if plotting fails; otherwise it stays open and
# later plots would be drawn into this file.
tryCatch(
  {
    qqnorm(small.spa_cpr$returns, pch = 1, frame = FALSE, main = "")
    # `main` is a plot-title argument and has no meaning for the reference
    # line, so it is not passed to qqline().
    qqline(small.spa_cpr$returns, col = "steelblue", lwd = 2)
  },
  finally = dev.off()
)

In [5]:
# Normal Q-Q plot of the SPA-TCPR returns (small environments), written to a
# 4 x 4 inch PDF.
pdf(file = "qqplot_tcpr.pdf", width = 4, height = 4)

# Close the PDF device even if plotting fails; otherwise it stays open and
# later plots would be drawn into this file.
tryCatch(
  {
    qqnorm(small.spa_tcpr$returns, pch = 1, frame = FALSE, main = "")
    # `main` is a plot-title argument and has no meaning for the reference
    # line, so it is not passed to qqline().
    qqline(small.spa_tcpr$returns, col = "steelblue", lwd = 2)
  },
  finally = dev.off()
)

In [6]:
# One column of returns per approach for the small environments.
# Built with data.frame() directly rather than data.frame(cbind(...)):
# cbind() goes through a matrix, which coerces every column to a single type
# and only warns when it has to recycle a shorter vector.
small.returns_data <- data.frame(
  spa = small.spa$returns,
  spa_cpr = small.spa_cpr$returns,
  spa_tcpr = small.spa_tcpr$returns,
  ato = small.ato$returns,
  rlpr_tcpr = small.rlpr_tcpr$returns,
  rlps_tcpr = small.rlps_tcpr$returns
)

In [7]:
# One column of returns per approach for the medium environments.
# Built with data.frame() directly rather than data.frame(cbind(...)):
# cbind() goes through a matrix, which coerces every column to a single type
# and only warns when it has to recycle a shorter vector.
m.returns_data <- data.frame(
  spa = m.spa$returns,
  spa_cpr = m.spa_cpr$returns,
  spa_tcpr = m.spa_tcpr$returns,
  ato = m.ato$returns,
  rlpr_tcpr = m.rlpr_tcpr$returns,
  rlps_tcpr = m.rlps_tcpr$returns
)

In [8]:
# One column of returns per approach for the large environments.
# Built with data.frame() directly rather than data.frame(cbind(...)):
# cbind() goes through a matrix, which coerces every column to a single type
# and only warns when it has to recycle a shorter vector.
l.returns_data <- data.frame(
  spa = l.spa$returns,
  spa_cpr = l.spa_cpr$returns,
  spa_tcpr = l.spa_tcpr$returns,
  ato = l.ato$returns,
  rlpr_tcpr = l.rlpr_tcpr$returns,
  rlps_tcpr = l.rlps_tcpr$returns
)

In [None]:
# Reshape each wide returns table to long form: stack() yields a data frame
# with a `values` column (the returns) and an `ind` factor (the source column
# name), which is the layout the pairwise tests below consume.
small.stacked_returns_data <- utils::stack(small.returns_data)
m.stacked_returns_data <- utils::stack(m.returns_data)
l.stacked_returns_data <- utils::stack(l.returns_data)

## Perform tests to assess significant differences in group means

In [None]:
# Pairwise Wilcoxon rank-sum tests between all approaches (small
# environments), with Benjamini-Hochberg adjusted p-values.
# `paired = FALSE` is forwarded to wilcox.test().
pairwise.wilcox.test(
  x = small.stacked_returns_data$values,
  g = small.stacked_returns_data$ind,
  p.adjust.method = "BH",
  paired = FALSE
)

In [None]:
# Pairwise Wilcoxon rank-sum tests between all approaches (medium
# environments), with Benjamini-Hochberg adjusted p-values.
# `paired = FALSE` is forwarded to wilcox.test().
pairwise.wilcox.test(
  x = m.stacked_returns_data$values,
  g = m.stacked_returns_data$ind,
  p.adjust.method = "BH",
  paired = FALSE
)

In [None]:
# Pairwise Wilcoxon rank-sum tests between all approaches (large
# environments), with Benjamini-Hochberg adjusted p-values.
# `paired = FALSE` is forwarded to wilcox.test().
pairwise.wilcox.test(
  x = l.stacked_returns_data$values,
  g = l.stacked_returns_data$ind,
  p.adjust.method = "BH",
  paired = FALSE
)

## Group means for percentage complete

In [None]:
# Mean of the `pc` column per approach (small environments).
# Returned as one named vector so every value is labelled, and listed in the
# same order as the columns of small.returns_data (rlpr_tcpr before
# rlps_tcpr) so results cannot be misattributed by position.
c(
  spa = mean(small.spa$pc),
  spa_cpr = mean(small.spa_cpr$pc),
  spa_tcpr = mean(small.spa_tcpr$pc),
  ato = mean(small.ato$pc),
  rlpr_tcpr = mean(small.rlpr_tcpr$pc),
  rlps_tcpr = mean(small.rlps_tcpr$pc)
)

In [None]:
# Mean of the `pc` column per approach (medium environments).
# Returned as one named vector so every value is labelled, and listed in the
# same order as the columns of m.returns_data (rlpr_tcpr before rlps_tcpr)
# so results cannot be misattributed by position.
c(
  spa = mean(m.spa$pc),
  spa_cpr = mean(m.spa_cpr$pc),
  spa_tcpr = mean(m.spa_tcpr$pc),
  ato = mean(m.ato$pc),
  rlpr_tcpr = mean(m.rlpr_tcpr$pc),
  rlps_tcpr = mean(m.rlps_tcpr$pc)
)

In [None]:
# Mean of the `pc` column per approach (large environments).
# Returned as one named vector so every value is labelled instead of being
# identified only by its position in the output.
c(
  spa = mean(l.spa$pc),
  spa_cpr = mean(l.spa_cpr$pc),
  spa_tcpr = mean(l.spa_tcpr$pc),
  ato = mean(l.ato$pc),
  rlpr_tcpr = mean(l.rlpr_tcpr$pc),
  rlps_tcpr = mean(l.rlps_tcpr$pc)
)

## Group means for normalized returns

In [None]:
# Mean of the `returns` column per approach (small environments).
# Returned as one named vector so every value is labelled instead of being
# identified only by its position in the output.
c(
  spa = mean(small.spa$returns),
  spa_cpr = mean(small.spa_cpr$returns),
  spa_tcpr = mean(small.spa_tcpr$returns),
  ato = mean(small.ato$returns),
  rlpr_tcpr = mean(small.rlpr_tcpr$returns),
  rlps_tcpr = mean(small.rlps_tcpr$returns)
)

In [None]:
# Mean of the `returns` column per approach (medium environments).
# Returned as one named vector so every value is labelled, and listed in the
# same order as the columns of m.returns_data (rlpr_tcpr before rlps_tcpr)
# so results cannot be misattributed by position.
c(
  spa = mean(m.spa$returns),
  spa_cpr = mean(m.spa_cpr$returns),
  spa_tcpr = mean(m.spa_tcpr$returns),
  ato = mean(m.ato$returns),
  rlpr_tcpr = mean(m.rlpr_tcpr$returns),
  rlps_tcpr = mean(m.rlps_tcpr$returns)
)

In [None]:
# Mean of the `returns` column per approach (large environments).
# Returned as one named vector so every value is labelled, and listed in the
# same order as the columns of l.returns_data (rlpr_tcpr before rlps_tcpr)
# so results cannot be misattributed by position.
c(
  spa = mean(l.spa$returns),
  spa_cpr = mean(l.spa_cpr$returns),
  spa_tcpr = mean(l.spa_tcpr$returns),
  ato = mean(l.ato$returns),
  rlpr_tcpr = mean(l.rlpr_tcpr$returns),
  rlps_tcpr = mean(l.rlps_tcpr$returns)
)