epa_grade.Rmd

---
title: 'MyRWA: Annual EPA Grade Assessment - 2014'
author: "Jeffrey D Walker, PhD; Andy Hrycyna; Nathan Sanders, PhD; Veronique Vicard; Patrick Herron, PhD"
date: '`r format(Sys.Date(), "%B %d, %Y")`'
output: 
  html_document: 
    number_sections: yes
    toc: yes
    toc_depth: 2
    css: styles.css
---

```{r libraries, echo=FALSE, warning=FALSE, message=FALSE}
library(dplyr)
library(tidyr)
library(myrwaR)
library(lubridate)
library(knitr)
library(ggplot2)
theme_set(theme_bw())
library(gridExtra)
library(ggmap)

# chunk defaults for the whole report: hide code, warnings and messages
knitr::opts_chunk$set(echo=FALSE, warning=FALSE, message=FALSE)

# shared fill scales so that weather (Dry/Wet) and standard (Swim/Boat) colors
# are identical in every figure that uses them
scale_fill_weather <- scale_fill_manual('Weather', values=c('Dry'='orangered',
                                                            'Wet'='deepskyblue'))
scale_fill_meets <- scale_fill_manual('Standard', values=c("Swim"="deepskyblue",
                                                           "Boat"="chartreuse3"))

# default number of years for rolling window
# (selects which rolling-window results are shown in the executive summary)
n_year <- 3
```

```{r download}
# Fetch each input file from S3 unless a local copy already exists.
# Names are the local file names, values are the remote URLs.
remote_files <- c(
  "db_locations.csv"="https://s3.amazonaws.com/myrwa/epa-grade-2014/db_locations.csv",
  "db_bacteria.csv"="https://s3.amazonaws.com/myrwa/epa-grade-2014/db_bacteria.csv",
  "logan_hourly.csv"="https://s3.amazonaws.com/myrwa/epa-grade-2014/logan_hourly.csv"
)
for (local_file in names(remote_files)) {
  if (!file.exists(local_file)) {
    download.file(remote_files[[local_file]],
                  destfile=local_file, method="curl")
  }
}
```

```{r bact-std}
# Numeric bacteria criteria, one row per water type. Each water type is
# assessed with a single indicator (ENT for saline, ECOLI for fresh); the
# report tables present BoatStd/SwimStd in #/100mL.
bact_std <- data.frame(
  WaterType=c("Saline", "Fresh"),
  CharacteristicID=c("ENT", "ECOLI"),
  BoatStd=c(350, 1260),
  SwimStd=c(104, 235)
)
```

```{r load-precip}
# Hourly precipitation record: parse timestamps (EST) and standardize names.
prcp <- read.csv(file = "logan_hourly.csv", stringsAsFactors=FALSE)
prcp <- mutate(prcp, datetime=ymd_hms(datetime, tz="EST"))
prcp <- rename(prcp, Datetime=datetime, Precip=prcp_in)
prcp <- as.data.frame(prcp)

# missing hourly values are treated as zero precipitation
prcp$Precip <- ifelse(is.na(prcp$Precip), 0, prcp$Precip)

# antecedent precipitation from myrwaR (stored as Precip48; presumably the
# package's default 48-hour window -- confirm against myrwaR), then classify
# each hour as Wet when it exceeds 0.25, otherwise Dry
prcp$Precip48 <- antecedent_precip(prcp)
is_wet <- prcp$Precip48 > 0.25
prcp$Weather <- factor(ifelse(is_wet, 'Wet', 'Dry'))
```

```{r load-bact}
# raw bacteria results and location metadata; sample timestamps are parsed as EST
df <- read.csv("db_bacteria.csv", stringsAsFactors=FALSE) %>%
  mutate(Datetime=ymd_hms(Datetime, tz="EST"))
loc <- read.csv("db_locations.csv", stringsAsFactors=FALSE)

# surface samples only
# (rows with a missing depth type are treated as surface, "S")
df <- mutate(df, 
              SampleDepthType=as.character(SampleDepthType),
              SampleDepthType=ifelse(is.na(SampleDepthType), "S", SampleDepthType)) %>%
  filter(SampleDepthType=="S")

# exclude flags
# flagged rows are kept in df_flagged so the report can state how many were
# dropped; the assertion fails if any flag other than "L" would be excluded
df_flagged <- filter(df, !is.na(FlagID))
df <- filter(df, is.na(FlagID))
stopifnot(all(df_flagged$FlagID=="L"))

# locations
# rows are sorted by waterbody then location before the ordered factors are
# built, so factor level order (and hence plot axis order) follows that sort;
# Agency is derived from the "MWRA" prefix of the location ID
loc <- rename(loc, LocationID=ID) %>%
  mutate(Location_Waterbody=paste(WaterBodyID, LocationID, sep="-")) %>%
  arrange(WaterBodyID, LocationID) %>%
  mutate(LocationID=ordered(LocationID, levels=LocationID),
         WaterBodyID=ordered(WaterBodyID, levels=unique(WaterBodyID)),
         WaterType=ordered(WaterType, levels=c("Fresh", "Saline")),
         Location_Waterbody=ordered(Location_Waterbody, levels=Location_Waterbody),
         Agency=ifelse(stringr::str_sub(LocationID, 1, 4)=="MWRA", "MWRA", "MyRWA"),
         Agency=factor(Agency))

# fix Island End River
# (override coordinates and water type for station MWRA183)
idx <- which(loc$LocationID=="MWRA183")
loc[idx, "Latitude"] <- 42.39205
loc[idx, "Longitude"] <- -71.05041
loc[idx, "WaterType"] <- "Saline"

# add precip and weather to df
# each sample is matched to the precipitation record of its nearest hour
df <- mutate(df, Datehour=round_date(Datetime, unit="hour")) %>%
  left_join(prcp, by=c("Datehour"="Datetime"))

# add location info
df <- left_join(df, select(loc, LocationID, WaterBodyID, WaterType, Location_Waterbody, Agency),
                by="LocationID") %>%
  mutate(LocationID=ordered(LocationID, levels=levels(loc$LocationID)))

# filter characteristic by salt/fresh location
# (a missing water type defaults to 'Saline'; only ENT results at saline
# locations and ECOLI results at fresh locations are kept)
df <- mutate(df, 
             WaterType=as.character(WaterType),
             WaterType=ifelse(is.na(WaterType), 'Saline', WaterType),
             WaterType=ordered(WaterType, levels=c('Fresh', 'Saline')))
df <- filter(df, paste(WaterType, CharacteristicID) %in% c('Saline ENT', 'Fresh ECOLI'))

# add standards
# Meets is the strictest standard satisfied by the result:
# "Swim" (<= SwimStd), else "Boat" (<= BoatStd), else "None"
df <- left_join(df, bact_std, by=c("CharacteristicID", "WaterType")) %>%
  mutate(Meets=ifelse(ResultValue <= SwimStd, "Swim",
                      ifelse(ResultValue <= BoatStd, "Boat", "None")),
         Meets=ordered(Meets, levels=c("Swim", "Boat", "None")))

# add year and month
df <- mutate(df, Year=year(Datetime), Month=month(Datetime))

# fix duplicate date
# Drop the known duplicate record (ID 279878 at MWRA052 on 1991-02-07).
# A logical mask is used instead of df[-which(...), ]: when no row matches,
# which() returns integer(0) and negative indexing with it would drop EVERY
# row. `%in% TRUE` treats NA comparisons as "not the duplicate", matching how
# which() ignores NA.
is_duplicate <- df$LocationID=="MWRA052" &
  as.Date(df$Datetime)==as.Date("1991-02-07") &
  df$ID==279878
df <- df[!(is_duplicate %in% TRUE),]
```

```{r check-bact}
# Sanity checks on the cleaned dataset; knitting stops if any check fails.

# at most one sample per location/agency/project/characteristic per day
df %>%
  mutate(Date=floor_date(Datetime, unit="day")) %>%
  group_by(Location_Waterbody, Agency, ProjectID, CharacteristicID, Date) %>%
  summarise(N=n()) %>%
  (function(x) {
    stopifnot(all(x$N==1))
  })

# no flagged rows remain, every row has a result value, and the Weather label
# is consistent with the 0.25 threshold on Precip48
stopifnot(all(is.na(df$FlagID)))
stopifnot(all(!is.na(df$ResultValue)))
stopifnot(all((df$Weather=="Dry" & df$Precip48 <= 0.25) | (df$Weather=="Wet" & df$Precip48 > 0.25)))
```

```{r grades}
# Lookup table of the 13 letter grades, best first: A+ ... D- in 5% steps from
# 100% down to 40%, then F covering 0% - 40%. min_value/max_value are the
# compliance bounds (as fractions) of each grade.
# Note: grades without a plus/minus carry a trailing space (e.g. "A ", "F ")
# because plusminus is " " for them.
grades <- data.frame(letter=c(rep(c("A","B","C","D"), each=3), "F"),
                     plusminus=c(rep(c("+"," ","-"), times=4), " "),
                     min_value=c(seq(1-0.05, by=-0.05, length.out=12), 0),
                     max_value=seq(1, by=-0.05, length.out=13),
                     stringsAsFactors=FALSE) %>%
  # grade is an ordered factor in table order; label is a "min% - max%" string
  mutate(grade=paste(letter, plusminus, sep=''),
         grade=ordered(grade, levels=grade),
         label=paste0(round(min_value*100, 0), '% - ', round(max_value*100, 0), '%'))

# Assign a letter grade to every row of `x` based on its `Compliance` value.
#
# Args:
#   x: data frame with a numeric `Compliance` column (fraction between 0 and 1).
#
# Returns:
#   `x` with an added `Grade` column: an ordered factor whose levels are the
#   grades of the global `grades` table, in table order.
#
# Details:
#   The grade is the first (best) row of `grades` whose `min_value` is strictly
#   below the compliance, i.e. each grade covers (min_value, max_value].
#   A compliance at or below the smallest `min_value` (e.g. exactly 0) matches
#   no row under that rule, so it is assigned the last (lowest) grade rather
#   than NA. A missing compliance (NA/NaN) yields an NA grade. Zero-row input
#   is returned with an empty `Grade` column.
get_grade <- function(x) {
  grade_levels <- as.character(grades$grade)
  grade_index <- vapply(x$Compliance, function(compliance) {
    if (is.na(compliance)) {
      return(NA_integer_)
    }
    candidates <- which(grades$min_value < compliance)
    if (length(candidates) == 0L) {
      # at or below the lowest bound: falls in the last (lowest) grade
      return(nrow(grades))
    }
    candidates[1L]
  }, integer(1))
  x$Grade <- ordered(grade_levels[grade_index], levels=grade_levels)
  x
}
```

```{r compute-roll}
# Build the mapping between grade years and the calendar years in their
# rolling window.
#
# Args:
#   n:     window length in years (>= 1); each grade year uses data from the
#          n calendar years ending in (and including) that grade year.
#   start: first grade year.
#   end:   last grade year.
#
# Returns:
#   data frame with one row per (GradeYear, Year) pair, i.e. n rows per grade
#   year, with Year running from GradeYear-n+1 to GradeYear.
#
# The year offsets are repeated explicitly to the full number of rows instead
# of relying on mutate() recycling a length-n vector, which dplyr rejects when
# n > 1.
grade_years <- function(n=1, start=2006, end=2014) {
  grade_year <- rep(seq(start, end, by=1), each=n)
  offset <- rep(seq(1-n, 0, by=1), times=length(grade_year)/n)
  data.frame(GradeYear=grade_year, Year=grade_year+offset)
}

# Compliance rates per location and grade year, for rolling windows of 1 to 5
# years (N_Year). The per-window tables are stacked into one data frame.
df_loc <- lapply(seq(1, 5), function(n) {
  # joining on Year repeats each sample once for every grade year whose window
  # contains the sample's calendar year
  x <- grade_years(n=n, start=2006, end=2014) %>%
    left_join(df, by="Year") %>%
    # N_Location: total samples at the location within the grade-year window
    group_by(LocationID, GradeYear) %>%
    mutate(N_Location=n()) %>%
    # count samples per weather class and standard met, then spread to one
    # column per combination (N_Swim_Dry, N_Boat_Wet, ...), missing counts as 0
    group_by(Agency, WaterType, Location_Waterbody, LocationID, WaterBodyID, 
             CharacteristicID, GradeYear, Weather, Meets, N_Location) %>%
    summarise(N=n()) %>%
    ungroup %>%
    arrange(WaterType, LocationID, GradeYear, Weather, Meets) %>%
    mutate(Meets=paste("N", Meets, sep="_")) %>%
    unite(MeetsWeather, Meets, Weather) %>%
    spread(MeetsWeather, N, fill=0) %>%
    # fractions are cumulative: a sample meeting the swim standard also counts
    # toward boating; None_* is therefore always 1 and serves as a check.
    # A fraction is NaN when its weather class has no samples (0/0).
    mutate(N_Wet=N_Swim_Wet+N_Boat_Wet+N_None_Wet,
           N_Dry=N_Swim_Dry+N_Boat_Dry+N_None_Dry,
           
           Swim_Dry=N_Swim_Dry/N_Dry,
           Swim_Wet=N_Swim_Wet/N_Wet,
           
           Boat_Dry=(N_Swim_Dry+N_Boat_Dry)/N_Dry,
           Boat_Wet=(N_Swim_Wet+N_Boat_Wet)/N_Wet,
           
           None_Dry=(N_Swim_Dry+N_Boat_Dry+N_None_Dry)/N_Dry,
           None_Wet=(N_Swim_Wet+N_Boat_Wet+N_None_Wet)/N_Wet,
           
           # weather-weighted rates: 75% dry, 25% wet
           Swim=0.75*Swim_Dry+0.25*Swim_Wet,
           Boat=0.75*Boat_Dry+0.25*Boat_Wet,
           None=0.75*None_Dry+0.25*None_Wet,
           
           # overall compliance is the mean of the swim and boat rates
           Compliance=(Swim+Boat)/2) %>%
    # consistency checks; range checks are waived for rows without wet samples
    (function(x) {
      stopifnot(all(x$N_Location-(x$N_Wet+x$N_Dry)==0))
      stopifnot(all((x$Swim >= 0 & x$Swim <= 1) | (x$N_Wet==0)))
      stopifnot(all((x$Boat >= 0 & x$Boat <= 1) | (x$N_Wet==0)))
      stopifnot(all(abs(x$None - 1) <= 1e-6 | (x$N_Wet==0)))
      stopifnot(all(x$Boat >= x$Swim | (x$N_Wet==0)))
      x
    })
  # tag rows with the window length used
  x$N_Year <- n
  x
}) %>%
  do.call(rbind, .)

# equal weighted locations
# Waterbody-level rates are the plain mean of the location-level rates, so
# every location counts equally regardless of its sample count. NaN location
# rates are ignored (na.rm=TRUE). N_Location becomes the number of locations
# in the waterbody; N_Waterbody is the total number of samples.
df_wbody <- group_by(df_loc, WaterType, WaterBodyID, CharacteristicID, GradeYear, N_Year) %>%
  summarise(N_Waterbody=sum(N_Location),
            N_Location=n(),
            N_Wet=sum(N_Wet),
            N_Dry=sum(N_Dry),
            Swim=mean(Swim, na.rm=TRUE),
            Swim_Dry=mean(Swim_Dry, na.rm=TRUE),
            Swim_Wet=mean(Swim_Wet, na.rm=TRUE),
            Boat=mean(Boat, na.rm=TRUE),
            Boat_Dry=mean(Boat_Dry, na.rm=TRUE),
            Boat_Wet=mean(Boat_Wet, na.rm=TRUE),
            None=mean(None, na.rm=TRUE),
            Compliance=mean(Compliance, na.rm=TRUE)) %>%
  ungroup %>%
  # add the letter grade derived from Compliance
  get_grade %>%
  # consistency checks on the aggregated rates (NA results are ignored)
  (function(x) {
    stopifnot(all(x$N_Waterbody-(x$N_Wet+x$N_Dry)==0))
    stopifnot(all(x$Swim >= 0 & x$Swim <= 1, na.rm=TRUE))
    stopifnot(all(x$Boat >= 0 & x$Boat <= 1, na.rm=TRUE))
    stopifnot(all(abs(x$None - 1) <= 1e-6, na.rm=TRUE))
    stopifnot(all(x$Boat >= x$Swim, na.rm=TRUE))
    stopifnot(all(abs(x$Compliance - (x$Swim+x$Boat)/2) <= 1e-6, na.rm=TRUE))
    x
  })
```


# Executive Summary

This document presents an analysis conducted by the Mystic River Watershed Association (MyRWA) to evaluate indicator bacteria levels in the Mystic River basin. The goals of this analysis are to:

1) better understand the spatial and temporal variability in compliance with state water quality standards
2) evaluate a quantitative methodology for assigning letter grades based on the previous EPA grading system
3) explore alternative methods for computing the compliance and for evaluating individual water bodies

Compliance rates for each sampling location and waterbody are based on the state water quality standards for boating and swimming. The E. coli (ECOLI) data and standards will be used to evaluate freshwater locations, and Enterococcus (ENT) will be used for saltwater locations. The following table lists the numeric criteria for each standard and parameter.

```{r tbl-bact-std, results="asis"}
# Render the standards table with reader-friendly column headers.
kable(rename(bact_std,
             "Location Type"=WaterType,
             "Parameter"=CharacteristicID,
             "Boating (#/100mL)"=BoatStd,
             "Swimming (#/100mL)"=SwimStd))
```

MyRWA recommends the following grades for each waterbody in 2014. These grades are based on data collected over a `r n_year`-year period (`r 2014-n_year+1`-2014) by both MyRWA and the Massachusetts Water Resources Authority (MWRA). Multiple years of data are used to reduce uncertainty and increase stability in the year-to-year grade. The grade is directly related to the mean of the individual swimming and boating compliance rates. This mean compliance rate is shown as red points in the figure below. 

For each waterbody, the compliance rate is computed as the mean of all locations within the waterbody. For each standard (swimming and boating), the compliance rate is based on a weighted average of dry and wet weather compliance using weights of 0.75 and 0.25, respectively. Individual samples are classified as wet or dry weather using a threshold precipitation amount of 0.25" over the previous 48 hours from when the sample was collected.  Details of this methodology are described in the sections below.

```{r es-plot-wbody-grade, fig.width=10, fig.height=6}
# Executive-summary figure: stacked swim/boat compliance per waterbody for the
# 2014 grade year (n_year rolling window), with the average compliance rate as
# a red point and the letter grade printed above each bar.
p.2014 <- filter(df_wbody, N_Year==n_year, GradeYear==2014) %>%
  select(WaterType, WaterBodyID, Grade, Compliance, Swim, Boat) %>%
  # order waterbodies on the x axis by grade, then by decreasing compliance
  arrange(WaterType, Grade, desc(Compliance)) %>%
  mutate(WaterBodyID=ordered(as.character(WaterBodyID), 
                             levels=unique(WaterBodyID)),
         # Boat becomes the increment above Swim so the stacked bar tops out
         # at the boating compliance rate
         Boat=Boat-Swim) %>%
  gather(Meets, ComplianceMeet, Swim, Boat) %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  arrange(WaterType, WaterBodyID, rev(Meets)) %>%
  ggplot() +
  geom_bar(aes(x=WaterBodyID, y=ComplianceMeet, fill=Meets), stat="identity", color='grey50') +
  geom_point(aes(x=WaterBodyID, y=Compliance, color='Average\nCompliance\nRate'), show_guide=TRUE,
             data=filter(df_wbody, N_Year==n_year, GradeYear==2014) %>%
              select(WaterType, WaterBodyID, Compliance) %>% 
              unique) +
  geom_text(aes(x=WaterBodyID, label=Grade), y=1.05, size=4, 
            data=filter(df_wbody, N_Year==n_year, GradeYear==2014) %>%
              select(WaterType, WaterBodyID, Grade) %>% 
              unique) +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1.07),
                     breaks=seq(0, 1, by=0.1)) +
  scale_color_manual('', values='red') +
  scale_fill_meets +
  # legend order: fill (standard) first, color (average point) second.
  # `order=2` belongs to the color guide_legend(); it was previously passed to
  # guides() itself because of a misplaced parenthesis.
  guides(fill = guide_legend(override.aes=list(shape=NA, colour=NULL), order=1), 
         color = guide_legend('', override.aes = list(fill = NA), order=2)) +
  facet_grid(.~WaterType, scales="free_x", space = "free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(x="", y="% Compliance", 
       title="Compliance Rates and Grades for 2014")
p.2014
```

The following table lists the grade and numeric values for each waterbody, and includes the individual compliance rates for the swimming and boating standards under dry and wet weather.

```{r es-table-wbody-grade, results="asis"}
# Executive-summary table: grade and compliance rates per waterbody for the
# 2014 grade year (n_year rolling window), sorted by grade then compliance.
filter(df_wbody, N_Year==n_year, GradeYear==2014) %>%
  select(WaterType, WaterBodyID, Grade, Compliance, Swim, Boat, Swim_Dry, Swim_Wet, Boat_Dry, Boat_Wet) %>%
  arrange(WaterType, Grade, desc(Compliance)) %>%
  # format fractions as percent strings (done after sorting on numeric values)
  mutate(Compliance=scales::percent(Compliance),
         Swim=scales::percent(Swim),
         Boat=scales::percent(Boat),
         Swim_Dry=scales::percent(Swim_Dry),
         Swim_Wet=scales::percent(Swim_Wet),
         Boat_Dry=scales::percent(Boat_Dry),
         Boat_Wet=scales::percent(Boat_Wet)) %>%
  # reader-friendly column headers
  rename("Water Type"=WaterType, "Waterbody"=WaterBodyID, "Average"=Compliance,
         "Avg Swim"=Swim, "Avg Boat"=Boat, "Swim/Dry"=Swim_Dry, "Swim/Wet"=Swim_Wet,
         "Boat/Dry"=Boat_Dry, "Boat/Wet"=Boat_Wet) %>%
  kable()
```


# Dataset Summary

The water quality data used in this analysis was extracted from the MyRWA Water Quality Database, which has been developed over the past few years. The database was designed to store water quality data collected for multiple sample programs managed by MyRWA as well as other agencies and organizations. The database contains important metadata such as field and lab methods as well as quality assurance information to ensure all data are properly characterized and used appropriately.

The analysis is also facilitated by the [myrwaR](https://github.com/walkerjeffd/myrwaR) R package, which is currently under development by MyRWA. This package contains R functions for loading data from the MyRWA Database, merging precipitation and computing wet/dry conditions. Using this package improves reproducibility and transparency of MyRWA's data analyses and is becoming an integral part of the organization's capabilities for exploring, reporting and understanding datasets collected in the watershed.

## Precipitation Data

An hourly precipitation dataset was obtained for Logan Airport from the Northeast Regional Climate Center and the NOAA Climate Data Online warehouse. The precipitation dataset is continuous (i.e. no gaps) and spans from 1982 - 2014.

The precipitation dataset can be downloaded here: [logan_hourly.csv](https://s3.amazonaws.com/myrwa/epa-grade-2014/logan_hourly.csv)

Precipitation data is used to characterize each water quality sample as either dry or wet weather. This classification is based on a threshold of 48-hour antecedent precipitation > 0.25\".

The following figure shows the fraction of hours for each weather category by year.

```{r plot-prcp-frac, fig.width=8, fig.height=4}
# Fraction of hours classified Dry vs Wet in each year, 1982-2014.
mutate(prcp, Year=year(Datetime)) %>%
  filter(Year > 1981, Year < 2015) %>%
  # hours per year and weather class; position='fill' converts to fractions
  group_by(Year, Weather) %>%
  summarise(N=n()) %>%
  ggplot(aes(factor(Year), N, fill=Weather)) +
  geom_bar(position='fill', stat='identity') +
  scale_fill_weather +
  labs(x="Year", y="Fraction",
       title='Fraction Time Dry/Wet by Year') +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
```

<!-- Inline value: mean over years of the per-year wet fraction. ungroup() drops the Year grouping left by the first summarise() so the final summarise() returns one overall mean instead of one value per year. -->
Overall, the annual mean fraction of Wet weather hours from 1982-2014 was `r scales::percent((mutate(prcp, Year=year(Datetime)) %>%
  filter(Year > 1981, Year < 2015) %>%
  group_by(Year, Weather) %>%
  summarise(N=n()) %>%
  ungroup() %>%
  spread(Weather, N) %>%
  mutate(Fraction_Wet=Wet/(Dry+Wet)) %>%
  summarise(Mean_Fraction_Wet=mean(Fraction_Wet)))$Mean_Fraction_Wet)
`.

This figure shows the weather fractions for only the summer (May-Oct) of each year. Although there is more year-to-year variability than the plot above, the overall mean fraction is about the same indicating no major seasonal differences in the occurrence of wet and dry weather. However, note that "Wet" conditions during winter months may contribute to snowpack, and thus not immediately generate runoff that could affect the instream bacteria concentrations. An improved classification might consider snowmelt during winter months for characterizing wet events.

```{r plot-prcp-frac-summer, fig.width=8, fig.height=4}
# Same Dry/Wet fraction plot as above, restricted to months 5-10 (May-Oct).
mutate(prcp, Year=year(Datetime)) %>%
  filter(Year > 1981, Year < 2015) %>%
  filter(month(Datetime) %in% seq(5, 10)) %>%
  # hours per year and weather class; position='fill' converts to fractions
  group_by(Year, Weather) %>%
  summarise(N=n()) %>%
  ggplot(aes(factor(Year), N, fill=Weather)) +
  geom_bar(position='fill', stat='identity') +
  scale_fill_weather +
  labs(x="Year", y="Fraction",
       title='Fraction Time Dry/Wet During Summer (May-Oct) by Year') +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
```

## Bacteria Data

Bacteria data were extracted from the MyRWA Water Quality Database for three sampling programs from two organizations (MyRWA and MWRA):

```{r table-project, results="asis"}
# One table row per agency/project combination present in the dataset.
kable(unique(select(df, Agency, ProjectID)),
      format = 'markdown', row.names=FALSE)
```

The raw dataset can be downloaded in csv format here: [bacteria.csv](https://s3.amazonaws.com/myrwa/epa-grade-2014/bacteria.csv)

The corresponding locations table can be downloaded in csv format here: [locations.csv](https://s3.amazonaws.com/myrwa/epa-grade-2014/locations.csv)

Two types of bacteria indicators were measured:

- E. coli (`ECOLI`)
- Enterococcus (`ENT`)

The E. coli measurements are used to assess compliance in freshwater locations (above Amelia Earhart Dam), and the Enterococcus measurements are used for saltwater locations (below the dam).

The dataset has been filtered to include only samples that passed all QAQC tests. As a result, `r nrow(df_flagged)` samples (for MyRWA Baseline data only) were excluded due to failing the lab RPD (Relative Percent Difference) test.

### Station Locations

The following map shows the location of each sampling station.

```{r get-map}
# Base map centered on the midpoint of the station longitude/latitude ranges
# (stations with missing coordinates are ignored when computing the ranges).
map <- get_map(location = c(lon = mean(range(loc$Longitude, na.rm=TRUE)),
                            lat = mean(range(loc$Latitude, na.rm=TRUE))),
               zoom = 12, maptype = "terrain")
```

```{r map-locations, fig.width=8, fig.height=6}
# Station map. Labels are placed on a per-station basis to reduce overlap:
# stations listed below get their label to the right, below or above the
# point; every other station is labelled to the left.
label_right <- c("ABR049", "ABR028", "ABR006", "UPL001", "MYR071",
                 "MEB001", "MWRA056", "MWRA177", "MAR036", "MWRA176",
                 "MARW067", "MWRA167", "MYR275", "MWRA183", "MYRMMP", 
                 "MIC004", "BEI093", "CHR95S", "MWRA057",
                 "ALB006", "MWRA172", "MWRA074", "WIB001")
label_middlebottom <- c("MWRA070", "MWRA069", "MWRA015", "MWRA027")
label_middletop <- c("MWRA137")

ggmap(map, extent="device", darken=c(0.5, "white")) +
  # points: color by waterbody, shape by agency
  geom_point(aes(x=Longitude, y=Latitude, color=WaterBodyID, shape=Agency),
             data=loc, size=3) +
  # labels to the right of the point
  geom_text(aes(x=Longitude+0.003, y=Latitude, label=LocationID),
            data=filter(loc, LocationID %in% label_right), 
            size=3, hjust=0) +
  # labels centered below the point
  geom_text(aes(x=Longitude, y=Latitude-0.001, label=LocationID),
            data=filter(loc, (LocationID %in% c(label_middlebottom))), 
            size=3, hjust=0.5, vjust=1) +
  # labels centered above the point
  geom_text(aes(x=Longitude, y=Latitude+0.001, label=LocationID),
            data=filter(loc, (LocationID %in% c(label_middletop))), 
            size=3, hjust=0.5, vjust=0) +
  # all remaining stations: label to the left of the point
  geom_text(aes(x=Longitude-0.003, y=Latitude, label=LocationID),
            data=filter(loc, !(LocationID %in% c(label_middlebottom, label_middletop, label_right))), 
            size=3, hjust=1)
```

### Sampling Counts

This figure shows the number of samples for each location (grouped by program) and year for 2002-2014 (note that some MWRA stations go back to 1989, but the MyRWA baseline program began in 2002). The freshwater MyRWA stations all span the period 2002-2014. Additional baseline stations were added in 2008-2009 in the saltwater portion of the watershed. The MWRA locations include more samples per year (approximately 1 sample every 1-2 weeks) than MyRWA locations that are sampled monthly. A number of samples from May 2010 - Dec 2011 are missing for the saltwater MyRWA stations due to methodological errors that were identified with the lab analysis.

```{r plot-tile-cnt-yr, fig.width=10, fig.height=8}
# Heat map of sample counts per location and year (2002-2014), faceted by
# water type (rows) and project (columns).
df %>%
  filter(Year %in% seq(2002, 2014)) %>%
  # reverse the level order so locations read top-to-bottom on the y axis
  mutate(Location_Waterbody=ordered(as.character(Location_Waterbody), 
                                    levels=rev(levels(Location_Waterbody)))) %>%
  group_by(ProjectID, Location_Waterbody, WaterType, Year) %>%
  summarise(N=n()) %>%
  ggplot(aes(factor(Year), Location_Waterbody, fill=N)) +
  geom_tile() +
  scale_fill_gradientn('No. Samples', limits=c(0, NA),
                       colours=rev(scales::brewer_pal(type = "seq", palette = 'GnBu')(9))) +
  labs(x="Year", y="") +
  facet_grid(WaterType~ProjectID, space="free_y", scales="free_y") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5, size=6))
```

This figure shows the number of samples at each location by month. The MyRWA Baseline program (`BASE`) contains samples evenly distributed across the year. The MWRA `CSORWM` program focuses on May-Oct, and `BHWQM` is more evenly distributed but with fewer samples in Jan-Mar.

```{r plot-tile-cnt-month, fig.width=10, fig.height=8}
# Heat map of sample counts per location and calendar month (2002-2014 data
# pooled), faceted by water type (rows) and project (columns).
df %>%
  filter(Year %in% seq(2002, 2014)) %>%
  # reverse the level order so locations read top-to-bottom on the y axis
  mutate(Location_Waterbody=ordered(as.character(Location_Waterbody), 
                                    levels=rev(levels(Location_Waterbody)))) %>%
  group_by(ProjectID, Location_Waterbody, WaterType, Month) %>%
  summarise(N=n()) %>%
  ggplot(aes(factor(Month), Location_Waterbody, fill=N)) +
  geom_tile() +
  # scale_x_continuous(breaks=seq(1, 12, by=2)) +
  scale_fill_gradientn('No. Samples', limits=c(0, NA),
                       colours=rev(scales::brewer_pal(type = "seq", palette = 'GnBu')(9))) +
  labs(x="Month", y="") +
  facet_grid(WaterType~ProjectID, space="free_y", scales="free_y") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
```

### Distributions

This figure shows the distribution of bacteria concentrations at each site under Dry and Wet weather for 2010-2014. At nearly all sites, there is a significant difference between Dry and Wet weather.

```{r plot-bact-box, fig.width=10, fig.height=6}
# Box plots of concentration per location, split by weather, for 2010-2014.
df %>%
  filter(Year %in% seq(2010, 2014)) %>%
  ggplot(aes(Location_Waterbody, ResultValue, fill=Weather)) +
  geom_boxplot() +
  # standards as horizontal lines; bact_std carries WaterType and
  # CharacteristicID, so each facet shows only its own standards
  geom_hline(aes(yintercept=Value, linetype=Standard),
             data=gather(bact_std, Standard, Value, BoatStd, SwimStd),
             show_guide = TRUE) +
  scale_y_log10(labels=scales::comma) +
  scale_fill_weather +
  labs(x='', y="Ecoli/Ent Concentration (#/100mL)") +
  facet_grid(.~WaterType+CharacteristicID, scales='free_x', space="free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
```

This figure shows the distributions of concentrations by waterbody for 2010-2014. The data are grouped by simply combining all samples from individual locations. Therefore, locations with more samples have greater weight in the distribution.

```{r plot-bact-box-wbody, fig.width=8, fig.height=5}
# Box plots of concentration per waterbody (all locations pooled), split by
# weather, for 2010-2014.
df %>%
  filter(Year %in% seq(2010, 2014)) %>%
  ggplot(aes(WaterBodyID, ResultValue, fill=Weather)) +
  geom_boxplot() +
  # standards as horizontal lines; bact_std carries WaterType and
  # CharacteristicID, so each facet shows only its own standards
  geom_hline(aes(yintercept=Value, linetype=Standard),
             data=gather(bact_std, Standard, Value, BoatStd, SwimStd),
             show_guide = TRUE) +
  scale_y_log10(labels=scales::comma) +
  scale_fill_weather +
  labs(x='', y="Ecoli/Ent Concentration (#/100mL)") +
  facet_grid(.~WaterType+CharacteristicID, scales='free_x', space="free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
```

This figure compares the bacteria distributions under dry and wet weather conditions between Summer (May-Oct) and Winter (Nov-Apr) for the MyRWA stations from 2002-2014. The distributions under dry weather are fairly similar at most locations between the two seasons. However, under wet weather, winter samples tend to be lower than summer samples at some of the locations (especially Aberjona River, Mill Creek). These results are likely due to precipitation falling as snow and thus not generating runoff that would contribute to elevated bacteria levels. During winter, the wet/dry classification could instead be determined by changes in streamflow. However, new thresholds for defining wet and dry would be required. Therefore, for this analysis the 48-hour antecedent precipitation threshold will be used in all seasons.

```{r plot-bact-box-season, fig.width=8, fig.height=6}
# Box plots of concentration per MyRWA location, split by season
# (Summer = months 5-10, Winter = all other months), with one facet row per
# weather class. No year filter is applied in this chunk.
df %>%
  filter(Agency=="MyRWA") %>%
  mutate(Season=ifelse(Month %in% seq(5, 10), "Summer", "Winter"),
         Season=ordered(Season, levels=c("Summer", "Winter"))) %>%
  ggplot(aes(Location_Waterbody, ResultValue, fill=Season)) +
  geom_boxplot() +
  # standards as horizontal lines; bact_std carries WaterType and
  # CharacteristicID, so each facet column shows only its own standards
  geom_hline(aes(yintercept=Value, linetype=Standard),
             data=gather(bact_std, Standard, Value, BoatStd, SwimStd),
             show_guide = TRUE) +
  scale_y_log10(labels=scales::comma) +
  scale_fill_manual("Season", values=c(Summer="orangered", Winter="deepskyblue")) +
  labs(x='', y="Ecoli/Ent Concentration (#/100mL)") +
  facet_grid(Weather~WaterType+CharacteristicID, scales='free_x', space="free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
```


# Compliance Rates

The compliance of each location and waterbody is computed as a weighted mean of the compliances under dry and wet weather. This weighting scheme removes the year-to-year variability due to weather as well as the proportion of samples collected under each weather condition in a given year. As shown above, the long-term average fractions of the year under wet and dry conditions are 25% and 75%, respectively. Therefore, these fractions will be used to aggregate the compliances under dry/wet weather into an average compliance for a given year.

The mean compliance for standard $s$ at location $l$ is computed by the following equation:

$$Y_{l,s} = 0.75 Y_{l,s,dry} + 0.25 Y_{l,s,wet}$$

The mean compliance for standard $s$ in waterbody $w$ is computed as the mean of $n_w$ locations within the waterbody:

$$Y_{w,s} = \frac{1}{n_w}\sum_{l \in w}{Y_{l,s}}$$

Finally, the overall compliance rate for waterbody $w$ that is used to assign a grade is the mean of the boating and swimming compliance rates:

$$Y_w = \frac{Y_{w,swim}+Y_{w,boat}}{2}$$

## Compliance By Location

This figure shows the compliance rates for each location under dry and wet weather in 2014.

```{r plot-bact-comp-weather, fig.width=10, fig.height=6}
# Stacked swim/boat compliance per location for 2014 (1-year window), with one
# facet row per weather class; bar outline color identifies the agency.
filter(df_loc, N_Year==1, GradeYear==2014) %>%
  select(Agency, WaterType, Location_Waterbody, Swim_Wet, Swim_Dry, Boat_Wet, Boat_Dry) %>%
  # Boat_* become the increment above Swim_* so each stacked bar tops out at
  # the boating compliance rate
  mutate(Boat_Wet=Boat_Wet-Swim_Wet,
         Boat_Dry=Boat_Dry-Swim_Dry) %>%
  # long format, then split e.g. "Swim_Wet" into Meets="Swim", Weather="Wet"
  gather(MeetsWeather, Compliance, Swim_Wet:Boat_Dry) %>%
  mutate(MeetsWeather=as.character(MeetsWeather)) %>%
  separate(MeetsWeather, c("Meets", "Weather"))  %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  arrange(Location_Waterbody, rev(Meets)) %>%
  ggplot(aes(Location_Waterbody, Compliance, fill=Meets, color=Agency)) +
  geom_bar(stat="identity", position="stack") +
  scale_fill_meets +
  scale_color_manual(values=c("MyRWA"="red", "MWRA"="grey50")) +
  scale_y_continuous(labels=scales::percent) +
  facet_grid(Weather~WaterType, scales='free_x', space="free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  guides(color=guide_legend(override.aes = list(fill=NA))) +
  labs(x="", y="Compliance Fraction", 
       title="Fraction of Samples Meeting Standards for 2014 by Weather and Location")
```

For each location, the compliance rates for dry and wet weather are then averaged using weights of 75% and 25%, respectively. These weights are based on the long-term average fraction of the year corresponding to each weather condition.

```{r plot-bact-comp, fig.width=10, fig.height=6}
# Stacked weather-weighted swim/boat compliance per location for 2014
# (1-year window); bar outline color identifies the agency.
filter(df_loc, N_Year==1, GradeYear==2014) %>%
  select(Agency, WaterType, Location_Waterbody, Swim, Boat) %>%
  # Boat becomes the increment above Swim so the stacked bar tops out at the
  # boating compliance rate
  mutate(Boat=Boat-Swim) %>%
  gather(Meets, Compliance, Swim:Boat) %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  arrange(Location_Waterbody, rev(Meets)) %>%
  ggplot(aes(Location_Waterbody, Compliance, fill=Meets, color=Agency)) +
  geom_bar(stat="identity") +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1),
                     breaks=seq(0, 1, by=0.2)) +
  scale_color_manual(values=c("MyRWA"="red", "MWRA"="grey50")) +
  scale_fill_meets +
  facet_grid(.~WaterType, scales="free_x", space = "free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  guides(color=guide_legend(override.aes = list(fill=NA))) +
  labs(x="", y="Weight Average % Compliance", 
       title="Weighted Average % Compliance for 2014")
```


## Compliance By Water Body

### Locations Equally Weighted

The primary approach to the waterbody aggregation is to assign equal weight to each location. This calculation is performed by first computing the compliance rate for each location, and then taking the arithmetic mean of compliance rates for all locations within each water body. The following figure shows the resulting compliance rates for each waterbody in 2014.

```{r plot-bact-comp-wbody-equal, fig.width=10, fig.height=6}
# Stacked swim/boat compliance per waterbody for 2014 (1-year window), using
# df_wbody, i.e. the plain mean over locations (each location weighted equally).
filter(df_wbody, N_Year==1, GradeYear==2014) %>%
  select(WaterType, WaterBodyID, Swim, Boat) %>%
  # Boat becomes the increment above Swim so the stacked bar tops out at the
  # boating compliance rate
  mutate(Boat=Boat-Swim) %>%
  gather(Meets, Compliance, Swim:Boat) %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  arrange(WaterBodyID, rev(Meets)) %>%
  ggplot(aes(WaterBodyID, Compliance, fill=Meets)) +
  geom_bar(stat="identity", color='grey50') +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1),
                     breaks=seq(0, 1, by=0.2)) +
  scale_fill_meets +
  facet_grid(.~WaterType, scales="free_x", space = "free_x") +
  guides(fill=guide_legend(override.aes = list(colour=NULL))) +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(x="", y="Weight Average % Compliance", 
       title="Weighted Average % Compliance for 2014 by Water Body\nLocations Equally Weighted")
```

### Locations Weighted by Sample Number

An alternative approach is to compute a weighted mean compliance rate for each waterbody by weighting each location by its number of samples. Locations with more samples thus have a greater influence on the waterbody compliance rates. 

```{r plot-bact-comp-wbody, fig.width=10, fig.height=6}
# Waterbody-level rates with each location weighted by its number of samples
# (N_Location).
# NOTE(review): despite the `_eql` suffix this table is the SAMPLE-WEIGHTED
# aggregation; the equally weighted one is df_wbody.
# The sums have no na.rm, so a NaN location rate makes the waterbody rate NaN.
df_wbody_eql <- df_loc %>%
  group_by(WaterType, WaterBodyID, CharacteristicID, GradeYear, N_Year) %>%
  summarise(N_Waterbody=sum(N_Location),
            Swim=sum(Swim*N_Location)/sum(N_Location),
            Boat=sum(Boat*N_Location)/sum(N_Location),
            None=sum(None*N_Location)/sum(N_Location),
            # computed last: from here on N_Location is the number of
            # locations, so the weighted sums above must come first
            N_Location=n()) %>%
  ungroup %>%
  # consistency checks on the aggregated rates (NA results are ignored)
  (function(x) {
    stopifnot(all(x$Swim >= 0 & x$Swim <= 1, na.rm=TRUE))
    stopifnot(all(x$Boat >= 0 & x$Boat <= 1, na.rm=TRUE))
    stopifnot(all(abs(x$None - 1) <= 1e-6, na.rm=TRUE))
    stopifnot(all(x$Boat >= x$Swim, na.rm=TRUE))
    x
  })
  
# stacked swim/boat compliance per waterbody for 2014 (1-year window)
df_wbody_eql %>%
  filter(N_Year==1, GradeYear==2014) %>%
  # Boat becomes the increment above Swim so the stacked bar tops out at the
  # boating compliance rate
  mutate(Boat=Boat-Swim) %>%
  gather(Meets, Compliance, Swim, Boat) %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  arrange(WaterBodyID, rev(Meets)) %>%
  ggplot(aes(WaterBodyID, Compliance, fill=Meets)) +
  geom_bar(stat="identity", color='grey50') +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1),
                     breaks=seq(0, 1, by=0.2)) +
  scale_fill_meets +
  facet_grid(.~WaterType, scales="free_x", space = "free_x") +
  guides(fill=guide_legend(override.aes = list(colour=NULL))) +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(x="", y="Weight Average % Compliance", 
       title="Weighted Average % Compliance for 2014 by Water Body\nLocations Weighted by Sample Number")
```

### Comparison

This figure compares the overall mean compliance rate (average of swimming and boating compliance) for each waterbody between the two weighting schemes. The results show relatively small differences between weighting all locations within each waterbody equally or by the number of samples for each location.

```{r plot-bact-wbody-compare, fig.width=10, fig.height=6}
# Compare the overall mean compliance (average of Swim and Boat) per waterbody
# under the two weighting schemes, for grade year 2014 and a 1-year window.
#
# df_wbody_eql is built from sum(rate * N_Location) / sum(N_Location), i.e. it
# holds the sample-number-weighted rates, so it must carry the "# Samples"
# label. df_wbody is taken to be the equal-weight table used for the final
# results -- NOTE(review): confirm against its definition earlier in the file.
df_wbody_eql %>%
  filter(N_Year==1, GradeYear==2014) %>%
  mutate(Compliance=(Swim+Boat)/2,
         Weight="# Samples") %>%
  select(Weight, WaterType, WaterBodyID, Compliance) %>%
  rbind(filter(df_wbody, N_Year==1, GradeYear==2014) %>%
          mutate(Weight="Equal") %>%
          select(Weight, WaterType, WaterBodyID, Compliance)) %>%
  ggplot(aes(WaterBodyID, Compliance, fill=Weight)) +
  geom_bar(position="dodge", stat="identity") +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1),
                     breaks=seq(0, 1, by=0.2)) +
  scale_fill_manual("Weight Scheme", values=c("Equal"="orangered", "# Samples"="deepskyblue")) +
  labs(x="", y="Overall Mean Compliance Rate (%)",
       title="Comparison of Mean Compliance Rate by Waterbody with\nLocations Weighted Equally or by Number of Samples") +
  facet_grid(.~WaterType, scales="free_x", space = "free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
```

### Remarks

The two methods for aggregating the compliance rates by water body yield similar results. The waterbodies that contain both MyRWA and MWRA locations (e.g. Alewife Brook, Malden River, and Mystic River (Fresh)) show only minor differences between the two methods.


## Year-to-Year Stability

The previous EPA grades were based on only one year of data. However, this limits the amount of data used to compute the compliance rate (and thus the grade) especially for MyRWA baseline stations where 12 samples are collected at most each year. In some years, all samples happen to occur during dry weather and thus there is no estimate of the compliance under wet weather, which invalidates the 25/75% weighting by weather condition.

With fewer samples, the compliance for each location and waterbody varies from year to year due to high uncertainty in the compliance under dry and wet conditions. For the purpose of assigning an annual set of grades, a more stable metric would be preferred that is not subject to large changes from year to year.

One method for smoothing the annual compliance rates of each waterbody is to use a rolling window containing multiple years of data. For example, a 2-year window would use data from 2013 and 2014 to compute the compliance rate and assign the grade for 2014.

The choice of rolling window length involves some tradeoffs:

- Shorter rolling windows better represent short-term variability in annual compliance and response to infrastructure improvements. However, shorter windows also contain higher uncertainty, greater variability, and less year-to-year stability due to smaller sample sizes.
- Longer rolling windows are more stable from one year to the next due to the large sample size and because the data used for each year includes overlapping samples across multiple years. The grade for each year also reflects the average conditions for multiple years in addition to the assessment year.

### Freshwater MyRWA Locations

The following figure shows the change in annual compliance rates between windows of 1-5 years for the MyRWA stations in the freshwater portion of the basin. Note that the 1 year window includes only data collected in each given year, and thus there is no overlap in the datasets used to compute the compliance rates. Compliances are not shown in 2009 for the 1 Year window because no wet samples were collected, and thus the mean compliance rate could not be computed.

```{r plot-grade-rolling-myrwa, fig.width=10, fig.height=7}
# Stacked compliance bars for the MyRWA freshwater locations: one panel column
# per location and one panel row per rolling-window length.
df_loc %>%
  filter(Agency=="MyRWA", WaterType=="Fresh") %>%
  mutate(Label=paste(N_Year, "Year")) %>%
  select(Label, WaterType, GradeYear, LocationID, Swim, Boat) %>%
  # plot Boat as the increment above Swim so the stacked bar height equals the
  # boating compliance rate
  mutate(Boat=Boat-Swim) %>%
  gather(Meets, Compliance, Swim:Boat) %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  # Swim rows before Boat rows within each location; desc() sorts on the factor
  # itself, whereas rev() only reversed the column's row order
  arrange(LocationID, desc(Meets)) %>%
  ggplot(aes(factor(GradeYear), Compliance, fill=Meets)) +
  geom_bar(stat="identity") +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1),
                     breaks=seq(0, 1, by=0.2)) +
  # no colour aesthetic is mapped in this plot, so the former manual colour
  # scale had nothing to act on and has been removed
  scale_fill_meets +
  facet_grid(Label~LocationID) +
  guides(fill=guide_legend(override.aes = list(colour=NULL))) +
  labs(x="", y="Weighted Average % Compliance", 
       title="Average % Compliance with Varying Rolling Windows") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5, size=6),
        axis.text.y=element_text(size=8),
        strip.text.x=element_text(size=8))
```

```{r plot-grade-rolling-myrwa-cnt, fig.width=10, fig.height=7, eval=FALSE}
# Number of samples behind each bar of the rolling-window figure for the MyRWA
# freshwater locations (chunk is not evaluated when knitting).
filter(df_loc, Agency == "MyRWA", WaterType == "Fresh") %>%
  mutate(Label = paste(N_Year, "Year")) %>%
  select(Label, WaterType, GradeYear, LocationID, N_Location) %>%
  ggplot(aes(x = factor(GradeYear), y = N_Location)) +
  geom_bar(stat = "identity") +
  # window length down the rows, location across the columns
  facet_grid(Label ~ LocationID) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 6),
    axis.text.y = element_text(size = 8),
    strip.text.x = element_text(size = 8)
  ) +
  labs(
    x = "",
    y = "Number of Samples",
    title = "Number of Samples with Varying Rolling Windows"
  )
```

### Freshwater MWRA Locations

This figure shows the same results for MWRA freshwater locations on the Mystic River mainstem (Mystic River (Fresh)). Note that the MWRA locations tend to have many more samples per year than the MyRWA stations, and thus have a larger sample size for each year. Nevertheless, using a multi-year rolling window does have a significant smoothing effect on the change in annual compliance rates.

```{r plot-grade-rolling-mwra, fig.width=10, fig.height=7}
# Stacked compliance bars for the MWRA locations on the freshwater Mystic River
# mainstem: one panel column per location, one panel row per window length.
df_loc %>%
  filter(Agency=="MWRA", WaterType=="Fresh", WaterBodyID=="Mystic River (Fresh)") %>%
  mutate(Label=paste(N_Year, "Year")) %>%
  select(Label, WaterType, GradeYear, LocationID, Swim, Boat) %>%
  # plot Boat as the increment above Swim so the stacked bar height equals the
  # boating compliance rate
  mutate(Boat=Boat-Swim) %>%
  gather(Meets, Compliance, Swim:Boat) %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  # Swim rows before Boat rows within each location; desc() sorts on the factor
  # itself, whereas rev() only reversed the column's row order
  arrange(LocationID, desc(Meets)) %>%
  ggplot(aes(factor(GradeYear), Compliance, fill=Meets)) +
  geom_bar(stat="identity") +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1),
                     breaks=seq(0, 1, by=0.2)) +
  # no colour aesthetic is mapped in this plot, so the former manual colour
  # scale had nothing to act on and has been removed
  scale_fill_meets +
  facet_grid(Label~LocationID) +
  guides(fill=guide_legend(override.aes = list(colour=NULL))) +
  labs(x="", y="Weighted Average % Compliance", 
       title="Average % Compliance with Varying Rolling Windows") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5, size=6),
        axis.text.y=element_text(size=8),
        strip.text.x=element_text(size=8))
```

```{r plot-grade-rolling-mwra-cnt, fig.width=10, fig.height=7, eval=FALSE}
# Number of samples behind each bar of the rolling-window figure for the MWRA
# freshwater mainstem locations (chunk is not evaluated when knitting).
filter(df_loc,
       Agency == "MWRA",
       WaterType == "Fresh",
       WaterBodyID == "Mystic River (Fresh)") %>%
  mutate(Label = paste(N_Year, "Year")) %>%
  select(Label, WaterType, GradeYear, LocationID, N_Location) %>%
  ggplot(aes(x = factor(GradeYear), y = N_Location)) +
  geom_bar(stat = "identity") +
  # window length down the rows, location across the columns
  facet_grid(Label ~ LocationID) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 6),
    axis.text.y = element_text(size = 8),
    strip.text.x = element_text(size = 8)
  ) +
  labs(
    x = "",
    y = "Number of Samples",
    title = "Number of Samples with Varying Rolling Windows"
  )
```

### Remarks

Using a rolling window greater than 1 year reduces the year-to-year variability in the annual compliance rate and effectively smooths the general trends in compliance of each water body. By doing this, the sample size for each year and location is increased thus yielding a more stable estimate of the annual compliance rates.

An alternative method to increase the sample size is to use bootstrap methods that resample the data for each year. However, for locations with only monthly data (MyRWA Baseline), there is still a good chance that no wet events are sampled in a given year, which would require a different method to address.

A rolling window of 3 years will be used for all subsequent analyses and to assign the recommended grades for 2014. The figures above show that 3 years is sufficient to achieve a stable year-to-year change in grades and compliance at most locations. It also captures short-term variability better than the longer windows as shown for the MyRWA station MAR036 on the Malden River.


# Grade System

## Previous EPA Grade Method

In previous years, USEPA used the following method to assign the annual grade for the Mystic River based on MyRWA Baseline Sampling Data ([excerpt from 2012 Grade Announcement](http://yosemite.epa.gov/opa/admpress.nsf/6427a6b7538955c585257359003f0230/ebcd0cd04595086a85257bc0006a48ef!OpenDocument)).

> ...when assessing water quality to assign a grade to the Mystic River Watershed, EPA uses an average between the overall percentages that water quality met the state criteria for swimming and boating (for 2012, it is 61%) as well as qualitative criteria that are similar to those developed for the Charles River Initiative, as follows:
>
> A - met swimming and boating standards nearly all of the time
> 
> B - met swimming and boating standards most of the time 
>
> C - met swimming standards some of the time, and boating standards most of the time 
>
> D - met swimming and boating standards some of the time 
>
> F - fail swimming and boating standards most of the time 

The grade is thus a combination of a quantitative evaluation using the mean of swimming and boating compliance, and a qualitative evaluation using the descriptions above. Note, however, the meaning of "nearly all", "most of", and "some of the time" are not defined. The compliance rates are also computed by pooling all samples collected in the watershed. Thus the grade is heavily weighted towards reflecting conditions in the tributaries since there is only one freshwater station on the Mystic River mainstem. 

## Revised Grading Method

The revised grading method is similar to the EPA method, except that it uses a strictly quantitative approach as well as distinguishes between the individual waterbodies.

Similar to the EPA method, the grade is based on the mean of the swimming and boating compliance rates, each of which is computed as the weighted average for wet and dry weather using weights of 25% and 75%. This value is then assigned to individual grades in 5% increments:

```{r table-grades, results="asis"}
# Render the grade scale (letter grade and its compliance range) as a
# centre-aligned table.
grades %>%
  select(Grade = grade, "Compliance Range" = label) %>%
  kable(align = "c")
```


# Results

To review, the methodology used to compute the final grades for each waterbody includes:

1. Annual compliance for swimming and boating is computed as the weighted mean of the compliances under dry and wet weather using weights of 75% and 25%, respectively.
2. Locations are equally weighted when computing the mean annual compliance rates for each waterbody.
3. A `r n_year`-year rolling window is used to increase the sample size for computing the annual compliance of each year.
4. The grading system uses a strictly quantitative scale for assigning grades as defined above.

## 2014 Results

### By Location

This figure shows the annual compliance rate of each location for 2014 (note that this includes data from `r 2014-n_year+1`-2014 due to the `r n_year`-year rolling window). The red points are the mean of the swimming and boating compliance, and thus the value used to assign each grade.

```{r plot-result-2014-loc, fig.width=10, fig.height=6}
# Location-level results for grade year 2014 with the n_year rolling window.
# get_grade() is defined elsewhere in the document; presumably it supplies the
# Grade column selected below -- verify against its definition.
df_loc_2014 <- df_loc %>%
  filter(N_Year == n_year, GradeYear == 2014) %>%
  get_grade() %>%
  select(Agency, WaterType, Location_Waterbody,
         Swim, Boat, Compliance, Grade) %>%
  # Boat becomes the increment above Swim for the stacked bars
  mutate(Boat = Boat - Swim) %>%
  arrange(WaterType, Grade, desc(Compliance)) %>%
  # freeze the sorted row order as the factor order used on the x axis
  mutate(
    Location_Waterbody = ordered(as.character(Location_Waterbody),
                                 levels = unique(Location_Waterbody))
  )

# Stacked Swim/Boat compliance bars per location for grade year 2014, with the
# mean compliance as a red point and the letter grade printed above each bar.
df_loc_2014 %>% 
  select(Agency, WaterType, Location_Waterbody, Swim, Boat) %>%
  gather(Meets, Compliance, Swim, Boat) %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  # Swim rows before Boat rows within each location; desc() sorts on the factor
  # itself, whereas rev() only reversed the column's row order
  arrange(Location_Waterbody, desc(Meets)) %>%
  ggplot(aes(Location_Waterbody, Compliance)) +
  geom_bar(aes(fill=Meets, color=Agency), stat="identity") +
  # mean of swimming and boating compliance, one point per location
  geom_point(aes(x=Location_Waterbody, y=Compliance),
             data=select(df_loc_2014, WaterType, Location_Waterbody, Compliance) %>% 
              unique,
             color='red') +
  # letter grade just above the 100% line
  geom_text(aes(x = Location_Waterbody, label=Grade), y=1.05, size=4, 
            data=select(df_loc_2014, Agency, WaterType, Location_Waterbody, Grade) %>% unique) +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1.07),
                     breaks=seq(0, 1, by=0.1)) +
  scale_color_manual(values=c("MyRWA"="red", "MWRA"="grey50")) +
  scale_fill_meets +
  facet_grid(.~WaterType, scales="free_x", space = "free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  # `order` is an argument of guide_legend(); it was previously passed to
  # guides() by a misplaced parenthesis, where it is not a legend setting
  guides(fill = guide_legend(override.aes=list(shape=NA, colour=NULL), order=1), 
         color = guide_legend('', override.aes = list(fill = NA), order=2)) +
  labs(x="", y="% Compliance", 
       title=paste0("Compliance Rates and Grade by Location for Assessment Year 2014 (Data from ", 2014-n_year+1, "-2014)"))

```

### By Waterbody

This figure shows the annual compliance rate of each waterbody for 2014 (data from `r 2014-n_year+1`-2014) using all MyRWA and MWRA data and by assigning equal weights to the locations within each waterbody.

```{r plot-2014-wbody, fig.width=10, fig.height=6}
# Waterbody-level results for grade year 2014 with the n_year rolling window,
# sorted by water type, grade and descending mean compliance.
df_wbody_2014 <- df_wbody %>%
  filter(GradeYear == 2014, N_Year == n_year) %>%
  select(WaterType, WaterBodyID, Swim, Boat, Compliance, Grade) %>%
  # Boat becomes the increment above Swim for the stacked bars
  mutate(Boat = Boat - Swim) %>%
  arrange(WaterType, Grade, desc(Compliance)) %>%
  # freeze the sorted row order as the factor order used on the x axis
  mutate(
    WaterBodyID = ordered(as.character(WaterBodyID),
                          levels = unique(WaterBodyID))
  )

# Stacked Swim/Boat compliance bars per waterbody for grade year 2014, with the
# mean compliance as a red point and the letter grade printed above each bar.
df_wbody_2014 %>%
  gather(Meets, ComplianceMeet, Swim, Boat) %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  # Swim rows before Boat rows within each waterbody; desc() sorts on the
  # factor itself, whereas rev() only reversed the column's row order
  arrange(WaterBodyID, desc(Meets)) %>%
  ggplot(aes(WaterBodyID, ComplianceMeet)) +
  geom_bar(aes(fill=Meets), stat="identity", color='grey50') +
  # mean of swimming and boating compliance; the constant colour label gives
  # the points their own legend entry
  geom_point(aes(x=WaterBodyID, y=Compliance, color='Average\nCompliance\nRate'), show_guide=TRUE,
             data=select(df_wbody_2014, WaterType, WaterBodyID, Compliance) %>% 
              unique) +
  # letter grade just above the 100% line
  geom_text(aes(x = WaterBodyID, label=Grade), y=1.05, size=4, 
            data=select(df_wbody_2014, WaterType, WaterBodyID, Grade) %>% unique) +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1.07),
                     breaks=seq(0, 1, by=0.1)) +
  scale_fill_meets +
  scale_color_manual('', values=c('red')) +
  facet_grid(.~WaterType, scales="free_x", space = "free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  # `order` is an argument of guide_legend(); it was previously passed to
  # guides() by a misplaced parenthesis, where it is not a legend setting
  guides(fill = guide_legend(override.aes=list(shape=NA, colour=NULL), order=1), 
         color = guide_legend('', override.aes = list(fill = NA), order=2)) +
  labs(x="", y="% Compliance", 
       title=paste0("Compliance Rates and Grade by Waterbody for Assessment Year 2014 (Data from ", 2014-n_year+1, "-2014)"))

```

The following table lists the grade and individual compliance rates for each waterbody corresponding to the previous figure.

```{r table-2014-wbody, results="asis"}
# Table of 2014 grades and compliance rates per waterbody (n_year window);
# every rate is formatted as a percentage string before the columns are given
# their display names.
df_wbody %>%
  filter(N_Year == n_year, GradeYear == 2014) %>%
  select(WaterType, WaterBodyID, Grade, Compliance, Swim, Boat,
         Swim_Dry, Swim_Wet, Boat_Dry, Boat_Wet) %>%
  arrange(WaterType, Grade, desc(Compliance)) %>%
  mutate(
    Compliance = scales::percent(Compliance),
    Swim = scales::percent(Swim),
    Boat = scales::percent(Boat),
    Swim_Dry = scales::percent(Swim_Dry),
    Swim_Wet = scales::percent(Swim_Wet),
    Boat_Dry = scales::percent(Boat_Dry),
    Boat_Wet = scales::percent(Boat_Wet)
  ) %>%
  rename(
    `Water Type` = WaterType,
    `Waterbody` = WaterBodyID,
    `Average` = Compliance,
    `Avg Swim` = Swim,
    `Avg Boat` = Boat,
    `Swim/Dry` = Swim_Dry,
    `Swim/Wet` = Swim_Wet,
    `Boat/Dry` = Boat_Dry,
    `Boat/Wet` = Boat_Wet
  ) %>%
  kable()
```


#### MyRWA Only

This figure shows the grades for each waterbody based only on the MyRWA data to evaluate the effect of including MWRA data in the compliance calculations. 

```{r plot-2014-wbody-myrwa, fig.width=10, fig.height=6}
# Waterbody compliance recomputed from MyRWA locations only. Locations are
# averaged with equal weight (plain means of the location-level rates).
df_wbody_myrwa <- df_loc %>%
  filter(Agency == "MyRWA") %>%
  droplevels() %>%
  group_by(WaterType, WaterBodyID, CharacteristicID, GradeYear, N_Year) %>%
  summarise(
    # summed while N_Location still refers to the df_loc column
    N_Waterbody = sum(N_Location),
    # from here on N_Location is the number of locations in the group
    N_Location = n(),
    N_Wet = sum(N_Wet),
    N_Dry = sum(N_Dry),
    Swim = mean(Swim),
    Swim_Dry = mean(Swim_Dry),
    Swim_Wet = mean(Swim_Wet),
    Boat = mean(Boat),
    Boat_Dry = mean(Boat_Dry),
    Boat_Wet = mean(Boat_Wet),
    None = mean(None),
    Compliance = mean(Compliance)
  ) %>%
  ungroup() %>%
  # get_grade() is defined elsewhere in the document
  get_grade() %>%
  (function(x) {
    # Sanity checks on the aggregated table; the first failure aborts the chunk.
    stopifnot(all(x$N_Waterbody-(x$N_Wet+x$N_Dry)==0))
    stopifnot(all(x$Swim >= 0 & x$Swim <= 1, na.rm=TRUE))
    stopifnot(all(x$Boat >= 0 & x$Boat <= 1, na.rm=TRUE))
    stopifnot(all(abs(x$None - 1) <= 1e-6, na.rm=TRUE))
    stopifnot(all(x$Boat >= x$Swim, na.rm=TRUE))
    stopifnot(all(abs(x$Compliance - (x$Swim+x$Boat)/2) <= 1e-6, na.rm=TRUE))
    # hand the unchanged table back to the pipeline
    x
  })

# 2014 slice for the n_year window, prepared for the stacked-bar figure.
df_wbody_myrwa_2014 <- df_wbody_myrwa %>%
  filter(GradeYear == 2014, N_Year == n_year) %>%
  select(WaterType, WaterBodyID, Swim, Boat, Compliance, Grade) %>%
  # Boat becomes the increment above Swim for the stacked bars
  mutate(Boat = Boat - Swim) %>%
  arrange(WaterType, Grade, desc(Compliance)) %>%
  # freeze the sorted row order as the factor order used on the x axis
  mutate(
    WaterBodyID = ordered(as.character(WaterBodyID),
                          levels = unique(WaterBodyID))
  )

# Stacked Swim/Boat compliance bars per waterbody for grade year 2014 using
# MyRWA data only, with the mean compliance as a red point and the letter
# grade printed above each bar.
df_wbody_myrwa_2014 %>%
  gather(Meets, ComplianceMeet, Swim, Boat) %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  # Swim rows before Boat rows within each waterbody; desc() sorts on the
  # factor itself, whereas rev() only reversed the column's row order
  arrange(WaterBodyID, desc(Meets)) %>%
  ggplot(aes(WaterBodyID, ComplianceMeet)) +
  geom_bar(aes(fill=Meets), stat="identity", color='grey50') +
  # mean of swimming and boating compliance; the constant colour label gives
  # the points their own legend entry
  geom_point(aes(x=WaterBodyID, y=Compliance, color='Average\nCompliance\nRate'), show_guide=TRUE,
             data=select(df_wbody_myrwa_2014, WaterType, WaterBodyID, Compliance) %>% 
              unique) +
  # letter grade just above the 100% line
  geom_text(aes(x = WaterBodyID, label=Grade), y=1.05, size=4, 
            data=select(df_wbody_myrwa_2014, WaterType, WaterBodyID, Grade) %>% unique) +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1.07),
                     breaks=seq(0, 1, by=0.1)) +
  scale_fill_meets +
  scale_color_manual('', values=c('red')) +
  facet_grid(.~WaterType, scales="free_x", space = "free_x") +
  # `order` is an argument of guide_legend(); it was previously passed to
  # guides() by a misplaced parenthesis, where it is not a legend setting
  guides(fill = guide_legend(override.aes=list(shape=NA, colour=NULL), order=1), 
         color = guide_legend('', override.aes = list(fill = NA), order=2)) +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(x="", y="% Compliance", 
       title=paste0("Weighted Average % Compliance for Grade Year 2014 (Data from ", 2014-n_year+1, "-2014)\nMyRWA Data Only"))
```

This figure compares the overall mean compliance rate for each waterbody using only MyRWA data vs using both MyRWA and MWRA data. The waterbodies that contain MWRA stations (Mystic River (Fresh), Alewife Brook, Malden River, Mystic River (Salt)) do show some significant differences of 10% or more.

```{r plot-2014-wbody-compare, fig.width=10, fig.height=6}
# Side-by-side bars of the 2014 mean compliance per waterbody, computed from
# MyRWA data only versus MyRWA and MWRA data combined.
rbind(
  df_wbody_myrwa_2014 %>%
    select(WaterType, WaterBodyID, Compliance) %>%
    mutate(Dataset = "MyRWA Only"),
  df_wbody_2014 %>%
    select(WaterType, WaterBodyID, Compliance) %>%
    mutate(Dataset = "MyRWA+MWRA")
) %>%
  # widen and re-lengthen so every waterbody has a row for both datasets; a
  # combination missing from the input is filled with 0
  spread(Dataset, Compliance, fill = 0) %>%
  gather(Dataset, Compliance, `MyRWA Only`, `MyRWA+MWRA`) %>%
  ggplot(aes(x = WaterBodyID, y = Compliance, fill = Dataset)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::percent, lim = c(0, 1),
                     breaks = seq(0, 1, by = 0.1)) +
  scale_fill_manual("Dataset Scheme",
                    values = c("MyRWA Only" = "orangered",
                               "MyRWA+MWRA" = "deepskyblue"),
                    drop = FALSE) +
  facet_grid(. ~ WaterType, scales = "free_x", space = "free_x") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(
    x = "",
    y = "Overall Mean Compliance Rate (%)",
    title = "Comparison of Overall Mean Compliance Rate by Waterbody\n using MyRWA Only and MyRWA+MWRA Data"
  )
```


## Historical Grades

```{r grade-hist}
# Long-format waterbody results for every grade year at the n_year window,
# shared by the freshwater and saltwater historical figures.
df_wbody_hist <- filter(df_wbody, N_Year == n_year) %>%
  arrange(GradeYear, WaterType, Grade, desc(Compliance)) %>%
  mutate(
    # freeze the sorted order of first appearance as the factor order
    WaterBodyID = ordered(as.character(WaterBodyID),
                          levels = unique(WaterBodyID)),
    # Boat becomes the increment above Swim for the stacked bars
    Boat = Boat - Swim
  ) %>%
  gather(Meets, ComplianceMeet, Swim, Boat)
```

This figure shows the compliance rates and grades for each freshwater waterbody since 2006 using a `r n_year`-year rolling window.

```{r plot-grade-hist-fresh, fig.width=10, fig.height=8}
# Historical compliance and grades for the freshwater waterbodies, one panel
# per waterbody. The plot object is kept in p.hist.fresh so it can be printed
# again later in the document.
p.hist.fresh <- filter(df_wbody_hist, WaterType=="Fresh") %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  # Swim rows before Boat rows within each waterbody; desc() sorts on the
  # factor itself, whereas rev() only reversed the column's row order
  arrange(WaterBodyID, desc(Meets)) %>%
  ggplot(aes(factor(GradeYear), ComplianceMeet)) +
  geom_bar(aes(fill=Meets), stat="identity", color='grey50') +
  # mean of swimming and boating compliance; the constant colour label gives
  # the points their own legend entry
  geom_point(aes(y=Compliance, color='Average\nCompliance\nRate'),
             show_guide=TRUE) +
  # letter grade above the 100% line, one label per waterbody and year
  geom_text(aes(x = factor(GradeYear), label=Grade), y=1.07, size=3, 
            data=filter(df_wbody_hist, WaterType=="Fresh") %>%
              select(WaterType, WaterBodyID, GradeYear, Grade) %>% unique) +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1.1),
                     breaks=seq(0, 1, by=0.2)) +
  scale_fill_meets +
  scale_color_manual('', values=c('red')) +
  facet_wrap(~WaterBodyID) +
  # `order` is an argument of guide_legend(); it was previously passed to
  # guides() by a misplaced parenthesis, where it is not a legend setting
  guides(fill = guide_legend(override.aes=list(shape=NA, colour=NULL), order=1), 
         color = guide_legend('', override.aes = list(fill = NA), order=2)) +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(x="", y="% Compliance", 
       title=paste0("Historical Compliance Rates and Grades by Waterbody from 2006-2014 (", n_year," Year Rolling Window)\nFreshwater Waterbodies"))
p.hist.fresh
```

This figure shows the historical compliance rates and grades for saltwater waterbodies. Note that some waterbodies show fewer years since sampling did not begin until after 2006.

```{r plot-grade-hist-salt, fig.width=10, fig.height=6}
# Historical compliance and grades for the saltwater waterbodies, one panel
# per waterbody. The plot object is kept in p.hist.saline so it can be printed
# again later in the document.
p.hist.saline <- filter(df_wbody_hist, WaterType=="Saline") %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  # Swim rows before Boat rows within each waterbody; desc() sorts on the
  # factor itself, whereas rev() only reversed the column's row order
  arrange(WaterBodyID, desc(Meets)) %>%
  ggplot(aes(factor(GradeYear), ComplianceMeet)) +
  geom_bar(aes(fill=Meets), stat="identity", color='grey50') +
  # mean of swimming and boating compliance; the constant colour label gives
  # the points their own legend entry
  geom_point(aes(y=Compliance, color='Average\nCompliance\nRate'),
             show_guide=TRUE) +
  # letter grade above the 100% line, one label per waterbody and year
  geom_text(aes(x = factor(GradeYear), label=Grade), y=1.07, size=3, 
            data=filter(df_wbody_hist, WaterType=="Saline") %>%
              select(WaterType, WaterBodyID, GradeYear, Grade) %>% unique) +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1.1),
                     breaks=seq(0, 1, by=0.2)) +
  scale_fill_meets +
  scale_color_manual('', values=c('red')) +
  facet_wrap(~WaterBodyID) +
  # `order` is an argument of guide_legend(); it was previously passed to
  # guides() by a misplaced parenthesis, where it is not a legend setting
  guides(fill = guide_legend(override.aes=list(shape=NA, colour=NULL), order=1), 
         color = guide_legend('', override.aes = list(fill = NA), order=2)) +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(x="", y="% Compliance", 
       title=paste0("Historical Compliance Rates and Grades by Waterbody from 2006-2014 (", n_year," Year Rolling Window)\nSaltwater Waterbodies"))
p.hist.saline
```

## Comparison to Historical Charles River Grades

In addition to the Mystic River, the USEPA also assigns annual grades for the Charles River watershed. However, there are significant differences in how these grades are assigned that invalidate any direct comparison of the grades. The primary difference is that the Charles River grade is based on sampling from the mainstem only, whereas the Mystic River grade has historically been based on all locations which are primarily within tributaries to the Mystic. The grades are also based on different quantitative and qualitative criteria. 

For comparison, the following figure shows the historical grades and compliance rates from 2006-2014 between the Charles River and the freshwater portion of the Mystic River mainstem. The Charles River grades and compliance rates are those reported by EPA in its annual grade announcements. For the Mystic River (Fresh) mainstem, the grades are based on both MyRWA and MWRA data using only samples collected in each assessment year and based on the revised grading method described above. Although the grades presented in previous sections were based on a 3-year rolling window of data, a 1-year window is used for this comparison to be consistent with the Charles River grades. 

The figure indicates that the revised grading system developed by MyRWA is very consistent with the historical grades reported by USEPA for the Charles River. It also shows that the rate of swimming compliance in the freshwater portion of the Mystic River mainstem has historically been greater than that in the Charles River mainstem in all years from 2006-2014, and that the rate of boating compliance is similar between the two rivers although less variable in the Mystic River. 

```{r plot-charles-compare, fig.width=10, fig.height=5}
# Charles River grades read from charles_grades.csv. The rate columns are
# divided by 100 to put them on a 0-1 scale, and the overall compliance is
# recomputed as the mean of the rescaled Boat and Swim rates.
charles <- mutate(
  read.csv("charles_grades.csv", stringsAsFactors = FALSE),
  WaterBodyID = "Charles River",
  Boat = Boat / 100,
  Swim = Swim / 100,
  Boat_Dry = Boat_Dry / 100,
  Swim_Dry = Swim_Dry / 100,
  Boat_Wet = Boat_Wet / 100,
  Swim_Wet = Swim_Wet / 100,
  # uses the Boat and Swim values already rescaled above
  Compliance = (Boat + Swim) / 2
)

# Freshwater Mystic mainstem results for the 1-year window, all grade years.
mystic <- df_wbody %>%
  filter(WaterBodyID == "Mystic River (Fresh)", N_Year == 1)

# Both rivers stacked into one table with a common set of columns.
compare <- rbind(
  charles %>% select(WaterBodyID, GradeYear, Grade, Compliance, Boat, Swim),
  mystic %>% select(WaterBodyID, GradeYear, Grade, Compliance, Boat, Swim)
)

# Historical compliance and grades for the Charles River and the freshwater
# Mystic River mainstem, one panel per river.
mutate(compare, Boat=Boat-Swim) %>%  # Boat becomes the increment above Swim
  gather(Meets, ComplianceMeet, Boat, Swim) %>%
  mutate(Meets=ordered(as.character(Meets), levels=c('Boat', 'Swim'))) %>%
  arrange(WaterBodyID, desc(Meets)) %>%
  ggplot(aes(factor(GradeYear), ComplianceMeet)) +
  geom_bar(aes(fill=Meets), stat="identity", color='grey50') +
  # mean of swimming and boating compliance; the constant colour label gives
  # the points their own legend entry
  geom_point(aes(y=Compliance, color='Average\nCompliance\nRate'),
             show_guide=TRUE) +
  # letter grade above the 100% line, one label per river and year
  geom_text(aes(x = factor(GradeYear), label=Grade), y=1.07, size=3, 
            data=select(compare, WaterBodyID, GradeYear, Grade) %>% unique) +
  scale_y_continuous(labels=scales::percent, lim=c(0, 1.1),
                     breaks=seq(0, 1, by=0.2)) +
  scale_fill_meets +
  scale_color_manual('', values='red') +
  facet_wrap(~WaterBodyID) +
  # `order` is an argument of guide_legend(); it was previously passed to
  # guides() by a misplaced parenthesis, where it is not a legend setting
  guides(fill = guide_legend(override.aes=list(shape=NA, colour=NULL), order=1), 
         color = guide_legend('', override.aes = list(fill = NA), order=2)) +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(x="", y="% Compliance", 
       title=paste0("Comparison of Historical Compliance Rates and Grades for the\nCharles River and Mystic River (Fresh)"))
```

This table shows the corresponding compliance rates and grades for each river.

```{r table-charles-compare, results="asis"}
# Table of grades and compliance rates for both rivers, ordered by river and
# year; every rate is formatted as a percentage string before the columns are
# given their display names.
rbind(
  charles %>%
    select(WaterBodyID, GradeYear, Grade, Compliance, Swim, Boat,
           Swim_Dry, Swim_Wet, Boat_Dry, Boat_Wet),
  mystic %>%
    select(WaterBodyID, GradeYear, Grade, Compliance, Swim, Boat,
           Swim_Dry, Swim_Wet, Boat_Dry, Boat_Wet)
) %>%
  arrange(WaterBodyID, GradeYear) %>%
  mutate(
    Compliance = scales::percent(Compliance),
    Swim = scales::percent(Swim),
    Boat = scales::percent(Boat),
    Swim_Dry = scales::percent(Swim_Dry),
    Swim_Wet = scales::percent(Swim_Wet),
    Boat_Dry = scales::percent(Boat_Dry),
    Boat_Wet = scales::percent(Boat_Wet)
  ) %>%
  rename(
    `Waterbody` = WaterBodyID,
    `Average` = Compliance,
    `Year` = GradeYear,
    `Avg Swim` = Swim,
    `Avg Boat` = Boat,
    `Swim/Dry` = Swim_Dry,
    `Swim/Wet` = Swim_Wet,
    `Boat/Dry` = Boat_Dry,
    `Boat/Wet` = Boat_Wet
  ) %>%
  kable()
```


# Conclusions

The following conclusions were reached from this analysis:

- There is wide variation in the compliance and resulting grades for different waterbodies throughout the Mystic River basin. Tributaries tend to have lower compliance than mainstem stations. Based on the revised grading system, the freshwater portion of Mystic River mainstem has been achieving a grade of B+ or better since 2006 including both MWRA and MyRWA data.
- Including the MWRA data greatly increases the number of samples in some waterbodies (Alewife Brook, Mystic River, Malden River). However, the MWRA data do introduce some seasonal bias because samples are primarily collected during the recreation season. 
- Weighting the waterbody compliance by the number of samples at each location produces similar results as using equal weights among the locations.
- Using a multi-year rolling window to evaluate the compliance and grade for each waterbody results in a more stable assessment. This approach effectively increases the sample size used to compute the rates of compliance for each assessment year. This is particularly useful for locations and waterbodies that only have MyRWA data, which are collected monthly. In some years, no wet weather samples were collected and thus the weighted average compliance standardized to 25% and 75% wet and dry weather cannot be directly computed.
- A 3-year rolling window was identified as resulting in a good balance between year-to-year stability and reflecting short-term changes. A more robust statistical analysis to identify the sample size required to achieve a target level of uncertainty is recommended.
- In comparison to the historical compliance rates and grades reported by USEPA for the Charles River, the revised grading system for the Mystic River would yield comparable results when comparing only the freshwater mainstem portion of the Mystic.

The source code for this analysis is available on Github: <https://github.com/walkerjeffd/myrwa-epa-grade>

```{r export}
# Export the sample-level dataset to bacteria.csv, sorted by agency, location
# and sample time; the Meets column is written out under the name MeetStd.
df %>%
  select(ID, Agency, ProjectID, WaterType, WaterBodyID, LocationID,
         CharacteristicID, Datetime, ResultValue, Units, Qualifier,
         FlagID, Precip48, Weather, MeetStd = Meets) %>%
  arrange(Agency, LocationID, Datetime) %>%
  write.csv(file = "bacteria.csv", row.names = FALSE)

# Export only the locations that occur in the sample dataset to locations.csv,
# without the Location_Waterbody column.
loc %>%
  filter(LocationID %in% unique(df$LocationID)) %>%
  select(-Location_Waterbody) %>%
  write.csv(file = "locations.csv", row.names = FALSE)
```

```{r pdf}
# Write the key figures to standalone PDF files under pdf/.
# NOTE(review): p.hist.fresh and p.hist.saline are assigned in their plotting
# chunks, but no assignment of p.2014 appears in the chunks near this one --
# confirm it is defined earlier in the document, otherwise print() fails here.

# pdf() does not create missing directories, so make sure pdf/ exists first
# (no warning is raised when it is already there).
dir.create('pdf', showWarnings=FALSE)

pdf('pdf/figure-2014-grades.pdf', width=11, height=6)
print(p.2014)
dev.off()
pdf('pdf/figure-historical-grades-freshwater.pdf', width=11, height=8.5)
print(p.hist.fresh)
dev.off()
pdf('pdf/figure-historical-grades-saline.pdf', width=11, height=6)
print(p.hist.saline)
dev.off()
```


```{r plot-boat-cume, eval=FALSE}
# Cumulative distribution of boating compliance relative to the share of
# samples that do not meet the swimming standard (chunk is not evaluated).
spread(df_2014_std, Meets, Compliance) %>%
  mutate(BoatFrac = Boat / (1 - Swim)) %>%
  # percentile rank of each relative boating fraction
  mutate(BoatPtile = cume_dist(BoatFrac)) %>%
  arrange(desc(BoatPtile)) %>%
  ggplot(aes(x = BoatPtile, y = BoatFrac)) +
  geom_point()
```


```{r plot-corr-boat-swim, eval=FALSE}
# Strong correlation between swim and boat compliances
# (chunk is not evaluated when knitting).

# Upper panel: total boating compliance (Boat + Swim) against swimming
# compliance, with a linear fit per water type.
p1 <- spread(df_2014_std, Meets, Compliance) %>%
  mutate(Boat = Boat + Swim) %>%
  ggplot(aes(x = Swim, y = Boat)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~ WaterType) +
  xlim(0, 1) +
  # ylim(0, 1) +
  labs(
    x = "Swimming Compliance",
    y = "Boating Compliance",
    title = "Swimming vs Boating Compliance for 2010-2014"
  )

# Lower panel: boating compliance relative to the non-swimmable share,
# Boat / (1 - Swim), against swimming compliance.
p2 <- spread(df_2014_std, Meets, Compliance) %>%
  ggplot(aes(x = Swim, y = Boat / (1 - Swim))) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~ WaterType) +
  xlim(0, 1) +
  # ylim(0, 1) +
  labs(
    x = "Swimming Compliance",
    y = "Boating Compliance Relative to Swimming\nBoat/(1-Swim)",
    title = "Swimming vs Relative Boating Compliance for 2010-2014"
  )

# stack the two panels vertically
grid.arrange(p1, p2, nrow = 2)
```

```{r plot-corr-avg-swim, eval=FALSE}
# Relationship between the average of the Swim and Boat values and the
# swimming compliance. Only the y axis is limited to [0, 1].
df_2014_std %>%
  spread(Meets, Compliance) %>%
  mutate(Compliance=(Swim + Boat) / 2) %>%
  ggplot(mapping=aes(x=Compliance, y=Swim)) +
  geom_point() +
  geom_smooth(method="lm") +
  ylim(0, 1) +
  facet_wrap(~WaterType) +
  labs(title="Swimming vs Average Compliance for 2010-2014",
       x="Average Compliance",
       y="Swimming Compliance")

```


```{r plot-bact-2014-cnt-season, fig.width=8, fig.height=6, eval=FALSE}
# Number of sampling events per location in Summer (May-Oct) and Winter
# (Nov-Apr) of 2014. The MyRWA stations all have about 10-12 samples, as
# expected from the monthly sampling schedule. The MWRA locations vary and
# tend to have far more samples in summer than in winter. This dataset
# includes both the `CSORWM` and `BHWQM` programs, which may have different
# sampling schedules.

df %>%
  filter(Year == 2014) %>%
  mutate(Season=ifelse(Month %in% 5:10, "Summer", "Winter")) %>%
  group_by(Agency, Location_Waterbody, LocationID,
           WaterBodyID, WaterType, CharacteristicID, Season) %>%
  summarise(N=n()) %>%
  ungroup() %>%
  ggplot(mapping=aes(x=Location_Waterbody, y=N, fill=Season)) +
  geom_bar(stat="identity", position="stack") +
  facet_grid(. ~ WaterType, scales="free_x", space="free_x") +
  scale_fill_manual(values=c(Summer="orangered", Winter="deepskyblue")) +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(title="Number of Sampling Events in 2014",
       x="",
       y="Number of Samples in 2014 by Season")
```

```{r plot-bact-2014-cnt-weather, fig.width=8, fig.height=6, eval=FALSE}
# This figure shows the number of samples collected in Dry and Wet weather at
# each location. Note that almost 50% of the samples at the MyRWA stations were
# collected in wet weather, which is high relative to the expected frequency of
# wet weather conditions (25%) shown in the Precipitation Data section. Some of
# the MWRA stations, such as those in Alewife Brook, also have relatively high
# fractions of wet weather. This could be the result of the `CSORWM` program
# targeting wet weather sampling events.

filter(df, Year == 2014) %>%
  group_by(Agency, Location_Waterbody, CharacteristicID, WaterType, Weather) %>%
  summarise(N=n()) %>%
  ungroup %>%
  ggplot(aes(Location_Waterbody, N, fill=Weather)) +
  # bars are stacked sample counts (not fractions), so label the axis as such
  geom_bar(stat="identity", position="stack") +
  scale_fill_weather +
  facet_grid(.~WaterType, scales='free_x', space="free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(x="", y="Number of Samples",
       title="Number of Samples in 2014 by Weather")
```

```{r plot-bact-hist-cnt, fig.width=8, fig.height=6, eval=FALSE}
# Number of samples collected per year at four selected stations, split by
# weather condition. This reveals a potential issue: some years (2003, 2009)
# have only dry weather samples, so computing the mean compliance by
# aggregating over dry and wet weather is incorrect for those years.

df %>%
  filter(Year %in% 2003:2014,
         LocationID %in% c("MIB001", "MIC004", "MYR071", "MAR036"),
         # ENT results at MAR036 are left out
         !(LocationID == "MAR036" & CharacteristicID == "ENT")) %>%
  group_by(Location_Waterbody, CharacteristicID, Year, Weather) %>%
  summarise(N=n()) %>%
  ggplot(mapping=aes(x=factor(Year), y=N, fill=Weather)) +
  geom_bar(stat="identity") +
  facet_wrap(~Location_Waterbody + CharacteristicID) +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  scale_fill_weather +
  labs(title="Number of Samples at Selected MyRWA Stations from 2003-2014",
       x="",
       y="Number of Samples")

```


```{r plot-bact-2014-meets, fig.width=10, fig.height=6, eval=FALSE}
# Fraction of all 2014 samples that meet the swimming standard, the boating
# standard, or neither standard, shown separately for dry and wet weather.

df %>%
  filter(Year == 2014) %>%
  group_by(Agency, WaterType, Location_Waterbody, LocationID,
           WaterBodyID, CharacteristicID, Weather, Meets) %>%
  summarise(N_Meets=n()) %>%
  ungroup() %>%
  ggplot(mapping=aes(x=Location_Waterbody, y=N_Meets, fill=Meets)) +
  # position="fill" rescales every bar to 100%
  geom_bar(stat="identity", position="fill") +
  scale_fill_manual(values=c("Swim"="deepskyblue",
                             "Boat"="chartreuse3",
                             "None"="orangered")) +
  scale_y_continuous(labels=scales::percent) +
  facet_grid(Weather ~ WaterType, scales="free_x", space="free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(title="Fraction of Samples Meeting Standards in 2014 by Weather and Location",
       x="",
       y="Compliance Fraction")
```

```{r plot-bact-2014-comp-wbody, fig.width=8, fig.height=6, eval=FALSE}
# Same compliance fractions as by location, but grouped by water body. All
# samples from the individual locations are simply pooled, so locations with
# more data carry more weight in the overall compliance.

df %>%
  filter(Year == 2014) %>%
  group_by(WaterType, WaterBodyID, CharacteristicID, Weather, Meets) %>%
  summarise(N_Meets=n()) %>%
  ggplot(mapping=aes(x=WaterBodyID, y=N_Meets, fill=Meets)) +
  # position="fill" rescales every bar to 100%
  geom_bar(stat="identity", position="fill") +
  scale_fill_manual(values=c("Swim"="deepskyblue",
                             "Boat"="chartreuse3",
                             "None"="orangered")) +
  scale_y_continuous(labels=scales::percent) +
  facet_grid(Weather ~ WaterType, scales="free_x", space="free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(title="Fraction Events Meeting Standards in 2014 by Weather and Waterbody",
       x="",
       y="Compliance Fraction")
```

```{r plot-bact-2014-comp-std, fig.width=10, fig.height=6, eval=FALSE}
# For each station, the fraction of samples in compliance is aggregated over
# the two weather conditions as a weighted mean of the Dry and Wet compliance.
# The weights are the long-term mean frequencies of Dry and Wet weather (75%
# and 25%). The figure shows this average % compliance for each standard at
# each location; MyRWA stations are bordered in red.

# The figure therefore represents compliance with each standard in a typical
# year (25% wet weather), removing the hydrologic variability and any bias
# from sampling more wet events than the annual wet weather frequency.

df %>%
  filter(Year == 2014) %>%
  group_by(Agency, WaterType, Location_Waterbody, LocationID,
           WaterBodyID, CharacteristicID, Weather, Meets) %>%
  summarise(N_Meets=n()) %>%
  # summarise() dropped the Meets grouping: this sums within each weather class
  mutate(N_Weather=sum(N_Meets)) %>%
  group_by(LocationID, WaterBodyID, CharacteristicID) %>%
  mutate(N_Loc=sum(N_Meets)) %>%
  ungroup() %>%
  filter(Meets != "None") %>%
  mutate(Compliance=N_Meets / N_Weather) %>%
  select(-N_Meets, -N_Weather) %>%
  spread(Weather, Compliance, fill=0) %>%
  mutate(WtdAvg=Dry * 0.75 + Wet * 0.25) %>%
  gather(Weather, Compliance, Dry:WtdAvg) %>%
  arrange(WaterType, Location_Waterbody, CharacteristicID, Meets, Weather) %>%
  # only the weather-weighted value is plotted
  filter(Weather == "WtdAvg") %>%
  ggplot(mapping=aes(x=Location_Waterbody, y=Compliance,
                     fill=Meets, color=Agency)) +
  geom_bar(stat="identity") +
  scale_y_continuous(labels=scales::percent,
                     limits=c(0, 1),
                     breaks=seq(0, 1, by=0.2)) +
  scale_color_manual(values=c("MyRWA"="red", "MWRA"="grey50")) +
  scale_fill_manual(values=c("Swim"="deepskyblue", "Boat"="chartreuse3")) +
  facet_grid(. ~ WaterType, scales="free_x", space="free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(title="Average % Compliance in 2014",
       x="",
       y="Weight Average % Compliance")
```

```{r plot-bact-2014-comp, fig.width=10, fig.height=6, eval=FALSE}
# Mean compliance over the swimming and boating standards (effectively the
# mid-point of the green bars in the per-standard figure). This overall average
# compliance rate was previously used to assign grades for the entire
# watershed. However, it does not account for the variability between the
# relative compliance with the boating and swimming standards.

df %>%
  filter(Year == 2014) %>%
  group_by(WaterType, Agency, Location_Waterbody, LocationID, WaterBodyID,
           CharacteristicID, Weather, Meets) %>%
  summarise(N_Meets=n()) %>%
  # summarise() dropped the Meets grouping: this sums within each weather class
  mutate(N_Weather=sum(N_Meets)) %>%
  group_by(LocationID, WaterBodyID, CharacteristicID) %>%
  mutate(N_Loc=sum(N_Meets)) %>%
  ungroup() %>%
  filter(Meets != "None") %>%
  # step 1: make the boating count cumulative (Swim + Boat)
  spread(Meets, N_Meets, fill=0) %>%
  mutate(Boat=Swim + Boat) %>%
  gather(Meets, N_Meets, Swim:Boat) %>%
  arrange(Location_Waterbody, CharacteristicID, Meets, Weather) %>%
  mutate(Compliance=N_Meets / N_Weather) %>%
  select(-N_Weather, -N_Meets) %>%
  # step 2: weather-weighted compliance (75% dry, 25% wet)
  spread(Weather, Compliance, fill=0) %>%
  mutate(WtdAvg=Dry * 0.75 + Wet * 0.25) %>%
  gather(Weather, Compliance, Dry:WtdAvg) %>%
  arrange(Location_Waterbody, CharacteristicID, Meets, Weather) %>%
  filter(Weather == "WtdAvg") %>%
  # step 3: average of the swimming and boating compliance
  spread(Meets, Compliance, fill=0) %>%
  mutate(WtdAvg=(Swim + Boat) / 2) %>%
  gather(Meets, Compliance, Swim:WtdAvg) %>%
  filter(Meets == "WtdAvg") %>%
  ggplot(mapping=aes(x=Location_Waterbody, y=Compliance, color=Agency)) +
  geom_bar(stat="identity", fill="grey50") +
  scale_y_continuous(labels=scales::percent,
                     limits=c(0, 1),
                     breaks=seq(0, 1, by=0.2)) +
  facet_grid(. ~ WaterType, scales="free_x", space="free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  scale_color_manual(values=c("MyRWA"="orangered", "MWRA"="grey50")) +
  labs(title="Average % Compliance in 2014",
       x="",
       y="Weight Average % Compliance")
```

```{r plot-bact-2014-comp-std-wbody, fig.width=10, fig.height=6, eval=FALSE}
# Weather-weighted compliance rates per water body, obtained by pooling all
# samples from the individual locations of each water body.

df %>%
  filter(Year == 2014) %>%
  group_by(WaterType, WaterBodyID, CharacteristicID, Weather, Meets) %>%
  summarise(N_Meets=n()) %>%
  # summarise() dropped the Meets grouping: this sums within each weather class
  mutate(N_Weather=sum(N_Meets)) %>%
  ungroup() %>%
  filter(Meets != "None") %>%
  mutate(Compliance=N_Meets / N_Weather) %>%
  select(-N_Meets, -N_Weather) %>%
  spread(Weather, Compliance, fill=0) %>%
  mutate(WtdAvg=Dry * 0.75 + Wet * 0.25) %>%
  gather(Weather, Compliance, Dry:WtdAvg) %>%
  arrange(WaterBodyID, CharacteristicID, Meets, Weather) %>%
  # only the weather-weighted value is plotted
  filter(Weather == "WtdAvg") %>%
  ggplot(mapping=aes(x=WaterBodyID, y=Compliance, fill=Meets)) +
  geom_bar(stat="identity", color="grey50") +
  scale_y_continuous(labels=scales::percent,
                     limits=c(0, 1),
                     breaks=seq(0, 1, by=0.2)) +
  facet_grid(. ~ WaterType, scales="free_x", space="free_x") +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  scale_fill_manual(values=c("Swim"="deepskyblue", "Boat"="chartreuse3")) +
  labs(title="Average % Compliance in 2014 by Water Body",
       x="",
       y="Weight Average % Compliance")
```


```{r alt-grade-system, eval=FALSE}
# This grading method uses the swimming compliance for the letter part of the
# grade (A, B, ...) and the relative boating compliance for the minor part of
# the grade (+/ /-).

# If the boating compliance covers at least 66% of the gap between the
# swimming compliance and 100%, the grade is adjusted up to a "+". Between 33%
# and 66% there is no adjustment, and below 33% the grade gets a "-".

# One row per grade, from "A+" down to "F ":
#   swim - swimming compliance threshold (%) of the letter
#   boat - boating share (%) above the swim threshold for the +/ /- step
grades_swim <- data.frame(
  letter=c(rep(c("A", "B", "C", "D"), each=3), "F"),
  plusminus=c(rep(c("+", " ", "-"), times=4), " "),
  stringsAsFactors=FALSE
) %>%
  mutate(
    grade=paste0(letter, plusminus),
    grade=ordered(grade, levels=grade),
    swim=as.numeric(plyr::revalue(letter,
                                  c(A=90, B=70, C=50, D=30, F=0))),
    boat=as.numeric(plyr::revalue(plusminus,
                                  c("+"=66, " "=33, "-"=0))),
    boat=(100 - swim) * boat / 100 + swim,
    boat=boat - swim,
    # the single F row simply reuses its swim value
    boat=ifelse(letter == "F", swim, boat)
  ) %>%
  filter(!(grade %in% c("F+", "F-")))

# Stacked bars of the swim and boat values (as fractions) for every grade
grades_swim %>%
  select(grade, boat, swim) %>%
  gather(standard, value, -grade) %>%
  mutate(standard=ordered(standard, levels=c("swim", "boat"))) %>%
  arrange(standard, grade) %>%
  ggplot(mapping=aes(x=grade, y=value / 100, fill=standard)) +
  geom_bar(stat="identity", position="stack", color="grey50") +
  scale_y_continuous(labels=scales::percent,
                     limits=c(0, 1),
                     breaks=seq(0, 1, by=0.2)) +
  scale_fill_manual("Standard",
                    values=c("swim"="deepskyblue", "boat"="chartreuse3")) +
  labs(x="Grade", y="Compliance %")

# Assign a letter grade with a +/ /- modifier to every row of `x`.
#
# Arguments:
#   x  data frame with numeric columns `Swim` and `Boat`, both as fractions
#      (0-1); `Boat` is compared against fractions of (1 - Swim).
#
# Returns `x` with an added ordered factor column `Grade` whose levels are
# taken from the global `grades_swim$grade`. Rows with a missing or
# out-of-range value get an NA grade.
#
# The letter is the first row of `grades_swim` whose `swim` threshold is
# reached. The comparison is inclusive (>=) so that a compliance exactly on a
# threshold earns that letter (matching the ">= 90" style criteria) and a
# swimming compliance of 0 maps to "F" instead of matching no row at all.
grade <- function(x) {
  swim_pct <- x$Swim * 100
  letter_row <- vapply(
    swim_pct,
    function(s) match(TRUE, s >= grades_swim$swim),
    integer(1)
  )
  letter_grade <- grades_swim$letter[letter_row]

  # share of the non-swimmable remainder that still meets the boating standard
  headroom <- 1 - x$Swim
  minor_grade <- ifelse(x$Boat >= headroom * (2/3), "+",
                        ifelse(x$Boat < headroom * (1/3), "-", " "))
  # an F never carries a modifier; which() skips rows with an NA letter
  minor_grade[which(letter_grade == "F")] <- " "

  # labels that are not valid grade levels (NA inputs) become NA here
  x$Grade <- ordered(paste0(letter_grade, minor_grade),
                     levels=grades_swim$grade)
  x
}
```

```{r tab-grades-swim, results="asis", eval=FALSE}
# Table of the specific numeric criteria for each grade.

grades_swim %>%
  mutate(
    # swimming compliance range of the letter
    swim_min=swim,
    swim_max=ifelse(letter == "A", Inf,
                    ifelse(letter == "F", 30, swim + 20)),
    swim_text=ifelse(swim_max == Inf,
                     paste(">=", swim_min),
                     ifelse(swim_min == 0,
                            paste("<", swim_max),
                            paste(swim_min, "-", swim_max))),
    # boating range: the upper bound is the boat value of the previous row
    boat=boat + swim,
    boat_min=boat,
    boat_max=ifelse(plusminus == "+", Inf,
                    ifelse(letter == "F", 100, lag(boat))),
    boat_text=ifelse(boat_max == Inf,
                     paste(">=", round(boat_min, digits=0)),
                     paste(round(boat_min, digits=0), "-",
                           round(boat_max, digits=0)))
  ) %>%
  select(Grade=grade,
         `Swim Compliance`=swim_text,
         `Boat Compliance`=boat_text) %>%
  kable(format="markdown", align="c")
```


```{r compute-basin, eval=FALSE}
# Basin-wide compliance from MyRWA samples only, computed for rolling windows
# of 1 to 5 years and stacked into one data frame (window size in N_Year).
df_roll_basin <- do.call(rbind, lapply(seq(1, 5), function(n_window) {
  basin <- grade_years(n=n_window, start=2006, end=2014) %>%
    left_join(filter(df, Agency == "MyRWA"), by="Year") %>%
    arrange(GradeYear, Year, Weather, Meets) %>%
    group_by(GradeYear, Weather, Meets) %>%
    summarise(N_Meets=n()) %>%
    # fill in zero counts for missing Meets classes
    spread(Meets, N_Meets, fill=0) %>%
    gather(Meets, N_Meets, Swim, Boat, None) %>%
    group_by(GradeYear) %>%
    mutate(N_Sample=sum(N_Meets)) %>%
    # fill in zero counts for missing Weather classes
    spread(Weather, N_Meets, fill=0) %>%
    gather(Weather, N_Meets, Dry, Wet) %>%
    group_by(GradeYear, Weather) %>%
    mutate(N_Weather=sum(N_Meets)) %>%
    ungroup() %>%
    mutate(Compliance=N_Meets / N_Weather) %>%
    # weather weights: 75% dry, 25% wet
    mutate(WeatherWeight=plyr::revalue(Weather, c("Dry"=0.75, "Wet"=0.25)),
           WeatherWeight=as.numeric(as.character(WeatherWeight))) %>%
    group_by(GradeYear, Meets, N_Sample) %>%
    summarise(Compliance=sum(Compliance * WeatherWeight) / sum(WeatherWeight)) %>%
    ungroup()
  basin$N_Year <- n_window
  basin
}))

# One row per grade year and window; Compliance is (2*Swim + Boat)/2 and the
# result is passed through get_grade().
df_basin <- df_roll_basin %>%
  filter(Meets != "None") %>%
  spread(Meets, Compliance) %>%
  mutate(Compliance=(Swim * 2 + Boat) / 2) %>%
  get_grade()
```

```{r plot-grade-basin, fig.width=6, fig.height=8, eval=FALSE}
## Basin-wide

# In previous years, USEPA pooled all available data to compute a single grade
# for the entire watershed. For comparison, the following figure shows the
# result of this approach using only MyRWA data with multiple rolling windows.
# The 1-Year window should yield the same compliance rates used for previous
# EPA grade assessments. Note that the grade, however, is different because
# the EPA grade is based on both a quantitative and qualitative assessment as
# described above with grade thresholds that probably differ from those used
# here.

df_basin %>%
  gather(Meets, ComplianceMeet, Swim, Boat) %>%
  arrange(Meets) %>%
  mutate(Label=paste(N_Year, "Years")) %>%
  ggplot(mapping=aes(x=factor(GradeYear), y=ComplianceMeet)) +
  geom_bar(aes(fill=Meets), stat="identity", color="grey50") +
  # red points mark the Compliance value
  geom_point(aes(y=Compliance), color="red") +
  # grade labels are drawn above the bars, one per year and window
  geom_text(aes(x=factor(GradeYear), label=Grade), y=1.07, size=3,
            data=df_basin %>%
              select(N_Year, GradeYear, Grade) %>%
              unique() %>%
              mutate(Label=paste(N_Year, "Years"))) +
  scale_y_continuous(labels=scales::percent,
                     limits=c(0, 1.1),
                     breaks=seq(0, 1, by=0.2)) +
  scale_fill_meets +
  facet_grid(Label ~ .) +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(title="Basin-wide Compliance from 2006-2014\nMyRWA Data Only",
       x="",
       y="Weight Average % Compliance")

```

```{r table-basin-grade-5, eval=FALSE}
# This table lists the compliance rates for swimming and boating, as well as
# the average compliance rate and resulting grade for each year. The results
# are based on only 1 year of data (i.e. no rolling window).

# It is important to note that MyRWA samples the freshwater and saltwater
# locations on different days. Therefore, the distribution of wet/dry sample
# days is not consistent between the two groups of locations.

df_basin %>%
  filter(N_Year == 1) %>%
  # boating is reported cumulatively (Boat + Swim)
  mutate(Boat=Boat + Swim) %>%
  mutate(Swim=scales::percent(Swim),
         Boat=scales::percent(Boat),
         Compliance=scales::percent(Compliance)) %>%
  select(-N_Year) %>%
  rename(Year=GradeYear,
         `No. Samples`=N_Sample,
         `Swim Compliance`=Swim,
         `Boat Compliance`=Boat,
         `Avg Compliance`=Compliance) %>%
  kable(align="c")
```


```{r historical-grade, eval=FALSE}
# Two-panel PDF of the historical freshwater results for 1-3 year windows:
# compliance bars with grade labels on top (p1) and the grade trajectory of
# each water body below (p2).

# Create the output folder on demand (no-op when it already exists).
dir.create("pdf", showWarnings=FALSE)
pdf("pdf/historical-grades.pdf", width=17, height=11)

# p1: Swim plus the increment of Boat over Swim as stacked bars, the
# Compliance value as red points, and the grade as text above each bar.
p1 <- filter(df_wbody, N_Year %in% seq(1, 3), WaterType=="Fresh") %>%
  mutate(Boat=Boat-Swim,
         Label=paste(N_Year, "Years")) %>%
  gather(Meets, ComplianceMeet, Swim, Boat) %>%
  ggplot(aes(factor(GradeYear), ComplianceMeet)) +
  geom_bar(aes(fill=Meets), stat="identity", color='grey50') +
  geom_point(aes(y=Compliance), color='red') +
  geom_text(aes(x = factor(GradeYear), label=Grade), y=1.07, size=3, 
            data=filter(df_wbody, N_Year %in% seq(1, 3), WaterType=="Fresh") %>%
              mutate(Label=paste(N_Year, "Years"))) +
  scale_y_continuous(labels=scales::percent, limits=c(0, 1.1),
                     breaks=seq(0, 1, by=0.2)) +
  scale_fill_meets +
  facet_grid(Label~WaterBodyID) +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
  labs(x="", y="% Compliance", 
       title=paste0("Historical Compliance Rates and Grades from 2006-2014 with 1-3 Year Windows\nFreshwater Waterbodies"))

# p2: grade per year; the grade levels are reversed so that the first level
# is drawn at the top of the axis.
p2 <- filter(df_wbody, N_Year %in% seq(1, 3), WaterType=="Fresh") %>%
  filter(!is.na(Grade)) %>%
  mutate(Grade=ordered(as.character(Grade), levels=rev(levels(Grade))),
         Label=paste(N_Year, "Years")) %>%
  arrange(N_Year, WaterBodyID, GradeYear) %>%
  ggplot(aes(GradeYear, Grade, group=WaterBodyID)) +
  geom_point() +
  geom_line() +
  facet_grid(Label~WaterBodyID) +
  theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5),
        axis.text.y=element_text(size=6)) +
  # the y axis shows the grade, not a compliance percentage
  labs(x="", y="Grade", 
       title=paste0("Historical Grades by Waterbody from 2006-2014 with 1-3 Year Windows\nFreshwater Waterbodies"))

grid.arrange(p1, p2, nrow=2)

# invisible() keeps the value returned by dev.off() out of the rendered report
invisible(dev.off())
```

```{r nocso, eval=FALSE}
# Location-level compliance using only the BASE and BHWQM projects (i.e.
# without CSORWM), for rolling windows of 1 to 5 years. The per-window results
# are stacked into one data frame with the window size stored in N_Year.
df_loc_nocso <- do.call(rbind, lapply(seq(1, 5), function(n_window) {
  # sanity checks on the computed rates; rows without wet samples are exempt
  check_location_rates <- function(d) {
    no_wet <- d$N_Wet == 0
    stopifnot(all(d$N_Location - (d$N_Wet + d$N_Dry) == 0))
    stopifnot(all((d$Swim >= 0 & d$Swim <= 1) | no_wet))
    stopifnot(all((d$Boat >= 0 & d$Boat <= 1) | no_wet))
    stopifnot(all(abs(d$None - 1) <= 1e-6 | no_wet))
    stopifnot(all(d$Boat >= d$Swim | no_wet))
    d
  }

  by_location <- grade_years(n=n_window, start=2006, end=2014) %>%
    left_join(filter(df, ProjectID %in% c("BASE", "BHWQM")), by="Year") %>%
    group_by(LocationID, GradeYear) %>%
    mutate(N_Location=n()) %>%
    group_by(Agency, WaterType, Location_Waterbody, LocationID, WaterBodyID,
             CharacteristicID, GradeYear, Weather, Meets, N_Location) %>%
    summarise(N=n()) %>%
    ungroup() %>%
    arrange(WaterType, LocationID, GradeYear, Weather, Meets) %>%
    # one count column per Meets x Weather combination, e.g. N_Swim_Dry
    mutate(Meets=paste("N", Meets, sep="_")) %>%
    unite(MeetsWeather, Meets, Weather) %>%
    spread(MeetsWeather, N, fill=0) %>%
    mutate(
      # sample totals per weather class
      N_Wet=N_Swim_Wet + N_Boat_Wet + N_None_Wet,
      N_Dry=N_Swim_Dry + N_Boat_Dry + N_None_Dry,

      # cumulative fractions: Boat includes Swim, None includes everything
      Swim_Dry=N_Swim_Dry / N_Dry,
      Swim_Wet=N_Swim_Wet / N_Wet,

      Boat_Dry=(N_Swim_Dry + N_Boat_Dry) / N_Dry,
      Boat_Wet=(N_Swim_Wet + N_Boat_Wet) / N_Wet,

      None_Dry=(N_Swim_Dry + N_Boat_Dry + N_None_Dry) / N_Dry,
      None_Wet=(N_Swim_Wet + N_Boat_Wet + N_None_Wet) / N_Wet,

      # weather-weighted values (75% dry, 25% wet)
      Swim=0.75 * Swim_Dry + 0.25 * Swim_Wet,
      Boat=0.75 * Boat_Dry + 0.25 * Boat_Wet,
      None=0.75 * None_Dry + 0.25 * None_Wet,

      Compliance=(Swim + Boat) / 2
    ) %>%
    check_location_rates()

  by_location$N_Year <- n_window
  by_location
}))

# Water body level results without CSORWM: sample counts are summed and the
# compliance rates are averaged over the locations of each water body, then
# graded with get_grade() and sanity-checked.
df_wbody_nocso <- df_loc_nocso %>%
  droplevels() %>%
  group_by(WaterType, WaterBodyID, CharacteristicID, GradeYear, N_Year) %>%
  summarise(
    N_Waterbody=sum(N_Location),
    N_Location=n(),
    N_Wet=sum(N_Wet),
    N_Dry=sum(N_Dry),
    Swim=mean(Swim),
    Swim_Dry=mean(Swim_Dry),
    Swim_Wet=mean(Swim_Wet),
    Boat=mean(Boat),
    Boat_Dry=mean(Boat_Dry),
    Boat_Wet=mean(Boat_Wet),
    None=mean(None),
    Compliance=mean(Compliance)
  ) %>%
  ungroup() %>%
  get_grade() %>%
  (function(d) {
    # counts must add up; rates must be valid wherever they are not missing
    stopifnot(all(d$N_Waterbody - (d$N_Wet + d$N_Dry) == 0))
    stopifnot(all(d$Swim >= 0 & d$Swim <= 1, na.rm=TRUE))
    stopifnot(all(d$Boat >= 0 & d$Boat <= 1, na.rm=TRUE))
    stopifnot(all(abs(d$None - 1) <= 1e-6, na.rm=TRUE))
    stopifnot(all(d$Boat >= d$Swim, na.rm=TRUE))
    stopifnot(all(abs(d$Compliance - (d$Swim + d$Boat) / 2) <= 1e-6,
                  na.rm=TRUE))
    d
  })

# Graded rows only, labelled with the set of programs they are based on
df_wbody_nocso_grade <- df_wbody_nocso %>%
  filter(!is.na(Grade)) %>%
  mutate(Programs="BASE, BHWQM")

df_wbody_grade <- df_wbody %>%
  filter(!is.na(Grade)) %>%
  mutate(Programs="BASE, BHWQM, CSORWM")

# Both variants stacked for side-by-side comparison
df_grade_program <- rbind(df_wbody_nocso_grade, df_wbody_grade)

# Multi-page PDF comparing the 3-year-window results with and without the
# CSORWM program. Every plot is printed explicitly so that it is drawn on the
# PDF device regardless of how this code is run.

# Create the output folder on demand (no-op when it already exists).
dir.create("pdf", showWarnings=FALSE)
pdf('pdf/exclude-csorwm.pdf', width=11, height=8.5)

# Page 1: grade trajectories per water body, one line per program set.
# Water bodies are ordered by water type, then name; grade levels are reversed
# so that the first level is drawn at the top of the axis.
print(
  df_grade_program %>%
    filter(N_Year %in% c(3)) %>%
    mutate(WaterBodyID=as.character(WaterBodyID)) %>%
    arrange(WaterType, WaterBodyID, N_Year, GradeYear) %>%
    mutate(WaterBodyID=ordered(as.character(WaterBodyID), levels=unique(WaterBodyID))) %>%
    mutate(Grade=ordered(as.character(Grade), levels=rev(levels(Grade)))) %>%
    ggplot(aes(GradeYear, Grade, group=interaction(WaterBodyID, Programs), color=Programs)) +
    geom_point() +
    geom_line() +
    scale_color_manual(values=c('orangered', 'deepskyblue')) +
    facet_wrap(~WaterBodyID) +
    theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5),
          axis.text.y=element_text(size=6),
          legend.position='top') +
    labs(x="", y="Grade",
         title="Historical Grades by Waterbody from 2006-2014 (3-yr Window)\nWith and Without CSORWM")
)

# Page 2: number of samples behind each grade, per program set. The
# spread/gather round trip fills missing combinations with zero.
print(
  df_grade_program %>%
    filter(N_Year %in% c(3)) %>%
    mutate(WaterBodyID=as.character(WaterBodyID)) %>%
    arrange(WaterType, WaterBodyID, N_Year, GradeYear) %>%
    mutate(WaterBodyID=ordered(as.character(WaterBodyID), levels=unique(WaterBodyID))) %>%
    select(Programs, WaterBodyID, GradeYear, N_Waterbody) %>%
    spread(Programs, N_Waterbody, fill=0) %>%
    gather(Programs, N_Waterbody, -WaterBodyID, -GradeYear) %>%
    ggplot(aes(factor(GradeYear), N_Waterbody, fill=Programs)) +
    geom_bar(position='dodge', stat='identity') +
    scale_fill_manual(values=c('orangered', 'deepskyblue')) +
    facet_wrap(~WaterBodyID) +
    theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5),
          axis.text.y=element_text(size=6),
          legend.position='top') +
    labs(x="", y="# Samples",
         title="Number of Samples by Waterbody from 2006-2014 (3-yr Window)\nWith and Without CSORWM")
)

# Page 3: compliance bars and grades without CSORWM. Only the 3-year window
# is plotted, so the title names that window.
print(
  filter(df_wbody_nocso, N_Year %in% c(3)) %>%
    arrange(WaterType, WaterBodyID, N_Year, GradeYear) %>%
    mutate(WaterBodyID=ordered(as.character(WaterBodyID), levels=unique(WaterBodyID))) %>%
    # stack Swim and the increment of Boat over Swim
    mutate(Boat=Boat-Swim) %>%
    gather(Meets, ComplianceMeet, Swim, Boat) %>%
    ggplot(aes(factor(GradeYear), ComplianceMeet)) +
    geom_bar(aes(fill=Meets), stat="identity", color='grey50') +
    geom_point(aes(y=Compliance), color='red') +
    geom_text(aes(x = factor(GradeYear), label=Grade), y=1.07, size=3,
              data=filter(df_wbody_nocso, N_Year %in% c(3))) +
    scale_y_continuous(labels=scales::percent, limits=c(0, 1.1),
                       breaks=seq(0, 1, by=0.2)) +
    scale_fill_meets +
    facet_wrap(~WaterBodyID) +
    theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
    labs(x="", y="% Compliance",
         title="Historical Compliance Rates and Grades from 2006-2014 (3-yr Window)\nPrograms: BASE + BHWQM")
)

# Page 4: the same figure including CSORWM.
print(
  filter(df_wbody, N_Year %in% c(3)) %>%
    arrange(WaterType, WaterBodyID, N_Year, GradeYear) %>%
    mutate(WaterBodyID=ordered(as.character(WaterBodyID), levels=unique(WaterBodyID))) %>%
    # stack Swim and the increment of Boat over Swim
    mutate(Boat=Boat-Swim) %>%
    gather(Meets, ComplianceMeet, Swim, Boat) %>%
    ggplot(aes(factor(GradeYear), ComplianceMeet)) +
    geom_bar(aes(fill=Meets), stat="identity", color='grey50') +
    geom_point(aes(y=Compliance), color='red') +
    geom_text(aes(x = factor(GradeYear), label=Grade), y=1.07, size=3,
              data=filter(df_wbody, N_Year %in% c(3))) +
    scale_y_continuous(labels=scales::percent, limits=c(0, 1.1),
                       breaks=seq(0, 1, by=0.2)) +
    scale_fill_meets +
    facet_wrap(~WaterBodyID) +
    theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
    labs(x="", y="% Compliance",
         title="Historical Compliance Rates and Grades from 2006-2014 (3-yr Window)\nPrograms: BASE + BHWQM + CSORWM")
)

# invisible() keeps the value returned by dev.off() out of the rendered report
invisible(dev.off())
```