In [17]:
require(ggplot2)
require(maptools)
require(rgeos)
require(Cairo)
require(ggmap)
require(scales)
require(RColorBrewer)
require(rgdal)
require(maps)
require(tidyr)
require(dplyr)

Loading required package: rgeos
“there is no package called ‘rgeos’”
Loading required package: Cairo
“there is no package called ‘Cairo’”
Loading required package: rgdal
“there is no package called ‘rgdal’”

In [None]:
# Import the US state outlines as a data frame of polygon vertices
states.coords <- map_data("state")
# Show how many rows and columns the outline table has
dim(states.coords)
# Quick look at the raw outlines: one polygon per `group`,
# dark fill with white borders
ggplot(data = states.coords, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "white", fill = "grey1") +
  theme_bw()

In [None]:
## Convert US state abbreviations to full state names (or the reverse).
## Based on the abb2state.R function (Thanks to Guangyang Li).
##
## Arguments:
##   name     Vector of state abbreviations and/or full names. Matching is
##            case-insensitive.
##   convert  FALSE (default): return full names.
##            TRUE: return abbreviations instead.
##   strict   FALSE (default): inputs that are already in the output form are
##            accepted too and returned in canonical spelling.
##            TRUE: only inputs given in the opposite form are translated.
##
## Returns a character vector with one element per input; inputs that match
## nothing yield NA. As with sapply(), the result carries the names of `name`
## if it has any, otherwise the input strings themselves when `name` is a
## character vector.
abb2state <- function(name, convert = FALSE, strict = FALSE) {
  # The built-in state data doesn't include DC, so append it by hand.
  full_names <- c(datasets::state.name, "District Of Columbia")
  abbreviations <- c(datasets::state.abb, "DC")

  # `output_form` is what gets returned, `other_form` is what gets translated.
  if (convert) {
    output_form <- abbreviations
    other_form <- full_names
  } else {
    output_form <- full_names
    other_form <- abbreviations
  }

  key <- tolower(as.character(name))
  idx <- match(key, tolower(other_form))
  if (!strict) {
    # An input already spelled in the output form takes precedence over a
    # translation from the other form.
    same_form <- match(key, tolower(output_form))
    already_there <- !is.na(same_form)
    idx[already_there] <- same_form[already_there]
  }
  # Indexing with NA yields NA, so unmatched inputs become NA instead of
  # zero-length elements that would turn the whole result into a list.
  result <- output_form[idx]

  if (!is.null(names(name))) {
    names(result) <- names(name)
  } else if (is.character(name)) {
    names(result) <- name
  }
  result
}
# Translate every map row's `region` value with abb2state() and keep the
# result in a new `region_FN` column
states.coords[["region_FN"]] <- abb2state(states.coords[["region"]])

In [None]:
# Print the current working directory to confirm where the notebook is running.
getwd()
# Author's note: this returned '/home/jovyan/work/Repos/707wine'; the data is in the Data directory.
# NOTE(review): the read.csv() calls in this notebook use home-relative "~/Data/..." paths -- confirm which Data directory is meant.

In [None]:
# create toy data for merging: one row per distinct state name on the map
# (`price` is a random placeholder that is dropped again right after the merge)
mydata <- data.frame(region = unique(states.coords$region_FN),price=rnorm(length(unique(states.coords$region_FN)),100,25))
# read in US wine data with mean price, mean points, and count
usdat <- read.csv("~/Data/wine-reviews/us_wine.csv") %>%
  select(-X)
usdat$province <- as.character(usdat$province)
# Left join: keep every map state, attach the wine figures where `province`
# matches, then drop the placeholder column
wprice <- merge(mydata, usdat, by.x = "region", by.y = "province", all.x = TRUE, sort = TRUE) %>%
  select(-price)
# Merge map and wine data using respective columns 'region_FN' and 'region'
states.dat <- merge(states.coords, wprice, by.x = "region_FN", by.y = "region", sort = TRUE, all.x = TRUE)
# merge() does not preserve the original row order, but geom_polygon() connects
# vertices in row order: restore the drawing sequence stored in `order`
states.dat <- states.dat[order(states.dat$order), ]

In [None]:
# Label table: one row per state, placed at the midpoint of the state's
# longitude range and latitude range
us.name <- aggregate(
  cbind(long, lat) ~ region_FN,
  data = states.dat,
  FUN = function(coord) mean(range(coord))
)
# Attach the wine figures to each label position (states without data are kept)
us.name <- merge(
  us.name, wprice,
  by.x = "region_FN", by.y = "region",
  all.x = TRUE, sort = TRUE
)

In [None]:
# Choropleth of mean wine points per US state
ggplot(data = states.dat) +
  # State polygons, filled by the variable of interest
  geom_polygon(
    mapping = aes(x = long, y = lat, group = group, fill = mean_points),
    colour = "black", lwd = 0.3
  ) +
  # Print the value on top of each state
  geom_text(
    data = us.name,
    mapping = aes(x = long, y = lat, label = mean_points),
    size = 3, fontface = "bold"
  ) +
  # Conic map projection
  coord_map(projection = "conic", lat0 = 30) +
  # Continuous white-to-green gradient for the fill
  scale_fill_continuous(low = "white", high = "darkgreen", name = "wine mean points") +
  labs(title = "Wine mean points by state in USA") +
  # Start from the black-and-white theme, then hide axes, border and gridlines
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )

## For European wines:

In [18]:
# Print the current working directory to confirm where the notebook is running.
getwd()
# Author's note: this returned '/home/jovyan/work/Repos/707wine'; the data is in the Data directory.
# NOTE(review): the read.csv() calls in this notebook use home-relative "~/Data/..." paths -- confirm which Data directory is meant.

In [None]:
#### Europe wine data
# Country lookup table; its `a` and `b` columns are the join keys used below
# NOTE(review): `a` looks like the NUTS code and `b` the country name -- confirm
eucountry <- read.csv("~/Data/wine-reviews/eu_country.csv") %>%
  select(-X)
# Per-country wine figures, keyed by `country`
eudata <- read.csv("~/Data/wine-reviews/eu_wine.csv") %>%
  select(-X)
# Keep every lookup row and attach the wine figures where `b` equals `country`
euwine <- merge(
  eucountry, eudata,
  by.x = "b", by.y = "country",
  all.x = TRUE, sort = TRUE
)

In [None]:
#### Europe map
# Read the Europe shapefile (NUTS level 0, judging by the file name) into a
# spatial object.
# NOTE(review): maptools' shapefile readers are reported as deprecated
# upstream -- confirm, and consider migrating once a replacement reader is
# available in this environment.
eumap <- readShapeSpatial("~/Data/wine-reviews/NUTS_RG_60M_2016_3035_LEVL_0.shp")
# Draw the raw outlines as a quick visual check of the import
plot(eumap)

In [None]:
## create data table with mean price, mean points, count
# Flatten the spatial object into a data frame of polygon vertices whose `id`
# holds the NUTS_ID of each region (this replaces the spatial `eumap` object)
eumap <- fortify(eumap, region = "NUTS_ID")
# Left join: keep every vertex, attach the wine figures where `id` equals `a`
eudat <- merge(eumap, euwine, by.x = "id", by.y = "a", all.x = TRUE, sort = TRUE)
# merge() does not preserve the original row order, but polygons and paths are
# drawn in row order: restore the vertex sequence stored in `order`
eudat <- eudat[order(eudat$order), ]
## restrict to main europe region
eudat1 <- subset(eudat, long > -2000000 & long < 8000000 & lat > -2000000 & lat < 5400000)

In [None]:
## create text labels for the map
# One row per `b` value, placed at the midpoint of its longitude range and
# latitude range within the restricted map area
eu.name <- aggregate(
  cbind(long, lat) ~ b,
  data = eudat1,
  FUN = function(coord) mean(range(coord))
)
# Attach the wine figures to each label position
eu.name <- merge(
  eu.name, euwine,
  by.x = "b", by.y = "b",
  all.x = TRUE, sort = TRUE
)

In [None]:
## plotting
# Choropleth of the number of wines per European country
ggplot(data = eudat1) +
  # Country polygons filled by wine count
  geom_polygon(mapping = aes(x = long, y = lat, group = group, fill = count)) +
  # Semi-transparent black outlines on top of the fill
  geom_path(mapping = aes(x = long, y = lat, group = group), color = "black", alpha = .5) +
  # Print the count on top of each country
  geom_text(
    data = eu.name,
    mapping = aes(x = long, y = lat, label = count),
    size = 3, fontface = "bold"
  ) +
  scale_fill_continuous(low = "white", high = "blue", name = "wine count") +
  labs(title = "Wine count by country in Europe") +
  # Start from the black-and-white theme, then hide axes, border and gridlines
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )