# Lab 1 : Spatial Autocorrelation

In [None]:
# We import our libraries here
library(spdep)
library(classInt)
library(rgdal)
library(RColorBrewer)
# library(ggplot2)
# library(dplyr)
# library(broom)
# library(sf)

## Read in the data and take a look at it

Read the shape file from the data directory

In [None]:
# Load the Auckland TB / ethnicity shapefile into `auck`.
# NOTE(review): integer64 = "allow.loss" presumably lets 64-bit integer
# fields be read as ordinary R integers - confirm against the rgdal docs.
auck <- readOGR(
  dsn = "data/ak-TB-ethnicity-0506.shp",
  integer64 = "allow.loss"
)

In [None]:
# Print a summary of the spatial object `auck` that was read from the shapefile
summary(auck)

In [None]:
# List the attribute (column) names available on `auck`
names(auck)

In [None]:
# Convert `auck` to a plain data frame (`df`): one row per record, one column
# per attribute, i.e. the data in "tabular" format.
# Then show the first few rows.
df <- data.frame(auck)
head(df)

## Exploring the data in maps

As in the previous lab, we will make some choropleth maps to examine the various data of possible interest in this setting. To make this a bit less arduous, here is that simple choropleth mapping function from the previous notebook, which you can use to make maps of the different variables of interest. 

In [1]:
## choro: draw a choropleth map of one variable of a spatial object.
##
## Arguments
##   sf        spatial object to plot; the variable is read as sf[[varname]]
##   varname   name (string) of the variable to map
##   nclasses  number of classes / colours
##   pal       name of the RColorBrewer palette
##   sty       classification style handed to classIntervals()
##   ttl       map title (defaults to the variable name)
choro <- function(sf, varname, nclasses=5, pal='Reds', sty='equal', ttl=varname) {
    # One colour per class
    swatches <- brewer.pal(nclasses, pal)
    # Break the variable into classes and look up each feature's colour
    class_breaks <- classIntervals(sf[[varname]], nclasses, style = sty)
    fill_cols <- findColours(class_breaks, swatches)

    plot(sf, col = fill_cols, lwd = 0.2)

    # The legend text and swatches come from attributes set by findColours()
    legend(
        'top',
        ncol = 3,
        legend = names(attr(fill_cols, 'table')),
        fill = attr(fill_cols, 'palette'),
        cex = 0.8,
        bty = 'n'
    )
    title(ttl)
}

Principally we want you to look at the tuberculosis rate (in cases per 100,000 population) `TB_RATE`, and also at the different distributions of the various census-defined ethnic groups: NZ European `EUR_06`, Māori `MAO_06`, Pasifika `PAC_06`, Asian `ASI_06`, Middle-eastern and Latin American `MEL_06` (incongruously combined in NZ census data), and 'Other' `OTH_06`. You really need only pay attention to the tuberculosis rate, and to the four major population groupings here: European, Māori, Pasifika, and Asian.

Use the above function in the cell below, to map the tuberculosis rate.

Don't forget that you have options for changing the map colors (`pal`), the number of classes (`nclasses`) and the classification method (`sty`) in this map and others you make.

In [None]:
# Put a line of code here to map the tuberculosis rate data

Use the cell below to map all four of the major population groups. One idea you might explore is to make all four of the ethnicity/race distribution maps in a single display, by first issuing the `par(mfrow=c(2,2))` command, which will set up the display area for a 2 by 2 grid of maps. Then make four distinct maps.

In [None]:
# This line sets up the graphic display for a two by two array of plots
# (mfrow) with narrow margins: `mai` gives the four margin sizes in inches,
# here 0.1 inch on every side
par(mfrow=c(2,2), mai=rep(0.1,4))
# write a line of code to make a map
# write a line to make another map
# and another
# and then a fourth one

# Graphing the data

In [None]:
# Histogram of the ASI_P_06 column, with the count printed above each bar.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
hist(auck$ASI_P_06, labels = TRUE)

# Spatial autocorrelation

In [None]:
# Build the neighbour list that the spatial weights are later derived from.
#   queen = FALSE  a single shared boundary point is not enough; two or more
#                  boundary points must coincide (conventionally called a
#                  'rook' relationship)
#   snap           boundary points closer than this distance are treated as
#                  coincident (the same idea as the precision threshold in
#                  GeoDa)
nb <- poly2nb(
  auck,
  row.names = auck$FIRST_CAU_,
  queen = FALSE,
  snap = 1e-05
)

# Number of entries in the neighbour list
length(nb)


In [None]:
# Example: the first entry of `nb`; the numbers are the neighbours of the
# first polygon
nb[1]
# ... and the AU_NAME value of that first polygon
auck$AU_NAME[1]

# Summary of the neighbour list `nb`
summary(nb)

In [None]:
# Base map: grey polygons with thin white borders
plot(auck, col = 'gray', border = 'white', lwd = 0.35)

# One coordinate pair per polygon, used as the node positions
xy <- coordinates(auck)

# Overlay the neighbour links in red on the existing map
plot(nb, xy, col = 'red', cex = 0.35, lwd = 0.5, add = TRUE)

## Moran's *I* in equation form

$I = \frac{n}{\sum_{i=1}^n (y_i - \bar{y})^2} \frac{\sum_{i=1}^n \sum_{j=1}^n w_{ij}(y_i - \bar{y})(y_j - \bar{y})}{\sum_{i=1}^n \sum_{j=1}^n w_{ij}}$

In [None]:
# Let's first see what manual computation of Moran's I looks like.
# (This cell is intentionally left commented out.)
# NOTE(review): `wm` below is not defined anywhere in the visible notebook;
# presumably it is an n x n spatial weights matrix built from `nb` - confirm
# and define it before uncommenting.

# n is the number of observations (length of our dataset)
# n <- length(auck)

# we set y to the column PC_ASIAN, then we get the mean.
# y <- auck$PC_ASIAN
# ybar <- mean(y)

# find the difference between y and ybar (the mean)
# dy <- y - ybar
# yi <- rep(dy, each=n)
# yj <- rep(dy)
# yiyj <- yi * yj

# arrange the n*n cross-products as an n x n matrix
# pm <- matrix(yiyj, ncol=n)

# weight each cross-product by the corresponding entry of wm
# pmw <- pm * wm

# sum of the weighted cross-products
# spmw <- sum(pmw)

# sum of the weights, then the two factors of the formula
# smw <- sum(wm)
# sw  <- spmw / smw
# vr <- n / sum(dy^2)
# MI <- vr * sw

# Moran's I
# cat("Moran's I is", MI)

In [None]:
# Turn the neighbour list into a weights object (`lw`) so that Moran's I can
# be computed with the spdep helpers instead of by hand.
# zero.policy = TRUE is passed both when building and when printing.
lw <- nb2listw(
  nb,
  style = "W",
  zero.policy = TRUE
)
print(lw, zero.policy = TRUE)

In [None]:
# Summarise the weights object, again with zero.policy = TRUE
summary(lw, zero.policy=TRUE)

In [None]:

# m_scatterplot: draw a Moran scatterplot for one variable.
#
# The variable is standardised with scale(), its spatial lag is computed with
# lag.listw(), and the lag is plotted against the standardised value together
# with the zero axes and a dashed least-squares line.
#
# Arguments
#   sf           object holding the data; the variable is read as sf[[varname]]
#   varname      name (string) of the variable to plot
#   listweights  spatial weights object passed to lag.listw()
#   ttl          plot title
#   linecol      colour of the dashed regression line
#
# Returns the slope coefficient of the regression of lag on standardised value.
m_scatterplot <- function (sf, varname, listweights, ttl='', linecol='red') {
  values <- sf[[varname]]
  if (is.null(values)) {
    stop("column '", varname, "' not found", call. = FALSE)
  }

  # Work on local vectors rather than adding columns to `sf`: `sf$name <- ...`
  # always creates a column literally called `name`, so it cannot be used with
  # a column name held in a variable.
  # scale() returns a one-column matrix; flatten it to a plain numeric vector.
  scaled <- as.numeric(scale(values))
  lagged <- lag.listw(listweights, scaled, zero.policy = TRUE)

  plot(
    x = scaled, y = lagged, main = ttl,
    xlab = paste0('s', varname), ylab = paste0('lag', varname)
  )
  abline(h = 0, v = 0)

  best_fit_line <- lm(lagged ~ scaled)
  abline(best_fit_line, lty = 2, lwd = 1, col = linecol)

  # Note that the slope of the regression line is nearly the same as Moran's I
  coefficients(best_fit_line)[2]
}

In [None]:
# Call m_scatterplot() on the ASI_06 column of `auck`, using the weights `lw`,
# to draw the Moran scatterplot with a red regression line
m_scatterplot(auck, 'ASI_06', lw, ttl='Moran Scatterplot Percent Asian', linecol='red')


In [None]:
# Global Moran's I for ASI_06 using the weights `lw`.
# n is the number of entries in the neighbour list and S0 is Szero(lw).
moran(auck$ASI_06, lw, n=length(lw$neighbours), S0=Szero(lw), NAOK=TRUE, zero.policy=TRUE)

In [None]:
# Significance test of Moran's I for ASI_06 (randomisation switched off)
moran.test(auck$ASI_06, lw, randomisation=FALSE, zero.policy=TRUE)

In [None]:
# Simulation-based test of Moran's I for ASI_06 with nsim = 999; the result
# is kept in `mmc` so it can be plotted in the next cell
mmc <- moran.mc(auck$ASI_06, lw, nsim=999, zero.policy=TRUE)

In [None]:
# Distribution of the values stored in mmc$res
hist(
  mmc$res,
  main = "Histogram of results from permutation",
  xlab = "Moran's index"
)
# Dashed red line at mmc$statistic for comparison with that distribution
abline(v = mmc$statistic, col = 'red', lty = 2)

# Univariate Local Moran’s I

In [None]:
# We use the localmoran function instead of moran to get one statistic per
# observation. zero.policy = TRUE is passed explicitly, as for every other
# call that uses `lw`, because `lw` was built with zero.policy = TRUE.
locm <- localmoran(auck$PC_ASIAN, lw, zero.policy = TRUE)
summary(locm)

In [None]:
# Standardised PC_ASIAN and its spatial lag, stored as columns of `auck`
auck$sLPC_ASIAN <- scale(auck$PC_ASIAN)
auck$lag_LPC_ASIAN <- lag.listw(lw, auck$sLPC_ASIAN, zero.policy = TRUE)

# Regression of the lag on the standardised value, fitted once and reused
# for both the plotted line and the printed slope
slope <- lm(auck$lag_LPC_ASIAN ~ auck$sLPC_ASIAN)

plot(
  x = auck$sLPC_ASIAN,
  y = auck$lag_LPC_ASIAN,
  main = "Local Moran Scatterplot PC_ASIAN"
)
abline(h = 0, v = 0)
abline(slope, lty = 2, lwd = 1, col = "red")

# Note that the slope of the regression line is nearly the same as Moran's I
coefficients(slope)[2]


In [None]:
# Threshold applied to column 5 of `locm`
sig <- 0.001

# Code each observation by its Moran-plot quadrant to build the cluster map:
#   1 = value >= 0, lag >= 0      2 = value <= 0, lag <= 0
#   3 = value >= 0, lag <= 0      4 = value <= 0, lag >= 0
#   5 = column 5 of `locm` above the threshold (non significant)
# The rules overlap where a value or lag is exactly 0; the assignments run in
# this order and a later one overwrites an earlier one.
auck$QUAD_SIG <- NA
auck$QUAD_SIG[locm[, 5] <= sig & auck$sLPC_ASIAN >= 0 & auck$lag_LPC_ASIAN >= 0] <- 1
auck$QUAD_SIG[locm[, 5] <= sig & auck$sLPC_ASIAN <= 0 & auck$lag_LPC_ASIAN <= 0] <- 2
auck$QUAD_SIG[locm[, 5] <= sig & auck$sLPC_ASIAN >= 0 & auck$lag_LPC_ASIAN <= 0] <- 3
auck$QUAD_SIG[locm[, 5] <= sig & auck$sLPC_ASIAN <= 0 & auck$lag_LPC_ASIAN >= 0] <- 4
auck$QUAD_SIG[locm[, 5] > sig] <- 5

In [None]:
# Set the breaks for the thematic map classes.
# QUAD_SIG holds the integer codes 1-5. The breaks are placed half-way between
# the codes (0.5, 1.5, ..., 5.5) so that every code falls strictly inside its
# own interval and the class it gets does not depend on whether the plotting
# code treats intervals as left- or right-closed.
breaks <- seq(0.5, 5.5, by = 1)

# Set the corresponding labels for the thematic map classes (codes 1 to 5)
labels <- c("High-High", "Low-Low", "High-Low", "Low-High", "Not Signif.")

# Define color swatches, one per class, in the same order as `labels`
pal <- c("red", "blue", "lightpink", "skyblue2", "white")

# Generate the map. The colour key shows the class names from `labels`, one
# centred in each class interval (i.e. at the codes 1 to 5).
spplot(
  auck, "QUAD_SIG",
  at = breaks,
  col.regions = pal,
  colorkey = list(labels = list(at = seq_along(labels), labels = labels)),
  main = list(label = "Local Moran's I, LISA Cluster Map", cex = 1)
)