# Lab 1 : Spatial Autocorrelation

In [None]:
# We import our libraries here
library(raster)
library(spdep)
library(classInt)
library(rgdal)
library(RColorBrewer)
library(ggplot2)
library(dplyr)
library(broom)

## Read in the data and take a look at it

Read the shape file from the data directory

In [None]:
# Load the shapefile from the data directory into `auck`
auck <- readOGR(dsn = "data/akCity_CAU01_ethnic.shp")

In [None]:
# Let's see a summary of our spatial data
# summary(auck)

In [None]:
# names() lists the column (attribute field) names of the data attached to auck
names(auck)

In [None]:
#The data frame -> row and header of data, in "tabular" format
#let's make df the dataframe
# df <- data.frame(auck)
# df

In [None]:
# Keep only attribute columns 3 and 13 of auck.
# NOTE(review): these are presumably FIRST_CAU_ and PC_ASIAN, the only fields
# referenced later in this lab -- confirm against the output of names(auck).
auck <- auck[,c(3,13)]
# Display the trimmed object
auck

## Plotting the data on a map

### Using plot

In [None]:
# Quick test plot of the dataset using the generic plot() function
plot(auck)

Note that the map above has no color -- to get color we need to add breaks 
to the data and do some extra work in `plot`. Fortunately, there are two libraries in R that do a lot of geospatial work out of the box.

### Using spplot

In [None]:
# spplot is a plotting function designed for spatial objects.
# General form: spplot(data, column to map, further display settings)
spplot(auck, zcol = "PC_ASIAN")

## Equal Intervals Map

In [None]:
# Build class breaks for the map with classIntervals(). See
# https://www.rdocumentation.org/packages/classInt/versions/0.1-24/topics/classIntervals
# for documentation of all options.

# n = 7 classes, which yields n + 1 = 8 break points
n <- 7
ci <- classIntervals(auck$PC_ASIAN, n, style = "equal")
breaks <- signif(ci$brks, 2)

# Rounding to 2 significant figures can pull the outermost breaks inside the
# data range; values outside the breaks are not coloured by spplot, so stretch
# the two end points back out to cover every observation.
pc_range <- range(auck$PC_ASIAN, na.rm = TRUE)
breaks[1] <- min(breaks[1], pc_range[1])
breaks[length(breaks)] <- max(breaks[length(breaks)], pc_range[2])

In [None]:
# Custom colors using ColorBrewer: one color per class. There is one fewer
# class than there are break points, hence length(breaks) - 1.
pal <- brewer.pal(length(breaks) - 1, "Blues")

In [None]:
# Generate the map. The column argument is spelled out in full as `zcol`
# rather than relying on partial matching of `z`.
spplot(auck, zcol = "PC_ASIAN", at = breaks, col.regions = pal)

## Standard Deviations Map

In [None]:
# Generate breaks based on standard deviations
ci <- classIntervals(auck$PC_ASIAN, n, style = "sd")
breaks <- signif(ci$brks, 2)

# Rounding can pull the outermost breaks inside the data range; values outside
# the breaks are not coloured by spplot, so make the end points cover the data.
pc_range <- range(auck$PC_ASIAN, na.rm = TRUE)
breaks[1] <- min(breaks[1], pc_range[1])
breaks[length(breaks)] <- max(breaks[length(breaks)], pc_range[2])

# Define color swatches: one color per class (one fewer than the number of
# break points), with the diverging palette reversed
pal <- rev(brewer.pal(length(breaks) - 1, "RdBu"))

# Generate the map (`zcol` spelled out instead of the partial match `z`)
spplot(auck, zcol = "PC_ASIAN", at = breaks, col.regions = pal)

## Quantiles Map

In [None]:
# Generate breaks based on quantiles
ci <- classIntervals(auck$PC_ASIAN, n, style = "quantile")
breaks <- signif(ci$brks, 2)

# Rounding can pull the outermost breaks inside the data range; values outside
# the breaks are not coloured by spplot, so make the end points cover the data.
pc_range <- range(auck$PC_ASIAN, na.rm = TRUE)
breaks[1] <- min(breaks[1], pc_range[1])
breaks[length(breaks)] <- max(breaks[length(breaks)], pc_range[2])

# Define color swatches: one color per class (one fewer than the number of
# break points)
pal <- brewer.pal(length(breaks) - 1, "Reds")

# Generate the map (`zcol` spelled out instead of the partial match `z`)
spplot(auck, zcol = "PC_ASIAN", at = breaks, col.regions = pal)

## Natural Breaks (Jenks) map

In [None]:
# Generate natural breaks with the "fisher" style.
# NOTE(review): unlike the other maps, no class count is passed here, so
# classIntervals() falls back on its own default -- confirm this is intended.
ci <- classIntervals(auck$PC_ASIAN, style = "fisher")
breaks <- signif(ci$brks, 2)

# Rounding can pull the outermost breaks inside the data range; values outside
# the breaks are not coloured by spplot, so make the end points cover the data.
pc_range <- range(auck$PC_ASIAN, na.rm = TRUE)
breaks[1] <- min(breaks[1], pc_range[1])
breaks[length(breaks)] <- max(breaks[length(breaks)], pc_range[2])

# Define color swatches: one color per class (one fewer than the number of
# break points)
pal <- brewer.pal(length(breaks) - 1, "Purples")

# Generate the map (`zcol` spelled out instead of the partial match `z`)
spplot(auck, zcol = "PC_ASIAN", at = breaks, col.regions = pal)

# Graphing the data

In [None]:
# Histogram of PC_ASIAN with a count label on each bar, filled using the
# current palette `pal`. TRUE is spelled out because T can be reassigned.
hist(auck$PC_ASIAN, labels = TRUE, col = pal)

# Spatial autocorrelation

In [None]:
# Build the neighbours list that underlies the spatial weights.
# queen = FALSE means at least two boundary points must be within the snap
# distance of each other for two polygons to count as neighbours; this is
# conventionally called a 'rook' relationship.
# The snap option is the same as the precision threshold in GeoDa.
nb <- poly2nb(
  auck,
  row.names = auck$FIRST_CAU_,
  queen = FALSE,
  snap = 1e-05
)

# Number of entries in the neighbours list
length(nb)



In [None]:
# Example: the first entry of nb; the numbers are the neighbours of the first polygon
nb[1]
# FIRST_CAU_ value of that first polygon
auck$FIRST_CAU_[1]

# Print a summary of the neighbours list nb
summary(nb)

In [None]:
# Draw the polygons, then overlay the neighbour links in red, positioned at
# the coordinates returned by coordinates(auck)
plot(auck, col = "gray", border = "blue", lwd = 1)
xy <- coordinates(auck)
plot(nb, xy, col = "red", lwd = 1, add = TRUE)

In [None]:
#style = B indicates show the weights as binaries (0 or 1)
# wm <- nb2mat(nb, style='B', zero.policy=TRUE)

In [None]:
#this is what the first few rows of the weights matrix looks like, 
# since we set style=B, values are either 0 or 1 
# head(wm)

## Moran's *I* in equation form

$I = \frac{n}{\sum_{i=1}^n (y_i - \bar{y})^2} \frac{\sum_{i=1}^n \sum_{j=1}^n w_{ij}(y_i - \bar{y})(y_j - \bar{y})}{\sum_{i=1}^n \sum_{j=1}^n w_{ij}}$

In [None]:
# Let's first see what manual computation of Moran's I looks like

#n is the number of observations (length of our dataset)
# n <- length(auck)

#we set y to the column of PC_ASIAN, then we get the mean.
# y <- auck$PC_ASIAN
# ybar <- mean(y)

#find the difference between y and ybar(the mean)
# dy <- y - ybar
# yi <- rep(dy, each=n)
# yj <- rep(dy)
# yiyj <- yi * yj

# pm <- matrix(yiyj, ncol=n)

# pmw <- pm * wm

# spmw <- sum(pmw)

# smw <- sum(wm)
# sw  <- spmw / smw
# vr <- n / sum(dy^2)
# MI <- vr * sw

#Morans I
# cat("Moran's I is", MI)

In [None]:
# Turn the neighbours list into a weights object so that Moran's I can be
# computed with spdep's functions instead of by hand
lw <- nb2listw(neighbours = nb, style = "W", zero.policy = TRUE)

In [None]:
# Print a summary of the weights object lw
summary(lw)

In [None]:
# Standardise PC_ASIAN. scale() returns a one-column matrix carrying
# centring/scaling attributes; as.vector() reduces it to a plain numeric
# vector so the attribute table keeps an ordinary column.
auck$sPC_ASIAN <- as.vector(scale(auck$PC_ASIAN))

# Spatial lag of the standardised values under the weights in lw
auck$lag_PC_ASIAN <- lag.listw(lw, auck$sPC_ASIAN, zero.policy = TRUE)

# Moran scatterplot: standardised value (x) against its spatial lag (y),
# with reference lines through the origin
plot(x = auck$sPC_ASIAN, y = auck$lag_PC_ASIAN, main = " Moran Scatterplot PC_ASIAN")
abline(h = 0, v = 0)

# Dashed red line: least-squares fit of the lag on the standardised value
best_fit_line <- lm(auck$lag_PC_ASIAN ~ auck$sPC_ASIAN)
abline(best_fit_line, lty = 2, lwd = 1, col = "red")

# Note that the slope of the regression line is nearly the same as Moran's I
# coefficients(best_fit_line)[2]



In [None]:
# Global Moran's I for PC_ASIAN. zero.policy = TRUE matches the setting used
# when lw was built, so regions without neighbours are handled the same way.
moran(auck$PC_ASIAN, lw, n = length(lw$neighbours), S0 = Szero(lw), zero.policy = TRUE)

In [None]:
# Moran's I test with randomisation = FALSE. zero.policy = TRUE matches the
# setting used when lw was built.
moran.test(auck$PC_ASIAN, lw, randomisation = FALSE, zero.policy = TRUE)

In [None]:
# Permutation test for Moran's I with 999 simulations. zero.policy = TRUE
# matches the setting used when lw was built.
mmc <- moran.mc(auck$PC_ASIAN, lw, nsim = 999, zero.policy = TRUE)

In [None]:
# Histogram of the values stored in mmc$res, with the test statistic marked
# by a dashed red vertical line
hist(
  mmc$res,
  main = "Histogram of results from permutation",
  xlab = "Moran's index"
)
abline(v = mmc$statistic, col = "red", lty = 2)

# Univariate Local Moran’s I

In [None]:
# We use the localmoran function instead of moran to get one statistic per
# observation. zero.policy = TRUE matches the setting used when lw was built.
locm <- localmoran(auck$PC_ASIAN, lw, zero.policy = TRUE)
summary(locm)

In [None]:
# Standardise PC_ASIAN. scale() returns a one-column matrix carrying
# centring/scaling attributes; as.vector() reduces it to a plain numeric
# vector so the attribute table keeps an ordinary column.
auck$sLPC_ASIAN <- as.vector(scale(auck$PC_ASIAN))

# Spatial lag of the standardised values under the weights in lw
auck$lag_LPC_ASIAN <- lag.listw(lw, auck$sLPC_ASIAN, zero.policy = TRUE)

plot(x = auck$sLPC_ASIAN, y = auck$lag_LPC_ASIAN, main = "Local Moran Scatterplot PC_ASIAN")
abline(h = 0, v = 0)

# Fit the regression once and reuse it for both the line and the coefficient
slope <- lm(auck$lag_LPC_ASIAN ~ auck$sLPC_ASIAN)
abline(slope, lty = 2, lwd = 1, col = "red")

# Note that the slope of the regression line is nearly the same as Moran's I
coefficients(slope)[2]



In [None]:
# Significance threshold applied to the local Moran results
sig <- 0.001

# Column 5 of the localmoran() result is compared against the threshold.
# NOTE(review): this is expected to be the p-value column -- confirm with
# colnames(locm).
lisa_p <- locm[, 5]
lisa_sig <- lisa_p <= sig

# Split each axis of the Moran plot exactly once (>= 0 is "high", otherwise
# "low") so the four quadrants are disjoint and the assigned class does not
# depend on the order of the statements below.
high_val <- auck$sLPC_ASIAN >= 0
high_lag <- auck$lag_LPC_ASIAN >= 0

# Identify the Moran plot quadrant for each observation to make the cluster map
auck$QUAD_SIG <- NA
auck$QUAD_SIG[high_val & high_lag & lisa_sig] <- 1    # High-High
auck$QUAD_SIG[!high_val & !high_lag & lisa_sig] <- 2  # Low-Low
auck$QUAD_SIG[high_val & !high_lag & lisa_sig] <- 3   # High-Low
auck$QUAD_SIG[!high_val & high_lag & lisa_sig] <- 4   # Low-High
# 5 marks the non-significant observations
auck$QUAD_SIG[lisa_p > sig] <- 5

In [None]:
# Set the breaks for the thematic map classes.
# QUAD_SIG holds the integer codes 1-5. The breaks sit at the half-integers
# (0.5, 1.5, ..., 5.5) so each code falls strictly inside its own interval;
# with breaks on the integers themselves a code lies exactly on a boundary and
# can be binned with its neighbour.
breaks <- seq(0.5, 5.5, by = 1)

# Set the corresponding labels for the thematic map classes
labels <- c("High-High", "Low-Low", "High-Low", "Low-High", "Not Signif.")

# Define color swatches, one per class and in the same order as the labels
pal <- c("red", "blue", "lightpink", "skyblue2", "white")

# Generate the map, writing each class label at the centre of its interval on
# the color key
spplot(
  auck, "QUAD_SIG",
  at = breaks,
  col.regions = pal,
  colorkey = list(labels = list(at = seq_along(labels), labels = labels)),
  main = list(label = "Local Moran's I, LISA Cluster Map", cex = 1)
)