# Lab 1 : Spatial Autocorrelation

In [None]:
# We import our libraries here
library(raster)
# library(ggmap)
# library(maps)
library(spdep)
library(classInt)
library(rgdal)
library(RColorBrewer)
library(ggplot2)
library(dplyr)
library(broom)

In [None]:
# Load the polygon shapefile into s1.
# The path is relative to the working directory, so a folder called "data"
# holding the .shp file must sit next to this lab.

shp_path <- "data/akCity_CAU01_ethnic.shp"
s1 <- readOGR(dsn = shp_path)

In [None]:
# Print a summary of the spatial object s1 (the result is displayed because
# the call is the last expression of the cell)

summary(s1)


In [None]:
# names() lists the attribute (column) names of the data attached to s1

names(s1)

In [None]:
# Convert the spatial object to a plain data frame ("tabular" format: a header
# of column names plus rows of values) and store it in df
df <- data.frame(s1)
# Display the table
df

## Plotting the data on a map

### Using plot

In [None]:
# Test plot with the generic plot() function, after subsetting s1 to the
# single PC_ASIAN attribute column
plot(s1['PC_ASIAN'])


Note that the plot above has no color -- to get a colored map with `plot` we would need to
add breaks to the data and assign the colors ourselves. Fortunately, there are two libraries in R that do a lot of this geospatial work out of the box.

### Using spplot

In [None]:
# spplot() is a plotting function for spatial objects:
#   spplot(data, attribute column, plot settings)
# par.settings is used here to set the text font size to 5.
small_text <- list(fontsize = list(text = 5))
spplot(s1, zcol = "PC_ASIAN", par.settings = small_text)

### Using ggplot

In [None]:
# Some people prefer ggplot, which has greater control over legends than spplot 
# and a lot more features since it's a general graphics library.
# The only issue is that ggplot needs the data as a dataframe rather than reading it as a shapefile
# ggplot(data = my_data_frame, mapping = aes(x = name_of_column_with_x_value, 
#      y = name_of_column_with_y_value)) + geom_point()
#s1
#s1@data$id = rownames(s1@data)

In [None]:
#s1df.poly <- fortify(s1, region="id")
#s1df
#s1_df = left_join(df, s1@data, by="id")
#s1_df
#ggplot(s1_df, aes(long,lat,group=group))
#ggplot()
#spplot(s1, z="PC_ASIAN", par.settings=list(fontsize=list(text=10)))

## Equal Intervals Map

In [None]:
# Build the class breaks with classIntervals().
# Documentation of all its options:
# https://www.rdocumentation.org/packages/classInt/versions/0.1-24/topics/classIntervals

# n = 7 asks for 7 equal-width classes, which are delimited by 8 break points
equal_classes <- classIntervals(s1$PC_ASIAN, n = 7, style = "equal")
brks <- equal_classes$brks
brks

# Raise the top break by 1 (presumably so the maximum value is not left out of
# the last class when the breaks are handed to spplot)
last_brk <- length(brks)
brks[last_brk] <- brks[last_brk] + 1

In [None]:
# Custom colors using ColorBrewer.
# The k + 1 values in brks delimit k classes, so request one color per class
# rather than one per break point.
pal <- brewer.pal(length(brks) - 1, "Blues")

In [None]:

# Draw the equal-interval map: `at` supplies the class boundaries and
# `col.regions` the fill colors
spplot(
  s1,
  zcol = "PC_ASIAN",
  at = brks,
  col.regions = pal
)

## Standard Deviations Map

In [None]:
# Generate breaks with the standard-deviation style
brks <- classIntervals(s1$PC_ASIAN, style = "sd")$brks
# Raise the top break by 1 (presumably so the maximum value is not left out of
# the last class when the breaks are handed to spplot)
brks[length(brks)] <- brks[length(brks)] + 1
brks
# Define color swatches: the k + 1 break points delimit k classes, so request
# one color per class rather than one per break point
pal <- brewer.pal(length(brks) - 1, "Greens")

# Generate the map
spplot(s1, zcol = "PC_ASIAN", at = brks, col.regions = pal)

## Quantiles Map

In [None]:
# Generate breaks with the quantile style
brks <- classIntervals(s1$PC_ASIAN, style = "quantile")$brks
# Raise the top break by 1 (presumably so the maximum value is not left out of
# the last class when the breaks are handed to spplot)
brks[length(brks)] <- brks[length(brks)] + 1
brks
# Define color swatches: the k + 1 break points delimit k classes, so request
# one color per class rather than one per break point
pal <- brewer.pal(length(brks) - 1, "Reds")

# Generate the map
spplot(s1, zcol = "PC_ASIAN", at = brks, col.regions = pal)

## Natural Breaks (Jenks) map

In [None]:
# Generate natural breaks (classInt's "fisher" style)
brks <- classIntervals(s1$PC_ASIAN, style = "fisher")$brks
# Raise the top break by 1 (presumably so the maximum value is not left out of
# the last class when the breaks are handed to spplot)
brks[length(brks)] <- brks[length(brks)] + 1
brks
# Define color swatches: the k + 1 break points delimit k classes, so request
# one color per class rather than one per break point
pal <- brewer.pal(length(brks) - 1, "Purples")

# Build the map and keep it in `map` instead of drawing it straight away
map <- spplot(s1, zcol = "PC_ASIAN", at = brks, col.regions = pal)

#s1$PC_ASIAN

In [None]:
# The plot object was assigned to "map" rather than drawn, so evaluating map
# here displays it
map

# Graphing the data

In [None]:
# Histogram of PC_ASIAN: counts per bin (freq = TRUE), right-closed bins and
# the count printed above each bar.
# The bars are filled from `pal`; no `breaks` argument is given, so the bins
# are hist()'s defaults rather than the class breaks held in brks.
hist(
  s1$PC_ASIAN,
  freq = TRUE,
  right = TRUE,
  labels = TRUE,
  col = pal
)

# Spatial autocorrelation

In [None]:
# Build the neighbour list that the spatial weights are derived from.
# queen = FALSE selects the 'rook' relationship: two polygons are neighbours
# only when at least two of their boundary points lie within the snap
# distance of each other.
# snap plays the same role as the precision threshold in GeoDa.
# row.names labels the regions with their FIRST_CAU_ values.
nb <- poly2nb(
  s1,
  row.names = s1$FIRST_CAU_,
  queen = FALSE,
  snap = 1e-05
)

# Number of entries (one per polygon) in the neighbour list
length(nb)


In [None]:
# Example: the entry of nb for the first polygon; the numbers are the indices
# of that polygon's neighbours

nb[1]
# FIRST_CAU_ value of the same (first) polygon
s1$FIRST_CAU_[1]


# Print a summary of the neighbour list nb
summary(nb)

In [None]:
# Draw the polygons, then overlay the neighbour links in red; the links join
# the per-polygon points returned by coordinates()
plot(s1, col = "gray", border = "blue", lwd = 1)
xy <- coordinates(s1)
plot(nb, coords = xy, col = "red", lwd = 1, add = TRUE)

In [None]:
# Turn the neighbour list into a weights matrix.
# style = "B" stores the weights as binaries (0 or 1);
# zero.policy = TRUE allows regions that have no neighbours.
wm <- nb2mat(neighbours = nb, style = "B", zero.policy = TRUE)

In [None]:
# Display the weight matrix. Because it was built with style = "B",
# every entry is either 0 or 1
wm

## Moran's *I* in equation form

$I = \frac{n}{\sum_{i=1}^n (y_i - \bar{y})^2} \frac{\sum_{i=1}^n \sum_{j=1}^n w_{ij}(y_i - \bar{y})(y_j - \bar{y})}{\sum_{i=1}^n \sum_{j=1}^n w_{ij}}$

In [None]:
# Manual computation of Moran's I, following the equation
#   I = (n / sum(dy^2)) * sum_ij(w_ij * dy_i * dy_j) / sum_ij(w_ij)

# n is the number of observations (length of our dataset)
n <- length(s1)

# y is the PC_ASIAN column and ybar is its mean
y <- s1$PC_ASIAN
ybar <- mean(y)

# Deviations of y from the mean
dy <- y - ybar

# n x n matrix of cross-products, pm[i, j] = dy[i] * dy[j].
# outer() builds it directly instead of relying on silent vector recycling.
pm <- outer(dy, dy)

# Weight each cross-product; with binary weights only neighbouring pairs remain
pmw <- pm * wm

# Numerator: sum of the weighted cross-products
spmw <- sum(pmw)

# Sum of all weights
smw <- sum(wm)
sw  <- spmw / smw

# n divided by the sum of squared deviations
vr <- n / sum(dy^2)
MI <- vr * sw

# Moran's I (the trailing newline ends the output line cleanly)
cat("Moran's I is", MI, "\n")

In [None]:
# Build a weights list object from the neighbour list so that Moran's I can be
# computed with spdep's functions instead of by hand.
# style = "W" selects row-standardised weights; zero.policy = TRUE allows
# regions that have no neighbours.
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
lw
# NOTE(review): length() of a listw object counts its list components, not the
# regions; length(lw$neighbours) gives the number of regions -- confirm intent
length(lw)

In [None]:
# Print a summary of the weights list object
summary(lw)

In [None]:
# Moran scatterplot: the scaled PC_ASIAN values (x) against their spatial
# lag (y), i.e. the weighted values of each area's neighbours
s1$sPC_ASIAN <- scale(s1$PC_ASIAN)
s1$lag_PC_ASIAN <- lag.listw(lw, s1$sPC_ASIAN, zero.policy = TRUE)

# Regress the lag on the scaled values once; the same fit is used for the red
# line and for the slope printed below
slope <- lm(s1$lag_PC_ASIAN ~ s1$sPC_ASIAN)

plot(
  x = s1$sPC_ASIAN,
  y = s1$lag_PC_ASIAN,
  main = " Moran Scatterplot PC_ASIAN"
)
# Axes through the origin split the plot into four quadrants
abline(h = 0, v = 0)
abline(slope, lty = 1, lwd = 4, col = "red")

# Note that the slope of the regression line is nearly the same as Moran's I
coefficients(slope)[2]


In [None]:
# Moran's I with spdep: n is the number of regions and S0 the sum of the
# weights. zero.policy = TRUE is passed explicitly, matching how lw was built
# and how lag.listw() is called, so regions without neighbours are tolerated.
moran(s1$PC_ASIAN, lw, n = length(lw$neighbours), S0 = Szero(lw),
      zero.policy = TRUE)

In [None]:
# Moran's I test with randomisation = FALSE.
# zero.policy = TRUE is passed explicitly, matching how lw was built, so
# regions without neighbours do not stop the test.
moran.test(s1$PC_ASIAN, lw, randomisation = FALSE, zero.policy = TRUE)

In [None]:
# Permutation (Monte Carlo) test of Moran's I with 299 simulations.
# zero.policy = TRUE is passed explicitly, matching how lw was built, so
# regions without neighbours do not stop the test.
moran.mc(s1$PC_ASIAN, lw, nsim = 299, zero.policy = TRUE)

# Univariate Local Moran’s I

In [None]:
# Local Moran's I: localmoran() is used instead of moran() and returns one row
# of statistics per observation.
# zero.policy = TRUE is passed explicitly, matching how lw was built, so
# regions without neighbours are tolerated.
locm <- localmoran(s1$PC_ASIAN, lw, zero.policy = TRUE)
summary(locm)

In [None]:
# Scaled PC_ASIAN values and their spatial lag, stored in the columns used by
# the cluster classification that follows
s1$sLPC_ASIAN <- scale(s1$PC_ASIAN)
s1$lag_LPC_ASIAN <- lag.listw(lw, s1$sLPC_ASIAN, zero.policy = TRUE)

# Regress the lag on the scaled values once; the same fit is used for the red
# line and for the slope printed below
slope <- lm(s1$lag_LPC_ASIAN ~ s1$sLPC_ASIAN)

plot(
  x = s1$sLPC_ASIAN,
  y = s1$lag_LPC_ASIAN,
  main = "Local Moran Scatterplot PC_ASIAN"
)
# Axes through the origin split the plot into four quadrants
abline(h = 0, v = 0)
abline(slope, lty = 1, lwd = 4, col = "red")

# Note that the slope of the regression line is nearly the same as Moran's I
coefficients(slope)[2]


In [None]:
# Identify the Moran plot quadrant of each observation to make the cluster map.
# Codes: 1 = High-High, 2 = Low-Low, 3 = High-Low, 4 = Low-High,
#        5 = not significant
z <- as.vector(s1$sLPC_ASIAN)
lag_z <- as.vector(s1$lag_LPC_ASIAN)

# Column 5 of locm is compared against the 0.05 significance level; a missing
# value there is treated as not significant
p_value <- locm[, 5]
is_sig <- !is.na(p_value) & p_value <= 0.05

# Start with every observation as "not significant" (5), then overwrite the
# significant ones with their quadrant. The four quadrants do not overlap, so
# the order of the assignments does not matter.
quad <- rep(5, length(z))
quad[is_sig & z >= 0 & lag_z >= 0] <- 1
quad[is_sig & z <  0 & lag_z <  0] <- 2
quad[is_sig & z >= 0 & lag_z <  0] <- 3
quad[is_sig & z <  0 & lag_z >= 0] <- 4
s1$QUAD_SIG <- quad

In [None]:

# Set the class codes of the thematic map (the values stored in QUAD_SIG).
# seq(1, 5, by = 1) would generate the same sequence.
breaks <- c(1, 2, 3, 4, 5)

# Set the corresponding labels for the thematic map classes
labels <- c("High-High", "Low-Low", "High-Low", "Low-High", "Not Signif.")

# One color per class, in the same order as the labels
pal <- c("red", "blue", "lightpink", "skyblue2", "white")

# QUAD_SIG holds category codes, not a continuous measure: map it as a factor
# so the legend lists the five labelled classes instead of a numeric scale
s1$QUAD_SIG_F <- factor(s1$QUAD_SIG, levels = breaks, labels = labels)

# Generate the map
spplot(s1, zcol = "QUAD_SIG_F",
       col.regions = pal,
       main = list(label = "Local Moran's I, LISA Cluster Map", cex = 1))

In [None]:
# Map the values in column 5 of locm (the column used for the 0.05
# significance test in the cluster classification).
# Both columns are created first; P_SIGB is only initialised here.
s1$P_SIG <- 0
s1$P_SIGB <- 0

# Fill P_SIG with column 5 of locm
s1@data["P_SIG"] <- locm[, 5]

# Unclassed map of P_SIG.
# TODO: a classed version with fixed breaks such as
# c(0.0001, 0.001, 0.01, 0.05) was sketched here but never finished.
spplot(s1, zcol = "P_SIG")