In [9]:
library(readxl)
library(tidyverse)
library(data.table)
library(dplyr)
library(readr)
library(ggplot2)
install.packages("rworldmap")
library(rworldmap)
library(maptools)
library(maps)
library(RColorBrewer)
library(repr)
library(ggmap) #DataViz module requirement
library(plotly) #DataViz module requirement
library(ggthemes) #DataViz module requirement
library(rgdal) #DataViz module requirement
library(leaflet) #DataViz module requirement
options(stringsAsFactors = FALSE)
library(rgeos)

Updating HTML index of packages in '.Library'
Making 'packages.html' ... done
rgeos version: 0.4-3, (SVN revision 595)
 GEOS runtime version: 3.6.2-CAPI-1.10.2 
 Linking to sp version: 1.3-1 
 Polygon checking: TRUE 



In [2]:
loadFiles <- function(p) {
    if(FALSE) {
    "       
        Input: Directory of CSVs
        Output: Dataframe of merged CSVs
        Process: Loads all files from data folder into a list and merges all files into one data frame
        Verified by: BP, WCM, JAW, EWGS
        Author: EWGS
        Last Modified: 6/12
    "
    } #Documentation
    
    df <- list.files(path = p, pattern = ".csv", full.names = TRUE) %>% 
        lapply(read_csv, col_types = cols()) %>% #modified to supress the status messages 
        bind_rows
    
    message(paste("Files in \"", path, "\" loaded.", sep = ""))
    return(df)
}

reqCols <- function(df) {
    if(FALSE) {
    "
        Input: Raw dataframe
        Output: Subset of dataframe for needed columns
        Process: Hardcoded columns to be used, na omitted, deduplicated rows
        Verified by: BP, WCM, JAW, EWGS
        Author: JAW
        Last Modified: 6/12
    "
    } #Documentation

    df <- subset(df, select = c(
        'time',
        'latitude',
        'longitude',
        'depth',
        'mag',
        'id',
        'updated'
    ))
    df <- na.omit(df)
    df <- unique(df)
    
    message("Subset Completed.")
    return(df)
}

parseDt <- function(df, col) {
    if(FALSE) {
    "
        Input: Dataframe with a POSIXct datetime column
        Output: Adds a column with the date, time, year into their own columns
        Process: Isolates the dates and times from df$time and df$updated into their own columns.
        Verified by: BP, WCM, JAW, EWGS
        Author: EWGS
        Last Modified: 6/12
    "
    } #Documentation
    
    #New columns will be named by: col + "_suffix"
    newDate = paste(col, "Date", sep = "_")
    newTime = paste(col, "Time", sep = "_")
    newYear = paste(col, "Year", sep = "_")
    
    df[, newDate] <- as.Date(df[[col]])
    df[, newTime] <- as.ITime(strftime(df[[col]], format = "%H:%M:%S"))
    df[, newYear] <- as.numeric(format(df[[col]], '%Y'))
    
    message(paste("Parsed column: ", col, ".", sep = ""))
    return(df)
}

tospdf <- function(df) {
    if(FALSE) {
    "
        Input: Raw dataframe
        Output: Subset of dataframe for needed columns
        Process: Hardcoded columns to be used, na omitted, deduplicated rows
        Verified by: 
        Author: JAW
        Last Modified: 6/13
    "
    } #Documentation
    df <- na.omit(df)
    df <- subset(df, select = c(
        'time',
        'latitude',
        'longitude',
        'depth',
        'mag',
        'id',
        'updated'
    ))
#we select all the columns from the general df

lats <- df$latitude
longs <- df$longitude
#we want to pick out the lat lon cordinates to store the location of each of the df attributes

quakeattributes <- df[,c('depth','mag','time', 'id', 'updated')]
#here we then assign the rest of the coulmns to the attributes
    
adddf <- as.data.frame(quakeattributes)
#make attributes into it's own df
    
ptcoords <- cbind(as.numeric(longs),as.numeric(lats))
#we want to perpare the cordinates for our SpatialPointsDataFrame function
    
spts <- SpatialPointsDataFrame(ptcoords,data=adddf,proj4string=CRS("+proj=longlat +datum=WGS84"))
#we now have a spatial points df that we can use to create maps and in our analysis
     message(paste("spatial df created"))
    return(spts)
}

In [3]:
path <- "~/jupyter/cs2019_Group11/GroupProducts/data" #Explicit location of data

#we run all the previously created functions
df <- loadFiles(path) %>%
    reqCols %>%
    parseDt('time')

dfsp <- tospdf(df)

Files in "~/jupyter/cs2019_Group11/GroupProducts/data" loaded.
Subset Completed.
Parsed column: time.
spatial df created


# Start Module 4

In [4]:
#jessica

#transform data into another datum for better readability 
tpts <- spTransform(dfsp,  CRS("+init=epsg:4087"))


In [6]:
#jessica

#load region with homogeneous seismic conditions 
tectonicdata = "fe.kmz"
tectonicFeatures <- readOGR(tectonicdata)


OGR data source with driver: LIBKML 
Source: "/dsa/home/jaw56m/jupyter/cs2019_Group11/GroupProducts/fe.kmz", layer: "fe"
with 754 features
It has 12 fields


“Z-dimension discarded”

In [7]:
#jessica

#also transform data into same regions as the earthquake data for later comparisons
transTectonicFeatures <- spTransform(tectonicFeatures,  CRS("+init=epsg:4087"))


In [10]:
#jessica

#add empty names row to be added to 
tpts@data$newname = "empty"

#start for loop to go through each seismic homogeneous region
for (i in 1:nrow(transTectonicFeatures)) {
    region = transTectonicFeatures[i, ]  

#here we determine which region each earthquake belongs to
    selFeat <- overGeomGeom(region, tpts, returnList = TRUE, fn = NULL)

#Unlist the list
    intSet = unlist(selFeat)
    
#print to check to ensure that it ran correctly
    print(i)
    
#we want to now add an attribute that captures which region each earthquake belongs to
    #to be named there must be at least one earthquake 
    if (length(intSet) > 0){
        tpts@data[intSet,]$newname <- i}
    #if no earthquakes in the region then none are named
    else {
        print("none")}
     }


[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] "none"
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39
[1] 40
[1] 41
[1] 42
[1] 43
[1] 44
[1] "none"
[1] 45
[1] 46
[1] 47
[1] 48
[1] 49
[1] 50
[1] 51
[1] 52
[1] 53
[1] 54
[1] 55
[1] 56
[1] 57
[1] 58
[1] 59
[1] 60
[1] 61
[1] 62
[1] 63
[1] 64
[1] 65
[1] 66
[1] 67
[1] 68
[1] 69
[1] 70
[1] 71
[1] 72
[1] 73
[1] 74
[1] 75
[1] 76
[1] "none"
[1] 77
[1] 78
[1] 79
[1] 80
[1] 81
[1] 82
[1] 83
[1] 84
[1] 85
[1] 86
[1] 87
[1] 88
[1] 89
[1] 90
[1] 91
[1] 92
[1] 93
[1] 94
[1] 95
[1] 96
[1] 97
[1] 98
[1] 99
[1] 100
[1] 101
[1] 102
[1] 103
[1] "none"
[1] 104
[1] 105
[1] 106
[1] 107
[1] 108
[1] 109
[1] 110
[1] 111
[1] 112
[1] "none"
[1] 113
[1] 114
[1] 115
[1] 116
[1] 117
[1] 118
[1] 119
[1] 120
[1] 121
[1] 122
[1] 123
[1] 124
[1] 125
[1] 126
[1] 127
[1] 128
[1] 129
[1] 130
[1] "none"
[1

In [11]:
#jessica

head(tpts)

           coordinates   depth mag                time         id
1  (421900.9, 5059471)   2.000 2.6 1999-07-01 23:29:27 usp0009awp
2 (-13667580, 5199789)   4.613 3.1 1999-07-01 22:10:51 uw10474128
3  (19811750, 5788057) 106.800 4.2 1999-07-01 21:42:41 usp0009awk
4 (-8020903, -3531722)  26.100 3.9 1999-07-01 20:50:11 usp0009awh
5 (-17094220, 6666924) 136.300 3.1 1999-07-01 19:52:19 usp0009awf
6   (3469383, 4013068)  30.200 3.8 1999-07-01 19:52:12 usp0009awe
              updated newname
1 2014-11-07 01:08:01     374
2 2016-07-23 18:59:57       6
3 2014-11-07 01:08:01       1
4 2014-11-07 01:08:01      53
5 2014-11-07 01:08:01      24
6 2014-11-07 01:08:01     741

In [13]:
#jessica

head(dfsp)

          coordinates   depth mag                time         id
1       (3.79, 45.45)   2.000 2.6 1999-07-01 23:29:27 usp0009awp
2 (-122.778, 46.7105)   4.613 3.1 1999-07-01 22:10:51 uw10474128
3   (177.972, 51.995) 106.800 4.2 1999-07-01 21:42:41 usp0009awk
4  (-72.053, -31.726)  26.100 3.9 1999-07-01 20:50:11 usp0009awh
5    (-153.56, 59.89) 136.300 3.1 1999-07-01 19:52:19 usp0009awf
6     (31.166, 36.05)  30.200 3.8 1999-07-01 19:52:12 usp0009awe
              updated
1 2014-11-07 01:08:01
2 2016-07-23 18:59:57
3 2014-11-07 01:08:01
4 2014-11-07 01:08:01
5 2014-11-07 01:08:01
6 2014-11-07 01:08:01

In [14]:
#jessica

head(df)

time,latitude,longitude,depth,mag,id,updated,time_Date,time_Time,time_Year
1999-07-01 23:29:27,45.45,3.79,2.0,2.6,usp0009awp,2014-11-07 01:08:01,1999-07-01,18:29:27,1999
1999-07-01 22:10:51,46.7105,-122.778,4.613,3.1,uw10474128,2016-07-23 18:59:57,1999-07-01,17:10:51,1999
1999-07-01 21:42:41,51.995,177.972,106.8,4.2,usp0009awk,2014-11-07 01:08:01,1999-07-01,16:42:41,1999
1999-07-01 20:50:11,-31.726,-72.053,26.1,3.9,usp0009awh,2014-11-07 01:08:01,1999-07-01,15:50:11,1999
1999-07-01 19:52:19,59.89,-153.56,136.3,3.1,usp0009awf,2014-11-07 01:08:01,1999-07-01,14:52:19,1999
1999-07-01 19:52:12,36.05,31.166,30.2,3.8,usp0009awe,2014-11-07 01:08:01,1999-07-01,14:52:12,1999
