In [1]:
using HTTP
using Dates
using CSV
using DataFrames
using Geodesy
using JSON
using DataStructures

In [2]:
#=
    The object returned by the weather API for historical data includes the sunrise and sunset times and, perhaps, the
    temperature either at sunrise/sunset or at an hour close to that time (which would be called the terminator time).
    The first call to the API would give us the sunrise/sunset time, which could then be used to get the temperature at
    sunrise/sunset for the previous days as well as for previous years.
=#

In [3]:
# Sample earthquake timestamp, in the same "...Z" text format as the `datetime` column of the earthquake CSV.
earthquake_datetime = "2000-01-08T16:47:20.580Z"
# Base endpoint of the Dark Sky API; the API key and the coordinates are appended to it when a request URL is built.
base_url = "https://api.darksky.net/forecast"
# Earthquake catalogue (CSV) used as the input of this notebook.
earthquakes_csv = "/home/antonio/Repos/iono2/earthquakes_csv/1999_2017_eq79.csv" #magnitude 7-9
# World cities table (CSV); it is loaded by `cities_data()`.
cities_csv = "/home/antonio/Repos/iono2/cities_csv/world_cities_data.csv"

"/home/antonio/Repos/iono2/cities_csv/world_cities_data.csv"

In [4]:
"""
    read_apikey(apikey_file)

Return the API key stored on the first line of `apikey_file` as a `String`.

Only the first line is read. Leading and trailing whitespace is removed so that a stray
space or carriage return cannot end up inside a request URL.

# Throws
- `ArgumentError` if the first line is empty or blank (including an empty file).
- Whatever `open` throws when the file cannot be opened.
"""
function read_apikey(apikey_file)
    # `readline` stops after the first line instead of loading the whole file.
    key = strip(open(readline, apikey_file))
    isempty(key) &&
        throw(ArgumentError("no API key found on the first line of $apikey_file"))
    return String(key)
end

read_apikey (generic function with 1 method)

In [5]:
# Load the Dark Sky API key from a local text file; `apikey` is the global used when request URLs are built.
apikey = read_apikey("/home/antonio/Repos/iono2/julia_scripts/darksky_api.txt")
println("Done reading apikey!")

Done reading apikey!


In [6]:
# Load the earthquake catalogue (the CSV at `earthquakes_csv`) into a DataFrame.
earthquakes_df = DataFrame(CSV.File(earthquakes_csv))

Unnamed: 0_level_0,Column1,datetime,latitude,longitude,depth,magnitude
Unnamed: 0_level_1,Int64,String,Float64,Float64,Float64,Float64
1,96,2000-01-08T16:47:20.580Z,-16.925,-174.248,183.4,7.2
2,571,2000-02-25T01:43:58.640Z,-19.528,173.818,33.0,7.1
3,891,2000-03-28T11:00:22.510Z,22.338,143.73,126.5,7.6
4,1149,2000-04-23T09:27:23.320Z,-28.307,-62.99,608.5,7.0
5,1297,2000-05-04T04:21:16.210Z,-1.105,123.573,26.0,7.6
6,1420,2000-05-12T18:43:18.120Z,-23.548,-66.452,225.0,7.2
7,1674,2000-06-04T16:28:26.170Z,-4.721,102.087,33.0,7.9
8,1997,2000-06-18T14:44:13.310Z,-13.802,97.453,10.0,7.9
9,2869,2000-08-06T07:27:12.900Z,28.856,139.556,394.8,7.4
10,3602,2000-10-04T16:58:44.310Z,-15.421,166.91,23.0,7.0


In [8]:
"""
    unix_time(earthquake_time)

Convert a timestamp such as `"2000-01-08T16:47:20.580Z"` to Unix time, rounded to the
nearest whole second and returned as an `Int`.

A single trailing `"Z"` is removed before parsing, because `DateTime` does not accept it.
Timestamps without that suffix are parsed unchanged. No time-zone conversion is applied.

# Throws
- Whatever `DateTime` throws when the remaining text cannot be parsed.
"""
function unix_time(earthquake_time)
    # Remove only a literal trailing "Z". Stripping "whatever the last character is" would
    # also eat significant digits from timestamps that do not carry the suffix.
    eq_time = endswith(earthquake_time, "Z") ? chop(earthquake_time) : earthquake_time
    return round(Int, Dates.datetime2unix(DateTime(eq_time)))
end

unix_time (generic function with 1 method)

In [9]:
"""
    darksky_api_call(url)

Perform an HTTP GET on `url` and return the response body as a `String`.

If the request throws, the exception is logged with `@warn` and the string
`"Error occured: <exception>"` is returned instead of a response body. Callers that parse
the result must check for that prefix, because the error string is not valid JSON.
"""
function darksky_api_call(url)
    try
        response = HTTP.get(url)
        return String(response.body)
    catch e
        # Surface the failure instead of hiding it inside a string that looks like a body.
        # NOTE(review): the exception text may include the request URL, which embeds the
        # API key -- confirm before sharing logs.
        @warn "Dark Sky API request failed" exception = (e, catch_backtrace())
        return "Error occured: $e"
    end
end

darksky_api_call (generic function with 1 method)

In [10]:
"""
    cities_data()

Read the CSV file at the global `cities_csv` path and return its contents as a `DataFrame`.
The file is read again on every call.
"""
cities_data() = CSV.File(cities_csv) |> DataFrame

cities_data (generic function with 1 method)

In [11]:
"""
    forecast_weather(latitude, longitude)

Request the Dark Sky forecast for the given coordinates and return whatever
`darksky_api_call` returns for the URL `<base_url>/<apikey>/<latitude>,<longitude>`.
"""
function forecast_weather(latitude, longitude)
    coordinates = string(latitude, ",", longitude)
    return darksky_api_call(join((base_url, apikey, coordinates), "/"))
end

forecast_weather (generic function with 1 method)

In [12]:
"""
    get_historical_weather(latitude, longitude, earthquake_datetime)

Request the Dark Sky data for the given coordinates at the moment of `earthquake_datetime`.

The timestamp is converted with `unix_time` and appended to the coordinates, giving the URL
`<base_url>/<apikey>/<latitude>,<longitude>,<unix time>`. Returns whatever
`darksky_api_call` returns for that URL.
"""
function get_historical_weather(latitude, longitude, earthquake_datetime)
    timestamp = unix_time(earthquake_datetime)
    request_url = "$base_url/$apikey/$latitude,$longitude,$timestamp"
    return darksky_api_call(request_url)
end

get_historical_weather (generic function with 1 method)

In [13]:
"""
    find_nearest_city(lat, lon, cities_df=cities_data())

Return `(latitude, longitude, distance_km)` for the row of `cities_df` that is closest to
the point `(lat, lon)`. `distance_km` is rounded to a whole number of kilometers.

`cities_df` must have `latitude` and `longitude` columns. It defaults to a fresh
`cities_data()` read; pass an already loaded table when calling this in a loop so the CSV
is not read again for every lookup.

# Throws
- `ArgumentError` if `cities_df` has no rows.
"""
function find_nearest_city(lat, lon, cities_df=cities_data())
    nrow(cities_df) == 0 && throw(ArgumentError("the cities table is empty"))

    # The query point is the same for every city, so build it once.
    point = LLA(lat, lon)

    # The result of `distance` is divided by 1000 to express it in kilometers
    # (presumably it is in meters -- confirm against Geodesy).
    distances_km = [
        distance(LLA(city["latitude"], city["longitude"]), point) / 1000
        for city in eachrow(cities_df)
    ]

    distance_km, index = findmin(distances_km)
    nearest_city_lat = cities_df[index, "latitude"]
    nearest_city_lon = cities_df[index, "longitude"]
    return nearest_city_lat, nearest_city_lon, round(distance_km, digits=0)
end

find_nearest_city (generic function with 1 method)

In [17]:
"""
    create_earthquakes_nearest_cities_df()

Build a table that pairs every earthquake in the global `earthquakes_df` with its closest
city, as found by `find_nearest_city`.

The returned `DataFrame` has one row per earthquake with the earthquake's time, latitude,
longitude, depth and magnitude, followed by the nearest city's latitude, longitude and its
distance from the earthquake.
"""
function create_earthquakes_nearest_cities_df()
    eq_cities_df = DataFrame(;
        earthquake_time=String[],
        earthquake_latitude=Float64[],
        earthquake_longitude=Float64[],
        earthquake_depth=Float64[],
        earthquake_magnitude=Float64[],
        nearest_city_latitude=Float64[],
        nearest_city_longitude=Float64[],
        nearest_city_distance=Float64[],
    )

    for quake in eachrow(earthquakes_df)
        nearest_lat, nearest_lon, nearest_km = find_nearest_city(
            quake.latitude, quake.longitude
        )
        row = (
            earthquake_time=quake.datetime,
            earthquake_latitude=quake.latitude,
            earthquake_longitude=quake.longitude,
            earthquake_depth=quake.depth,
            earthquake_magnitude=quake.magnitude,
            nearest_city_latitude=nearest_lat,
            nearest_city_longitude=nearest_lon,
            nearest_city_distance=nearest_km,
        )
        push!(eq_cities_df, row)
    end

    return eq_cities_df
end

create_earthquakes_nearest_cities_df (generic function with 1 method)

### Keeping only the "yes" rows would be the obvious option, but I chose to also keep the "no" rows in case I find an alternate weather data source.
If I only want the earthquakes for which historical data is available, I can simply filter the dataframe.

In [18]:
# Return `true` when a Dark Sky response body reports an "apparentTemperature" under "currently".
function _has_historical_temperature(apicall_response)
    # `darksky_api_call` reports a failed request as a plain "Error occured: ..." string,
    # which is not JSON and must not be handed to the parser.
    if startswith(apicall_response, "Error occured:")
        @warn "Weather request failed; recording historical temperature as unavailable"
        return false
    end
    parsed = JSON.parse(apicall_response)
    parsed isa AbstractDict || return false
    # Responses without a "currently" section count as "not available" instead of
    # raising a KeyError.
    currently = get(parsed, "currently", nothing)
    return currently isa AbstractDict && haskey(currently, "apparentTemperature")
end

"""
    create_earthquakes_cities_historical_weather_df()

Return the earthquake / nearest-city table with an extra `historical_temp_available`
column holding `"yes"` or `"no"`.

The nearest city is used for the lookup because it is the best chance of historical
weather data being available. One request is made per row through
`get_historical_weather`. A row is marked `"yes"` when the response contains
`currently.apparentTemperature`; failed requests and responses without that field are
marked `"no"`.
"""
function create_earthquakes_cities_historical_weather_df()
    nearestcity_df = create_earthquakes_nearest_cities_df()

    # Typed vector so the new column is a String column rather than `Any`.
    weather = String[]
    for city in eachrow(nearestcity_df)
        apicall_response = get_historical_weather(
            city["nearest_city_latitude"],
            city["nearest_city_longitude"],
            city["earthquake_time"],
        )
        push!(weather, _has_historical_temperature(apicall_response) ? "yes" : "no")
    end

    insertcols!(nearestcity_df, :historical_temp_available => weather)
    return nearestcity_df # table with the weather availability column
end

create_earthquakes_cities_historical_weather_df (generic function with 1 method)

In [19]:
# Build the earthquake / nearest-city table with the weather availability column
# (this makes one API request per earthquake).
x = create_earthquakes_cities_historical_weather_df()

Unnamed: 0_level_0,earthquake_time,earthquake_latitude,earthquake_longitude,earthquake_depth
Unnamed: 0_level_1,String,Float64,Float64,Float64
1,2000-01-08T16:47:20.580Z,-16.925,-174.248,183.4
2,2000-02-25T01:43:58.640Z,-19.528,173.818,33.0
3,2000-03-28T11:00:22.510Z,22.338,143.73,126.5
4,2000-04-23T09:27:23.320Z,-28.307,-62.99,608.5
5,2000-05-04T04:21:16.210Z,-1.105,123.573,26.0
6,2000-05-12T18:43:18.120Z,-23.548,-66.452,225.0
7,2000-06-04T16:28:26.170Z,-4.721,102.087,33.0
8,2000-06-18T14:44:13.310Z,-13.802,97.453,10.0
9,2000-08-06T07:27:12.900Z,28.856,139.556,394.8
10,2000-10-04T16:58:44.310Z,-15.421,166.91,23.0


## Save the dataframe

In [36]:
# Write the table `x` to disk as a CSV file.
CSV.write("/home/antonio/Repos/iono2/earthquakes_csv/eq79_cities_weather.csv", x)

"/home/antonio/Repos/iono2/earthquakes_csv/eq79_cities_weather.csv"

## Let's see for how many earthquakes with a magnitude of 7-9 the historical data is available.

The results are not very promising: for the chosen magnitudes, only about 40 percent of the earthquakes have weather data available.  For some of those, the weather data will probably not be good because of how far from the earthquake epicenter the measurements (temp) were taken.

In [37]:
# Tally how many rows of `x` are "yes" and how many are "no" in the availability column.
counter(x.historical_temp_available)

Accumulator{Any,Int64} with 2 entries:
  "yes" => 108
  "no"  => 155

# Historical weather is available and the temperature is from a max distance of 300 km, or 180 miles away.

In [42]:
# Keep the (distance, availability) pair of every earthquake whose nearest city is at most
# 300 kilometers away and has historical weather (I think 200 miles would be the max?).
# A filtered comprehension lets Julia infer the element type instead of growing a `Vector{Any}`.
eq_usable_data = [
    (earthquake.nearest_city_distance, earthquake.historical_temp_available)
    for earthquake in eachrow(x)
    if earthquake.nearest_city_distance <= 300 &&
       earthquake.historical_temp_available == "yes"
]

## This is the number of earthquakes for which the weather data may be useful, assuming a maximum distance of 300 km.

In [43]:
# Number of entries collected in `eq_usable_data`.
length(eq_usable_data)

67

In [None]:
"""
    generate_historical_unixtime(timestamp; days=90)

Return the Unix times of the `days` days that precede `timestamp`, one entry per day,
starting with the day before and going back in 86400-second (one day) steps.

`timestamp` is converted with `unix_time`. The default of 90 days covers roughly the
previous 3 months. Only the days before `timestamp` itself are produced; previous years
are not generated here.

# Throws
- `ArgumentError` if `days` is negative.
"""
function generate_historical_unixtime(timestamp; days=90)
    days >= 0 || throw(ArgumentError("days must be non-negative, got $days"))
    # Convert once; the base timestamp does not change between iterations.
    base_time = unix_time(timestamp)
    return [base_time - 86400 * i for i in 1:days]
end

In [67]:
# There are 86400 seconds in a day (one day of epoch time).
# TODO: save a dataframe with the weather data per earthquake; its structure is still undecided.
# It is probably better to write this to disk along with the actual historical data.
# Empty table with a text `date` column and a numeric `ambient_temperature` column.
earthquake_historical_dates = DataFrame(date = String[], ambient_temperature = Float64[])
#for eq in eachrow(x)
#
#    println(unix_time(eq["earthquake_time"]) - 86400)
#end
#

Unnamed: 0_level_0,date,ambient_temperature
Unnamed: 0_level_1,String,Float64


In [None]:
# Need to test a couple of times for 5 years, 90 days of data for each.
# save all the historical data.  This will mark the Air temperature data complete.