In [1]:
import numpy as np
import pandas as pd


import os, sys
lib_path = os.path.abspath("py")
sys.path.append(lib_path)

import weather as we

In [2]:
def nearest_points(df1, df2, x, y):
    # dataframes to numpy arrays of complex numbers
    p1 = (df1[x] + 1j * df1[y]).values
    p2 = (df2[x] + 1j * df2[y]).values

    # calculate all the distances, between each point in
    # df1 and each point in df2 (using an array-broadcasting trick)
    all_dists = abs(p1[..., np.newaxis] - p2)

    # find indices of the minimal distance from df1 to df2,
    # and from df2 to df1
    nearest_idxs1 = np.argmin(all_dists, axis = 0)
    nearest_idxs2 = np.argmin(all_dists, axis = 1)

    # extract the rows from the dataframes
    nearest_points1 = df1.ix[nearest_idxs1].reset_index()
    nearest_points2 = df2.ix[nearest_idxs2].reset_index()
    return nearest_points1, nearest_points2


In [3]:
events_with_coordinates_file_path = "location data/events_with_coordinates.csv"

In [4]:
events = pd.read_csv(events_with_coordinates_file_path, index_col=0)
events.head()

Unnamed: 0,"Linn,vald",Maakond,Väljakutse liik SOS,Aeg,LocQuery,lng,lat
0,Mustjala vald,Saare mk,TULEKAHJU VÄLJASPOOL HOONEID,2015-01-01 00:02:01,"Mustjala vald, Estonia",22.2328,58.4628
1,Puka vald,Valga mk,TULEKAHJU VÄLJASPOOL HOONEID,2015-01-01 00:09:33,"Puka vald, Estonia",26.227205,58.052534
2,Maardu,Harju mk,TULEKAHJU HOONES,2015-01-01 00:11:16,"Maardu, Estonia",24.980187,59.46877
3,Salme vald,Saare mk,TULEKAHJU HOONES,2015-01-01 00:19:59,"Salme vald, Estonia",22.250636,58.164069
4,Lasnamäe,Tallinn,TULEKAHJU HOONES,2015-01-01 00:21:00,"Lasnamäe, Tallinn, Estonia",24.8612,59.433


In [5]:
cache_path = "./Cache/"
stations_path = "./weather data/stations_estonia.csv"
stations = we.load_stations(path=stations_path)
weather = we.load_estonian_weather(cache_path, stations, [2015,2014,2013,2012,2011])

*** Loading weather ***
Cache './Cache/2015_estonia.csv.gz' exists, skipping filtering...
* Loading year: 2015 *
* Loading year complete! *
Cache './Cache/2014_estonia.csv.gz' exists, skipping filtering...
* Loading year: 2014 *
* Loading year complete! *
Cache './Cache/2013_estonia.csv.gz' exists, skipping filtering...
* Loading year: 2013 *
* Loading year complete! *
Cache './Cache/2012_estonia.csv.gz' exists, skipping filtering...
* Loading year: 2012 *
* Loading year complete! *
Cache './Cache/2011_estonia.csv.gz' exists, skipping filtering...
* Loading year: 2011 *
* Loading year complete! *
*** Loading weather complete ***


In [6]:
nearest_events, nearest_stations = nearest_points(events.reset_index(),stations, "lat", "lng")
nearest_stations.head()

Unnamed: 0,index,ID,lat,lng,elev,name
0,18,EN000026215,58.233,22.5,4.0,KURESSAARE-ROOMASSAA
1,15,EN000026144,58.37,26.27,89.0,TOOMA
2,3,EN000026038,59.3831,24.5831,34.0,TALLINN
3,18,EN000026215,58.233,22.5,4.0,KURESSAARE-ROOMASSAA
4,3,EN000026038,59.3831,24.5831,34.0,TALLINN


In [7]:
merged = events.reset_index().merge(nearest_stations, how='left', left_index=True, right_on="index", suffixes=('', '_station'))

In [8]:
merged.head()

Unnamed: 0,index,"Linn,vald",Maakond,Väljakutse liik SOS,Aeg,LocQuery,lng,lat,index_station,ID,lat_station,lng_station,elev,name
117,0,Mustjala vald,Saare mk,TULEKAHJU VÄLJASPOOL HOONEID,2015-01-01 00:02:01,"Mustjala vald, Estonia",22.2328,58.4628,0.0,EN000026027,59.3,23.35,5.0,OSMUSSAR ISLAND
4463,0,Mustjala vald,Saare mk,TULEKAHJU VÄLJASPOOL HOONEID,2015-01-01 00:02:01,"Mustjala vald, Estonia",22.2328,58.4628,0.0,EN000026027,59.3,23.35,5.0,OSMUSSAR ISLAND
7004,0,Mustjala vald,Saare mk,TULEKAHJU VÄLJASPOOL HOONEID,2015-01-01 00:02:01,"Mustjala vald, Estonia",22.2328,58.4628,0.0,EN000026027,59.3,23.35,5.0,OSMUSSAR ISLAND
9730,0,Mustjala vald,Saare mk,TULEKAHJU VÄLJASPOOL HOONEID,2015-01-01 00:02:01,"Mustjala vald, Estonia",22.2328,58.4628,0.0,EN000026027,59.3,23.35,5.0,OSMUSSAR ISLAND
9744,0,Mustjala vald,Saare mk,TULEKAHJU VÄLJASPOOL HOONEID,2015-01-01 00:02:01,"Mustjala vald, Estonia",22.2328,58.4628,0.0,EN000026027,59.3,23.35,5.0,OSMUSSAR ISLAND


In [9]:
coordinates_file_path = "location data/location_coordinates.csv"
locations = pd.read_csv(coordinates_file_path)
locations.head()

Unnamed: 0.1,Unnamed: 0,lng,lat
0,"Mustjala vald, Estonia",22.2328,58.4628
1,"Puka vald, Estonia",26.227205,58.052534
2,"Maardu, Estonia",24.980187,59.46877
3,"Salme vald, Estonia",22.250636,58.164069
4,"Lasnamäe, Tallinn, Estonia",24.8612,59.433


In [10]:
nearest_locations, nearest_stations = nearest_points(locations,stations, "lat", "lng")
nearest_stations.drop("index", axis=1, inplace=True)
merged = locations.merge(nearest_stations, how='left', left_index=True, right_index = True, suffixes=('', '_station'))
merged.head()

Unnamed: 0.1,Unnamed: 0,lng,lat,ID,lat_station,lng_station,elev,name
0,"Mustjala vald, Estonia",22.2328,58.4628,EN000026215,58.233,22.5,4.0,KURESSAARE-ROOMASSAA
1,"Puka vald, Estonia",26.227205,58.052534,EN000026144,58.37,26.27,89.0,TOOMA
2,"Maardu, Estonia",24.980187,59.46877,EN000026038,59.3831,24.5831,34.0,TALLINN
3,"Salme vald, Estonia",22.250636,58.164069,EN000026215,58.233,22.5,4.0,KURESSAARE-ROOMASSAA
4,"Lasnamäe, Tallinn, Estonia",24.8612,59.433,EN000026038,59.3831,24.5831,34.0,TALLINN
