In [48]:
%env WORKDIR=~/weatherpy-work
from stormevents.io import load_tornadoes

import numpy as np
import pandas as pd

from sklearn.cluster import DBSCAN
from sklearn.metrics import pairwise_distances

env: WORKDIR=~/weatherpy-work


In [49]:
sample_data = load_tornadoes('1999-05-03 12:00', '1999-05-04 11:59', tz='GMT')

In [50]:
sample_data.columns

Index(['begin_yearmonth', 'begin_day', 'begin_time', 'end_yearmonth',
       'end_day', 'end_time', 'episode_id', 'event_id', 'state', 'state_fips',
       'year', 'month_name', 'event_type', 'cz_type', 'cz_fips', 'cz_name',
       'wfo', 'begin_date_time', 'cz_timezone', 'end_date_time',
       'injuries_direct', 'injuries_indirect', 'deaths_direct',
       'deaths_indirect', 'damage_property', 'damage_crops', 'source',
       'magnitude', 'magnitude_type', 'flood_cause', 'category', 'tor_f_scale',
       'tor_length', 'tor_width', 'tor_other_wfo', 'tor_other_cz_state',
       'tor_other_cz_fips', 'tor_other_cz_name', 'begin_range',
       'begin_azimuth', 'begin_location', 'end_range', 'end_azimuth',
       'end_location', 'begin_lat', 'begin_lon', 'end_lat', 'end_lon',
       'episode_narrative', 'event_narrative', 'data_source'],
      dtype='object')

In [51]:
sample_data.head()

Unnamed: 0,begin_yearmonth,begin_day,begin_time,end_yearmonth,end_day,end_time,episode_id,event_id,state,state_fips,...,end_range,end_azimuth,end_location,begin_lat,begin_lon,end_lat,end_lon,episode_narrative,event_narrative,data_source
0,199905,4,226,199905,4,251,2408634,5696915,NEBRASKA,31,...,3.0,NE,HARTINGTON,42.58,-97.18,42.65,-97.23,,Farm buildings were heavily damaged as well as...,PDC
1,199905,4,755,199905,4,915,2408924,5700578,TEXAS,48,...,10.0,NE,ELDORADO,30.95,-100.73,30.95,-100.5,Severe storms developed across much of West Ce...,A slow moving tornado skipped across a 23 mile...,PDC
2,199905,4,458,199905,4,500,2408248,5700322,OKLAHOMA,40,...,,,SAPULPA,35.98,-96.13,36.0,-96.1,Summary of events for May 3-4 1999:Following a...,An F0 tornado moved through Sapulpa's central ...,PDC
3,199905,4,225,199905,4,235,2406640,5696918,NEBRASKA,31,...,2.0,W,BELDEN,42.37,-97.2,42.42,-97.23,,A barn and farm equipment were destroyed.,PDC
4,199905,4,659,199905,4,659,2408248,5700367,OKLAHOMA,40,...,1.0,S,BOYNTON,35.63,-95.65,35.63,-95.65,Summary of events for May 3-4 1999:Following a...,There was a brief tornado touchdown south of B...,PDC


In [52]:
def dist_heading(lat1, lon1, lat2, lon2):
    if any(map(np.isnan, (lat1, lon1, lat2, lon2))):
        return np.nan, np.nan
    from geographiclib.geodesic import Geodesic
    found = Geodesic.WGS84.Inverse(lat1, lon1, lat2, lon2)
    dist_m, heading = found['s12'], found['azi1']
    dist_mi = dist_m * 0.000621371
    return dist_mi, heading

sample_data[['calc_length', 'calc_heading']] = sample_data.apply(
    lambda r: pd.Series(list(dist_heading(r.begin_lat, r.begin_lon, r.end_lat, r.end_lon))), axis=1)

In [54]:
sample_data

Unnamed: 0,begin_yearmonth,begin_day,begin_time,end_yearmonth,end_day,end_time,episode_id,event_id,state,state_fips,...,begin_lat,begin_lon,end_lat,end_lon,episode_narrative,event_narrative,data_source,calc_length,calc_heading,begin_ts_sec
0,199905,4,0226,199905,4,0251,2408634,5696915,NEBRASKA,31,...,42.58,-97.18,42.65,-97.23,,Farm buildings were heavily damaged as well as...,PDC,5.462958,-27.798115,925784760.0
1,199905,4,0755,199905,4,0915,2408924,5700578,TEXAS,48,...,30.95,-100.73,30.95,-100.50,Severe storms developed across much of West Ce...,A slow moving tornado skipped across a 23 mile...,PDC,13.656137,89.940857,925804500.0
2,199905,4,0458,199905,4,0500,2408248,5700322,OKLAHOMA,40,...,35.98,-96.13,36.00,-96.10,Summary of events for May 3-4 1999:Following a...,An F0 tornado moved through Sapulpa's central ...,PDC,2.174189,50.628423,925793880.0
3,199905,4,0225,199905,4,0235,2406640,5696918,NEBRASKA,31,...,42.37,-97.20,42.42,-97.23,,A barn and farm equipment were destroyed.,PDC,3.777036,-23.966385,925784700.0
4,199905,4,0659,199905,4,0659,2408248,5700367,OKLAHOMA,40,...,35.63,-95.65,35.63,-95.65,Summary of events for May 3-4 1999:Following a...,There was a brief tornado touchdown south of B...,PDC,0.000000,180.000000,925801140.0
5,199905,4,0113,199905,4,0130,1500904,5690093,KANSAS,20,...,37.32,-97.40,37.48,-97.37,,Initial touchdown occurred 4 miles north of We...,PDC,11.156839,8.498473,925780380.0
6,199905,4,0215,199905,4,0215,1500904,5690170,KANSAS,20,...,37.65,-97.02,37.65,-97.02,,Brief touchdown in open country.,PDC,0.000000,180.000000,925784100.0
7,199905,4,0130,199905,4,0155,1500904,5690169,KANSAS,20,...,37.48,-97.37,37.70,-97.33,,The same tornado that initially touched down 4...,PDC,15.330316,8.220329,925781400.0
8,199905,4,0151,199905,4,0200,2407899,5696916,NEBRASKA,31,...,42.47,-98.18,42.48,-98.18,,Power lines/poles downed.,PDC,0.690235,0.000000,925782660.0
9,199905,4,0224,199905,4,0224,2407900,5696917,NEBRASKA,31,...,42.63,-98.08,42.63,-98.08,,Brief touchdown.,PDC,0.000000,180.000000,925784640.0


In [127]:
def relation(tor1, tor2):    
    begin_lat_index = 0
    begin_lon_index = 1
    end_lat_index = 2
    end_lon_index = 3
    begin_ts_index = 4
    cz_hash_index = 5
    
    if tor1[begin_ts_index] > tor2[begin_ts_index]:
        tor1, tor2 = tor2, tor1
    
    lat1, lon1 = tor1[end_lat_index], tor1[end_lon_index]
    if np.isnan(lat1) or np.isnan(lon1):
        lat1, lon1 = tor1[begin_lat_index], tor1[begin_lon_index]
        
    lat2, lon2 = tor2[begin_lat_index], tor2[begin_lon_index]
    dist, _ = dist_heading(lat1, lon1, lat2, lon2)
    
    if dist > 5:
        return True
    if tor1[cz_hash_index] == tor2[cz_hash_index]:
        return True
    return tor2[begin_ts_index] - tor1[begin_ts_index] > 60 * 60

In [128]:
datain = sample_data[['begin_lat', 'begin_lon', 'end_lat', 'end_lon']]
datain.end_lat.fillna(datain.begin_lat, inplace=True)
datain.end_lon.fillna(datain.begin_lon, inplace=True)
datain['begin_ts_sec'] = sample_data.begin_date_time.astype(np.int64) / 10 ** 9
datain['cz_name_hashed'] = sample_data.cz_name.apply(hash)

similarity = pairwise_distances(datain, metric=relation)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [129]:
db = DBSCAN(eps=0.5, metric='precomputed', min_samples=2)
clusters = db.fit_predict(similarity)
sample_data['cluster'] = clusters

In [139]:
relevant_cols =  ['begin_date_time', 'end_date_time', 'begin_lat', 'begin_lon', 
                  'end_lat', 'end_lon', 'tor_length',
                  'tor_f_scale', 'state', 'cluster', 'event_narrative', 'episode_id', 
                  'event_id', 'cz_name', 'tor_other_cz_name']

In [140]:
sample_data.loc[(sample_data.state == 'OKLAHOMA'), relevant_cols].sort_values(['cluster','event_id'])

Unnamed: 0,begin_date_time,end_date_time,begin_lat,begin_lon,end_lat,end_lon,tor_length,tor_f_scale,state,cluster,event_narrative,episode_id,event_id,cz_name,tor_other_cz_name
17,1999-05-04 03:46:00,1999-05-04 03:50:00,35.75,-96.62,35.77,-96.58,2.0,F3,OKLAHOMA,-1,This tornado started out in central Lincoln Co...,2408248,5700318,CREEK,
2,1999-05-04 04:58:00,1999-05-04 05:00:00,35.98,-96.13,36.00,-96.10,2.0,F0,OKLAHOMA,-1,An F0 tornado moved through Sapulpa's central ...,2408248,5700322,CREEK,
95,1999-05-04 05:08:00,1999-05-04 05:18:00,36.02,-96.07,36.07,-96.02,5.5,F1,OKLAHOMA,-1,The same system that earlier caused a destruct...,2408248,5700323,CREEK,
94,1999-05-04 05:18:00,1999-05-04 05:21:00,36.15,-95.95,36.15,-95.95,1.5,F1,OKLAHOMA,-1,A tornado that originally touched down on the ...,2408248,5700366,TULSA,
4,1999-05-04 06:59:00,1999-05-04 06:59:00,35.63,-95.65,35.63,-95.65,0.5,F0,OKLAHOMA,-1,There was a brief tornado touchdown south of B...,2408248,5700367,MUSKOGEE,
12,1999-05-04 07:00:00,1999-05-04 07:01:00,35.95,-95.18,35.98,-95.15,3.0,F1,OKLAHOMA,-1,An F1 tornado cut a swath through Cherokee Cou...,2408248,5700368,CHEROKEE,
18,1999-05-04 08:11:00,1999-05-04 08:11:00,36.00,-94.73,36.00,-94.73,0.5,F1,OKLAHOMA,-1,A brief F1 tornado touchdown caused many trees...,2408248,5700369,ADAIR,
81,1999-05-03 21:51:00,1999-05-03 21:52:00,34.77,-98.38,34.77,-98.38,0.5,F0,OKLAHOMA,-1,Tornado A1. See summary at end of May 3rd stor...,2409595,5705184,COMANCHE,
82,1999-05-03 21:55:00,1999-05-03 21:55:00,34.78,-98.33,34.78,-98.33,0.1,F0,OKLAHOMA,-1,Tornado A2. See summary at end of May 3rd stor...,2409595,5705185,COMANCHE,
83,1999-05-03 22:20:00,1999-05-03 22:35:00,34.88,-98.32,34.97,-98.30,6.0,F3,OKLAHOMA,-1,Tornado A3. See summary at end of May 3rd stor...,2409595,5705186,CADDO,
