In [24]:
import sys
sys.path.append('..')

from events_calc import precip_events
from lib.iem import hourly_precip

import numpy as np
import pandas as pd
import warnings

In [6]:
dfs = []
eps = 6
min_samples = 6
date_ = pd.Timestamp('1980-07-01')
batch = 0

pd.options.mode.chained_assignment = None
while date_ < pd.Timestamp('2020-01-01'):
    time_interval = (date_, date_ + pd.Timedelta(365, 'D'))
    rainfall = hourly_precip('SFO', *time_interval)
    rainfall = rainfall[rainfall.precip_in >= 0.01]
    rainfall['clust'] = precip_events(rainfall, eps, min_samples)
    
    max_cluster = rainfall['clust'].max()
    df_to_add = rainfall[rainfall.clust >= 0]
    df_to_add['clust'] += batch
    dfs.append(df_to_add)
    
    date_ = date_ + pd.Timedelta(365, 'D')
    batch += max_cluster

In [7]:
dfs_combined = pd.concat(dfs)

In [8]:
rain_events = dfs_combined.groupby('clust', as_index=False).agg({
    'precip_in': sum,
    'valid': [lambda r: pd.Timestamp(np.nanmean([tsp.value for tsp in r]))]
})
rain_events.columns.droplevel(level=0)
rain_events.columns = ['clust', 'precip', 'mean_time']

In [9]:
rain_events.tail()

Unnamed: 0,clust,precip,mean_time
1473,1473,0.29,2020-04-05 00:34:17.142857216
1474,1474,0.41,2020-04-05 16:30:00.000000000
1475,1475,0.66,2020-04-06 20:34:17.142857216
1476,1476,0.14,2020-05-17 12:30:00.000000000
1477,1477,0.07,2020-05-18 09:20:00.000000000


In [25]:
u = rain_events.precip.mean()
sd = rain_events.precip.std()

threshold = u + 2* sd
u, sd, threshold

(0.5108930987821381, 0.5962053448342627, 1.7033037884506634)

In [11]:
big_storms = rain_events[rain_events.precip > threshold]

In [12]:
def is_el_nino(valid):
    return any([
        pd.Timestamp('1982-07-01') < valid < pd.Timestamp('1983-06-30'),
        pd.Timestamp('1997-07-01') < valid < pd.Timestamp('1998-06-30'),
        pd.Timestamp('2015-07-01') < valid < pd.Timestamp('2016-06-30'),
    ])

In [13]:
non_el_nino_storms = big_storms[big_storms.apply(lambda r: not is_el_nino(r.mean_time), axis=1)]
el_nino_storms = big_storms[big_storms.apply(lambda r: is_el_nino(r.mean_time), axis=1)]

In [15]:
# def export_file_for_ftp(ser, dest):
#     ser.to_csv(dest, index=False, header=ser.shape)

In [14]:
last_15_years = non_el_nino_storms[non_el_nino_storms.mean_time >= pd.Timestamp('2004-06-01')].mean_time.dt.strftime('%Y%m%d')
first_15_years = non_el_nino_storms[non_el_nino_storms.mean_time <= pd.Timestamp('1996-06-01')].mean_time.dt.strftime('%Y%m%d')

In [22]:
non_el_nino_storms

Unnamed: 0,clust,precip,mean_time
8,8,2.56,1981-01-27 12:16:55.384615360
32,32,2.12,1981-11-13 21:00:00.000000000
45,45,1.98,1981-12-29 17:30:00.000000000
47,47,6.51,1982-01-04 17:09:05.454545472
54,54,2.63,1982-02-15 14:11:15.000000000
...,...,...,...
1367,1367,2.29,2017-02-20 20:07:49.565217280
1392,1392,3.54,2018-01-08 21:30:00.000000000
1409,1409,1.86,2018-04-06 20:20:00.000000000
1426,1426,1.73,2019-01-16 07:26:40.000000000


In [23]:
el_nino_storms

Unnamed: 0,clust,precip,mean_time
81,81,2.46,1982-11-18 07:04:26.666666688
87,87,2.05,1982-12-21 09:51:43.448275840
92,92,1.87,1983-01-24 07:21:25.714285696
93,93,1.86,1983-01-27 05:20:00.000000000
639,639,2.63,1997-11-26 14:24:32.727272704
643,643,2.22,1997-12-07 21:08:53.333333376
650,650,1.75,1998-01-12 08:25:42.857142912
658,658,3.82,1998-02-03 03:46:00.000000000
