In [2]:
import pandas as pd 
import numpy as np

In [3]:
# Hourly index of 72 periods (three full days) starting 2011-01-01.
# An explicit Hour offset is passed instead of the 'H' string alias: that alias
# is deprecated since pandas 2.2 (renamed 'h'), whereas the offset object is
# accepted by old and new pandas alike and yields the same hourly index.
rng = pd.date_range('1/1/2011', periods=72, freq=pd.offsets.Hour())
# Each value equals its position (0..71), so fill/aggregate results are easy to verify by eye.
ts = pd.Series(list(range(len(rng))), index=rng)

In [5]:
# Peek at the first five hourly observations.
ts.head(5)

2011-01-01 00:00:00    0
2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
Freq: H, dtype: int64

In [6]:
# Forward fill: re-index onto a 45-minute grid, where every new timestamp
# takes the most recent earlier observation.
converted = ts.asfreq(freq='45Min', method='ffill')
converted.head(5)

2011-01-01 00:00:00    0
2011-01-01 00:45:00    0
2011-01-01 01:30:00    1
2011-01-01 02:15:00    2
2011-01-01 03:00:00    3
Freq: 45T, dtype: int64

In [7]:
# Backward fill: same 45-minute grid, but every new timestamp takes the
# next later observation instead.
converted = ts.asfreq(freq='45Min', method='bfill')
converted.head(5)

2011-01-01 00:00:00    0
2011-01-01 00:45:00    1
2011-01-01 01:30:00    2
2011-01-01 02:15:00    3
2011-01-01 03:00:00    3
Freq: 45T, dtype: int64

In [9]:
# No fill method: grid timestamps that are absent from the hourly series stay NaN.
# Useful when you are joining with other time series data.
converted = ts.asfreq(freq='45Min', method=None)
converted.head(5)

2011-01-01 00:00:00    0.0
2011-01-01 00:45:00    NaN
2011-01-01 01:30:00    NaN
2011-01-01 02:15:00    NaN
2011-01-01 03:00:00    3.0
Freq: 45T, dtype: float64

### What does the above code do to the size and content of your series?

In [10]:
# Rows 1 through 9 (by position) of the re-indexed series.
converted.iloc[1:10]

2011-01-01 00:45:00    NaN
2011-01-01 01:30:00    NaN
2011-01-01 02:15:00    NaN
2011-01-01 03:00:00    3.0
2011-01-01 03:45:00    NaN
2011-01-01 04:30:00    NaN
2011-01-01 05:15:00    NaN
2011-01-01 06:00:00    6.0
2011-01-01 06:45:00    NaN
Freq: 45T, dtype: float64

In [11]:
# The same positional window of the original hourly series, for comparison.
ts.iloc[1:10]

2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
2011-01-01 05:00:00    5
2011-01-01 06:00:00    6
2011-01-01 07:00:00    7
2011-01-01 08:00:00    8
2011-01-01 09:00:00    9
Freq: H, dtype: int64

In [12]:
# Compare lengths: the original series first, then the re-indexed one.
print(ts.shape, converted.shape, sep='\n')

(72,)
(95,)


### Take a look at the specs for .asfreq(). What are your options for filling in missing data?

In [13]:
ts.asfreq??


### How can you go to a lower frequency rather than a higher one?

In [14]:
# Downsample by keeping only the observations that fall on a 3-hour grid.
# An explicit Hour(3) offset replaces the '3H' alias, which is deprecated since
# pandas 2.2 (renamed '3h'); the offset object works on old and new pandas alike.
converted = ts.asfreq(pd.offsets.Hour(3))

In [15]:
# Rows 1 through 9 (by position) of the downsampled series.
converted.iloc[1:10]

2011-01-01 03:00:00     3
2011-01-01 06:00:00     6
2011-01-01 09:00:00     9
2011-01-01 12:00:00    12
2011-01-01 15:00:00    15
2011-01-01 18:00:00    18
2011-01-01 21:00:00    21
2011-01-02 00:00:00    24
2011-01-02 03:00:00    27
Freq: 3H, dtype: int64

In [16]:
# The same positional window of the original hourly series, for comparison.
ts.iloc[1:10]

2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
2011-01-01 05:00:00    5
2011-01-01 06:00:00    6
2011-01-01 07:00:00    7
2011-01-01 08:00:00    8
2011-01-01 09:00:00    9
Freq: H, dtype: int64

In [21]:
# Let's try the more flexible .resample(): average each 2-hour bin and label
# the bin by its right edge.
# An explicit Hour(2) offset replaces the '2H' alias, which is deprecated since
# pandas 2.2 (renamed '2h'); the offset object works on old and new pandas alike.
ts.resample(pd.offsets.Hour(2), label='right').mean().head(10)

.resample() is now a deferred operation
You called head(...) on this deferred object which materialized it into a series
by implicitly taking the mean.  Use .resample(...).mean() instead
  This is separate from the ipykernel package so we can avoid doing imports until


2011-01-01 02:00:00    0.5
2011-01-01 04:00:00    2.5
2011-01-01 06:00:00    4.5
2011-01-01 08:00:00    6.5
2011-01-01 10:00:00    8.5
Freq: 2H, dtype: float64

In [23]:
ts.resample??

### What is particularly useful is that we can use resample to even out irregular timeseries

In [30]:
# Build an irregular series from 10 distinct, randomly chosen positions of `ts`.
# A fixed seed makes the sample identical on every run; re-run the cells that
# use `irreg_ts` to refresh their outputs.
# .iloc makes the positional lookup explicit: plain ts[...] with integer keys on
# a datetime-indexed Series is deprecated in pandas 2.x.
sample_positions = np.random.RandomState(42).choice(len(ts), size=10, replace=False)
irreg_ts = ts.iloc[sample_positions]
irreg_ts

2011-01-01 06:00:00     6
2011-01-01 08:00:00     8
2011-01-03 20:00:00    68
2011-01-01 00:00:00     0
2011-01-01 07:00:00     7
2011-01-02 02:00:00    26
2011-01-01 13:00:00    13
2011-01-02 04:00:00    28
2011-01-01 18:00:00    18
2011-01-01 23:00:00    23
dtype: int64

In [31]:
# Try converting the (still unsorted) irregular sample to daily frequency.
irreg_ts.asfreq(freq='D')

2011-01-01 06:00:00    6
Freq: D, dtype: int64

#### Why didn't that work?

* The data is not in chronological order; pandas expects ordered data.

In [32]:
# Put the sample back into chronological order before converting frequencies.
irreg_ts = irreg_ts.sort_index(ascending=True)
irreg_ts

2011-01-01 00:00:00     0
2011-01-01 06:00:00     6
2011-01-01 07:00:00     7
2011-01-01 08:00:00     8
2011-01-01 13:00:00    13
2011-01-01 18:00:00    18
2011-01-01 23:00:00    23
2011-01-02 02:00:00    26
2011-01-02 04:00:00    28
2011-01-03 20:00:00    68
dtype: int64

In [34]:
# Daily grid with back fill: each day boundary takes the next available observation.
irreg_ts.asfreq(freq='D', method='bfill')

2011-01-01     0
2011-01-02    26
2011-01-03    68
Freq: D, dtype: int64

In [36]:
# Average all observations that fall within each calendar day.
irreg_ts.resample(rule='D').mean()

2011-01-01    10.714286
2011-01-02    27.000000
2011-01-03    68.000000
Freq: D, dtype: float64

## Try
1. What if you want to go to a higher frequency, but you do not want to back fill or forward fill? Why might you want to do that?
2. What is the difference between .resample() and .asfreq()?
    * .resample -> groupby, then take mean, min, max
    * .asfreq -> upsample and downsample, with optional ffill/bfill
3. How can I forward-fill only a few days? (hint: .fillna)
4. What are some helpful functions to use with a Resampler object?
    * They compute aggregate statistics over larger periods of time as you downsample

In [38]:
# How can I forward-fill only a limited stretch after each observation? (hint: the `limit` argument)
# limit=3 fills at most three consecutive missing 10-minute slots; later slots stay NaN.
# Series.ffill(limit=...) replaces fillna(method='ffill', limit=...), whose
# `method` argument is deprecated since pandas 2.1; the result is the same.
converted.asfreq('10Min', method=None).ffill(limit=3)

2011-01-01 00:00:00     0.0
2011-01-01 00:10:00     0.0
2011-01-01 00:20:00     0.0
2011-01-01 00:30:00     0.0
2011-01-01 00:40:00     NaN
2011-01-01 00:50:00     NaN
2011-01-01 01:00:00     NaN
2011-01-01 01:10:00     NaN
2011-01-01 01:20:00     NaN
2011-01-01 01:30:00     NaN
2011-01-01 01:40:00     NaN
2011-01-01 01:50:00     NaN
2011-01-01 02:00:00     NaN
2011-01-01 02:10:00     NaN
2011-01-01 02:20:00     NaN
2011-01-01 02:30:00     NaN
2011-01-01 02:40:00     NaN
2011-01-01 02:50:00     NaN
2011-01-01 03:00:00     3.0
2011-01-01 03:10:00     3.0
2011-01-01 03:20:00     3.0
2011-01-01 03:30:00     3.0
2011-01-01 03:40:00     NaN
2011-01-01 03:50:00     NaN
2011-01-01 04:00:00     NaN
2011-01-01 04:10:00     NaN
2011-01-01 04:20:00     NaN
2011-01-01 04:30:00     NaN
2011-01-01 04:40:00     NaN
2011-01-01 04:50:00     NaN
                       ... 
2011-01-03 16:10:00     NaN
2011-01-03 16:20:00     NaN
2011-01-03 16:30:00     NaN
2011-01-03 16:40:00     NaN
2011-01-03 16:50:00 