In [1]:
import pandas as pd
import numpy as np

In [2]:
# Three days of hourly timestamps starting 2011-01-01 00:00.
# Lowercase 'h' is the current hourly alias; uppercase 'H' is deprecated in newer pandas.
rng = pd.date_range('1/1/2011', periods=72, freq='h')
# Values are simply 0..71, so each value equals the number of hours since the start.
ts = pd.Series(range(len(rng)), index=rng)

In [3]:
# Preview the first five rows of the hourly series.
ts.head()

2011-01-01 00:00:00    0
2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
Freq: H, dtype: int64

In [4]:
# Re-index onto a 45-minute grid; each new slot takes the most recent
# hourly observation at or before it (forward fill).
converted = ts.asfreq(freq='45Min', method='ffill')

In [5]:
# First rows on the 45-minute grid; values are carried forward from the last hourly observation.
converted.head()

2011-01-01 00:00:00    0
2011-01-01 00:45:00    0
2011-01-01 01:30:00    1
2011-01-01 02:15:00    2
2011-01-01 03:00:00    3
Freq: 45T, dtype: int64

### What does the above code do to the size and content of your series?

In [6]:
# Rows 1 through 9 by position on the 45-minute grid.
converted.iloc[1:10]

2011-01-01 00:45:00    0
2011-01-01 01:30:00    1
2011-01-01 02:15:00    2
2011-01-01 03:00:00    3
2011-01-01 03:45:00    3
2011-01-01 04:30:00    4
2011-01-01 05:15:00    5
2011-01-01 06:00:00    6
2011-01-01 06:45:00    6
Freq: 45T, dtype: int64

In [7]:
# Number of observations in the original hourly series.
ts.shape

(72,)

In [8]:
# Number of observations after conversion to 45-minute spacing (more rows than the original).
converted.shape

(95,)

In [9]:
# Rows 1 through 9 by position of the original hourly series, for comparison.
ts.iloc[1:10]

2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
2011-01-01 05:00:00    5
2011-01-01 06:00:00    6
2011-01-01 07:00:00    7
2011-01-01 08:00:00    8
2011-01-01 09:00:00    9
Freq: H, dtype: int64

### Take a look at the specs for .asfreq(). What are your options for filling in missing data?

In [None]:
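#method: {'backfill'/'bfill', 'pad'/'ffill'}, or None (the default) to leave the new slots as NaN
#'pad' / 'ffill': propagate last valid observation forward to next valid
#'backfill' / 'bfill': use NEXT valid observation to fill
#fill_value: a constant to put in the slots created by upsampling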

### How can you go to less frequent rather than more frequent?

In [10]:
# Downsample by keeping every third hourly observation (asfreq selects, it does not aggregate).
# Lowercase 'h' is the current hourly alias; uppercase 'H' is deprecated in newer pandas.
converted = ts.asfreq('3h')

In [11]:
# Rows 1 through 9 by position of the 3-hourly series.
converted.iloc[1:10]

2011-01-01 03:00:00     3
2011-01-01 06:00:00     6
2011-01-01 09:00:00     9
2011-01-01 12:00:00    12
2011-01-01 15:00:00    15
2011-01-01 18:00:00    18
2011-01-01 21:00:00    21
2011-01-02 00:00:00    24
2011-01-02 03:00:00    27
Freq: 3H, dtype: int64

In [12]:
# The same positional window of the original hourly series, for comparison.
ts.iloc[1:10]

2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
2011-01-01 05:00:00    5
2011-01-01 06:00:00    6
2011-01-01 07:00:00    7
2011-01-01 08:00:00    8
2011-01-01 09:00:00    9
Freq: H, dtype: int64

In [13]:
# Let's try the more flexible .resample(): average each 2-hour bin, then show rows 1 through 9 by position.
# Lowercase 'h' is the current hourly alias; uppercase 'H' is deprecated in newer pandas.
ts.resample('2h').mean().iloc[1:10]

2011-01-01 02:00:00     2.5
2011-01-01 04:00:00     4.5
2011-01-01 06:00:00     6.5
2011-01-01 08:00:00     8.5
2011-01-01 10:00:00    10.5
2011-01-01 12:00:00    12.5
2011-01-01 14:00:00    14.5
2011-01-01 16:00:00    16.5
2011-01-01 18:00:00    18.5
Freq: 2H, dtype: float64

In [14]:
# What's particularly useful is that we can use resample to even out irregular time series
# Build an irregular series from 10 distinct positions drawn at random (kept in draw order, so unsorted).
# No seed is set in this cell, so every run picks different rows.
# .iloc makes the selection explicitly positional; plain ts[[...]] with integer keys on a
# DatetimeIndex relies on a positional fallback that newer pandas deprecates.
irreg_ts = ts.iloc[np.random.choice(len(ts), size=10, replace=False)]

In [15]:
# Display the sample; rows appear in draw order, not chronological order.
irreg_ts

2011-01-03 10:00:00    58
2011-01-01 23:00:00    23
2011-01-03 00:00:00    48
2011-01-01 13:00:00    13
2011-01-01 10:00:00    10
2011-01-01 20:00:00    20
2011-01-01 05:00:00     5
2011-01-01 21:00:00    21
2011-01-03 23:00:00    71
2011-01-02 11:00:00    35
dtype: int64

In [16]:
# Attempt a daily-frequency conversion on the unsorted sample (the stored output is an empty Series).
irreg_ts.asfreq('D')

Series([], Freq: D, dtype: int64)

In [None]:
# The index is not in chronological order (not consecutive), so asfreq('D') returned an empty Series.

### Why didn't that work?

In [17]:
# Put the sample in chronological order so the first and last index labels bound it in time.
irreg_ts = irreg_ts.sort_index()
irreg_ts

2011-01-01 05:00:00     5
2011-01-01 10:00:00    10
2011-01-01 13:00:00    13
2011-01-01 20:00:00    20
2011-01-01 21:00:00    21
2011-01-01 23:00:00    23
2011-01-02 11:00:00    35
2011-01-03 00:00:00    48
2011-01-03 10:00:00    58
2011-01-03 23:00:00    71
dtype: int64

In [18]:
# Same conversion after sorting: the daily grid starts at the earliest timestamp,
# and only grid points that exactly match an observation keep a value (the rest are NaN).
irreg_ts.asfreq('D')

2011-01-01 05:00:00    5.0
2011-01-02 05:00:00    NaN
2011-01-03 05:00:00    NaN
Freq: D, dtype: float64

In [19]:
# Count the observations that fall into each calendar-day bin.
irreg_ts.resample('D').count()

2011-01-01    6
2011-01-02    1
2011-01-03    3
Freq: D, dtype: int64

# Try

(1) What if you want to go to a higher frequency, but you don't want to back fill or forward fill? Why might you want to do that?

(2) What is the difference between .resample() and .asfreq()?

(3) How can I forward-fill only a few days? (hint: the `limit` argument of .fillna() / .ffill())

(4) What are some helpful functions to use with a Resampler object?

In [25]:
# 1. Leave method=None (the default) if you do not want to interpolate; the new slots stay NaN
# 2. resample will produce an object that you can do more things with; asfreq is more limited
# 3. Cap the forward fill with limit=...: here at most 5 consecutive empty hourly slots are filled
# Draw a fresh random sample of 10 positions (no seed is set in this cell, so every run differs)
# and sort it chronologically. .iloc keeps the selection explicitly positional.
irreg_ts = ts.iloc[np.random.choice(len(ts), size=10, replace=False)].sort_index()
# Resampler.ffill(limit=...) replaces the deprecated Resampler.fillna(method='ffill', limit=...);
# lowercase 'h' is the current hourly alias (uppercase 'H' is deprecated in newer pandas).
irreg_ts.resample('h').ffill(limit=5)

2011-01-01 00:00:00     0.0
2011-01-01 01:00:00     1.0
2011-01-01 02:00:00     2.0
2011-01-01 03:00:00     2.0
2011-01-01 04:00:00     4.0
2011-01-01 05:00:00     4.0
2011-01-01 06:00:00     4.0
2011-01-01 07:00:00     4.0
2011-01-01 08:00:00     4.0
2011-01-01 09:00:00     4.0
2011-01-01 10:00:00     NaN
2011-01-01 11:00:00     NaN
2011-01-01 12:00:00     NaN
2011-01-01 13:00:00     NaN
2011-01-01 14:00:00    14.0
2011-01-01 15:00:00    14.0
2011-01-01 16:00:00    16.0
2011-01-01 17:00:00    16.0
2011-01-01 18:00:00    16.0
2011-01-01 19:00:00    16.0
2011-01-01 20:00:00    16.0
2011-01-01 21:00:00    16.0
2011-01-01 22:00:00     NaN
2011-01-01 23:00:00     NaN
2011-01-02 00:00:00     NaN
2011-01-02 01:00:00     NaN
2011-01-02 02:00:00     NaN
2011-01-02 03:00:00     NaN
2011-01-02 04:00:00     NaN
2011-01-02 05:00:00    29.0
2011-01-02 06:00:00    29.0
2011-01-02 07:00:00    29.0
2011-01-02 08:00:00    29.0
2011-01-02 09:00:00    29.0
2011-01-02 10:00:00    29.0
2011-01-02 11:00:00 

In [26]:
# Hourly bins without any observation come out as NaN.
# Lowercase 'h' is the current hourly alias; uppercase 'H' is deprecated in newer pandas.
irreg_ts.resample('h').mean()

2011-01-01 00:00:00     0.0
2011-01-01 01:00:00     1.0
2011-01-01 02:00:00     2.0
2011-01-01 03:00:00     NaN
2011-01-01 04:00:00     4.0
2011-01-01 05:00:00     NaN
2011-01-01 06:00:00     NaN
2011-01-01 07:00:00     NaN
2011-01-01 08:00:00     NaN
2011-01-01 09:00:00     NaN
2011-01-01 10:00:00     NaN
2011-01-01 11:00:00     NaN
2011-01-01 12:00:00     NaN
2011-01-01 13:00:00     NaN
2011-01-01 14:00:00    14.0
2011-01-01 15:00:00     NaN
2011-01-01 16:00:00    16.0
2011-01-01 17:00:00     NaN
2011-01-01 18:00:00     NaN
2011-01-01 19:00:00     NaN
2011-01-01 20:00:00     NaN
2011-01-01 21:00:00     NaN
2011-01-01 22:00:00     NaN
2011-01-01 23:00:00     NaN
2011-01-02 00:00:00     NaN
2011-01-02 01:00:00     NaN
2011-01-02 02:00:00     NaN
2011-01-02 03:00:00     NaN
2011-01-02 04:00:00     NaN
2011-01-02 05:00:00    29.0
2011-01-02 06:00:00     NaN
2011-01-02 07:00:00     NaN
2011-01-02 08:00:00     NaN
2011-01-02 09:00:00     NaN
2011-01-02 10:00:00     NaN
2011-01-02 11:00:00 

In [27]:
# Number of observations per hourly bin; empty bins count as 0 rather than NaN.
# Lowercase 'h' is the current hourly alias; uppercase 'H' is deprecated in newer pandas.
irreg_ts.resample('h').count()

2011-01-01 00:00:00    1
2011-01-01 01:00:00    1
2011-01-01 02:00:00    1
2011-01-01 03:00:00    0
2011-01-01 04:00:00    1
2011-01-01 05:00:00    0
2011-01-01 06:00:00    0
2011-01-01 07:00:00    0
2011-01-01 08:00:00    0
2011-01-01 09:00:00    0
2011-01-01 10:00:00    0
2011-01-01 11:00:00    0
2011-01-01 12:00:00    0
2011-01-01 13:00:00    0
2011-01-01 14:00:00    1
2011-01-01 15:00:00    0
2011-01-01 16:00:00    1
2011-01-01 17:00:00    0
2011-01-01 18:00:00    0
2011-01-01 19:00:00    0
2011-01-01 20:00:00    0
2011-01-01 21:00:00    0
2011-01-01 22:00:00    0
2011-01-01 23:00:00    0
2011-01-02 00:00:00    0
2011-01-02 01:00:00    0
2011-01-02 02:00:00    0
2011-01-02 03:00:00    0
2011-01-02 04:00:00    0
2011-01-02 05:00:00    1
2011-01-02 06:00:00    0
2011-01-02 07:00:00    0
2011-01-02 08:00:00    0
2011-01-02 09:00:00    0
2011-01-02 10:00:00    0
2011-01-02 11:00:00    0
2011-01-02 12:00:00    0
2011-01-02 13:00:00    0
2011-01-02 14:00:00    0
2011-01-02 15:00:00    1
