In [1]:
from datetime import datetime
import numpy as np
import pandas as pd

In [2]:
# Six irregularly spaced dates in January 2011, used as the Series index.
dates = [datetime(2011,1,2),datetime(2011,1,5),
         datetime(2011,1,7),datetime(2011,1,8),
         datetime(2011,1,10),datetime(2011,1,12)]
# Values are random draws; no seed is set in the visible cells, so the
# numbers change from run to run.
ts = pd.Series(np.random.randn(6), index=dates)
ts

2011-01-02   -1.976813
2011-01-05    0.938698
2011-01-07   -0.911981
2011-01-08    2.104867
2011-01-10    0.545187
2011-01-12    0.505027
dtype: float64

In [3]:
# Group the series into daily bins. This only builds the resampler object;
# an aggregation (e.g. .mean()) must be called on it to get values back.
resampler = ts.resample('D')

In [4]:
resampler

<pandas.core.resample.DatetimeIndexResampler object at 0x00000262B2AAE5C8>

This topic is covered in its own section, "Resampling and Frequency Conversion".

# Generating Date Ranges

In [5]:
index = pd.date_range('2012-04-01', '2012-06-01')
index

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
      

By default, date_range generates daily timestamps. If you pass only a start or end date, you must pass a number of periods to generate.

In [6]:
pd.date_range(start='2012-04-01', periods=20)

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
              dtype='datetime64[ns]', freq='D')

In [7]:
pd.date_range(end='2012-06-01', periods=20)

DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',
               '2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',
               '2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',
               '2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',
               '2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')

In [9]:
pd.date_range('2000-01-01', '2000-12-01', freq='BM') # BusinessMonthEnd - last business day (weekday) of month

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30'],
              dtype='datetime64[ns]', freq='BM')

Alias          Offset type          Description
D              Day                  Calendar daily
B              BusinessDay          Business daily
H              Hour                 Hourly
T or min       Minute               Minutely
S              Second               Secondly
L or ms        Milli                Millisecond (1/1,000 of 1 second)
U              Micro                Microsecond (1/1,000,000 of 1 second)
M              MonthEnd             Last calendar day of month
BM             BusinessMonthEnd     Last business day (weekday) of month
MS             MonthBegin           First calendar day of month
BMS            BusinessMonthBegin   First weekday of month
W-MON,W-TUE,.. Week                 Weekly on given day of week
WOM-1MON,
WOM-2MON,..    WeekOfMonth          Generate weekly dates in the first, second, third or fourth week of the month (e.g. WOM-3FRI for the 3rd Friday of each month)
...
...
...
...

In [10]:
pd.date_range('2012-05-02 12:56:31', periods=5)

DatetimeIndex(['2012-05-02 12:56:31', '2012-05-03 12:56:31',
               '2012-05-04 12:56:31', '2012-05-05 12:56:31',
               '2012-05-06 12:56:31'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# normalize=True resets each timestamp to midnight, dropping the 12:56:31
# time-of-day carried by the start value.
pd.date_range('2012-05-02 12:56:31', periods=5, normalize=True)

DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')

# Frequencies and Date Offsets

In [12]:
from pandas.tseries.offsets import Hour, Minute

In [13]:
hour = Hour()

In [14]:
hour

<Hour>

In [15]:
# An integer argument builds a multiple of the base offset (here four hours).
four_hours = Hour(4)

In [16]:
pd.date_range('2000-01-01', '2000-01-03 23:59', freq='4h')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 04:00:00',
               '2000-01-01 08:00:00', '2000-01-01 12:00:00',
               '2000-01-01 16:00:00', '2000-01-01 20:00:00',
               '2000-01-02 00:00:00', '2000-01-02 04:00:00',
               '2000-01-02 08:00:00', '2000-01-02 12:00:00',
               '2000-01-02 16:00:00', '2000-01-02 20:00:00',
               '2000-01-03 00:00:00', '2000-01-03 04:00:00',
               '2000-01-03 08:00:00', '2000-01-03 12:00:00',
               '2000-01-03 16:00:00', '2000-01-03 20:00:00'],
              dtype='datetime64[ns]', freq='4H')

In [18]:
# Offsets can be added together; the sum is expressed as one offset in minutes.
Hour(2) + Minute(30)

<150 * Minutes>

In [19]:
pd.date_range('2000-01-01', periods=10, freq='1h30min')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:00',
               '2000-01-01 03:00:00', '2000-01-01 04:30:00',
               '2000-01-01 06:00:00', '2000-01-01 07:30:00',
               '2000-01-01 09:00:00', '2000-01-01 10:30:00',
               '2000-01-01 12:00:00', '2000-01-01 13:30:00'],
              dtype='datetime64[ns]', freq='90T')

#  Week of month dates

One useful frequency class is "week of month", whose aliases start with WOM. This enables you to get dates like the third Friday of each month (the example below uses WOM-3TUE, the third Tuesday):

In [27]:
rng = pd.date_range('2020-08-01', '2020-09-30', freq='WOM-3TUE')

In [28]:
rng

DatetimeIndex(['2020-08-18', '2020-09-15'], dtype='datetime64[ns]', freq='WOM-3TUE')

# Shifting (Leading and Lagging) Data

"Shifting" refers to moving data backward and forward through time. Both Series and DataFrame have a shift method for doing naive shifts forward or backward, leaving the index unmodified:

In [30]:
ts = pd.Series(np.random.randn(4),
              index=pd.date_range('1/1/2000', periods=4, freq='M'))
ts

2000-01-31   -0.538172
2000-02-29    0.579572
2000-03-31   -0.250597
2000-04-30    1.477254
Freq: M, dtype: float64

In [31]:
ts.shift(2)

2000-01-31         NaN
2000-02-29         NaN
2000-03-31   -0.538172
2000-04-30    0.579572
Freq: M, dtype: float64

In [32]:
ts.shift(-2)

2000-01-31   -0.250597
2000-02-29    1.477254
2000-03-31         NaN
2000-04-30         NaN
Freq: M, dtype: float64

In [33]:
# Period-over-period relative change: current value / previous value - 1.
# The first entry is NaN because it has no previous value to compare against.
ts / ts.shift(1) -1 

2000-01-31         NaN
2000-02-29   -2.076926
2000-03-31   -1.432382
2000-04-30   -6.894949
Freq: M, dtype: float64

In [34]:
# Passing freq moves the index labels (two month-ends forward) instead of the
# values, so no NaNs are introduced.
ts.shift(2, freq='M')

2000-03-31   -0.538172
2000-04-30    0.579572
2000-05-31   -0.250597
2000-06-30    1.477254
Freq: M, dtype: float64

In [35]:
ts.shift(3, freq='D')

2000-02-03   -0.538172
2000-03-03    0.579572
2000-04-03   -0.250597
2000-05-03    1.477254
dtype: float64

In [37]:
# Shift every index label forward by 90 minutes. 'min' is used instead of the
# legacy 'T' alias: both mean minutes, but 'T' is deprecated in newer pandas
# releases while 'min' is accepted by old and new versions alike.
ts.shift(1, freq='90min')

2000-01-31 01:30:00   -0.538172
2000-02-29 01:30:00    0.579572
2000-03-31 01:30:00   -0.250597
2000-04-30 01:30:00    1.477254
Freq: M, dtype: float64

# Shifting dates with offsets

The pandas date offsets can also be used with datetime or Timestamp objects:

In [38]:
from pandas.tseries.offsets import Day, MonthEnd

In [39]:
now = datetime(2011, 11, 17)

In [40]:
now + 3 * Day()

Timestamp('2011-11-20 00:00:00')

In [41]:
now + MonthEnd()

Timestamp('2011-11-30 00:00:00')

In [43]:
now + MonthEnd(-3)

Timestamp('2011-08-31 00:00:00')

Anchored offsets can explicitly "roll" dates forward or backward by simply using their rollforward or rollback methods, respectively:

In [44]:
offset = MonthEnd()

In [45]:
offset.rollforward(now)

Timestamp('2011-11-30 00:00:00')

In [46]:
offset.rollback(now)

Timestamp('2011-10-31 00:00:00')

A creative use of date offsets is to use these methods with groupby:

In [49]:
# 31 random values spaced four days apart, starting 2000-01-05.
# Note: this rebinds ts, replacing the monthly series from the shifting examples.
ts = pd.Series(np.random.randn(31), index=pd.date_range('1/5/2000', periods=31, freq='4d'))
ts

2000-01-05   -2.136677
2000-01-09   -0.869878
2000-01-13    0.091303
2000-01-17   -0.364612
2000-01-21   -1.603978
2000-01-25   -1.072751
2000-01-29    0.669291
2000-02-02   -0.713826
2000-02-06    1.263062
2000-02-10    0.041788
2000-02-14    0.751242
2000-02-18    1.000549
2000-02-22    2.928056
2000-02-26   -0.151769
2000-03-01   -0.227775
2000-03-05   -0.410764
2000-03-09   -2.523123
2000-03-13   -0.086191
2000-03-17    1.977129
2000-03-21    0.296297
2000-03-25   -2.142008
2000-03-29    0.037852
2000-04-02   -1.438414
2000-04-06   -0.514071
2000-04-10   -0.589986
2000-04-14    0.834856
2000-04-18    0.519923
2000-04-22    0.247954
2000-04-26   -1.537810
2000-04-30   -1.572686
2000-05-04    0.046919
Freq: 4D, dtype: float64

In [53]:
# Use the month-end that each timestamp rolls forward to as the group key, then
# average within each group (offset is the MonthEnd() created earlier).
ts.groupby(offset.rollforward).mean()

2000-01-31   -0.755329
2000-02-29    0.731300
2000-03-31   -0.384823
2000-04-30   -0.506279
2000-05-31    0.046919
dtype: float64

An easier and faster way to do this is to use resample:

In [56]:
# Monthly averages via month-end resampling; the values match the
# groupby(offset.rollforward) result shown earlier.
ts.resample('M').mean()

2000-01-31   -0.755329
2000-02-29    0.731300
2000-03-31   -0.384823
2000-04-30   -0.506279
2000-05-31    0.046919
Freq: M, dtype: float64

In [58]:
ts.resample('M').sum()

2000-01-31   -5.287302
2000-02-29    5.119102
2000-03-31   -3.078583
2000-04-30   -4.050234
2000-05-31    0.046919
Freq: M, dtype: float64

In [60]:
ts

2000-01-05   -2.136677
2000-01-09   -0.869878
2000-01-13    0.091303
2000-01-17   -0.364612
2000-01-21   -1.603978
2000-01-25   -1.072751
2000-01-29    0.669291
2000-02-02   -0.713826
2000-02-06    1.263062
2000-02-10    0.041788
2000-02-14    0.751242
2000-02-18    1.000549
2000-02-22    2.928056
2000-02-26   -0.151769
2000-03-01   -0.227775
2000-03-05   -0.410764
2000-03-09   -2.523123
2000-03-13   -0.086191
2000-03-17    1.977129
2000-03-21    0.296297
2000-03-25   -2.142008
2000-03-29    0.037852
2000-04-02   -1.438414
2000-04-06   -0.514071
2000-04-10   -0.589986
2000-04-14    0.834856
2000-04-18    0.519923
2000-04-22    0.247954
2000-04-26   -1.537810
2000-04-30   -1.572686
2000-05-04    0.046919
Freq: 4D, dtype: float64