### Import required modules

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity='all'

In [2]:
import pandas as pd
import numpy as np

### Create a dataframe

In [3]:
df = pd.DataFrame()

df['german_army'] = np.random.randint(low=20000, high=30000, size=100)
df['allied_army'] = np.random.randint(low=20000, high=40000, size=100)
df.index = pd.date_range('1/1/2014', periods=100, freq='H')

df.head()

Unnamed: 0,german_army,allied_army
2014-01-01 00:00:00,20266,20468
2014-01-01 01:00:00,29076,29728
2014-01-01 02:00:00,27911,39794
2014-01-01 03:00:00,25203,23221
2014-01-01 04:00:00,23592,39131


### Truncate the dataframe

In [4]:
tdf=df.truncate(before='1/2/2014', after='1/3/2014')
tdf

Unnamed: 0,german_army,allied_army
2014-01-02 00:00:00,27252,24627
2014-01-02 01:00:00,28866,21078
2014-01-02 02:00:00,21675,28872
2014-01-02 03:00:00,23078,29725
2014-01-02 04:00:00,25799,29022
2014-01-02 05:00:00,26681,29478
2014-01-02 06:00:00,26195,35497
2014-01-02 07:00:00,22824,23646
2014-01-02 08:00:00,29492,36278
2014-01-02 09:00:00,27337,28569


In [5]:
tdf.size
df['1/2/2014':'1/3/2014 00'].size
df['1/2/2014':'2014-01-03 00'].size
df['2014-01-02':'2014-01-03 00'].size

50

50

50

50

### Set the dataframe's index

In [6]:
df.head()

Unnamed: 0,german_army,allied_army
2014-01-01 00:00:00,20266,20468
2014-01-01 01:00:00,29076,29728
2014-01-01 02:00:00,27911,39794
2014-01-01 03:00:00,25203,23221
2014-01-01 04:00:00,23592,39131


In [7]:
df.index = df.index + pd.DateOffset(months=4, days=5)
# df.index = df.index + pd.DateOffset(months=4, days=5,years=1,hours=1,minutes=1,seconds=1)

In [8]:
df.index[:5]
set(df.index.year)
set(df.index.month)
set(df.index.day)
set(df.index.hour)
set(df.index.minute)
set(df.index.second)
len(set(df.index.hour))

DatetimeIndex(['2014-05-06 00:00:00', '2014-05-06 01:00:00',
               '2014-05-06 02:00:00', '2014-05-06 03:00:00',
               '2014-05-06 04:00:00'],
              dtype='datetime64[ns]', freq='H')

{2014}

{5}

{6, 7, 8, 9, 10}

{0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23}

{0}

{0}

24

### View the dataframe

In [9]:
df.head()

Unnamed: 0,german_army,allied_army
2014-05-06 00:00:00,20266,20468
2014-05-06 01:00:00,29076,29728
2014-05-06 02:00:00,27911,39794
2014-05-06 03:00:00,25203,23221
2014-05-06 04:00:00,23592,39131


### Lead a variable 1 hour

In [10]:
df.shift(1).head()

Unnamed: 0,german_army,allied_army
2014-05-06 00:00:00,,
2014-05-06 01:00:00,20266.0,20468.0
2014-05-06 02:00:00,29076.0,29728.0
2014-05-06 03:00:00,27911.0,39794.0
2014-05-06 04:00:00,25203.0,23221.0


### Lag a variable 1 hour

In [11]:
df.shift(-1).tail()

Unnamed: 0,german_army,allied_army
2014-05-09 23:00:00,29710.0,22768.0
2014-05-10 00:00:00,20957.0,32525.0
2014-05-10 01:00:00,24388.0,35062.0
2014-05-10 02:00:00,25505.0,25570.0
2014-05-10 03:00:00,,


### Aggregate into days by summing up the value of each hourly observation

In [28]:
df.resample('M').sum()

Unnamed: 0,german_army,allied_army
2014-05-31,2498319,3009288


In [12]:
df.resample('D').sum()

Unnamed: 0,german_army,allied_army
2014-05-06,599563,720782
2014-05-07,606998,705592
2014-05-08,594189,725481
2014-05-09,597009,741508
2014-05-10,100560,115925


### Aggregate into days by averaging up the value of each hourly observation

In [13]:
df.resample('D').mean()

Unnamed: 0,german_army,allied_army
2014-05-06,24981.791667,30032.583333
2014-05-07,25291.583333,29399.666667
2014-05-08,24757.875,30228.375
2014-05-09,24875.375,30896.166667
2014-05-10,25140.0,28981.25


### Aggregate into days by taking the min value up the value of each hourly observation

In [14]:
df.resample('D').median()

Unnamed: 0,german_army,allied_army
2014-05-06,25020.5,29666.0
2014-05-07,25111.0,29250.0
2014-05-08,25367.5,31367.0
2014-05-09,24806.0,31019.0
2014-05-10,24946.5,29047.5


### Aggregate into days by taking the median value of each day's worth of hourly observation

In [15]:
df.resample('D').median()

Unnamed: 0,german_army,allied_army
2014-05-06,25020.5,29666.0
2014-05-07,25111.0,29250.0
2014-05-08,25367.5,31367.0
2014-05-09,24806.0,31019.0
2014-05-10,24946.5,29047.5


### Aggregate into days by taking the first value of each day's worth of hourly observation

In [16]:
df.resample('D').first()

Unnamed: 0,german_army,allied_army
2014-05-06,20266,20468
2014-05-07,27252,24627
2014-05-08,27177,34310
2014-05-09,27677,28334
2014-05-10,29710,22768


### Aggregate into days by taking the last value of each day's worth of hourly observation

In [17]:
df.resample('D').last()

Unnamed: 0,german_army,allied_army
2014-05-06,27507,34162
2014-05-07,29109,31078
2014-05-08,22316,29992
2014-05-09,22068,25117
2014-05-10,25505,25570


### Aggregate into days by taking the first, last, highest, and lowest value of each day's worth of hourly observation

In [33]:
df.resample('D').ohlc()

Unnamed: 0_level_0,german_army,german_army,german_army,german_army,allied_army,allied_army,allied_army,allied_army
Unnamed: 0_level_1,open,high,low,close,open,high,low,close
2014-05-06,20266,29076,20266,27507,20468,39794,20011,34162
2014-05-07,27252,29492,20636,29109,24627,38590,20941,31078
2014-05-08,27177,29320,20016,22316,34310,39354,20001,29992
2014-05-09,27677,29987,20648,22068,28334,39662,22189,25117
2014-05-10,29710,29710,20957,25505,22768,35062,22768,25570
