In [17]:
import os
import pandas as pd
import featuretools as ft

### Import data

In [38]:
# Energy-consumption readings (used by the time-window task).
df = pd.read_csv('./data/energydata_complete.csv')

# Featuretools' mock customer tables, flattened into a single frame. No join keys are
# given, so each merge joins on the columns the two frames have in common.
dfc_tmp = ft.demo.load_mock_customer()
dfc = pd.merge(
    pd.merge(dfc_tmp["transactions"], dfc_tmp["sessions"]),
    dfc_tmp["customers"],
)

### Task 1 - Aggregating transactions with mathematical operations

In [40]:
# Keep only the columns needed to aggregate transaction amounts per customer.
transaction_columns = ['customer_id', 'transaction_id', 'transaction_time', 'amount']
data = dfc[transaction_columns]

In [41]:
# Show the selected transaction columns; the rendered output elides the middle rows.
data

Unnamed: 0,customer_id,transaction_id,transaction_time,amount
0,2,298,2014-01-01 00:00:00,127.64
1,2,2,2014-01-01 00:01:05,109.48
2,2,308,2014-01-01 00:02:10,95.06
3,2,116,2014-01-01 00:03:15,78.92
4,2,371,2014-01-01 00:04:20,31.54
...,...,...,...,...
495,3,112,2014-01-01 08:56:15,55.42
496,3,111,2014-01-01 08:57:20,34.87
497,3,276,2014-01-01 08:58:25,10.94
498,3,266,2014-01-01 08:59:30,19.86


In [42]:
# Aggregation functions applied to the transaction amounts, plus a label for each
# of them in the form "<operation>_value".
operations = [
    'sum',
    'max',
    'min',
    'mean',
    'median',
    'std',
    'count',
]
feature_names = [f'{name}_value' for name in operations]

In [43]:
# One row per customer, one column per aggregation of that customer's amounts.
(
    data
    .groupby('customer_id')['amount']
    .agg(operations)
)

Unnamed: 0_level_0,sum,max,min,mean,median,std,count
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,9025.62,139.43,5.81,71.631905,69.715,40.442059,126
2,7200.28,146.81,8.73,77.422366,75.96,37.705178,93
3,6236.62,149.15,5.89,67.06043,58.93,43.683296,93
4,8727.68,149.95,5.73,80.070459,81.41,45.068765,109
5,6349.66,149.02,7.55,80.375443,78.87,44.09563,79


### Task 2 - Aggregating transactions in a time window

In [44]:
# Restrict the energy data to the timestamp and the two series aggregated below.
energy_columns = ['date', 'Appliances', 'lights']
tmp_df = df[energy_columns]

In [45]:
# Convert the 'date' column to datetime values so it can drive the rolling windows.
# `tmp_df` is a column subset of `df`, so writing into it with
# `tmp_df['date'] = ...` triggers pandas' SettingWithCopyWarning. `assign` returns
# a new frame with the converted column in the same position, so `df` is never
# touched and no warning is raised.
tmp_df = tmp_df.assign(date=pd.to_datetime(tmp_df['date']))

In [31]:
# Mean over a fixed window of 6 consecutive rows; show the first 10 results.
# The first five rows are NaN because their windows hold fewer than 6 rows.
(
    tmp_df
    .rolling(window=6, on='date')
    .mean()
    .head(10)
)

Unnamed: 0,date,Appliances,lights
0,2016-01-11 17:00:00,,
1,2016-01-11 17:10:00,,
2,2016-01-11 17:20:00,,
3,2016-01-11 17:30:00,,
4,2016-01-11 17:40:00,,
5,2016-01-11 17:50:00,55.0,35.0
6,2016-01-11 18:00:00,55.0,38.333333
7,2016-01-11 18:10:00,55.0,41.666667
8,2016-01-11 18:20:00,56.666667,43.333333
9,2016-01-11 18:30:00,60.0,43.333333


In [32]:
# Aggregation functions to run inside each time window.
operations = [
    'sum',
    'max',
    'min',
    'mean',
    'median',
    'std',
    'count',
]
# Apply the same list of aggregations to both measured series.
oper_dict = dict.fromkeys(['Appliances', 'lights'], operations)

In [46]:
# Index by timestamp so the window can be given as a duration ('60min') rather than
# a row count, then compute every aggregation in `oper_dict` for each column.
data_rolled = (
    tmp_df
    .set_index('date')
    .rolling(window='60min')
    .agg(oper_dict)
)

In [47]:
# Show the windowed aggregates; columns are a two-level (series, aggregation) index.
data_rolled

Unnamed: 0_level_0,Appliances,Appliances,Appliances,Appliances,Appliances,Appliances,Appliances,lights,lights,lights,lights,lights,lights,lights
Unnamed: 0_level_1,sum,max,min,mean,median,std,count,sum,max,min,mean,median,std,count
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
2016-01-11 17:00:00,60.0,60.0,60.0,60.000000,60.0,,1.0,30.0,30.0,30.0,30.000000,30.0,,1.0
2016-01-11 17:10:00,120.0,60.0,60.0,60.000000,60.0,0.000000,2.0,60.0,30.0,30.0,30.000000,30.0,0.000000,2.0
2016-01-11 17:20:00,170.0,60.0,50.0,56.666667,60.0,5.773503,3.0,90.0,30.0,30.0,30.000000,30.0,0.000000,3.0
2016-01-11 17:30:00,220.0,60.0,50.0,55.000000,55.0,5.773503,4.0,130.0,40.0,30.0,32.500000,30.0,5.000000,4.0
2016-01-11 17:40:00,280.0,60.0,50.0,56.000000,60.0,5.477226,5.0,170.0,40.0,30.0,34.000000,30.0,5.477226,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-05-27 17:20:00,820.0,220.0,90.0,136.666667,115.0,51.639778,6.0,0.0,0.0,0.0,0.000000,0.0,0.000000,6.0
2016-05-27 17:30:00,690.0,180.0,90.0,115.000000,105.0,33.911650,6.0,0.0,0.0,0.0,0.000000,0.0,0.000000,6.0
2016-05-27 17:40:00,780.0,270.0,90.0,130.000000,105.0,69.570109,6.0,10.0,10.0,0.0,1.666667,0.0,4.082483,6.0
2016-05-27 17:50:00,1080.0,420.0,90.0,180.000000,105.0,136.528385,6.0,20.0,10.0,0.0,3.333333,0.0,5.163978,6.0


### Task 3 - Determining the number of local maxima and minima