[View in Colaboratory](https://colab.research.google.com/github/vincentei/predict_power_prices/blob/master/AR_model_power_prices.ipynb)

In [0]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
import numpy as np

In [0]:
# Load the raw price table from the CSV that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer='prices.csv')

In [139]:
# Parse the day-month-year timestamp strings into datetime values.
timestamp_format = '%d-%m-%Y %H:%M:%S'
df['deliverydate'] = pd.to_datetime(df['deliverydate'], format=timestamp_format)
df.head(2)

Unnamed: 0,deliverydate,price
0,2010-01-01 00:00:00,13.2
1,2010-01-01 01:00:00,13.14


In [140]:
# Attach the Europe/Amsterdam time zone to the (so far naive) timestamps.
# ambiguous='infer' asks pandas to resolve the repeated hour at the autumn
# DST change from the order of the rows.
amsterdam_times = df['deliverydate'].dt.tz_localize('Europe/Amsterdam', ambiguous='infer')
df['deliverydate'] = amsterdam_times
df.head(2)

Unnamed: 0,deliverydate,price
0,2010-01-01 00:00:00+01:00,13.2
1,2010-01-01 01:00:00+01:00,13.14


In [0]:
# Build lagged copies of the price column to use as regressors.
lags = [1, 2, 3, 7, 14]  # lag lengths in days

for n_days in lags:
    # The shift is counted in rows; 24 rows per day assumes one row per hour
    # with no gaps in the series.
    n_rows = n_days * 24
    df['lag' + str(n_rows)] = df['price'].shift(n_rows)


In [142]:
# Spot-check rows on either side of the first 24-row boundary:
# lag24 should be empty up to row 23 and filled from row 24 onwards.
idx = [0, 1, 23, 24, 25]
df.loc[idx, :]

Unnamed: 0,deliverydate,price,lag24,lag48,lag72,lag168,lag336
0,2010-01-01 00:00:00+01:00,13.2,,,,,
1,2010-01-01 01:00:00+01:00,13.14,,,,,
23,2010-01-01 23:00:00+01:00,31.06,,,,,
24,2010-01-02 00:00:00+01:00,26.75,13.2,,,,
25,2010-01-02 01:00:00+01:00,24.25,13.14,,,,


In [143]:
# Discard every row that has a missing value in any column (this removes the
# leading rows that lack a full lag history) and renumber the index from zero.
df = df.dropna(axis=0, how='any').reset_index(drop=True)
df.head(2)

Unnamed: 0,deliverydate,price,lag24,lag48,lag72,lag168,lag336
0,2010-01-15 00:00:00+01:00,40.3,38.72,34.93,41.84,39.5,13.2
1,2010-01-15 01:00:00+01:00,37.5,36.5,31.78,37.54,38.8,13.14


In [0]:
# Split into a training and a test set.
# Training rows: delivery years 2014 through 2016 (both inclusive).
# Test rows: every remaining row.
# NOTE(review): the complement also contains the years before 2014, so the test
# set is not purely later than the training period - confirm this is intended.
delivery_year = df['deliverydate'].dt.year
idx = delivery_year.between(2014, 2016)

df_train = df[idx].reset_index(drop=True)
df_test = df[~idx].reset_index(drop=True)

In [145]:
# Show the first two rows of the test set.
df_test.iloc[:2]

Unnamed: 0,deliverydate,price,lag24,lag48,lag72,lag168,lag336
0,2010-01-15 00:00:00+01:00,40.3,38.72,34.93,41.84,39.5,13.2
1,2010-01-15 01:00:00+01:00,37.5,36.5,31.78,37.54,38.8,13.14


In [0]:
# Choose the regressor columns and the target used for training.
# NOTE(review): a 'lag72' column is created earlier but is not listed here -
# confirm it is left out on purpose.
features = ['lag24', 'lag48', 'lag168', 'lag336']

y_train = df_train['price']
X_train = df_train.loc[:, features]

In [153]:
# Fit a linear regression of the price on the selected lag columns.
# The fit call stays the last expression so the cell displays the estimator.
lm = LinearRegression()
lm.fit(X=X_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [0]:
# Predict prices for the test rows from the same feature columns used in training.
X_test = df_test.loc[:, features]
y_pred = lm.predict(X=X_test)

In [155]:
# Mean absolute error between the observed and the predicted test prices.
mean_absolute_error(y_true=df_test['price'], y_pred=y_pred)

5.517704927563496