In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error


In [80]:
# Toy series: time steps 1..6 paired with observations; the last one is missing.
idx = pd.Index(np.arange(1, 7))

# Build the frame with one labelled row per sequence, then transpose so that
# "time" and "value" end up as the columns.
ts = pd.DataFrame(
    [idx, [100, 110, 108, 115, 120, np.nan]],
    index=["time", "value"],
).T


In [81]:
# Display the frame built above to check the time/value columns.
ts


Unnamed: 0,time,value
0,1.0,100.0
1,2.0,110.0
2,3.0,108.0
3,4.0,115.0
4,5.0,120.0
5,6.0,


In [84]:
# Supervised framing: X is the observation one step earlier (lag 1) and
# y is the observation at the current step.
ts = ts.assign(X=ts["value"].shift(periods=1), y=ts["value"])

In [88]:
# Display the frame again to inspect the added X and y columns.
ts


Unnamed: 0,time,value,X,y
0,1.0,100.0,,100.0
1,2.0,110.0,100.0,110.0
2,3.0,108.0,110.0,108.0
3,4.0,115.0,108.0,115.0
4,5.0,120.0,115.0,120.0
5,6.0,,120.0,


In [6]:
# NOTE(review): neither X nor y is assigned in any cell visible in this
# notebook, so this cell presumably depends on state from an earlier session
# and would raise NameError on a fresh kernel -- confirm, then define or remove.
X, y


(0    NaN
 1    100
 2    110
 3    108
 4    115
 Name: value, dtype: object,
 0    110
 1    108
 2    115
 3    120
 4    NaN
 Name: value, dtype: object)

In [89]:
# Converts time-series data into a supervised-learning data set.
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning array of lagged copies.

    Parameters
    ----------
    data : list, 1-D or 2-D array-like, Series or DataFrame
        Observations ordered in time; anything ``pd.DataFrame`` accepts.
    n_in : int, default 1
        Number of lagged steps (t-n_in ... t-1) placed first as inputs.
    n_out : int, default 1
        Number of steps (t ... t+n_out-1) placed last as outputs.
    dropnan : bool, default True
        Drop every row containing a NaN. This removes the edge rows that
        shifting leaves incomplete, and also any row with a NaN that was
        already present in ``data``.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per retained time step; columns run from the
        oldest lag to the furthest lead.
    """
    df = pd.DataFrame(data)

    # Input columns: oldest lag first (t-n_in, ..., t-1).
    lags = [df.shift(i) for i in range(n_in, 0, -1)]

    # Output columns: t, t+1, ..., t+n_out-1.
    leads = [df.shift(-i) for i in range(n_out)]

    agg = pd.concat(lags + leads, axis=1)
    if dropnan:
        agg = agg.dropna()

    return agg.values


In [90]:
# Frame the five observations as (previous value, current value) rows.
tsa = series_to_supervised(
    data=[100, 110, 108, 115, 120], n_in=1, n_out=1, dropnan=True
)


In [93]:
# Wrap the array in a DataFrame, labelling the input and target columns.
ts = pd.DataFrame(tsa, columns=["X", "y"])

In [94]:
# Display the frame of (X, y) pairs.
ts

Unnamed: 0,X,y
0,100.0,110.0
1,110.0,108.0
2,108.0,115.0
3,115.0,120.0


In [2]:
def train_test_split(data, n_test):
    """Split time-ordered rows into a leading train part and a trailing test part.

    NOTE: this definition shadows ``sklearn.model_selection.train_test_split``,
    which is imported at the top of the notebook; once this cell has run, the
    name refers to this function.

    Parameters
    ----------
    data : 2-D array
        Must support ``len()`` and ``data[rows, :]`` indexing (for example a
        ``numpy.ndarray``). Rows are kept in their existing order.
    n_test : int
        Number of trailing rows to hold out as the test set. Must be >= 0.
        If it exceeds the number of rows, the train part is empty and the
        test part is all of ``data``.

    Returns
    -------
    tuple
        ``(train, test)`` slices of ``data``.

    Raises
    ------
    ValueError
        If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError("n_test must be non-negative, got %r" % (n_test,))

    # Use an explicit boundary instead of negative indices: with n_test == 0,
    # data[:-0] is empty and data[-0:] is the whole array, which would swap
    # the two parts.
    split = max(len(data) - n_test, 0)
    return data[:split, :], data[split:, :]

In [4]:
# Fit a random forest model and make a one-step prediction
def random_forest_forecast(train, testX, n_estimators=1000, random_state=None):
    """Fit a random forest on ``train`` and predict the target for one input row.

    Parameters
    ----------
    train : array-like
        Rows of training data; converted with ``np.asarray`` and indexed as a
        2-D array. The last column is the target, all preceding columns are
        the inputs.
    testX : 1-D array-like
        Input values of the single row to predict.
    n_estimators : int, default 1000
        Number of trees, passed to ``RandomForestRegressor``.
    random_state : int or None, default None
        Seed passed to ``RandomForestRegressor``. Set it to an integer to get
        repeatable forecasts; the default keeps the previous unseeded behavior.

    Returns
    -------
    The first (and only) element of the model's prediction for ``testX``.
    """
    train = np.asarray(train)

    # Split into input and output columns
    trainX, trainy = train[:, :-1], train[:, -1]

    # Fit model
    model = RandomForestRegressor(
        n_estimators=n_estimators, random_state=random_state
    )
    model.fit(trainX, trainy)

    # Make a one-step prediction; the single row is wrapped in a list so the
    # model receives a 2-D input of one sample.
    yhat = model.predict([testX])
    return yhat[0]


In [5]:
# Walk-forward validation for univariate data


def walk_forward_validation(data, n_test):
    """Evaluate one-step forecasts over the last ``n_test`` rows of ``data``.

    ``data`` is split with ``train_test_split``. For each test row, in order,
    ``random_forest_forecast`` is called with the rows seen so far and the
    row's input columns (all but the last); the row is then added to the
    history and a progress line is printed.

    Returns a tuple ``(error, actuals, predictions)`` where ``error`` is the
    mean absolute error between the test rows' last column and the forecasts.
    """
    train, test = train_test_split(data, n_test)

    # The history starts as the training rows and grows by one row per step.
    history = list(train)
    predictions = []

    for row in test:
        features, expected = row[:-1], row[-1]

        # Forecast from everything observed so far, then reveal the true row.
        forecast = random_forest_forecast(history, features)
        predictions.append(forecast)
        history.append(row)

        print(">expected=%.1f, predicted=%.1f" % (expected, forecast))

    error = mean_absolute_error(test[:, -1], predictions)
    return error, test[:, -1], predictions
