In [79]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline

In [80]:
# Define a function that adds lagged timesteps to a DataFrame
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
	"""
	Frame a time series as a supervised learning dataset.

	Every input column is repeated once per lag step (t-n_in ... t-1) and
	once per forecast step (t ... t+n_out-1); each copy is shifted by the
	matching number of rows so that one output row holds a full window.

	Arguments:
		data: Sequence of observations as a list, 1-D or 2-D NumPy array,
			pandas Series or DataFrame (anything pd.DataFrame accepts).
			Rows are timesteps, columns are variables.
		n_in: Number of lag observations as input (X).
		n_out: Number of observations as output (y).
		dropnan: Boolean whether or not to drop rows with NaN values.
	Returns:
		Pandas DataFrame of series framed for supervised learning. Columns
		are named 'var<j>(t-<i>)', 'var<j>(t)' and 'var<j>(t+<i>)', where
		<j> is the 1-based position of the variable in the input.
	"""
	df = pd.DataFrame(data)
	# Count the variables on the constructed frame rather than on the raw
	# input: 1-D arrays and Series have no shape[1], and a list of rows
	# holds more than one variable.
	n_vars = df.shape[1]
	cols, names = [], []
	# input sequence (t-n, ... t-1)
	for i in range(n_in, 0, -1):
		cols.append(df.shift(i))
		names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
	# forecast sequence (t, t+1, ... t+n)
	for i in range(n_out):
		cols.append(df.shift(-i))
		if i == 0:
			names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
		else:
			names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
	# put it all together
	agg = pd.concat(cols, axis=1)
	agg.columns = names
	# Shifting leaves NaN at the edges of the window; dropping those rows
	# also drops any row that already contained NaN in the input.
	if dropnan:
		agg = agg.dropna()
	return agg

In [81]:
# Load the merged dataset (header on the first row, column 1 used as the index)
# and expand it into lagged, supervised-learning form.
n_days = 4  # number of lagged timesteps carried in each output row
dfm = pd.read_csv('mergeddata.csv', index_col=1, header=0)
values = dfm.values
dfm_series = series_to_supervised(dfm, n_in=n_days, n_out=1)
#dfm_series = dfm_series.drop(['var1(t-4)'], axis=1)
dfm_series.head()

Unnamed: 0_level_0,var1(t-4),var2(t-4),var3(t-4),var4(t-4),var5(t-4),var6(t-4),var7(t-4),var8(t-4),var9(t-4),var10(t-4),...,var13(t),var14(t),var15(t),var16(t),var17(t),var18(t),var19(t),var20(t),var21(t),var22(t)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1993-05-18,0.0,1.2138,1.2171,1.2105,1.2152,1181.0,0.7896,0.7928,0.788,0.7924,...,1.538,1.5237,1.5365,2711,140.472839,141.07637,140.064438,140.087128,140.087128,0.0
1993-05-19,1.0,1.2148,1.2158,1.2084,1.2099,991.0,0.7922,0.7967,0.791,0.7943,...,1.5482,1.5328,1.5432,2261,140.114349,140.19603,138.312836,138.399063,138.399063,0.0
1993-05-27,7.0,1.2051,1.2111,1.1993,1.2025,2101.0,0.7776,0.7803,0.7754,0.7794,...,1.565,1.5425,1.562,2831,141.130829,141.788803,141.09906,141.380417,141.380417,0.0
1993-05-28,8.0,1.2031,1.2031,1.1907,1.1951,1681.0,0.78,0.7805,0.7762,0.7768,...,1.568,1.5535,1.5607,2871,141.380417,141.394012,139.542587,139.891998,139.891998,0.0
1993-06-07,14.0,1.2321,1.2363,1.2259,1.2359,1701.0,0.7881,0.7955,0.7859,0.7948,...,1.5268,1.506,1.5268,2111,139.279388,140.704269,139.202255,140.472839,140.472839,0.0


In [82]:
# Move the target column var2(t) to the last position.
# NOTE(review): the earlier comment named var1(t), but the offset that was
# swapped (-21) is var2(t) when the frame has 22 variables - confirm that
# var2(t) is the intended target.
swapcol = list(dfm_series)
print(swapcol)
target_col = 'var2(t)'
# Locate the target by name instead of a hard-coded offset: a positional swap
# toggles the two columns back when the cell is run a second time, whereas
# this is a no-op once the target is already last.
target_pos = swapcol.index(target_col)
swapcol[target_pos], swapcol[-1] = swapcol[-1], swapcol[target_pos]
dfm_series = dfm_series.loc[:,swapcol]

['var1(t-4)', 'var2(t-4)', 'var3(t-4)', 'var4(t-4)', 'var5(t-4)', 'var6(t-4)', 'var7(t-4)', 'var8(t-4)', 'var9(t-4)', 'var10(t-4)', 'var11(t-4)', 'var12(t-4)', 'var13(t-4)', 'var14(t-4)', 'var15(t-4)', 'var16(t-4)', 'var17(t-4)', 'var18(t-4)', 'var19(t-4)', 'var20(t-4)', 'var21(t-4)', 'var22(t-4)', 'var1(t-3)', 'var2(t-3)', 'var3(t-3)', 'var4(t-3)', 'var5(t-3)', 'var6(t-3)', 'var7(t-3)', 'var8(t-3)', 'var9(t-3)', 'var10(t-3)', 'var11(t-3)', 'var12(t-3)', 'var13(t-3)', 'var14(t-3)', 'var15(t-3)', 'var16(t-3)', 'var17(t-3)', 'var18(t-3)', 'var19(t-3)', 'var20(t-3)', 'var21(t-3)', 'var22(t-3)', 'var1(t-2)', 'var2(t-2)', 'var3(t-2)', 'var4(t-2)', 'var5(t-2)', 'var6(t-2)', 'var7(t-2)', 'var8(t-2)', 'var9(t-2)', 'var10(t-2)', 'var11(t-2)', 'var12(t-2)', 'var13(t-2)', 'var14(t-2)', 'var15(t-2)', 'var16(t-2)', 'var17(t-2)', 'var18(t-2)', 'var19(t-2)', 'var20(t-2)', 'var21(t-2)', 'var22(t-2)', 'var1(t-1)', 'var2(t-1)', 'var3(t-1)', 'var4(t-1)', 'var5(t-1)', 'var6(t-1)', 'var7(t-1)', 'var8(t-1)'

In [83]:
# Label the index 'Date' (set in place on the frame's index object).
dfm_series.index.name = 'Date'
# Write the framed dataset to disk.
dfm_series.to_csv('mergeddataseries.csv')
# Preview the first 20 rows of the frame that was written.
dfm_series.head(20)

Unnamed: 0_level_0,var1(t-4),var2(t-4),var3(t-4),var4(t-4),var5(t-4),var6(t-4),var7(t-4),var8(t-4),var9(t-4),var10(t-4),...,var13(t),var14(t),var15(t),var16(t),var17(t),var18(t),var19(t),var20(t),var21(t),var2(t)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1993-05-18,0.0,1.2138,1.2171,1.2105,1.2152,1181.0,0.7896,0.7928,0.788,0.7924,...,1.538,1.5237,1.5365,2711,140.472839,141.07637,140.064438,140.087128,140.087128,1.2133
1993-05-19,1.0,1.2148,1.2158,1.2084,1.2099,991.0,0.7922,0.7967,0.791,0.7943,...,1.5482,1.5328,1.5432,2261,140.114349,140.19603,138.312836,138.399063,138.399063,1.2043
1993-05-27,7.0,1.2051,1.2111,1.1993,1.2025,2101.0,0.7776,0.7803,0.7754,0.7794,...,1.565,1.5425,1.562,2831,141.130829,141.788803,141.09906,141.380417,141.380417,1.201
1993-05-28,8.0,1.2031,1.2031,1.1907,1.1951,1681.0,0.78,0.7805,0.7762,0.7768,...,1.568,1.5535,1.5607,2871,141.380417,141.394012,139.542587,139.891998,139.891998,1.2205
1993-06-07,14.0,1.2321,1.2363,1.2259,1.2359,1701.0,0.7881,0.7955,0.7859,0.7948,...,1.5268,1.506,1.5268,2111,139.279388,140.704269,139.202255,140.472839,140.472839,1.205
1993-06-08,15.0,1.2358,1.2375,1.2222,1.2228,1761.0,0.7945,0.7967,0.7912,0.7938,...,1.5265,1.5135,1.5205,2121,140.472839,141.257874,140.463776,140.622589,140.622589,1.2083
1993-06-09,16.0,1.2229,1.2259,1.2195,1.2252,1051.0,0.7936,0.7951,0.7903,0.7908,...,1.527,1.5069,1.5152,3591,140.622589,142.324265,140.559067,142.065613,142.065613,1.2028
1993-06-10,17.0,1.2252,1.2273,1.201,1.2013,2871.0,0.7909,0.7978,0.7904,0.7969,...,1.5325,1.507,1.5305,3421,142.092834,142.564774,142.015701,142.356033,142.356033,1.1951
1993-06-11,18.0,1.205,1.2085,1.2017,1.2085,1011.0,0.7981,0.7981,0.7912,0.7929,...,1.5405,1.5185,1.5215,3621,142.346954,142.346954,141.575531,142.065613,142.065613,1.2009
1993-06-14,19.0,1.2083,1.2109,1.2025,1.2032,1171.0,0.7925,0.7952,0.7906,0.7914,...,1.533,1.519,1.5285,1851,142.083771,143.808136,142.083771,143.508636,143.508636,1.2045


In [84]:
#dfm_series.index.values
#dfm_series.index = dfm_series.index.astype(float)