In [10]:
import pandas as pd
import numpy as np

# read the gold price history (comma-separated, pandas' default delimiter)
gold = pd.read_csv("../Data/gold.csv")

# preview the first rows
gold.head()

Unnamed: 0,timestamp,price
0,01/01/2010 01:00,1.4312
1,01/01/2010 05:00,1.4312
2,01/01/2010 09:00,1.4312
3,01/01/2010 13:00,1.4312
4,01/01/2010 17:00,1.4312


In [11]:
from rsi_strategy import RsiStrategy

# look-back window handed to the RSI calculation
rsi_period = 14

# compute the RSI of the price series and store it as a new column
gold['rsi'] = RsiStrategy.calc_rsi(gold['price'], rsi_period)

# preview the last rows
gold.tail()

Unnamed: 0,timestamp,price,rsi
15643,31/12/2019 05:00,1.12032,82.239382
15644,31/12/2019 09:00,1.1212,84.217016
15645,31/12/2019 13:00,1.12308,84.138786
15646,31/12/2019 17:00,1.12266,80.548303
15647,31/12/2019 21:00,1.12177,72.842105


In [12]:
# change in rsi from the previous row to the current one (first row is NaN)
gold['rsi_delta'] = gold['rsi'].diff()

# preview the last rows
gold.tail()

Unnamed: 0,timestamp,price,rsi,rsi_delta
15643,31/12/2019 05:00,1.12032,82.239382,-0.355351
15644,31/12/2019 09:00,1.1212,84.217016,1.977634
15645,31/12/2019 13:00,1.12308,84.138786,-0.07823
15646,31/12/2019 17:00,1.12266,80.548303,-3.590483
15647,31/12/2019 21:00,1.12177,72.842105,-7.706198


In [13]:
# discard the leading rows (first rows usually quite noisy)
# NOTE: this rebinds `gold`, so running the cell again trims a further 30 rows
warmup_rows = 30
gold = gold.iloc[warmup_rows:]

# let the strategy derive its trades from the timestamp, price and rsi series
trades = RsiStrategy.calc_trades(gold['timestamp'], gold['price'], gold['rsi'])
df = pd.DataFrame(trades)

# label the trade columns
df.columns = ['timestamp', 'entry', 'position_type', 'exit']

# preview the most recent trades
df.tail()

Unnamed: 0,timestamp,entry,position_type,exit
604,17/11/2019 21:00,1.10517,short,1.10222
605,25/11/2019 05:00,1.10222,long,1.1076
606,02/12/2019 17:00,1.1076,short,1.11239
607,19/12/2019 21:00,1.11239,long,1.109
608,25/12/2019 21:00,1.109,short,1.12163


In [14]:
# intersect the two dataframes: keep one row per trade whose timestamp also
# exists in gold, bringing along that row's price, rsi and rsi_delta
df = pd.merge(df, gold, how='inner', on=['timestamp'])

# 'price' appears to repeat 'entry' for these rows. It is deliberately kept here:
# a bare `df.drop(columns=['price'])` returns a new frame and changes nothing
# unless it is assigned, and the later cell that builds the outcome column
# drops 'entry', 'exit' and 'price' together (it would fail if 'price' were
# already gone).

# show head 
df.head()

Unnamed: 0,timestamp,entry,position_type,exit,price,rsi,rsi_delta
0,11/01/2010 17:00,1.4535,short,1.43613,1.4535,72.914439,7.411049
1,17/01/2010 21:00,1.43613,long,1.42122,1.43613,19.166413,-17.307751
2,20/01/2010 09:00,1.42,long,1.4029,1.42,29.770565,0.370178
3,27/01/2010 21:00,1.40214,long,1.3869,1.40214,26.613463,-4.02304
4,31/01/2010 21:00,1.38578,long,1.39663,1.38578,21.310202,-9.883968


In [15]:
# load the fundamental series (US 10-year, VIX, dollar index) into dataframes
US10Y = pd.read_csv("../Data/us10y.csv", sep=',')
vix = pd.read_csv("../Data/vix.csv", sep=',')
dollar_index = pd.read_csv("../Data/dxy.csv", sep=',')

# calculate rsi for DXY and remove everything else 
dollar_index['dxy_rsi'] = RsiStrategy.calc_rsi(dollar_index['Close'], 14)
dollar_index = dollar_index.drop(['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'], axis=1)

# calculate rsi for US10Y and remove everything else 
US10Y['us10y_rsi'] = RsiStrategy.calc_rsi(US10Y['Close'], 14)
US10Y = US10Y.drop(['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'], axis=1)

# remove everything else for the vix except from the close 
# N.B. refer to report why rsi is not calculated for vix 
vix = vix.drop(['Open', 'High', 'Low', 'Volume', 'Adj Close'], axis=1)
vix = vix.rename(columns={"Close": "vix_close"})

# convert Date columns to DateTime objects
# NOTE(review): no explicit format/dayfirst is given, so pandas infers the
# date layout from the file contents; confirm the CSVs are unambiguous
US10Y['Date'] = pd.to_datetime(US10Y["Date"])
vix['Date'] = pd.to_datetime(vix["Date"])
dollar_index['Date'] = pd.to_datetime(dollar_index["Date"])

# combine data into one dataframe; the outer joins keep every date that
# appears in any of the three series and leave NaN where a series has no row
fundamental_df = pd.merge(dollar_index, vix, how='outer', on=['Date'])
fundamental_df = pd.merge(fundamental_df, US10Y, how='outer', on=['Date'])

# forward fill NaN values with the last value seen in the rows above
# (DataFrame.ffill() is the supported spelling; fillna(method='ffill') is
# deprecated in current pandas and does the same thing)
fundamental_df = fundamental_df.ffill()

# drop starting NaN values that will not be forward filled 
fundamental_df = fundamental_df.dropna()

# show head of dataframe
fundamental_df.head()

Unnamed: 0,Date,dxy_rsi,vix_close,us10y_rsi
49,2010-03-18,39.28574,16.620001,58.119658
50,2010-03-19,54.819297,16.969999,58.823529
51,2010-03-21,45.25555,16.969999,63.926941
52,2010-03-22,47.509629,16.870001,44.755245
53,2010-03-23,52.09796,16.35,50.625


In [16]:
# rename column to match timestamp column name in df Dataframe 
fundamental_df = fundamental_df.rename(columns={"Date": "timestamp"})

# convert date to a day-first string (dd/mm/YYYY)
fundamental_df['timestamp'] = fundamental_df['timestamp'].dt.strftime('%d/%m/%Y')

# repeat every daily row once per hour of the day, keeping the rows of one date
# next to each other. Selecting repeated row positions (rather than repeating
# `fundamental_df.values`) keeps each column's dtype; going through `.values`
# mixes the date strings with the numbers and leaves every column as `object`.
hours_per_day = 24
row_positions = np.repeat(np.arange(len(fundamental_df)), hours_per_day)
f_df = fundamental_df.iloc[row_positions].reset_index(drop=True)

# turn each repeated date into "dd/mm/YYYY HH:00"; the row position modulo 24
# walks through the hours 00..23 within every block of 24 repeated rows
f_df['timestamp'] = [
    "{} {:02d}:00".format(day, position % hours_per_day)
    for position, day in enumerate(f_df['timestamp'])
]

# attach the fundamentals to the trades; trades whose timestamp has no matching
# date in f_df are dropped by the inner join
# NOTE(review): every hour of a date receives that date's values, which were
# derived from daily 'Close' columns -- confirm this does not leak end-of-day
# information into intraday rows
df = pd.merge(df, f_df, how='inner', on=['timestamp'])

# show result 
df.tail()

Unnamed: 0,timestamp,entry,position_type,exit,price,rsi,rsi_delta,dxy_rsi,vix_close,us10y_rsi
579,05/11/2019 17:00,1.10696,long,1.10517,1.10696,27.906977,-5.89584,63.9751,13.1,44.9153
580,17/11/2019 21:00,1.10517,short,1.10222,1.10517,72.284264,2.732982,67.0966,12.05,58.6207
581,25/11/2019 05:00,1.10222,long,1.1076,1.10222,29.254457,-5.564274,63.7254,11.87,21.7391
582,02/12/2019 17:00,1.1076,short,1.11239,1.1076,72.156573,15.04622,73.5629,14.91,46.5909
583,19/12/2019 21:00,1.11239,long,1.109,1.11239,29.241877,-16.972975,45.8332,12.5,66.5871


In [17]:
# label each trade for prediction: True when the price moved in the trade's
# favour (up for a long, down for a short), False otherwise
price_move = df['exit'] - df['entry']
long_won = (price_move > 0) & (df['position_type'] == 'long')
short_won = (price_move < 0) & (df['position_type'] == 'short')
df['outcome'] = long_won | short_won

# entry, exit and price are no longer needed once the label exists
df = df.drop(columns=['entry', 'exit', 'price'])

# show tail
df.tail()

Unnamed: 0,timestamp,position_type,rsi,rsi_delta,dxy_rsi,vix_close,us10y_rsi,outcome
579,05/11/2019 17:00,long,27.906977,-5.89584,63.9751,13.1,44.9153,False
580,17/11/2019 21:00,short,72.284264,2.732982,67.0966,12.05,58.6207,True
581,25/11/2019 05:00,long,29.254457,-5.564274,63.7254,11.87,21.7391,True
582,02/12/2019 17:00,short,72.156573,15.04622,73.5629,14.91,46.5909,False
583,19/12/2019 21:00,long,29.241877,-16.972975,45.8332,12.5,66.5871,False


In [18]:
# write the training set to disk; the row index is written as the first column
df.to_csv('training_data.csv', index=True)