In [1]:
from finrl.preprocessing import data, preprocessors

# Load the two EURUSD OHLC CSV files (the 2018-01-31 and 2018-02-28 sessions).
# Both go through the same loader; results are bound in the order the paths are listed.
data_df, data_df2 = map(
    data.load_ohlc_dataset,
    (
        "chicago_pmi/EURUSD/ohlc/EURUSD_Chicago_Pmi_2018-01-31.csv",
        "chicago_pmi/EURUSD/ohlc/EURUSD_Chicago_Pmi_2018-02-28.csv",
    ),
)

In [2]:
# Display the 2018-01-31 session as loaded (pandas elides the middle rows).
data_df

Unnamed: 0_level_0,ask,ask,ask,ask,bid,bid,bid,bid,bid_vol,ask_vol
Unnamed: 0_level_1,open,high,low,close,open,high,low,close,bid_vol,ask_vol
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2018-01-31 13:45:00+00:00,1.24666,1.24667,1.24666,1.24667,1.24664,1.24666,1.24664,1.24666,4.680000e+06,2.500000e+06
2018-01-31 13:45:01+00:00,1.24669,1.24669,1.24667,1.24667,1.24666,1.24666,1.24665,1.24665,4.900000e+06,4.810000e+06
2018-01-31 13:45:02+00:00,1.24667,1.24667,1.24667,1.24667,1.24665,1.24665,1.24665,1.24665,0.000000e+00,0.000000e+00
2018-01-31 13:45:03+00:00,1.24667,1.24667,1.24667,1.24667,1.24666,1.24666,1.24665,1.24665,2.200000e+06,3.000000e+06
2018-01-31 13:45:04+00:00,1.24667,1.24667,1.24667,1.24667,1.24666,1.24666,1.24666,1.24666,1.000000e+06,1.500000e+06
...,...,...,...,...,...,...,...,...,...,...
2018-01-31 15:44:55+00:00,1.24705,1.24705,1.24705,1.24705,1.24703,1.24703,1.24703,1.24703,2.500000e+06,3.990000e+06
2018-01-31 15:44:56+00:00,1.24704,1.24707,1.24704,1.24707,1.24703,1.24705,1.24703,1.24705,4.870000e+06,3.990000e+06
2018-01-31 15:44:57+00:00,1.24710,1.24710,1.24707,1.24707,1.24705,1.24706,1.24705,1.24706,1.068000e+07,1.795000e+07
2018-01-31 15:44:58+00:00,1.24708,1.24708,1.24708,1.24708,1.24704,1.24704,1.24704,1.24704,5.320000e+06,3.370000e+06


<a id='3'></a>
# Part 4. Preprocess Data
Data preprocessing is a crucial step for training a high quality machine learning model. We need to check for missing data and do feature engineering in order to convert the data into a model-ready state.
* FinRL uses a class **FeatureEngineer** to preprocess the data
* Add **technical indicators**. In practical trading, various information needs to be taken into account, for example the historical stock prices, current holding shares, technical indicators, etc.


In [3]:
# Maps each indicator column name to the TA-Lib function that produces it and
# the look-back window to use.
# `time_period` is presumably counted in rows; the loaded rows are one second
# apart, so 9 corresponds to 9 seconds for this data.
# NOTE(review): in the recorded output below, sma_9 equals sma_21 and ema_9
# equals ema_21 on every visible row, and TA-Lib itself spells the parameter
# `timeperiod` -- verify that FeatureEngineer actually forwards `time_period`.
param_map = dict(
    sma_9=dict(talib_name='SMA', time_period=9),
    ema_9=dict(talib_name='EMA', time_period=9),
    sma_21=dict(talib_name='SMA', time_period=21),
    ema_21=dict(talib_name='EMA', time_period=21),
)

In [4]:
# Run both sessions through FeatureEngineer with identical settings and bind
# each result back to the name it came from.
# NOTE(review): the inputs are overwritten, so re-running this cell feeds
# already-processed frames back in -- confirm that is harmless.
data_df, data_df2 = (
    preprocessors.FeatureEngineer(
        session_df,
        tech_indicator_params_map=param_map,
        use_technical_indicator=True,
        tech_indicator_list=["sma_9", "ema_9", "sma_21", "ema_21"],
    ).preprocess_data()
    for session_df in (data_df, data_df2)
)

Successfully added technical indicators
Successfully added technical indicators


In [5]:
# Display the first session after feature engineering; the added columns
# appear under the `ovr` group.
data_df

Unnamed: 0_level_0,ask,ask,ask,ask,bid,bid,bid,bid,bid_vol,ask_vol,ovr,ovr,ovr,ovr,ovr,ovr,ovr,ovr,ovr
Unnamed: 0_level_1,open,high,low,close,open,high,low,close,bid_vol,ask_vol,open,high,low,close,volume,sma_9,ema_9,sma_21,ema_21
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
2018-01-31 13:45:00+00:00,1.24666,1.24667,1.24666,1.24667,1.24664,1.24666,1.24664,1.24666,4.680000e+06,2.500000e+06,1.246650,1.246665,1.246650,1.246665,7.180000e+06,,,,
2018-01-31 13:45:01+00:00,1.24669,1.24669,1.24667,1.24667,1.24666,1.24666,1.24665,1.24665,4.900000e+06,4.810000e+06,1.246675,1.246675,1.246660,1.246660,9.710000e+06,,,,
2018-01-31 13:45:02+00:00,1.24667,1.24667,1.24667,1.24667,1.24665,1.24665,1.24665,1.24665,0.000000e+00,0.000000e+00,1.246660,1.246660,1.246660,1.246660,0.000000e+00,,,,
2018-01-31 13:45:03+00:00,1.24667,1.24667,1.24667,1.24667,1.24666,1.24666,1.24665,1.24665,2.200000e+06,3.000000e+06,1.246665,1.246665,1.246660,1.246660,5.200000e+06,,,,
2018-01-31 13:45:04+00:00,1.24667,1.24667,1.24667,1.24667,1.24666,1.24666,1.24666,1.24666,1.000000e+06,1.500000e+06,1.246665,1.246665,1.246665,1.246665,2.500000e+06,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-01-31 15:44:55+00:00,1.24705,1.24705,1.24705,1.24705,1.24703,1.24703,1.24703,1.24703,2.500000e+06,3.990000e+06,1.247040,1.247040,1.247040,1.247040,6.490000e+06,1.247078,1.247057,1.247078,1.247057
2018-01-31 15:44:56+00:00,1.24704,1.24707,1.24704,1.24707,1.24703,1.24705,1.24703,1.24705,4.870000e+06,3.990000e+06,1.247035,1.247060,1.247035,1.247060,8.860000e+06,1.247079,1.247057,1.247079,1.247057
2018-01-31 15:44:57+00:00,1.24710,1.24710,1.24707,1.24707,1.24705,1.24706,1.24705,1.24706,1.068000e+07,1.795000e+07,1.247075,1.247080,1.247060,1.247065,2.863000e+07,1.247080,1.247058,1.247080,1.247058
2018-01-31 15:44:58+00:00,1.24708,1.24708,1.24708,1.24708,1.24704,1.24704,1.24704,1.24704,5.320000e+06,3.370000e+06,1.247060,1.247060,1.247060,1.247060,8.690000e+06,1.247081,1.247058,1.247081,1.247058


The abstract API of the TA-Lib library can be used to view the parameter options of a technical indicator. For example, the Bollinger Bands indicator (BBANDS) takes: <br>
    - timeperiod <br>
    - nbdevup <br>
    - nbdevdn <br>
    - matype

In [6]:
from talib import abstract
# Look up BBANDS through TA-Lib's abstract API. Displaying the object lists its
# inputs, its parameters with their defaults, and its outputs.
bbands = abstract.Function('BBANDS')
bbands

{'name': 'BBANDS', 'group': 'Overlap Studies', 'display_name': 'Bollinger Bands', 'function_flags': ['Output scale same as input'], 'input_names': OrderedDict([('price', 'close')]), 'parameters': OrderedDict([('timeperiod', 5), ('nbdevup', 2), ('nbdevdn', 2), ('matype', 0)]), 'output_flags': OrderedDict([('upperband', ['Values represent an upper limit']), ('middleband', ['Line']), ('lowerband', ['Values represent a lower limit'])]), 'output_names': ['upperband', 'middleband', 'lowerband']}

<a id='4'></a>
# Part 5. Build Environment
Considering the stochastic and interactive nature of automated stock trading, a financial task is modeled as a **Markov Decision Process (MDP)** problem. The training process involves observing the change in stock price, taking an action, and calculating a reward so that the agent can adjust its strategy accordingly. By interacting with the environment, the trading agent derives a trading strategy that maximizes rewards over time.

Our trading environments, based on OpenAI Gym framework, simulate live stock markets with real market data according to the principle of time-driven simulation.

The action space describes the allowed actions that the agent interacts with the environment. Normally, action a includes three actions: {-1, 0, 1}, where -1, 0, 1 represent selling, holding, and buying one share. Also, an action can be carried upon multiple shares. We use an action space {-k,…,-1, 0, 1, …, k}, where k denotes the number of shares to buy and -k denotes the number of shares to sell. For example, "Buy 10 shares of AAPL" or "Sell 10 shares of AAPL" are 10 or -10, respectively. The continuous action space needs to be normalized to [-1, 1], since the policy is defined on a Gaussian distribution, which needs to be normalized and symmetric.

In [7]:
# Work on independent copies. The next cell standardises the `ovr` columns in
# place; with a plain assignment `train`/`trade` would be the very same objects
# as `data_df`/`data_df2`, so that write would silently alter the frames
# displayed earlier in the notebook.
train = data_df.copy()   # session used to fit the scaler and train the agent
trade = data_df2.copy()  # session used for trading

In [8]:
from sklearn import preprocessing

# Standardise every column under the top-level `ovr` group.
# The scaler is fitted on the training session only and then applied,
# unchanged, to both sessions. Both frames are written to in place.
feaures_list = train['ovr'].columns.tolist()
data_normaliser = preprocessing.StandardScaler().fit(train['ovr'][feaures_list])
train.loc[:, ('ovr', feaures_list)] = data_normaliser.transform(train['ovr'][feaures_list])
trade.loc[:, ('ovr', feaures_list)] = data_normaliser.transform(trade['ovr'][feaures_list])

In [9]:
# Display the training frame after scaling; the scaled values sit under `ovr`.
train

Unnamed: 0_level_0,ask,ask,ask,ask,bid,bid,bid,bid,bid_vol,ask_vol,ovr,ovr,ovr,ovr,ovr,ovr,ovr,ovr,ovr
Unnamed: 0_level_1,open,high,low,close,open,high,low,close,bid_vol,ask_vol,open,high,low,close,volume,sma_9,ema_9,sma_21,ema_21
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
2018-01-31 13:45:00+00:00,1.24666,1.24667,1.24666,1.24667,1.24664,1.24666,1.24664,1.24666,4.680000e+06,2.500000e+06,1.115983,1.126963,1.126179,1.137330,-0.302064,,,,
2018-01-31 13:45:01+00:00,1.24669,1.24669,1.24667,1.24667,1.24666,1.24666,1.24665,1.24665,4.900000e+06,4.810000e+06,1.152298,1.141470,1.140721,1.130068,0.019150,,,,
2018-01-31 13:45:02+00:00,1.24667,1.24667,1.24667,1.24667,1.24665,1.24665,1.24665,1.24665,0.000000e+00,0.000000e+00,1.130509,1.119709,1.140721,1.130068,-1.213653,,,,
2018-01-31 13:45:03+00:00,1.24667,1.24667,1.24667,1.24667,1.24666,1.24666,1.24665,1.24665,2.200000e+06,3.000000e+06,1.137772,1.126963,1.140721,1.130068,-0.553449,,,,
2018-01-31 13:45:04+00:00,1.24667,1.24667,1.24667,1.24667,1.24666,1.24666,1.24666,1.24666,1.000000e+06,1.500000e+06,1.137772,1.126963,1.147992,1.137330,-0.896247,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-01-31 15:44:55+00:00,1.24705,1.24705,1.24705,1.24705,1.24703,1.24703,1.24703,1.24703,2.500000e+06,3.990000e+06,1.682505,1.670980,1.693340,1.681982,-0.389668,1.752342,1.724727,1.752342,1.724727
2018-01-31 15:44:56+00:00,1.24704,1.24707,1.24704,1.24707,1.24703,1.24705,1.24703,1.24705,4.870000e+06,3.990000e+06,1.675242,1.699994,1.686069,1.711030,-0.088767,1.754047,1.725026,1.754047,1.725026
2018-01-31 15:44:57+00:00,1.24710,1.24710,1.24707,1.24707,1.24705,1.24706,1.24705,1.24706,1.068000e+07,1.795000e+07,1.733347,1.729008,1.722425,1.718292,2.421276,1.756482,1.725778,1.756482,1.725778
2018-01-31 15:44:58+00:00,1.24708,1.24708,1.24708,1.24708,1.24704,1.24704,1.24704,1.24704,5.320000e+06,3.370000e+06,1.711558,1.699994,1.722425,1.711030,-0.110351,1.757943,1.726009,1.757943,1.726009


<a id='4.2'></a>
## 5.2 User-defined Environment: a simulation environment class 

<a id='4.3'></a>
## 5.3 Initialize Environment
* **stock dimension**: the number of unique stock tickers we use
* **hmax**: the maximum amount of shares to buy or sell
* **initial amount**: the amount of money we use to trade in the beginning
* **transaction cost percentage**: a per share rate for every share trade
* **tech_indicator_list**: a list of technical indicator names (modified from config.py)

In [10]:
## Indicator column names, taken from the keys of `param_map` in insertion
## order, so the environment is given the same names that were requested
## from FeatureEngineer.
tech_indicator_list = [indicator_name for indicator_name in param_map]
tech_indicator_list

['sma_9', 'ema_9', 'sma_21', 'ema_21']

In [11]:
# Size of the observation vector, built term by term:
#   1                      account balance
#   2 per asset            close price and shares held
#   one per indicator      technical indicators, per asset
#   3 per asset            open, high and low prices
# NOTE(review): the earlier note also listed "+ 1 returns", which has no
# matching term in the sum -- confirm against the environment class.
asset_dimension = 1
state_space = sum((
    1,
    2 * asset_dimension,
    len(tech_indicator_list) * asset_dimension,
    3 * asset_dimension,
))
print(state_space)


10


In [15]:
from finrl.env.environment import EnvSetup

# Shared environment settings, reused for both the training and trading
# environments created further down.
env_setup = EnvSetup(
    stock_dim=asset_dimension,
    state_space=state_space,
    hmax=200,                    # maximum amount of shares to buy or sell
    initial_amount=100000,       # starting capital
    transaction_cost_pct=0.001,  # transaction cost rate
    tech_indicator_list=tech_indicator_list,
)
    

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [None]:
# Show only the `ovr` column group of the training frame, which is the part
# handed to the training environment below.
train['ovr']

In [None]:
# Inspect the attributes stored on the EnvSetup instance.
env_setup.__dict__

In [None]:
# Load IPython's autoreload extension and reload imported modules once, now,
# so edits to local source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload

In [None]:
from finrl.env.EnvSingleStock import SingleStockEnv

# Build the training environment from the scaled `ovr` columns only.
# NOTE(review): the first 29 rows are dropped, presumably to skip the
# indicator warm-up rows that are NaN in the frame shown above -- confirm that
# 29 is the right count for the configured windows.
env_train = env_setup.create_env_training(
    data=train['ovr'].iloc[29:],
    env_class=SingleStockEnv,
)
                                

<a id='5'></a>
# Part 6: Implement DRL Algorithms
* The implementation of the DRL algorithms is based on **OpenAI Baselines** and **Stable Baselines**. Stable Baselines is a fork of OpenAI Baselines with a major structural refactoring and code cleanups.
* FinRL library includes fine-tuned standard DRL algorithms, such as DQN, DDPG,
Multi-Agent DDPG, PPO, SAC, A2C and TD3. We also allow users to
design their own DRL algorithms by adapting these DRL algorithms.

In [None]:
from finrl.model.models import DRLAgent
# Wrap the training environment in the agent helper whose train_* methods are
# called in the model cells below.
agent = DRLAgent(env = env_train)

### Model Training: 5 models: A2C, DDPG, PPO, TD3, SAC



### Model 1: A2C

In [None]:
from finrl.config import config
## Show the default A2C hyperparameters from the config file, for comparison
## with the tuned values used in the training cell below.
config.A2C_PARAMS

In [None]:
# Reload imported modules now (relies on the autoreload extension loaded earlier).
%autoreload

In [None]:
import datetime

print("==============Model Training===========")
# Timestamp that tags this run, built the same way as in the other training cells.
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
a2c_params_tuning = {
    'n_steps': 5,
    'ent_coef': 0.005,
    'learning_rate': 0.0007,
    'verbose': 0,
    'timesteps': 100000,
}

# Tag the model name with the run timestamp, as the DDPG, PPO, TD3 and SAC
# cells do. Previously `now` was computed here but never used, so every A2C
# run shared the bare name "A2C".
model_a2c = agent.train_A2C(model_name="A2C_{}".format(now), model_params=a2c_params_tuning)

### Model 2: DDPG

In [None]:
## Show the default DDPG hyperparameters from the config file, for comparison
## with the tuned values used in the training cell below.
config.DDPG_PARAMS

In [None]:
print("==============Model Training===========")
# Timestamp that tags this run (`datetime` is imported in the A2C training cell).
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
ddpg_params_tuning = dict(
    batch_size=128,
    buffer_size=100000,
    verbose=0,
    timesteps=50000,
)
model_ddpg = agent.train_DDPG(
    model_name="DDPG_{}".format(now),
    model_params=ddpg_params_tuning,
)

### Model 3: PPO

In [None]:
## Show the default PPO hyperparameters from the config file, for comparison
## with the tuned values used in the training cell below.
config.PPO_PARAMS

In [None]:
print("==============Model Training===========")
# Timestamp that tags this run (`datetime` is imported in the A2C training cell).
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
ppo_params_tuning = dict(
    n_steps=128,
    nminibatches=4,
    ent_coef=0.005,
    learning_rate=0.00025,
    verbose=0,
    timesteps=50000,
)
model_ppo = agent.train_PPO(
    model_name="PPO_{}".format(now),
    model_params=ppo_params_tuning,
)

### Model 4: TD3

In [None]:
## Show the default TD3 hyperparameters from the config file, for comparison
## with the tuned values used in the training cell below.
config.TD3_PARAMS

In [None]:
print("==============Model Training===========")
# Timestamp that tags this run (`datetime` is imported in the A2C training cell).
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
td3_params_tuning = dict(
    batch_size=128,
    buffer_size=200000,
    learning_rate=0.0002,
    verbose=0,
    timesteps=50000,
)
model_td3 = agent.train_TD3(
    model_name="TD3_{}".format(now),
    model_params=td3_params_tuning,
)

### Model 5: SAC

In [None]:
## Show the default SAC hyperparameters from the config file, for comparison
## with the tuned values used in the training cell below.
config.SAC_PARAMS

In [None]:
print("==============Model Training===========")
# Timestamp that tags this run (`datetime` is imported in the A2C training cell).
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
sac_params_tuning = dict(
    batch_size=64,
    buffer_size=100000,
    ent_coef='auto_0.1',
    learning_rate=0.0001,
    learning_starts=200,
    timesteps=50000,
    verbose=0,
)
model_sac = agent.train_SAC(
    model_name="SAC_{}".format(now),
    model_params=sac_params_tuning,
)

### Trading
* we use the environment class we initialized at 5.3 to create a stock trading environment
* Assume that we have $100,000 initial capital at the start of the trading session.
* We use the trained TD3 model to trade EURUSD.

In [None]:
# Preview the first rows of the trading session.
trade.head()

In [None]:
# Create the trading environment and its initial observation.
# NOTE(review): the training environment was built from
# `train['ovr'].iloc[29:,:]`, whereas the whole `trade` frame is passed here
# (all column groups, warm-up rows included) -- confirm this is intended.
env_trade, obs_trade = env_setup.create_env_trading(
    data=trade,
    env_class=SingleStockEnv,
)

In [None]:
## Replay a trained model over the trading environment; the two results are
## bound to `df_account_value` and `df_actions`.
# NOTE(review): this evaluates `model_td3`; pass model_a2c, model_ddpg,
# model_ppo or model_sac instead to evaluate one of the other trained models.
df_account_value, df_actions = DRLAgent.DRL_prediction(
    model=model_td3,
    test_data=trade,
    test_env=env_trade,
    test_obs=obs_trade,
)

<a id='6'></a>
# Part 7: Backtesting Performance
Backtesting plays a key role in evaluating the performance of a trading strategy. An automated backtesting tool is preferred because it reduces human error. We usually use the Quantopian pyfolio package to backtest our trading strategies. It is easy to use and consists of various individual plots that provide a comprehensive image of the performance of a trading strategy.

<a id='6.1'></a>
## 7.1 BackTestStats
Pass in `df_account_value`; this information is stored in the env class.


In [None]:
print("==============Get Backtest Results===========")
# Compute the backtest statistics, wrap them in a DataFrame and write them to
# the results directory. The file name carries `now`, i.e. the timestamp set by
# whichever training cell ran last.
# NOTE(review): `BackTestStats` and `pd` are not imported in any cell shown in
# this notebook -- confirm an earlier cell provides them.
perf_stats_all = pd.DataFrame(BackTestStats(account_value=df_account_value))
perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + '.csv')

<a id='6.2'></a>
## 7.2 BackTestPlot

In [None]:
print("==============Compare to AAPL itself buy-and-hold===========")
%matplotlib inline
# Plot the account value against a baseline ticker.
# NOTE(review): the baseline is AAPL although the sessions loaded at the top of
# the notebook are EURUSD -- confirm the intended benchmark.
# NOTE(review): `BackTestPlot` is not imported in any cell shown in this notebook.
BackTestPlot(account_value=df_account_value, baseline_ticker = 'AAPL')

<a id='6.3'></a>
## 7.3 Baseline Stats

In [None]:
print("==============Get Baseline Stats===========")
# Baseline statistics for AAPL.
# NOTE(review): `BaselineStats` is not imported in any cell shown in this
# notebook -- confirm an earlier cell provides it.
baesline_perf_stats=BaselineStats('AAPL')

In [None]:
print("==============Get Baseline Stats===========")
# Baseline statistics for ^GSPC. This rebinds `baesline_perf_stats`, so the
# AAPL result from the previous cell is no longer reachable under that name.
baesline_perf_stats=BaselineStats('^GSPC')

<a id='6.4'></a>
## 7.4 Compare to Stock Market Index

In [None]:
print("==============Compare to S&P 500===========")
%matplotlib inline
# Candidate index tickers for `baseline_ticker`:
# S&P 500: ^GSPC
# Dow Jones Index: ^DJI
# NASDAQ 100: ^NDX
# NOTE(review): `BackTestPlot` is not imported in any cell shown in this notebook.
BackTestPlot(df_account_value, baseline_ticker = '^GSPC')