In [1]:
import numpy as np
import pandas as pd

from datetime import datetime

from pathlib import Path

import talib

In [2]:
class CSVDataHandler:
    """
    Replays historical OHLCV bars from per-symbol CSV files, one bar at a time.

    Each symbol is read from ``<csv_dir>/<SYMBOL>_<timeframe>.csv`` where the
    symbol's ``/`` is replaced by ``-`` (``BTC/USDT`` -> ``BTC-USDT``).  The files
    are read without a header row; the first column is parsed as an epoch
    timestamp in milliseconds and the remaining columns are
    open, high, low, close, volume.

    All symbols are aligned on the union of their timestamps, so every call to
    ``update_bars`` appends exactly one bar per symbol to ``latest_symbol_data``.
    """

    # Value columns of every bar, in CSV order (the timestamp column comes first).
    _VALUE_COLUMNS = ('open', 'high', 'low', 'close', 'volume')

    def __init__(self, symbol_list, timeframe, csv_dir='../exchange_data'):
        """
        Parameters
        ----------
        symbol_list : iterable of str
            Symbols such as ``'BTC/USDT'``; stored with ``/`` replaced by ``-``.
        timeframe : str
            Timeframe suffix used in the CSV file names (e.g. ``'1h'``).
        csv_dir : str or path-like, optional
            Directory holding the CSV files.  A relative path is resolved against
            the current working directory; an absolute path is used as is.
        """
        self.csv_dir = csv_dir
        self.symbol_list = [symbol.replace('/', '-') for symbol in symbol_list]
        self.timeframe = timeframe

        # symbol -> iterator over the symbol's bars (namedtuples called OHLCV)
        self.symbol_data = dict()
        # symbol -> list of the bars already handed to the backtest
        self.latest_symbol_data = dict()

        self.continue_backtest = True

        self._load_symbol_data()

    def __repr__(self):
        return '<CSVDataHandler>'

    def __str__(self):
        return f'CSVDataHandler with {self.timeframe} timeframe'

    def _load_symbol_data(self):
        """
        Read every symbol's CSV file and prepare the bars for replay.

        Fills ``self.symbol_data`` with one bar iterator per symbol and
        initialises ``self.latest_symbol_data`` with an empty list per symbol.
        """
        csv_files_path = Path().absolute() / self.csv_dir
        columns = ['datetime', *self._VALUE_COLUMNS]
        frames = dict()
        combined_symbol_index = None

        for symbol in self.symbol_list:
            symbol_csv_path = csv_files_path / f'{symbol}_{self.timeframe}.csv'
            frame = pd.read_csv(
                symbol_csv_path, header=None, index_col=0, names=columns)
            frame.index = pd.to_datetime(frame.index, unit='ms')

            # De-duplicate on the timestamp, not on the row values: two distinct
            # bars may legitimately carry identical OHLCV values, while a repeated
            # timestamp would make the padded reindex below fail.
            frame = frame[~frame.index.duplicated(keep='first')].sort_index()
            frames[symbol] = frame

            if combined_symbol_index is None:
                combined_symbol_index = frame.index
            else:
                combined_symbol_index = combined_symbol_index.union(frame.index)

            # Init list that will contain latest ohlcv data for each symbol
            self.latest_symbol_data[symbol] = list()

        # Align every symbol on the shared timeline: gaps are forward-filled from
        # the previous bar, timestamps before a symbol's first bar are filled with 0.
        for symbol, frame in frames.items():
            aligned = frame.reindex(
                index=combined_symbol_index, method='pad', fill_value=0)
            self.symbol_data[symbol] = aligned.itertuples(name='OHLCV')

    def _get_new_bar(self, symbol):
        """Yield the not-yet-consumed bars of ``symbol`` from its shared iterator."""
        yield from self.symbol_data[symbol]

    def _bars_for(self, symbol):
        """
        Return the list of bars already fed for ``symbol``.

        Prints a notice and re-raises ``KeyError`` when the symbol is unknown.
        """
        try:
            return self.latest_symbol_data[symbol]
        except KeyError:
            print("That symbol is not available in the historical data set.")
            raise

    @staticmethod
    def _bar_field(value_type):
        """Map the public ``'datetime'`` alias to the namedtuple field ``'Index'``."""
        return 'Index' if value_type == 'datetime' else value_type

    def update_bars(self):
        """
        Append the next bar of every symbol to ``self.latest_symbol_data``.

        When a symbol has no bar left, nothing is appended for it and
        ``self.continue_backtest`` is set to ``False``; no exception is raised.
        """
        for symbol in self.symbol_list:
            try:
                bar = next(self._get_new_bar(symbol))
            except StopIteration:
                self.continue_backtest = False
            else:
                self.latest_symbol_data[symbol].append(bar)

    def get_latest_bar(self, symbol):
        """
        It returns the latest bar data of a given symbol as namedtuple
        e.g: OHLCV(Index=Timestamp('2017-08-08 11:00:00'), open=0.080292,
                   high=0.081039, low=0.079854, close=0.080282, volume=610.868)

        Raises ``KeyError`` for an unknown symbol and ``IndexError`` when no bar
        has been fed yet.
        """
        return self._bars_for(symbol)[-1]

    def get_latest_bars(self, symbol, N=1):
        """
        It returns the latest N bars of a given symbol as a np array built from
        the bar namedtuples (empty array when no bar has been fed yet).

        Note: ``N=0`` slices as ``[-0:]`` and therefore returns every bar.
        """
        return np.array(self._bars_for(symbol)[-N:])

    def get_latest_bars_df(self, symbol, N=1):
        """
        It returns the latest N bars of a given symbol as a DataFrame indexed by
        the bar timestamp (index name ``'Index'``) with the columns
        open, high, low, close, volume.

        An empty DataFrame with those columns is returned when no bar has been
        fed yet.  Raises ``KeyError`` only for an unknown symbol.
        """
        bars = self._bars_for(symbol)[-N:]
        if not bars:
            # Without rows there is no 'Index' column to promote; build the empty
            # frame explicitly instead of failing inside set_index.
            return pd.DataFrame(
                columns=list(self._VALUE_COLUMNS),
                index=pd.DatetimeIndex([], name='Index'))
        return pd.DataFrame(bars).set_index('Index')

    def get_latest_bar_datetime(self, symbol):
        """
        It returns the latest bar timestamp object of a given symbol
        """
        return self._bars_for(symbol)[-1].Index

    def get_latest_bar_value(self, symbol, value_type):
        """
        It returns the latest bar value (datetime, open, high, low, close or
        volume) of a given symbol
        """
        return getattr(self._bars_for(symbol)[-1], self._bar_field(value_type))

    def get_latest_bars_values(self, symbol, value_type, N=1):
        """
        It returns a numpy array of the latest bars values (datetime, open, high,
        low, close or volume) of a given symbol
        e.g: array([ 89, 88.2, 86.4, ...])
        """
        field = self._bar_field(value_type)
        bars = self._bars_for(symbol)[-N:]
        return np.array([getattr(bar, field) for bar in bars])

    def current_price(self, symbol):
        """
        It returns the latest close price
        """
        return self.get_latest_bar_value(symbol, 'close')


In [5]:
# Hourly BTC/USDT bars; the handler stores the symbol as 'BTC-USDT'.
data = CSVDataHandler(symbol_list=['BTC/USDT'], timeframe='1h')

In [6]:
# Feed a fixed number of bars into the handler, then collect that many bars in one frame.
n_bars = 250
for _ in range(n_bars):
    data.update_bars()

bars = data.get_latest_bars_df('BTC-USDT', N=n_bars)

In [7]:
# RSI of the close prices, stored next to the bars it was computed from.
close_prices = bars['close']
bars['rsi'] = talib.RSI(close_prices)

In [8]:
# Bars that closed below their open: keep their low price and their RSI.
closed_lower = bars['open'] > bars['close']
price_set = bars.loc[closed_lower, 'low']
rsi_set = bars.loc[closed_lower, 'rsi']

In [9]:
# Display the bars frame, which now carries the `rsi` column.
bars

Unnamed: 0_level_0,open,high,low,close,volume,rsi
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-08-17 04:00:00,4261.48,4313.62,4261.32,4308.83,47.181009,
2017-08-17 05:00:00,4308.83,4328.69,4291.37,4315.32,23.234916,
2017-08-17 06:00:00,4330.29,4345.45,4309.37,4324.35,7.229691,
2017-08-17 07:00:00,4316.62,4349.99,4287.41,4349.99,4.443249,
2017-08-17 08:00:00,4333.32,4377.85,4333.32,4360.69,0.972807,
...,...,...,...,...,...,...
2017-08-27 09:00:00,4323.90,4345.81,4302.11,4320.81,12.868744,51.750829
2017-08-27 10:00:00,4330.00,4330.00,4285.56,4302.73,13.820714,48.092588
2017-08-27 11:00:00,4323.87,4344.99,4285.80,4323.37,14.332417,52.242971
2017-08-27 12:00:00,4340.00,4340.00,4298.66,4298.66,11.000033,47.360671


In [10]:
# Display the RSI values of the bars where open > close.
rsi_set

Index
2017-08-17 06:00:00          NaN
2017-08-17 11:00:00          NaN
2017-08-17 12:00:00          NaN
2017-08-17 15:00:00          NaN
2017-08-17 17:00:00          NaN
                         ...    
2017-08-27 09:00:00    51.750829
2017-08-27 10:00:00    48.092588
2017-08-27 11:00:00    52.242971
2017-08-27 12:00:00    47.360671
2017-08-27 13:00:00    50.206067
Name: rsi, Length: 124, dtype: float64

In [3]:
# Load the backtest trades.  The date columns are named explicitly because
# `parse_dates=True` only tries to parse the index, which here is the row
# number; without this `open_date` / `close_date` stay plain strings and any
# date filter on them is a lexicographic string comparison.
trades = pd.read_csv(
    '../backtest_results/trades.csv',
    header=0,
    index_col=0,
    parse_dates=['open_date', 'close_date'],
)

In [225]:
# Trades opened inside the window, both bounds included.
start_date = '2018-01-01 00:00:00'
end_date = '2018-12-10 00:00:00'
mask = trades['open_date'].between(start_date, end_date)
trades_2018 = trades[mask]

In [6]:
# Columns needed to review each long trade: entry, exit and outcome.
record_columns = [
    'open_date',
    'open_market_price',
    'close_date',
    'close_market_price',
    'win_trades_long',
    'returns_long',
]
records = trades[record_columns]

In [28]:
# Rows 120-129 by position.
records.iloc[120:130]

Unnamed: 0,open_date,open_market_price,close_date,close_market_price,win_trades_long,returns_long
120,2018-08-05 00:00:00.000,6910.03,2018-08-05 13:00:00.000,6969.93,True,0.008669
121,2018-08-05 14:00:00.000,6920.0,2018-08-06 07:00:00.000,7015.0,True,0.013728
122,2018-08-09 12:00:00.000,6254.13,2018-08-09 23:00:00.000,6529.79,True,0.044076
123,2018-08-11 05:00:00.000,6096.14,2018-08-11 23:00:00.000,6232.35,True,0.022344
124,2018-09-05 16:00:00.000,6916.65,2018-09-05 23:00:00.000,6700.0,False,-0.031323
125,2018-09-06 09:00:00.000,6393.96,2018-09-07 08:00:00.000,6446.92,True,0.008283
126,2018-09-07 10:00:00.000,6363.05,2018-09-08 17:00:00.000,6212.33,False,-0.023687
127,2018-09-25 07:00:00.000,6382.89,2018-09-26 21:00:00.000,6476.81,True,0.014714
128,2018-10-11 20:00:00.000,6300.89,2018-10-15 07:00:00.000,6860.25,True,0.088775
129,2018-11-15 10:00:00.000,5590.0,2018-11-15 14:00:00.000,5403.52,False,-0.03336
