In [35]:
import sys
import matplotlib.pyplot as plt
import os
import subprocess
import pandas as pd
from typing import List, Optional, MutableSequence
from datetime import timedelta

# Avoid AttributeError: module 'sqlalchemy' has no attribute 'orm'
import sqlalchemy.orm  # noqa: F401

sys.path.append("../../")
from gmo_hft_bot.db.database import initialize_database
from gmo_hft_bot.db import crud
from backtest.visualize.ohlcv import ohlcv_plot
from backtest.utils.utils import get_ohlcv_df, get_predict_df, match_timestamp_for_ohlcv
from backtest.backtest_trade.richman_backtest import richman_backtest
from backtest.visualize.backtest_visualize import cum_return_plot, position_change_plot, position_change_average_plot


In [46]:
# Market selection for the download step and bar width for the resampling step.
exchange_name = "gmo"
trading_type = "margin"
pair_name = "BTC_JPY"
# pandas offset alias passed to create_ohlc (5-second bars).
# NOTE(review): pandas >= 2.2 prefers the lowercase "s" alias -- confirm nothing
# else depends on this exact string before changing it.
time_bar = "5S"

In [37]:
def execute_shell_script(exchange_name: str, trading_type: str, pair_name: str) -> None:
    """Run ``./download_klines.sh`` for one exchange / trading type / pair.

    The script is first marked executable with ``chmod +x`` and is then invoked
    with ``exchange_name``, ``trading_type`` and ``pair_name`` as its three
    positional arguments, in that order. The script's output is not captured
    by this function.

    A non-zero exit status from either command is reported on stderr so that a
    failed download does not pass silently; no exception is raised for it.

    Args:
        exchange_name: First argument handed to the script.
        trading_type: Second argument handed to the script.
        pair_name: Third argument handed to the script.

    Raises:
        OSError: propagated from ``subprocess.run`` when a command cannot be
            started at all (e.g. the script file is missing).
    """
    shell_script_path: str = os.path.join('.', "download_klines.sh")
    print(shell_script_path)

    chmod_result = subprocess.run(["chmod", "+x", shell_script_path])
    if chmod_result.returncode != 0:
        print(
            f"warning: chmod +x {shell_script_path} exited with status {chmod_result.returncode}",
            file=sys.stderr,
        )

    download_result = subprocess.run(
        [
            shell_script_path,
            exchange_name,
            trading_type,
            pair_name,
        ]
    )
    if download_result.returncode != 0:
        print(
            f"warning: {shell_script_path} exited with status {download_result.returncode}",
            file=sys.stderr,
        )

# Make sure the local raw-data directory exists, then run the download script.
os.makedirs("raw_data", exist_ok=True)
execute_shell_script(exchange_name, trading_type, pair_name)

./download_klines.sh
downloaded: ./raw_data/gmo/margin/BTC_JPY/BTC_JPY2022-03-24.csv
File not exist: https://api.coin.z.com/data/trades/BTC_JPY/2022/03/20220325_BTC_JPY.csv.gz
File not exist: https://api.coin.z.com/data/trades/BTC_JPY/2022/03/20220326_BTC_JPY.csv.gz
File not exist: https://api.coin.z.com/data/trades/BTC_JPY/2022/03/20220327_BTC_JPY.csv.gz
File not exist: https://api.coin.z.com/data/trades/BTC_JPY/2022/03/20220328_BTC_JPY.csv.gz
File not exist: https://api.coin.z.com/data/trades/BTC_JPY/2022/03/20220329_BTC_JPY.csv.gz
File not exist: https://api.coin.z.com/data/trades/BTC_JPY/2022/03/20220330_BTC_JPY.csv.gz
File not exist: https://api.coin.z.com/data/trades/BTC_JPY/2022/03/20220331_BTC_JPY.csv.gz


In [47]:
def _raw_datafile_paths(exchange_name: str, trading_type: str, pair_name: str) -> List:
        raw_data_folder = "./raw_data"

        file_lists = []
        file_lists += list(
            map(
                lambda x: os.path.join(raw_data_folder, exchange_name, trading_type, pair_name, x),
                os.listdir(os.path.join(raw_data_folder, exchange_name, trading_type, pair_name)),
            )
        )

        return file_lists

# Each exchange encodes its timestamp column differently,
# so the conversion to a datetime index is chosen per exchange here.
def set_index(exchange_name: str, trading_type: str, df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` indexed by its ``timestamp`` column as UTC datetimes.

    For ``"bybit"`` the column is converted with ``unit="s"``; for ``"gmo"`` it
    is converted with ``unit="ns"``. Any other exchange name gets ``df`` back
    unchanged. ``trading_type`` is accepted but not used.

    The input frame is not modified; ``set_index`` produces a new frame.
    """
    if exchange_name == "bybit":
        timestamp_unit = "s"
    elif exchange_name == "gmo":
        timestamp_unit = "ns"
    else:
        # Unknown exchange: hand the frame back untouched.
        return df

    # NOTE(review): ``unit`` describes numeric epoch values; how string
    # timestamps are treated here depends on the pandas version -- confirm.
    indexed = df.set_index("timestamp")
    indexed.index = pd.to_datetime(indexed.index, utc=True, unit=timestamp_unit)
    return indexed

def create_ohlc(data: pd.DataFrame, time_period: str = "15min") -> pd.DataFrame:
    """Resample trade rows into OHLCV bars.

    Args:
        data: Frame with a datetime-like index and ``price`` and ``size``
            columns (both are read by name below).
        time_period: pandas offset alias giving the bar width, e.g. ``"15min"``
            for 15 minutes or ``"1h"`` for one hour. The default is spelled
            ``"15min"`` because the equivalent ``"15T"`` spelling is deprecated
            in recent pandas releases.

    Returns:
        A frame with ``open``, ``high``, ``low`` and ``close`` taken from
        ``price`` and ``volume`` as the per-bar sum of ``size``.
    """
    df = data["price"].resample(time_period).ohlc()
    df["volume"] = data["size"].resample(time_period).sum()
    return df


# Build one OHLCV frame per raw trade file and combine them with a single concat.
# (DataFrame.append is gone in pandas 2.0 and copied the whole accumulated frame
# on every iteration.)
raw_datafile_paths = _raw_datafile_paths(exchange_name, trading_type, pair_name)
ohlc_frames = []
for raw_datafile_path in raw_datafile_paths:
    df = pd.read_csv(raw_datafile_path)
    display(df)
    df = set_index(exchange_name, trading_type, df)
    ohlc_df = create_ohlc(df, time_bar)
    ohlc_frames.append(ohlc_df)

# pd.concat raises on an empty list, so fall back to an empty frame when no raw
# files were found.
all_ohlc_df = pd.concat(ohlc_frames) if ohlc_frames else pd.DataFrame()

# save_dir = os.path.join(DATAFOLDER.ohlc_data_folder, time_bar)
# os.makedirs(save_dir, exist_ok=True)
# save_path = os.path.join(save_dir, f"{exchange_name}_{trading_type}_{pair_name}.parquet.gzip")
all_ohlc_df = all_ohlc_df.sort_index()
# Bars without trades come out of resample() with NaN prices; forward-fill copies
# the previous bar's open/high/low/close into them column by column.
# NOTE(review): a flat bar at the previous close may be what is wanted -- confirm.
all_ohlc_df = all_ohlc_df.ffill()
# all_ohlc_df.to_parquet(save_path, engine="pyarrow", compression="gzip")

# Guarantees a UTC DatetimeIndex even when the frame is empty; for frames built
# through set_index() above the index is already tz-aware and stays unchanged.
all_ohlc_df.index = pd.to_datetime(all_ohlc_df.index, utc=True, unit="s")
# Shift the timestamps by nine hours (presumably to read as JST).
# NOTE(review): the index keeps its UTC label after the shift (shown as +00:00);
# tz_convert would keep the label truthful -- confirm what later cells expect.
all_ohlc_df.index += timedelta(hours=9)
display(all_ohlc_df[-30:])


Unnamed: 0,symbol,side,size,price,timestamp
0,BTC_JPY,BUY,0.01,5133000.0,2022-03-23 21:00:00.103
1,BTC_JPY,SELL,0.03,5132992.0,2022-03-23 21:00:00.149
2,BTC_JPY,SELL,0.02,5132932.0,2022-03-23 21:00:00.149
3,BTC_JPY,SELL,0.01,5132858.0,2022-03-23 21:00:00.149
4,BTC_JPY,BUY,0.07,5133000.0,2022-03-23 21:00:00.230
...,...,...,...,...,...
86370,BTC_JPY,SELL,0.01,5370105.0,2022-03-24 20:59:14.868
86371,BTC_JPY,SELL,0.01,5370105.0,2022-03-24 20:59:14.894
86372,BTC_JPY,SELL,0.01,5370106.0,2022-03-24 20:59:16.766
86373,BTC_JPY,BUY,0.01,5372307.0,2022-03-24 20:59:17.569


Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-03-25 05:57:25+00:00,5370000.0,5370000.0,5369405.0,5369440.0,0.0
2022-03-25 05:57:30+00:00,5370000.0,5370000.0,5369405.0,5369440.0,0.0
2022-03-25 05:57:35+00:00,5370000.0,5370000.0,5369405.0,5369440.0,0.0
2022-03-25 05:57:40+00:00,5370457.0,5370457.0,5370452.0,5370452.0,0.06
2022-03-25 05:57:45+00:00,5370457.0,5370457.0,5370452.0,5370452.0,0.0
2022-03-25 05:57:50+00:00,5370457.0,5370457.0,5370452.0,5370452.0,0.0
2022-03-25 05:57:55+00:00,5371300.0,5371300.0,5371300.0,5371300.0,0.01
2022-03-25 05:58:00+00:00,5371300.0,5371300.0,5371300.0,5371300.0,0.0
2022-03-25 05:58:05+00:00,5369909.0,5372280.0,5369807.0,5372280.0,0.09
2022-03-25 05:58:10+00:00,5369909.0,5372280.0,5369807.0,5372280.0,0.0


In [39]:
# display(all_ohlc_df['2022-03-24 23:04:50+00:00':])
