## Import Libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import time
from datetime import date, datetime
from dateutil.relativedelta import relativedelta
import requests

import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [12, 6]
plt.rcParams['figure.dpi'] = 120

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Endpoint queried for historical bars; URL is the short alias used by the
# download helper.
VNDIRECT_DATA_HISTORY_URL = 'https://dchart-api.vndirect.com.vn/dchart/history'
URL = VNDIRECT_DATA_HISTORY_URL

# Browser-like User-Agent header sent with every request.
_DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
HEADERS = {'User-Agent': _DEFAULT_USER_AGENT}

# Pool of alternative User-Agent strings.
# NOTE(review): not referenced by any of the visible cells - confirm whether
# it is still needed.
USER_AGENTS = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_3_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36",
]

In [3]:
def getStockHistoryData(ticker, timestamp_from=0, timestamp_to=0, timeout=30):
    """Download 5-minute OHLCV bars for ``ticker`` from the history endpoint.

    Parameters
    ----------
    ticker : str
        Symbol to request (passed through ``str``).
    timestamp_from : int or float, optional
        Start of the window as a Unix timestamp. ``0`` (the default) means
        host-local midnight three months before today.
    timestamp_to : int or float, optional
        End of the window as a Unix timestamp. ``0`` (the default) means
        23:59:00 host-local time today.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        Columns ``Time`` (integer timestamps taken from the ``t`` array),
        ``Open``, ``High``, ``Low``, ``Close`` (float) and ``Volume`` (int),
        one row per bar.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the JSON payload lacks one of the ``t/o/h/l/c/v`` arrays.
    """
    today = date.today()
    if timestamp_from == 0:
        start_day = today + relativedelta(months=-3)
        timestamp_from = datetime(start_day.year, start_day.month, start_day.day).timestamp()
    if timestamp_to == 0:
        timestamp_to = datetime(today.year, today.month, today.day, 23, 59).timestamp()

    params = {
        "resolution": "5",
        "symbol": str(ticker),
        "from": int(timestamp_from),
        "to": int(timestamp_to)
    }

    reply = requests.get(URL, params=params, headers=HEADERS, timeout=timeout)
    # Fail on an HTTP error status instead of trying to parse an error page.
    reply.raise_for_status()
    payload = reply.json()

    # Explicit 64-bit integers keep the dtypes independent of the platform's
    # default C integer width.
    return pd.DataFrame(
        {
            'Time': np.asarray(payload['t']).astype('int64'),
            'Open': np.asarray(payload['o']).astype(float),
            'High': np.asarray(payload['h']).astype(float),
            'Low': np.asarray(payload['l']).astype(float),
            'Close': np.asarray(payload['c']).astype(float),
            'Volume': np.asarray(payload['v']).astype('int64'),
        },
        columns=['Time', 'Open', 'High', 'Low', 'Close', 'Volume'],
    )

In [4]:
def prepareData(htd, tz="Asia/Ho_Chi_Minh"):
    """Return a copy of ``htd`` indexed by bar time, without the helper columns.

    Parameters
    ----------
    htd : pandas.DataFrame
        Raw bars. Must contain a ``Time`` column of Unix timestamps in
        seconds, or failing that a ``DateStr`` column of parseable date
        strings.
    tz : str or None, optional
        Time zone whose wall-clock time the index should show. The later
        cells filter on wall-clock HHMM values, so the conversion is pinned
        to a zone rather than left to the machine running the notebook.
        Pass ``None`` to use the host's local zone (``datetime.fromtimestamp``).
        NOTE(review): the default assumes the exchange runs on Vietnam time
        (UTC+7) - confirm.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by a naive ``Date`` DatetimeIndex, with the
        ``Time`` and ``DateStr`` columns removed. ``htd`` itself is not
        modified.

    Raises
    ------
    KeyError
        If ``htd`` has neither a ``Time`` nor a ``DateStr`` column.
    """
    from datetime import datetime

    frame = htd.copy()  # never write helper columns into the caller's frame
    if 'Time' in frame.columns:
        if tz is None:
            frame['Date'] = pd.to_datetime(frame['Time'].map(datetime.fromtimestamp))
        else:
            # Vectorised: epoch seconds -> UTC -> wall-clock time in `tz`,
            # then drop the tz info so the index stays naive.
            frame['Date'] = (
                pd.to_datetime(frame['Time'], unit='s', utc=True)
                .dt.tz_convert(tz)
                .dt.tz_localize(None)
            )
    elif 'DateStr' in frame.columns:
        frame['Date'] = pd.to_datetime(frame['DateStr'])
    else:
        raise KeyError("prepareData needs a 'Time' or 'DateStr' column")

    return frame.set_index('Date').drop(columns=['Time', 'DateStr'], errors='ignore')

## Load Price Data

In [5]:
# Download the default window (both timestamps left at 0) for the ticker,
# index it by bar time and discard incomplete rows.
ticker = "VN30F1M"
htd = getStockHistoryData(ticker, timestamp_from=0, timestamp_to=0)
data = prepareData(htd).dropna()
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-06-13 09:00:00,1331.0,1331.2,1329.4,1331.0,5800
2024-06-13 09:05:00,1331.0,1332.4,1330.6,1331.2,3470
2024-06-13 09:10:00,1330.5,1331.4,1330.5,1331.1,1563
2024-06-13 09:15:00,1331.1,1332.4,1330.5,1330.7,4991
2024-06-13 09:20:00,1330.6,1331.5,1330.6,1331.3,1952
...,...,...,...,...,...
2024-09-13 13:45:00,1292.8,1293.5,1291.4,1292.0,6854
2024-09-13 13:50:00,1292.2,1294.3,1292.0,1294.0,5429
2024-09-13 13:55:00,1294.1,1294.5,1293.2,1294.2,4012
2024-09-13 14:00:00,1294.0,1294.8,1293.7,1294.0,3661


In [6]:
def cal_first_close(tick):
  """Return the value of the bar stamped 09:15 in ``tick``, or NaN if absent.

  ``tick`` is a Series with a DatetimeIndex (one resample bucket). If several
  bars carry the 09:15 stamp, the first one is returned.
  """
  hhmm = 100 * tick.index.hour + tick.index.minute
  matched = tick[hhmm == 915]
  if matched.empty:
    # Explicit missing value instead of an implicit None.
    return float('nan')
  # .iloc is positional; a bare [0] on a datetime-indexed Series relies on the
  # deprecated integer-key fallback.
  return matched.iloc[0]


def cal_high_before(tick, cutoff=1345):
  """Return the maximum of the bars in ``tick`` stamped strictly before ``cutoff``.

  ``tick`` is a Series with a DatetimeIndex; ``cutoff`` is a wall-clock time
  encoded as ``100 * hour + minute`` (1345 means 13:45). The result is NaN
  when no bar precedes the cutoff.
  """
  # NOTE(review): the default (1345) differs from cal_low_before's 1355
  # cutoff - confirm the asymmetry is intended.
  hhmm = 100 * tick.index.hour + tick.index.minute
  return tick[hhmm < cutoff].max()


def cal_low_before(tick, cutoff=1355):
  """Return the minimum of the bars in ``tick`` stamped strictly before ``cutoff``.

  ``tick`` is a Series with a DatetimeIndex; ``cutoff`` is a wall-clock time
  encoded as ``100 * hour + minute`` (1355 means 13:55). The result is NaN
  when no bar precedes the cutoff.
  """
  # NOTE(review): the default (1355) differs from cal_high_before's 1345
  # cutoff - confirm the asymmetry is intended.
  hhmm = 100 * tick.index.hour + tick.index.minute
  return tick[hhmm < cutoff].min()

def cal_close(tick):
  """Return the value of the bar stamped 14:25 in ``tick``, or NaN if absent.

  ``tick`` is a Series with a DatetimeIndex (one resample bucket). If several
  bars carry the 14:25 stamp, the first one is returned.
  """
  hhmm = 100 * tick.index.hour + tick.index.minute
  matched = tick[hhmm == 1425]
  if matched.empty:
    # Explicit missing value instead of an implicit None.
    return float('nan')
  # .iloc is positional; a bare [0] on a datetime-indexed Series relies on the
  # deprecated integer-key fallback.
  return matched.iloc[0]
      
def cal_last(tick):
  """Return the value of the bar stamped 14:45 in ``tick``, or NaN if absent.

  ``tick`` is a Series with a DatetimeIndex (one resample bucket). If several
  bars carry the 14:45 stamp, the first one is returned.
  """
  hhmm = 100 * tick.index.hour + tick.index.minute
  matched = tick[hhmm == 1445]
  if matched.empty:
    # Explicit missing value instead of an implicit None.
    return float('nan')
  # .iloc is positional; a bare [0] on a datetime-indexed Series relies on the
  # deprecated integer-key fallback.
  return matched.iloc[0]

In [7]:
# Expose Close/High/Low under one alias per daily statistic so that every
# alias can be reduced by its own aggregator in a single daily resample.
data_tmp = data.assign(
    first_close=data.Close,
    Close_price=data.Close,
    _last=data.Close,
    prev_high=data.High,
    prev_low=data.Low,
)

daily_aggregators = {
    'first_close': cal_first_close,
    'prev_high': cal_high_before,
    'prev_low': cal_low_before,
    'Close_price': cal_close,
    '_last': cal_last,
}
day_data = (
    data_tmp
    .resample("D")
    .agg(daily_aggregators)
    .rename(columns={'_last': 'day_Close'})
)

In [8]:
# Discard calendar days for which no first_close value was found.
day_data = day_data.dropna(subset=['first_close'])

In [9]:
# Carry each day's day_Close forward one row as prev_day_Close, then drop the
# columns that are no longer needed and any row left incomplete.
day_data = (
    day_data
    .assign(prev_day_Close=day_data['day_Close'].shift(1))
    .drop(columns=['day_Close', 'Close_price'])
    .dropna()
)

In [10]:
# Tag every intraday bar with its calendar day, attach that day's statistics
# and keep only the bars for which all of them are available.
bar_day = pd.PeriodIndex(data.index, freq='1D').to_timestamp()
data = (
    data
    .assign(time_d=bar_day)
    .merge(day_data, left_on="time_d", right_index=True, how="left")
    .dropna()
)

In [11]:
# Keep only the bars stamped 13:55 through 14:10 (HHMM strictly between 1350
# and 1415). .copy() detaches the result, so the columns added to `data`
# afterwards are written to an independent frame instead of a slice of the
# unfiltered one (the SettingWithCopy situation).
bar_hhmm = 100 * data.index.hour + data.index.minute
data = data[(bar_hhmm > 1350) & (bar_hhmm < 1415)].copy()

In [12]:
# mom_y (momentum vs. yesterday): percent change of the bar's Close against
# prev_day_Close.
# body_rate: distance of the Close from first_close, expressed as a fraction
# of the prev_high - prev_low range.
data = data.assign(
    mom_y=100 * (data.Close - data.prev_day_Close) / data.prev_day_Close,
    body_rate=(data.Close - data.first_close) / (data.prev_high - data.prev_low),
)

In [13]:
def cal_signal(row, long_mom=0.26, long_body=0.65, short_mom=-0.18, short_body=-0.39):
    """Classify one bar as ``'long'``, ``'short'`` or ``''`` (no signal).

    Parameters
    ----------
    row : mapping
        Anything indexable by ``'mom_y'`` and ``'body_rate'`` (a DataFrame
        row, a dict, ...).
    long_mom, long_body : float, optional
        A bar is ``'long'`` when ``mom_y > long_mom`` and
        ``body_rate > long_body``.
    short_mom, short_body : float, optional
        Otherwise a bar is ``'short'`` when ``mom_y < short_mom`` and
        ``body_rate < short_body``.

    Returns
    -------
    str
        ``'long'``, ``'short'`` or ``''``. All comparisons are strict, so a
        value sitting exactly on a threshold (or NaN) yields no signal.
    """
    # An earlier revision used long 0.18 / 0.65 and short 0.6 / -0.39; pass
    # those values as keyword arguments to reproduce it.
    momentum = row['mom_y']
    body = row['body_rate']
    if momentum > long_mom and body > long_body:
        return 'long'
    if momentum < short_mom and body < short_body:
        return 'short'
    return ''
# Label every remaining bar with its signal, evaluated row by row.
data['signal'] = data.apply(cal_signal, axis=1)

In [14]:
# Most recent 30 bars that carry a non-empty signal.
data.loc[data['signal'].ne('')].tail(30)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,time_d,first_close,prev_high,prev_low,prev_day_Close,mom_y,body_rate,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-08-13 14:05:00,1267.3,1267.3,1262.7,1263.4,13767,2024-08-13,1267.1,1270.0,1264.9,1270.0,-0.519685,-0.72549,short
2024-08-13 14:10:00,1263.1,1264.8,1263.0,1264.3,6146,2024-08-13,1267.1,1270.0,1264.9,1270.0,-0.448819,-0.54902,short
2024-08-15 14:10:00,1266.4,1266.4,1264.2,1264.4,3673,2024-08-15,1268.7,1269.7,1267.1,1268.0,-0.283912,-1.653846,short
2024-08-16 13:55:00,1286.5,1287.4,1285.7,1287.2,3816,2024-08-16,1269.8,1287.4,1268.2,1267.7,1.538219,0.90625,long
2024-08-16 14:00:00,1287.6,1288.5,1286.3,1287.0,6974,2024-08-16,1269.8,1287.4,1268.2,1267.7,1.522442,0.895833,long
2024-08-16 14:05:00,1287.0,1288.5,1286.7,1288.2,4295,2024-08-16,1269.8,1287.4,1268.2,1267.7,1.617102,0.958333,long
2024-08-16 14:10:00,1288.2,1288.2,1286.7,1287.3,2610,2024-08-16,1269.8,1287.4,1268.2,1267.7,1.546107,0.911458,long
2024-08-19 13:55:00,1299.2,1300.0,1298.2,1299.2,4481,2024-08-19,1292.0,1299.3,1288.3,1288.0,0.869565,0.654545,long
2024-08-20 13:55:00,1307.1,1307.7,1306.6,1307.0,5444,2024-08-20,1298.6,1306.5,1298.3,1298.0,0.693374,1.02439,long
2024-08-20 14:00:00,1307.3,1308.1,1306.6,1308.1,6397,2024-08-20,1298.6,1306.5,1298.3,1298.0,0.77812,1.158537,long


In [15]:
# Show the last 30 rows of the filtered frame, with or without a signal.
data.tail(30)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,time_d,first_close,prev_high,prev_low,prev_day_Close,mom_y,body_rate,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-09-04 14:00:00,1311.2,1311.4,1309.8,1310.2,2873,2024-09-04,1313.7,1321.5,1307.3,1332.7,-1.688302,-0.246479,
2024-09-04 14:05:00,1310.2,1312.9,1310.2,1312.6,5042,2024-09-04,1313.7,1321.5,1307.3,1332.7,-1.508216,-0.077465,
2024-09-04 14:10:00,1312.5,1314.5,1312.5,1313.2,7493,2024-09-04,1313.7,1321.5,1307.3,1332.7,-1.463195,-0.035211,
2024-09-05 13:55:00,1313.4,1313.5,1310.9,1310.9,8015,2024-09-05,1315.7,1318.6,1312.8,1314.0,-0.235921,-0.827586,short
2024-09-05 14:00:00,1311.0,1312.5,1310.9,1311.9,6164,2024-09-05,1315.7,1318.6,1312.8,1314.0,-0.159817,-0.655172,
2024-09-05 14:05:00,1311.7,1312.9,1311.5,1312.3,3626,2024-09-05,1315.7,1318.6,1312.8,1314.0,-0.129376,-0.586207,
2024-09-05 14:10:00,1312.5,1312.9,1311.5,1312.2,3978,2024-09-05,1315.7,1318.6,1312.8,1314.0,-0.136986,-0.603448,
2024-09-06 13:55:00,1309.1,1310.0,1309.1,1309.9,1866,2024-09-06,1308.4,1310.9,1305.6,1307.7,0.168234,0.283019,
2024-09-06 14:00:00,1310.0,1311.3,1309.8,1310.6,5202,2024-09-06,1308.4,1310.9,1305.6,1307.7,0.221763,0.415094,
2024-09-06 14:05:00,1310.5,1310.6,1309.3,1309.9,4064,2024-09-06,1308.4,1310.9,1305.6,1307.7,0.168234,0.283019,
