# Template Live data

### Import Library

In [25]:
import numpy as np
import pandas as pd
import seaborn as sns
import time
from datetime import date, datetime
from dateutil.relativedelta import relativedelta
import requests

import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [12, 6]
plt.rcParams['figure.dpi'] = 120

import warnings
warnings.filterwarnings('ignore')

In [26]:
USER_AGENTS = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_3_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36"
]

HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
VNDIRECT_DATA_HISTORY_URL = 'https://dchart-api.vndirect.com.vn/dchart/history'
URL = VNDIRECT_DATA_HISTORY_URL

In [27]:
def getStockHistoryData(ticker, timestamp_from=0, timestamp_to=0):
    if timestamp_from == 0:
        three_months = date.today() + relativedelta(months=-3)
        timestamp_from = datetime.strptime(three_months.strftime("%m/%d/%Y") + ', 00:00:0', "%m/%d/%Y, %H:%M:%S")\
            .timestamp()
    if timestamp_to == 0:
        timestamp_to = datetime.strptime(date.today().strftime("%m/%d/%Y") + ', 23:59:00', "%m/%d/%Y, %H:%M:%S")\
            .timestamp()

    params = {
        "resolution": "5",
        "symbol": str(ticker),
        "from": int(timestamp_from),
        "to": int(timestamp_to)
    }

    x = requests.get(URL, params=params, headers=HEADERS)
    response = x.json()

    import numpy as np
    import pandas as pd

    timestamp = np.array(response['t']).astype(int)
    close = np.array(response['c']).astype(float)
    open = np.array(response['o']).astype(float)
    high = np.array(response['h']).astype(float)
    low = np.array(response['l']).astype(float)
    volume = np.array(response['v']).astype(int)

    dataset = pd.DataFrame({'Time': timestamp, 'Open': list(open), 'High': list(high), 'Low': list(low),
                            'Close': list(close), 'Volume': list(volume)},
                           columns=['Time', 'Open', 'High', 'Low', 'Close', 'Volume'])
    return dataset

In [28]:
def prepareData(htd):
    if 'Time' in htd.columns:
        from datetime import datetime

        htd['DateStr'] = htd.apply(
            lambda x: datetime.fromtimestamp(x['Time']).strftime("%Y-%m-%d %H:%M:%S"), axis=1)

    htd['Date'] = pd.to_datetime(htd['DateStr'])
    ticker_data = htd.set_index('Date')
    ticker_data.drop(columns=['Time', 'DateStr'], inplace=True)
    return ticker_data

### Load Price Data

In [29]:
ticker = "VN30F1M"
htd = getStockHistoryData(ticker, 0, 0)
ticker_data = prepareData(htd)
ticker_data = ticker_data.dropna()
ticker_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-07-18 09:00:00,1303.1,1304.3,1301.7,1303.9,5508
2024-07-18 09:05:00,1304.0,1304.0,1302.8,1303.0,1498
2024-07-18 09:10:00,1303.0,1303.6,1303.0,1303.5,1644
2024-07-18 09:15:00,1303.5,1305.0,1302.9,1303.1,4127
2024-07-18 09:20:00,1303.1,1303.3,1300.9,1301.2,5197
...,...,...,...,...,...
2024-10-18 10:55:00,1371.0,1371.2,1369.5,1369.6,5203
2024-10-18 11:00:00,1369.4,1369.5,1367.6,1368.4,8310
2024-10-18 11:05:00,1368.3,1368.9,1367.1,1367.3,3971
2024-10-18 11:10:00,1367.2,1367.6,1363.8,1365.4,10418


In [30]:
def prepare_clustering_data(htd):
    htd['high_diff_1'] = htd['High'] - htd['High'].shift(1)
    htd['high_diff_2'] = htd['High'] - htd['High'].shift(2)
    htd['low_diff_1'] = htd['Low'] - htd['Low'].shift(1)
    htd['low_diff_2'] = htd['Low'] - htd['Low'].shift(2)
    htd['open_diff_1'] = htd['Open'] - htd['Open'].shift(1)
    htd['open_diff_2'] = htd['Open'] - htd['Open'].shift(2)
    htd['close_diff_1'] = htd['Close'] - htd['Close'].shift(1)
    htd['close_diff_2'] = htd['Close'] - htd['Close'].shift(2)
    htd.dropna(inplace=True)
    return htd


def prepare_data(htd):
    htd["current"] = htd.index + pd.DateOffset(minutes=5)
    htd = prepare_clustering_data(htd)
    kmeans = load_brain()
    cluster_cols = ["high_diff_1", "high_diff_2", "low_diff_1", "low_diff_2",
                    "open_diff_1", "open_diff_2", "close_diff_1", "close_diff_2"]
    cluster = kmeans.fit(htd[cluster_cols])
    htd = htd.assign(cluster=cluster.labels_)
    htd['signal'] = htd.apply(lambda r: cal_signal(r), axis=1)
    return htd


def cal_signal(row):
    signal = ''
    if row['cluster'] in [42, 37, 58, 69, 91, 23, 80, 71, 14, 75]:
        signal = 'long'
    elif row['cluster'] in [73, 34, 71, 14, 10, 24, 56, 64, 74, 3]:
        signal = 'short'
    return signal


def load_brain():
    import os
    from pathlib import Path
    import pickle
    notebook_path = os.getcwd()
    algo_dir = Path(notebook_path).parent.parent.parent
    trainner_file = str(algo_dir) + '/VN30ps/kmean_clusters/kmeans.pickle'
    try:
        with open(trainner_file, 'rb') as fp:
            return pickle.load(fp)
    except FileNotFoundError:
        print('You need to train a network first')
        return

In [31]:
prepared_data = prepare_data(ticker_data)

In [32]:
prepared_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,current,high_diff_1,high_diff_2,low_diff_1,low_diff_2,open_diff_1,open_diff_2,close_diff_1,close_diff_2,cluster,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2024-07-18 09:10:00,1303.0,1303.6,1303.0,1303.5,1644,2024-07-18 09:15:00,-0.4,-0.7,0.2,1.3,-1.0,-0.1,0.5,-0.4,1,
2024-07-18 09:15:00,1303.5,1305.0,1302.9,1303.1,4127,2024-07-18 09:20:00,1.4,1.0,-0.1,0.1,0.5,-0.5,-0.4,0.1,24,short
2024-07-18 09:20:00,1303.1,1303.3,1300.9,1301.2,5197,2024-07-18 09:25:00,-1.7,-0.3,-2.0,-2.1,-0.4,0.1,-1.9,-2.3,18,
2024-07-18 09:25:00,1301.3,1302.4,1300.9,1302.4,2461,2024-07-18 09:30:00,-0.9,-2.6,0.0,-2.0,-1.8,-2.2,1.2,-0.7,64,short
2024-07-18 09:30:00,1302.1,1303.4,1302.1,1303.4,2915,2024-07-18 09:35:00,1.0,0.1,1.2,1.2,0.8,-1.0,1.0,2.2,84,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-18 10:55:00,1371.0,1371.2,1369.5,1369.6,5203,2024-10-18 11:00:00,-0.2,-1.9,-1.1,-1.5,-0.1,-2.1,-1.5,-1.6,10,short
2024-10-18 11:00:00,1369.4,1369.5,1367.6,1368.4,8310,2024-10-18 11:05:00,-1.7,-1.9,-1.9,-3.0,-1.6,-1.7,-1.2,-2.7,72,
2024-10-18 11:05:00,1368.3,1368.9,1367.1,1367.3,3971,2024-10-18 11:10:00,-0.6,-2.3,-0.5,-2.4,-1.1,-2.7,-1.1,-2.3,10,short
2024-10-18 11:10:00,1367.2,1367.6,1363.8,1365.4,10418,2024-10-18 11:15:00,-1.3,-1.9,-3.3,-3.8,-1.1,-2.2,-1.9,-3.0,38,


In [33]:
prepared_data[prepared_data.signal != '']

Unnamed: 0_level_0,Open,High,Low,Close,Volume,current,high_diff_1,high_diff_2,low_diff_1,low_diff_2,open_diff_1,open_diff_2,close_diff_1,close_diff_2,cluster,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2024-07-18 09:15:00,1303.5,1305.0,1302.9,1303.1,4127,2024-07-18 09:20:00,1.4,1.0,-0.1,0.1,0.5,-0.5,-0.4,0.1,24,short
2024-07-18 09:25:00,1301.3,1302.4,1300.9,1302.4,2461,2024-07-18 09:30:00,-0.9,-2.6,0.0,-2.0,-1.8,-2.2,1.2,-0.7,64,short
2024-07-18 09:50:00,1302.0,1302.7,1301.5,1301.9,1208,2024-07-18 09:55:00,-0.6,-0.7,-0.3,-1.1,-1.3,-1.0,-0.1,-1.1,75,long
2024-07-18 10:05:00,1301.3,1301.4,1298.7,1299.3,6043,2024-07-18 10:10:00,-0.6,-1.3,-1.4,-3.0,-0.7,-0.5,-2.0,-2.7,10,short
2024-07-18 10:10:00,1299.4,1300.4,1298.7,1300.4,4720,2024-07-18 10:15:00,-1.0,-1.6,0.0,-1.4,-1.9,-2.6,1.1,-0.9,64,short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-18 10:30:00,1374.4,1375.2,1373.1,1373.5,3452,2024-10-18 10:35:00,0.8,1.8,0.3,0.5,1.4,1.8,-0.9,0.5,74,short
2024-10-18 10:35:00,1373.4,1373.9,1373.2,1373.4,946,2024-10-18 10:40:00,-1.3,-0.5,0.1,0.4,-1.0,0.4,-0.1,-1.0,80,long
2024-10-18 10:55:00,1371.0,1371.2,1369.5,1369.6,5203,2024-10-18 11:00:00,-0.2,-1.9,-1.1,-1.5,-0.1,-2.1,-1.5,-1.6,10,short
2024-10-18 11:05:00,1368.3,1368.9,1367.1,1367.3,3971,2024-10-18 11:10:00,-0.6,-2.3,-0.5,-2.4,-1.1,-2.7,-1.1,-2.3,10,short


In [34]:
prepared_data[(prepared_data.index > '2024-10-17 08:20:00') & (prepared_data.index < '2024-10-17 14:30:00')]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,current,high_diff_1,high_diff_2,low_diff_1,low_diff_2,open_diff_1,open_diff_2,close_diff_1,close_diff_2,cluster,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2024-10-17 09:00:00,1358.0,1359.2,1357.6,1358.8,6396,2024-10-17 09:05:00,4.6,2.7,3.0,1.1,3.4,1.5,4.2,2.3,61,
2024-10-17 09:05:00,1358.7,1358.9,1357.8,1358.3,2053,2024-10-17 09:10:00,-0.3,4.3,0.2,3.2,0.7,4.1,-0.5,3.7,86,
2024-10-17 09:10:00,1358.2,1358.5,1358.1,1358.5,1346,2024-10-17 09:15:00,-0.4,-0.7,0.3,0.5,-0.5,0.2,0.2,-0.3,1,
2024-10-17 09:15:00,1358.5,1359.2,1358.2,1358.2,2582,2024-10-17 09:20:00,0.7,0.3,0.1,0.4,0.3,-0.2,-0.3,-0.1,24,short
2024-10-17 09:20:00,1358.4,1358.4,1357.1,1357.8,2705,2024-10-17 09:25:00,-0.8,-0.1,-1.1,-1.0,-0.1,0.2,-0.4,-0.7,27,
2024-10-17 09:25:00,1357.8,1358.3,1357.5,1357.6,1431,2024-10-17 09:30:00,-0.1,-0.9,0.4,-0.7,-0.6,-0.7,-0.2,-0.6,75,long
2024-10-17 09:30:00,1357.6,1357.9,1357.1,1357.1,1041,2024-10-17 09:35:00,-0.4,-0.5,-0.4,0.0,-0.2,-0.8,-0.5,-0.7,5,
2024-10-17 09:35:00,1357.1,1357.4,1356.4,1357.4,2960,2024-10-17 09:40:00,-0.5,-0.9,-0.7,-1.1,-0.5,-0.7,0.3,-0.2,75,long
2024-10-17 09:40:00,1357.4,1357.4,1356.9,1356.9,889,2024-10-17 09:45:00,0.0,-0.5,0.5,-0.2,0.3,-0.2,-0.5,-0.2,65,
2024-10-17 09:45:00,1357.0,1357.4,1356.1,1357.1,1865,2024-10-17 09:50:00,0.0,0.0,-0.8,-0.3,-0.4,-0.1,0.2,-0.3,63,


In [35]:
prepared_data[(prepared_data.index > '2024-10-18 08:20:00')]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,current,high_diff_1,high_diff_2,low_diff_1,low_diff_2,open_diff_1,open_diff_2,close_diff_1,close_diff_2,cluster,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2024-10-18 09:00:00,1374.6,1377.9,1374.0,1376.3,12038,2024-10-18 09:05:00,19.6,19.4,15.7,15.6,16.3,16.1,18.0,17.9,7,
2024-10-18 09:05:00,1376.5,1376.5,1374.5,1374.6,5041,2024-10-18 09:10:00,-1.4,18.2,0.5,16.2,1.9,18.2,-1.7,16.3,46,
2024-10-18 09:10:00,1374.5,1376.0,1370.5,1375.6,9270,2024-10-18 09:15:00,-0.5,-1.9,-4.0,-3.5,-2.0,-0.1,1.0,-0.7,83,
2024-10-18 09:15:00,1375.6,1375.8,1372.9,1373.4,5229,2024-10-18 09:20:00,-0.2,-0.7,2.4,-1.6,1.1,-0.9,-2.2,-1.2,5,
2024-10-18 09:20:00,1373.3,1373.9,1372.2,1373.2,3593,2024-10-18 09:25:00,-1.9,-2.1,-0.7,1.7,-2.3,-1.2,-0.2,-2.4,52,
2024-10-18 09:25:00,1373.2,1374.8,1373.2,1374.8,3793,2024-10-18 09:30:00,0.9,-1.0,1.0,0.3,-0.1,-2.4,1.6,1.4,45,
2024-10-18 09:30:00,1374.9,1376.8,1374.1,1376.1,7476,2024-10-18 09:35:00,2.0,2.9,0.9,1.9,1.7,1.6,1.3,2.9,49,
2024-10-18 09:35:00,1376.2,1376.8,1375.7,1376.1,3790,2024-10-18 09:40:00,0.0,2.0,1.6,2.5,1.3,3.0,0.0,1.3,16,
2024-10-18 09:40:00,1376.1,1376.2,1373.8,1374.4,4664,2024-10-18 09:45:00,-0.6,-0.6,-1.9,-0.3,-0.1,1.2,-1.7,-1.7,91,long
2024-10-18 09:45:00,1374.4,1374.9,1373.8,1374.0,2381,2024-10-18 09:50:00,-1.3,-1.9,0.0,-1.9,-1.7,-1.8,-0.4,-2.1,83,
