In [19]:
import os
import random
import requests
import numpy as np
import pandas as pd
import lunardate
from tqdm import tqdm

from datetime import datetime,timedelta, timezone

In [21]:
def get_vn30f1m_trading(start, ticker):
    """
    Fetch VN30F1M trading data from the API.

    Parameters:
    start (int): The start timestamp.
    ticker (str): The ticker symbol.

    Returns:
    pd.DataFrame: A DataFrame containing the trading data.
    """
    end = '9999999999'
    url = f"https://services.entrade.com.vn/chart-api/chart?from={start}&resolution=1&symbol={ticker}&to={end}"
    try:
        resp = requests.get(url)
        data = resp.json()
        if not data:
            print(f"No data returned for {ticker}")
        vn30f1m = pd.DataFrame(data).iloc[:, :6]
        vn30f1m['t'] = vn30f1m['t'].astype(int).apply(lambda x: (datetime.fromtimestamp(x, timezone.utc) + timedelta(hours=7)).replace(tzinfo=None))
        vn30f1m.columns = ['date', 'open', 'high', 'low', 'close', 'volume']
        return vn30f1m
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}.")
        return pd.DataFrame()

df = get_vn30f1m_trading('0', 'VN30F1M')
df

Unnamed: 0,date,open,high,low,close,volume
0,2018-08-13 09:00:00,943.5,943.5,942.9,942.9,975
1,2018-08-13 09:01:00,943.0,943.1,942.9,943.1,220
2,2018-08-13 09:02:00,943.0,943.6,943.0,943.5,121
3,2018-08-13 09:03:00,943.3,943.4,943.3,943.4,135
4,2018-08-13 09:04:00,943.2,943.2,943.0,943.1,361
...,...,...,...,...,...,...
331713,2024-08-07 14:27:00,1251.2,1251.9,1251.2,1251.7,1545
331714,2024-08-07 14:28:00,1251.9,1252.0,1251.7,1251.9,1453
331715,2024-08-07 14:29:00,1251.8,1251.8,1250.4,1250.4,2526
331716,2024-08-07 14:30:00,1251.0,1251.0,1251.0,1251.0,68


In [22]:
df['date'] = pd.to_datetime(df['date'])

morning_start = pd.to_datetime('09:00:00').time()
morning_end = pd.to_datetime('11:30:00').time()
afternoon_start = pd.to_datetime('13:00:00').time()
afternoon_end = pd.to_datetime('14:30:00').time()
extra_time = pd.to_datetime('14:45:00').time()

In [23]:
def get_vietnam_trading_days(start_date, end_date):
    # Tạo danh sách các ngày trong khoảng thời gian
    date_range = pd.date_range(start_date, end_date)

    # Loại bỏ các ngày cuối tuần (Thứ 7, Chủ nhật)
    trading_days = date_range[date_range.weekday < 5]

    # Lấy các năm trong khoảng thời gian
    years = list(set(date_range.year))

    all_holidays = []

    for year in years:
        # Danh sách các ngày lễ (dạng datetime)
        holidays = [
            datetime(year, 1, 1),  # Tết Dương lịch
            datetime(year, 4, 30),  # Ngày Giải phóng miền Nam
            datetime(year, 5, 1),  # Ngày Quốc tế Lao động
            datetime(year, 9, 2)  # Ngày Quốc khánh
        ]

        # Tính ngày Tết Nguyên Đán
        lunar_new_year = lunardate.LunarDate(year, 1, 1).toSolarDate()
        tet_holidays = [lunar_new_year + timedelta(days=i) for i in range(7)]  # Tết Nguyên Đán kéo dài 7 ngày
        holidays.extend(tet_holidays)

        # Tính ngày Giỗ tổ Hùng Vương (10 tháng 3 âm lịch)
        hung_vuong_anniversary = lunardate.LunarDate(year, 3, 10).toSolarDate()
        holidays.append(hung_vuong_anniversary)

        # Thêm các ngày lễ vào danh sách tổng
        all_holidays.extend(holidays)

    # Loại bỏ các ngày lễ khỏi danh sách ngày giao dịch
    trading_days = [day for day in trading_days if day not in all_holidays]

    return trading_days

In [14]:
def generate_trading_times(day):
    times = []
    current_time = pd.Timestamp.combine(day, morning_start)
    end_time = pd.Timestamp.combine(day, morning_end)
    while current_time <= end_time:
        times.append(current_time)
        current_time += pd.Timedelta(minutes=1)
        
    current_time = pd.Timestamp.combine(day, afternoon_start)
    end_time = pd.Timestamp.combine(day, afternoon_end)
    while current_time <= end_time:
        times.append(current_time)
        current_time += pd.Timedelta(minutes=1)
        
    times.append(pd.Timestamp.combine(day, extra_time))
    
    return times

In [24]:
start_date = df['date'].min().date()
end_date = df['date'].max().date()
trading_days = get_vietnam_trading_days(start_date, end_date)

full_range = []
for day in trading_days:
    full_range.extend(generate_trading_times(day))

In [25]:
full_range = pd.DatetimeIndex(full_range)
df.set_index('date', inplace=True)
missing_times = full_range.difference(df.index)
print(f"Missing timestamps: {missing_times}")

Missing timestamps: DatetimeIndex(['2018-08-13 14:45:00', '2018-08-28 09:26:00',
               '2018-08-28 09:29:00', '2018-08-28 09:51:00',
               '2018-08-29 10:00:00', '2018-09-03 09:00:00',
               '2018-09-03 09:01:00', '2018-09-03 09:02:00',
               '2018-09-03 09:03:00', '2018-09-03 09:04:00',
               ...
               '2024-04-29 14:22:00', '2024-04-29 14:23:00',
               '2024-04-29 14:24:00', '2024-04-29 14:25:00',
               '2024-04-29 14:26:00', '2024-04-29 14:27:00',
               '2024-04-29 14:28:00', '2024-04-29 14:29:00',
               '2024-04-29 14:30:00', '2024-04-29 14:45:00'],
              dtype='datetime64[ns]', length=44204, freq=None)


In [27]:
df_reindexed = df.reindex(full_range)
df = df_reindexed.ffill()
df

Unnamed: 0,open,high,low,close,volume
2018-08-13 09:00:00,943.5,943.5,942.9,942.9,975.0
2018-08-13 09:01:00,943.0,943.1,942.9,943.1,220.0
2018-08-13 09:02:00,943.0,943.6,943.0,943.5,121.0
2018-08-13 09:03:00,943.3,943.4,943.3,943.4,135.0
2018-08-13 09:04:00,943.2,943.2,943.0,943.1,361.0
...,...,...,...,...,...
2024-08-07 14:27:00,1251.2,1251.9,1251.2,1251.7,1545.0
2024-08-07 14:28:00,1251.9,1252.0,1251.7,1251.9,1453.0
2024-08-07 14:29:00,1251.8,1251.8,1250.4,1250.4,2526.0
2024-08-07 14:30:00,1251.0,1251.0,1251.0,1251.0,68.0
