In [1]:
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import datetime as dt
import copy
from sqlalchemy import create_engine, MetaData

import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore', category=FutureWarning)
pd.options.mode.chained_assignment = None

##### Import Data

In [2]:
# Load the 'name_map' sheet and build a code -> full_name lookup used to expand short names.
name_map = pd.read_excel("data/t2m_classification.xlsx", sheet_name='name_map')
name_map = name_map.drop(columns=['group'])
name_map_dict = dict(zip(name_map['code'], name_map['full_name']))

In [3]:
# Read every CSV found in the ami EOD export folder; rows are ordered newest first.
eod_item_dict = {}
folder_path = '../ami_eod_data'
for filename in os.listdir(folder_path):
    if not filename.endswith('.csv'):
        continue
    key = os.path.splitext(filename)[0]
    frame = pd.read_csv(os.path.join(folder_path, filename))
    frame = frame.sort_values('date', ascending=False).reset_index(drop=True)
    # 'date' is parsed with the yymmdd format. Zero-padding to six characters keeps a value
    # that lost its leading zero (presumably when the column is read as an integer) parseable.
    frame['date'] = pd.to_datetime(frame['date'].astype(str).str.zfill(6), format='%y%m%d')
    eod_item_dict[key] = frame

# Summary table of all item names plus the name fragments used to classify them.
eod_item_df = pd.DataFrame({'item': list(eod_item_dict.keys())})
eod_item_df['len'] = eod_item_df['item'].str.len()
eod_item_df['last_2chars'] = eod_item_df['item'].str[-2:]
eod_item_df['first_4chars'] = eod_item_df['item'].str[:4]

name_len = eod_item_df['len']
has_flow_suffix = eod_item_df['last_2chars'].isin(['TD', 'NN'])


def _eod_item_names(mask):
    """Return a one-column ('item') frame with the item names selected by a boolean mask."""
    return eod_item_df.loc[mask, ['item']].reset_index(drop=True)


# Stock names are the 3-character items; index names are 4-9 characters long,
# excluding 6-character items and the item '0001'.
stock_name_df = _eod_item_names(name_len == 3)
index_name_df = _eod_item_names(
    (name_len > 3) & (name_len != 6) & (name_len < 10) & (eod_item_df['item'] != '0001')
)

# Membership sets are built once instead of rebuilding a list for every dictionary key.
stock_names = set(stock_name_df['item'])
index_names = set(index_name_df['item'])

eod_stock_dict = {k: v.drop(['option'], axis=1)
                  for k, v in eod_item_dict.items() if k in stock_names}
eod_index_dict = {k: v.rename(columns={'option': 'value'}).drop('cap', axis=1)
                  for k, v in eod_item_dict.items() if k in index_names}

# Items ending in 'TD' / 'NN': 6-character names for stocks, names of 10+ characters
# (not starting with 'VN30') for indexes.
stock_td_nn_df = _eod_item_names((name_len == 6) & has_flow_suffix)
index_td_nn_df = _eod_item_names(
    (name_len >= 10) & (eod_item_df['first_4chars'] != 'VN30') & has_flow_suffix
)

stock_td_nn_names = set(stock_td_nn_df['item'])
index_td_nn_names = set(index_td_nn_df['item'])

# In the TD/NN files the price/volume columns are re-labelled as sell/buy volume and value.
flow_column_names = {'open': 'sell_volume', 'close': 'buy_volume',
                     'volume': 'sell_value', 'option': 'buy_value'}

stock_td_nn_dict = {k: v.drop(['high', 'low', 'cap'], axis=1).rename(columns=flow_column_names)
                    for k, v in eod_item_dict.items() if k in stock_td_nn_names}
index_td_nn_dict = {k: v.drop(['high', 'low', 'cap', 'stock'], axis=1).rename(columns=flow_column_names)
                    for k, v in eod_item_dict.items() if k in index_td_nn_names}

# Rescale the index TD/NN tables: volumes divided by 1e3, values divided by 1e9,
# sell side negated so that net = buy + sell.
for df in index_td_nn_dict.values():
    df['buy_volume'] = df['buy_volume']/1000
    df['sell_volume'] = -df['sell_volume']/1000
    df['buy_value'] = df['buy_value']/1000000000
    df['sell_value'] = -df['sell_value']/1000000000
    df['net_volume'] = df['buy_volume'] + df['sell_volume']
    df['net_value'] = df['buy_value'] + df['sell_value']

In [4]:
# Daily calendar used by the EOD calculations: the 'date' column of the VNINDEX frame.
date_series = pd.DataFrame(eod_index_dict['VNINDEX']['date']).rename(columns={0:'date'})

# 5-minute bar calendar for the intraday calculations, covering the two most recent dates
# (one extra day is generated and trimmed later). Each day has a 09:00-11:25 and a
# 13:00-15:00 block of bar start times.
session_blocks = (
    (pd.Timedelta(hours=9), pd.Timedelta(hours=11, minutes=25)),
    (pd.Timedelta(hours=13), pd.Timedelta(hours=15)),
)
time_series_list = []
for day in date_series['date'].iloc[:2].tolist():
    # Build the bounds from the date itself rather than from an f-string: formatting a
    # Timestamp yields 'YYYY-MM-DD 00:00:00', so the old string carried two time-of-day parts.
    midnight = pd.Timestamp(day).normalize()
    for block_start, block_end in session_blocks:
        # '5min' is the non-deprecated spelling of the old '5T' alias.
        time_series_list.extend(
            pd.date_range(start=midnight + block_start, end=midnight + block_end, freq='5min')
        )
time_series = pd.DataFrame({'date': time_series_list})

In [5]:
# Read every CSV found in the ami intraday (ITD) export folder; rows are ordered newest first.
itd_item_dict = {}
folder_path = '../ami_itd_data'
for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        key = os.path.splitext(filename)[0]
        itd_item_dict[key] = pd.read_csv(os.path.join(folder_path, filename)).sort_values('date', ascending=False).reset_index(drop=True)

# Latest timestamp available in the feed, read from the newest row of the 'ACV' item.
# Bars later than this are treated as "no data yet" further down.
current_time = pd.to_datetime(itd_item_dict['ACV']['date'].iloc[0], format='%y%m%d %H%M%S')

filled_columns = ['stock', 'open', 'high', 'low', 'close']
price_columns = ['open', 'high', 'low', 'close']

for item, df in itd_item_dict.items():
    # Snap every timestamp down to its 5-minute bucket: the last minute digit becomes 0 or 5
    # and the seconds become "00".
    df['minutes'] = df['date'].str.replace(' ', '').str.slice(4, 10).apply(lambda x: (x[:-1] + '0') if int(x[-1]) < 5 else (x[:-1] + '5'))
    df['date'] = df['date'].str.slice(0,7) + df['minutes'].str.slice(2) + "00"

    # Aggregate each bucket into one bar. Rows are newest first inside a bucket, hence
    # open = 'last' row and close = 'first' row.
    df = df.groupby(df['date']).agg({
            'stock': 'first',
            'date': 'last',
            'open': 'last',
            'high': 'max',
            'low': 'min',
            'close': 'first',
            'volume': 'sum'
        }).reset_index(drop=True)

    df['date'] = pd.to_datetime(df['date'].astype(str), format='%y%m%d %H%M%S')

    # Align to the common 5-minute calendar (newest first). Missing bars take the values of
    # the next row down, i.e. the preceding bar in time, and get zero volume.
    df = time_series.merge(df, on='date', how='left').sort_values('date', ascending=False)
    # .bfill() replaces the deprecated fillna(method='bfill').
    df[filled_columns] = df[filled_columns].bfill()
    df['volume'] = df['volume'].fillna(0)
    df = df.loc[df['date'] > date_series['date'].iloc[5]].reset_index(drop=True)

    # Bars after the latest available timestamp only hold filled values: blank and drop them.
    df.loc[df['date'] > current_time, price_columns] = None
    df = df.dropna().reset_index(drop=True)

    itd_item_dict[item] = df

# Summary table of all intraday item names plus the name fragments used to classify them.
itd_item_df = pd.DataFrame({'item': list(itd_item_dict.keys())})
itd_item_df['len'] = itd_item_df['item'].str.len()
itd_item_df['last_2chars'] = itd_item_df['item'].str[-2:]
itd_item_df['third_last_char'] = itd_item_df['item'].str[-3:-2]
itd_item_df['first_4chars'] = itd_item_df['item'].str[:4]

# Stock names are the 3-character items; index names are 4-9 characters long,
# excluding 6-character items and the item '0001'.
itd_name_len = itd_item_df['len']
stock_name_df = itd_item_df.loc[itd_name_len == 3, ['item']].reset_index(drop=True)
index_name_df = itd_item_df.loc[
    (itd_name_len > 3) & (itd_name_len != 6) & (itd_name_len < 10) & (itd_item_df['item'] != '0001'),
    ['item']
].reset_index(drop=True)

# Membership sets are built once instead of rebuilding a list for every dictionary key.
itd_stock_names = set(stock_name_df['item'])
itd_index_names = set(index_name_df['item'])

itd_stock_dict = {k: v for k, v in itd_item_dict.items() if k in itd_stock_names}
itd_index_dict = {k: v.rename(columns={'option': 'value'})
                  for k, v in itd_item_dict.items() if k in itd_index_names}

In [6]:
def calculate_time_percent(time):
    """Return the fraction of the trading day elapsed at the end of a 5-minute bar.

    The trading day is 09:00-11:30 plus 13:00-15:00 (270 minutes). `time` is the start
    of a bar; five minutes are added before measuring so the bar counts as complete.

    Args:
        time: a datetime-like object supporting `+ timedelta` and `.time()`.

    Returns:
        float: elapsed trading minutes / 270. It is 0 when the bar ends at or before the
        open, stays at the morning total during the lunch break, and may exceed 1 for
        bars ending after 15:00 (the caller caps the value).
    """
    start_time_am = dt.time(9, 00)
    end_time_am = dt.time(11, 30)
    start_time_pm = dt.time(13, 00)
    end_time_pm = dt.time(15, 00)

    def time_difference_in_minutes(time1, time2):
        # Whole minutes from time1 to time2; callers only pass time1 <= time2.
        delta1 = dt.timedelta(hours=time1.hour, minutes=time1.minute, seconds=time1.second)
        delta2 = dt.timedelta(hours=time2.hour, minutes=time2.minute, seconds=time2.second)
        return int((delta2 - delta1).total_seconds() // 60)

    bar_end = (time + dt.timedelta(minutes=5)).time()
    morning_minutes = time_difference_in_minutes(start_time_am, end_time_am)
    afternoon_minutes = time_difference_in_minutes(start_time_pm, end_time_pm)
    full_time_range = morning_minutes + afternoon_minutes

    if bar_end <= start_time_am:
        # Before the open. Previously the negative difference wrapped around through
        # timedelta.seconds and produced a huge positive value.
        time_range = 0
    elif bar_end <= end_time_am:
        time_range = time_difference_in_minutes(start_time_am, bar_end)
    elif bar_end < start_time_pm:
        # Lunch break. Previously no branch matched and time_range was unbound.
        time_range = morning_minutes
    else:
        time_range = (time_difference_in_minutes(start_time_am, bar_end)
                      - time_difference_in_minutes(end_time_am, start_time_pm))

    return time_range/full_time_range

#Điều chỉnh lại time_series bỏ đi các hàng thời gian chưa có dữ liệu
time_series = time_series.loc[time_series['date'] <= current_time].sort_values('date', ascending=False).reset_index(drop=True)

#Tính thêm time percent
time_percent = time_series.copy()
time_percent['percent'] = time_percent['date'].apply(calculate_time_percent)
time_percent['percent'] = time_percent['percent'].apply(lambda x: x if x < 1 else 1)
current_time_percent = time_percent['percent'].iloc[0]

#Tính thêm itd_series cho ngày t0
itd_series = time_series[time_series['date'] > date_series['date'].iloc[0]]

##### Đường trung bình

In [7]:
# Rolling highs/lows, volume and price moving averages, and trend flags for every stock.
def _add_eod_indicators(frame):
    """Return `frame` (newest first) extended with rolling indicators and trend flags.

    Indicators are computed in chronological order with min_periods=1; the volume
    averages are shifted by one row so each row sees the average up to the prior session.
    A trend flag is 1 when the close is above the previous row's (high + low) / 2 midpoint
    for that window.
    """
    price_windows = (5, 20, 60, 120, 240, 480)
    volume_windows = (5, 20, 60, 120)

    chrono = frame.sort_values(by=['date'], ascending=True).reset_index(drop=True)

    indicators = {}
    for span in price_windows:
        indicators[f'high{span}'] = chrono['high'].rolling(window=span, min_periods=1).max()
        indicators[f'low{span}'] = chrono['low'].rolling(window=span, min_periods=1).min()
    for span in volume_windows:
        indicators[f'ma{span}_V'] = chrono['volume'].rolling(window=span, min_periods=1).mean().shift(1)
    for span in price_windows:
        indicators[f'ma{span}'] = chrono['close'].rolling(window=span, min_periods=1).mean()
    chrono = chrono.assign(**indicators)

    trend_flags = {}
    for span in price_windows:
        midpoint = (chrono[f'high{span}'] + chrono[f'low{span}'])/2
        trend_flags[f'trend_{span}p'] = (chrono['close'] > midpoint.shift(1)).astype(int)
    chrono = chrono.assign(**trend_flags)

    # Back to newest-first order, as the rest of the notebook expects.
    return chrono.sort_values(by=['date'], ascending=False).reset_index(drop=True)


eod_stock_dict = {stock: _add_eod_indicators(frame) for stock, frame in eod_stock_dict.items()}

In [8]:
# Copy the latest daily rolling highs/lows onto every intraday bar, then flag the trend per bar.
itd_trend_windows = (5, 20, 60, 120, 240, 480)
itd_level_columns = [f'{side}{span}' for span in itd_trend_windows for side in ('high', 'low')]


def _add_itd_trends(stock, frame):
    """Return the intraday `frame` (newest first) with daily levels and trend flags added.

    The levels are the values on the first row of the stock's EOD frame. A trend flag is 1
    when the bar's close is above the previous bar's (high + low) / 2 midpoint.
    """
    latest_levels = eod_stock_dict[stock][itd_level_columns].iloc[0]
    chrono = frame.assign(**latest_levels)
    chrono = chrono.sort_values(by=['date'], ascending=True).reset_index(drop=True)

    trend_flags = {}
    for span in itd_trend_windows:
        midpoint = (chrono[f'high{span}'] + chrono[f'low{span}'])/2
        trend_flags[f'trend_{span}p'] = (chrono['close'] > midpoint.shift(1)).astype(int)

    return chrono.assign(**trend_flags).sort_values(by=['date'], ascending=False).reset_index(drop=True)


itd_stock_dict = {stock: _add_itd_trends(stock, frame) for stock, frame in itd_stock_dict.items()}

In [9]:
# Drop stocks whose newest daily row is not the latest session (no trade today).
latest_session = date_series['date'].iloc[0]
stale_stocks = [stock for stock, frame in eod_stock_dict.items()
                if frame['date'].iloc[0] != latest_session]
for stock in stale_stocks:
    # Only the EOD dict is pruned here; the intraday dict is left untouched, as before.
    eod_stock_dict.pop(stock)

# Drop stocks whose close series contains a faulty zero price.
zero_price_stocks = [stock for stock, frame in eod_stock_dict.items()
                     if frame['close'].min() == 0]
for stock in zero_price_stocks:
    # A stock may have no intraday frame; a default avoids a KeyError in that case.
    itd_stock_dict.pop(stock, None)
    eod_stock_dict.pop(stock)

# Liquidity ratio (volume / prior 5-session average volume) and 20-session average cap.
for df in eod_stock_dict.values():
    df['liquid_ratio'] = df['volume'] / (df['ma5_V'])
    # The newest row is a partial session, so its average volume is scaled by the elapsed
    # fraction of the day. A single .loc write replaces the chained `df[col].iloc[0] = ...`,
    # which does not write through under pandas copy-on-write.
    df.loc[df.index[0], 'liquid_ratio'] = df['volume'].iloc[0] / ((df['ma5_V']).iloc[0]*current_time_percent)
    df['cap'] = df['cap'][::-1].rolling(window=20).mean()[::-1]

##### Phân nhóm cổ phiếu

In [10]:
stock_classification = pd.read_excel('data/t2m_classification.xlsx')
stock_classification = stock_classification[stock_classification['stock'].isin(list(eod_stock_dict.keys()))]

# First trading session of the current month. The month comes from the latest session in
# date_series (not from a loop variable leaked by an earlier cell), and `>=` keeps a
# session that falls exactly on the 1st.
latest_session = date_series['date'].iloc[0]
month_start = pd.Timestamp(latest_session.year, latest_session.month, 1)
first_day_of_month = date_series.loc[date_series['date'] >= month_start, 'date'].min()


def _first_session_value(stock, column):
    """Return `column` of `stock` on first_day_of_month, or NaN if the stock has no row that day."""
    frame = eod_stock_dict[stock]
    matches = frame.loc[frame['date'] == first_day_of_month, column]
    return matches.iloc[0] if not matches.empty else np.nan


# Price and cap on the first session of the month, looked up per classified stock so each
# value lands on its own row. Previously the lists followed eod_stock_dict order while the
# rows followed the classification sheet order.
classified_stocks = stock_classification['stock'].tolist()
price_arr = [_first_session_value(stock, 'close') for stock in classified_stocks]
cap_arr = [_first_session_value(stock, 'cap') for stock in classified_stocks]

# Market-cap grouping table.
vonhoa_classification_df = stock_classification.copy()
vonhoa_classification_df['price'] = price_arr
vonhoa_classification_df['cap'] = cap_arr

# One ten-thousandth of the total cap is the unit for the group thresholds. NaN caps are
# skipped so that one missing value does not turn every threshold into NaN.
cap_coef = np.nansum(cap_arr)/10000


def _marketcap_group(row):
    """Classify one row as 'small', 'mid', 'large' or 'penny' from its cap and price.

    small: cap in (1, 10) units, or cap in [10, 20) units with price < 10
    mid:   cap in [10, 20) units with price >= 10, or cap in [20, 100) units
    large: cap >= 100 units
    penny: everything else (including rows with a missing cap or price)
    """
    cap, price = row['cap'], row['price']
    if cap_coef < cap < 10*cap_coef:
        return 'small'
    if 10*cap_coef <= cap < 20*cap_coef:
        if price < 10:
            return 'small'
        if price >= 10:
            return 'mid'
        return 'penny'
    if 20*cap_coef <= cap < 100*cap_coef:
        return 'mid'
    if cap >= 100*cap_coef:
        return 'large'
    return 'penny'


vonhoa_classification_df['marketcap_group'] = vonhoa_classification_df.apply(_marketcap_group, axis=1)

stock_classification = pd.concat([stock_classification, vonhoa_classification_df['marketcap_group']], axis=1)

In [11]:
# Group the EOD frames: all stocks, by industry name, by industry performance and by
# market-cap group. Every grouping maps group name -> {stock: frame}.
def _eod_members(column, label):
    """Return the EOD frames of the stocks whose `column` in stock_classification equals `label`."""
    wanted = stock_classification.loc[stock_classification[column] == label, 'stock'].to_list()
    return {key: value for key, value in eod_stock_dict.items() if key in wanted}


eod_all_stock = {'all_stock': dict(eod_stock_dict)}

eod_industry_name = {
    name: _eod_members('industry_name', name)
    for name in np.sort(stock_classification['industry_name'].unique())
}

eod_industry_perform = {
    name: _eod_members('industry_perform', name)
    for name in np.sort(stock_classification['industry_perform'].unique())
}

eod_marketcap_group = {
    name: _eod_members('marketcap_group', name)
    for name in ['large', 'mid', 'small', 'penny']
}

In [12]:
# Group the intraday frames: all stocks, by industry name, by industry performance and by
# market-cap group. Every grouping maps group name -> {stock: frame}.
def _itd_members(column, label):
    """Return the intraday frames of the stocks whose `column` in stock_classification equals `label`."""
    wanted = stock_classification.loc[stock_classification[column] == label, 'stock'].to_list()
    return {key: value for key, value in itd_stock_dict.items() if key in wanted}


itd_all_stock = {'all_stock': dict(itd_stock_dict)}

itd_industry_name = {
    name: _itd_members('industry_name', name)
    for name in np.sort(stock_classification['industry_name'].unique())
}

itd_industry_perform = {
    name: _itd_members('industry_perform', name)
    for name in np.sort(stock_classification['industry_perform'].unique())
}

itd_marketcap_group = {
    name: _itd_members('marketcap_group', name)
    for name in ['large', 'mid', 'small', 'penny']
}

##### Biểu đồ cấu trúc sóng

In [13]:
def transform_ms(stock_group, time_interval):
    """Compute, per group, the share of member stocks whose trend flag is set on each row.

    Args:
        stock_group: dict mapping group name -> {stock: DataFrame}; every frame must carry
            the trend_5p ... trend_480p columns. The input is only read, never modified.
        time_interval: 'eod' to use the global date_series calendar; any other value uses
            the global time_series calendar.

    Returns:
        dict mapping group name -> DataFrame with a 'date' column and one column per trend
        window holding (number of flagged stocks) / (number of stocks in the group),
        limited to the first 60 rows after sorting by date descending.

    Note:
        Stock values are matched to calendar rows by index label (row position after
        reset_index), not by date. Rows a stock does not have count as 0.
    """
    trend_columns = ['trend_5p', 'trend_20p', 'trend_60p', 'trend_120p', 'trend_240p', 'trend_480p']

    calendar = date_series if time_interval == 'eod' else time_series
    dates = calendar['date'].tolist()
    row_index = pd.RangeIndex(len(dates))

    result = {}
    for group_name, members in stock_group.items():
        group_df = pd.DataFrame(dates, columns=['date']).sort_values('date', ascending=False)
        member_count = len(members)

        for trend in trend_columns:
            if member_count:
                # One aligned table per trend instead of inserting a column per stock and
                # summing row by row in Python.
                flags = pd.concat(
                    [frame[trend].reindex(row_index) for frame in members.values()],
                    axis=1,
                ).fillna(0)
                percent = flags.sum(axis=1) / member_count
            else:
                # An empty group has no defined share.
                percent = pd.Series(np.nan, index=row_index)
            group_df[trend] = percent

        result[group_name] = group_df.iloc[:60]

    return result

In [14]:
# Trend-breadth tables for every EOD grouping.
(eod_all_stock_ms,
 eod_industry_name_ms,
 eod_industry_perform_ms,
 eod_marketcap_group_ms) = (
    transform_ms(grouping, 'eod')
    for grouping in (eod_all_stock, eod_industry_name, eod_industry_perform, eod_marketcap_group)
)

In [15]:
# Trend-breadth tables for every intraday grouping.
(itd_all_stock_ms,
 itd_industry_name_ms,
 itd_industry_perform_ms,
 itd_marketcap_group_ms) = (
    transform_ms(grouping, 'itd')
    for grouping in (itd_all_stock, itd_industry_name, itd_industry_perform, itd_marketcap_group)
)

##### Điểm dòng tiền EOD

In [16]:
# Keep only the first 100 rows (the most recent sessions) of every stock for the scoring below.
eod_stock_dict = {stock: frame.head(100).reset_index(drop=True)
                  for stock, frame in eod_stock_dict.items()}

In [17]:
def score_calculation(df):
    """Compute the raw money-flow score for every row of `df`.

    score = candle_term * 100 + turnover_ratio / 100, where
      turnover_ratio = (volume * close) / (ma5_prev * ma5_V)
      candle_term    = ((close - low) - (high - close)) / (high - low)
                       * |close - close_prev| / close_prev * turnover_ratio

    Args:
        df: DataFrame with columns high, low, close, close_prev, volume, ma5_prev, ma5_V.

    Returns:
        Series aligned with `df`. Rows with high == low fall back to turnover_ratio / 100,
        and any remaining NaN becomes 0.

    Note:
        The previous `except ZeroDivisionError` fallback could never run: pandas division
        yields NaN/inf instead of raising, so flat candles were silently scored 0.
    """
    price_range = df['high'] - df['low']
    turnover_ratio = (df['volume']*df['close']) / (df['ma5_prev'] * df['ma5_V'])

    # Same evaluation order as before, so non-flat rows keep bit-identical values.
    candle_term = (((df['close'] - df['low']) - (df['high'] - df['close'])) / price_range *
                   abs((df['close'] - df['close_prev'])) / df['close_prev'] *
                   (df['volume']*df['close']) / (df['ma5_prev'] * df['ma5_V'])) * 100

    # Flat candle: the position term is 0/0, so only the liquidity term contributes.
    candle_term = candle_term.where(price_range != 0, 0)

    return (candle_term + turnover_ratio/100).fillna(0)

In [18]:
#Tính toán các cột cần thiết để lọc danh sách cổ phiếu dòng tiền
raw_eod_score_dict = {}
for stock in eod_stock_dict.keys():
    raw_eod_score_dict[stock] = eod_stock_dict[stock]
    [['stock', 'date', 'high', 'low', 'close', 'volume','liquid_ratio','cap', 'ma5_V', 'ma20_V','ma60_V','ma120_V','ma5']]
    
    raw_eod_score_dict[stock]['ma5_prev'] = raw_eod_score_dict[stock]['ma5'].shift(-1)
    raw_eod_score_dict[stock]['close_prev'] = raw_eod_score_dict[stock]['close'].shift(-1)

    raw_eod_score_dict[stock]['raw_score'] = score_calculation(raw_eod_score_dict[stock])
    raw_eod_score_dict[stock]['raw_score'].iloc[0] = raw_eod_score_dict[stock]['raw_score'].iloc[0].item()/current_time_percent

    raw_eod_score_dict[stock]['highest_price'] = raw_eod_score_dict[stock]['close'][::-1].rolling(window=40, min_periods=1).max()[::-1]
    raw_eod_score_dict[stock]['lowest_volume20'] = raw_eod_score_dict[stock]['volume'][::-1].rolling(window=20, min_periods=1).min().shift(1)[::-1]
    raw_eod_score_dict[stock]['lowest_volume5'] = raw_eod_score_dict[stock]['volume'][::-1].rolling(window=5, min_periods=1).min().shift(1)[::-1]

#Lọc danh sách cổ phiếu dòng tiền
eod_score_dict = {stock: df[['stock', 'date', 'close','low','high', 'volume', 'liquid_ratio', 'raw_score','cap']] 
                    for stock, df in raw_eod_score_dict.items() 
                    if all([
                        df['ma5_V'][0] >= 50000,
                        df['ma20_V'][0] >= 50000,
                        df['ma60_V'][0] >= 50000,
                        df['ma120_V'][0] >= 50000,
                        df['lowest_volume20'][0] > 0,
                        df['lowest_volume5'][0] >= 50000,
                        df['close'][0] > df['highest_price'][0] * 0.382
                    ])}

stock_classification_filtered = stock_classification[stock_classification['stock'].isin(eod_score_dict.keys())].reset_index(drop=True)

In [19]:
#Tính toán các điểm dòng tiền đóng góp từ nhóm ngành và nhóm vốn hoá
def get_total_cap(df, group, eod_stock_dict):
    """Sum the 'cap' series of the stocks in each distinct value of df[group].

    Args:
        df: classification table with a 'stock' column and the `group` column.
        group: name of the column whose distinct values define the groups.
        eod_stock_dict: dict mapping stock -> DataFrame with a 'cap' column.

    Returns:
        dict mapping group value -> Series named 'total', indexed like the global
        date_series, holding the row-wise sum of the member caps (missing values skipped).
    """
    total_cap_dict = {}
    for label in df[group].unique():
        members = df.loc[df[group] == label, 'stock'].tolist()
        # Each cap series is aligned to the calendar index by label.
        cap_table = pd.DataFrame(
            {ticker: eod_stock_dict[ticker]['cap'] for ticker in members},
            index=date_series.index,
        )
        total_cap_dict[label] = cap_table.sum(axis=1).rename('total')
    return total_cap_dict

total_cap_industry_dict = get_total_cap(stock_classification, 'industry_name', eod_stock_dict)
total_cap_marketcap_dict = get_total_cap(stock_classification, 'marketcap_group', eod_stock_dict)

# Attach to every scored stock the total cap of its industry and of its market-cap group.
# The classification row is looked up once per stock.
for stock, score_frame in eod_score_dict.items():
    class_row = stock_classification[stock_classification['stock'] == stock]
    score_frame['total_cap_industry'] = total_cap_industry_dict[class_row['industry_name'].item()]
    score_frame['total_cap_marketcap'] = total_cap_marketcap_dict[class_row['marketcap_group'].item()]

# Cap-weighted contribution of each stock's raw score to its groups. Plain column
# arithmetic replaces the row-by-row apply and yields the same element-wise result.
for score_frame in eod_score_dict.values():
    weighted_score = score_frame['raw_score']*score_frame['cap']
    score_frame['industry_cont'] = weighted_score/score_frame['total_cap_industry']
    score_frame['marketcap_cont'] = weighted_score/score_frame['total_cap_marketcap']

In [20]:
# Group scores: per industry / market-cap group, the sum of the member contributions.
def _eod_group_score(group_column, contribution_column):
    """Return {group value: row-wise sum of `contribution_column` over its filtered stocks}."""
    scores = {}
    for label in stock_classification_filtered[group_column].unique():
        members = stock_classification_filtered.loc[
            stock_classification_filtered[group_column] == label, 'stock'].tolist()
        table = pd.DataFrame(
            {ticker: eod_score_dict[ticker][contribution_column] for ticker in members},
            index=date_series.index,
        )
        scores[label] = table.sum(axis=1)
    return scores


eod_industry_score_dict = _eod_group_score('industry_name', 'industry_cont')
eod_marketcap_score_dict = _eod_group_score('marketcap_group', 'marketcap_cont')

eod_label_columns = ['industry_name', 'industry_perform', 'stock_perform', 'marketcap_group', 't2m_select']

for stock, score_frame in eod_score_dict.items():
    class_row = stock_classification_filtered[stock_classification_filtered['stock'] == stock]

    score_frame['industry_score'] = eod_industry_score_dict[class_row['industry_name'].item()]
    score_frame['marketcap_score'] = eod_marketcap_score_dict[class_row['marketcap_group'].item()]
    # T0 score: 70% own raw score, 20% industry score, 10% market-cap group score.
    score_frame['t0_score'] = (score_frame['raw_score']*0.7
                               + score_frame['industry_score']*0.2
                               + score_frame['marketcap_score']*0.1)

    # T5 score: 5-row rolling mean of the T0 score, computed in chronological order.
    score_frame.sort_values('date', ascending=True, inplace=True)
    score_frame['t5_score'] = score_frame['t0_score'].rolling(window=5, min_periods=1).mean()
    score_frame.sort_values('date', ascending=False, inplace=True)

    # Copy the classification labels onto every row.
    for column in eod_label_columns:
        score_frame[column] = class_row[column].item()

In [21]:
def _score_table(score_column):
    """Return a calendar-indexed table of `score_column`, one column per scored stock; gaps are 0."""
    return pd.DataFrame(
        {stock: frame[score_column] for stock, frame in eod_score_dict.items()},
        index=date_series.index,
    ).fillna(0)


# Cross-sectional ranks per row (1 = highest score, ties share the lowest rank). The tables
# are built and filled once, instead of re-filling the whole table after every column.
t5_ranking = _score_table('t5_score').rank(ascending=False, method='min', axis=1)
t0_scores = _score_table('t0_score')
t0_ranking = t0_scores.rank(ascending=False, method='min', axis=1)

# Kept under their previous names: the T0 score table (with the date column) and T0 ranks.
group_score = pd.concat([date_series, t0_scores], axis=1)
ranking_group = t0_ranking

# A stock counts as "top" when its T0 rank is within 10% of the number of filtered stocks.
top_rank_limit = len(stock_classification_filtered)*0.1

for stock, df in eod_score_dict.items():
    df['price_change'] = df['close'][::-1].pct_change()[::-1]

    # T5 rank and its change versus the next row down (shift(-1)).
    df['rank'] = t5_ranking[stock]
    df['rank_prev'] = df['rank'].shift(-1)
    df['rank_change'] = df['rank_prev'] - df['rank']

    # T0 rank.
    df['rank_t0'] = t0_ranking[stock]
    df['rank_t0_prev'] = df['rank_t0'].shift(-1)

    # Number of "top" rows in each 20-row window that starts at the row and extends downwards.
    df['top_check'] = (df['rank_t0'] <= top_rank_limit).astype(int)
    df['top_count'] = df['top_check'][::-1].rolling(window=20).sum()[::-1]

# Final score tables: first 60 rows, helper columns removed.
eod_score_dict = {k: v.iloc[:60].drop(
    columns=['raw_score', 'industry_cont', 'marketcap_cont', 'total_cap_industry', 'total_cap_marketcap',
             'industry_score', 'marketcap_score', 'rank_t0_prev', 'rank_prev', 'top_check'])
    for k, v in eod_score_dict.items()}

In [79]:
# Summary table: the first row of every scored stock, ordered by T0 score (highest first).
score_list = [frame.iloc[0] for frame in eod_score_dict.values()]

eod_score_df = pd.DataFrame(score_list).sort_values('t0_score', ascending=False).reset_index(drop=True)

##### Điểm dòng tiền ITD

In [22]:
# Assumes date_series and itd_stock_dict have already been defined by earlier cells.
# 09:15 on the latest daily date: the first intraday bar kept for HSX stocks below.
hsx_itd_start = pd.Timestamp(date_series['date'].iloc[0].replace(hour=9, minute=15, second=0, microsecond=0))

# Stocks that passed the EOD filter, and the stocks whose 'exchange' is 'HSX'.
filtered_stocks = stock_classification_filtered['stock'].tolist()
hsx_stocks = stock_classification[stock_classification['exchange'] == 'HSX']['stock'].tolist()

# Build itd_score_dict in one step from a deep copy of the intraday frames: only filtered
# stocks are kept, with bars from 09:15 for HSX stocks and from the latest daily date otherwise.
itd_score_dict = {
    k: v.loc[v['date'] >= (hsx_itd_start if k in hsx_stocks else date_series['date'].iloc[0])]
    for k, v in copy.deepcopy(itd_stock_dict).items() if k in filtered_stocks
}

for stock, df in itd_score_dict.items():

    # Reference values from the first row of the stock's raw EOD frame. ma5_V is multiplied
    # by time_percent['percent'] (matched to the bars by index label); the rest are constants.
    df['ma5_V'] = time_percent['percent']*(raw_eod_score_dict[stock]['ma5_V'].iloc[0])
    df['ma5_prev'] = raw_eod_score_dict[stock]['ma5_prev'].iloc[0]
    df['close_prev'] = raw_eod_score_dict[stock]['close_prev'].iloc[0]
    df['cap'] = raw_eod_score_dict[stock]['cap'].iloc[0]

    # Reverse-cumulative high / low / volume: each row aggregates itself and every row below it.
    # NOTE(review): rows appear to be newest first, which would make these running
    # day-to-date values - confirm.
    df['high'] = df['high'][::-1].cummax()[::-1]
    df['low'] = df['low'][::-1].cummin()[::-1]
    df['volume'] = df['volume'][::-1].cumsum()[::-1]
    df['liquid_ratio'] = df['volume']/df['ma5_V']

    # Overwrite the row labelled 0 with the volume/close/low/high of the first raw EOD row.
    df.loc[0, 'volume'] = raw_eod_score_dict[stock]['volume'].iloc[0]
    df.loc[0, 'close'] = raw_eod_score_dict[stock]['close'].iloc[0]
    df.loc[0, 'low'] = raw_eod_score_dict[stock]['low'].iloc[0]
    df.loc[0, 'high'] = raw_eod_score_dict[stock]['high'].iloc[0]

    # Raw score per bar; every row is then divided by the single current_time_percent value.
    df['raw_score'] = score_calculation(df)
    df['raw_score'] = df['raw_score']/current_time_percent

# Attach the first-row total cap of each stock's industry and market-cap group (scalars).
for stock in itd_score_dict.keys():
    itd_score_dict[stock]['total_cap_industry'] = total_cap_industry_dict[stock_classification[stock_classification['stock'] == stock]['industry_name'].item()].iloc[0]
    itd_score_dict[stock]['total_cap_marketcap'] = total_cap_marketcap_dict[stock_classification[stock_classification['stock'] == stock]['marketcap_group'].item()].iloc[0]

# Cap-weighted contribution of each bar's raw score to the stock's industry and market-cap group.
for stock in itd_score_dict.keys():
    itd_score_dict[stock]['industry_cont'] = itd_score_dict[stock].apply(lambda x: x['raw_score']*x['cap']/x['total_cap_industry'], axis=1)
    itd_score_dict[stock]['marketcap_cont'] = itd_score_dict[stock].apply(lambda x: x['raw_score']*x['cap']/x['total_cap_marketcap'], axis=1)

In [23]:
# Intraday group scores: per industry / market-cap group, the sum of the member contributions.
def _itd_group_score(group_column, contribution_column):
    """Return {group value: row-wise sum of `contribution_column` over its filtered stocks}."""
    scores = {}
    for label in stock_classification_filtered[group_column].unique():
        members = stock_classification_filtered.loc[
            stock_classification_filtered[group_column] == label, 'stock'].tolist()
        table = pd.DataFrame(
            {ticker: itd_score_dict[ticker][contribution_column] for ticker in members},
            index=itd_series.index,
        )
        scores[label] = table.sum(axis=1)
    return scores


itd_industry_score_dict = _itd_group_score('industry_name', 'industry_cont')
itd_marketcap_score_dict = _itd_group_score('marketcap_group', 'marketcap_cont')

itd_label_columns = ['industry_name', 'industry_perform', 'stock_perform', 'marketcap_group', 't2m_select']

for stock, score_frame in itd_score_dict.items():
    class_row = stock_classification_filtered[stock_classification_filtered['stock'] == stock]

    score_frame['industry_score'] = itd_industry_score_dict[class_row['industry_name'].item()]
    score_frame['marketcap_score'] = itd_marketcap_score_dict[class_row['marketcap_group'].item()]
    # T0 score: 70% own raw score, 20% industry score, 10% market-cap group score.
    score_frame['t0_score'] = (score_frame['raw_score']*0.7
                               + score_frame['industry_score']*0.2
                               + score_frame['marketcap_score']*0.1)

    # Copy the classification labels onto every row.
    for column in itd_label_columns:
        score_frame[column] = class_row[column].item()

In [24]:
# Reduce every intraday score frame to its output columns.
itd_output_columns = ['stock', 'date','close', 'volume','t0_score', 'liquid_ratio', 'industry_name',
                      'industry_perform', 'stock_perform','marketcap_group', 't2m_select']
itd_score_dict = {stock: frame[itd_output_columns] for stock, frame in itd_score_dict.items()}

##### Chỉ số kĩ thuật

In [25]:
def find_peak_and_bottom(df):
    """Flag local peaks and bottoms using a window of two rows on each side.

    A row is a peak when its high is strictly above the close of all four neighbours
    (rows at offsets -2, -1, +1, +2) and equals the highest high of the five-row window.
    A row is a bottom when its low is strictly below the close of all four neighbours and
    equals the lowest low of the window. Rows missing any neighbour are never flagged.

    Args:
        df: DataFrame with 'high', 'low' and 'close' columns. It is not modified.

    Returns:
        (peak, bottom): two int Series (0/1) named 'peak' and 'bottom', indexed like `df`.

    Note:
        The bottom test previously compared against the +2 neighbour twice and never
        against the +1 neighbour; it now mirrors the peak test.
    """
    neighbour_offsets = (-1, -2, 1, 2)
    high, low, close = df['high'], df['low'], df['close']

    neighbour_closes = [close.shift(offset) for offset in neighbour_offsets]
    window_offsets = (0,) + neighbour_offsets
    window_high = pd.DataFrame({offset: high.shift(offset) for offset in window_offsets}).max(axis=1)
    window_low = pd.DataFrame({offset: low.shift(offset) for offset in window_offsets}).min(axis=1)

    is_peak = high == window_high
    is_bottom = low == window_low
    for neighbour_close in neighbour_closes:
        # Comparisons against a missing neighbour (NaN) are False.
        is_peak = is_peak & (high > neighbour_close)
        is_bottom = is_bottom & (low < neighbour_close)

    return is_peak.astype(int).rename('peak'), is_bottom.astype(int).rename('bottom')

In [26]:
def calculate_ta_df(price_df):
    """Return the base frame shared by the technical-analysis calculators.

    Keeps the ``stock``, ``date`` and OHLCV columns and adds ``month``,
    ``quarter`` and ``year`` string labels derived from ``date``
    (e.g. ``'2024-05'``, ``'2024Q2'``, ``'2024'``).

    Parameters
    ----------
    price_df : pandas.DataFrame
        Must contain ``stock``, ``date`` (datetime64), ``open``, ``high``,
        ``low``, ``close`` and ``volume``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        An independent copy with the three period-label columns appended.
    """
    # Explicit copy: the new columns are written to our own frame instead of a
    # selection of the caller's data (no reliance on silenced SettingWithCopyWarning).
    ta_df = price_df[['stock','date','open','high','low','close','volume']].copy()
    for label, freq in (('month', 'M'), ('quarter', 'Q'), ('year', 'Y')):
        ta_df[label] = ta_df['date'].dt.to_period(freq).astype(str)
    return ta_df

In [27]:
def calculate_candle_ta_df(ta_df,input_type):
    """Attach open / peak / bottom levels of the first-listed month, quarter and year.

    For each time frame the rows belonging to the first period that appears in the
    frame (``unique()[0]``) are selected, and three scalar levels are broadcast to
    every row: ``<tf>_bottom`` (lowest ``low``), ``<tf>_peak`` (highest ``high``)
    and ``<tf>_open`` (``open`` of the last row of that period).
    NOTE(review): with the newest-first ordering used when the EOD files are
    loaded, that period is the current one and its last row is its first trading
    day - confirm the caller always passes newest-first data.

    Parameters
    ----------
    ta_df : pandas.DataFrame
        Price frame accepted by ``calculate_ta_df``.
    input_type : str
        ``'stock'`` stores the distance of ``close`` from each level as a
        fraction of the level; ``'index'`` stores the absolute difference.
        Any other value adds no ``from_*`` columns.

    Returns
    -------
    pandas.DataFrame
        The base TA frame plus the level columns and, for a recognised
        ``input_type``, the ``from_<tf>_<level>`` columns.
    """
    # The previous version also built peak/bottom helper frames via
    # find_peak_and_bottom that were never read; that dead work is removed.
    candle_ta_df = calculate_ta_df(ta_df)
    time_frames = ('month', 'quarter', 'year')

    # Rows of the first-listed period for each time frame.
    first_period_rows = {
        tf: candle_ta_df[candle_ta_df[tf] == candle_ta_df[tf].unique()[0]]
        for tf in time_frames
    }

    for tf in time_frames:
        candle_ta_df[f'{tf}_bottom'] = first_period_rows[tf]['low'].min()
    for tf in time_frames:
        candle_ta_df[f'{tf}_peak'] = first_period_rows[tf]['high'].max()
    for tf in time_frames:
        candle_ta_df[f'{tf}_open'] = first_period_rows[tf]['open'].iloc[-1]

    if input_type in ('stock', 'index'):
        for level in ('peak', 'bottom', 'open'):
            for tf in time_frames:
                reference = candle_ta_df[f'{tf}_{level}']
                distance = candle_ta_df['close'] - reference
                if input_type == 'stock':
                    # Stocks are reported as a relative distance.
                    distance = distance/reference
                candle_ta_df[f'from_{tf}_{level}'] = distance

    return candle_ta_df

In [97]:
def calculate_fibo_ta_df(ta_df, input_type):
    """Attach Fibonacci retracement levels for month, quarter and year.

    For each time frame the peak (highest ``high``) and bottom (lowest ``low``)
    are taken over the rows of the first two periods listed in the frame, and
    the 0.382 / 0.5 / 0.618 retracements are measured down from the peak.
    ``from_*`` columns hold the distance of ``close`` from each level: divided
    by the level's absolute value for ``'stock'``, absolute for ``'index'``.
    Any other ``input_type`` adds no ``from_*`` columns.
    """
    fibo_ta_df = calculate_ta_df(ta_df)
    periods = ('month', 'quarter', 'year')
    ratios = (('382', 0.382), ('500', 0.5), ('618', 0.618))

    def _first_two_periods(period):
        # Rows whose label is one of the first two distinct labels in the frame.
        wanted = fibo_ta_df[period].unique()[:2].tolist()
        return fibo_ta_df[fibo_ta_df[period].isin(wanted)]

    for period in periods:
        fibo_ta_df[f'{period}_peak'] = _first_two_periods(period)['high'].max()
    for period in periods:
        fibo_ta_df[f'{period}_bottom'] = _first_two_periods(period)['low'].min()

    for period in periods:
        top = fibo_ta_df[f'{period}_peak']
        swing = top - fibo_ta_df[f'{period}_bottom']
        for tag, ratio in ratios:
            fibo_ta_df[f'{period}_fibo_{tag}'] = top - swing*ratio

    if input_type == 'stock':
        for period in periods:
            for tag, _ in ratios:
                level = fibo_ta_df[f'{period}_fibo_{tag}']
                fibo_ta_df[f'from_{period}_fibo_{tag}'] = (fibo_ta_df['close'] - level)/abs(level)

    if input_type == 'index':
        for period in periods:
            for tag, _ in ratios:
                level = fibo_ta_df[f'{period}_fibo_{tag}']
                fibo_ta_df[f'from_{period}_fibo_{tag}'] = (fibo_ta_df['close'] - level)

    return fibo_ta_df

In [29]:
def calculate_pivot_ta_df(ta_df, input_type):
    """Attach the classic pivot point ``(high + low + close) / 3`` per time frame.

    For month, quarter and year the rows of the *second* period listed in the
    frame (``unique()[1]``) supply the peak (highest ``high``), bottom (lowest
    ``low``) and close (``close`` of that period's first row).
    NOTE(review): with newest-first data this is the previous, completed
    period and its final close - confirm the caller's ordering.

    When the frame holds fewer than two periods for a time frame, that time
    frame's peak / bottom / close columns are filled with ``None`` and the
    derived pivot columns come out missing.

    Parameters
    ----------
    ta_df : pandas.DataFrame
        Price frame accepted by ``calculate_ta_df``.
    input_type : str
        ``'index'`` stores ``close - pivot``; ``'stock'`` divides that by the
        pivot's absolute value. Any other value adds no ``from_*`` columns.

    Returns
    -------
    pandas.DataFrame
        The base TA frame plus level, pivot and (optionally) distance columns.
    """
    pivot_ta_df = calculate_ta_df(ta_df)
    time_frames = ('month', 'quarter', 'year')

    # Explicit "not enough history" check replaces the former bare `except:`
    # clauses, which hid every kind of error, not just the missing period.
    second_period_rows = {}
    for tf in time_frames:
        labels = pivot_ta_df[tf].unique()
        second_period_rows[tf] = pivot_ta_df[pivot_ta_df[tf] == labels[1]] if len(labels) > 1 else None

    def _level(tf, reducer):
        rows = second_period_rows[tf]
        return None if rows is None else reducer(rows)

    for tf in time_frames:
        pivot_ta_df[f'{tf}_peak'] = _level(tf, lambda rows: rows['high'].max())
    for tf in time_frames:
        pivot_ta_df[f'{tf}_bottom'] = _level(tf, lambda rows: rows['low'].min())
    for tf in time_frames:
        pivot_ta_df[f'{tf}_close'] = _level(tf, lambda rows: rows['close'].iloc[0])

    for tf in time_frames:
        pivot_ta_df[f'{tf}_pivot_p'] = (pivot_ta_df[f'{tf}_peak'] + pivot_ta_df[f'{tf}_bottom'] + pivot_ta_df[f'{tf}_close'])/3

    if input_type == 'index':
        for tf in time_frames:
            pivot_ta_df[f'from_{tf}_pivot_p'] = (pivot_ta_df['close'] - pivot_ta_df[f'{tf}_pivot_p'])

    if input_type == 'stock':
        for tf in time_frames:
            pivot = pivot_ta_df[f'{tf}_pivot_p']
            pivot_ta_df[f'from_{tf}_pivot_p'] = (pivot_ta_df['close'] - pivot)/abs(pivot)

    return pivot_ta_df

In [30]:
def calculate_ma_ta_df(ta_df,input_type):
    """Attach 5/20/60/120/240/480-row moving averages of ``close``.

    The series is reversed before rolling and reversed back afterwards, so each
    average covers the row itself and the rows *below* it in the frame;
    ``min_periods=1`` keeps partial windows. ``from_ma*`` columns hold the
    distance of ``close`` from each average: relative for ``'stock'``, absolute
    for ``'index'``. Any other ``input_type`` adds no ``from_ma*`` columns.
    """
    ma_ta_df = calculate_ta_df(ta_df)
    windows = (5, 20, 60, 120, 240, 480)

    reversed_close = ma_ta_df['close'][::-1]
    for window in windows:
        rolling_mean = reversed_close.rolling(window=window, min_periods=1).mean()
        ma_ta_df[f'ma{window}'] = rolling_mean[::-1]

    if input_type == 'stock':
        for window in windows:
            average = ma_ta_df[f'ma{window}']
            ma_ta_df[f'from_ma{window}'] = (ma_ta_df['close'] - average)/average

    if input_type == 'index':
        for window in windows:
            ma_ta_df[f'from_ma{window}'] = (ma_ta_df['close'] - ma_ta_df[f'ma{window}'])

    return ma_ta_df

In [132]:
def transform_ta_df(ta_df,ta_name):
    """Reshape the first row of a wide TA frame into a long table.

    For each time frame (month, quarter, year) the level columns of the chosen
    indicator family and their matching ``from_*`` columns are read from the
    first row of ``ta_df`` and emitted as one output row per level.

    Parameters
    ----------
    ta_df : pandas.DataFrame
        Output of the matching ``calculate_*_ta_df`` function; must contain
        ``stock`` plus the level / ``from_*`` columns of ``ta_name``.
    ta_name : str
        One of ``'candle'``, ``'fibo'``, ``'pivot'`` or ``'ma'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``stock, name, value, from, order, id, ta_name``. ``value`` is
        formatted to two decimals when it is an ``int``/``float``; ``order`` is
        1..n within a time frame, except for ``'pivot'`` where it is always 3.

    Raises
    ------
    ValueError
        If ``ta_name`` is not one of the supported families (previously this
        surfaced as an UnboundLocalError).
    IndexError
        If ``ta_df`` has no rows.
    """
    if ta_name not in ('candle', 'fibo', 'pivot', 'ma'):
        raise ValueError(f"Unknown ta_name {ta_name!r}; expected 'candle', 'fibo', 'pivot' or 'ma'")

    # Moving-average windows reported under each time frame.
    ma_windows = {'month': (5, 20), 'quarter': (60, 120), 'year': (240, 480)}

    frames = []
    for time_frame in ['month','quarter','year']:
        if ta_name == 'candle':
            value_cols = [f'{time_frame}_open', f'{time_frame}_peak', f'{time_frame}_bottom']
            labels = ['Open','High','Low']
        elif ta_name == 'fibo':
            value_cols = [f'{time_frame}_fibo_382', f'{time_frame}_fibo_500', f'{time_frame}_fibo_618']
            labels = ['Fibo 0.382', 'Fibo 0.500', 'Fibo 0.618']
        elif ta_name == 'pivot':
            value_cols = [f'{time_frame}_pivot_p']
            labels = ['Pivot']
        else:
            value_cols = [f'ma{window}' for window in ma_windows[time_frame]]
            labels = [f'MA{window}' for window in ma_windows[time_frame]]
        from_cols = [f'from_{column}' for column in value_cols]

        # First row only: [stock, values..., distances...]
        first_row = ta_df[['stock'] + value_cols + from_cols].iloc[0]
        split = 1 + len(value_cols)
        values = first_row.iloc[1:split].tolist()
        distances = first_row.iloc[split:].tolist()

        # Pivot has a single level and is pinned to position 3.
        order = 3 if ta_name == 'pivot' else list(range(1, len(labels) + 1))

        long_df = pd.DataFrame({'stock': first_row.iloc[0], 'name': labels, 'value': values,
                                'from': distances, 'order': order})
        long_df['id'] = time_frame
        long_df['ta_name'] = ta_name
        long_df['value'] = long_df['value'].apply(lambda x: '{:.2f}'.format(x) if isinstance(x, (int, float)) else x)
        frames.append(long_df)

    return pd.concat(frames, axis=0)

def concat_ta_df(ta_df,input_type):
    """Build the combined long-format TA table for one price frame.

    Runs the candle, pivot, moving-average and Fibonacci calculators on
    ``ta_df``, reshapes each result with ``transform_ta_df`` and stacks them in
    the order candle, fibo, pivot, ma.
    """
    raw_candle = calculate_candle_ta_df(ta_df,input_type)
    raw_pivot = calculate_pivot_ta_df(ta_df,input_type)
    raw_ma = calculate_ma_ta_df(ta_df,input_type)
    raw_fibo = calculate_fibo_ta_df(ta_df,input_type)

    long_tables = {
        'candle': transform_ta_df(raw_candle,'candle'),
        'pivot': transform_ta_df(raw_pivot,'pivot'),
        'ma': transform_ta_df(raw_ma,'ma'),
        'fibo': transform_ta_df(raw_fibo,'fibo'),
    }

    stacking_order = ('candle', 'fibo', 'pivot', 'ma')
    return pd.concat([long_tables[family] for family in stacking_order], axis=0)

##### Page 1

- Bảng hiển thị 5 chỉ số dạng Card

In [32]:
# Add row-over-row change columns to every index frame (in place) and keep the
# first row of each frame as that index's "card".
index_card_dict = {}
for index, df in eod_index_dict.items():
    reversed_close = df['close'][::-1]
    # Differences are taken on the reversed series and flipped back, so each row
    # is compared with the row below it in the frame.
    df['change_value'] = reversed_close.diff()[::-1]
    df['change_percent'] = reversed_close.pct_change()[::-1].round(4)

    index_card_dict[index] = df.iloc[0]

# One row per index, without the open/high/low columns.
index_card_df = (
    pd.DataFrame(index_card_dict)
    .T
    .drop(columns=['open', 'high', 'low'])
    .reset_index(drop=True)
)

- Ghép bảng vẽ biểu đồ đường cho 5 chỉ số index

In [33]:
# Wide table: one `close` column per index next to the shared date column
# (values are aligned on the row index of date_series).
index_value_df = date_series.copy()
for index, df in eod_index_dict.items():
    index_value_df[index] = df['close']

# Keep the first 60 rows and reshape to long format: (date, index_name, value).
index_value_df = (
    index_value_df
    .head(60)
    .melt(id_vars=['date'], var_name='index_name', value_name='value')
)

- Tính bảng chỉ số kĩ thuật cho các index

In [140]:
# Technical-indicator table for every index. The per-index frames are collected
# first and concatenated once, instead of re-copying the growing frame on every
# iteration.
ta_index_frames = []
for index, df in eod_index_dict.items():
    temp_df = concat_ta_df(df, 'index')
    ta_index_frames.append(temp_df)

# pd.concat raises on an empty list; fall back to an empty frame as before.
ta_index_df = pd.concat(ta_index_frames, axis=0) if ta_index_frames else pd.DataFrame()

- Tâm lý và thanh khoản

In [34]:
# Compute the ITD liquidity ratio table
# Rows of time_series dated on/after the first entry of date_series, oldest first.
liquidity_t0 = time_series[time_series['date'] >= date_series['date'].iloc[0]].sort_values('date').reset_index(drop=True)
# Running total of every stock's ma5_V, taken from the first row of its EOD frame.
liquidity_ma5 = 0

for stock, df in itd_stock_dict.items():
    # One volume column per stock, restricted to the same date cut-off and
    # sorted ascending; values are aligned to liquidity_t0 by row position.
    liquidity_t0[stock] = df[df['date'] >= date_series['date'].iloc[0]].sort_values('date')['volume'].reset_index(drop=True)
    liquidity_ma5 += eod_stock_dict[stock].iloc[0]['ma5_V']

# Rows of time_percent filtered and ordered the same way as liquidity_t0.
# NOTE(review): assumes time_percent lines up row-for-row with time_series - confirm.
liquidity_df = time_percent[time_percent['date'] >= date_series['date'].iloc[0]].sort_values('date').reset_index(drop=True)
# Turn each stock's volumes into a running cumulative total (column 0 is the date).
for column in liquidity_t0.columns[1:]:
    liquidity_t0[column] = liquidity_t0[column].cumsum()

# Total cumulative volume across all stock columns at each row.
liquidity_df['volume_t0'] = liquidity_t0.iloc[:,1:].sum(axis=1)
# Reference volume: the summed ma5_V scaled by the row's `percent` value.
liquidity_df['volume_ma5'] = liquidity_ma5 * liquidity_df['percent']
liquidity_df['ratio'] = liquidity_df['volume_t0']/liquidity_df['volume_ma5']
# The first row's ratio is forced to 0.
liquidity_df.loc[0, 'ratio'] = 0 

In [47]:
# Compute the market sentiment table
market_sentiment = itd_series.copy()
total_count = 0

# Only stocks whose ITD frame has more than 52 rows contribute a score column.
for stock, df in itd_score_dict.items():
    if len(itd_stock_dict[stock]) <= 52:
        continue
    total_count += 1
    market_sentiment[stock] = df['t0_score']


def _sentiment_label(ratio):
    """Map the share of positive-score stocks to one of five sentiment labels."""
    if ratio < 0.2:
        return 'Sợ hãi'
    if ratio < 0.4:
        return 'Tiêu cực'
    if ratio < 0.6:
        return 'Trung lập'
    if ratio < 0.8:
        return 'Tích cực'
    return 'Hưng phấn'


# Per row: number of stock columns (everything after the first column) with a score above 0.
score_columns = market_sentiment.iloc[:,1:]
market_sentiment['count_pos'] = score_columns.apply(lambda row: (row > 0).sum(), axis=1)
market_sentiment['total'] = total_count
# Rows with any missing value are dropped before the summary columns are kept.
market_sentiment = market_sentiment.dropna()[['date','count_pos','total']]
market_sentiment['ratio'] = market_sentiment['count_pos'] / market_sentiment['total']
market_sentiment['sentiment'] = market_sentiment['ratio'].apply(_sentiment_label)

- Dữ liệu cho biểu đồ MS

In [53]:
# Tag the EOD and ITD 'all_stock' frames with their source and stack them (EOD first).
eod_market_ms = eod_all_stock_ms['all_stock'].assign(type='eod')
itd_market_ms = itd_all_stock_ms['all_stock'].assign(type='itd')

market_ms = pd.concat([eod_market_ms, itd_market_ms], axis=0)

- Dữ liệu top 10 cổ phiếu tiền vào và tiền ra

In [86]:
# Columns shown for each ranked stock.
market_top_columns = ['stock', 'industry_name','industry_perform','marketcap_group','close','price_change','t0_score']

# NOTE(review): assumes eod_score_df is already ranked best-first (presumably by
# t0_score), so the first 10 rows are the top and the last 10 rows the bottom - confirm.
market_top_10 = eod_score_df[market_top_columns].iloc[:10].copy()
market_top_10['type'] = 'top'

# Previously this also took .iloc[:10], so the "bottom" list duplicated the top list.
market_bottom_10 = eod_score_df[market_top_columns].iloc[-10:].copy()
market_bottom_10['type'] = 'bottom'

market_top_stock = pd.concat([market_top_10, market_bottom_10], axis=0).reset_index(drop=True)

##### Page 2