In [1]:
import os
import pandas as pd
import numpy as np
from datetime import timedelta, datetime
import datetime as dt
import copy

import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore', category=FutureWarning)
pd.options.mode.chained_assignment = None

#### Chuẩn bị các dữ liệu

##### Các dữ liệu dùng để làm map tham chiếu

In [2]:
# Load the 'name_map' sheet once; the three lookup tables below are all column
# subsets of that single sheet (previously the workbook was parsed three times).
_name_map_sheet = pd.read_excel("period_data/stock_classification.xlsx", sheet_name='name_map')

# code -> full display name
name_map = _name_map_sheet.drop(columns=['group', 'order'])
name_map_dict = name_map.set_index('code')['full_name'].to_dict()

# code -> display order
order_map = _name_map_sheet.drop(columns=['group', 'full_name'])
order_map_dict = order_map.set_index('code')['order'].to_dict()

# code -> group tag ('tt', 'A'..'D', 'hs', 'cap', ...)
group_map = _name_map_sheet.drop(columns=['order', 'full_name'])
group_map_dict = group_map.set_index('code')['group'].to_dict()

# Key lists for each way of grouping stocks, selected by the group tag
all_stock_key_list = [key for key, value in group_map_dict.items() if value == 'tt']
industry_name_list = [key for key, value in group_map_dict.items() if value in ['A', 'B', 'C', 'D']]
industry_perform_list = [key for key, value in group_map_dict.items() if value == 'hs']
marketcap_group_list = [key for key, value in group_map_dict.items() if value == 'cap']

# Combined key list covering every group
group_stock_key_list = all_stock_key_list + industry_name_list + industry_perform_list + marketcap_group_list

In [3]:
# Map each period key (the 'index' column) to the list of that row's remaining
# values; later cells read positions [0], [1] and [2] of these lists.
period_map = pd.read_excel("period_data/period_stock_list.xlsx", sheet_name='period_map')
period_map_dict = {
    period_key: period_row.tolist()
    for period_key, period_row in period_map.set_index('index').iterrows()
}

# Resolve the period keys ('q<N>_<year>') of the current and the previous quarter
def get_quarter(name, now=None):
    """Return the period key of the current or the previous calendar quarter.

    Parameters
    ----------
    name : str
        'current_quarter' or 'previous_quarter'.
    now : datetime, optional
        Reference moment; defaults to ``datetime.now()``. Exposed so the result
        can be reproduced for a fixed date.

    Returns
    -------
    str
        Key of the form 'q<N>_<year>', e.g. 'q2_2024'.

    Raises
    ------
    ValueError
        If ``name`` is not one of the two supported values.
    """
    if now is None:
        now = datetime.now()

    # Months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4
    quarter = (now.month - 1) // 3 + 1

    if name == 'current_quarter':
        return f'q{quarter}_{now.year}'
    if name == 'previous_quarter':
        # Only the first quarter rolls back into the previous year
        if quarter == 1:
            return f'q4_{now.year - 1}'
        return f'q{quarter - 1}_{now.year}'
    raise ValueError(f"name must be 'current_quarter' or 'previous_quarter', got {name!r}")
        
# Date spans used by the calculations: the whole history (from the start of
# 'q2_2020' to the end of the current quarter), the current quarter, and the
# previous quarter. Positions [0] and [1] of a period entry are its two bounds.
_current_quarter_key = get_quarter('current_quarter')
_previous_quarter_key = get_quarter('previous_quarter')
_current_period = period_map_dict[_current_quarter_key]
_previous_period = period_map_dict[_previous_quarter_key]

calculate_time_span = [period_map_dict['q2_2020'][0], _current_period[1]]
current_quarter_span = [_current_period[0], _current_period[1]]
previous_quarter_span = [_previous_period[0], _previous_period[1]]

# Stock list of the current period (one column per period key in the sheet)
period_stock_list = pd.read_excel("period_data/period_stock_list.xlsx", sheet_name='period_stock_list')
curren_stock_list = period_stock_list[_current_quarter_key].dropna().tolist()

##### Các biến thời gian

In [4]:
# Latest date present in the EOD index export ('date' is stored as yymmdd)
today = pd.to_datetime(
    pd.read_csv('D:\\t2m-project\\ami-data\\ami_eod_data\\VNINDEX.csv')
    .sort_values('date', ascending=False).reset_index(drop=True)
    ['date'].iloc[0],
    format='%y%m%d')

# Latest timestamp present in the intraday index export ('yymmdd HHMMSS')
current_time = pd.to_datetime(
    pd.read_csv('D:\\t2m-project\\ami-data\\ami_itd_data\\HNXINDEX.csv')
    .sort_values('date', ascending=False).reset_index(drop=True)
    ['date'].iloc[0],
    format='%y%m%d %H%M%S')

# 5-minute grid covering both sessions of `today`. The bounds are built with
# Timestamp arithmetic rather than by string-formatting `today`, which renders
# as 'YYYY-MM-DD 00:00:00' and left date parsing to guess at a string holding
# two clock times. '5min' replaces the deprecated '5T' alias.
_session_day = today.normalize()
time_series_list = []
time_series_list.extend(pd.date_range(start=_session_day + pd.Timedelta(hours=9),
                                      end=_session_day + pd.Timedelta(hours=11, minutes=25),
                                      freq='5min'))
time_series_list.extend(pd.date_range(start=_session_day + pd.Timedelta(hours=13),
                                      end=_session_day + pd.Timedelta(hours=14, minutes=55),
                                      freq='5min'))
time_series = pd.DataFrame(time_series_list).rename(columns={0:'date'})

# Same grid starting at 09:15 (skips the first three bars); used for charts
itd_series = pd.DataFrame(time_series_list[3:]).rename(columns={0:'date'})

In [5]:
def calculate_time_percent(time):
    """Return the fraction of the trading day elapsed at the end of a 5-minute bar.

    The trading day is 09:00-11:30 plus 13:00-15:00 (270 minutes). ``time`` is
    the start of a bar; five minutes are added to get the bar's end before
    measuring.

    Parameters
    ----------
    time : datetime-like
        Bar start; must support ``+ timedelta`` and ``.time()``.

    Returns
    -------
    float
        Elapsed trading minutes divided by 270. Bars ending before 09:00 give
        0.0 and bars ending during the lunch break give the full morning
        fraction. Values after 15:00 are not capped here.
    """
    start_time_am = dt.time(9, 00)
    end_time_am = dt.time(11, 30)
    start_time_pm = dt.time(13, 00)
    end_time_pm = dt.time(15, 00)

    def _minutes_between(earlier, later):
        """Whole minutes from `earlier` to `later`, floored at zero."""
        earlier_delta = dt.timedelta(hours=earlier.hour, minutes=earlier.minute, seconds=earlier.second)
        later_delta = dt.timedelta(hours=later.hour, minutes=later.minute, seconds=later.second)
        # total_seconds() keeps the sign; `.seconds` of a negative timedelta
        # wraps around to almost a full day.
        return max(int((later_delta - earlier_delta).total_seconds()) // 60, 0)

    bar_end = (time + timedelta(minutes=5)).time()
    morning_minutes = _minutes_between(start_time_am, end_time_am)
    afternoon_minutes = _minutes_between(start_time_pm, end_time_pm)
    full_time_range = morning_minutes + afternoon_minutes

    if bar_end <= end_time_am:
        time_range = _minutes_between(start_time_am, bar_end)
    elif bar_end < start_time_pm:
        # Lunch break: no trading time accrues after the morning session
        time_range = morning_minutes
    else:
        time_range = morning_minutes + _minutes_between(start_time_pm, bar_end)

    return time_range/full_time_range

# Keep only the bars that already have data (not later than current_time),
# newest first.
time_series = (
    time_series[time_series['date'] <= current_time]
    .sort_values('date', ascending=False)
    .reset_index(drop=True)
)

# Elapsed share of the trading day for every remaining bar; anything that is
# not strictly below 1 is replaced by 1.
time_percent = time_series.copy()
_raw_percent = time_percent['date'].apply(calculate_time_percent)
time_percent['percent'] = _raw_percent.apply(lambda share: 1 if not (share < 1) else share)
current_time_percent = time_percent['percent'].iloc[0]

In [6]:
# Clock time to display as the "last updated" time
def get_update_time(start_time_am, end_time_am, start_time_pm, end_time_pm):
    """Return the wall-clock time snapped to the trading sessions.

    Monday to Friday: before the morning open -> afternoon close; inside a
    session -> the actual time; during the break -> morning close; after the
    afternoon close -> afternoon close. On weekends the afternoon close is
    returned. All four arguments are ``datetime.time`` values.
    """
    if dt.datetime.now().weekday() <= 4:
        now_time = dt.datetime.now().time()
        if now_time < start_time_am:
            return end_time_pm
        if now_time < end_time_am:
            return now_time
        if now_time < start_time_pm:
            return end_time_am
        if now_time < end_time_pm:
            return now_time
        return end_time_pm
    if dt.datetime.now().weekday() > 4:
        return end_time_pm

# One-row frame holding the "updated at" label: the date of the latest
# intraday bar combined with the session-snapped clock time.
_session_bounds = (dt.time(9, 0), dt.time(11, 30), dt.time(13, 0), dt.time(15, 0))
time_update = get_update_time(*_session_bounds)
date_time_update = dt.datetime.combine(current_time.date(), time_update)
_update_label = f"Cập nhât: {date_time_update.strftime('%d/%m/%Y %H:%M:%S')}"
update_time = pd.DataFrame({'date': [_update_label]})

##### Các dữ liệu EOD

In [7]:
# Read every CSV exported by the EOD job, newest row first, keyed by file stem,
# and parse the 'date' column (stored as yymmdd).
folder_path = 'D:\\t2m-project\\ami-data\\ami_eod_data'
eod_item_dict = {}
for filename in os.listdir(folder_path):
    if not filename.endswith('.csv'):
        continue
    key = os.path.splitext(filename)[0]
    df = pd.read_csv(os.path.join(folder_path, filename)).sort_values('date', ascending=False).reset_index(drop=True)
    df['date'] = pd.to_datetime(df['date'], format='%y%m%d')
    eod_item_dict[key] = df

# Summary table of all item names plus helper columns used by the filters below
eod_item_df = pd.DataFrame(list(eod_item_dict.keys())).rename(columns={0:'item'})
eod_item_df['len'] = eod_item_df['item'].apply(len)
eod_item_df['last_2chars'] = eod_item_df['item'].str[-2:]
eod_item_df['first_4chars'] = eod_item_df['item'].str[:4]

_eod_helper_columns = ['len', 'last_2chars', 'first_4chars']


def _eod_item_names(mask):
    """Rows of eod_item_df selected by `mask`, reduced to the 'item' column."""
    return eod_item_df[mask].reset_index(drop=True).drop(_eod_helper_columns, axis=1)


_name_len = eod_item_df['len']
_suffix = eod_item_df['last_2chars']

# Index names: 4-9 characters, not 6, and not the item '0001'
index_name_df = _eod_item_names(
    (_name_len > 3) & (_name_len != 6) & (_name_len < 10) & (eod_item_df['item'] != '0001')
)

_current_stocks = set(curren_stock_list)
_index_names = set(index_name_df['item'])
eod_stock_dict = {k: v.drop(['option'], axis=1) for k, v in eod_item_dict.items() if k in _current_stocks}
eod_index_dict = {k: v.rename(columns={'option':'value'}).drop('cap', axis=1)
                  for k, v in eod_item_dict.items() if k in _index_names}

# Item names carrying proprietary-trading (TD) and foreign (NN) flows, for
# single stocks (6 characters) and for indexes (10+ characters, excluding VN30*)
stock_name_td_df = _eod_item_names((_name_len == 6) & (_suffix == 'TD'))
stock_name_nn_df = _eod_item_names((_name_len == 6) & (_suffix == 'NN'))
index_td_nn_df = _eod_item_names(
    (_name_len >= 10) & (eod_item_df['first_4chars'] != 'VN30') & ((_suffix == 'NN') | (_suffix == 'TD'))
)

# In these files the OHLCV columns carry flow data; rename them accordingly
_flow_columns = {'open':'sell_volume','close':'buy_volume','volume':'sell_value','option':'buy_value'}
_td_names = set(stock_name_td_df['item'])
_nn_names = set(stock_name_nn_df['item'])
_index_flow_names = set(index_td_nn_df['item'])

stock_td_dict = {k: v.drop(['high','low','cap'], axis=1).rename(columns=_flow_columns)
                 for k, v in eod_item_dict.items() if k in _td_names}
stock_nn_dict = {k: v.drop(['high','low','cap'], axis=1).rename(columns=_flow_columns)
                 for k, v in eod_item_dict.items() if k in _nn_names}
index_td_nn_dict = {k: v.drop(['high','low','cap','stock'], axis=1).rename(columns=_flow_columns)
                    for k, v in eod_item_dict.items() if k in _index_flow_names}

# Rescale the index-level NN/TD tables (volume / 1e3, value / 1e9), make the
# sell side negative, and add the net columns.
index_td_nn_dict = {
    k: v.assign(
        buy_volume=v['buy_volume']/1000,
        sell_volume=-v['sell_volume']/1000,
        buy_value=v['buy_value']/1000000000,
        sell_value=-v['sell_value']/1000000000,
    ).assign(
        net_volume=lambda scaled: scaled['buy_volume'] + scaled['sell_volume'],
        net_value=lambda scaled: scaled['buy_value'] + scaled['sell_value'],
    )
    for k, v in index_td_nn_dict.items()
}

In [8]:
# Rolling highs/lows, moving averages and trend flags for every EOD stock frame
_EOD_WINDOWS = (5, 20, 60, 120, 240, 480)


def _with_eod_indicators(frame):
    """Return `frame` with rolling levels, MAs and trend flags, newest row first.

    Indicators are computed on the date-ascending frame; afterwards only rows
    inside calculate_time_span are kept and the frame is re-sorted descending.
    """
    frame = frame.sort_values(by=['date'], ascending=True).reset_index(drop=True)

    indicators = {}
    for window in _EOD_WINDOWS:
        indicators[f'high{window}'] = frame['high'].rolling(window=window, min_periods=1).max()
        indicators[f'low{window}'] = frame['low'].rolling(window=window, min_periods=1).min()
    # 5-session average volume, shifted so each row sees the previous sessions only
    indicators['ma5_V'] = frame['volume'].rolling(window=5, min_periods=1).mean().shift(1)
    for window in _EOD_WINDOWS:
        indicators[f'ma{window}'] = frame['close'].rolling(window=window, min_periods=1).mean()
    frame = frame.assign(**indicators)

    # Trend flag: close above the previous row's midpoint of the rolling range
    trends = {
        f'trend_{window}p': (
            frame['close'] > ((frame[f'high{window}'] + frame[f'low{window}'])/2).shift(1)
        ).astype(int)
        for window in _EOD_WINDOWS
    }
    frame = frame.assign(**trends)

    in_span = (frame['date'] >= calculate_time_span[0]) & (frame['date'] <= calculate_time_span[1])
    return frame[in_span].sort_values(by=['date'], ascending=False).reset_index(drop=True)


eod_stock_dict = {key: _with_eod_indicators(df) for key, df in eod_stock_dict.items()}

In [9]:
#Thêm cột tên period và số lượng cổ phiếu từng thời kì
def assign_period(x):
    """Return the first period key whose [start, end] span contains `x`.

    Spans come from positions [0] and [1] of each period_map_dict entry.
    Returns None when no span matches.
    """
    return next(
        (
            period_key
            for period_key, bounds in period_map_dict.items()
            if pd.Timestamp(bounds[0]) <= x <= pd.Timestamp(bounds[1])
        ),
        None,
    )

# Tag every row with its period key and that period's stock count
# (position [2] of the period_map_dict entry).
for stock, df in eod_stock_dict.items():
    df['period'] = df['date'].apply(assign_period)
    df['count'] = df['period'].apply(lambda x: period_map_dict[x][2])

# Liquidity ratio (volume vs. the prior 5-session average), and replace cap by
# its mean over 20 sessions.
for df in eod_stock_dict.values():
    df['liquid_ratio'] = df['volume'] / (df['ma5_V'])
    # The newest row may be a partial session, so its baseline is scaled by the
    # elapsed share of the day. Written through df.iloc on the frame itself:
    # the chained form df['liquid_ratio'].iloc[0] = ... updates a temporary
    # object and is silently lost under pandas Copy-on-Write.
    df.iloc[0, df.columns.get_loc('liquid_ratio')] = (
        df['volume'].iloc[0] / ((df['ma5_V']).iloc[0]*current_time_percent)
    )
    # Frames are newest-first, so reverse before rolling and reverse back
    df['cap'] = df['cap'][::-1].rolling(window=20).mean()[::-1]

# Date column covering the EOD calculation span, taken from the 'REE' frame.
# NOTE(review): this assumes 'REE' is always present and has no missing sessions.
date_series = pd.DataFrame(eod_stock_dict['REE']['date']).rename(columns={0:'date'})

##### Các dữ liệu ITD

In [10]:
# Read every CSV exported by the intraday job, newest row first, keyed by file stem
itd_item_dict = {}
folder_path = 'D:\\t2m-project\\ami-data\\ami_itd_data'
for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        key = os.path.splitext(filename)[0]
        itd_item_dict[key] = pd.read_csv(os.path.join(folder_path, filename)).sort_values('date', ascending=False).reset_index(drop=True)

# Keep only the bars that already have data (idempotent if already trimmed)
time_series = time_series.loc[time_series['date'] <= current_time].sort_values('date', ascending=False).reset_index(drop=True)

_itd_price_columns = ['open', 'high', 'low', 'close']
for item, df in itd_item_dict.items():

    df['date'] = pd.to_datetime(df['date'].astype(str), format='%y%m%d %H%M%S')

    # Put the item on the common time grid. Rows are newest-first, so a
    # backward fill copies prices from the next row down (the earlier bar).
    # DataFrame.bfill() replaces the deprecated fillna(method='bfill').
    df = time_series.merge(df, on='date', how='left').sort_values('date', ascending=False)
    df[_itd_price_columns] = df[_itd_price_columns].bfill()
    df['volume'] = df['volume'].fillna(0)
    df['stock'] = item

    itd_item_dict[item] = df

# Summary table of all item names plus helper columns used by the filter below
itd_item_df = pd.DataFrame(list(itd_item_dict.keys())).rename(columns={0:'item'})
itd_item_df['len'] = itd_item_df['item'].apply(lambda x: len(x))
itd_item_df['last_2chars'] = itd_item_df['item'].str[-2:]
itd_item_df['third_last_char'] = itd_item_df['item'].str[-3:-2]
itd_item_df['first_4chars'] = itd_item_df['item'].str[:4]

# Index names: 4-9 characters, not 6, and not the item '0001'
index_name_df = itd_item_df[(itd_item_df['len']>3) & (itd_item_df['len']!=6) & (itd_item_df['len']<10) & (itd_item_df['item']!='0001')]\
                .reset_index(drop=True).drop(['len','last_2chars','third_last_char','first_4chars'], axis=1)

# Membership sets are built once instead of rebuilding a list for every key
_itd_current_stocks = set(curren_stock_list)
_itd_index_names = set(index_name_df['item'])
itd_stock_dict = {k: v.drop(columns=['option'])
                  for k, v in itd_item_dict.items() if k in _itd_current_stocks}
itd_index_dict = {k: v.rename(columns={'option':'value'})
                  for k, v in itd_item_dict.items() if k in _itd_index_names}

In [11]:
# Copy the latest EOD rolling highs/lows onto every intraday frame
_itd_level_columns = ['high5', 'low5', 'high20', 'low20', 'high60', 'low60',
                      'high120', 'low120', 'high240', 'low240', 'high480', 'low480']
_itd_fallback_columns = ['open', 'high', 'low', 'close', 'cap']
_itd_windows = (5, 20, 60, 120, 240, 480)

for stock, df in itd_stock_dict.items():
    temp_data = eod_stock_dict[stock][_itd_level_columns].iloc[0]
    itd_stock_dict[stock] = df.assign(**temp_data)

    # Stocks with no trades today take their prices and cap from the latest
    # EOD row. Each column is set from a scalar: assigning a label-indexed
    # Series to a list of columns depends on pandas-version-specific alignment.
    if df['volume'].max() == 0:
        latest_eod = eod_stock_dict[stock][_itd_fallback_columns].iloc[0]
        for column in _itd_fallback_columns:
            itd_stock_dict[stock][column] = latest_eod[column]


def _with_itd_trends(frame):
    """Add trend_<N>p flags (close above the previous row's range midpoint).

    Flags are computed on the date-ascending frame; the result is returned
    newest-first with a fresh index.
    """
    frame = frame.sort_values(by=['date'], ascending=True).reset_index(drop=True)
    trends = {
        f'trend_{window}p': (
            frame['close'] > ((frame[f'high{window}'] + frame[f'low{window}'])/2).shift(1)
        ).astype(int)
        for window in _itd_windows
    }
    return frame.assign(**trends).sort_values(by=['date'], ascending=False).reset_index(drop=True)


itd_stock_dict = {key: _with_itd_trends(df) for key, df in itd_stock_dict.items()}

#### Phân nhóm cổ phiếu

In [12]:
# Classification sheet restricted to the stocks of the current period
full_stock_classification_df = pd.read_excel("period_data/stock_classification.xlsx", sheet_name='stock_classification')
stock_classification_df = full_stock_classification_df[full_stock_classification_df['stock'].isin(curren_stock_list)].reset_index(drop=True)

# Close price and cap from the oldest row after the start of the current
# quarter (frames are newest-first, hence iloc[-1]). Values are keyed by ticker
# so they attach to the right row below, whatever order eod_stock_dict was
# built in.
price_by_stock = {}
cap_by_stock = {}
for stock, df in eod_stock_dict.items():
    df = df[df['date'] > current_quarter_span[0]]
    price_by_stock[stock] = df['close'].iloc[-1].item()
    cap_by_stock[stock] = df['cap'].iloc[-1].item()
price_arr = list(price_by_stock.values())
cap_arr = list(cap_by_stock.values())

vonhoa_classification_df = stock_classification_df.copy()
vonhoa_classification_df['price'] = vonhoa_classification_df['stock'].map(price_by_stock)
vonhoa_classification_df['cap'] = vonhoa_classification_df['stock'].map(cap_by_stock)

# Thresholds are multiples of 1/10000 of the summed cap
cap_coef = sum(cap_arr)/10000


def _marketcap_group(cap, price):
    """Return 'small', 'mid', 'large' or 'penny' for one stock.

    cap is compared against multiples of cap_coef; inside the 10x-20x band the
    price decides between 'small' (< 10) and 'mid' (>= 10). Anything that
    matches no band, including NaN inputs, is 'penny'.
    """
    in_split_band = 10*cap_coef <= cap < 20*cap_coef
    if (cap_coef < cap < 10*cap_coef) or (in_split_band and price < 10):
        return 'small'
    if (in_split_band and price >= 10) or (20*cap_coef <= cap < 100*cap_coef):
        return 'mid'
    if cap >= 100*cap_coef:
        return 'large'
    return 'penny'


vonhoa_classification_df['marketcap_group'] = vonhoa_classification_df.apply(
    lambda x: _marketcap_group(x['cap'], x['price']), axis=1)

stock_classification_df = pd.concat([stock_classification_df, vonhoa_classification_df['marketcap_group']], axis=1)

In [13]:
# Ticker -> category lookups, one per classification column
_tickers = stock_classification_df['stock']
stock_by_industry = dict(zip(_tickers, stock_classification_df['industry_name']))
stock_by_perform = dict(zip(_tickers, stock_classification_df['industry_perform']))
stock_by_marketcap = dict(zip(_tickers, stock_classification_df['marketcap_group']))

# Containers filled further down: group key -> {ticker: frame}, for EOD and ITD
eod_all_stock, itd_all_stock = {}, {}
eod_industry_name, itd_industry_name = {}, {}
eod_industry_perform, itd_industry_perform = {}, {}
eod_marketcap_group, itd_marketcap_group = {}, {}

# Build {category: {stock: value}} from a category -> stocks mapping
def create_mapping(stock_dict, category_dict):
    """Return, per category, the sub-dict of `stock_dict` for that category's stocks.

    Stocks listed under a category but absent from `stock_dict` are skipped.
    """
    return {
        category: {stock: stock_dict[stock] for stock in stocks if stock in stock_dict}
        for category, stocks in category_dict.items()
    }

# Distinct category values per classification (market-cap groups are fixed)
unique_industries = np.unique(list(stock_by_industry.values()))
unique_performs = np.unique(list(stock_by_perform.values()))
unique_marketcaps = ['large', 'mid', 'small', 'penny']


def _frames_for(frames_by_stock, tickers):
    """Sub-dict of `frames_by_stock` for `tickers`, skipping tickers without a frame."""
    return {ticker: frames_by_stock[ticker] for ticker in tickers if ticker in frames_by_stock}


# Whole market: shallow copies of the per-stock dicts
itd_all_stock['all_stock'] = dict(itd_stock_dict)
eod_all_stock['all_stock'] = dict(eod_stock_dict)

# Per industry
for industry in unique_industries:
    relevant_stocks = [stock for stock, ind in stock_by_industry.items() if ind == industry]
    eod_industry_name[industry] = _frames_for(eod_stock_dict, relevant_stocks)
    itd_industry_name[industry] = _frames_for(itd_stock_dict, relevant_stocks)

# Per performance group
for performance in unique_performs:
    relevant_stocks = [stock for stock, perf in stock_by_perform.items() if perf == performance]
    eod_industry_perform[performance] = _frames_for(eod_stock_dict, relevant_stocks)
    itd_industry_perform[performance] = _frames_for(itd_stock_dict, relevant_stocks)

# Per market-cap group
for marketcap in unique_marketcaps:
    relevant_stocks = [stock for stock, mcap in stock_by_marketcap.items() if mcap == marketcap]
    eod_marketcap_group[marketcap] = _frames_for(eod_stock_dict, relevant_stocks)
    itd_marketcap_group[marketcap] = _frames_for(itd_stock_dict, relevant_stocks)


#### Điểm dòng tiền từng cổ phiếu

##### Điểm dòng tiền EOD

In [14]:
# Dates used for money-flow scoring: every row of date_series on or after the
# start of the previous quarter (the original index labels are kept).
_in_score_window = date_series['date'] >= previous_quarter_span[0]
score_date_series = date_series.loc[_in_score_window]

def score_calculation(row):
    """Return the money-flow score of one bar.

    The score multiplies three factors: where the close sits inside the
    high-low range, the absolute relative change versus the previous close, and
    the traded value relative to ``ma5_prev * ma5_V``. The product is scaled by
    100 and a small tie-breaker (the close's distance from ``ma5_prev``,
    divided by 100) is added so equal scores do not share a rank.

    ``row`` must provide 'close', 'low', 'high', 'close_prev', 'volume',
    'ma5_prev' and 'ma5_V'.

    When one of the main term's denominators is zero, only the tie-breaker is
    returned. The check is explicit because NumPy scalars do not raise
    ZeroDivisionError; they would yield NaN/inf instead.
    """
    # Added to every score so equal money-flow scores do not collide in ranking
    tie_breaker = ((row['close'] - row['ma5_prev']) / row['ma5_prev'])/100

    price_range = row['high'] - row['low']
    value_baseline = row['ma5_prev'] * row['ma5_V']
    if price_range == 0 or row['close_prev'] == 0 or value_baseline == 0:
        return tie_breaker

    return (((row['close'] - row['low']) - (row['high'] - row['close'])) / price_range *
            abs((row['close'] - row['close_prev'])) / row['close_prev'] *
            (row['volume']*row['close']) / value_baseline) * 100 \
            + tie_breaker


In [15]:
# Money-flow scores per stock
_score_input_columns = ['stock', 'date', 'period', 'count', 'open', 'high', 'low', 'close',
                        'volume', 'liquid_ratio', 'ma5', 'ma5_V']
eod_score_dict = {}
for stock in eod_stock_dict.keys():

    # Keep the needed columns, from the start of the previous quarter onwards.
    # The explicit copy makes it clear the columns below go to a new frame.
    temp_df = eod_stock_dict[stock][_score_input_columns]
    temp_df = temp_df[temp_df['date']>=previous_quarter_span[0]].copy()

    # Frames are newest-first, so shift(-1) fetches the previous session
    temp_df['ma5_prev'] = temp_df['ma5'].shift(-1)
    temp_df['close_prev'] = temp_df['close'].shift(-1)
    temp_df['t0_score'] = temp_df.apply(score_calculation, axis=1)
    # t5 = mean of t0 over the current and up to 4 earlier sessions
    temp_df['t5_score'] = temp_df['t0_score'][::-1].rolling(window=5, min_periods=1).mean()[::-1]

    eod_score_dict[stock] = temp_df


def _rank_scores(score_column):
    """Rank stocks against each other per row (1 = highest score).

    Builds one stocks-as-columns table aligned on score_date_series' index,
    fills missing scores with 0 once, and ranks across columns with ties
    taking the minimum rank.
    """
    scores = pd.DataFrame({stock: df[score_column] for stock, df in eod_score_dict.items()})
    scores = scores.reindex(score_date_series.index).fillna(0)
    return scores.rank(ascending=False, method='min', axis=1)


# Cross-sectional ranks for both scores
t0_ranking_df = _rank_scores('t0_score')
t5_ranking_df = _rank_scores('t5_score')

# Attach the ranks to every stock's score frame
for stock, df in eod_score_dict.items():
    df['rank_t0'] = t0_ranking_df[stock]
    df['rank_t5'] = t5_ranking_df[stock]

    # 1 when the T0 rank is within the top 10% of the period's stock count
    df['top_check'] = (df['rank_t0'] <= df['count']*0.1).astype(int)

    # Number of top-10% sessions over the current and 19 earlier sessions
    df['top_count'] = df['top_check'][::-1].rolling(window=20).sum()[::-1]

# Drop rows that still contain NaN (e.g. where a shift or rolling window ran
# out of data)
for stock, df in eod_score_dict.items():
    df.dropna(inplace=True)

In [16]:
# Summary table: the newest score row of every stock, highest t0 score first
score_list = [df.iloc[0] for df in eod_score_dict.values()]

eod_score_df = pd.DataFrame(score_list).sort_values('t0_score', ascending=False).reset_index(drop=True)
eod_score_df = eod_score_df.fillna('')


def _liquid_bucket(ratio):
    """Label a liquidity ratio with its percentage band."""
    # The first band ends at 0.5 so that it matches its '<50%' label and the
    # start of the next band (the threshold used to be 0.6).
    if ratio < 0.5:
        return '<50%'
    if ratio < 1:
        return '50%-100%'
    if ratio < 1.5:
        return '100%-150%'
    if ratio < 2:
        return '150%-200%'
    return '>200%'


def _rank_bucket(rank):
    """Label a rank with its band."""
    if rank <= 50:
        return '1-50'
    if rank <= 150:
        return '51-150'
    if rank <= 250:
        return '151-250'
    return '>250'


# Sort keys for the labels; the open-ended last band takes the default
_liquid_bucket_order = {'<50%': 1, '50%-100%': 2, '100%-150%': 3, '150%-200%': 4}
_rank_bucket_order = {'1-50': 1, '51-150': 2, '151-250': 3}

eod_score_df['filter_t0'] = eod_score_df['t0_score'].apply(lambda x: 'Tiền vào' if x >= 0 else 'Tiền ra')
eod_score_df['filter_t5'] = eod_score_df['t5_score'].apply(lambda x: 'Tiền vào' if x >= 0 else 'Tiền ra')
eod_score_df['filter_liquid'] = eod_score_df['liquid_ratio'].apply(_liquid_bucket)
eod_score_df['order_filter_liquid'] = eod_score_df['filter_liquid'].apply(lambda x: _liquid_bucket_order.get(x, 5))
eod_score_df['filter_rank'] = eod_score_df['rank_t5'].apply(_rank_bucket)
eod_score_df['order_filter_rank'] = eod_score_df['filter_rank'].apply(lambda x: _rank_bucket_order.get(x, 4))

# Attach the classification columns and replace the codes by their full names
eod_score_df = eod_score_df.merge(stock_classification_df[['stock','industry_name','industry_perform','marketcap_group']], on='stock', how='left')
eod_score_df['industry_name'] = eod_score_df['industry_name'].map(name_map_dict)
eod_score_df['industry_perform'] = eod_score_df['industry_perform'].map(name_map_dict)
eod_score_df['marketcap_group'] = eod_score_df['marketcap_group'].map(name_map_dict)

##### Điểm dòng tiền ITD

In [17]:
# Assumes date_series and itd_stock_dict are already defined.
# Intraday scoring starts at 09:15 on the newest date of date_series.
itd_start = pd.Timestamp(date_series['date'].iloc[0].replace(hour=9, minute=15, second=0, microsecond=0))

# Keep only the price/volume columns and the bars from 09:15 onwards (per the
# original note, earlier bars occur for HNX and UPCOM stocks). Works on a deep
# copy so itd_stock_dict itself is untouched.
itd_score_dict = {k: v[['stock', 'date', 'open', 'high', 'low', 'close', 'volume']]
                  .loc[v['date'] >= itd_start] 
                  for k, v in copy.deepcopy(itd_stock_dict).items()}

for stock, itd_df in itd_score_dict.items():

    eod_df = eod_score_dict[stock]

    # Baselines taken from the newest EOD row. The volume baseline is scaled by
    # the elapsed share of the day; the product is a Series, so it is matched
    # to itd_df by index label.
    itd_df['ma5_V'] = time_percent['percent']*(eod_df['ma5_V'].iloc[0])
    itd_df['ma5_prev'] = eod_df['ma5_prev'].iloc[0]
    itd_df['close_prev'] = eod_df['close_prev'].iloc[0]

    # Rows are newest-first: reverse, accumulate, reverse back, so every bar
    # carries the running high / low / total volume up to that bar.
    itd_df['high'] = itd_df['high'][::-1].cummax()[::-1]
    itd_df['low'] = itd_df['low'][::-1].cummin()[::-1]
    itd_df['volume'] = itd_df['volume'][::-1].cumsum()[::-1]
    itd_df['liquid_ratio'] = itd_df['volume']/itd_df['ma5_V']

    # Overwrite the row labelled 0 with the newest EOD values.
    # NOTE(review): presumably label 0 is the newest bar; if that label was
    # filtered out above, .loc would add a new row instead - confirm.
    itd_df.loc[0, 'volume'] = eod_df['volume'].iloc[0]
    itd_df.loc[0, 'close'] = eod_df['close'].iloc[0]
    itd_df.loc[0, 'low'] = eod_df['low'].iloc[0]
    itd_df.loc[0, 'high'] = eod_df['high'].iloc[0]

    # Score every bar, and express the close as a change versus the open of the
    # newest EOD row.
    itd_df['t0_score'] = itd_df.apply(score_calculation, axis=1)
    itd_df['price_change'] = (itd_df['close'] - eod_df['open'].iloc[0])/eod_df['open'].iloc[0]

    # Full names of the stock's groups; .item() requires exactly one
    # classification row for the stock.
    itd_df['industry_name'] = stock_classification_df[stock_classification_df['stock']==stock]['industry_name'].map(name_map_dict).item()
    itd_df['industry_perform'] = stock_classification_df[stock_classification_df['stock']==stock]['industry_perform'].map(name_map_dict).item()
    itd_df['marketcap_group'] = stock_classification_df[stock_classification_df['stock']==stock]['marketcap_group'].map(name_map_dict).item()

    # Drop the EOD score columns that are no longer needed
    eod_score_dict[stock] = eod_df.drop(columns=['open','high','low','ma5_prev','close_prev','volume'])

    # Keep only the intraday columns that are still needed
    itd_score_dict[stock] = itd_df[['stock', 'date','close', 'volume','t0_score','liquid_ratio','industry_name','industry_perform','marketcap_group', 'price_change']]

#### Điểm dòng tiền nhóm cổ phiếu

- Các hàm tính toán

In [18]:
# Weight each stock's t0 score by the breadth of its group
def adjust_score_by_breath(t0_score, ratio_column):
    """Scale scores by breadth, pairing the two inputs positionally.

    Non-negative scores are multiplied by the ratio, negative scores by
    (1 - ratio). Returns a list as long as the shorter input.
    """
    return [
        score*ratio if score >= 0 else score*(1-ratio)
        for score, ratio in zip(t0_score, ratio_column)
    ]

# Cap one stock's score so it cannot dominate its group's total
def adjust_score_for_smooth(row, column_name, max_percent, mark):
    """Return row[column_name], capped when it exceeds max_percent of row['total'].

    A capped value keeps the sign of the original and has magnitude
    ``rest / (1 - max_percent) - rest``, where ``rest`` is row['total'] minus
    the absolute score. When no cap is needed, ``mark[0]`` is set to 0 and the
    score is returned unchanged.
    """
    score = row[column_name]

    # Within the allowed share: flag it through `mark` and leave the score alone
    if not abs(score) > row['total'] * max_percent:
        mark[0] = 0
        return score

    rest = row['total'] - abs(row[column_name])
    capped = rest / (1 - max_percent) - rest
    return capped if score >= 0 else -capped

# Apply the capping function above to every group of a grouping scheme
def apply_smooth_score(score_dict, group_type, type_name):
    """Add a capped score column 't0_<group_type>' to every frame in `score_dict`.

    For each group of the chosen scheme, the members' 't0_score' columns are
    placed side by side, a per-row total of absolute scores is computed once,
    and adjust_score_for_smooth is applied per stock. Passes repeat while the
    shared `mark` flag stays at 1, i.e. until some value is found within the
    limit.

    Parameters
    ----------
    score_dict : dict
        Ticker -> DataFrame with a 't0_score' column; frames are modified in place.
    group_type : str
        'all_stock', 'industry_name', 'industry_perform' or 'marketcap_group'.
    type_name : str
        'itd' (rows follow time_series) or 'eod' (rows follow score_date_series).

    Raises
    ------
    ValueError
        If `group_type` or `type_name` is not one of the values above.
    """
    if type_name == 'itd':
        initial_score_df = time_series.copy()
    elif type_name == 'eod':
        initial_score_df = score_date_series.copy()
    else:
        raise ValueError(f"type_name must be 'itd' or 'eod', got {type_name!r}")

    # Group tags (values of group_map_dict) that belong to each scheme
    group_tags = {
        'industry_perform': ['hs'],
        'marketcap_group': ['cap'],
        'industry_name': ['A', 'B', 'C', 'D'],
    }
    if group_type == 'all_stock':
        key_list = all_stock_key_list
    elif group_type in group_tags:
        key_list = [key for key, value in group_map_dict.items() if value in group_tags[group_type]]
    else:
        raise ValueError(f"unknown group_type: {group_type!r}")

    for key in key_list:
        if group_type == 'all_stock':
            stock_list = stock_classification_df['stock'].tolist()
        else:
            stock_list = stock_classification_df[stock_classification_df[group_type]==key]['stock'].dropna().tolist()

        # Only stocks that actually have a score can be smoothed. Skipping the
        # rest here replaces the former bare `except: pass`, which hid the
        # problem until the apply below failed on the missing column.
        scored_stocks = [stock for stock in stock_list
                         if stock in score_dict and 't0_score' in score_dict[stock]]
        if not scored_stocks:
            # Empty group, or nothing scored: nothing to smooth or write back
            continue

        score_df = initial_score_df.copy()
        for stock in scored_stocks:
            score_df[stock] = score_dict[stock]['t0_score']

        # Largest share of the total one stock may hold: 5 / group size,
        # bounded to the range [0.1, 0.5]
        max_percent = max(0.1, min(5*(1/len(stock_list)), 0.5))
        # Column 0 is the date; the total is computed once, before any capping
        score_df['total'] = score_df.iloc[:, 1:].abs().sum(axis=1)

        # With no rows the flag could never be cleared, so skip the passes
        if not score_df.empty:
            mark = [1]
            while mark[0] == 1:
                for stock in scored_stocks:
                    score_df[stock] = score_df.iloc[:, 1:].apply(adjust_score_for_smooth, axis=1, args=(stock, max_percent, mark))

        for stock in scored_stocks:
            score_dict[stock][f't0_{group_type}'] = score_df[stock]

##### Dòng tiền vào nhóm cổ phiếu EOD

In [19]:
# Add the capped contribution columns (t0_<group_type>) to the EOD score frames,
# once per grouping scheme.
_eod_group_types = ('all_stock', 'industry_name', 'industry_perform', 'marketcap_group')
for group_type in _eod_group_types:
    apply_smooth_score(eod_score_dict, group_type, 'eod')

In [20]:
# Per-session breadth, used to adjust the money-flow scores.
# temp_df holds one 0/1 flag per stock and date: 1 when t0_score > 0.
temp_df = date_series.copy()
for stock, df in eod_score_dict.items():
    temp_df[stock] = eod_score_dict[stock]['t0_score']
# Vectorised comparison instead of the deprecated DataFrame.applymap; missing
# scores compare False and therefore become 0, as before.
temp_df.iloc[:,1:] = (temp_df.iloc[:,1:] > 0).astype('int64')

eod_market_breath = date_series.copy()


def _group_breadth(stock_list):
    """Return (flag table of the group, share of its stocks with a positive score per row)."""
    group_flags = temp_df[['date'] + [columns for columns in stock_list]]
    return group_flags, group_flags.iloc[:,1:].sum(axis=1)/len(stock_list)


industry_name_breadth_dict = {}
for key in eod_industry_name.keys():
    stock_list = stock_classification_df[stock_classification_df['industry_name']==key]['stock'].tolist()
    industry_name_breadth_dict[key], eod_market_breath[key] = _group_breadth(stock_list)

industry_perform_breadth_dict = {}
for key in eod_industry_perform.keys():
    stock_list = stock_classification_df[stock_classification_df['industry_perform']==key]['stock'].tolist()
    industry_perform_breadth_dict[key], eod_market_breath[key] = _group_breadth(stock_list)

marketcap_group_breadth_dict = {}
for key in eod_marketcap_group.keys():
    stock_list = stock_classification_df[stock_classification_df['marketcap_group']==key]['stock'].tolist()
    marketcap_group_breadth_dict[key], eod_market_breath[key] = _group_breadth(stock_list)

all_stock_breadth_dict = {}
for key in eod_all_stock.keys():
    stock_list = stock_classification_df['stock'].tolist()
    all_stock_breadth_dict[key], eod_market_breath[key] = _group_breadth(stock_list)

# Weight each stock's capped t0 scores by the breadth of the groups it belongs to
for stock, df in eod_score_dict.items():
    # .item() requires exactly one classification row for the stock
    stock_row = stock_classification_df[stock_classification_df['stock']==stock]
    name_of_industry_name = stock_row['industry_name'].item()
    name_of_industry_perform = stock_row['industry_perform'].item()
    name_of_marketcap_group = stock_row['marketcap_group'].item()

    df[f't0_industry_name'] = adjust_score_by_breath(df['t0_industry_name'], eod_market_breath[name_of_industry_name])
    df[f't0_industry_perform'] = adjust_score_by_breath(df['t0_industry_perform'], eod_market_breath[name_of_industry_perform])
    df[f't0_marketcap_group'] = adjust_score_by_breath(df['t0_marketcap_group'], eod_market_breath[name_of_marketcap_group])
    df[f't0_all_stock'] = adjust_score_by_breath(df['t0_all_stock'], eod_market_breath['all_stock'])

In [21]:
# Build the money-flow score table for the stock groups.
eod_group_score_df = score_date_series.copy()

# Date-only frames created by the original cell; nothing in this cell fills them.
eod_industry_name_score_df = date_series.copy()
eod_industry_perform_score_df = date_series.copy()
eod_marketcap_group_score_df = date_series.copy()

# (group dict, classification column, per-stock score column); order fixes the
# column order of eod_group_score_df: all stocks, industries, performance groups,
# market-cap groups.
for group_dict, class_column, score_column in [
    (eod_all_stock, None, 't0_all_stock'),
    (eod_industry_name, 'industry_name', 't0_industry_name'),
    (eod_industry_perform, 'industry_perform', 't0_industry_perform'),
    (eod_marketcap_group, 'marketcap_group', 't0_marketcap_group'),
]:
    for group_name in group_dict.keys():
        if class_column is None:
            member_stocks = stock_classification_df['stock']
        else:
            member_stocks = stock_classification_df.loc[
                stock_classification_df[class_column] == group_name, 'stock'
            ]

        # The group score is the plain mean of its members' adjusted t0 scores.
        score_df = date_series.copy()
        for stock in member_stocks:
            score_df[stock] = eod_score_dict[stock][score_column]
        score_df['total'] = score_df.iloc[:, 1:].mean(axis=1)
        eod_group_score_df[group_name] = score_df['total']

- Lưu lại dữ liệu cho capital allocation

In [22]:
# Load the previously computed history from the 'full_group_score_df' sheet.
period_group_score_df = pd.read_excel(
    "period_data/period_processed_data.xlsx",
    sheet_name='full_group_score_df',
)

# Stack this quarter's rows on top of the stored history.
current_quarter_group_score = eod_group_score_df[eod_group_score_df['date'] >= current_quarter_span[0]]
full_group_score_df = pd.concat(
    [current_quarter_group_score, period_group_score_df],
    axis=0,
).reset_index(drop=True)

# Save to Excel.
with pd.ExcelWriter('capital_allocation/history_data.xlsx', engine='openpyxl') as writer:
    full_group_score_df.to_excel(writer, sheet_name='full_group_score_df', index=False)

##### Dòng tiền vào nhóm cổ phiếu ITD

In [23]:
# Add the money-flow contribution columns for each stock grouping to the ITD
# score dict (per the original note: with outliers already removed).
for group_type in ('all_stock', 'industry_name', 'industry_perform', 'marketcap_group'):
    apply_smooth_score(itd_score_dict, group_type, 'itd')

In [24]:
# Compute the breadth of every intraday time slice; it is used below to adjust the
# money-flow scores. temp_df holds a 0/1 "t0 score is positive" flag per stock.
temp_df = time_series.copy()
for stock, df in itd_score_dict.items():
    temp_df[stock] = df['t0_score']
# Vectorised replacement for the deprecated DataFrame.applymap: 1 where the score is
# strictly positive, 0 otherwise (NaN compares False and therefore becomes 0).
temp_df.iloc[:, 1:] = (temp_df.iloc[:, 1:] > 0).astype(int)

itd_market_breath = time_series.copy()
current_stock_list = list(itd_score_dict.keys())
current_stock_set = set(current_stock_list)

# One {group: flag frame} dict per grouping. Only stocks present in itd_score_dict
# are counted; the breadth is the share of those stocks whose flag is 1.
industry_name_breadth_dict = {}
industry_perform_breadth_dict = {}
marketcap_group_breadth_dict = {}
all_stock_breadth_dict = {}
for breadth_dict, group_dict, class_column in [
    (industry_name_breadth_dict, itd_industry_name, 'industry_name'),
    (industry_perform_breadth_dict, itd_industry_perform, 'industry_perform'),
    (marketcap_group_breadth_dict, itd_marketcap_group, 'marketcap_group'),
    (all_stock_breadth_dict, itd_all_stock, None),  # None: every classified stock
]:
    for key in group_dict.keys():
        if class_column is None:
            temp_stock_list_full = stock_classification_df['stock'].tolist()
        else:
            temp_stock_list_full = stock_classification_df.loc[
                stock_classification_df[class_column] == key, 'stock'
            ].tolist()
        # Same members as a set intersection, but de-duplicated in classification
        # order so the column order does not depend on string hashing.
        stock_list = [s for s in dict.fromkeys(temp_stock_list_full) if s in current_stock_set]

        breadth_dict[key] = temp_df[['date'] + stock_list]
        itd_market_breath[key] = breadth_dict[key].iloc[:, 1:].sum(axis=1) / len(stock_list)

# Adjust each stock's t0 money-flow scores by the breadth of the groups it belongs to.
for stock, df in itd_score_dict.items():
    # Single classification lookup per stock; .item() raises unless exactly one row matches.
    stock_class = stock_classification_df[stock_classification_df['stock'] == stock]
    name_of_industry_name = stock_class['industry_name'].item()
    name_of_industry_perform = stock_class['industry_perform'].item()
    name_of_marketcap_group = stock_class['marketcap_group'].item()

    df['t0_industry_name'] = adjust_score_by_breath(df['t0_industry_name'], itd_market_breath[name_of_industry_name])
    df['t0_industry_perform'] = adjust_score_by_breath(df['t0_industry_perform'], itd_market_breath[name_of_industry_perform])
    df['t0_marketcap_group'] = adjust_score_by_breath(df['t0_marketcap_group'], itd_market_breath[name_of_marketcap_group])
    df['t0_all_stock'] = adjust_score_by_breath(df['t0_all_stock'], itd_market_breath['all_stock'])

In [25]:
# Build the intraday money-flow score table for the stock groups.
itd_group_score_df = time_series.copy()
current_stock_list = list(itd_score_dict.keys())
current_stock_set = set(current_stock_list)

# Time-only frames created by the original cell; nothing in this cell fills them.
itd_industry_name_score_df = time_series.copy()
itd_industry_perform_score_df = time_series.copy()
itd_marketcap_group_score_df = time_series.copy()

# (group dict, classification column, per-stock score column); order fixes the
# column order of itd_group_score_df: all stocks, industries, performance groups,
# market-cap groups.
for group_dict, class_column, score_column in [
    (itd_all_stock, None, 't0_all_stock'),
    (itd_industry_name, 'industry_name', 't0_industry_name'),
    (itd_industry_perform, 'industry_perform', 't0_industry_perform'),
    (itd_marketcap_group, 'marketcap_group', 't0_marketcap_group'),
]:
    for group_name in group_dict.keys():
        if class_column is None:
            temp_stock_list_full = stock_classification_df['stock'].tolist()
        else:
            temp_stock_list_full = stock_classification_df.loc[
                stock_classification_df[class_column] == group_name, 'stock'
            ].tolist()
        # Same members as a set intersection, but in classification order: a set's
        # iteration order depends on string hashing, which made the float mean
        # below vary in its last bits from run to run.
        stock_list = [s for s in dict.fromkeys(temp_stock_list_full) if s in current_stock_set]

        # The group score is the plain mean of its members' adjusted t0 scores.
        score_df = time_series.copy()
        for stock in stock_list:
            score_df[stock] = itd_score_dict[stock][score_column]
        score_df['total'] = score_df.iloc[:, 1:].mean(axis=1)
        itd_group_score_df[group_name] = score_df['total']

#### Xếp hạng dòng tiền các nhóm cổ phiếu

In [26]:
# Build the ranking table for the stock groups.
def create_ranking_df(score_df):
    """Rank the groups of ``score_df`` per session by their 5-row average score.

    Parameters
    ----------
    score_df : pandas.DataFrame
        First column is skipped (the callers pass 'date' there); every other
        column is one group's score series. Values are aligned on the index of
        the module-level ``date_series``.

    Returns
    -------
    pandas.DataFrame
        The first 20 rows (fewer if ``date_series`` is shorter) of
        ``date_series`` plus one rank column per group. Rank 1 is the highest
        averaged score; ties share the smallest rank (``method='min'``).

    Notes
    -----
    The average for row ``i`` covers rows ``i .. i+4``; rows with fewer than five
    rows available get a score of 0 before ranking.
    NOTE(review): this matches a "last 5 sessions" average only if the rows are
    ordered newest first - confirm against the caller.
    """
    groups = list(score_df.columns[1:])
    base_index = date_series.index

    # Forward-looking 5-row mean per group, computed by rolling over the reversed series.
    ranking_score = date_series.copy()
    for group in groups:
        t0_score = score_df[group].reindex(base_index)
        ranking_score[group] = t0_score[::-1].rolling(window=5).mean()[::-1]
    # One fill after all columns exist gives the same frame as filling per column.
    ranking_score = ranking_score.fillna(0)

    ranking_df = date_series.copy()
    for group in groups:
        ranking_df[group] = 0

    # Only the first 20 rows are returned, so only those rows need to be ranked.
    rows_to_rank = min(20, len(ranking_df))
    for i in range(rows_to_rank):
        ranking_df.iloc[i, 1:] = ranking_score.iloc[i, 1:].rank(ascending=False, method='min')

    return ranking_df.head(20)

# Score columns that belong to each grouping of the EOD group-score table.
industry_name_columns = [
    'ban_le', 'bao_hiem', 'bds', 'bds_kcn', 'chung_khoan',
    'cong_nghe', 'cong_nghiep', 'dau_khi', 'det_may', 'dulich_dv',
    'dv_hatang', 'hoa_chat', 'htd', 'khoang_san', 'ngan_hang', 'tai_chinh',
    'thep', 'thuc_pham', 'thuy_san', 'van_tai', 'vlxd', 'xd', 'y_te',
]
industry_perform_columns = ['A', 'B', 'C', 'D']
marketcap_group_columns = ['large', 'mid', 'small', 'penny']

# Groups are ranked only against the other groups of the same grouping.
industry_name_ranking = create_ranking_df(eod_group_score_df[['date'] + industry_name_columns])
industry_perform_ranking = create_ranking_df(eod_group_score_df[['date'] + industry_perform_columns])
marketcap_group_ranking = create_ranking_df(eod_group_score_df[['date'] + marketcap_group_columns])

# Join the three ranking tables side by side on the session date.
group_score_ranking = (
    industry_name_ranking
    .merge(industry_perform_ranking, on='date', how='left')
    .merge(marketcap_group_ranking, on='date', how='left')
)

In [27]:
# Reshape the wide ranking table into long form: one (date, rank, name) row per
# session and group, with the groups stacked in column order.
melted_frames = []
for column in group_score_ranking.columns[1:]:
    # Explicit copy so the column edits below never touch group_score_ranking.
    temp_df = group_score_ranking[['date', column]].copy()
    # Flat label list: a nested list here would build MultiIndex columns.
    temp_df.columns = ['date', 'rank']
    temp_df['name'] = column
    melted_frames.append(temp_df)

# Concatenate once instead of growing the frame inside the loop.
group_score_ranking_melted = pd.concat(melted_frames, axis=0)
group_score_ranking_melted['name'] = group_score_ranking_melted['name'].map(name_map_dict)

#### Chỉ số thanh khoản các nhóm cổ phiếu

- Chỉ dùng chỉ số thanh khoản ITD vì chỉ số này không có ý nghĩa dài hạn

In [28]:
itd_group_liquidity_df = time_series.copy().sort_values('date').reset_index(drop=True)

# Rows dated on or after the first date of date_series form the window used here.
# The filtered, time-ascending time_percent frame is identical for every group, so
# it is built once and copied per group.
liquidity_start_date = date_series['date'].iloc[0]
liquidity_base = (
    time_percent[time_percent['date'] >= liquidity_start_date]
    .sort_values('date')
    .reset_index(drop=True)
)

# Same calculation for every grouping: all stocks, industries, performance groups,
# market-cap groups (this order fixes the column order of the result).
for group_dict in (itd_all_stock, itd_industry_name, itd_industry_perform, itd_marketcap_group):
    for name in group_dict.keys():
        liquidity_t0 = liquidity_base.copy()
        liquidity_month_ma5 = 0

        for stock, df in group_dict[name].items():
            liquidity_t0[stock] = df[df['date'] >= liquidity_start_date].sort_values('date')['volume'].reset_index(drop=True)
            # First row of the stock's EOD frame supplies its 'ma5_V' reference volume.
            liquidity_month_ma5 += eod_stock_dict[stock].iloc[0]['ma5_V']
        # Columns 0-1 come from time_percent; the rest are per-stock volumes, turned
        # into running totals over the window.
        for column in liquidity_t0.columns[2:]:
            liquidity_t0[column] = liquidity_t0[column].cumsum()

        liquidity_t0['volume_t0'] = liquidity_t0.iloc[:, 2:].sum(axis=1)
        # Reference volume scaled by the 'percent' column of time_percent.
        liquidity_t0['volume_month_ma5'] = liquidity_month_ma5 * liquidity_t0['percent']
        liquidity_t0['ratio'] = liquidity_t0['volume_t0'] / liquidity_t0['volume_month_ma5']
        # The first row's ratio is forced to 0.
        # NOTE(review): presumably because the opening slice is not meaningful - confirm.
        liquidity_t0.loc[0, 'ratio'] = 0

        itd_group_liquidity_df[name] = liquidity_t0['ratio']

itd_group_liquidity_df = itd_group_liquidity_df.sort_values('date', ascending=False).reset_index(drop=True)

#### Tính dữ liệu cho MS

In [29]:
def transform_ms(stock_group):
    """Aggregate per-stock trend columns into per-group averages for the current quarter.

    Parameters
    ----------
    stock_group : dict
        ``{group_name: {stock: DataFrame}}``; each stock frame must provide the
        columns 'trend_5p', 'trend_20p', 'trend_60p', 'trend_120p', 'trend_240p'
        and 'trend_480p'. The input is not modified.

    Returns
    -------
    dict
        ``{group_name: DataFrame}`` with a 'date' column (taken from the
        module-level ``date_series``) and one column per trend holding the sum of
        the members' values divided by the number of members (missing values count
        as 0). Only rows with ``date >= current_quarter_span[0]`` are kept, sorted
        by date descending.
    """
    trend_columns = ['trend_5p', 'trend_20p', 'trend_60p', 'trend_120p', 'trend_240p', 'trend_480p']

    # Base frame of session dates on a fresh 0..n-1 index; trend values align on it.
    dates_df = pd.DataFrame(date_series['date'].tolist(), columns=['date'])
    quarter_start = current_quarter_span[0]

    # A new result dict is built instead of deep-copying every stock DataFrame:
    # the per-stock frames are only read here, never modified.
    group_ms = {}
    for group_name, stocks in stock_group.items():
        group_trends = dates_df.copy()

        for trend in trend_columns:
            # One column per member stock, aligned on the stocks' row index.
            trend_data = pd.concat([stocks[stock][trend] for stock in stocks], axis=1).fillna(0)
            group_trends[trend] = trend_data.sum(axis=1) / len(stocks)

        in_quarter = group_trends[group_trends['date'] >= quarter_start]
        group_ms[group_name] = in_quarter.sort_values('date', ascending=False)

    return group_ms

In [30]:
# Compute the MS tables for every stock grouping.
all_stock_ms = transform_ms(eod_all_stock)
industry_name_ms = transform_ms(eod_industry_name)
industry_perform_ms = transform_ms(eod_industry_perform)
marketcap_group_ms = transform_ms(eod_marketcap_group)

# Merge all MS tables into one long table, tagging each row with its group key.
# Frames are collected first and concatenated once instead of growing a DataFrame
# inside the loop.
ms_frames = []
for item in [all_stock_ms, industry_name_ms, industry_perform_ms, marketcap_group_ms]:
    for group, df in item.items():
        df['name'] = group
        ms_frames.append(df)
temp_market_ms = pd.concat(ms_frames, axis=0) if ms_frames else pd.DataFrame()

# Load the previously computed MS history.
period_market_ms = pd.read_excel("period_data/period_processed_data.xlsx", sheet_name='full_market_ms_df')

# Stack this quarter's rows on top of the stored history.
full_market_ms = pd.concat([temp_market_ms, period_market_ms], axis=0).reset_index(drop=True)

# Trim the full table down to what is displayed: the complete history for
# 'all_stock', the 60 most recent rows for every other group.
market_ms_frames = []
for key in group_stock_key_list:
    key_ms = full_market_ms[full_market_ms['name'] == key].sort_values('date', ascending=False).reset_index(drop=True)
    if key != 'all_stock':
        key_ms = key_ms.iloc[:60]
    market_ms_frames.append(key_ms)
market_ms = pd.concat(market_ms_frames, axis=0) if market_ms_frames else pd.DataFrame()

market_ms['name'] = market_ms['name'].map(name_map_dict)

#### Tính dữ liệu cho group price index

In [32]:
def calculate_total_change(stock_group, name, price_index_date_series):
    """Return the average per-row price change of one stock group, scaled by 1000.

    Parameters
    ----------
    stock_group : dict
        ``{group_name: {stock: DataFrame with a 'close' column}}``.
    name : hashable
        Key of the group inside ``stock_group``.
    price_index_date_series : pandas.DataFrame
        Base frame whose index the closes are aligned on; its first column is
        excluded from the sum (the callers pass the date column there).

    Returns
    -------
    pandas.Series
        Named 'total_change'. For each row: the sum of the members' fractional
        close change versus the following row, divided by the number of members,
        then multiplied by 100 and by 10. Rows where no member has a change yield 0.

    Notes
    -----
    Missing closes are filled from the following row (the earlier session when rows
    are ordered newest first) before the change is computed. This is written out
    explicitly because the implicit ``fill_method='pad'`` default of ``pct_change``
    is deprecated in pandas.
    """
    members = stock_group[name]
    period_index_df = price_index_date_series.copy()

    for stock, df in members.items():
        # Column assignment aligns 'close' on the base frame's index.
        period_index_df[stock] = df['close']
        # Reverse so the change is measured against the following row of the frame.
        bottom_up_close = period_index_df[stock][::-1]
        period_index_df[stock] = bottom_up_close.ffill().pct_change(fill_method=None)[::-1]

    total_change = period_index_df.iloc[:, 1:].sum(axis=1)
    total_change = (total_change / len(members)) * 100
    total_change = total_change * 10

    return total_change.rename('total_change')

In [33]:
# Load the previously computed group price-change history.
period_group_price_change = pd.read_excel(
    "period_data/period_processed_data.xlsx",
    sheet_name='full_group_price_change_df',
)

# Compute the current quarter's price change for every group key of every grouping.
temp_group_price_change = date_series.copy()
for key_list, stock_group in (
    (all_stock_key_list, eod_all_stock),
    (industry_name_list, eod_industry_name),
    (industry_perform_list, eod_industry_perform),
    (marketcap_group_list, eod_marketcap_group),
):
    for key in key_list:
        temp_group_price_change[key] = calculate_total_change(stock_group, key, date_series)

in_current_quarter = temp_group_price_change['date'] >= current_quarter_span[0]
temp_group_price_change = temp_group_price_change[in_current_quarter]

# Combine the current quarter with the history, newest row first.
group_price_index_df = (
    pd.concat([temp_group_price_change, period_group_price_change])
    .sort_values('date', ascending=False)
    .reset_index(drop=True)
)

# Turn per-row changes into an index level: running total accumulated from the
# last row upwards, offset by 1000.
for key in group_stock_key_list:
    bottom_up_total = group_price_index_df[key][::-1].cumsum()
    group_price_index_df[key] = bottom_up_total[::-1] + 1000

#### Tính dữ liệu cho các chỉ số kĩ thuật

##### Các hàm tính toán

In [35]:
def calculate_ta_df(price_df):
    """Return the OHLCV columns of ``price_df`` plus calendar bucket columns.

    The result is a new frame with 'stock', 'date', 'open', 'high', 'low',
    'close', 'volume' followed by:
    'week' ('%Y-%U' string), 'month', 'quarter' and 'year' (pandas periods).
    ``price_df['date']`` must be datetime-like; the input is not modified.
    """
    base_columns = ['stock', 'date', 'open', 'high', 'low', 'close', 'volume']
    session_dates = price_df['date'].dt
    return price_df[base_columns].assign(
        week=session_dates.strftime('%Y-%U'),
        month=session_dates.to_period('M'),
        quarter=session_dates.to_period('Q'),
        year=session_dates.to_period('Y'),
    )

In [36]:
def calculate_candle_ta_df(ta_df, input_type):
    """Add candle reference levels and the close's distance from them.

    For each of 'week', 'month', 'quarter', 'year' (columns of ``ta_df``), with
    periods taken in order of first appearance:

    * ``{frame}_last_low`` / ``{frame}_last_high``: min low / max high over the
      rows of the second period, or None when there is only one period;
    * ``{frame}_open``: 'open' of the last row belonging to the first period
      (column omitted when the frame is empty);
    * ``from_{frame}_{last_high|last_low|open}``: ``close - level`` for
      ``input_type == 'index'``, ``(close - level) / level`` for 'stock'.
      Any other ``input_type`` adds no ``from_`` columns.

    NOTE(review): "first period = current, second = previous" holds only if rows
    are ordered newest first - confirm against the caller.

    Returns a modified copy; ``ta_df`` itself is left untouched.
    """
    ta_df_copy = ta_df.copy()
    time_frames = ['week', 'month', 'quarter', 'year']

    # Build the row masks for the first and second period of every time frame
    # before any new column is added.
    period_masks = {}
    for frame in time_frames:
        periods = ta_df_copy[frame].unique()
        first_mask = ta_df_copy[frame] == periods[0] if len(periods) > 0 else None
        second_mask = ta_df_copy[frame] == periods[1] if len(periods) > 1 else None
        period_masks[frame] = (first_mask, second_mask)

    # Reference levels, broadcast to every row.
    for frame in time_frames:
        first_mask, second_mask = period_masks[frame]
        if second_mask is None:
            ta_df_copy[f'{frame}_last_low'] = None
            ta_df_copy[f'{frame}_last_high'] = None
        else:
            ta_df_copy[f'{frame}_last_low'] = ta_df_copy.loc[second_mask, 'low'].min()
            ta_df_copy[f'{frame}_last_high'] = ta_df_copy.loc[second_mask, 'high'].max()
        if first_mask is not None:
            ta_df_copy[f'{frame}_open'] = ta_df_copy.loc[first_mask, 'open'].iloc[-1]

    # Distance of the close from each level: relative for stocks, absolute for indices.
    for frame in time_frames:
        for suf in ('last_high', 'last_low', 'open'):
            column_name = f'{frame}_{suf}'
            if column_name not in ta_df_copy.columns:
                continue
            if input_type == 'stock':
                ta_df_copy[f'from_{frame}_{suf}'] = (ta_df_copy['close'] - ta_df_copy[column_name]) / ta_df_copy[column_name]
            elif input_type == 'index':
                ta_df_copy[f'from_{frame}_{suf}'] = ta_df_copy['close'] - ta_df_copy[column_name]

    return ta_df_copy

In [37]:
def calculate_fibo_ta_df(ta_df, input_type):
    """Add Fibonacci retracement levels and the close's distance from them.

    For each of 'month', 'quarter', 'year' the high/low are the max 'high' and
    min 'low' over the rows of the first two distinct periods (in order of
    appearance). The levels are ``high - (high - low) * r`` for r = 0.382, 0.5
    and 0.618, stored as ``{frame}_fibo_382/500/618``.

    ``from_{frame}_fibo_*`` is ``close - level`` for ``input_type == 'index'``
    and ``(close - level) / abs(level)`` for 'stock'; any other ``input_type``
    adds no ``from_`` columns. Returns a modified copy of ``ta_df``.
    """
    ta_df_copy = ta_df.copy()
    time_frames = ('month', 'quarter', 'year')
    fibo_levels = (('382', 0.382), ('500', 0.5), ('618', 0.618))

    def rows_of_first_two_periods(frame):
        first_two = ta_df_copy[frame].unique()[:2].tolist()
        return ta_df_copy[ta_df_copy[frame].isin(first_two)]

    # Range extremes, broadcast to every row (all highs first, then all lows).
    for frame in time_frames:
        ta_df_copy[f'{frame}_high'] = rows_of_first_two_periods(frame)['high'].max()
    for frame in time_frames:
        ta_df_copy[f'{frame}_low'] = rows_of_first_two_periods(frame)['low'].min()

    # Retracement levels measured down from the high.
    for frame in time_frames:
        range_high = ta_df_copy[f'{frame}_high']
        range_low = ta_df_copy[f'{frame}_low']
        for label, ratio in fibo_levels:
            ta_df_copy[f'{frame}_fibo_{label}'] = range_high - (range_high - range_low) * ratio

    # Distance of the close from each level: relative for stocks, absolute for indices.
    if input_type in ('stock', 'index'):
        for frame in time_frames:
            for label, _ in fibo_levels:
                level = ta_df_copy[f'{frame}_fibo_{label}']
                if input_type == 'stock':
                    ta_df_copy[f'from_{frame}_fibo_{label}'] = (ta_df_copy['close'] - level) / abs(level)
                else:
                    ta_df_copy[f'from_{frame}_fibo_{label}'] = (ta_df_copy['close'] - level)

    return ta_df_copy

In [38]:
def _previous_period_rows(frame_df, period_column):
    """Return the rows of the second distinct period in ``period_column``, or None if there is only one."""
    periods = frame_df[period_column].unique()
    if len(periods) < 2:
        return None
    return frame_df[frame_df[period_column] == periods[1]]


def calculate_pivot_ta_df(ta_df, input_type):
    """Add pivot levels and the close's distance from them.

    For each of 'month', 'quarter', 'year', the rows of the second distinct
    period (in order of appearance) provide:

    * ``{frame}_high`` / ``{frame}_low``: max 'high' / min 'low';
    * ``{frame}_close``: 'close' of the first of those rows;
    * ``{frame}_pivot``: ``(high + low + close) / 3``.

    When a frame has fewer than two periods the three inputs are None and the
    pivot and its distance come out as missing values. The check is explicit
    (as in ``calculate_candle_ta_df``) rather than a bare ``except``, so real
    errors such as a missing column are raised instead of being hidden.

    ``from_{frame}_pivot`` is ``close - pivot`` for ``input_type == 'index'`` and
    ``(close - pivot) / abs(pivot)`` for 'stock'; any other ``input_type`` adds
    no ``from_`` columns. Returns a modified copy of ``ta_df``.

    NOTE(review): "second period = previous period" holds only if rows are
    ordered newest first - confirm against the caller.
    """
    ta_df_copy = ta_df.copy()
    time_frames = ('month', 'quarter', 'year')

    previous_rows = {frame: _previous_period_rows(ta_df_copy, frame) for frame in time_frames}

    # Column order: all highs, then all lows, then all closes.
    for frame in time_frames:
        rows = previous_rows[frame]
        ta_df_copy[f'{frame}_high'] = None if rows is None else rows['high'].max()
    for frame in time_frames:
        rows = previous_rows[frame]
        ta_df_copy[f'{frame}_low'] = None if rows is None else rows['low'].min()
    for frame in time_frames:
        rows = previous_rows[frame]
        # An empty selection (e.g. a missing period value) also yields None.
        has_rows = rows is not None and len(rows) > 0
        ta_df_copy[f'{frame}_close'] = rows['close'].iloc[0] if has_rows else None

    for frame in time_frames:
        ta_df_copy[f'{frame}_pivot'] = (ta_df_copy[f'{frame}_high'] + ta_df_copy[f'{frame}_low'] + ta_df_copy[f'{frame}_close'])/3

    # Distance of the close from each pivot: absolute for indices, relative for stocks.
    if input_type == 'index':
        for frame in time_frames:
            ta_df_copy[f'from_{frame}_pivot'] = (ta_df_copy['close'] - ta_df_copy[f'{frame}_pivot'])

    if input_type == 'stock':
        for frame in time_frames:
            ta_df_copy[f'from_{frame}_pivot'] = (ta_df_copy['close'] - ta_df_copy[f'{frame}_pivot'])/abs(ta_df_copy[f'{frame}_pivot'])

    return ta_df_copy

In [39]:
def calculate_ma_ta_df(ta_df,input_type):
    """Add moving averages of 'close' and the close's distance from them.

    ``ma5`` .. ``ma480`` average the current row and the rows below it (the
    series is rolled in reverse), using however many rows exist when fewer than
    the window are available (``min_periods=1``).

    ``from_month_ma5/ma20``, ``from_quarter_ma60/ma120`` and
    ``from_year_ma240/ma480`` hold ``close - ma`` for ``input_type == 'index'``
    and ``(close - ma) / ma`` for 'stock'; any other ``input_type`` adds no
    ``from_`` columns. Returns a modified copy of ``ta_df``.
    """
    ta_df_copy = ta_df.copy()
    windows_by_frame = (('month', (5, 20)), ('quarter', (60, 120)), ('year', (240, 480)))

    # Rolling over the reversed series makes each window extend towards later rows.
    bottom_up_close = ta_df_copy['close'][::-1]
    for _, windows in windows_by_frame:
        for window in windows:
            ta_df_copy[f'ma{window}'] = bottom_up_close.rolling(window=window, min_periods=1).mean()[::-1]

    # Distance of the close from each average: relative for stocks, absolute for indices.
    if input_type in ('stock', 'index'):
        for frame, windows in windows_by_frame:
            for window in windows:
                average = ta_df_copy[f'ma{window}']
                if input_type == 'stock':
                    ta_df_copy[f'from_{frame}_ma{window}'] = (ta_df_copy['close'] - average)/average
                else:
                    ta_df_copy[f'from_{frame}_ma{window}'] = (ta_df_copy['close'] - average)

    return ta_df_copy

In [40]:
def transform_ta_df(ta_df,ta_name):
    """Reshape the first row of a wide TA frame into a long table of levels.

    For each time frame ('month', 'quarter', 'year') the level columns of the
    requested indicator family are read from the FIRST row of `ta_df` and
    emitted as one output row per level.

    Parameters
    ----------
    ta_df : pandas.DataFrame
        Wide frame containing a `stock` column plus the value and `from_*`
        columns of the requested family (a missing column raises KeyError).
    ta_name : str
        One of 'candle', 'fibo', 'pivot', 'ma'.

    Returns
    -------
    pandas.DataFrame
        Columns: stock, name, value, from, order, id, ta_name. `value` is
        formatted as a string with two decimals when it is an int/float;
        `id` holds the time frame.

    Raises
    ------
    ValueError
        If `ta_name` is not one of the supported families (previously this
        surfaced as an UnboundLocalError on the first loop iteration).
    """
    supported = ('candle', 'fibo', 'pivot', 'ma')
    if ta_name not in supported:
        raise ValueError(f"Unsupported ta_name {ta_name!r}; expected one of {supported}")

    # Display label -> column suffix for families whose columns are named
    # "<time_frame>_<suffix>" / "from_<time_frame>_<suffix>".
    level_suffixes = {
        'candle': {'Open': 'open', 'Last High': 'last_high', 'Last Low': 'last_low'},
        'fibo': {'Fibo 0.382': 'fibo_382', 'Fibo 0.500': 'fibo_500', 'Fibo 0.618': 'fibo_618'},
        'pivot': {'Pivot': 'pivot'},
    }
    # Moving-average windows reported under each time frame; their columns are
    # named "ma<window>" / "from_<time_frame>_ma<window>".
    ma_windows = {'month': (5, 20), 'quarter': (60, 120), 'year': (240, 480)}

    frames = []
    for time_frame in ['month','quarter','year']:
        if ta_name == 'ma':
            windows = ma_windows[time_frame]
            labels = [f'MA{w}' for w in windows]
            value_cols = [f'ma{w}' for w in windows]
            from_cols = [f'from_{time_frame}_ma{w}' for w in windows]
        else:
            suffixes = level_suffixes[ta_name]
            labels = list(suffixes)
            value_cols = [f'{time_frame}_{s}' for s in suffixes.values()]
            from_cols = [f'from_{c}' for c in value_cols]

        # Only the first row is used.
        # NOTE(review): presumably the most recent session -- confirm ordering with caller.
        first_row = ta_df[['stock'] + value_cols + from_cols].iloc[0]
        n_levels = len(value_cols)
        values = first_row.iloc[1:1 + n_levels].tolist()
        distances = first_row.iloc[1 + n_levels:].tolist()

        # Pivot has a single level and keeps its fixed order value of 3;
        # the other families number their levels 1..n.
        if ta_name == 'pivot':
            order = 3
        else:
            order = list(range(1, n_levels + 1))

        level_df = pd.DataFrame({
            'stock': ta_df['stock'].iloc[0],
            'name': labels,
            'value': values,
            'from': distances,
            'order': order,
        })
        level_df['id'] = time_frame
        level_df['ta_name'] = ta_name
        level_df['value'] = level_df['value'].apply(lambda x: '{:.2f}'.format(x) if isinstance(x, (int, float)) else x)
        frames.append(level_df)

    return pd.concat(frames, axis=0)

def concat_ta_df(df,input_type):
    """Run every TA calculation for one instrument and bundle the results.

    `df` is first passed through `calculate_ta_df`; the result is fed to the
    candle, pivot, MA and fibo calculators (all called with `input_type`), and
    each wide result is reshaped with `transform_ta_df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame forwarded to `calculate_ta_df` (defined elsewhere in this
        notebook).
    input_type : str
        Forwarded unchanged to each `calculate_*_ta_df` function.

    Returns
    -------
    dict
        'concat_ta_df': the long tables stacked in the order candle, fibo,
        pivot, ma.
        'ta_dict': the wide per-indicator frames under the keys 'df_candle',
        'df_pivot', 'df_ma', 'df_fibo'.
    """
    base_ta = calculate_ta_df(df)

    # Wide frames, computed in the order candle -> pivot -> ma -> fibo.
    wide_frames = {
        'candle': calculate_candle_ta_df(base_ta,input_type),
        'pivot': calculate_pivot_ta_df(base_ta,input_type),
        'ma': calculate_ma_ta_df(base_ta,input_type),
        'fibo': calculate_fibo_ta_df(base_ta,input_type),
    }

    # Long (one row per level) version of each wide frame, same order as above.
    long_frames = {
        indicator: transform_ta_df(wide, indicator)
        for indicator, wide in wide_frames.items()
    }

    # Stacking order differs from the calculation order on purpose.
    stacked = pd.concat(
        [long_frames[indicator] for indicator in ('candle', 'fibo', 'pivot', 'ma')],
        axis=0,
    )

    return {
        'concat_ta_df': stacked,
        'ta_dict': {
            'df_candle': wide_frames['candle'],
            'df_pivot': wide_frames['pivot'],
            'df_ma': wide_frames['ma'],
            'df_fibo': wide_frames['fibo'],
        },
    }

##### Thực hiện tính toán

In [41]:
# Run the TA pipeline for every stock in `eod_stock_dict`.
#   ta_stock_dict: stock -> dict of wide per-indicator frames
#   ta_stock_df:   long TA tables of all stocks stacked together
ta_stock_dict = {}
ta_stock_frames = []

for stock, df in eod_stock_dict.items():
    # Work on a copy so the frame stored in eod_stock_dict is never handed to the pipeline directly.
    df_copy = df.copy()
    temp_ta_dict = concat_ta_df(df_copy, 'stock')

    temp_ta_stock_df = temp_ta_dict['concat_ta_df']
    ta_stock_frames.append(temp_ta_stock_df)

    ta_stock_dict[stock] = temp_ta_dict['ta_dict']

# Concatenate once instead of inside the loop: repeated pd.concat re-copies the
# accumulated frame on every iteration. pd.concat raises on an empty list, so an
# empty input keeps producing an empty DataFrame as before.
ta_stock_df = pd.concat(ta_stock_frames, axis=0) if ta_stock_frames else pd.DataFrame()