In [224]:
import os
import pandas as pd
import numpy as np
from datetime import timedelta, datetime
import datetime as dt
import copy

import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore', category=FutureWarning)
pd.options.mode.chained_assignment = None

#### Chuẩn bị các dữ liệu

##### Các dữ liệu dùng để làm map tham chiếu

In [225]:
# Read the 'name_map' reference sheet once and derive the three code -> attribute
# lookups from it (previously the same sheet was parsed from disk three times).
_name_map_sheet = pd.read_excel("period_data/stock_classification.xlsx", sheet_name='name_map')

# code -> full display name
name_map = _name_map_sheet.drop(columns=['group', 'order'])
name_map_dict = name_map.set_index('code')['full_name'].to_dict()

# code -> display order
order_map = _name_map_sheet.drop(columns=['group', 'full_name'])
order_map_dict = order_map.set_index('code')['order'].to_dict()

# code -> group tag
group_map = _name_map_sheet.drop(columns=['order', 'full_name'])
group_map_dict = group_map.set_index('code')['group'].to_dict()

# Lists of group codes for each way of partitioning the stocks, selected by group tag
all_stock_key_list = [key for key, value in group_map_dict.items() if value == 'tt']
industry_name_list = [key for key, value in group_map_dict.items() if value in ['A', 'B', 'C', 'D']]
industry_perform_list = [key for key, value in group_map_dict.items() if value == 'hs']
marketcap_group_list = [key for key, value in group_map_dict.items() if value == 'cap']

# Combined key list covering every group
group_stock_key_list = all_stock_key_list + industry_name_list + industry_perform_list + marketcap_group_list

In [226]:
# Build a lookup from period key (the sheet's 'index' column) to that row's remaining
# values as a list. Later cells read the list as [start, end, stock count]
# — TODO confirm against the sheet layout.
period_map = pd.read_excel("period_data/period_stock_list.xlsx", sheet_name='period_map')
period_map_dict = period_map.set_index('index').apply(lambda row: row.tolist(), axis=1).to_dict()

# Resolve the key of the current / previous calendar quarter so calculations can be
# limited to the right periods
def get_quarter(name, now=None):
    """Return a quarter key such as ``'q2_2024'``.

    Parameters
    ----------
    name : str
        ``'current_quarter'`` for the quarter containing ``now`` or
        ``'previous_quarter'`` for the one immediately before it.
    now : datetime, optional
        Reference moment. Defaults to ``datetime.now()``; pass an explicit value
        to get a reproducible result.

    Returns
    -------
    str
        ``'q<1-4>_<year>'``.

    Raises
    ------
    ValueError
        If ``name`` is not one of the two supported selectors (previously the
        function silently returned ``None``).
    """
    if now is None:
        now = datetime.now()

    # Months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4
    quarter_index = (now.month - 1) // 3 + 1

    if name == 'current_quarter':
        return f'q{quarter_index}_{now.year}'
    if name == 'previous_quarter':
        # The quarter before Q1 is Q4 of the previous year
        if quarter_index == 1:
            return f'q4_{now.year - 1}'
        return f'q{quarter_index - 1}_{now.year}'

    raise ValueError(f"name must be 'current_quarter' or 'previous_quarter', got {name!r}")
        
# Date spans for the whole calculation window, the current quarter and the previous quarter
_current_quarter_key = get_quarter('current_quarter')
_previous_quarter_key = get_quarter('previous_quarter')
_current_period = period_map_dict[_current_quarter_key]
_previous_period = period_map_dict[_previous_quarter_key]

# The calculation window runs from the start of q2_2020 to the end of the current quarter
calculate_time_span = [period_map_dict['q2_2020'][0], _current_period[1]]
current_quarter_span = [_current_period[0], _current_period[1]]
previous_quarter_span = [_previous_period[0], _previous_period[1]]

# Stock universe: the list for the current period and the list of every stock
period_stock_list = pd.read_excel("period_data/period_stock_list.xlsx", sheet_name='period_stock_list')
current_stock_list = period_stock_list[_current_quarter_key].dropna().tolist()
total_stock_list = period_stock_list['all_stock'].dropna().tolist()

##### Các biến thời gian

In [227]:
# VNINDEX trading dates (newest first); the first entry defines "today"
vnindex_series = pd.to_datetime(
    pd.read_csv('D:\\t2m-project\\ami-data\\ami_eod_data\\VNINDEX.csv')
    .sort_values('date', ascending=False)
    .reset_index(drop=True)['date'],
    format='%y%m%d',
)

# Trading dates restricted to the calculation window
date_series = pd.DataFrame(vnindex_series).rename(columns={0:'date'})
date_series = date_series[(date_series['date'] >= calculate_time_span[0]) & (date_series['date'] <= calculate_time_span[1])]

# Most recent trading date
today = vnindex_series.iloc[0]

# Timestamp of the most recent intraday bar
current_time = pd.to_datetime(
    pd.read_csv('D:\\t2m-project\\ami-data\\ami_itd_data\\HNXINDEX.csv')
    .sort_values('date', ascending=False)
    .reset_index(drop=True)['date']
    .iloc[0],
    format='%y%m%d %H%M%S',
)

# All 5-minute slots of today's two sessions.
# The bounds are built with Timestamp arithmetic instead of f'{today} 09:00:00': formatting
# a Timestamp into the string also embeds its own '00:00:00' time part and relies on lenient
# date parsing. '5min' replaces the deprecated '5T' alias.
_session_day = today.normalize()
time_series_list = []
time_series_list.extend(pd.date_range(start=_session_day + pd.Timedelta(hours=9),
                                      end=_session_day + pd.Timedelta(hours=11, minutes=25),
                                      freq='5min'))
time_series_list.extend(pd.date_range(start=_session_day + pd.Timedelta(hours=13),
                                      end=_session_day + pd.Timedelta(hours=14, minutes=55),
                                      freq='5min'))
time_series = pd.DataFrame(time_series_list).rename(columns={0:'date'})

# Keep only the slots that already have data (up to the latest intraday bar), newest first
time_series = time_series.loc[time_series['date'].dt.time <= current_time.time()].sort_values('date', ascending=False).reset_index(drop=True)

# Slots from 09:15 onwards (the first three slots are skipped), used for charting
itd_series = pd.DataFrame(time_series_list[3:]).rename(columns={0:'date'}).sort_values('date', ascending=False)

In [228]:
def calculate_time_percent(time):
    """Return the fraction of the trading day elapsed at the END of a 5-minute slot.

    The trading day is 09:00-11:30 plus 13:00-15:00 (270 minutes). ``time`` is the
    slot's start; five minutes are added before measuring.

    Parameters
    ----------
    time : datetime-like
        Slot start; must support ``+ timedelta`` and ``.time()``.

    Returns
    -------
    float
        Elapsed trading minutes divided by 270. Values after the close may exceed 1;
        the caller clamps them. Slots ending before the open yield 0.0, and slots
        ending during the lunch break count the whole morning session (previously
        that case raised ``UnboundLocalError``).
    """
    start_time_am = dt.time(9, 00)
    end_time_am = dt.time(11, 30)
    start_time_pm = dt.time(13, 00)
    end_time_pm = dt.time(15, 00)

    def time_difference_in_minutes(time1, time2):
        delta1 = dt.timedelta(hours=time1.hour, minutes=time1.minute, seconds=time1.second)
        delta2 = dt.timedelta(hours=time2.hour, minutes=time2.minute, seconds=time2.second)
        # total_seconds() keeps the sign; .seconds would wrap a negative gap to almost a day
        return int((delta2 - delta1).total_seconds() // 60)

    time = (time + timedelta(minutes=5)).time()
    morning_minutes = time_difference_in_minutes(start_time_am, end_time_am)
    afternoon_minutes = time_difference_in_minutes(start_time_pm, end_time_pm)
    full_time_range = morning_minutes + afternoon_minutes

    if time <= start_time_am:
        # Market not open yet
        time_range = 0
    elif time <= end_time_am:
        time_range = time_difference_in_minutes(start_time_am, time)
    elif time < start_time_pm:
        # Lunch break: the whole morning session has elapsed, nothing more
        time_range = morning_minutes
    else:
        # Afternoon: minutes since 09:00 minus the length of the lunch break
        time_range = time_difference_in_minutes(start_time_am, time) - time_difference_in_minutes(end_time_am, start_time_pm)

    return time_range/full_time_range

# Attach the elapsed-session fraction to every available slot, capped at 1
def _cap_at_one(value):
    if value < 1:
        return value
    return 1

time_percent = time_series.copy()
time_percent['percent'] = time_percent['date'].apply(calculate_time_percent)
time_percent['percent'] = time_percent['percent'].apply(_cap_at_one)

# Fraction for the latest slot; fall back to a full day when no slot is available
current_time_percent = time_percent['percent'].iloc[0] if len(time_percent['percent']) >= 1 else 1

In [229]:
# Determine the time shown as the market's "last updated" time
def get_market_update_time(start_time_am, end_time_am, start_time_pm, end_time_pm, now=None):
    """Return the time of day at which market data was last updated.

    Parameters
    ----------
    start_time_am, end_time_am, start_time_pm, end_time_pm : datetime.time
        Boundaries of the morning and afternoon windows.
    now : datetime.datetime, optional
        Reference moment. Defaults to ``datetime.now()``. The clock is read once,
        so the weekday check and the time of day always describe the same instant
        (the original read it up to three times and could return ``None`` when the
        calls straddled midnight).

    Returns
    -------
    datetime.time
        * weekend, or weekday before ``start_time_am`` or after ``end_time_pm``:
          ``end_time_pm``
        * inside either window: the current time
        * between the windows: ``end_time_am``
    """
    if now is None:
        now = dt.datetime.now()

    # Saturday (5) and Sunday (6)
    if now.weekday() > 4:
        return end_time_pm

    current_time = now.time()
    if current_time < start_time_am:
        return end_time_pm
    if current_time < end_time_am:
        return current_time
    if current_time < start_time_pm:
        return end_time_am
    if current_time < end_time_pm:
        return current_time
    return end_time_pm

# One-cell table holding the "last updated" label (today's date + resolved update time)
time_update = get_market_update_time(
    start_time_am=dt.time(8, 30),
    end_time_am=dt.time(11, 30),
    start_time_pm=dt.time(13, 00),
    end_time_pm=dt.time(15, 00),
)
date_time_update = dt.datetime.combine(dt.date.today(), time_update)
_update_label = f"Cập nhât: {date_time_update.strftime('%d/%m/%Y %H:%M:%S')}"
market_update_time = pd.DataFrame({'date': [_update_label]})

##### Các dữ liệu EOD

In [230]:
# Load every CSV exported by the EOD feed, keyed by file name without extension
folder_path = 'D:\\t2m-project\\ami-data\\ami_eod_data'

def _read_eod_csv(path):
    """Read one EOD CSV, newest row first, with 'date' parsed from yymmdd."""
    frame = pd.read_csv(path).sort_values('date', ascending=False).reset_index(drop=True)
    frame['date'] = pd.to_datetime(frame['date'], format='%y%m%d')
    return frame

eod_item_dict = {
    os.path.splitext(filename)[0]: _read_eod_csv(os.path.join(folder_path, filename))
    for filename in os.listdir(folder_path)
    if filename.endswith('.csv')
}

# Summary table of every loaded item plus helper columns used to classify the names
eod_item_df = pd.DataFrame(list(eod_item_dict.keys())).rename(columns={0:'item'})
eod_item_df['len'] = eod_item_df['item'].map(len)
eod_item_df['last_2chars'] = eod_item_df['item'].str[-2:]
eod_item_df['first_4chars'] = eod_item_df['item'].str[:4]

_eod_helper_cols = ['len', 'last_2chars', 'first_4chars']
_eod_len = eod_item_df['len']
_eod_suffix = eod_item_df['last_2chars']

def _eod_names_where(mask):
    """Rows of eod_item_df matching `mask`, reduced to the 'item' column."""
    return eod_item_df[mask].reset_index(drop=True).drop(_eod_helper_cols, axis=1)

# Index names: 4 to 9 characters, not 6, excluding two specific items
index_name_df = _eod_names_where(
    (_eod_len > 3) & (_eod_len != 6) & (_eod_len < 10)
    & (eod_item_df['item'] != 'FUEVFVND') & (eod_item_df['item'] != '0001')
)

# Price tables: every stock, current-period stocks, and indexes
_total_stock_set = set(total_stock_list)
_current_stock_set = set(current_stock_list)
_index_name_set = set(index_name_df['item'])

full_stock_dict = {k: v.drop(['option'], axis=1) for k, v in eod_item_dict.items() if k in _total_stock_set}
eod_stock_dict = {k: v.drop(['option'], axis=1) for k, v in eod_item_dict.items() if k in _current_stock_set}
eod_index_dict = {k: v.rename(columns={'option':'value'}).drop('cap', axis=1)
                  for k, v in eod_item_dict.items() if k in _index_name_set}

# Names of the 'TD' / 'NN' suffixed items: 6-character names per stock, 10+ characters per index
_is_td_or_nn = _eod_suffix.isin(['NN', 'TD'])
stock_name_td_df = _eod_names_where((_eod_len == 6) & (_eod_suffix == 'TD'))
stock_name_nn_df = _eod_names_where((_eod_len == 6) & (_eod_suffix == 'NN'))
index_td_nn_df = _eod_names_where((_eod_len >= 10) & (eod_item_df['first_4chars'] != 'VN30') & _is_td_or_nn)

# In these files the OHLCV columns carry buy/sell volume and value
_flow_column_names = {'open':'sell_volume','close':'buy_volume','volume':'sell_value','option':'buy_value'}

def _flow_frames(names_df, dropped_columns):
    """Frames for the items in `names_df`, with unused columns dropped and flow names applied."""
    wanted = set(names_df['item'])
    return {k: v.drop(dropped_columns, axis=1).rename(columns=_flow_column_names)
            for k, v in eod_item_dict.items() if k in wanted}

stock_td_dict = _flow_frames(stock_name_td_df, ['high','low','cap'])
stock_nn_dict = _flow_frames(stock_name_nn_df, ['high','low','cap'])
index_td_nn_dict = _flow_frames(index_td_nn_df, ['high','low','cap','stock'])

# Rescale the index-level tables (volume / 1e3, value / 1e9), make the sell side
# negative and add the net columns
for df in index_td_nn_dict.values():
    scaled_buy_volume = df['buy_volume']/1000
    scaled_sell_volume = -df['sell_volume']/1000
    scaled_buy_value = df['buy_value']/1_000_000_000
    scaled_sell_value = -df['sell_value']/1_000_000_000
    df['buy_volume'] = scaled_buy_volume
    df['sell_volume'] = scaled_sell_volume
    df['buy_value'] = scaled_buy_value
    df['sell_value'] = scaled_sell_value
    df['net_volume'] = scaled_buy_volume + scaled_sell_volume
    df['net_value'] = scaled_buy_value + scaled_sell_value

In [231]:
# Pad stocks whose data stops before the latest trading session (e.g. trading suspended today)
def _pad_suspended_sessions(frames, build_rows):
    """Prepend synthetic rows for recent sessions missing at the top of each frame.

    Parameters
    ----------
    frames : dict[str, DataFrame]
        Frames sorted newest-first with a 'date' column; updated in place.
    build_rows : callable(stock, df, dates) -> DataFrame
        Builds the synthetic rows for `dates` (newest first).

    Only sessions NEWER than the frame's latest row are padded, looking at the 100
    most recent sessions of `date_series`. The previous set-difference approach also
    "padded" sessions before a recent listing or inside historical gaps, stacking
    them on top of the frame with the latest close. Empty frames are skipped.
    """
    recent_sessions = date_series['date'].iloc[:100]
    latest_session = date_series['date'].iloc[0]

    for stock, df in list(frames.items()):
        if df.empty:
            continue
        last_traded = df['date'].iloc[0]
        if last_traded >= latest_session:
            continue

        missing_dates = sorted(set(recent_sessions[recent_sessions > last_traded]), reverse=True)
        new_rows = build_rows(stock, df, missing_dates)
        frames[stock] = pd.concat([new_rows, df], axis=0).reset_index(drop=True)


def _price_padding(stock, df, dates):
    """Flat rows at the last close with zero volume and the last market cap."""
    last_close = df['close'].iloc[0]
    return pd.DataFrame({'stock': stock,
                         'date': dates,
                         'open': last_close,
                         'high': last_close,
                         'low': last_close,
                         'close': last_close,
                         'volume': 0,
                         'cap': df['cap'].iloc[0]})


def _flow_padding(stock, df, dates):
    """Rows with zero buy/sell volume and value."""
    return pd.DataFrame({'stock': stock,
                         'date': dates,
                         'sell_volume': 0,
                         'buy_volume': 0,
                         'sell_value': 0,
                         'buy_value': 0})


_pad_suspended_sessions(eod_stock_dict, _price_padding)
_pad_suspended_sessions(full_stock_dict, _price_padding)
_pad_suspended_sessions(stock_nn_dict, _flow_padding)

In [232]:
# Rolling high/low levels, moving averages and trend flags for every stock
_eod_windows = (5, 20, 60, 120, 240, 480)

def _eod_with_levels(df):
    """Add highN/lowN, ma5_V (previous sessions' 5-bar volume mean) and maN columns."""
    new_columns = {}
    for window in _eod_windows:
        new_columns[f'high{window}'] = df['high'].rolling(window=window, min_periods=1).max()
        new_columns[f'low{window}'] = df['low'].rolling(window=window, min_periods=1).min()

    # shift(1) so the volume average excludes the row's own session
    new_columns['ma5_V'] = df['volume'].rolling(window=5, min_periods=1).mean().shift(1)

    for window in _eod_windows:
        new_columns[f'ma{window}'] = df['close'].rolling(window=window, min_periods=1).mean()
    return df.assign(**new_columns)

def _eod_with_trend_flags(df):
    """Add trend_Np = 1 when close is above the previous row's (highN + lowN) / 2."""
    flags = {
        f'trend_{window}p': (df['close'] > ((df[f'high{window}'] + df[f'low{window}'])/2).shift(1)).astype(int)
        for window in _eod_windows
    }
    return df.assign(**flags)

def _eod_within_calc_span(df):
    """Keep the calculation window only, newest row first."""
    in_span = df['date'].between(calculate_time_span[0], calculate_time_span[1])
    return df[in_span].sort_values(by=['date'], ascending=False).reset_index(drop=True)

# Rolling statistics need chronological order
eod_stock_dict = {k: v.sort_values(by=['date'], ascending=True).reset_index(drop=True) for k, v in eod_stock_dict.items()}
eod_stock_dict = {key: _eod_with_levels(df) for key, df in eod_stock_dict.items()}
eod_stock_dict = {key: _eod_with_trend_flags(df) for key, df in eod_stock_dict.items()}
eod_stock_dict = {k: _eod_within_calc_span(v) for k, v in eod_stock_dict.items()}

In [233]:
# Convert the period bounds to pd.Timestamp so they can be compared with the frames' dates
period_timestamp_map_dict = {
    key: (pd.Timestamp(value[0]), pd.Timestamp(value[1]), value[2])
    for key, value in period_map_dict.items()
}

def _period_of(day):
    """Key of the first period whose [start, end] range contains `day`, else None."""
    return next((key for key, value in period_timestamp_map_dict.items() if value[0] <= day <= value[1]), None)

# Add the 'period' and 'count' columns
for stock, df in eod_stock_dict.items():
    # Make sure 'date' is a Timestamp column
    df['date'] = pd.to_datetime(df['date'])
    df['period'] = df['date'].apply(_period_of)
    # 'count' is the third value stored for the row's period
    df['count'] = df['period'].apply(lambda x: period_timestamp_map_dict[x][2] if x else None)
    eod_stock_dict[stock] = df

#------------------------------------------------------------------------------------------------------------------------------------------------------
# Liquidity ratio = volume / ma5_V.
# The ratio is built as a standalone Series and assigned once. The previous chained forms
# (df[col].iloc[0] = ..., df[col].replace(..., inplace=True)) only modify the frame without
# copy-on-write and are silently ineffective with it.
for df in eod_stock_dict.values():
    liquid_ratio = df['volume'] / df['ma5_V']
    if len(liquid_ratio) > 0:
        # The newest session is still running: scale its baseline by the elapsed fraction
        liquid_ratio.iloc[0] = df['volume'].iloc[0] / (df['ma5_V'].iloc[0] * current_time_percent)
    # A zero baseline gives +/-inf; treat it as 0
    df['liquid_ratio'] = liquid_ratio.replace([np.inf, -np.inf], 0)


#------------------------------------------------------------------------------------------------------------------------------------------------------
# Keep only the stocks for which EOD data was actually loaded
current_stock_list = [item for item in eod_stock_dict.keys()]

##### Các dữ liệu ITD

In [234]:
# Load every CSV exported by the ITD (intraday) feed, keyed by file name without extension
itd_item_dict = {}
folder_path = 'D:\\t2m-project\\ami-data\\ami_itd_data'
for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        key = os.path.splitext(filename)[0]
        itd_item_dict[key] = pd.read_csv(os.path.join(folder_path, filename)).sort_values('date', ascending=False).reset_index(drop=True)

for item, df in itd_item_dict.items():

    df['date'] = pd.to_datetime(df['date'].astype(str), format='%y%m%d %H%M%S')

    # Align to today's slots and fill slots without data: prices take the value of the
    # next row below (frames are newest-first), volume becomes 0.
    # DataFrame.bfill() replaces the deprecated fillna(method='bfill').
    df = time_series.merge(df, on='date', how='left').sort_values('date', ascending=False)
    df[['open','high','low','close']] = df[['open','high','low','close']].bfill()
    df['volume'] = df['volume'].fillna(0)

    df['stock'] = item

    itd_item_dict[item] = df

# Summary table of every loaded item plus helper columns used to classify the names
itd_item_df = pd.DataFrame(list(itd_item_dict.keys())).rename(columns={0:'item'})
itd_item_df['len'] = itd_item_df['item'].apply(lambda x: len(x))
itd_item_df['last_2chars'] = itd_item_df['item'].str[-2:]
itd_item_df['third_last_char'] = itd_item_df['item'].str[-3:-2]
itd_item_df['first_4chars'] = itd_item_df['item'].str[:4]

# Index names: 4 to 9 characters, not 6, excluding '0001'.
# NOTE(review): this rebinds index_name_df, which the EOD section also defines.
index_name_df = itd_item_df[(itd_item_df['len']>3) & (itd_item_df['len']!=6) & (itd_item_df['len']<10) & (itd_item_df['item']!='0001')]\
                .reset_index(drop=True).drop(['len','last_2chars','third_last_char','first_4chars'], axis=1)

# Intraday tables for the stocks that have EOD data, and for the indexes
itd_stock_dict = {k:v.drop(columns=['option'])
                  for k,v in itd_item_dict.items() if k in eod_stock_dict.keys()}
itd_index_dict = {k:v.rename(columns={'option':'value'})
                for k,v in itd_item_dict.items() if k in index_name_df['item'].tolist()}

In [152]:
# Copy the rolling high/low levels from each stock's first EOD row onto every row of
# its intraday table (the levels become constant columns).
for stock, df in itd_stock_dict.items():
    temp_data = eod_stock_dict[stock][['high5', 'low5', 'high20', 'low20', 'high60', 'low60',
                                       'high120','low120', 'high240', 'low240', 'high480', 'low480']].iloc[0]
    itd_stock_dict[stock] = df.assign(**temp_data)

    # For stocks with no intraday volume at all, take the price and cap columns from the
    # first EOD row.
    # NOTE(review): this assigns a Series to several columns at once — confirm the values
    # are broadcast per column as intended, and that the ITD frames have a 'cap' column.
    if df['volume'].max() == 0:
        itd_stock_dict[stock][['open','high','low','close','cap']] = eod_stock_dict[stock][['open','high','low','close','cap']].iloc[0]


_itd_trend_windows = (5, 20, 60, 120, 240, 480)

def _itd_with_trend_flags(frame):
    """Add trend_Np flags (close above the previous row's (highN + lowN) / 2), newest first."""
    # The shift(1) comparison needs chronological order
    chronological = frame.sort_values(by=['date'], ascending=True).reset_index(drop=True)
    flags = {
        f'trend_{window}p': (
            chronological['close']
            > ((chronological[f'high{window}'] + chronological[f'low{window}'])/2).shift(1)
        ).astype(int)
        for window in _itd_trend_windows
    }
    flagged = chronological.assign(**flags)
    return flagged.sort_values(by=['date'], ascending=False).reset_index(drop=True)

itd_stock_dict = {k: _itd_with_trend_flags(v) for k, v in itd_stock_dict.items()}

#### Phân nhóm cổ phiếu

In [153]:
# Load the classification sheet and keep the rows for the current stock list
full_stock_classification_df = pd.read_excel("period_data/stock_classification.xlsx", sheet_name='stock_classification')
_is_current_stock = full_stock_classification_df['stock'].isin(current_stock_list)
stock_classification_df = full_stock_classification_df[_is_current_stock].reset_index(drop=True)

# Remove classified stocks for which no EOD data was loaded, from both the table and the list
_stocks_without_data = [s for s in stock_classification_df['stock'] if s not in eod_stock_dict.keys()]
for _missing_stock in _stocks_without_data:
    stock_classification_df = stock_classification_df[stock_classification_df['stock'] != _missing_stock].reset_index(drop=True)
    current_stock_list.remove(_missing_stock)

##### Phân nhóm vốn hoá

In [154]:
def _marketcap_bucket(cap, price, unit):
    """Classify one stock as 'small' / 'mid' / 'large' / 'penny'.

    `unit` is the reference cap (total cap / 10000). Bands:
      * unit < cap < 10*unit                    -> small
      * 10*unit <= cap < 20*unit, price < 10    -> small
      * 10*unit <= cap < 20*unit, price >= 10   -> mid
      * 20*unit <= cap < 100*unit               -> mid
      * cap >= 100*unit                         -> large
      * anything else (incl. NaN)               -> penny
    """
    in_boundary_band = (cap >= 10*unit) and (cap < 20*unit)
    if ((cap > unit) and (cap < 10*unit)) or (in_boundary_band and price < 10):
        return 'small'
    if (in_boundary_band and price >= 10) or ((cap >= 20*unit) and (cap < 100*unit)):
        return 'mid'
    if cap >= 100*unit:
        return 'large'
    return 'penny'


# Price and cap of every stock at the earliest session of the current quarter
# (frames are newest-first, so that is the last row after filtering)
price_arr = []
cap_arr = []
for stock in stock_classification_df['stock']:
    quarter_df = eod_stock_dict[stock]
    quarter_df = quarter_df[quarter_df['date'] >= current_quarter_span[0]]
    price_arr.append(quarter_df['close'].iloc[-1].item())
    cap_arr.append(quarter_df['cap'].iloc[-1].item())

vonhoa_classification_df = stock_classification_df.copy()
vonhoa_classification_df['price'] = price_arr
vonhoa_classification_df['cap'] = cap_arr

cap_coef = sum(cap_arr)/10000
vonhoa_classification_df['marketcap_group'] = [
    _marketcap_bucket(cap, price, cap_coef) for cap, price in zip(cap_arr, price_arr)
]

# assign() overwrites an existing 'marketcap_group' column, so re-running this cell
# no longer appends a duplicate column (pd.concat(axis=1) did).
stock_classification_df = stock_classification_df.assign(
    marketcap_group=vonhoa_classification_df['marketcap_group']
)

##### Phân nhóm stock_perform

In [155]:
def stock_perform_map(df):
    """Map each stock to a performance quartile label within its industry_perform group.

    Parameters
    ----------
    df : DataFrame
        Needs the columns 'stock', 'industry_perform' and 'price_change'.

    Returns
    -------
    dict
        stock -> f'{industry_perform}{q}' with q in 1..4. Stocks are ranked by
        'price_change' (best = rank 1) inside their group; q is 1 when
        rank <= 25% of the group size, 2 when <= 50%, 3 when <= 75%, else 4
        (also 4 when the rank is NaN).

    Notes
    -----
    * The first quartile now uses `<=` like the other three. With the previous strict
      `<` a group of four stocks had no member in quartile 1.
    * Each group is ranked once instead of once per stock.
    * Rows whose 'industry_perform' is missing are left out of the result instead of
      raising.
    """
    labels = {}
    for industry, group in df.groupby('industry_perform'):
        group_size = len(group)
        ranks = group['price_change'].rank(ascending=False)
        for stock, rank in zip(group['stock'], ranks):
            if rank <= 0.25 * group_size:
                quartile = 1
            elif rank <= 0.5 * group_size:
                quartile = 2
            elif rank <= 0.75 * group_size:
                quartile = 3
            else:
                quartile = 4
            labels[stock] = f'{industry}{quartile}'

    # Return entries in the frame's row order
    return {stock: labels[stock] for stock in df['stock'] if stock in labels}

In [156]:
# Price change of every stock over the period in the second-to-last row of period_map
# NOTE(review): presumably that row is the previous quarter — confirm against the sheet.
_perform_window_start = period_map[0].iloc[-2]
_perform_window_end = period_map[1].iloc[-2]

temp_dict = {}
for stock in current_stock_list:
    stock_df = eod_stock_dict[stock]
    _in_window = stock_df['date'].between(_perform_window_start, _perform_window_end)
    stock_df = stock_df[_in_window]

    # Frames are newest-first: last row = start of the window, first row = end
    start_value = stock_df['close'].iloc[-1]
    end_value = stock_df['close'].iloc[0]

    temp_dict[stock] = ((end_value - start_value)/start_value)

temp_df = (
    pd.DataFrame.from_dict(temp_dict, orient='index')
    .reset_index()
    .rename(columns={'index':'stock',0:'price_change'})
    .merge(stock_classification_df[['stock','industry_perform']], on='stock', how='left')
)
temp_df['stock_perform'] = temp_df['stock'].map(stock_perform_map(temp_df))

# Store the label in the classification table (only once, so the cell can be re-run)
if 'stock_perform' not in stock_classification_df.columns:
    stock_classification_df = stock_classification_df.merge(temp_df[['stock','stock_perform']], on='stock', how='left')

##### Phân ra các dict theo nhóm

In [157]:
# stock -> label lookups for each way of grouping
_classification_by_stock = stock_classification_df.set_index('stock')
stock_by_industry = _classification_by_stock['industry_name'].to_dict()
stock_by_perform = _classification_by_stock['industry_perform'].to_dict()
stock_by_marketcap = _classification_by_stock['marketcap_group'].to_dict()

def create_mapping(stock_dict, category_dict):
    """For each category, collect the frames of its stocks that exist in `stock_dict`."""
    return {
        category: {stock: stock_dict[stock] for stock in stocks if stock in stock_dict}
        for category, stocks in category_dict.items()
    }

def _members_by_label(label_of_stock, labels):
    """label -> list of stocks carrying that label, in lookup order."""
    return {
        label: [stock for stock, stock_label in label_of_stock.items() if stock_label == label]
        for label in labels
    }

# Labels present in each grouping (market-cap labels are fixed)
unique_industries = np.unique(list(stock_by_industry.values()))
unique_performs = np.unique(list(stock_by_perform.values()))
unique_marketcaps = ['large', 'mid', 'small', 'penny']

# Whole universe
itd_all_stock = {'all_stock': dict(itd_stock_dict)}
eod_all_stock = {'all_stock': dict(eod_stock_dict)}

# By industry
_industry_members = _members_by_label(stock_by_industry, unique_industries)
eod_industry_name = create_mapping(eod_stock_dict, _industry_members)
itd_industry_name = create_mapping(itd_stock_dict, _industry_members)

# By industry performance group
_perform_members = _members_by_label(stock_by_perform, unique_performs)
eod_industry_perform = create_mapping(eod_stock_dict, _perform_members)
itd_industry_perform = create_mapping(itd_stock_dict, _perform_members)

# By market-cap group
_marketcap_members = _members_by_label(stock_by_marketcap, unique_marketcaps)
eod_marketcap_group = create_mapping(eod_stock_dict, _marketcap_members)
itd_marketcap_group = create_mapping(itd_stock_dict, _marketcap_members)


#### Điểm dòng tiền từng cổ phiếu

##### Điểm dòng tiền EOD

In [158]:
# Sessions used for the money-flow score: everything from the start of the previous quarter
_from_previous_quarter = date_series['date'] >= previous_quarter_span[0]
score_date_series = date_series[_from_previous_quarter]

def _drift_term(row):
    """Close relative to the previous 5-session average, scaled down by 100."""
    return ((row['close'] - row['ma5_prev']) / row['ma5_prev']) / 100


def _flow_score(row, time_fraction, nan_falls_back):
    """Shared money-flow score.

    score = (position of close in the bar's range) * |close change| / close_prev
            * (volume * close) / baseline * 100 + drift term

    baseline = ma5_prev * ma5_V, additionally multiplied by `time_fraction` when given.
    Returns the drift term alone when the division fails or yields inf
    (and, if `nan_falls_back`, NaN).
    """
    try:
        baseline = row['ma5_prev'] * row['ma5_V']
        if time_fraction is not None:
            baseline = baseline * time_fraction

        close_position = (row['close'] - row['low']) - (row['high'] - row['close'])
        bar_range = row['high'] - row['low']
        turnover = row['volume'] * row['close']

        result = (close_position / bar_range *
                  abs((row['close'] - row['close_prev'])) / row['close_prev'] *
                  turnover / baseline) * 100 \
                  + _drift_term(row)
    except ZeroDivisionError:
        return _drift_term(row)

    if np.isinf(result):
        return _drift_term(row)
    if nan_falls_back and np.isnan(result):
        return _drift_term(row)
    return result


def score_calculation(row):
    """Money-flow score of a completed session (NaN results are returned as-is)."""
    return _flow_score(row, time_fraction=None, nan_falls_back=False)


def score_calculation_t0(row):
    """Money-flow score of the running session: the volume baseline is scaled by
    current_time_percent, and NaN results fall back to the drift term."""
    return _flow_score(row, time_fraction=current_time_percent, nan_falls_back=True)

In [159]:
# Money-flow score per stock
eod_score_dict = {}
for stock in eod_stock_dict.keys():

    # Keep the needed columns and only the rows from the start of the previous quarter.
    # copy() makes this an independent frame so the column assignments below are reliable.
    temp_df = eod_stock_dict[stock][['stock', 'date', 'period', 'count', 'open', 'high', 'low', 'close', 'volume', 'liquid_ratio', 'ma5', 'ma5_V']]
    temp_df = temp_df[temp_df['date']>=previous_quarter_span[0]].copy()

    # Frames are newest-first, so shift(-1) brings in the previous session's values
    temp_df['ma5_prev'] = temp_df['ma5'].shift(-1)
    temp_df['close_prev'] = temp_df['close'].shift(-1)

    # t0 score per session; the newest (running) session uses the time-scaled variant.
    # The scores are built as a standalone Series and assigned once: the previous
    # chained form temp_df['t0_score'].iloc[0] = ... is ineffective under copy-on-write.
    t0_score = temp_df.apply(score_calculation, axis=1)
    t0_score.iloc[0] = score_calculation_t0(temp_df.iloc[0])
    temp_df['t0_score'] = t0_score

    # t5 = mean of the t0 score over the session and up to 4 previous ones
    temp_df['t5_score'] = temp_df['t0_score'][::-1].rolling(window=5, min_periods=1).mean()[::-1]
    temp_df['price_change'] = temp_df['close'][::-1].pct_change()[::-1]
    temp_df['value_change'] = temp_df['close'][::-1].diff()[::-1]

    eod_score_dict[stock] = temp_df

# Cross-sectional rank per session (1 = highest score); missing scores count as 0.
# The score tables are built in one go instead of column by column with a full-frame
# fillna on every iteration.
# NOTE(review): rows are matched by index label (position from the newest row), exactly as
# before — a stock with a gap in its history is shifted against the market calendar.
t0_ranking_df = pd.DataFrame(
    {stock: df['t0_score'] for stock, df in eod_score_dict.items()},
    index=score_date_series.index,
).fillna(0).rank(ascending=False, method='min', axis=1)
t5_ranking_df = pd.DataFrame(
    {stock: df['t5_score'] for stock, df in eod_score_dict.items()},
    index=score_date_series.index,
).fillna(0).rank(ascending=False, method='min', axis=1)

# Attach the ranks to each stock's table
for stock, df in eod_score_dict.items():
    df['rank_t0'] = t0_ranking_df[stock]
    df['rank_t5'] = t5_ranking_df[stock]

    # 1 when the t0 rank is within the top 10% of the period's stock count
    df['top_check'] = (df['rank_t0'] <= df['count']*0.1).astype(int)

    # Number of top-10% sessions among the session and the 19 previous ones
    df['top_count'] = df['top_check'][::-1].rolling(window=20).sum()[::-1]

    # Drop rows that still contain NaN (e.g. the oldest rows without a full window)
    eod_score_dict[stock] = df.dropna()

In [160]:
# Build the summary table with the latest (first-row) scores of every stock.
def _liquid_bucket(x):
    """Label a liquidity ratio with its percentage bucket."""
    # The first bound used to be 0.6, which put ratios in [0.5, 0.6) into '<50%'
    # and contradicted both the label and the next bucket's lower bound.
    if x < 0.5:
        return '<50%'
    if (x >= 0.5) & (x < 1):
        return '50%-100%'
    if (x >= 1) & (x < 1.5):
        return '100%-150%'
    if (x >= 1.5) & (x < 2):
        return '150%-200%'
    return '>200%'


def _rank_bucket(x):
    """Label a T5 rank with its rank-range bucket."""
    if x <= 50:
        return '1-50'
    if (x > 50) & (x <= 150):
        return '51-150'
    if (x > 150) & (x <= 250):
        return '151-250'
    return '>250'


# Sort keys for the bucket labels; labels not listed fall into the last position.
_liquid_bucket_order = {'<50%': 1, '50%-100%': 2, '100%-150%': 3, '150%-200%': 4}
_rank_bucket_order = {'1-50': 1, '51-150': 2, '151-250': 3}

# First row of each stock frame, without the columns the summary does not show
score_list = []
for stock, df in eod_score_dict.items():
    score_list.append(df.drop(columns=['period','open','high','low','count']).iloc[0])

stock_score_filter_df = pd.DataFrame(score_list).sort_values('t0_score', ascending=False).reset_index(drop=True)
stock_score_filter_df = stock_score_filter_df.fillna('')

# Money-flow direction: non-negative score = 'Tiền vào' (inflow), negative = 'Tiền ra' (outflow)
stock_score_filter_df['filter_t0'] = stock_score_filter_df['t0_score'].apply(lambda x: 'Tiền vào' if x >= 0 else 'Tiền ra')
stock_score_filter_df['filter_t5'] = stock_score_filter_df['t5_score'].apply(lambda x: 'Tiền vào' if x >= 0 else 'Tiền ra')

stock_score_filter_df['filter_liquid'] = stock_score_filter_df['liquid_ratio'].apply(_liquid_bucket)
stock_score_filter_df['order_filter_liquid'] = stock_score_filter_df['filter_liquid'].apply(lambda x: _liquid_bucket_order.get(x, 5))
stock_score_filter_df['filter_rank'] = stock_score_filter_df['rank_t5'].apply(_rank_bucket)
stock_score_filter_df['order_filter_rank'] = stock_score_filter_df['filter_rank'].apply(lambda x: _rank_bucket_order.get(x, 4))

# Attach the classification columns and replace the group codes with their full names
stock_score_filter_df = stock_score_filter_df.merge(stock_classification_df[['stock','industry_name','industry_perform','marketcap_group','stock_perform']], on='stock', how='left')
stock_score_filter_df['industry_name'] = stock_score_filter_df['industry_name'].map(name_map_dict)
stock_score_filter_df['industry_perform'] = stock_score_filter_df['industry_perform'].map(name_map_dict)
stock_score_filter_df['marketcap_group'] = stock_score_filter_df['marketcap_group'].map(name_map_dict)

##### Điểm dòng tiền ITD

In [161]:
# Assumes date_series and itd_stock_dict are already defined by earlier cells.
# Intraday window start: the first date of date_series with the clock set to 09:15:00.
itd_start = pd.Timestamp(date_series['date'].iloc[0].replace(hour=9, minute=15, second=0, microsecond=0))

# Keep only the price/volume columns and drop the intraday rows timestamped before 09:15
# (the original note attributes those early rows to HNX and UPCOM stocks).
# The source dict is deep-copied first, so the frames in itd_stock_dict are not modified here.
itd_score_dict = {k: v[['stock', 'date', 'open', 'high', 'low', 'close', 'volume']]
                  .loc[v['date'] >= itd_start] 
                  for k, v in copy.deepcopy(itd_stock_dict).items()}

for stock, itd_df in itd_score_dict.items():

    eod_df = eod_score_dict[stock]

    # Reference values taken from the first EOD row of the stock.
    # ma5_V is scaled by time_percent['percent']; the product is aligned to itd_df by index label.
    itd_df['ma5_V'] = time_percent['percent']*(eod_df['ma5_V'].iloc[0])
    itd_df['ma5_prev'] = eod_df['ma5_prev'].iloc[0]
    itd_df['close_prev'] = eod_df['close_prev'].iloc[0]

    # Running high / low / volume accumulated from the last row upwards
    # (rows are presumably ordered newest first — TODO confirm).
    itd_df['high'] = itd_df['high'][::-1].cummax()[::-1]
    itd_df['low'] = itd_df['low'][::-1].cummin()[::-1]
    itd_df['volume'] = itd_df['volume'][::-1].cumsum()[::-1]
    itd_df['liquid_ratio'] = itd_df['volume']/itd_df['ma5_V']

    # Overwrite the row labelled 0 with the values of the first EOD row.
    # NOTE(review): .loc[0, ...] addresses the index label 0; if that label was filtered out
    # above, pandas appends a new row instead — confirm label 0 always survives the filter.
    itd_df.loc[0, 'volume'] = eod_df['volume'].iloc[0]
    itd_df.loc[0, 'close'] = eod_df['close'].iloc[0]
    itd_df.loc[0, 'low'] = eod_df['low'].iloc[0]
    itd_df.loc[0, 'high'] = eod_df['high'].iloc[0]

    itd_df['t0_score'] = itd_df.apply(score_calculation, axis=1)
    # Change of each intraday close relative to the open of the first EOD row
    itd_df['price_change'] = (itd_df['close'] - eod_df['open'].iloc[0])/eod_df['open'].iloc[0]

    # Full group names of the stock; .item() requires exactly one matching classification row
    itd_df['industry_name'] = stock_classification_df[stock_classification_df['stock']==stock]['industry_name'].map(name_map_dict).item()
    itd_df['industry_perform'] = stock_classification_df[stock_classification_df['stock']==stock]['industry_perform'].map(name_map_dict).item()
    itd_df['marketcap_group'] = stock_classification_df[stock_classification_df['stock']==stock]['marketcap_group'].map(name_map_dict).item()

    # Drop the columns of eod_score_dict that are no longer needed
    eod_score_dict[stock] = eod_df.drop(columns=['open','high','low','ma5_prev','close_prev'])

    # Keep only the columns of itd_score_dict that are still needed
    itd_score_dict[stock] = itd_df[['stock', 'date','close', 'volume','t0_score','liquid_ratio','industry_name','industry_perform','marketcap_group', 'price_change']]

##### Điểm dòng tiền 5p

In [162]:
# Wide table of t0_score: one row per session of date_series, one column per stock
eod_stock_score_df = date_series.copy()
for stock, df in eod_score_dict.items():
    eod_stock_score_df[stock] = df['t0_score']

# Labels of the first five rows, in row order
session_labels = ["T-0", "T-1", "T-2", "T-3", "T-4"]

# Take the first five sessions, label them, and flip the table so each stock becomes a row
stock_score_5p_df = eod_stock_score_df.iloc[:5].assign(id=session_labels)
stock_score_5p_df = stock_score_5p_df.drop(columns=["date"]).set_index("id").T
stock_score_5p_df = stock_score_5p_df.reset_index().rename(columns={"index": "stock"})

# Total score over the five sessions
stock_score_5p_df['score'] = stock_score_5p_df[session_labels].sum(axis=1)

#### Điểm dòng tiền nhóm cổ phiếu

- Các hàm tính toán

In [163]:
# Re-weight each t0 score by the breadth ratio of the group it belongs to
def adjust_score_by_breath(t0_score, ratio_column):
    """Scale every score by its paired breadth ratio.

    Non-negative scores are multiplied by ``ratio``; negative scores are
    multiplied by ``1 - ratio``. The two inputs are paired positionally with
    ``zip``, so the result is as long as the shorter input.

    Returns:
        list: the adjusted scores, in input order.
    """
    return [
        score * ratio if score >= 0 else score * (1 - ratio)
        for score, ratio in zip(t0_score, ratio_column)
    ]

# Cap a stock's score so that a single outlier cannot dominate its group's total
def adjust_score_for_smooth(row, column_name, max_percent, mark):
    """Return ``row[column_name]``, capped when it exceeds its allowed share.

    Args:
        row: mapping/Series holding the score under ``column_name`` and the
            reference sum under ``'total'``.
        column_name: key of the score to check.
        max_percent: largest share of ``row['total']`` the absolute score may take.
        mark: one-element list used as an output flag; ``mark[0]`` is set to 0
            whenever the score is within the limit and returned unchanged.

    Returns:
        The original score when within the limit, otherwise a capped value
        carrying the sign of the original score.
    """
    raw_score = row[column_name]

    # Guard clause: within the limit -> report it through the flag and keep the score
    if not abs(raw_score) > row['total'] * max_percent:
        mark[0] = 0
        return raw_score

    # Sum attributed to everything else in the row, then the capped magnitude derived from it
    others_total = row['total'] - abs(row[column_name])
    capped = others_total / (1 - max_percent) - others_total

    return capped if raw_score >= 0 else -capped

# Apply the smoothing above to every group of stocks, repeating until the flag reports a clean pass
def apply_smooth_score(score_dict, group_type, type_name):
    """Add a smoothed ``t0_<group_type>`` column to every stock frame in ``score_dict``.

    For each group of ``group_type`` the ``t0_score`` columns of its stocks are
    collected side by side, the per-row sum of absolute values is stored as
    ``'total'``, and ``adjust_score_for_smooth`` is applied column by column.
    The result is written back to ``score_dict[stock]['t0_<group_type>']``.

    Args:
        score_dict: dict mapping stock code -> DataFrame with a ``'t0_score'`` column;
            the frames are modified in place.
        group_type: ``'all_stock'``, ``'industry_name'``, ``'industry_perform'``
            or ``'marketcap_group'``.
        type_name: ``'itd'`` (base frame ``time_series``) or ``'eod'``
            (base frame ``score_date_series``).

    Raises:
        ValueError: for an unknown ``group_type`` or ``type_name`` (previously these
            surfaced as an UnboundLocalError further down).
    """
    # Safety net only: the pass loop normally ends as soon as the flag is cleared.
    max_passes = 100

    if type_name == 'itd':
        initial_score_df = time_series.copy()
    elif type_name == 'eod':
        initial_score_df = score_date_series.copy()
    else:
        raise ValueError(f"Unknown type_name {type_name!r}; expected 'itd' or 'eod'")

    if group_type == 'all_stock':
        key_list = all_stock_key_list
    elif group_type == 'industry_perform':
        key_list = [key for key, value in group_map_dict.items() if value == 'hs']
    elif group_type == 'marketcap_group':
        key_list = [key for key, value in group_map_dict.items() if value == 'cap']
    elif group_type == 'industry_name':
        key_list = [key for key, value in group_map_dict.items() if value in ['A', 'B', 'C', 'D']]
    else:
        raise ValueError(f"Unknown group_type {group_type!r}")

    for key in key_list:
        score_df = initial_score_df.copy()
        if group_type == 'all_stock':
            stock_list = stock_classification_df['stock'].tolist()
        else:
            stock_list = stock_classification_df[stock_classification_df[group_type]==key]['stock'].dropna().tolist()

        # A group without members has nothing to smooth (and would divide by zero below).
        if not stock_list:
            continue

        # Collect the scores of the stocks that actually have data. Stocks missing from
        # score_dict are skipped explicitly instead of through a bare `except: pass`.
        present_stocks = []
        for stock in stock_list:
            stock_df = score_dict.get(stock)
            if stock_df is None or 't0_score' not in stock_df:
                continue
            score_df[stock] = stock_df['t0_score']
            present_stocks.append(stock)

        # Largest share of the row total one stock may contribute: 5/n, clamped to [0.1, 0.5]
        max_percent = max(0.1, min(5*(1/len(stock_list)), 0.5))
        score_df['total'] = score_df.iloc[:, 1:].abs().sum(axis=1)

        # adjust_score_for_smooth clears mark[0] when it meets a value that needs no capping.
        # Without present stocks or rows the flag could never be cleared, which used to
        # spin forever, so the loop is skipped in that case and bounded otherwise.
        mark = [1]
        passes = 0
        while mark[0] == 1 and present_stocks and len(score_df) > 0 and passes < max_passes:
            for stock in present_stocks:
                score_df[stock] = score_df.iloc[:, 1:].apply(adjust_score_for_smooth, axis=1, args=(stock, max_percent, mark))
            passes += 1

        for stock in present_stocks:
            score_dict[stock][f't0_{group_type}'] = score_df[stock]

##### Dòng tiền vào nhóm cổ phiếu EOD

In [164]:
# Add the outlier-smoothed group contribution columns (t0_<group_type>) to every EOD stock frame
for group_type in ('all_stock', 'industry_name', 'industry_perform', 'marketcap_group'):
    apply_smooth_score(score_dict=eod_score_dict, group_type=group_type, type_name='eod')

In [165]:
# Compute the breadth of every group per session; it is used below to adjust the money-flow scores
# Indicator table: 1 where a stock's t0_score is strictly positive, 0 otherwise (NaN included)
temp_df = date_series.copy()
for stock, df in eod_score_dict.items():
    temp_df[stock] = eod_score_dict[stock]['t0_score']
temp_df.iloc[:,1:] = temp_df.iloc[:,1:].applymap(lambda x: 1 if x > 0 else 0)

# One breadth column per group: share of the group's stocks with a positive score
eod_market_breath = date_series.copy()

# Breadth per industry
industry_name_breadth_dict = {}
for key in eod_industry_name.keys():
    stock_list = stock_classification_df[stock_classification_df['industry_name']==key]['stock'].tolist()
    industry_name_breadth_dict[key] = temp_df[['date'] + [columns for columns in stock_list]]
    eod_market_breath[key] = industry_name_breadth_dict[key].iloc[:,1:].sum(axis=1)/len(stock_list)

# Breadth per performance group
industry_perform_breadth_dict = {}
for key in eod_industry_perform.keys():
    stock_list = stock_classification_df[stock_classification_df['industry_perform']==key]['stock'].tolist()
    industry_perform_breadth_dict[key] = temp_df[['date'] + [columns for columns in stock_list]]
    eod_market_breath[key] = industry_perform_breadth_dict[key].iloc[:,1:].sum(axis=1)/len(stock_list)

# Breadth per market-cap group
marketcap_group_breadth_dict = {}
for key in eod_marketcap_group.keys():
    stock_list = stock_classification_df[stock_classification_df['marketcap_group']==key]['stock'].tolist()
    marketcap_group_breadth_dict[key] = temp_df[['date'] + [columns for columns in stock_list]]
    eod_market_breath[key] = marketcap_group_breadth_dict[key].iloc[:,1:].sum(axis=1)/len(stock_list)

# Breadth over all classified stocks
all_stock_breadth_dict = {}
for key in eod_all_stock.keys():
    stock_list = stock_classification_df['stock'].tolist()
    all_stock_breadth_dict[key] = temp_df[['date'] + [columns for columns in stock_list]]
    eod_market_breath[key] = all_stock_breadth_dict[key].iloc[:,1:].sum(axis=1)/len(stock_list)

# Adjust each stock's group contribution scores by the breadth of the group it belongs to
for stock, df in eod_score_dict.items():
    # Group codes of the stock; .item() requires exactly one matching classification row
    name_of_industry_name = stock_classification_df[stock_classification_df['stock']==stock]['industry_name'].item()
    name_of_industry_perform = stock_classification_df[stock_classification_df['stock']==stock]['industry_perform'].item()
    name_of_marketcap_group = stock_classification_df[stock_classification_df['stock']==stock]['marketcap_group'].item()

    # adjust_score_by_breath pairs scores and ratios positionally, not by index label
    df[f't0_industry_name'] = adjust_score_by_breath(df['t0_industry_name'], eod_market_breath[name_of_industry_name])
    df[f't0_industry_perform'] = adjust_score_by_breath(df['t0_industry_perform'], eod_market_breath[name_of_industry_perform])
    df[f't0_marketcap_group'] = adjust_score_by_breath(df['t0_marketcap_group'], eod_market_breath[name_of_marketcap_group])
    # NOTE(review): relies on eod_all_stock having a key literally named 'all_stock' — confirm
    df[f't0_all_stock'] = adjust_score_by_breath(df['t0_all_stock'], eod_market_breath['all_stock'])

In [166]:
# Build the money-flow score table of the stock groups (one column per group)
eod_group_score_df = score_date_series.copy()

# Column for the all-stock group: per-session mean of the stocks' t0_all_stock
for nganh in eod_all_stock.keys():
    score_df = date_series.copy()
    for stock in stock_classification_df['stock']:
        score_df[stock] = eod_score_dict[stock]['t0_all_stock']
    score_df['total'] = score_df.iloc[:, 1:].mean(axis=1)
    eod_group_score_df[nganh] = score_df['total']

# Columns for the industries: per-session mean of the member stocks' t0_industry_name
# NOTE(review): eod_industry_name_score_df is created but not filled in this cell
eod_industry_name_score_df = date_series.copy()
for nganh in eod_industry_name.keys():
    score_df = date_series.copy()
    for stock in stock_classification_df[stock_classification_df['industry_name']==nganh]['stock']:
        score_df[stock] = eod_score_dict[stock]['t0_industry_name']
    score_df['total'] = score_df.iloc[:, 1:].mean(axis=1)
    eod_group_score_df[nganh] = score_df['total']
    # NOTE(review): keeps a copy of the 'bao_hiem' frame in temp_df; looks like a debugging leftover
    if nganh == 'bao_hiem': temp_df = score_df.copy()

# Columns for the performance groups: per-session mean of t0_industry_perform
# NOTE(review): eod_industry_perform_score_df is created but not filled in this cell
eod_industry_perform_score_df = date_series.copy()
for group in eod_industry_perform.keys():
    score_df = date_series.copy()
    for stock in stock_classification_df[stock_classification_df['industry_perform']==group]['stock']:
        score_df[stock] = eod_score_dict[stock]['t0_industry_perform']
    score_df['total'] = score_df.iloc[:, 1:].mean(axis=1)
    eod_group_score_df[group] = score_df['total']

# Columns for the market-cap groups: per-session mean of t0_marketcap_group
# NOTE(review): eod_marketcap_group_score_df is created but not filled in this cell
eod_marketcap_group_score_df = date_series.copy()
for marketcap in eod_marketcap_group.keys():
    score_df = date_series.copy()
    for stock in stock_classification_df[stock_classification_df['marketcap_group']==marketcap]['stock']:
        score_df[stock] = eod_score_dict[stock]['t0_marketcap_group']
    score_df['total'] = score_df.iloc[:, 1:].mean(axis=1)
    eod_group_score_df[marketcap] = score_df['total']

##### Dòng tiền vào nhóm cổ phiếu ITD

In [167]:
# Add the outlier-smoothed group contribution columns (t0_<group_type>) to every ITD stock frame
for group_type in ('all_stock', 'industry_name', 'industry_perform', 'marketcap_group'):
    apply_smooth_score(score_dict=itd_score_dict, group_type=group_type, type_name='itd')

In [168]:
# Compute the breadth of every group per intraday bar; it is used below to adjust the money-flow scores
# Indicator table: 1 where a stock's t0_score is strictly positive, 0 otherwise (NaN included)
temp_df = time_series.copy()
for stock, df in itd_score_dict.items():
    temp_df[stock] = itd_score_dict[stock]['t0_score']
temp_df.iloc[:,1:] = temp_df.iloc[:,1:].applymap(lambda x: 1 if x > 0 else 0)

# One breadth column per group: share of the group's stocks with a positive score
itd_market_breath = time_series.copy()

# Breadth per industry, restricted to stocks in current_stock_list that have intraday data
industry_name_breadth_dict = {}
for key in itd_industry_name.keys():
    temp_stock_list_full = stock_classification_df[stock_classification_df['industry_name']==key]['stock'].tolist()
    stock_list = [item for item in list(set(temp_stock_list_full) & set(current_stock_list)) if item in itd_stock_dict.keys()]

    industry_name_breadth_dict[key] = temp_df[['date'] + [columns for columns in stock_list]]
    itd_market_breath[key] = industry_name_breadth_dict[key].iloc[:,1:].sum(axis=1)/len(stock_list)

# Breadth per performance group
industry_perform_breadth_dict = {}
for key in itd_industry_perform.keys():
    temp_stock_list_full = stock_classification_df[stock_classification_df['industry_perform']==key]['stock'].tolist()
    stock_list = [item for item in list(set(temp_stock_list_full) & set(current_stock_list)) if item in itd_stock_dict.keys()]

    industry_perform_breadth_dict[key] = temp_df[['date'] + [columns for columns in stock_list]]
    itd_market_breath[key] = industry_perform_breadth_dict[key].iloc[:,1:].sum(axis=1)/len(stock_list)

# Breadth per market-cap group
marketcap_group_breadth_dict = {}
for key in itd_marketcap_group.keys():
    temp_stock_list_full = stock_classification_df[stock_classification_df['marketcap_group']==key]['stock'].tolist()
    stock_list = [item for item in list(set(temp_stock_list_full) & set(current_stock_list)) if item in itd_stock_dict.keys()]

    marketcap_group_breadth_dict[key] = temp_df[['date'] + [columns for columns in stock_list]]
    itd_market_breath[key] = marketcap_group_breadth_dict[key].iloc[:,1:].sum(axis=1)/len(stock_list)

# Breadth over all classified stocks
all_stock_breadth_dict = {}
for key in itd_all_stock.keys():
    temp_stock_list_full = stock_classification_df['stock'].tolist()
    stock_list = [item for item in list(set(temp_stock_list_full) & set(current_stock_list)) if item in itd_stock_dict.keys()]

    all_stock_breadth_dict[key] = temp_df[['date'] + [columns for columns in stock_list]]
    itd_market_breath[key] = all_stock_breadth_dict[key].iloc[:,1:].sum(axis=1)/len(stock_list)

# Adjust each stock's group contribution scores by the breadth of the group it belongs to
for stock, df in itd_score_dict.items():

    # Group codes of the stock; .item() requires exactly one matching classification row
    name_of_industry_name = stock_classification_df[stock_classification_df['stock']==stock]['industry_name'].item()
    name_of_industry_perform = stock_classification_df[stock_classification_df['stock']==stock]['industry_perform'].item()
    name_of_marketcap_group = stock_classification_df[stock_classification_df['stock']==stock]['marketcap_group'].item()

    # adjust_score_by_breath pairs scores and ratios positionally, not by index label
    df['t0_industry_name'] = adjust_score_by_breath(df['t0_industry_name'], itd_market_breath[name_of_industry_name])
    df['t0_industry_perform'] = adjust_score_by_breath(df['t0_industry_perform'], itd_market_breath[name_of_industry_perform])
    df['t0_marketcap_group'] = adjust_score_by_breath(df['t0_marketcap_group'], itd_market_breath[name_of_marketcap_group])
    # NOTE(review): relies on itd_all_stock having a key literally named 'all_stock' — confirm
    df['t0_all_stock'] = adjust_score_by_breath(df['t0_all_stock'], itd_market_breath['all_stock'])

In [169]:
# Build the intraday money-flow score table of the stock groups (one column per group)
itd_group_score_df = time_series.copy()

# Column for the all-stock group: per-bar mean of the stocks' t0_all_stock
for nganh in itd_all_stock.keys():
    score_df = time_series.copy()
    temp_stock_list_full = stock_classification_df['stock'].tolist()
    # Only stocks that are also in current_stock_list
    stock_list = list(set(temp_stock_list_full) & set(current_stock_list))

    for stock in stock_list:
        # Stocks without intraday data are skipped
        if stock in itd_stock_dict.keys():
            score_df[stock] = itd_score_dict[stock]['t0_all_stock']
    score_df['total'] = score_df.iloc[:, 1:].mean(axis=1)
    itd_group_score_df[nganh] = score_df['total']

# Columns for the industries: per-bar mean of the member stocks' t0_industry_name
# NOTE(review): itd_industry_name_score_df is created but not filled in this cell
itd_industry_name_score_df = time_series.copy()
for nganh in itd_industry_name.keys():
    score_df = time_series.copy()
    temp_stock_list_full = stock_classification_df[stock_classification_df['industry_name']==nganh]['stock'].tolist()
    stock_list = list(set(temp_stock_list_full) & set(current_stock_list))

    for stock in stock_list:
        if stock in itd_stock_dict.keys():
            score_df[stock] = itd_score_dict[stock]['t0_industry_name']
    score_df['total'] = score_df.iloc[:, 1:].mean(axis=1)
    itd_group_score_df[nganh] = score_df['total']

# Columns for the performance groups: per-bar mean of t0_industry_perform
# NOTE(review): itd_industry_perform_score_df is created but not filled in this cell
itd_industry_perform_score_df = time_series.copy()
for group in itd_industry_perform.keys():
    score_df = time_series.copy()
    temp_stock_list_full = stock_classification_df[stock_classification_df['industry_perform']==group]['stock'].tolist()
    stock_list = list(set(temp_stock_list_full) & set(current_stock_list))

    for stock in stock_list:
        if stock in itd_stock_dict.keys():
            score_df[stock] = itd_score_dict[stock]['t0_industry_perform']
    score_df['total'] = score_df.iloc[:, 1:].mean(axis=1)
    itd_group_score_df[group] = score_df['total']

# Columns for the market-cap groups: per-bar mean of t0_marketcap_group
# NOTE(review): itd_marketcap_group_score_df is created but not filled in this cell
itd_marketcap_group_score_df = time_series.copy()
for marketcap in itd_marketcap_group.keys():
    score_df = time_series.copy()
    temp_stock_list_full = stock_classification_df[stock_classification_df['marketcap_group']==marketcap]['stock'].tolist()
    stock_list = list(set(temp_stock_list_full) & set(current_stock_list))

    for stock in stock_list:
        if stock in itd_stock_dict.keys():
            score_df[stock] = itd_score_dict[stock]['t0_marketcap_group']
    score_df['total'] = score_df.iloc[:, 1:].mean(axis=1)
    itd_group_score_df[marketcap] = score_df['total']

#### Xếp hạng dòng tiền các nhóm cổ phiếu

In [170]:
# Build the ranking table of the stock groups
def create_ranking_df(score_df):
    """Rank the groups of ``score_df`` against each other by their 5-row mean score.

    Args:
        score_df: DataFrame whose first column is the date and whose remaining
            columns hold one score series per group. Values are aligned to the
            module-level ``date_series`` by index label.

    Returns:
        DataFrame with the first column of ``date_series`` followed by one rank
        column per group (1 = highest 5-row mean, ties share the lowest rank),
        limited to the first 20 rows.
    """
    # 5-row mean of every group: the current row and the 4 following rows.
    # Rows with an incomplete window (or without data) become NaN and are scored as 0.
    ranking_score = date_series.copy()
    for group in score_df.columns[1:]:
        ranking_score[group] = score_df[group]
        ranking_score[group] = ranking_score[group][::-1].rolling(window=5).mean()[::-1]
    ranking_score = ranking_score.fillna(0)

    # Rank all groups of a row in one vectorised call. The former per-row loop produced the
    # same ranks but wrote floats row by row into columns initialised with the integer 0.
    group_ranks = ranking_score.iloc[:, 1:].rank(axis=1, ascending=False, method='min')
    ranking_df = pd.concat([date_series.iloc[:, :1], group_ranks], axis=1)

    return ranking_df.head(20)

# Industry codes that are ranked against each other
industry_ranking_columns = [
    'ban_le', 'bao_hiem', 'bds', 'bds_kcn', 'chung_khoan',
    'cong_nghe', 'cong_nghiep', 'dau_khi', 'det_may', 'dulich_dv',
    'dv_hatang', 'hoa_chat', 'htd', 'khoang_san', 'ngan_hang', 'tai_chinh',
    'thep', 'thuc_pham', 'thuy_san', 'van_tai', 'vlxd', 'xd', 'y_te',
]

# One ranking table per way of grouping the stocks
industry_name_ranking = create_ranking_df(eod_group_score_df[['date'] + industry_ranking_columns])
industry_perform_ranking = create_ranking_df(eod_group_score_df[['date', 'A', 'B', 'C', 'D']])
marketcap_group_ranking = create_ranking_df(eod_group_score_df[['date', 'large', 'mid', 'small', 'penny']])

# Put the three rankings side by side, keyed by date
group_score_ranking_df = (
    industry_name_ranking
    .merge(industry_perform_ranking, on='date', how='left')
    .merge(marketcap_group_ranking, on='date', how='left')
)

In [171]:
# Long-format ranking table (date, rank, name) used to look groups up by name.
# Each group column becomes its own (date, rank, name) frame and all frames are concatenated
# once; the former version re-concatenated inside the loop and set the column labels through
# a nested list, which silently created a MultiIndex that had to be flattened afterwards.
group_score_ranking_melted = pd.concat(
    [
        group_score_ranking_df[['date', column]]
        .rename(columns={column: 'rank'})
        .assign(name=column)
        for column in group_score_ranking_df.columns[1:]
    ],
    axis=0,
)

# Replace the group codes with their full display names
group_score_ranking_melted['name'] = group_score_ranking_melted['name'].map(name_map_dict)

#### Thanh khoản các nhóm cổ phiếu

##### Khung ITD ta dùng chỉ số thanh khoản

In [172]:
def _itd_group_liquidity_ratio(group_stocks):
    """Return the intraday liquidity ratio series of one group of stocks.

    The ratio is the cumulative volume of the group's stocks since the first date of
    ``date_series``, divided by the summed ``ma5_V`` of their first EOD rows scaled by
    ``time_percent['percent']``. Rows are in ascending date order; the ratio at index 0
    is forced to 0.

    Args:
        group_stocks: dict mapping stock code -> intraday DataFrame with
            ``'date'`` and ``'volume'`` columns.
    """
    session_start = date_series['date'].iloc[0]
    liquidity_t0 = time_percent[time_percent['date'] >= session_start].sort_values('date').reset_index(drop=True)
    liquidity_month_ma5 = 0

    for stock, df in group_stocks.items():
        liquidity_t0[stock] = df[df['date'] >= session_start].sort_values('date')['volume'].reset_index(drop=True)
        liquidity_month_ma5 += eod_stock_dict[stock].iloc[0]['ma5_V']
    # Every column after the first two is a stock volume
    # (assumes time_percent has exactly two columns — TODO confirm)
    for column in liquidity_t0.columns[2:]:
        liquidity_t0[column] = liquidity_t0[column].cumsum()

    liquidity_t0['volume_t0'] = liquidity_t0.iloc[:,2:].sum(axis=1)
    liquidity_t0['volume_month_ma5'] = liquidity_month_ma5 * liquidity_t0['percent']
    liquidity_t0['ratio'] = liquidity_t0['volume_t0']/liquidity_t0['volume_month_ma5']
    liquidity_t0.loc[0, 'ratio'] = 0

    return liquidity_t0['ratio']


# Intraday liquidity ratio per group, computed in ascending time order
itd_group_liquidity_df = time_series.copy().sort_values('date').reset_index(drop=True)

# Same computation for all stocks, industries, performance groups and market-cap groups
for itd_group in (itd_all_stock, itd_industry_name, itd_industry_perform, itd_marketcap_group):
    for name, group_stocks in itd_group.items():
        itd_group_liquidity_df[name] = _itd_group_liquidity_ratio(group_stocks)

# Back to descending time order, like the other ITD tables
itd_group_liquidity_df = itd_group_liquidity_df.sort_values('date', ascending=False).reset_index(drop=True)

##### Khung EOD ta dùng tổng thanh khoản

In [173]:
def _eod_group_volume(group_stocks):
    """Return the per-session total volume of one group of stocks.

    Args:
        group_stocks: dict mapping stock code -> EOD DataFrame with a ``'volume'``
            column; values are aligned to ``date_series`` by index label.
    """
    temp_volume_df = date_series.copy()
    for stock, df in group_stocks.items():
        temp_volume_df[stock] = df['volume']
    return temp_volume_df.iloc[:, 1:].sum(axis=1)


# Load the previously computed historical group liquidity
period_group_liquidity_df = pd.read_excel("period_data/period_processed_data.xlsx", sheet_name='full_group_liquidity_df')

# Total traded volume of every group for the sessions of date_series
temp_group_liquidity_df = date_series.copy()
for key_list, eod_group in (
    (all_stock_key_list, eod_all_stock),
    (industry_name_list, eod_industry_name),
    (industry_perform_list, eod_industry_perform),
    (marketcap_group_list, eod_marketcap_group),
):
    for name in key_list:
        temp_group_liquidity_df[name] = _eod_group_volume(eod_group[name])

# Keep the current quarter only and stack it on top of the stored history, newest first
eod_group_liquidity_df = temp_group_liquidity_df[temp_group_liquidity_df['date'] >= current_quarter_span[0]]
eod_group_liquidity_df = pd.concat([eod_group_liquidity_df, period_group_liquidity_df]).sort_values('date', ascending=False).reset_index(drop=True)

In [174]:
# Liquidity index per group: volume divided by the mean volume of the 5 following rows
eod_group_liquidity_index_df = date_series.copy()
temp_df = date_series.copy()
for key in group_stock_key_list:
    temp_df[key] = eod_group_liquidity_df[key][::-1].rolling(window=5).mean()[::-1].shift(-1)
    eod_group_liquidity_index_df[key] = eod_group_liquidity_df[key]/temp_df[key]
    # The first row is additionally scaled by current_time_percent. It is written positionally:
    # the former `df[key].iloc[0] = ...` was chained assignment, which pandas does not
    # guarantee to propagate to the frame (and never does under Copy-on-Write).
    eod_group_liquidity_index_df.iloc[0, eod_group_liquidity_index_df.columns.get_loc(key)] = (
        eod_group_liquidity_df[key].iloc[0]/(temp_df[key].iloc[0]*current_time_percent)
    )

# Make the ITD row labelled 0 agree with the latest EOD liquidity index
if len(itd_group_liquidity_df)>0:
    for column in itd_group_liquidity_df.columns:
        # Only the group columns are synchronised; the loop used to overwrite the ITD
        # 'date' value of that row with the EOD date as well.
        if column == 'date':
            continue
        itd_group_liquidity_df.loc[0, column] = eod_group_liquidity_index_df[column].iloc[0]

#### Gộp chỉ số TK và DT các nhóm CP

In [175]:
# Combine the ITD liquidity index and the ITD money-flow score of every group in one table
group_itd_score_liquidity_df = time_series.copy().reset_index(drop=True)
for column in itd_group_liquidity_df.columns[1:]:
    group_itd_score_liquidity_df[f'liquid_{column}'] = itd_group_liquidity_df[column]
for column in itd_group_score_df.columns[1:]:
    score_column = f'score_{column}'
    group_itd_score_liquidity_df[score_column] = itd_group_score_df[column]
    # Make the first ITD score equal to the latest EOD score. Written positionally: the former
    # `df[col].iloc[0] = ...` was chained assignment, which pandas does not guarantee to
    # propagate to the frame. Skipped when there is no ITD row, like in the liquidity cell.
    if len(group_itd_score_liquidity_df) > 0:
        group_itd_score_liquidity_df.iloc[0, group_itd_score_liquidity_df.columns.get_loc(score_column)] = eod_group_score_df[column].iloc[0]

# Re-align the table to the timestamps of itd_series
group_itd_score_liquidity_df = itd_series.merge(group_itd_score_liquidity_df, on='date', how='left')

In [176]:
def _liquid_state_label(x):
    """Translate a liquidity index into its descriptive state label."""
    if x < 0.5:
        return 'Rất thấp'
    if (x >= 0.5) & (x < 0.8):
        return 'Thấp'
    if (x >= 0.8) & (x < 1.2):
        return 'Trung bình'
    if (x >= 1.2) & (x < 1.5):
        return 'Cao'
    return 'Rất cao'


# Latest values per group: liquidity index, score, mean score of the first 5 rows, and rank
latest_liquidity = eod_group_liquidity_index_df.dropna().iloc[0, 1:]
latest_score = eod_group_score_df.iloc[0,1:]
mean_score_5_rows = eod_group_score_df.iloc[:5,1:].mean(axis=0)
latest_rank = group_score_ranking_df.iloc[0].iloc[1:]

group_eod_score_liquidity_df = pd.concat(
    [latest_liquidity, latest_score, mean_score_5_rows, latest_rank],
    axis=1,
).reset_index()
group_eod_score_liquidity_df.columns = ['name','liquidity','score','score_t5','rank']
group_eod_score_liquidity_df['liquid_state'] = group_eod_score_liquidity_df['liquidity'].apply(_liquid_state_label)

# 'order' and 'group' are looked up from the raw code, so 'name' is translated last
group_eod_score_liquidity_df['order'] = group_eod_score_liquidity_df['name'].map(order_map_dict)
group_eod_score_liquidity_df['group'] = group_eod_score_liquidity_df['name'].map(group_map_dict)
group_eod_score_liquidity_df['name'] = group_eod_score_liquidity_df['name'].map(name_map_dict)

# Add the position of each industry (groups A-D) when sorted by score, best first
temp_df = group_eod_score_liquidity_df[group_eod_score_liquidity_df['group'].isin(['A','B','C','D'])]
temp_df = (
    temp_df.sort_values('score', ascending=False)['name']
    .reset_index(drop=True)
    .reset_index()
    .rename(columns={'index':'industry_rank'})
)
group_eod_score_liquidity_df = group_eod_score_liquidity_df.merge(temp_df, on='name', how='left')

#### Tính dữ liệu cho MS

In [177]:
def transform_ms(stock_group):
    """Aggregate the per-stock trend flags of every group into per-date shares.

    Args:
        stock_group: dict mapping group name -> dict of stock code -> DataFrame
            holding the ``trend_*`` columns. The input is only read, never modified.

    Returns:
        dict mapping group name -> DataFrame with ``'date'`` and one column per trend
        window (sum of the stocks' trend values divided by the number of stocks),
        restricted to dates from ``current_quarter_span[0]`` and sorted newest first.
    """
    trend_columns = ['trend_5p', 'trend_20p', 'trend_60p', 'trend_120p', 'trend_240p', 'trend_480p']

    # Base date frame built from date_series (fresh 0..n-1 index)
    dates_df = pd.DataFrame(date_series['date'].tolist(), columns=['date'])

    # Results go into a new dict; the former version deep-copied every stock frame
    # of the input just to overwrite the copies with the aggregated tables.
    group_ms = {}
    for group_name, stocks in stock_group.items():
        group_trends = dates_df.copy()

        for trend in trend_columns:
            # Side-by-side trend values of all stocks of the group; missing values count as 0
            trend_data = pd.concat([stock_df[trend] for stock_df in stocks.values()], axis=1).fillna(0)

            # Share of the group's stocks for this trend window
            group_trends[f'{trend}'] = trend_data.sum(axis=1) / len(stocks)

        group_ms[group_name] = group_trends[group_trends['date'] >= current_quarter_span[0]].sort_values('date', ascending=False)

    return group_ms

In [178]:
# Compute the MS chart data of every way of grouping the stocks
all_stock_ms = transform_ms(eod_all_stock)
industry_name_ms = transform_ms(eod_industry_name)
industry_perform_ms = transform_ms(eod_industry_perform)
marketcap_group_ms = transform_ms(eod_marketcap_group)

# Tag every group table with its name and stack them with a single concat
# (the former version re-concatenated the growing frame on every iteration)
current_ms_frames = []
for item in [all_stock_ms, industry_name_ms, industry_perform_ms, marketcap_group_ms]:
    for group, df in item.items():
        df['name'] = group
        current_ms_frames.append(df)
temp_group_ms_chart_df = pd.concat(current_ms_frames, axis=0) if current_ms_frames else pd.DataFrame()

# Load the previously computed historical MS data
period_group_ms_chart_df = pd.read_excel("period_data/period_processed_data.xlsx", sheet_name='full_group_ms_chart_df')

# Stack this quarter's data on top of the history
full_group_ms_chart_df = pd.concat([temp_group_ms_chart_df, period_group_ms_chart_df], axis=0).reset_index(drop=True)

# Cut the full table down to what the charts display:
# the whole history for 'all_stock', the latest 60 rows for every other group
ms_chart_rows = 60
chart_frames = []
for key in group_stock_key_list:
    key_df = full_group_ms_chart_df[full_group_ms_chart_df['name'] == key].sort_values('date', ascending=False).reset_index(drop=True)
    if key != 'all_stock':
        key_df = key_df.iloc[:ms_chart_rows]
    chart_frames.append(key_df)
group_ms_chart_df = pd.concat(chart_frames, axis=0) if chart_frames else pd.DataFrame()

# Replace the group codes with their full display names
group_ms_chart_df['name'] = group_ms_chart_df['name'].map(name_map_dict)
group_ms_chart_df = group_ms_chart_df.reset_index(drop=True)

#### Biểu đồ nến cho giá

In [179]:
def calculate_total_change(stock_group, name, price_index_date_series):
    """Return the scaled average close-price change of one stock group.

    Each stock's ``close`` column is aligned (by index label) onto a copy of
    ``price_index_date_series`` and turned into a row-over-next-row percentage
    change (the series is reversed before ``pct_change`` and reversed back,
    so each row is compared with the row below it). The changes are summed per
    row over every column after the first one, divided by the number of stocks
    in the group and multiplied by 100 and then by 10.

    Parameters
    ----------
    stock_group : dict
        ``{group_name: {stock: DataFrame with a 'close' column}}``.
    name : str
        Key of the group to process.
    price_index_date_series : DataFrame
        Frame whose first column is skipped when summing; it is not modified.

    Returns
    -------
    Series
        Named ``total_change``, indexed like ``price_index_date_series``.
    """
    members = stock_group[name]
    change_table = price_index_date_series.copy()

    for ticker, price_frame in members.items():
        # Assigning first aligns the closes to the date frame's index
        change_table[ticker] = price_frame['close']
        aligned_close = change_table[ticker]
        change_table[ticker] = aligned_close[::-1].pct_change()[::-1]

    # Everything after the first column takes part in the sum
    row_sum = change_table.iloc[:, 1:].sum(axis=1)
    change_table['total_change'] = row_sum
    change_table['total_change'] = (change_table['total_change'] / len(members)) * 100
    change_table['total_change'] = change_table['total_change'] * 10

    return change_table['total_change']

In [180]:
# Load the group price-change history computed for earlier periods
period_group_price_change = pd.read_excel("period_data/period_processed_data.xlsx", sheet_name='full_group_price_change_df')

# Compute the group price change for the current quarter,
# one column per group key, in the order all-stock / industry / performance / market cap
temp_group_price_change = date_series.copy()
for _keys, _eod_source in (
    (all_stock_key_list, eod_all_stock),
    (industry_name_list, eod_industry_name),
    (industry_perform_list, eod_industry_perform),
    (marketcap_group_list, eod_marketcap_group),
):
    for key in _keys:
        temp_group_price_change[key] = calculate_total_change(_eod_source, key, date_series)

temp_group_price_change = temp_group_price_change[temp_group_price_change['date'] >= current_quarter_span[0]]

# Join current-quarter changes with the history, newest date first
group_price_index_df = (
    pd.concat([temp_group_price_change, period_group_price_change])
    .sort_values('date', ascending=False)
    .reset_index(drop=True)
)

# Turn the changes into an index: cumulative sum from the oldest row upwards, based at 1000
for key in group_stock_key_list:
    group_price_index_df[key] = group_price_index_df[key][::-1].cumsum()[::-1] + 1000

# Build the long table used to draw the price chart of each group on the web
_price_chart_parts = [pd.DataFrame()]
for key in group_stock_key_list:
    temp_df = group_price_index_df[['date', key]].rename(columns={key:'value'}).iloc[:360]
    # Volume is attached by index label from the group liquidity table
    temp_df['volume'] = eod_group_liquidity_df[key]
    temp_df['name'] = key
    _price_chart_parts.append(temp_df)

group_price_chart_df = pd.concat(_price_chart_parts, axis=0)
group_price_chart_df['name'] = group_price_chart_df['name'].map(name_map_dict)
group_price_chart_df = group_price_chart_df.reset_index(drop=True)

In [181]:
# Price history per stock: the first 180 rows of every stock frame, stacked into one table
_stock_price_columns = ['date', 'stock', 'open', 'high', 'low', 'close', 'volume']
_stock_price_parts = [pd.DataFrame()]
for index, df in eod_stock_dict.items():
    _stock_price_parts.append(df[_stock_price_columns].iloc[:180])

stock_price_chart_df = pd.concat(_stock_price_parts, axis=0).reset_index(drop=True)

#### Phân tích kĩ thuật

##### Các hàm tính toán

In [182]:
def calculate_ta_df(price_df):
    """Return the OHLCV columns of ``price_df`` plus calendar bucket columns.

    The result is a new frame (the input is left untouched) holding
    ``stock, date, open, high, low, close, volume`` followed by:

    * ``week``    - ``'%Y-%U'`` string of the date
    * ``month``   - monthly period of the date
    * ``quarter`` - quarterly period of the date
    * ``year``    - yearly period of the date
    """
    base_columns = ['stock', 'date', 'open', 'high', 'low', 'close', 'volume']
    trade_dates = price_df['date']
    return price_df[base_columns].assign(
        week=trade_dates.dt.strftime('%Y-%U'),
        month=trade_dates.dt.to_period('M'),
        quarter=trade_dates.dt.to_period('Q'),
        year=trade_dates.dt.to_period('Y'),
    )

In [183]:
def calculate_candle_ta_df(ta_df, input_type):
    """Add previous-period high/low, current-period open and distances to them.

    For each of the frames ``week``, ``month``, ``quarter`` and ``year`` the
    periods are taken in order of first appearance in ``ta_df``:

    * the second period provides ``<frame>_last_low`` (min of ``low``) and
      ``<frame>_last_high`` (max of ``high``); both are ``None`` when there is
      only one period;
    * the first period provides ``<frame>_open``, the ``open`` of its last row
      (the column is not created for an empty frame).

    Each value is broadcast to every row. Afterwards ``from_<frame>_<level>``
    is added for every level column that exists: relative distance
    ``(close - level) / level`` for ``input_type == 'stock'``, absolute distance
    ``close - level`` for ``input_type == 'index'``; any other ``input_type``
    adds no distance columns.

    Rows are presumably sorted newest first, so the first period is the current
    one and its last row is the period's first session - TODO confirm with the
    data loader.

    Returns a new DataFrame; ``ta_df`` is not modified.
    """
    result = ta_df.copy()
    frames = ('week', 'month', 'quarter', 'year')

    # Level columns, one frame at a time
    for frame in frames:
        periods = result[frame].unique()

        if len(periods) > 1:
            previous_rows = result[frame] == periods[1]
            result[f'{frame}_last_low'] = result.loc[previous_rows, 'low'].min()
            result[f'{frame}_last_high'] = result.loc[previous_rows, 'high'].max()
        else:
            result[f'{frame}_last_low'] = None
            result[f'{frame}_last_high'] = None

        if len(periods) > 0:
            current_rows = result[frame] == periods[0]
            result[f'{frame}_open'] = result.loc[current_rows, 'open'].iloc[-1]

    # Distance of the close from every level that was created
    for frame in frames:
        for level in ('last_high', 'last_low', 'open'):
            level_column = f'{frame}_{level}'
            if level_column not in result.columns:
                continue
            if input_type == 'stock':
                result[f'from_{frame}_{level}'] = (result['close'] - result[level_column]) / result[level_column]
            elif input_type == 'index':
                result[f'from_{frame}_{level}'] = result['close'] - result[level_column]

    return result

In [184]:
def calculate_fibo_ta_df(ta_df, input_type):
    """Add Fibonacci retracement levels and the close's distance to them.

    For each of the frames ``month``, ``quarter`` and ``year`` the rows that
    belong to the first two periods (in order of first appearance) define
    ``<frame>_high`` (max of ``high``) and ``<frame>_low`` (min of ``low``).
    The retracement levels are ``high - (high - low) * ratio`` for the ratios
    0.382, 0.5 and 0.618, stored as ``<frame>_fibo_382/500/618``.

    ``from_<frame>_fibo_<ratio>`` is the distance of ``close`` from the level:
    ``(close - level) / abs(level)`` for ``input_type == 'stock'``,
    ``close - level`` for ``input_type == 'index'``; other values of
    ``input_type`` add no distance columns.

    Returns a new DataFrame; ``ta_df`` is not modified.
    """
    result = ta_df.copy()
    frames = ('month', 'quarter', 'year')
    ratios = (('382', 0.382), ('500', 0.5), ('618', 0.618))

    # Row mask of the two leading periods of every frame
    recent_rows = {}
    for frame in frames:
        leading_periods = result[frame].unique()[:2].tolist()
        recent_rows[frame] = result[frame].isin(leading_periods)

    # Range extremes: all highs first, then all lows (keeps the column order)
    for frame in frames:
        result[f'{frame}_high'] = result[recent_rows[frame]]['high'].max()
    for frame in frames:
        result[f'{frame}_low'] = result[recent_rows[frame]]['low'].min()

    # Retracement levels measured down from the high
    for frame in frames:
        range_high = result[f'{frame}_high']
        range_low = result[f'{frame}_low']
        for label, ratio in ratios:
            result[f'{frame}_fibo_{label}'] = range_high - (range_high - range_low) * ratio

    # Distance of the close from each level
    if input_type in ('stock', 'index'):
        for frame in frames:
            for label, _ in ratios:
                level = result[f'{frame}_fibo_{label}']
                if input_type == 'stock':
                    result[f'from_{frame}_fibo_{label}'] = (result['close'] - level) / abs(level)
                else:
                    result[f'from_{frame}_fibo_{label}'] = (result['close'] - level)

    return result

In [185]:
def calculate_pivot_ta_df(ta_df, input_type):
    """Add pivot points of the previous month/quarter/year and the distance to them.

    For each of the frames ``month``, ``quarter`` and ``year`` the second
    period (in order of first appearance) provides:

    * ``<frame>_high``  - max of ``high`` over the period's rows
    * ``<frame>_low``   - min of ``low`` over the period's rows
    * ``<frame>_close`` - ``close`` of the period's first row

    and ``<frame>_pivot = (high + low + close) / 3``. When the frame has fewer
    than two periods the three inputs are ``None`` and the pivot comes out
    missing.

    ``from_<frame>_pivot`` is ``close - pivot`` for ``input_type == 'index'``
    and ``(close - pivot) / abs(pivot)`` for ``input_type == 'stock'``; other
    values of ``input_type`` add no distance columns.

    Only the "no previous period" case is treated as missing data. Genuine
    problems such as a missing ``high``/``low``/``close`` column now raise
    (``KeyError``) instead of being silently turned into ``None`` by a bare
    ``except``.

    Rows are presumably sorted newest first, so the first row of the previous
    period carries that period's closing price - TODO confirm.

    Returns a new DataFrame; ``ta_df`` is not modified.
    """
    result = ta_df.copy()
    frames = ('month', 'quarter', 'year')

    # Rows of the previous period per frame (None when there is no such period)
    previous_rows = {}
    for frame in frames:
        periods = result[frame].unique()
        previous_rows[frame] = result[result[frame] == periods[1]] if len(periods) > 1 else None

    # Column order is kept: all highs, then all lows, then all closes
    for frame in frames:
        rows = previous_rows[frame]
        result[f'{frame}_high'] = None if rows is None else rows['high'].max()
    for frame in frames:
        rows = previous_rows[frame]
        result[f'{frame}_low'] = None if rows is None else rows['low'].min()
    for frame in frames:
        rows = previous_rows[frame]
        # An empty selection (e.g. a missing period value) has no first row
        has_rows = rows is not None and len(rows) > 0
        result[f'{frame}_close'] = rows['close'].iloc[0] if has_rows else None

    for frame in frames:
        result[f'{frame}_pivot'] = (result[f'{frame}_high'] + result[f'{frame}_low'] + result[f'{frame}_close'])/3

    if input_type == 'index':
        for frame in frames:
            result[f'from_{frame}_pivot'] = (result['close'] - result[f'{frame}_pivot'])

    if input_type == 'stock':
        for frame in frames:
            result[f'from_{frame}_pivot'] = (result['close'] - result[f'{frame}_pivot'])/abs(result[f'{frame}_pivot'])

    return result

In [186]:
def calculate_ma_ta_df(ta_df,input_type):
    """Add moving averages of ``close`` and the close's distance from them.

    The ``close`` column is reversed before the rolling mean is taken, so each
    row's average covers that row and the rows below it (``min_periods=1``
    means short histories still produce a value). Windows: 5, 20, 60, 120, 240
    and 480 rows, stored as ``ma<window>``.

    ``from_<frame>_ma<window>`` (frame is month for 5/20, quarter for 60/120,
    year for 240/480) is ``(close - ma) / ma`` for ``input_type == 'stock'``
    and ``close - ma`` for ``input_type == 'index'``; other values of
    ``input_type`` add no distance columns.

    Returns a new DataFrame; ``ta_df`` is not modified.
    """
    result = ta_df.copy()
    frame_windows = (
        ('month', 5), ('month', 20),
        ('quarter', 60), ('quarter', 120),
        ('year', 240), ('year', 480),
    )

    bottom_up_close = result['close'][::-1]
    for _, window in frame_windows:
        result[f'ma{window}'] = bottom_up_close.rolling(window=window, min_periods=1).mean()[::-1]

    if input_type in ('stock', 'index'):
        for frame, window in frame_windows:
            average = result[f'ma{window}']
            if input_type == 'stock':
                result[f'from_{frame}_ma{window}'] = (result['close'] - average)/average
            else:
                result[f'from_{frame}_ma{window}'] = (result['close'] - average)

    return result

In [187]:
def transform_ta_df(ta_df,ta_name):
    """Reshape the first row of a technical-analysis frame into a long table.

    Parameters
    ----------
    ta_df : DataFrame
        Output of one of the ``calculate_*_ta_df`` functions; only its first
        row is read.
    ta_name : str
        One of ``'candle'``, ``'fibo'``, ``'pivot'`` or ``'ma'``; selects which
        level columns and ``from_*`` columns are picked.

    Returns
    -------
    DataFrame
        One row per level and time frame (month, quarter, year) with columns
        ``stock, name, value, from, order, id, ta_name``. ``value`` is formatted
        with two decimals when it is an ``int``/``float``; anything else (for
        example ``None``) is passed through unchanged. ``order`` is 1..n within
        a time frame, except for pivots where it is always 3.

    Raises
    ------
    ValueError
        If ``ta_name`` is not one of the supported names (previously this
        surfaced as an unrelated unbound-variable error).
    """
    ma_windows = {'month': (5, 20), 'quarter': (60, 120), 'year': (240, 480)}
    if ta_name not in ('candle', 'fibo', 'pivot', 'ma'):
        raise ValueError(f"Unknown ta_name {ta_name!r}; expected 'candle', 'fibo', 'pivot' or 'ma'")

    df_list = []
    for time_frame in ['month','quarter','year']:
        # Level columns, their matching distance columns and display names
        if ta_name == 'candle':
            level_cols = [f'{time_frame}_open', f'{time_frame}_last_high', f'{time_frame}_last_low']
            from_cols = [f'from_{col}' for col in level_cols]
            df_name = ['Open','Last High','Last Low']
        elif ta_name == 'fibo':
            level_cols = [f'{time_frame}_fibo_382', f'{time_frame}_fibo_500', f'{time_frame}_fibo_618']
            from_cols = [f'from_{col}' for col in level_cols]
            df_name = ['Fibo 0.382', 'Fibo 0.500', 'Fibo 0.618']
        elif ta_name == 'pivot':
            level_cols = [f'{time_frame}_pivot']
            from_cols = [f'from_{time_frame}_pivot']
            df_name = ['Pivot']
        else:
            windows = ma_windows[time_frame]
            level_cols = [f'ma{window}' for window in windows]
            from_cols = [f'from_{time_frame}_ma{window}' for window in windows]
            df_name = [f'MA{window}' for window in windows]

        first_row = ta_df[['stock'] + level_cols + from_cols].iloc[:1]

        # Column 0 is 'stock'; levels come next, distances after them
        split_at = 1 + len(level_cols)
        df_value = first_row.iloc[0,1:split_at].tolist()
        df_from = first_row.iloc[0,split_at:].tolist()

        if ta_name == 'pivot':
            df_order = 3
        else:
            df_order = [i for i in range(1, len(df_name) + 1)]

        df = pd.DataFrame({'stock':first_row['stock'].item(),'name': df_name,'value': df_value,'from': df_from, 'order': df_order})
        df['id'] = time_frame
        df['ta_name'] = ta_name
        df['value'] = df['value'].apply(lambda x: '{:.2f}'.format(x) if isinstance(x, (int, float)) else x)
        df_list.append(df)
    concat_df = pd.concat(df_list, axis=0)
    return concat_df

def concat_ta_df(df,input_type):
    """Run every technical-analysis calculation for one price frame.

    ``df`` is passed through ``calculate_ta_df`` and then through the candle,
    pivot, moving-average and Fibonacci calculations with ``input_type``.
    Each raw result is also reshaped with ``transform_ta_df``.

    Returns a dict with two entries:

    * ``'concat_ta_df'`` - the reshaped tables stacked in the order
      candle, fibo, pivot, ma;
    * ``'ta_dict'`` - the raw frames under the keys ``'df_candle'``,
      ``'df_pivot'``, ``'df_ma'`` and ``'df_fibo'``.
    """
    base_ta = calculate_ta_df(df)

    # Raw, full-length frames of every indicator family
    candle_raw = calculate_candle_ta_df(base_ta,input_type)
    pivot_raw = calculate_pivot_ta_df(base_ta,input_type)
    ma_raw = calculate_ma_ta_df(base_ta,input_type)
    fibo_raw = calculate_fibo_ta_df(base_ta,input_type)

    # Long-format summaries built from the first row of each raw frame
    candle_long = transform_ta_df(candle_raw,'candle')
    pivot_long = transform_ta_df(pivot_raw,'pivot')
    ma_long = transform_ta_df(ma_raw,'ma')
    fibo_long = transform_ta_df(fibo_raw,'fibo')

    stacked = pd.concat([candle_long,fibo_long,pivot_long,ma_long], axis=0)

    return {
        'concat_ta_df': stacked,
        'ta_dict': {
            'df_candle': candle_raw,
            'df_pivot': pivot_raw,
            'df_ma': ma_raw,
            'df_fibo': fibo_raw,
        },
    }

##### Thực hiện tính toán

In [188]:
# Technical analysis for every stock: the long summary tables are stacked into
# stock_ta_df, the raw indicator frames are kept per stock in stock_ta_dict
stock_ta_dict = {}
_stock_ta_parts = [pd.DataFrame()]

for stock, df in eod_stock_dict.items():
    # Work on a copy of the stock's price frame
    temp_ta_dict = concat_ta_df(df.copy(), 'stock')

    _stock_ta_parts.append(temp_ta_dict['concat_ta_df'])
    stock_ta_dict[stock] = temp_ta_dict['ta_dict']

stock_ta_df = pd.concat(_stock_ta_parts, axis=0).reset_index(drop=True)

In [189]:
# Technical analysis for every market index, stacked into one long table
_index_ta_parts = [pd.DataFrame()]
for index, df in eod_index_dict.items():
    temp_ta_dict = concat_ta_df(df, 'index')
    _index_ta_parts.append(temp_ta_dict['concat_ta_df'])

market_index_ta_df = pd.concat(_index_ta_parts, axis=0).reset_index(drop=True)

#### Page 1

##### Bảng hiển thị 5 chỉ số thị trường dạng card

In [190]:
# One card row per market index: the first row of each index frame after the
# change columns have been added. The columns are written onto the frames held
# in eod_index_dict themselves.
_index_card_rows = [pd.DataFrame()]
for index, df in eod_index_dict.items():
    # Reversing before diff/pct_change compares each row with the row below it
    _bottom_up_close = df['close'][::-1]
    df['change_value'] = _bottom_up_close.diff()[::-1]
    df['change_percent'] = (_bottom_up_close.pct_change()[::-1]).round(4)

    _index_card_rows.append(df.iloc[[0]])

market_index_card_df = pd.concat(_index_card_rows, axis=0).reset_index(drop=True)

##### Bảng cho biểu đồ độ rộng theo giá 

In [191]:
# Market breadth by price: count stocks whose first-row close is above, below
# or equal to the first-row open, and total their volume and traded value
up_count, up_value, up_volume = 0, 0, 0
down_count, down_value, down_volume = 0, 0, 0
unchange_count, unchange_value, unchange_volume = 0, 0, 0

for stock, df in eod_stock_dict.items():
    open_price = df['open'].iloc[0].item()
    current_price = df['close'].iloc[0].item()
    price_change = current_price - open_price

    _traded_volume = df['volume'].iloc[0].item()
    # close * 1000 * volume, same evaluation order in every bucket
    _traded_value = current_price*1000 * _traded_volume

    if price_change > 0:
        up_count += 1
        up_volume += _traded_volume
        up_value += _traded_value
    elif price_change < 0:
        down_count += 1
        down_volume += _traded_volume
        down_value += _traded_value
    else:
        unchange_count += 1
        unchange_volume += _traded_volume
        unchange_value += _traded_value

# Values are reported divided by 1,000,000,000
market_price_breath_df = pd.DataFrame({
    'name': ['Tăng giá', 'Giảm giá', 'Không đổi'],
    'count': [up_count, down_count, unchange_count],
    'volume': [up_volume, down_volume, unchange_volume],
    'value': [up_value/1000000000, down_value/1000000000, unchange_value/1000000000]
})

##### Bảng cho biểu đồ giá của 5 index ở trang 1

In [192]:
# Price chart data for the indexes: the first 360 rows of every index frame
_index_price_columns = ['date', 'stock', 'open', 'high', 'low', 'close', 'volume']
_index_price_parts = [pd.DataFrame()]
for index, df in eod_index_dict.items():
    _index_price_parts.append(df[_index_price_columns].iloc[:360])

market_index_price_chart_df = pd.concat(_index_price_parts, axis=0)

# The 'stock' column holds the index code here, so it is relabelled 'index'
market_index_price_chart_df.columns = ['date', 'index', 'open', 'high', 'low', 'close', 'volume']
market_index_price_chart_df = market_index_price_chart_df.reset_index(drop=True)

##### Bảng chỉ số tâm lý thị trường

In [193]:
# Build the market sentiment table
def _sentiment_label(ratio):
    """Map the share of stocks with a positive score to a sentiment label."""
    if ratio < 0.2:
        return 'Sợ hãi'
    if ratio < 0.4:
        return 'Tiêu cực'
    if ratio < 0.6:
        return 'Trung lập'
    if ratio < 0.8:
        return 'Tích cực'
    return 'Hưng phấn'

# One column of t0_score per stock next to the time column
market_sentiment_df = time_series.copy()
for stock, df in itd_score_dict.items():
    market_sentiment_df[stock] = df['t0_score']
total_count = len(itd_score_dict)

# Per row: how many of the columns after the first one are positive
market_sentiment_df['count'] = market_sentiment_df.iloc[:,1:].apply(lambda row: (row > 0).sum(), axis=1)
market_sentiment_df['total'] = total_count
market_sentiment_df = market_sentiment_df[['date','count','total']]
market_sentiment_df['ratio'] = market_sentiment_df['count'] / market_sentiment_df['total']
market_sentiment_df['sentiment'] = market_sentiment_df['ratio'].apply(_sentiment_label)

# Express the ratio as a percentage
market_sentiment_df['ratio'] = market_sentiment_df['ratio']*100

# Repeat the first row's values on every row so a card can be built in Power BI
market_sentiment_df['last_ratio'] = market_sentiment_df['ratio'].iloc[0]
market_sentiment_df['last_sentiment'] = market_sentiment_df['sentiment'].iloc[0]

# Re-align to the ITD time frame
market_sentiment_df = itd_series.merge(market_sentiment_df, on='date', how='left')

##### Khối ngoại và tự doanh

In [194]:
# Buy/sell figures of the first row for foreign (NN) and proprietary (TD) trading
def calculate_nn_td_buy_sell(index_name):
    """Return the first-row buy / sell / net figures of one index.

    Reads ``index_td_nn_dict['<index_name>_NN']`` and
    ``index_td_nn_dict['<index_name>_TD']`` (notebook global) and takes the
    first row of each.

    The result has the rows ``Mua``, ``Bán`` and ``Mua-Bán`` and the columns
    ``KLGD_NN``, ``GTGD_NN``, ``KLGD_TD``, ``GTGD_TD`` (``KLGD_*`` from the
    ``*_volume`` columns, ``GTGD_*`` from the ``*_value`` columns) plus an
    ``order`` column holding 1, 2, 3.
    """
    table_columns = ['type','Mua','Bán','Mua-Bán']
    side_tables = []

    for side in ('NN', 'TD'):
        first_row = index_td_nn_dict[f'{index_name}_{side}'].iloc[0]
        figures = {
            f'KLGD_{side}': first_row[['buy_volume','sell_volume','net_volume']].tolist(),
            f'GTGD_{side}': first_row[['buy_value','sell_value','net_value']].tolist(),
        }
        side_df = pd.DataFrame.from_dict(figures, orient='index').reset_index()
        side_df.columns = table_columns
        # Transpose so the buy/sell/net labels become the rows
        side_tables.append(side_df.set_index('type').transpose())

    nn_td_buy_sell_df = pd.concat(side_tables,axis=1)
    nn_td_buy_sell_df['order'] = [1,2,3]

    return nn_td_buy_sell_df

# First-row buy/sell tables per exchange, tagged with the exchange id and its display order
nn_td_buy_sell_hsx = calculate_nn_td_buy_sell('VNINDEX').assign(id='HSX', order_id=1)
nn_td_buy_sell_hnx = calculate_nn_td_buy_sell('HNXINDEX').assign(id='HNX', order_id=2)
nn_td_buy_sell_upcom = calculate_nn_td_buy_sell('UPINDEX').assign(id='UPCOM', order_id=3)

nn_td_buy_sell_df = (
    pd.concat([nn_td_buy_sell_hsx,nn_td_buy_sell_hnx,nn_td_buy_sell_upcom],axis=0)
    .reset_index()
    .rename(columns={'index':'type'})
)

# 20-row net value history of foreign (NN) and proprietary (TD) trading
def _net_value_20p(dict_key, value_name):
    """First 20 rows of date / net_value for one entry, with net_value renamed."""
    return index_td_nn_dict[dict_key][['date','net_value']].iloc[:20].rename(columns={'net_value':value_name})

nn_20p_df_hsx = _net_value_20p('VNINDEX_NN', 'nn_value')
td_20p_df_hsx = _net_value_20p('VNINDEX_TD', 'td_value')
nn_td_20p_df_hsx = nn_20p_df_hsx.merge(td_20p_df_hsx, how='left', on='date').assign(id='HSX')

nn_20p_df_hnx = _net_value_20p('HNXINDEX_NN', 'nn_value')
td_20p_df_hnx = _net_value_20p('HNXINDEX_TD', 'td_value')
nn_td_20p_df_hnx = nn_20p_df_hnx.merge(td_20p_df_hnx, how='left', on='date').assign(id='HNX')

nn_20p_df_upcom = _net_value_20p('UPINDEX_NN', 'nn_value')
td_20p_df_upcom = _net_value_20p('UPINDEX_TD', 'td_value')
nn_td_20p_df_upcom = nn_20p_df_upcom.merge(td_20p_df_upcom, how='left', on='date').assign(id='UPCOM')

nn_td_20p_df = pd.concat([nn_td_20p_df_hsx,nn_td_20p_df_hnx,nn_td_20p_df_upcom],axis=0).reset_index(drop=True)

In [195]:
def create_nn_td_top_stock(stock_dict):
    """Build the top net-sold / net-bought stock table from per-stock frames.

    Parameters
    ----------
    stock_dict : dict
        ``{key: DataFrame}``. Every non-empty frame must have ``stock`` as its
        first column and contain ``date``, ``buy_value`` and ``sell_value``.
        The dict key replaces the first column and is cut to its first three
        characters to obtain the stock code.

    Returns
    -------
    DataFrame
        Columns ``sell_stock, date, sell_value, buy_stock, buy_value``: up to 20
        stocks with the most negative net value next to up to 20 stocks with the
        most positive net value. Net value is ``(buy_value - sell_value)``
        divided by 1,000,000,000. An empty frame with these columns is returned
        when no stock qualifies.

    Notes
    -----
    A stock is used only when the date of its first row equals one of the first
    three dates of the notebook global ``date_series``. Both ``date_series`` and
    the stock frames are read positionally and are presumably sorted newest
    first - TODO confirm with the loader.

    Changes over the earlier version: the column names are taken from the first
    frame that qualifies instead of from a loop variable read after the loop
    (which failed on empty input and depended on whichever frame came last);
    the first-row date is read positionally, like the row values themselves;
    and the "nothing qualifies" case returns an empty table instead of failing
    with a column-length error.
    """
    result_columns = ['sell_stock', 'date', 'sell_value', 'buy_stock', 'buy_value']

    # The three leading session dates of the date table
    recent_dates = [date_series['date'].iloc[i] for i in range(3)]

    # First-row values (without the first column) of every stock that traded recently
    latest_rows = {}
    frame_columns = None
    for stock, df in stock_dict.items():
        if df.empty:
            continue
        first_date = df['date'].iloc[0]
        if any(first_date == recent for recent in recent_dates):
            latest_rows[stock] = df.iloc[0,1:].tolist()
            if frame_columns is None:
                frame_columns = df.columns

    if not latest_rows:
        return pd.DataFrame(columns=result_columns)

    top_stock_df = pd.DataFrame.from_dict(latest_rows, orient='index').reset_index()
    top_stock_df.columns = frame_columns
    top_stock_df['net_values'] = (top_stock_df['buy_value'] - top_stock_df['sell_value'])/1000000000
    top_stock_df['stock'] = top_stock_df['stock'].apply(lambda x: x[:3])

    top_sell = top_stock_df[top_stock_df['net_values']<0].sort_values('net_values')[['stock','date','net_values']].rename(columns={'stock':'sell_stock','net_values':'sell_value'}).reset_index(drop=True).head(20)
    top_buy = top_stock_df[top_stock_df['net_values']>0].sort_values('net_values', ascending=False)[['stock','net_values']].rename(columns={'stock':'buy_stock','net_values':'buy_value'}).reset_index(drop=True).head(20)

    # Side by side: row i holds the i-th biggest seller and the i-th biggest buyer
    return pd.concat([top_sell,top_buy], axis=1)

def _top_stock_by_exchange(source_dict, exchange, prefix):
    """Top net-sold / net-bought stocks of one exchange, with prefixed columns.

    ``source_dict`` is filtered to the keys whose first three characters are a
    stock listed on ``exchange`` in ``stock_classification_df`` and handed to
    ``create_nn_td_top_stock``. The five result columns are renamed to
    ``<prefix>_sell_stock, <prefix>_date, <prefix>_sell_value,
    <prefix>_buy_stock, <prefix>_buy_value``.

    This is deliberately best-effort: if anything goes wrong (for example no
    usable data for the exchange) an empty frame with the same, correctly
    prefixed columns is returned so the combined table keeps its shape.
    """
    columns = [f'{prefix}_sell_stock', f'{prefix}_date', f'{prefix}_sell_value', f'{prefix}_buy_stock', f'{prefix}_buy_value']
    try:
        # Build the exchange's stock set once instead of once per dictionary key
        listed = set(stock_classification_df[stock_classification_df['exchange']==exchange]['stock'].tolist())
        top_stock = create_nn_td_top_stock({k:v for k,v in source_dict.items() if k[:3] in listed})
        top_stock.columns = columns
    except Exception:
        top_stock = pd.DataFrame(columns=columns)
    return top_stock

# HSX: foreign (nn) and proprietary (td) tables side by side
nn_top_stock_hsx = _top_stock_by_exchange(stock_nn_dict, 'HSX', 'nn')
td_top_stock_hsx = _top_stock_by_exchange(stock_td_dict, 'HSX', 'td')
nn_td_top_stock_hsx = pd.concat([nn_top_stock_hsx,td_top_stock_hsx],axis=1)
nn_td_top_stock_hsx['id'] = 'HSX'

# HNX
nn_top_stock_hnx = _top_stock_by_exchange(stock_nn_dict, 'HNX', 'nn')
td_top_stock_hnx = _top_stock_by_exchange(stock_td_dict, 'HNX', 'td')
nn_td_top_stock_hnx = pd.concat([nn_top_stock_hnx,td_top_stock_hnx],axis=1)
nn_td_top_stock_hnx['id'] = 'HNX'

# UPCOM (the foreign fallback now uses nn_* column names; it used td_* names
# before, which duplicated the td columns of the combined table)
nn_top_stock_upcom = _top_stock_by_exchange(stock_nn_dict, 'UPCOM', 'nn')
td_top_stock_upcom = _top_stock_by_exchange(stock_td_dict, 'UPCOM', 'td')
nn_td_top_stock_upcom = pd.concat([nn_top_stock_upcom,td_top_stock_upcom],axis=1)
nn_td_top_stock_upcom['id'] = 'UPCOM'

# Stack the three exchanges
nn_td_top_stock = pd.concat([nn_td_top_stock_hsx,nn_td_top_stock_hnx,nn_td_top_stock_upcom],axis=0)
nn_td_top_stock = nn_td_top_stock.reset_index(drop=True)

##### Top 10 cổ phiếu tiền vào và tiền ra

In [196]:
# Stocks that traded (volume > 0), reduced to the columns shown in the table
_top_stock_columns = ['stock', 'industry_name','industry_perform','marketcap_group','close','price_change','t0_score','liquid_ratio']
_traded_scores = stock_score_filter_df[stock_score_filter_df['volume'] > 0][_top_stock_columns]

# Ten highest t0_score values
market_top_10 = _traded_scores.sort_values('t0_score', ascending=False).iloc[:10]
market_top_10['type'] = 'top'

# Ten lowest t0_score values
market_low_10 = _traded_scores.sort_values('t0_score', ascending=True).iloc[:10]
market_low_10['type'] = 'bottom'

market_top_stock_df = pd.concat([market_top_10, market_low_10], axis=0).reset_index(drop=True)

# Add one placeholder row of NaN so the web page does not fail on an empty table
if len(market_top_stock_df) == 0:
    market_top_stock_df.loc[0] = [np.nan] * len(market_top_stock_df.columns)

#### Page 2

##### Độ rộng dòng tiền và thanh khoản vào các nhóm cổ phiếu

In [197]:
# Market-breadth helper
def calculate_breadth(name, stock_list):
    """Count score and liquidity breadth for one group of stocks.

    For every entry of the global ``eod_score_dict`` whose key is in ``stock_list``,
    the first row of its frame is inspected:

    * ``t0_score >= 0`` counts as "up", ``t0_score < 0`` as "down";
    * ``liquid_ratio`` is compared against the first value of column ``name`` in the
      global ``eod_group_liquidity_index_df`` (at or above counts as "liquid up",
      below as "liquid down").

    A NaN value satisfies neither comparison and is therefore not counted on either side.

    Args:
        name: Group key; also the column of ``eod_group_liquidity_index_df`` to read.
        stock_list: Iterable of (hashable) stock codes that belong to the group.

    Returns:
        list: ``[name, up_count, down_count, liquid_up_count, liquid_down_count]``.
    """
    # Set membership is O(1); the list lookup made the scan O(len(dict) * len(list)).
    members = set(stock_list)
    member_frames = [df for stock, df in eod_score_dict.items() if stock in members]

    up_count = 0
    down_count = 0

    liquid_up_count = 0
    liquid_down_count = 0

    # As before, the threshold column is only read when at least one stock matched.
    if not member_frames:
        return [name, up_count, down_count, liquid_up_count, liquid_down_count]

    # Loop-invariant: looked up once instead of once (or twice) per stock.
    liquidity_threshold = eod_group_liquidity_index_df[name].iloc[0]

    for df in member_frames:
        t0_score = df['t0_score'].iloc[0].item()
        if t0_score >= 0:
            up_count += 1
        elif t0_score < 0:
            down_count += 1

        liquid_ratio = df['liquid_ratio'].iloc[0].item()
        if liquid_ratio >= liquidity_threshold:
            liquid_up_count += 1
        elif liquid_ratio < liquidity_threshold:
            liquid_down_count += 1

    return [name, up_count, down_count, liquid_up_count, liquid_down_count]

# Unique values of each classification column, as plain lists.
industry_names, industry_performs, marketcap_groups = [
    stock_classification_df[column].unique().tolist()
    for column in ('industry_name', 'industry_perform', 'marketcap_group')
]

# Every group key in one list, with the whole-market key first.
group_stock_name_list = ['all_stock', *industry_names, *industry_performs, *marketcap_groups]

# (column, values) pairs, in the precedence used to decide which column a key belongs to.
classification_lookup = [
    ('industry_name', industry_names),
    ('industry_perform', industry_performs),
    ('marketcap_group', marketcap_groups),
]

market_breath_list = []
for name in group_stock_name_list:
    if name == 'all_stock':
        member_stocks = stock_classification_df['stock'].tolist()
    else:
        for column, values in classification_lookup:
            if name in values:
                member_stocks = stock_classification_df[stock_classification_df[column] == name]['stock'].tolist()
                break
        else:
            # Key not found in any classification column: nothing to compute.
            continue
    market_breath_list.append(calculate_breadth(name, member_stocks))

# 'group' and 'order' are looked up from the raw key before 'name' is replaced by its full name.
# The last step adds the ranking column from the group liquidity table.
group_breath_df = (
    pd.DataFrame(market_breath_list, columns=['name','in_flow','out_flow','liquid_up','liquid_down'])
    .assign(
        group=lambda frame: frame['name'].map(group_map_dict),
        order=lambda frame: frame['name'].map(order_map_dict),
        name=lambda frame: frame['name'].map(name_map_dict),
    )
    .merge(group_eod_score_liquidity_df[['name', 'industry_rank']], on='name', how='left')
)

##### Bảng dòng tiền T5 vào các nhóm 

In [198]:
# Labels for the first five rows of eod_group_score_df, in row order.
day_labels = ["T-0", "T-1", "T-2", "T-3", "T-4"]

# Pivot the five rows so each group becomes a row and each labelled day a column,
# then add the 5-day total, its rank and the display metadata.
# 'group' and 'order' are looked up from the raw key before 'name' is replaced by its full name.
group_score_5p_df = (
    eod_group_score_df.iloc[:5]
    .assign(id=day_labels)
    .drop(columns=["date"])
    .set_index("id")
    .T
    .reset_index()
    .rename(columns={"index": "name"})
    .assign(score=lambda frame: frame[day_labels].sum(axis=1))
    .assign(rank=lambda frame: frame['score'].rank())
    .assign(
        group=lambda frame: frame["name"].map(group_map_dict),
        order=lambda frame: frame["name"].map(order_map_dict),
        name=lambda frame: frame["name"].map(name_map_dict),
    )
)

##### Bảng xếp hạng 23 ngành theo thứ tự từ trên xuống dưới

In [199]:
# The 23 industry columns that take part in the ranking.
industry_columns = [
    'ban_le', 'bao_hiem', 'bds', 'bds_kcn', 'chung_khoan',
    'cong_nghe', 'cong_nghiep', 'dau_khi', 'det_may', 'dulich_dv',
    'dv_hatang', 'hoa_chat', 'htd', 'khoang_san', 'ngan_hang', 'tai_chinh',
    'thep', 'thuc_pham', 'thuy_san', 'van_tai', 'vlxd', 'xd', 'y_te',
]

group_industry_ranking_df = eod_group_score_df[['date'] + industry_columns]

# Replace each score by the mean of the 5 rows starting at that row: the window is rolled
# over the reversed series and the result flipped back, so row 0 covers rows 0-4.
for column in industry_columns:
    reversed_scores = group_industry_ranking_df[column][::-1]
    group_industry_ranking_df[column] = reversed_scores.rolling(window=5).mean()[::-1]

# Keep only row 0 (without 'date') and turn it into a two-column name/score table.
first_row_scores = group_industry_ranking_df.iloc[0, 1:]
group_industry_ranking_df = first_row_scores.reset_index()
group_industry_ranking_df.columns = ['name', 'score']

# Label the direction, rank from highest score (ties share the lowest rank),
# then swap the key for its full display name.
group_industry_ranking_df['type'] = [
    'Tiền vào' if score >= 0 else 'Tiền ra'
    for score in group_industry_ranking_df['score']
]
group_industry_ranking_df['rank'] = group_industry_ranking_df['score'].rank(ascending=False, method='min')
group_industry_ranking_df['name'] = group_industry_ranking_df['name'].map(name_map_dict)

#### Page 3

##### Bảng thông tin các nhóm

In [200]:
# This section reuses the existing group_eod_score_liquidity_df table; nothing new is computed here.

##### Biểu đồ giá và dòng tiền

In [201]:
# Build one 20-row frame per group and concatenate them once at the end. Growing the
# result with pd.concat inside the loop re-copies all accumulated rows on every pass.
group_score_power_frames = []
for group in group_stock_key_list:
    # Slice first, then copy: only the 20 rows that are kept get copied.
    temp_df = date_series.iloc[:20].copy()
    temp_df['name'] = group
    temp_df['close'] = group_price_index_df[group]
    temp_df['volume'] = eod_group_liquidity_df[group]
    temp_df['t0_score'] = eod_group_score_df[group]
    # Mean of up to 5 rows starting at each row (rolled over the reversed series).
    temp_df['t5_score'] = eod_group_score_df[group][::-1].rolling(window=5, min_periods=1).mean()[::-1]

    # The whole-market key has no ranking column, so it is fixed at rank 1.
    if group == 'all_stock':
        temp_df['rank'] = 1
    else:
        temp_df['rank'] = group_score_ranking_df[group]

    # Running totals accumulated from the last row of the window up to the first one;
    # the score total is offset by the last row's score and scaled by 1/100.
    temp_df['score_change'] = (temp_df['t0_score'][::-1].cumsum()[::-1] - temp_df['t0_score'].iloc[-1])/100
    temp_df['price_change'] = temp_df['close'][::-1].pct_change()[::-1].fillna(0)[::-1].cumsum()[::-1]

    group_score_power_frames.append(temp_df)

# pd.concat rejects an empty list, so keep the original empty-frame result in that case.
group_score_power_df = pd.concat(group_score_power_frames, axis=0) if group_score_power_frames else pd.DataFrame()

group_score_power_df['name'] = group_score_power_df['name'].map(name_map_dict)
group_score_power_df = group_score_power_df.reset_index(drop=True)

##### Top cổ phiếu tích cực trong nhóm

In [202]:
# Columns kept for every stock in the per-group top-10 table.
group_stock_top_10_columns = ['stock', 'industry_name','industry_perform','marketcap_group','close','price_change','t0_score','t5_score','liquid_ratio','rank_t5']

# Loop-invariant inputs, computed once instead of once per group:
# the stocks with a positive volume, and the known values of each classification column
# (listed in the precedence used to decide which column a group key belongs to).
traded_stock_score_df = stock_score_filter_df[stock_score_filter_df['volume'] > 0]
classification_values = [
    (column, stock_classification_df[column].unique().tolist())
    for column in ('industry_name', 'industry_perform', 'marketcap_group')
]

# Frames are collected and concatenated once; concatenating inside the loop re-copies
# all previously accumulated rows on every iteration.
group_stock_top_10_frames = []

for group in group_stock_key_list:
    if group in ['all_stock']:
        temp_group_stock_list = stock_classification_df['stock'].tolist()
    else:
        # Unknown keys end up with an empty stock list.
        temp_group_stock_list = []
        for column, values in classification_values:
            if group in values:
                temp_group_stock_list = stock_classification_df[stock_classification_df[column]==group]['stock'].tolist()
                break

    group_stock_df = traded_stock_score_df[traded_stock_score_df['stock'].isin(temp_group_stock_list)][group_stock_top_10_columns]

    group_stock_top_10 = group_stock_df.sort_values('t0_score', ascending=False).iloc[:10]

    # Add one placeholder row of NaN so the web side does not fail on an empty group.
    if len(group_stock_top_10) == 0:
        group_stock_top_10.loc[0] = [np.nan] * len(group_stock_top_10.columns)

    group_stock_top_10['name'] = group
    group_stock_top_10_frames.append(group_stock_top_10)

# pd.concat rejects an empty list, so keep the original empty-frame result in that case.
group_stock_top_10_df = pd.concat(group_stock_top_10_frames, axis=0) if group_stock_top_10_frames else pd.DataFrame()

group_stock_top_10_df['name'] = group_stock_top_10_df['name'].map(name_map_dict)
group_stock_top_10_df = group_stock_top_10_df.reset_index(drop=True)

#### Page 4

##### Bảng tổng hợp thông tin dòng tiền cổ phiếu

In [203]:
# This section reuses the existing stock_score_filter_df table; nothing new is computed here.

##### Bảng sức mạnh dòng tiền cổ phiếu

In [204]:
# Columns copied from each stock's frame, in output order.
stock_score_power_columns = ('close', 'volume', 't0_score', 't5_score', 'rank_t5', 'rank_t0')

# Build one 20-row frame per stock and concatenate them once at the end. Growing the
# result with pd.concat inside the loop re-copies all accumulated rows on every pass.
stock_score_power_frames = []
for stock, df in eod_score_dict.items():
    # Slice first, then copy: only the 20 rows that are kept get copied.
    temp_df = date_series.iloc[:20].copy()
    temp_df['stock'] = stock
    for column in stock_score_power_columns:
        temp_df[column] = df[column]

    # Flag rows whose rank_t0 is below 10% / above 90% of the largest rank_t0 in the window.
    # The maximum is computed once instead of once per element.
    max_rank_t0 = temp_df['rank_t0'].max()
    temp_df['top_rank_check'] = temp_df['rank_t0'].apply(lambda x: 1 if x < max_rank_t0*0.1 else 0)
    temp_df['bot_rank_check'] = temp_df['rank_t0'].apply(lambda x: 1 if x > max_rank_t0*0.9 else 0)

    # Running totals accumulated from the last row of the window up to the first one;
    # the score total is offset by the last row's score and scaled by 1/100.
    temp_df['score_change'] = (temp_df['t0_score'][::-1].cumsum()[::-1] - temp_df['t0_score'].iloc[-1])/100
    temp_df['price_change'] = temp_df['close'][::-1].pct_change()[::-1].fillna(0)[::-1].cumsum()[::-1]

    stock_score_power_frames.append(temp_df)

# pd.concat rejects an empty list, so keep the original empty-frame result in that case.
stock_score_power_df = pd.concat(stock_score_power_frames, axis=0) if stock_score_power_frames else pd.DataFrame()

stock_score_power_df = stock_score_power_df.reset_index(drop=True)

#### Page 5

##### Bộ lọc cổ phiếu

In [205]:
def _first_ta_row_per_stock(ta_key, columns):
    """Collect row 0 of ``stock_ta_dict[<key>][ta_key][columns]`` into one frame, one row per key."""
    first_rows = {
        key: ta_frames[ta_key][columns].iloc[0]
        for key, ta_frames in stock_ta_dict.items()
    }
    return pd.DataFrame(first_rows).T.reset_index(drop=True)


def _trend_label(row, high_col, low_col, fibo_382_col, fibo_618_col):
    """Classify one row into a trend label from four of its columns.

    The checks run in order and the first one that holds wins; a row matching none of
    them is labelled 'Giảm mạnh'.
    """
    if (row[high_col] >= 0) & (row[fibo_382_col] >= 0):
        return 'Tăng mạnh'
    if (row[high_col] < 0) & (row[fibo_382_col] >= 0):
        return 'Tăng'
    if (row[fibo_618_col] >= 0) & (row[fibo_382_col] < 0):
        return 'Trung lập'
    if (row[low_col] >= 0) & (row[fibo_618_col] < 0):
        return 'Giảm'
    return 'Giảm mạnh'


stock_candle_df = _first_ta_row_per_stock('df_candle', [
    'stock',
    'from_week_open', 'from_week_last_high', 'from_week_last_low',
    'from_month_open', 'from_month_last_high', 'from_month_last_low',
    'from_quarter_open', 'from_quarter_last_high', 'from_quarter_last_low',
    'from_year_open', 'from_year_last_high', 'from_year_last_low',
])

stock_pivot_df = _first_ta_row_per_stock('df_pivot', [
    'stock',
    'from_month_pivot',
    'from_quarter_pivot',
    'from_year_pivot',
])

stock_ma_df = _first_ta_row_per_stock('df_ma', [
    'stock',
    'from_month_ma5', 'from_month_ma20',
    'from_quarter_ma60', 'from_quarter_ma120',
    'from_year_ma240', 'from_year_ma480',
])

stock_fibo_df = _first_ta_row_per_stock('df_fibo', [
    'stock',
    'from_month_fibo_382', 'from_month_fibo_500', 'from_month_fibo_618',
    'from_quarter_fibo_382', 'from_quarter_fibo_500', 'from_quarter_fibo_618',
    'from_year_fibo_382', 'from_year_fibo_500', 'from_year_fibo_618',
])

# Left-join every technical table onto the score table, one after another, by stock.
stock_ta_filter_df = stock_score_filter_df
for ta_table in (stock_candle_df, stock_pivot_df, stock_ma_df, stock_fibo_df):
    stock_ta_filter_df = stock_ta_filter_df.merge(ta_table, on='stock', how='left')

# Each trend column pairs the last high/low columns of the next shorter period with the
# fibo 382/618 columns of its own period: (high, low, fibo_382, fibo_618).
trend_inputs = {
    'month_trend': ('from_week_last_high', 'from_week_last_low', 'from_month_fibo_382', 'from_month_fibo_618'),
    'quarter_trend': ('from_month_last_high', 'from_month_last_low', 'from_quarter_fibo_382', 'from_quarter_fibo_618'),
    'year_trend': ('from_quarter_last_high', 'from_quarter_last_low', 'from_year_fibo_382', 'from_year_fibo_618'),
}
for trend_column, trend_columns in trend_inputs.items():
    stock_ta_filter_df[trend_column] = stock_ta_filter_df.apply(_trend_label, axis=1, args=trend_columns)

#### Báo cáo hàng ngày

In [206]:
from math import ceil, floor

def _describe_price_moves(stock_rows):
    """Render rows as 'STOCK (+x.xx%)' items joined with ', '.

    ``price_change`` is multiplied by 100 and shown with an explicit sign and 2 decimals.
    """
    moves = stock_rows[['stock', 'price_change']].copy()
    moves['price_change'] = moves['price_change'].apply(lambda x: f"{round(x*100, 2):+.2f}%")
    items = moves.apply(lambda row: f"{row['stock']} ({row['price_change']})", axis=1)
    return items.str.cat(sep=', ')


# Five lowest and five highest t0_score rows of the market ranking table, as display text.
bot5_stock = _describe_price_moves(market_top_stock_df.sort_values('t0_score').head(5))
top5_stock = _describe_price_moves(market_top_stock_df.sort_values('t0_score', ascending=False).head(5))

def _rows_named(frame, name):
    """Rows of ``frame`` whose 'name' column equals ``name``."""
    return frame[frame['name'] == name]


def _index_card_value(stock, column):
    """The single ``column`` value of ``market_index_card_df`` for the row of ``stock``."""
    return market_index_card_df[market_index_card_df['stock'] == stock][column].item()


def _percent_text(ratio):
    """Format ``ratio`` as a percentage rounded to 2 decimals, e.g. 0.5 -> '50.0%'.

    The built-in ``round`` is used instead of the ``.round`` method: the method only
    exists on numpy scalars, so the previous code raised AttributeError whenever the
    value arrived as a built-in float.
    """
    return f"{round(ratio * 100, 2)}%"


def _price_breadth_share(label):
    """Truncated whole-number percentage of the ``label`` row's count in ``market_price_breath_df``."""
    total_count = market_price_breath_df['count'].sum().item()
    share = market_price_breath_df[market_price_breath_df['name'] == label]['count'] / total_count * 100
    # .item() replaces int(<one-element Series>), which pandas has deprecated.
    return f"{int(share.item())}%"


def _flow_share(group_name, flow_column):
    """``flow_column`` as a percentage of in_flow + out_flow for ``group_name`` in ``group_breath_df``."""
    row = _rows_named(group_breath_df, group_name)
    return (row[flow_column].item() / row[['in_flow', 'out_flow']].sum(axis=1)).item() * 100


def _group_entries(suffix, group_name):
    """Score, liquidity, inflow and outflow report entries of one group, keyed by ``suffix``.

    Inflow is rounded up and outflow rounded down to whole percentages.
    """
    score_row = _rows_named(group_eod_score_liquidity_df, group_name)
    return [
        {'name': f'score_{suffix}', 'value': f"{score_row['score'].item():+.2f}"},
        {'name': f'liquidity_{suffix}', 'value': _percent_text(score_row['liquidity'].item())},
        {'name': f'inflow_{suffix}', 'value': f"{ceil(_flow_share(group_name, 'in_flow'))}%"},
        {'name': f'outflow_{suffix}', 'value': f"{floor(_flow_share(group_name, 'out_flow'))}%"},
    ]


def _strongest_weakest_text(group_code):
    """Sentence naming the two highest- and two lowest-score rows whose 'group' is ``group_code``."""
    members = group_eod_score_liquidity_df[group_eod_score_liquidity_df['group'] == group_code]
    strongest = members.sort_values('score', ascending=False)['name'].iloc[:2].str.cat(sep=', ')
    weakest = members.sort_values('score')['name'].iloc[:2].str.cat(sep=', ')
    # Fix: the text used to be one f-string split with a backslash continuation, which
    # embedded the next line's source indentation (a long run of spaces) in the stored value.
    return f"dòng tiền mạnh nhất là {strongest} và yếu nhất là {weakest}"


def _build_daily_report_entries():
    """Assemble the ordered list of ``{'name', 'value'}`` entries of the daily report."""
    entries = [
        {'name': 'report_datetime', 'value': market_update_time['date'].iloc[0][10:]},
        {'name': 'report_date', 'value': market_update_time['date'].iloc[0][10:20]},
        {'name': 'market_sentiment', 'value': market_sentiment_df['sentiment'][market_sentiment_df.last_valid_index()]},
        {'name': 'liquidity', 'value': _percent_text(_rows_named(group_eod_score_liquidity_df, 'Thị trường')['liquidity'].iloc[0])},
        {'name': 'vnindex_close', 'value': _index_card_value('VNINDEX', 'close')},
        {'name': 'vnindex_change_percent', 'value': f"{round(_index_card_value('VNINDEX', 'change_percent') * 100, 2):+.2f}%"},
        {'name': 'vnindex_change_value', 'value': f"{round(_index_card_value('VNINDEX', 'change_value'), 2):+.2f}"},
        {'name': 'market_volume', 'value': f"{int(market_price_breath_df['volume'].sum()):,} cổ phiếu"},
        {'name': 'market_value', 'value': f"{int(market_price_breath_df['value'].sum()):,} tỷ"},
        {'name': '%_tang_gia', 'value': _price_breadth_share('Tăng giá')},
        {'name': '%_giam_gia', 'value': _price_breadth_share('Giảm giá')},
        {'name': '%_khong_doi', 'value': _price_breadth_share('Không đổi')},
        {'name': 'vn30f1m_close', 'value': _index_card_value('VN30F1M', 'close')},
        {'name': 'vn30f1m_change_value', 'value': f"{round(_index_card_value('VN30F1M', 'change_value'), 2):+.2f}"},
        {'name': 'vn30f1m_volume', 'value': f"{_index_card_value('VN30F1M', 'volume'):,} hợp đồng"},
    ]

    # Performance groups A-D: four metrics followed by the strongest/weakest sentence.
    for group_code in ('A', 'B', 'C', 'D'):
        entries.extend(_group_entries(f'hs{group_code}', f'Hiệu suất {group_code}'))
        entries.append({'name': f'top_group_hs{group_code}', 'value': _strongest_weakest_text(group_code)})

    # Lowest- and highest-score rows of the 'cap' group: the name followed by the four metrics.
    cap_names_by_score = group_eod_score_liquidity_df[group_eod_score_liquidity_df['group'] == 'cap'].sort_values('score')['name']
    for suffix, cap_name in (('bot_cap', cap_names_by_score.iloc[0]), ('top_cap', cap_names_by_score.iloc[-1])):
        entries.append({'name': f'name_{suffix}', 'value': cap_name})
        entries.extend(_group_entries(suffix, cap_name))

    entries.append({'name': 'top5_stock', 'value': top5_stock})
    entries.append({'name': 'bot5_stock', 'value': bot5_stock})
    return entries


# One row per report field; 'value' mixes raw numbers and preformatted text.
data = _build_daily_report_entries()

daily_report_df = pd.DataFrame(data)

#### Lưu vào SQL

- Lưu vào mysql và truyền lên sv

In [208]:
from urllib.parse import quote_plus

from sqlalchemy import create_engine, text

# Database connection settings. Every value can be overridden with a T2M_DB_* environment
# variable; the literals are the previous hardcoded values, kept as fallbacks so the
# notebook still runs unchanged.
# NOTE(security): credentials should not live in the notebook. Set the environment
# variables and remove the literal fallbacks (and rotate the password) when possible.
username = os.environ.get('T2M_DB_USER', 'twan')
password = os.environ.get('T2M_DB_PASSWORD', 'chodom')
database = os.environ.get('T2M_DB_NAME', 't2m')
host = os.environ.get('T2M_DB_HOST', 'localhost')
# host = '14.225.192.30'
port = os.environ.get('T2M_DB_PORT', '3306')
# User and password are URL-escaped so characters such as '@' or '/' cannot break the URL.
engine = create_engine(f"mysql+pymysql://{quote_plus(username)}:{quote_plus(password)}@{host}:{port}/{database}")

# Helper that stores one DataFrame as a SQL table
def save_dataframe_to_sql(df, table_name, engine):
    """Replace the SQL table ``table_name`` with the contents of ``df``.

    The frame (including its index) is first written to ``temp_<table_name>``, replacing
    any table of that name; the existing ``table_name`` is then dropped and the temporary
    table renamed into its place.

    Args:
        df: DataFrame to store.
        table_name: Target table name. It is interpolated into the SQL text, so it must
            be a trusted identifier.
        engine: SQLAlchemy engine to write through.
    """
    temp_table_name = f"temp_{table_name}"

    df.to_sql(name=temp_table_name, con=engine, if_exists='replace', index=True)
    with engine.begin() as connection:
        connection.execute(text(f"DROP TABLE IF EXISTS {table_name}"))
        connection.execute(text(f"ALTER TABLE {temp_table_name} RENAME TO {table_name}"))

# DataFrames to store, each paired with its table name
dataframes = [
    (market_update_time, 'market_update_time'),
    (market_index_card_df, 'market_index_card_df'),
    (market_price_breath_df, 'market_price_breath_df'),
    (market_index_price_chart_df, 'market_index_price_chart_df'),
    (market_index_ta_df, 'market_index_ta_df'),
    (market_sentiment_df, 'market_sentiment_df'),
    (market_top_stock_df, 'market_top_stock_df'),

    (nn_td_top_stock, 'nn_td_top_stock'),
    (nn_td_20p_df, 'nn_td_20p_df'),
    (nn_td_buy_sell_df, 'nn_td_buy_sell_df'),

    (group_itd_score_liquidity_df, 'group_itd_score_liquidity_df'),
    (group_eod_score_liquidity_df, 'group_eod_score_liquidity_df'),
    (group_breath_df, 'group_breath_df'),
    (group_score_5p_df, 'group_score_5p_df'),
    (group_score_ranking_df, 'group_score_ranking_df'),
    (group_score_power_df, 'group_score_power_df'),
    (group_price_chart_df, 'group_price_chart_df'),
    (group_industry_ranking_df, 'group_industry_ranking_df'),
    (group_stock_top_10_df, 'group_stock_top_10_df'),
    (group_ms_chart_df, 'group_ms_chart_df'),

    (stock_ta_df, 'stock_ta_df'),
    (stock_score_power_df, 'stock_score_power_df'),
    (stock_score_5p_df, 'stock_score_5p_df'),
    (stock_ta_filter_df, 'stock_ta_filter_df'),
    (stock_score_filter_df, 'stock_score_filter_df'),
    (stock_price_chart_df, 'stock_price_chart_df'),

    (daily_report_df, 'daily_report_df')
]

# Store every DataFrame; the engine's connections are released even when a save fails.
try:
    for df, table_name in dataframes:
        save_dataframe_to_sql(df, table_name, engine)
        print(f"{table_name} saved!")
finally:
    engine.dispose()

market_update_time saved!
market_index_card_df saved!
market_price_breath_df saved!
market_index_price_chart_df saved!
market_index_ta_df saved!
market_sentiment_df saved!
market_top_stock_df saved!
nn_td_top_stock saved!
nn_td_20p_df saved!
nn_td_buy_sell_df saved!
group_itd_score_liquidity_df saved!
group_eod_score_liquidity_df saved!
group_breath_df saved!
group_score_5p_df saved!
group_score_ranking_df saved!
group_score_power_df saved!
group_price_chart_df saved!
group_industry_ranking_df saved!
group_stock_top_10_df saved!
group_ms_chart_df saved!
stock_ta_df saved!
stock_score_power_df saved!
stock_score_5p_df saved!
stock_ta_filter_df saved!
stock_score_filter_df saved!
stock_price_chart_df saved!
daily_report_df saved!


In [209]:
# import subprocess
# import paramiko
# import os
# import configparser
# import time  # Thêm module để đếm thời gian thực hiện

# def load_ssh_config(config_file):
#     config = configparser.ConfigParser()
#     config.read(config_file)
    
#     server_ip = config.get('ssh', 'server_ip')
#     server_username = config.get('ssh', 'server_username')
#     server_password = config.get('ssh', 'server_password')
#     server_path = config.get('ssh', 'server_path')
#     server_db_name = config.get('ssh', 'server_db_name')
    
#     return server_ip, server_username, server_password, server_path, server_db_name

# def backup_database(local_my_cnf_path, local_db_name, local_backup_file_path):
#     # Tạo bản sao lưu từ cơ sở dữ liệu cục bộ
#     try:
#         start_time = time.time()  # Bắt đầu đếm thời gian
#         subprocess.run(
#             [
#                 r"C:\Program Files\MySQL\MySQL Server 8.0\bin\mysqldump.exe",
#                 f"--defaults-file={local_my_cnf_path}",
#                 local_db_name,
#                 "--result-file", local_backup_file_path
#             ],
#             check=True
#         )
#         print(f"Database {local_db_name} backed up successfully to {local_backup_file_path}.")
#         print(f"Backup completed in {time.time() - start_time:.2f} seconds.")
#     except subprocess.CalledProcessError as e:
#         print(f"Error backing up database: {e}")

# def transfer_backup_to_server(local_backup_file_path, server_ip, server_username, server_password, server_path):
#     # Truyền bản sao lưu lên máy chủ
#     try:
#         start_time = time.time()  # Bắt đầu đếm thời gian
#         ssh = paramiko.SSHClient()
#         ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
#         ssh.connect(server_ip, username=server_username, password=server_password)

#         # Mở SFTP để truyền tệp
#         sftp = ssh.open_sftp()
#         remote_file_path = os.path.join(server_path, os.path.basename(local_backup_file_path))
#         remote_file_path = remote_file_path.replace("\\", "/")  # Thay thế ký tự backslash nếu có

#         sftp.put(local_backup_file_path, remote_file_path)
#         sftp.close()
#         ssh.close()

#         print(f"Backup file transferred to {server_ip}:{remote_file_path}.")
#         print(f"Transfer completed in {time.time() - start_time:.2f} seconds.")
#     except Exception as e:
#         print(f"Error transferring backup file: {e}")

# def restore_database_on_server(server_ip, server_username, server_password, server_db_name, server_backup_file, server_my_cnf_path):
#     # Phục hồi cơ sở dữ liệu trên máy chủ, xóa CSDL cũ nếu tồn tại
#     try:
#         start_time = time.time()  # Bắt đầu đếm thời gian
#         ssh = paramiko.SSHClient()
#         ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
#         ssh.connect(server_ip, username=server_username, password=server_password)

#         # Xóa CSDL cũ nếu tồn tại, sau đó tạo mới
#         drop_create_db_cmd = (
#             f"mysql --defaults-file={server_my_cnf_path} -e \"DROP DATABASE IF EXISTS {server_db_name}; CREATE DATABASE {server_db_name};\""
#         )
#         stdin, stdout, stderr = ssh.exec_command(f"/bin/bash -c '{drop_create_db_cmd}'")
#         exit_status = stdout.channel.recv_exit_status()
#         if exit_status == 0:
#             print(f"Database {server_db_name} dropped and created successfully on server.")
#         else:
#             print(f"Error dropping or creating database on server: {stderr.read().decode()}")

#         # Phục hồi cơ sở dữ liệu
#         restore_cmd = (
#             f"mysql --defaults-file={server_my_cnf_path} {server_db_name} < {server_backup_file}"
#         )

#         stdin, stdout, stderr = ssh.exec_command(f"/bin/bash -c '{restore_cmd}'")
#         exit_status = stdout.channel.recv_exit_status()
        
#         if exit_status == 0:
#             print(f"Database {server_db_name} restored successfully on server.")
#         else:
#             print(f"Error restoring database on server: {stderr.read().decode()}")
        
#         ssh.close()
#         print(f"Restore completed in {time.time() - start_time:.2f} seconds.")
#     except Exception as e:
#         print(f"Error connecting to server: {e}")


# # Thông tin kết nối trên local
# local_db_name = 't2m'
# local_my_cnf_path = r'D:\t2m-project\t2m-pycode\mysql_backup\.my.cnf'
# local_backup_file_path = r'D:\t2m-project\t2m-pycode\mysql_backup\backup.sql'

# # Thông tin kết nối server
# server_my_cnf_path = '/root/mysql_backup/.my.cnf'
# server_ip, server_username, server_password, server_path, server_db_name = load_ssh_config(r'D:\t2m-project\t2m-pycode\mysql_backup\.my.vnf')

# # Sao lưu cơ sở dữ liệu cục bộ
# backup_database(local_my_cnf_path, local_db_name, local_backup_file_path)

# # Truyền bản sao lưu lên máy chủ
# transfer_backup_to_server(local_backup_file_path, server_ip, server_username, server_password, server_path)

# # Phục hồi cơ sở dữ liệu trên máy chủ
# server_backup_file = os.path.join(server_path, os.path.basename(local_backup_file_path)).replace("\\", "/")
# restore_database_on_server(server_ip, server_username, server_password, server_db_name, server_backup_file, server_my_cnf_path)

- Code cũ

In [210]:
# from sqlalchemy import create_engine, MetaData

# # Thông tin kết nối cơ sở dữ liệu
# username = 'twan'
# password = 'chodom'
# database = 't2m'
# host = '14.225.192.30'
# port = '3306'
# engine = create_engine(f"mysql+pymysql://{username}:{password}@{host}:{port}/{database}")


# #Xoá toàn bộ các bảng cũ
# conn = engine.connect()
# metadata = MetaData()
# metadata.reflect(bind=engine)
# for table in reversed(metadata.sorted_tables):
#     table.drop(engine)
# conn.close()