In [None]:
import os
import pandas as pd
import numpy as np
from datetime import timedelta
import datetime as dt
import copy

import warnings
# Silence every warning for the rest of the notebook run.
warnings.filterwarnings("ignore")
# Explicitly ignore FutureWarning as well (already covered by the blanket filter above).
warnings.simplefilter('ignore', category=FutureWarning)
# Disable pandas' chained-assignment (SettingWithCopy) warning.
pd.options.mode.chained_assignment = None

#### Import Data

In [None]:
# Read the 'name_map' sheet once and derive three code -> value lookups from it:
# full display name, display order and display group.
# (Previously the same sheet was parsed from disk three times.)
_name_map_sheet = pd.read_excel("../data/t2m_classification.xlsx", sheet_name='name_map')

name_map = _name_map_sheet.drop(columns=['group', 'order'])
name_map_dict = name_map.set_index('code')['full_name'].to_dict()

order_map = _name_map_sheet.drop(columns=['group', 'full_name'])
order_map_dict = order_map.set_index('code')['order'].to_dict()

group_map = _name_map_sheet.drop(columns=['order', 'full_name'])
group_map_dict = group_map.set_index('code')['group'].to_dict()

In [None]:
# Read every CSV file exported by the ami EOD job, keyed by file name without extension.
# Each frame is ordered newest-first on the raw 'date' value.
eod_item_dict = {}
folder_path = 'D:\\t2m-project\\ami-data\\ami_eod_data'
for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        key = os.path.splitext(filename)[0]
        eod_item_dict[key] = pd.read_csv(os.path.join(folder_path, filename)).sort_values('date', ascending=False).reset_index(drop=True)

for item, df in eod_item_dict.items():
    # Dates are stored as yymmdd. If the column was parsed as an integer, values for
    # years 2000-2009 lose their leading zero (e.g. 090105 -> 90105) and would no
    # longer match '%y%m%d'; pad back to six characters before parsing.
    # NOTE(review): assumes the column holds yymmdd integers or strings -- confirm against the export.
    df['date'] = pd.to_datetime(df['date'].astype(str).str.zfill(6), format='%y%m%d')
    eod_item_dict[item] = df

# Summary table of all loaded items, with helper columns used to classify them by name shape
eod_item_df = pd.DataFrame(list(eod_item_dict)).rename(columns={0: 'item'})
eod_item_df['len'] = eod_item_df['item'].str.len()
eod_item_df['last_2chars'] = eod_item_df['item'].str[-2:]
eod_item_df['first_4chars'] = eod_item_df['item'].str[:4]

# Split item names into stocks (3-character names) and indexes
# (4-9 characters, excluding 6-character names and the item '0001')
_helper_columns = ['len', 'last_2chars', 'first_4chars']
_name_length = eod_item_df['len']
_is_stock = _name_length == 3
_is_index = (
    (_name_length > 3)
    & (_name_length != 6)
    & (_name_length < 10)
    & (eod_item_df['item'] != '0001')
)
stock_name_df = eod_item_df[_is_stock].reset_index(drop=True).drop(_helper_columns, axis=1)
index_name_df = eod_item_df[_is_index].reset_index(drop=True).drop(_helper_columns, axis=1)

# Stock frames drop the 'option' column; index frames expose it as 'value' and drop 'cap'
_stock_names = set(stock_name_df['item'])
_index_names = set(index_name_df['item'])
eod_stock_dict = {
    name: frame.drop(['option'], axis=1)
    for name, frame in eod_item_dict.items()
    if name in _stock_names
}
eod_index_dict = {
    name: frame.rename(columns={'option': 'value'}).drop('cap', axis=1)
    for name, frame in eod_item_dict.items()
    if name in _index_names
}

# Item names for proprietary-trading ('TD' suffix) and foreign ('NN' suffix) flows,
# for single stocks (6-character names) and for indexes (10+ characters, not starting with 'VN30')
def _item_names(mask):
    """Return the 'item' column of eod_item_df for rows selected by mask, re-indexed from 0."""
    return eod_item_df[mask].reset_index(drop=True).drop(['len', 'last_2chars', 'first_4chars'], axis=1)

_suffix = eod_item_df['last_2chars']
stock_name_td_df = _item_names((eod_item_df['len'] == 6) & (_suffix == 'TD'))
stock_name_nn_df = _item_names((eod_item_df['len'] == 6) & (_suffix == 'NN'))
index_td_nn_df = _item_names(
    (eod_item_df['len'] >= 10)
    & (eod_item_df['first_4chars'] != 'VN30')
    & ((_suffix == 'NN') | (_suffix == 'TD'))
)

# In these exports the OHLCV columns carry buy/sell figures, so rename them accordingly
_flow_column_names = {'open': 'sell_volume', 'close': 'buy_volume', 'volume': 'sell_value', 'option': 'buy_value'}

def _flow_frames(names_df, unused_columns):
    """Select the frames listed in names_df, drop unused_columns and apply the flow column names."""
    wanted = set(names_df['item'])
    return {
        name: frame.drop(unused_columns, axis=1).rename(columns=_flow_column_names)
        for name, frame in eod_item_dict.items()
        if name in wanted
    }

stock_td_dict = _flow_frames(stock_name_td_df, ['high', 'low', 'cap'])
stock_nn_dict = _flow_frames(stock_name_nn_df, ['high', 'low', 'cap'])
index_td_nn_dict = _flow_frames(index_td_nn_df, ['high', 'low', 'cap', 'stock'])

# Rescale the index-level NN/TD tables in place: volumes divided by 1,000, values divided by
# 1,000,000,000; sell figures are stored as negatives so that net = buy + sell.
for flow_df in index_td_nn_dict.values():
    flow_df['buy_volume'] = flow_df['buy_volume'] / 1000
    flow_df['sell_volume'] = (-flow_df['sell_volume']) / 1000
    flow_df['buy_value'] = flow_df['buy_value'] / 1000000000
    flow_df['sell_value'] = (-flow_df['sell_value']) / 1000000000
    for measure in ('volume', 'value'):
        flow_df[f'net_{measure}'] = flow_df[f'buy_{measure}'] + flow_df[f'sell_{measure}']

In [None]:
# EOD date series: the dates of the VNINDEX frame (same order as that frame)
date_series = pd.DataFrame(eod_index_dict['VNINDEX']['date']).rename(columns={0:'date'})

# Intraday 5-minute time series for the first date in date_series:
# 09:00-11:25 and 13:00-14:55.
time_series_list = []
for day in date_series['date'].iloc[:1].tolist():
    # Format only the calendar date: interpolating the Timestamp itself would also embed
    # its 00:00:00 time, producing strings with two clock times in them.
    day_label = pd.Timestamp(day).strftime('%Y-%m-%d')
    # '5min' replaces the deprecated '5T' alias.
    time_series_list.extend(pd.date_range(start=f'{day_label} 09:00:00', end=f'{day_label} 11:25:00', freq='5min'))
    time_series_list.extend(pd.date_range(start=f'{day_label} 13:00:00', end=f'{day_label} 14:55:00', freq='5min'))
time_series = pd.DataFrame(time_series_list).rename(columns={0:'date'})

# Intraday frame starting at 09:15 (the first three 5-minute bars are skipped)
itd_series = pd.DataFrame(time_series_list[3:]).rename(columns={0:'date'})

In [None]:
def calculate_time_percent(time):
    """Return the fraction of the trading day elapsed at the END of the 5-minute bar starting at `time`.

    The trading day is 09:00-11:30 plus 13:00-15:00 (270 minutes in total).

    Args:
        time: a datetime-like value supporting `+ timedelta` and `.time()`.

    Returns:
        elapsed trading minutes / 270. Bars ending before 09:00 give 0; bars ending
        inside the lunch break give the full morning fraction. Bars ending after
        15:00 give a value above 1 (the caller is expected to cap it).
    """
    start_time_am = dt.time(9, 00)
    end_time_am = dt.time(11, 30)
    start_time_pm = dt.time(13, 00)
    end_time_pm = dt.time(15, 00)

    def time_difference_in_minutes(time1, time2):
        """Whole minutes from time1 to time2, or 0 when time2 is earlier than time1."""
        delta1 = dt.timedelta(hours=time1.hour, minutes=time1.minute, seconds=time1.second)
        delta2 = dt.timedelta(hours=time2.hour, minutes=time2.minute, seconds=time2.second)
        diff = delta2 - delta1
        # A negative timedelta has days=-1 and a large positive .seconds; clamp instead.
        return max(int(diff.total_seconds()), 0) // 60

    # Shift to the end of the 5-minute bar
    time = (time + timedelta(minutes=5)).time()
    morning_minutes = time_difference_in_minutes(start_time_am, end_time_am)
    full_time_range = morning_minutes + time_difference_in_minutes(start_time_pm, end_time_pm)

    if time <= end_time_am:
        time_range = time_difference_in_minutes(start_time_am, time)
    elif time < start_time_pm:
        # Lunch break: no trading time has elapsed since the morning close.
        # (Previously this case left time_range unassigned and raised UnboundLocalError.)
        time_range = morning_minutes
    else:
        time_range = time_difference_in_minutes(start_time_am, time) - time_difference_in_minutes(end_time_am, start_time_pm)

    return time_range/full_time_range

# Elapsed-session fraction for every intraday bar, capped at 1;
# current_time_percent is the value for the last bar in time_series.
time_percent = time_series.copy()
_raw_percent = time_percent['date'].apply(calculate_time_percent)
time_percent['percent'] = _raw_percent
time_percent['percent'] = _raw_percent.where(_raw_percent < 1, 1)
current_time_percent = time_percent['percent'].iloc[-1]

In [None]:
# Load the intraday HNXINDEX export (newest row first); its latest timestamp
# is taken as the current data time.
itd_item_dict = {}
folder_path = 'D:\\t2m-project\\ami-data\\ami_itd_data'
itd_item_dict['HNXINDEX'] = (
    pd.read_csv(os.path.join(folder_path, 'HNXINDEX.csv'))
    .sort_values('date', ascending=False)
    .reset_index(drop=True)
)
current_time = pd.to_datetime(
    itd_item_dict['HNXINDEX']['date'].iloc[0],
    format='%y%m%d %H%M%S',
)

# Build the "last updated" time
def get_update_time(start_time_am, end_time_am, start_time_pm, end_time_pm):
    """Return the clock time to show as the last update, based on the wall clock.

    Args:
        start_time_am, end_time_am: morning session bounds (datetime.time).
        start_time_pm, end_time_pm: afternoon session bounds (datetime.time).

    Returns:
        - Saturday/Sunday: end_time_pm.
        - Weekday before the morning open: end_time_pm.
        - Weekday inside a session: the current wall-clock time.
        - Weekday in the lunch break: end_time_am.
        - Weekday at or after the afternoon close: end_time_pm.
    """
    # Sample the clock exactly once. Calling now() separately for each check could
    # straddle midnight (e.g. Sunday -> Monday) and make the function return None.
    now = dt.datetime.now()
    if now.weekday() > 4:
        return end_time_pm

    current_time = now.time()
    if current_time < start_time_am:
        return end_time_pm
    if current_time < end_time_am:
        return current_time
    if current_time < start_time_pm:
        return end_time_am
    if current_time < end_time_pm:
        return current_time
    return end_time_pm

# Combine the data date with the session-aware clock time and render the update label
_session_bounds = (dt.time(9, 00), dt.time(11, 30), dt.time(13, 00), dt.time(15, 00))
time_update = get_update_time(*_session_bounds)
date_time_update = dt.datetime.combine(current_time.date(), time_update)
_update_label = f"Cập nhât: {date_time_update.strftime('%d/%m/%Y %H:%M:%S')}"
update_time = pd.DataFrame([_update_label]).rename(columns={0:'date'})

#### Đường trung bình

In [None]:
# Rolling high/low channels, moving averages and trend flags for every stock
_PRICE_WINDOWS = (5, 20, 60, 120, 240, 480)
_VOLUME_WINDOWS = (5, 20, 60, 120)

def _with_rolling_features(price_df):
    """Return price_df (newest-first) extended with rolling channel, MA and trend columns.

    All rolling statistics are computed in chronological order with min_periods=1:
      - high{w}/low{w}: rolling max of 'high' / min of 'low'
      - ma{w}_V: rolling mean of 'volume', shifted by one row (excludes the current row)
      - ma{w}: rolling mean of 'close'
      - trend_{w}p: 1 when 'close' is above the previous row's (high{w}+low{w})/2, else 0
    """
    chrono = price_df.sort_values(by=['date'], ascending=True).reset_index(drop=True)

    features = {}
    for window in _PRICE_WINDOWS:
        features[f'high{window}'] = chrono['high'].rolling(window=window, min_periods=1).max()
        features[f'low{window}'] = chrono['low'].rolling(window=window, min_periods=1).min()
    for window in _VOLUME_WINDOWS:
        features[f'ma{window}_V'] = chrono['volume'].rolling(window=window, min_periods=1).mean().shift(1)
    for window in _PRICE_WINDOWS:
        features[f'ma{window}'] = chrono['close'].rolling(window=window, min_periods=1).mean()
    chrono = chrono.assign(**features)

    trends = {
        f'trend_{window}p': (
            chrono['close'] > ((chrono[f'high{window}'] + chrono[f'low{window}'])/2).shift(1)
        ).astype(int)
        for window in _PRICE_WINDOWS
    }
    chrono = chrono.assign(**trends)

    # Back to newest-first with a fresh 0..n-1 index
    return chrono.sort_values(by=['date'], ascending=False).reset_index(drop=True)

eod_stock_dict = {ticker: _with_rolling_features(frame) for ticker, frame in eod_stock_dict.items()}

In [None]:
# Remove stocks that have not traded on the latest session
# (their newest row is not the newest date in date_series).
latest_session = date_series['date'].iloc[0]
delete_stock = [stock for stock, df in eod_stock_dict.items() if latest_session != df['date'].iloc[0]]
for stock in delete_stock:
    # Keys come straight from the dict, so a plain pop cannot fail; the previous
    # try/except merely repeated the same call in its handler.
    eod_stock_dict.pop(stock)

# Remove stocks whose price history contains a faulty close of 0
delete_stock = [stock for stock, df in eod_stock_dict.items() if df['close'].min() == 0]
for stock in delete_stock:
    eod_stock_dict.pop(stock)

# Liquidity ratio = volume / previous 5-session average volume. The newest row is an
# in-progress session, so its average is scaled by the elapsed share of the trading day.
# 'cap' is replaced by its 20-session rolling mean (computed oldest-to-newest).
for df in eod_stock_dict.values():
    df['liquid_ratio'] = df['volume'] / (df['ma5_V'])
    # Positional write on the frame itself: the chained `df[col].iloc[0] = ...` form
    # does not update the frame when pandas Copy-on-Write is enabled.
    df.iloc[0, df.columns.get_loc('liquid_ratio')] = df['volume'].iloc[0] / ((df['ma5_V']).iloc[0]*current_time_percent)
    df['cap'] = df['cap'][::-1].rolling(window=20).mean()[::-1]

#### Phân nhóm cổ phiếu

In [None]:
# Classification table, restricted to the stocks that are still in eod_stock_dict
stock_classification = pd.read_excel('../data/t2m_classification.xlsx')
stock_classification = stock_classification[stock_classification['stock'].isin(list(eod_stock_dict.keys()))]

# First trading session of the current month: the last entry of date_series
# on or after the 1st (the filter keeps date_series' order).
# NOTE(review): taking iloc[-1] assumes date_series is newest-first -- confirm.
# `>=` (not `>`): a session on the 1st itself must count, otherwise it is skipped and,
# when it is the only session of the month so far, the selection is empty and raises.
_latest_session = date_series['date'].iloc[0]
_month_start = pd.Timestamp(_latest_session.year, _latest_session.month, 1)
first_day_of_month = date_series[date_series['date'] >= _month_start]['date'].iloc[-1]

# Price and (20-session average) market cap of every stock on the first session of the month;
# stocks without a row on that date fall back to their newest row.
price_arr = []
cap_arr = []
for stock, df in eod_stock_dict.items():
    month_open_rows = df[df['date'] == first_day_of_month]
    snapshot = month_open_rows if len(month_open_rows) > 0 else df
    price_arr.append(snapshot['close'].iloc[0].item())
    cap_arr.append(snapshot['cap'].iloc[0].item())

# Market-cap classification table.
# Values are attached by ticker, not by position: eod_stock_dict is in file-listing order,
# whereas stock_classification is in spreadsheet order and may hold fewer rows.
price_by_stock = dict(zip(eod_stock_dict.keys(), price_arr))
cap_by_stock = dict(zip(eod_stock_dict.keys(), cap_arr))

vonhoa_classification_df = stock_classification.copy()
vonhoa_classification_df['price'] = vonhoa_classification_df['stock'].map(price_by_stock)
vonhoa_classification_df['cap'] = vonhoa_classification_df['stock'].map(cap_by_stock)

# Unit used for the group thresholds: 1/10,000 of the summed cap of all stocks.
# NOTE(review): a single NaN cap makes cap_coef NaN and every stock 'penny' -- confirm this cannot occur.
cap_coef = sum(cap_arr)/10000

def _marketcap_group(cap, price):
    """Classify one stock from its cap (relative to cap_coef) and price.

    small: cap in (1, 10) units, or cap in [10, 20) units with price < 10
    mid:   cap in [10, 20) units with price >= 10, or cap in [20, 100) units
    large: cap >= 100 units
    penny: everything else (including missing values)
    """
    in_10_to_20 = (cap >= 10*cap_coef) & (cap < 20*cap_coef)
    if ((cap > cap_coef) & (cap < 10*cap_coef)) | (in_10_to_20 & (price < 10)):
        return 'small'
    if (in_10_to_20 & (price >= 10)) | ((cap >= 20*cap_coef) & (cap < 100*cap_coef)):
        return 'mid'
    if cap >= 100*cap_coef:
        return 'large'
    return 'penny'

vonhoa_classification_df['marketcap_group'] = vonhoa_classification_df.apply(
    lambda x: _marketcap_group(x['cap'], x['price']), axis=1)

stock_classification = pd.concat([stock_classification, vonhoa_classification_df['marketcap_group']], axis=1)

In [None]:
# Per-stock lookups (ticker -> category) for each classification dimension
_classification_indexed = stock_classification.set_index('stock')
stock_by_industry = _classification_indexed['industry_name'].to_dict()
stock_by_perform = _classification_indexed['industry_perform'].to_dict()
stock_by_marketcap = _classification_indexed['marketcap_group'].to_dict()

# Containers filled below: group key -> {ticker -> price frame}
eod_all_stock, eod_industry_name, eod_industry_perform, eod_marketcap_group = {}, {}, {}, {}

# Build per-category sub-dictionaries of stock data
def create_mapping(stock_dict, category_dict):
    """Return {category: {stock: stock_dict[stock]}} for the stocks listed under each category.

    Stocks that are not present in stock_dict are skipped.
    """
    return {
        category: {ticker: stock_dict[ticker] for ticker in members if ticker in stock_dict}
        for category, members in category_dict.items()
    }

# Distinct categories per dimension (market-cap groups are a fixed list)
unique_industries = np.unique(list(stock_by_industry.values()))
unique_performs = np.unique(list(stock_by_perform.values()))
unique_marketcaps = ['large', 'mid', 'small', 'penny']

def _frames_in_category(category_by_stock, category):
    """Return {ticker: price frame} for the tickers mapped to `category` that are in eod_stock_dict."""
    return {
        ticker: eod_stock_dict[ticker]
        for ticker, assigned in category_by_stock.items()
        if assigned == category and ticker in eod_stock_dict
    }

# All stocks as a single group
eod_all_stock['all_stock'] = dict(eod_stock_dict)

# One group per industry, per performance class and per market-cap class
eod_industry_name.update(
    {industry: _frames_in_category(stock_by_industry, industry) for industry in unique_industries})
eod_industry_perform.update(
    {performance: _frames_in_category(stock_by_perform, performance) for performance in unique_performs})
eod_marketcap_group.update(
    {marketcap: _frames_in_category(stock_by_marketcap, marketcap) for marketcap in unique_marketcaps})


In [None]:
# Every group key: 'all_stock' followed by the distinct industries, performance classes
# and market-cap classes, each in order of first appearance in stock_classification
group_stock_list = [
    'all_stock',
    *stock_classification['industry_name'].unique().tolist(),
    *stock_classification['industry_perform'].unique().tolist(),
    *stock_classification['marketcap_group'].unique().tolist(),
]

In [None]:
# Slicer table for the stock groups: display name, display order and display group per key
_group_keys = pd.Series(group_stock_list)
group_slicer_df = pd.DataFrame({
    'name': _group_keys.map(name_map_dict),
    'order': _group_keys.map(order_map_dict),
    'group': _group_keys.map(group_map_dict),
})

#### Biểu đồ cấu trúc sóng

In [None]:
import pandas as pd

def transform_ms(stock_group):
    """Compute, per group, the share of member stocks in an up-trend for each trend window.

    Args:
        stock_group: {group_name: {ticker: DataFrame}} where each DataFrame has the
            columns trend_5p ... trend_480p. The input is not modified.

    Returns:
        {group_name: DataFrame} with a 'date' column (taken from the global date_series)
        and one column per trend window holding sum(trend flags) / number of stocks.
        Rows are matched to dates by index label, and missing flags count as 0.
        A group with no stocks yields 0.0 in every trend column.
    """
    trend_columns = ['trend_5p', 'trend_20p', 'trend_60p', 'trend_120p', 'trend_240p', 'trend_480p']

    # Base date frame shared by all groups
    dates_df = pd.DataFrame(date_series['date'].tolist(), columns=['date'])

    # Build a fresh result dict; the inputs are only read, so no deep copy of the
    # (potentially large) price frames is needed.
    result = {}
    for group_name, stocks in stock_group.items():
        group_trends = dates_df.copy()

        for trend in trend_columns:
            if stocks:
                # Side-by-side flags of every member stock for this window
                trend_data = pd.concat([frame[trend] for frame in stocks.values()], axis=1).fillna(0)
                percent_trend = trend_data.sum(axis=1) / len(stocks)
            else:
                # pd.concat([]) raises; an empty group simply has no stock in an up-trend
                percent_trend = 0.0
            group_trends[trend] = percent_trend

        result[group_name] = group_trends.sort_values('date', ascending=False)
    return result


In [None]:
# Trend-breadth (MS) tables for each family of stock groups
all_stock_ms, industry_name_ms, industry_perform_ms, marketcap_group_ms = [
    transform_ms(groups)
    for groups in (eod_all_stock, eod_industry_name, eod_industry_perform, eod_marketcap_group)
]

In [None]:
# Stack every MS table into one long table, tagging each row with its group key
_ms_frames = []
for item in [all_stock_ms, industry_name_ms, industry_perform_ms, marketcap_group_ms]:
    for group, df in item.items():
        df['name'] = group
        _ms_frames.append(df)
# Concatenate once instead of re-copying the growing table on every iteration
market_ms = pd.concat(_ms_frames, axis=0) if _ms_frames else pd.DataFrame()

# Replace the group key with its full display name
market_ms['name'] = market_ms['name'].map(name_map_dict)

#### Điểm dòng tiền từng cổ phiếu

- Điểm dòng tiền EOD

In [None]:
# Keep only rows dated after 1 January of (year of the first VNINDEX row - 4)
price_index_date_series = pd.DataFrame(eod_index_dict['VNINDEX']['date'])
previous_year = price_index_date_series['date'].iloc[0].year - 4
_history_start = pd.Timestamp(year=previous_year, month=1, day=1)

eod_stock_dict = {
    ticker: frame[frame['date'] > _history_start].reset_index(drop=True)
    for ticker, frame in eod_stock_dict.items()
}
date_series = date_series[date_series['date'] > _history_start]

In [None]:
def score_calculation(df):
    """Compute the raw money-flow score from price and volume columns.

    Reads 'close', 'low', 'high', 'close_prev', 'volume', 'ma5_prev' and 'ma5_V' from df.

    score = close_location * |close - close_prev| / close_prev * value_ratio * 100
            + value_ratio / 100
    where close_location = ((close - low) - (high - close)) / (high - low)
    and   value_ratio    = (volume * close) / (ma5_prev * ma5_V).

    Missing results (e.g. high == low giving 0/0) are replaced by 0.
    NOTE(review): the ZeroDivisionError branch is probably never taken for numeric
    pandas Series, where division by zero yields NaN/inf without raising -- confirm intent.
    """
    try:
        traded_value = df['volume']*df['close']
        baseline_value = df['ma5_prev'] * df['ma5_V']
        close_location = ((df['close'] - df['low']) - (df['high'] - df['close'])) / (df['high'] - df['low'])
        abs_move = abs((df['close'] - df['close_prev']))

        directional_part = (close_location * abs_move / df['close_prev'] * traded_value / baseline_value) * 100
        result = directional_part + (traded_value / baseline_value)/100
        result.fillna(0, inplace=True)
        return result
    except ZeroDivisionError:
        # Fall back to the value-ratio term only
        return ((df['volume']*df['close']) / (df['ma5_prev'] * df['ma5_V']))/100

In [None]:
# Build, per stock, the columns needed to score and filter money-flow candidates
_score_input_columns = ['stock', 'date', 'high', 'low', 'close', 'volume', 'liquid_ratio', 'cap',
                        'ma5_V', 'ma20_V', 'ma60_V', 'ma120_V', 'ma5']
raw_eod_score_dict = {}
for stock, stock_df in eod_stock_dict.items():
    # Work on an explicit copy so the new columns never write through to a slice
    scored = stock_df[_score_input_columns].copy()

    # Frames are newest-first, so shift(-1) fetches the previous session's value
    scored['ma5_prev'] = scored['ma5'].shift(-1)
    scored['close_prev'] = scored['close'].shift(-1)

    scored['raw_score'] = score_calculation(scored)
    # The newest session is still in progress: scale its score up by the elapsed share
    # of the trading day. Positional write on the frame itself -- the chained
    # `df[col].iloc[0] = ...` form is a no-op under pandas Copy-on-Write.
    scored.iloc[0, scored.columns.get_loc('raw_score')] = scored['raw_score'].iloc[0] / current_time_percent

    # Rolling statistics computed oldest-to-newest, assigned back by index label
    scored['highest_price'] = scored['close'][::-1].rolling(window=40, min_periods=1).max()[::-1]
    scored['lowest_volume60'] = scored['volume'][::-1].rolling(window=60, min_periods=1).min().shift(1)[::-1]
    scored['mean_volume20'] = scored['volume'][::-1].rolling(window=20, min_periods=1).mean().shift(1)[::-1]

    raw_eod_score_dict[stock] = scored

# Keep only stocks that pass the liquidity and drawdown screens on the first session of the month
def _passes_money_flow_filter(df):
    """Return True when every row of df dated first_day_of_month satisfies all screens.

    Screens: ma5_V, ma20_V, ma60_V, ma120_V and mean_volume20 >= 50000; lowest_volume60 > 0;
    close > 0.382 * highest_price. A frame with no row on that date passes.
    """
    snapshot = df[df['date'] == first_day_of_month]
    volume_ok = (snapshot[['ma5_V', 'ma20_V', 'ma60_V', 'ma120_V', 'mean_volume20']] >= 50000).all().all()
    traded_ok = (snapshot['lowest_volume60'] > 0).all()
    price_ok = (snapshot['close'] > snapshot['highest_price'] * 0.382).all()
    return bool(volume_ok and traded_ok and price_ok)

_score_output_columns = ['stock', 'date', 'close', 'low', 'high', 'volume', 'liquid_ratio', 'raw_score', 'cap']
eod_score_dict = {}
for stock, df in raw_eod_score_dict.items():
    if _passes_money_flow_filter(df):
        eod_score_dict[stock] = df[_score_output_columns]

stock_classification_filtered = stock_classification[
    stock_classification['stock'].isin(eod_score_dict.keys())
].reset_index(drop=True)


In [None]:
# Add the t0 score, its 5-session average (t5) and the classification labels to every stock frame.
# The classification table is indexed once instead of being scanned with a boolean
# mask several times per stock.
_classification_by_stock = stock_classification_filtered.set_index('stock')
if not _classification_by_stock.index.is_unique:
    # The per-stock lookup below expects exactly one classification row per ticker
    raise ValueError('stock_classification_filtered contains duplicated stock rows')

_label_columns = ['industry_name', 'industry_perform', 'stock_perform', 'marketcap_group', 't2m_select']
for stock, score_df in eod_score_dict.items():
    score_df['t0_score'] = score_df['raw_score']

    # Rolling mean must run oldest-to-newest; restore newest-first order afterwards
    score_df.sort_values('date', ascending=True, inplace=True)
    score_df['t5_score'] = score_df['t0_score'].rolling(window=5, min_periods=1).mean()
    score_df.sort_values('date', ascending=False, inplace=True)

    for label in _label_columns:
        score_df[label] = _classification_by_stock.at[stock, label]

In [None]:
def _rank_by_score(score_column):
    """Return (scores, ranks) for `score_column` across all stocks in eod_score_dict.

    scores: date_series plus one column per stock (aligned by index label, gaps filled with 0).
    ranks:  per-row rank of each stock, 1 = highest score, ties share the lowest rank.
    """
    scores = date_series.copy()
    for ticker, frame in eod_score_dict.items():
        scores[ticker] = frame[score_column]
    # Fill once after all columns are in place (previously re-run for every stock)
    scores.fillna(0, inplace=True)
    ranks = scores.iloc[:, 1:].rank(ascending=False, method='min', axis=1)
    return scores, ranks

# T5 ranking, plus price changes versus the previous session (frames are newest-first)
group_score, ranking_group = _rank_by_score('t5_score')

for stock, df in eod_score_dict.items():
    df['price_change'] = df['close'][::-1].pct_change()[::-1]
    df['value_change'] = df['close'][::-1].diff()[::-1]
    df['rank'] = ranking_group[stock]
    df['rank_prev'] = df['rank'].shift(-1)
    df['rank_change'] = df['rank_prev'] - df['rank']

# T0 ranking
group_score, ranking_group = _rank_by_score('t0_score')

# Count, over the latest 20 sessions, how often the T0 rank was within the top 10% of stocks
_top_rank_cutoff = len(stock_classification_filtered)*0.1
for stock, df in eod_score_dict.items():
    df['rank_t0'] = ranking_group[stock]
    df['rank_t0_prev'] = df['rank_t0'].shift(-1)
    df['top_check'] = df['rank_t0'].apply(lambda x: 1 if x <= _top_rank_cutoff else 0)
    df['top_count'] = df['top_check'][::-1].rolling(window=20).sum()[::-1]

# Drop the helper columns
eod_score_dict = {k: v.drop(
    columns=['raw_score', 'rank_t0_prev', 'rank_prev', 'top_check'])
    for k, v in eod_score_dict.items()}

In [None]:
# Summary table: the newest row of every stock, sorted by t0 score (highest first)
score_list = [df.iloc[0] for df in eod_score_dict.values()]

eod_score_df = pd.DataFrame(score_list).sort_values('t0_score', ascending=False).reset_index(drop=True)
eod_score_df = eod_score_df.fillna('')
# NOTE(review): after fillna('') a missing t0_score / t5_score / liquid_ratio / rank becomes ''
# and the numeric comparisons below would raise TypeError -- confirm these are never missing here.

def _flow_direction(score):
    """Label a score as money flowing in (>= 0) or out (< 0)."""
    return 'Tiền vào' if score >= 0 else 'Tiền ra'

def _liquidity_bucket(ratio):
    """Bucket a liquidity ratio; lower bounds are inclusive."""
    # Boundary is 0.5 to match the '<50%' label (it was 0.6, which put ratios
    # in [0.5, 0.6) into the '<50%' bucket).
    if ratio < 0.5:
        return '<50%'
    if ratio < 1:
        return '50%-100%'
    if ratio < 1.5:
        return '100%-150%'
    if ratio < 2:
        return '150%-200%'
    return '>200%'

def _rank_bucket(rank):
    """Bucket a rank; upper bounds are inclusive."""
    if rank <= 50:
        return '1-50'
    if rank <= 150:
        return '51-150'
    if rank <= 250:
        return '151-250'
    return '>250'

eod_score_df['filter_t0'] = eod_score_df['t0_score'].apply(_flow_direction)
eod_score_df['filter_t5'] = eod_score_df['t5_score'].apply(_flow_direction)

eod_score_df['filter_liquid'] = eod_score_df['liquid_ratio'].apply(_liquidity_bucket)
eod_score_df['order_filter_liquid'] = eod_score_df['filter_liquid'].map(
    {'<50%': 1, '50%-100%': 2, '100%-150%': 3, '150%-200%': 4, '>200%': 5})

eod_score_df['filter_rank'] = eod_score_df['rank'].apply(_rank_bucket)
eod_score_df['order_filter_rank'] = eod_score_df['filter_rank'].map(
    {'1-50': 1, '51-150': 2, '151-250': 3, '>250': 4})

# Replace group keys with their full display names
eod_score_df['industry_name'] = eod_score_df['industry_name'].map(name_map_dict)
eod_score_df['industry_perform'] = eod_score_df['industry_perform'].map(name_map_dict)
eod_score_df['marketcap_group'] = eod_score_df['marketcap_group'].map(name_map_dict)

#### Điểm dòng tiền nhóm cổ phiếu

- Các hàm tính toán

In [None]:
# Re-weight each t0 score by the breadth of the group it contributes to
def adjust_score_by_breath(t0_score, ratio_column):
    """Scale scores pairwise by a ratio: score*ratio when score >= 0, else score*(1-ratio).

    The two iterables are paired positionally; the result is a list as long as the shorter one.
    """
    return [
        score*ratio if score >= 0 else score*(1-ratio)
        for score, ratio in zip(t0_score, ratio_column)
    ]

# Cap a single stock's score so it cannot dominate its group's total
def adjust_score_for_smooth(row, column_name, max_percent, mark):
    """Return row[column_name], capped when its magnitude exceeds max_percent of row['total'].

    When capped, the magnitude becomes rest / (1 - max_percent) - rest, where
    rest = row['total'] - |score|, and the original sign is kept.
    When the score is within the limit it is returned unchanged and mark[0] is set to 0.
    """
    score = row[column_name]
    limit = row['total'] * max_percent

    if not abs(score) > limit:
        mark[0] = 0
        return score

    rest_of_group = row['total'] - abs(row[column_name])
    capped = rest_of_group / (1 - max_percent) - rest_of_group
    return capped if score >= 0 else -capped

# Apply the capping function above to every group of stocks
def apply_smooth_score(group_stock, group_name, type_name):
    """Write a capped copy of each stock's t0 score into column f't0_{group_name}'.

    Args:
        group_stock: dict whose keys are the group values to process (values are not read).
        group_name: classification column that defines the groups, or 'all_stock' to
            treat every stock in stock_classification_filtered as one group.
        type_name: 'eod' (uses date_series / eod_score_dict) or
            'itd' (uses time_series / itd_score_dict).

    Raises:
        ValueError: for any other type_name.

    Side effects:
        Adds/overwrites f't0_{group_name}' on the frames in the selected score dict.
        Groups with no scorable stock are skipped.
    """
    if type_name == 'itd':
        initial_score_df = time_series.copy()
        score_dict = itd_score_dict
    elif type_name == 'eod':
        score_dict = eod_score_dict
        initial_score_df = date_series.copy()
    else:
        raise ValueError(f"type_name must be 'itd' or 'eod', got {type_name!r}")

    for key in group_stock.keys():
        if group_name == 'all_stock':
            members = stock_classification_filtered['stock'].tolist()
        else:
            members = stock_classification_filtered[stock_classification_filtered[f'{group_name}']==key]['stock'].tolist()

        # One column per member that actually has a t0 score, in classification order
        # (deterministic, unlike iterating a set intersection).
        score_df = initial_score_df.copy()
        temp_stock_list = []
        seen = set()
        for stock in members:
            if stock in seen or stock not in score_dict or 't0_score' not in score_dict[stock]:
                continue
            seen.add(stock)
            score_df[stock] = score_dict[stock]['t0_score']
            temp_stock_list.append(stock)

        if not temp_stock_list:
            # Nothing to smooth; also avoids 1/0 below and a loop that never ends
            continue

        # A single stock may contribute at most 5/n of the group total, bounded to [10%, 50%]
        max_percent = max(0.1, min(5*(1/len(temp_stock_list)), 0.5))
        # Total is computed once from the original scores and not refreshed between passes
        score_df['total'] = score_df.iloc[:, 1:].abs().sum(axis=1)

        # Repeat full passes until a pass meets at least one score that needs no capping
        # (adjust_score_for_smooth then clears mark[0]).
        mark = [1]
        while mark[0] == 1 and len(score_df) > 0:
            for stock in temp_stock_list:
                score_df[stock] = score_df.iloc[:, 1:].apply(adjust_score_for_smooth, axis=1, args=(stock, max_percent, mark))

        for stock in temp_stock_list:
            score_dict[stock][f't0_{group_name}'] = score_df[stock]

- Dòng tiền vào nhóm cổ phiếu EOD

In [None]:
# Cap outlier stock scores before they are aggregated into group scores (EOD data)
for _groups, _group_name in (
    (eod_industry_name, 'industry_name'),
    (eod_industry_perform, 'industry_perform'),
    (eod_marketcap_group, 'marketcap_group'),
    (eod_all_stock, 'all_stock'),
):
    apply_smooth_score(_groups, _group_name, 'eod')

In [None]:
# Breadth per session: for each group, the share of member stocks with a positive t0 score
temp_df = date_series.copy()
for stock, df in eod_score_dict.items():
    temp_df[stock] = eod_score_dict[stock]['t0_score']
# 1 where the score is positive, 0 otherwise (missing scores count as 0).
# Vectorised comparison replaces the deprecated DataFrame.applymap.
temp_df.iloc[:,1:] = (temp_df.iloc[:,1:] > 0).astype('int64')

eod_market_breath = date_series.copy()

def _breadth_by_group(group_keys, class_column):
    """Add one breadth column per group key to eod_market_breath.

    Members of a group are the stocks in stock_classification_filtered whose
    `class_column` equals the key; with class_column=None every stock is a member.

    Returns:
        {key: frame of 'date' plus the members' 0/1 flags}.
    """
    flags_by_group = {}
    for key in group_keys:
        if class_column is None:
            stock_list = stock_classification_filtered['stock'].tolist()
        else:
            stock_list = stock_classification_filtered[stock_classification_filtered[class_column]==key]['stock'].tolist()
        flags_by_group[key] = temp_df[['date'] + stock_list]
        eod_market_breath[key] = flags_by_group[key].iloc[:,1:].sum(axis=1)/len(stock_list)
    return flags_by_group

industry_name_breadth_dict = _breadth_by_group(eod_industry_name.keys(), 'industry_name')
industry_perform_breadth_dict = _breadth_by_group(eod_industry_perform.keys(), 'industry_perform')
marketcap_group_breadth_dict = _breadth_by_group(eod_marketcap_group.keys(), 'marketcap_group')
all_stock_breadth_dict = _breadth_by_group(eod_all_stock.keys(), None)

# Re-weight each stock's group-level t0 scores by the breadth of the group it belongs to
for stock, df in eod_score_dict.items():
    _stock_row = stock_classification_filtered[stock_classification_filtered['stock']==stock]
    _group_key_by_column = {
        class_column: _stock_row[class_column].item()
        for class_column in ('industry_name', 'industry_perform', 'marketcap_group')
    }

    for class_column, group_key in _group_key_by_column.items():
        df[f't0_{class_column}'] = adjust_score_by_breath(df[f't0_{class_column}'], eod_market_breath[group_key])
    df['t0_all_stock'] = adjust_score_by_breath(df['t0_all_stock'], eod_market_breath['all_stock'])

In [None]:
def mean_of_net_values(df):
    """Return the row-wise mean of ``df`` across its columns.

    No sign filtering is applied: positive and negative values are averaged
    together. pandas' default NaN handling applies, so missing entries are
    skipped and a row with no values yields NaN.

    Args:
        df: DataFrame whose columns are averaged per row.

    Returns:
        Series indexed like ``df`` holding one mean per row.
    """
    return df.mean(axis=1)

# Build the net money-flow score table: one row per date, one column per stock group
eod_group_score_df_net = date_series.copy()

# All-stock column(s): every classified ticker contributes its t0_all_stock score
for nganh in eod_all_stock:
    score_df = date_series.copy()
    for stock in stock_classification_filtered['stock']:
        score_df[stock] = eod_score_dict[stock]['t0_all_stock']
    # Everything after the first column is treated as a per-stock score
    # (assumes date_series carries only the date column — TODO confirm)
    score_df['total'] = mean_of_net_values(score_df.iloc[:, 1:])
    eod_group_score_df_net[nganh] = score_df['total']

# Industry columns
# NOTE(review): this placeholder frame is not read in the visible code; kept in case other cells use it
eod_industry_name_score_df = date_series.copy()
for nganh in eod_industry_name:
    in_group = stock_classification_filtered['industry_name'] == nganh
    score_df = date_series.copy()
    for stock in stock_classification_filtered.loc[in_group, 'stock']:
        score_df[stock] = eod_score_dict[stock]['t0_industry_name']
    score_df['total'] = mean_of_net_values(score_df.iloc[:, 1:])
    eod_group_score_df_net[nganh] = score_df['total']

# Performance-group columns
eod_industry_perform_score_df = date_series.copy()
for group in eod_industry_perform:
    in_group = stock_classification_filtered['industry_perform'] == group
    score_df = date_series.copy()
    for stock in stock_classification_filtered.loc[in_group, 'stock']:
        score_df[stock] = eod_score_dict[stock]['t0_industry_perform']
    score_df['total'] = mean_of_net_values(score_df.iloc[:, 1:])
    eod_group_score_df_net[group] = score_df['total']

# Market-cap group columns
eod_marketcap_group_score_df = date_series.copy()
for marketcap in eod_marketcap_group:
    in_group = stock_classification_filtered['marketcap_group'] == marketcap
    score_df = date_series.copy()
    for stock in stock_classification_filtered.loc[in_group, 'stock']:
        score_df[stock] = eod_score_dict[stock]['t0_marketcap_group']
    score_df['total'] = mean_of_net_values(score_df.iloc[:, 1:])
    eod_group_score_df_net[marketcap] = score_df['total']

# Replace missing scores with 0, then append calendar helper columns derived from 'date'
eod_group_score_df_net = eod_group_score_df_net.fillna(0).assign(
    week=lambda frame: frame['date'].dt.strftime('%U-%Y'),
    month=lambda frame: frame['date'].dt.strftime('%m-%Y'),
    week_day=lambda frame: frame['date'].dt.day_name(),
    day_num=lambda frame: frame['date'].dt.day,
)

In [None]:
def mean_of_negative_values(df, length):
    """Sum the strictly negative entries of each row and divide by ``length``.

    Entries that are zero, positive or NaN are masked out and contribute
    nothing to the sum. The divisor is the caller-supplied ``length``, not the
    number of negative entries.

    Args:
        df: DataFrame of scores, one column per stock.
        length: Divisor applied to every row sum.

    Returns:
        Series indexed like ``df`` with one value per row.
    """
    only_negative = df.where(df < 0)
    return only_negative.sum(axis=1) / length

# Build the negative-side money-flow table: one row per date, one column per stock group
eod_group_score_df_negative = date_series.copy()

# All-stock column(s): every classified ticker contributes its t0_all_stock score
for nganh in eod_all_stock:
    score_df = date_series.copy()
    temp_stock_list = stock_classification_filtered['stock']
    for stock in temp_stock_list:
        score_df[stock] = eod_score_dict[stock]['t0_all_stock']
    # Everything after the first column is treated as a per-stock score;
    # the divisor is the full member count, not the count of negative scores
    score_df['total'] = mean_of_negative_values(score_df.iloc[:, 1:], len(temp_stock_list))
    eod_group_score_df_negative[nganh] = score_df['total']

# Industry columns
# NOTE(review): this placeholder frame is not read in the visible code; kept in case other cells use it
eod_industry_name_score_df = date_series.copy()
for nganh in eod_industry_name:
    in_group = stock_classification_filtered['industry_name'] == nganh
    score_df = date_series.copy()
    temp_stock_list = stock_classification_filtered.loc[in_group, 'stock']
    for stock in temp_stock_list:
        score_df[stock] = eod_score_dict[stock]['t0_industry_name']
    score_df['total'] = mean_of_negative_values(score_df.iloc[:, 1:], len(temp_stock_list))
    eod_group_score_df_negative[nganh] = score_df['total']

# Performance-group columns
eod_industry_perform_score_df = date_series.copy()
for group in eod_industry_perform:
    in_group = stock_classification_filtered['industry_perform'] == group
    score_df = date_series.copy()
    temp_stock_list = stock_classification_filtered.loc[in_group, 'stock']
    for stock in temp_stock_list:
        score_df[stock] = eod_score_dict[stock]['t0_industry_perform']
    score_df['total'] = mean_of_negative_values(score_df.iloc[:, 1:], len(temp_stock_list))
    eod_group_score_df_negative[group] = score_df['total']

# Market-cap group columns
eod_marketcap_group_score_df = date_series.copy()
for marketcap in eod_marketcap_group:
    in_group = stock_classification_filtered['marketcap_group'] == marketcap
    score_df = date_series.copy()
    temp_stock_list = stock_classification_filtered.loc[in_group, 'stock']
    for stock in temp_stock_list:
        score_df[stock] = eod_score_dict[stock]['t0_marketcap_group']
    score_df['total'] = mean_of_negative_values(score_df.iloc[:, 1:], len(temp_stock_list))
    eod_group_score_df_negative[marketcap] = score_df['total']


# Replace missing scores with 0, then append calendar helper columns derived from 'date'
eod_group_score_df_negative = eod_group_score_df_negative.fillna(0).assign(
    week=lambda frame: frame['date'].dt.strftime('%U-%Y'),
    month=lambda frame: frame['date'].dt.strftime('%m-%Y'),
    week_day=lambda frame: frame['date'].dt.day_name(),
    day_num=lambda frame: frame['date'].dt.day,
)

In [None]:
def mean_of_positive_values(df, length):
    """Sum the non-negative entries of each row and divide by ``length``.

    Entries that are negative or NaN are masked out and contribute nothing to
    the sum; zeros are kept. The divisor is the caller-supplied ``length``,
    not the number of non-negative entries.

    Args:
        df: DataFrame of scores, one column per stock.
        length: Divisor applied to every row sum.

    Returns:
        Series indexed like ``df`` with one value per row.
    """
    only_non_negative = df.where(df >= 0)
    return only_non_negative.sum(axis=1) / length

# Build the positive-side money-flow table: one row per date, one column per stock group
eod_group_score_df_positive = date_series.copy()

# All-stock column(s): every classified ticker contributes its t0_all_stock score
for nganh in eod_all_stock:
    score_df = date_series.copy()
    temp_stock_list = stock_classification_filtered['stock']
    for stock in temp_stock_list:
        score_df[stock] = eod_score_dict[stock]['t0_all_stock']
    # Everything after the first column is treated as a per-stock score;
    # the divisor is the full member count, not the count of non-negative scores
    score_df['total'] = mean_of_positive_values(score_df.iloc[:, 1:], len(temp_stock_list))
    eod_group_score_df_positive[nganh] = score_df['total']

# Industry columns
# NOTE(review): this placeholder frame is not read in the visible code; kept in case other cells use it
eod_industry_name_score_df = date_series.copy()
for nganh in eod_industry_name:
    in_group = stock_classification_filtered['industry_name'] == nganh
    score_df = date_series.copy()
    temp_stock_list = stock_classification_filtered.loc[in_group, 'stock']
    for stock in temp_stock_list:
        score_df[stock] = eod_score_dict[stock]['t0_industry_name']
    score_df['total'] = mean_of_positive_values(score_df.iloc[:, 1:], len(temp_stock_list))
    eod_group_score_df_positive[nganh] = score_df['total']

# Performance-group columns
eod_industry_perform_score_df = date_series.copy()
for group in eod_industry_perform:
    in_group = stock_classification_filtered['industry_perform'] == group
    score_df = date_series.copy()
    temp_stock_list = stock_classification_filtered.loc[in_group, 'stock']
    for stock in temp_stock_list:
        score_df[stock] = eod_score_dict[stock]['t0_industry_perform']
    score_df['total'] = mean_of_positive_values(score_df.iloc[:, 1:], len(temp_stock_list))
    eod_group_score_df_positive[group] = score_df['total']

# Market-cap group columns
eod_marketcap_group_score_df = date_series.copy()
for marketcap in eod_marketcap_group:
    in_group = stock_classification_filtered['marketcap_group'] == marketcap
    score_df = date_series.copy()
    temp_stock_list = stock_classification_filtered.loc[in_group, 'stock']
    for stock in temp_stock_list:
        score_df[stock] = eod_score_dict[stock]['t0_marketcap_group']
    score_df['total'] = mean_of_positive_values(score_df.iloc[:, 1:], len(temp_stock_list))
    eod_group_score_df_positive[marketcap] = score_df['total']


# Replace missing scores with 0, then append calendar helper columns derived from 'date'
eod_group_score_df_positive = eod_group_score_df_positive.fillna(0).assign(
    week=lambda frame: frame['date'].dt.strftime('%U-%Y'),
    month=lambda frame: frame['date'].dt.strftime('%m-%Y'),
    week_day=lambda frame: frame['date'].dt.day_name(),
    day_num=lambda frame: frame['date'].dt.day,
)

#### Index các nhóm cổ phiếu

In [None]:
def calculate_index(stock_group, name):
    """Build an equal-weighted price index for one group of stocks.

    Works on a deep copy of the module-level ``price_index_date_series`` frame.
    For every member of ``stock_group[name]`` the ``close`` column is aligned
    to that frame by row label and turned into a percentage change computed
    over the reversed row order, so each row is compared with the row below it
    (rows are presumably sorted newest-first — confirm against the loader).
    The per-row changes are summed, divided by the member count, scaled by
    100 and then by 10, accumulated from the last row upwards and offset by
    1000.

    Args:
        stock_group: Mapping of group name to a mapping of ticker to a
            DataFrame that has a ``close`` column.
        name: Key of the group inside ``stock_group``.

    Returns:
        Series named ``index_value``, indexed like ``price_index_date_series``.
    """
    members = stock_group[name]
    index_frame = price_index_date_series.copy(deep=True)

    for ticker, prices in members.items():
        # Assign first so the closes are cut to the frame's rows, then take the
        # change on that aligned column (the order matters for the last row)
        index_frame[ticker] = prices['close']
        index_frame[ticker] = index_frame[ticker].iloc[::-1].pct_change().iloc[::-1]

    # Column 0 is the date; the remaining columns are the per-ticker changes
    total_change = index_frame.iloc[:, 1:].sum(axis=1)
    total_change = (total_change / len(members)) * 100
    total_change = total_change * 10
    index_frame['total_change'] = total_change

    # Accumulate from the bottom row upwards and start the index at 1000
    index_frame['index_value'] = index_frame['total_change'].iloc[::-1].cumsum().iloc[::-1] + 1000

    return index_frame['index_value']

In [108]:
# Keep only the rows dated after 1 January of previous_year
index_start = pd.Timestamp(year=previous_year, month=1, day=1)
price_index_date_series = price_index_date_series.loc[price_index_date_series['date'] > index_start]
price_index_df = price_index_date_series.copy()

# VNINDEX close over the same window, aligned to price_index_df by row label
vnindex_series = eod_index_dict['VNINDEX'][['date', 'close']]
price_index_df['VNINDEX'] = vnindex_series.loc[vnindex_series['date'] > index_start, 'close']

# One index column per group, walking the grouping schemes in their original order
for stock_groups in (eod_all_stock, eod_industry_name, eod_industry_perform, eod_marketcap_group):
    for group, df in stock_groups.items():
        price_index_df[group] = calculate_index(stock_groups, group)

#### Tính toán phân bổ vốn

In [150]:
def transform_value(x):
    """Snap a ratio onto one of six steps: 0, 0.2, 0.4, 0.6, 0.8 or 1.

    Bands (lower bound inclusive, upper bound exclusive):
        x < 0.2 -> 0, [0.2, 0.3) -> 0.2, [0.3, 0.5) -> 0.4,
        [0.5, 0.7) -> 0.6, [0.7, 0.8) -> 0.8, x >= 0.8 -> 1.

    Args:
        x: Number to bucket.

    Returns:
        The step for the band containing ``x``, or ``None`` when no comparison
        holds (e.g. ``x`` is NaN).
    """
    # (exclusive upper bound, step returned below that bound), in ascending order
    bands = ((0.2, 0), (0.3, 0.2), (0.5, 0.4), (0.7, 0.6), (0.8, 0.8))
    for upper_bound, step in bands:
        if x < upper_bound:
            return step
    if x >= 0.8:
        return 1
    # NaN fails every comparison above
    return None

In [178]:
# Capital-allocation table: one row per date, built from the smoothed positive and
# negative money-flow scores of groups A-D.
phan_bo_von_df = pd.DataFrame()
phan_bo_von_df['date'] = date_series['date']

allocation_groups = ['A', 'B', 'C', 'D']
# Multiplier applied to each group's stepped value (the four multipliers sum to 1.0)
allocation_weights = {'A': 0.4, 'B': 0.3, 'C': 0.2, 'D': 0.1}

positive_cols = [f'{grp}+' for grp in allocation_groups]
negative_cols = [f'{grp}-' for grp in allocation_groups]
raw_cols = [f'{grp}_raw' for grp in allocation_groups]
mean_cols = [f'{grp}_mean' for grp in allocation_groups]
fix_cols = [f'{grp}_fix' for grp in allocation_groups]
final_cols = [f'{grp}_final' for grp in allocation_groups]

# 5-row rolling mean taken over the reversed frame, then flipped back
# (rows are presumably newest-first, so the window looks back in time — confirm)
phan_bo_von_df[positive_cols] = eod_group_score_df_positive[allocation_groups][::-1].rolling(window=5, min_periods=1).mean()[::-1]
# Negative side is stored as a magnitude
phan_bo_von_df[negative_cols] = eod_group_score_df_negative[allocation_groups][::-1].rolling(window=5, min_periods=1).mean().abs()[::-1]

# (pos - neg) / (pos + neg); the result is NaN when both sides are 0
for grp in allocation_groups:
    positive_side = phan_bo_von_df[f'{grp}+']
    negative_side = phan_bo_von_df[f'{grp}-']
    phan_bo_von_df[f'{grp}_raw'] = (positive_side - negative_side) / (positive_side + negative_side)

# 3-row rolling mean of the raw ratio
phan_bo_von_df[mean_cols] = phan_bo_von_df[raw_cols][::-1].rolling(window=3, min_periods=1).mean()[::-1]

# NOTE(review): the stepped values are derived from *_raw, not from the *_mean columns
# computed just above — confirm this is intended.
# Element-wise map via apply + Series.map instead of DataFrame.applymap, which is
# deprecated since pandas 2.1; the result is the same on every pandas version.
phan_bo_von_df[fix_cols] = phan_bo_von_df[raw_cols].apply(lambda column: column.map(transform_value))
for grp in allocation_groups:
    phan_bo_von_df[f'{grp}_final'] = phan_bo_von_df[f'{grp}_fix'] * allocation_weights[grp]

# Total of the weighted steps, snapped onto the same step scale
phan_bo_von_df['sum'] = phan_bo_von_df[final_cols].sum(axis=1).apply(transform_value)

# Reference columns: VNINDEX (aligned by row label) and the market trend indicators
phan_bo_von_df['VNINDEX'] = price_index_df['VNINDEX']
phan_bo_von_df = phan_bo_von_df.merge(market_ms[market_ms['name']=='Thị trường'][['date','trend_5p','trend_20p']], on='date', how='left')

#### Lưu vào file excel

In [179]:
# Write the capital-allocation table to a single-sheet workbook in the working directory
phan_bo_von_df.to_excel('test_data.xlsx', sheet_name='sheet1', index=False, engine='openpyxl')

In [None]:
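# Disabled multi-sheet export of the intermediate tables; uncomment to write them to test_data.xlsx.
# with pd.ExcelWriter('test_data.xlsx', engine='openpyxl') as writer: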
#     eod_group_score_df_positive.to_excel(writer, sheet_name='eod_group_score_df_positive', index=False)
#     eod_group_score_df_negative.to_excel(writer, sheet_name='eod_group_score_df_negative', index=False)
#     eod_group_score_df_net.to_excel(writer, sheet_name='eod_group_score_df_net', index=False)
#     group_stock_price_index.to_excel(writer, sheet_name='group_stock_price_index', index=False)
#     market_ms.to_excel(writer, sheet_name='market_ms', index=False)