In [1]:
import os
import pandas as pd
import numpy as np
from datetime import timedelta
import datetime as dt
import copy

import warnings
# Silence every warning category for the whole notebook.
# NOTE(review): this also hides pandas deprecation notices - confirm that is intended.
warnings.filterwarnings("ignore")
# Explicitly ignore FutureWarning as well (already covered by the blanket filter above).
warnings.simplefilter('ignore', category=FutureWarning)
# Turn off pandas' chained-assignment (SettingWithCopy) warning.
pd.options.mode.chained_assignment = None

#### Import Data

In [2]:
# Read the 'name_map' sheet once and derive three lookups keyed by 'code':
# full display name, display order and group.
name_map_sheet = pd.read_excel("data/t2m_classification.xlsx", sheet_name='name_map')

name_map = name_map_sheet.drop(columns=['group', 'order'])
name_map_dict = name_map.set_index('code')['full_name'].to_dict()

order_map = name_map_sheet.drop(columns=['group', 'full_name'])
order_map_dict = order_map.set_index('code')['order'].to_dict()

group_map = name_map_sheet.drop(columns=['order', 'full_name'])
group_map_dict = group_map.set_index('code')['group'].to_dict()

In [3]:
# Load every CSV exported by the AMI end-of-day job, keyed by file name without
# extension; rows are ordered newest first on the raw 'date' value.
eod_item_dict = {}
folder_path = 'D:\\t2m-project\\ami-data\\ami_eod_data'
eod_csv_names = [name for name in os.listdir(folder_path) if name.endswith('.csv')]
for csv_name in eod_csv_names:
    item_code = os.path.splitext(csv_name)[0]
    raw_frame = pd.read_csv(os.path.join(folder_path, csv_name))
    eod_item_dict[item_code] = raw_frame.sort_values('date', ascending=False).reset_index(drop=True)

# Parse the yymmdd-encoded 'date' column of each frame into datetimes (in place).
for item_frame in eod_item_dict.values():
    item_frame['date'] = pd.to_datetime(item_frame['date'].astype(str), format='%y%m%d')

# Summary table of all loaded items, with helper columns used to classify an
# item by the shape of its name.
eod_item_df = pd.DataFrame(list(eod_item_dict.keys())).rename(columns={0:'item'})
eod_item_df['len'] = eod_item_df['item'].str.len()
eod_item_df['last_2chars'] = eod_item_df['item'].str[-2:]
eod_item_df['first_4chars'] = eod_item_df['item'].str[:4]

# Names of stocks (3-character codes) and of indexes
# (4-9 characters, not 6, and not the item '0001').
eod_helper_cols = ['len','last_2chars','first_4chars']
stock_name_df = eod_item_df[eod_item_df['len']==3].reset_index(drop=True).drop(eod_helper_cols, axis=1)
index_name_df = eod_item_df[(eod_item_df['len']>3) & (eod_item_df['len']!=6) & (eod_item_df['len']<10) &
                (eod_item_df['item']!='0001')].reset_index(drop=True).drop(eod_helper_cols, axis=1)

# Build the membership sets once instead of re-materialising a list for every key.
eod_stock_names = set(stock_name_df['item'])
eod_index_names = set(index_name_df['item'])

eod_stock_dict = {k:v.drop(['option'], axis=1) for k,v in eod_item_dict.items() if k in eod_stock_names}
eod_index_dict = {k:v.rename(columns={'option':'value'}).drop('cap', axis=1)
                for k,v in eod_item_dict.items() if k in eod_index_names}

# Names of the proprietary-trading (TD) and foreign (NN) series for stocks
# (6-character names) and for indexes (10+ characters, not starting with 'VN30').
td_nn_helper_cols = ['len','last_2chars','first_4chars']
stock_name_td_df = eod_item_df[(eod_item_df['len']==6) & (eod_item_df['last_2chars']=='TD')].reset_index(drop=True).drop(td_nn_helper_cols, axis=1)
stock_name_nn_df = eod_item_df[(eod_item_df['len']==6) & (eod_item_df['last_2chars']=='NN')].reset_index(drop=True).drop(td_nn_helper_cols, axis=1)
index_td_nn_df = eod_item_df[(eod_item_df['len']>=10) & (eod_item_df['first_4chars']!='VN30') & ((eod_item_df['last_2chars']=='NN') | (eod_item_df['last_2chars']=='TD'))].reset_index(drop=True).drop(td_nn_helper_cols, axis=1)

# In these files the OHLCV columns are reused to carry buy/sell volume and value.
td_nn_rename = {'open':'sell_volume','close':'buy_volume','volume':'sell_value','option':'buy_value'}

# Membership sets are built once rather than once per dictionary key.
stock_td_names = set(stock_name_td_df['item'])
stock_nn_names = set(stock_name_nn_df['item'])
index_td_nn_names = set(index_td_nn_df['item'])

stock_td_dict = {k:v.drop(['high','low','cap'], axis=1).rename(columns=td_nn_rename)
                    for k,v in eod_item_dict.items() if k in stock_td_names}
stock_nn_dict = {k:v.drop(['high','low','cap'], axis=1).rename(columns=td_nn_rename)
                    for k,v in eod_item_dict.items() if k in stock_nn_names}
index_td_nn_dict = {k:v.drop(['high','low','cap','stock'], axis=1).rename(columns=td_nn_rename)
                    for k,v in eod_item_dict.items() if k in index_td_nn_names}

# Rescale the index-level NN/TD tables: volumes are divided by 1,000 and values
# by 1,000,000,000; the sell side is stored negated so net = buy + sell.
volume_divisor = 1000
value_divisor = 1000000000
for td_nn_frame in index_td_nn_dict.values():
    td_nn_frame['buy_volume'] = td_nn_frame['buy_volume'] / volume_divisor
    td_nn_frame['sell_volume'] = -td_nn_frame['sell_volume'] / volume_divisor
    td_nn_frame['buy_value'] = td_nn_frame['buy_value'] / value_divisor
    td_nn_frame['sell_value'] = -td_nn_frame['sell_value'] / value_divisor
    td_nn_frame['net_volume'] = td_nn_frame['buy_volume'] + td_nn_frame['sell_volume']
    td_nn_frame['net_value'] = td_nn_frame['buy_value'] + td_nn_frame['sell_value']

In [4]:
# EOD date range: the dates of the VNINDEX series, as an independent frame.
date_series = eod_index_dict['VNINDEX'][['date']].copy()

# Intraday 5-minute grid for the most recent EOD date: 09:00-11:25 and 13:00-14:55.
# Timestamps are built arithmetically instead of by gluing a time string onto
# str(Timestamp), and the non-deprecated '5min' alias replaces '5T'.
time_series_list = []
for day in date_series['date'].iloc[:1].tolist():
    session_day = pd.Timestamp(day).normalize()
    time_series_list.extend(pd.date_range(start=session_day + pd.Timedelta(hours=9),
                                          end=session_day + pd.Timedelta(hours=11, minutes=25), freq='5min'))
    time_series_list.extend(pd.date_range(start=session_day + pd.Timedelta(hours=13),
                                          end=session_day + pd.Timedelta(hours=14, minutes=55), freq='5min'))
time_series = pd.DataFrame({'date': time_series_list})

# Intraday frame starting at 09:15 (skips the first three 5-minute slots).
itd_series = pd.DataFrame({'date': time_series_list[3:]})

In [5]:
# Load every CSV exported by the AMI intraday job, keyed by file name without
# extension; rows are ordered newest first on the raw 'date' value.
folder_path = 'D:\\t2m-project\\ami-data\\ami_itd_data'
itd_item_dict = {
    os.path.splitext(csv_name)[0]: (
        pd.read_csv(os.path.join(folder_path, csv_name))
        .sort_values('date', ascending=False)
        .reset_index(drop=True)
    )
    for csv_name in os.listdir(folder_path)
    if csv_name.endswith('.csv')
}

# Timestamp of the newest exported bar, taken from the HNXINDEX file.
latest_raw_stamp = itd_item_dict['HNXINDEX']['date'].iloc[0]
current_time = pd.to_datetime(latest_raw_stamp, format='%y%m%d %H%M%S')

# Trim the intraday grid to slots that already have data, newest first.
slot_has_data = time_series['date'] <= current_time
time_series = time_series.loc[slot_has_data].sort_values('date', ascending=False).reset_index(drop=True)

itd_price_cols = ['open','high','low','close']
for item, df in itd_item_dict.items():

    df['date'] = pd.to_datetime(df['date'].astype(str), format='%y%m%d %H%M%S')

    # Put the item on the common intraday grid; slots without a bar become NaN.
    df = time_series.merge(df, on='date', how='left').sort_values('date', ascending=False)
    # Rows are newest first, so a backward fill copies prices from the next row
    # down. `.bfill()` replaces the deprecated `fillna(method='bfill')`.
    df[itd_price_cols] = df[itd_price_cols].bfill()
    df['volume'] = df['volume'].fillna(0)
    df['stock'] = item

    itd_item_dict[item] = df

# Summary table of all intraday items with helper columns derived from the name.
itd_item_df = pd.DataFrame(list(itd_item_dict.keys())).rename(columns={0:'item'})
itd_item_df['len'] = itd_item_df['item'].str.len()
itd_item_df['last_2chars'] = itd_item_df['item'].str[-2:]
itd_item_df['third_last_char'] = itd_item_df['item'].str[-3:-2]
itd_item_df['first_4chars'] = itd_item_df['item'].str[:4]

# Names of stocks (3-character codes) and of indexes
# (4-9 characters, not 6, and not the item '0001').
itd_helper_cols = ['len','last_2chars','third_last_char','first_4chars']
stock_name_df = itd_item_df[itd_item_df['len']==3].reset_index(drop=True).drop(itd_helper_cols, axis=1)
index_name_df = itd_item_df[(itd_item_df['len']>3) & (itd_item_df['len']!=6) & (itd_item_df['len']<10) & (itd_item_df['item']!='0001')]\
                .reset_index(drop=True).drop(itd_helper_cols, axis=1)

# Build the membership sets once instead of re-materialising a list for every key.
itd_stock_names = set(stock_name_df['item'])
itd_index_names = set(index_name_df['item'])

itd_stock_dict = {k:v for k,v in itd_item_dict.items() if k in itd_stock_names}
itd_index_dict = {k:v.rename(columns={'option':'value'})
                for k,v in itd_item_dict.items() if k in itd_index_names}

In [6]:
def calculate_time_percent(time):
    """Return the fraction of the trading day elapsed at the end of a 5-minute bar.

    The trading day is 09:00-11:30 plus 13:00-15:00 (270 minutes). `time` is the
    bar's start (a datetime-like object supporting `+ timedelta` and `.time()`);
    five minutes are added to get the bar's end before measuring.

    Returns 0.0 when the bar ends at or before 09:00, the completed morning
    share when it ends inside the lunch break, and a value above 1 when it ends
    after 15:00 (callers clamp that case).
    """
    start_time_am = dt.time(9, 00)
    end_time_am = dt.time(11, 30)
    start_time_pm = dt.time(13, 00)
    end_time_pm = dt.time(15, 00)

    def time_difference_in_minutes(time1, time2):
        """Whole minutes from time1 to time2 (negative when time2 is earlier)."""
        delta1 = dt.timedelta(hours=time1.hour, minutes=time1.minute, seconds=time1.second)
        delta2 = dt.timedelta(hours=time2.hour, minutes=time2.minute, seconds=time2.second)
        # total_seconds() keeps the sign; `.seconds` alone wraps negative gaps.
        return int((delta2 - delta1).total_seconds() // 60)

    time = (time + timedelta(minutes=5)).time()
    morning_minutes = time_difference_in_minutes(start_time_am, end_time_am)
    lunch_minutes = time_difference_in_minutes(end_time_am, start_time_pm)
    full_time_range = morning_minutes + time_difference_in_minutes(start_time_pm, end_time_pm)

    if time <= start_time_am:
        # Before the open: nothing has elapsed yet.
        time_range = 0
    elif time <= end_time_am:
        time_range = time_difference_in_minutes(start_time_am, time)
    elif time < start_time_pm:
        # Lunch break: the whole morning session, and only that, has elapsed.
        time_range = morning_minutes
    else:
        time_range = time_difference_in_minutes(start_time_am, time) - lunch_minutes

    return time_range/full_time_range

# Keep only the intraday slots that already have data, newest first.
slot_is_available = time_series['date'] <= current_time
time_series = time_series[slot_is_available].sort_values('date', ascending=False).reset_index(drop=True)

# Elapsed share of the trading day for every slot, capped at 1.
elapsed_share = time_series['date'].apply(calculate_time_percent)
time_percent = time_series.assign(percent=elapsed_share.where(elapsed_share < 1, 1))
current_time_percent = time_percent['percent'].iloc[0]

In [7]:
#Tạo bảng thời gian update
def get_update_time(start_time_am, end_time_am, start_time_pm, end_time_pm, now=None):
    """Return the time of day to show as the data's last update time.

    Args:
        start_time_am, end_time_am: bounds of the morning session (datetime.time).
        start_time_pm, end_time_pm: bounds of the afternoon session (datetime.time).
        now: optional datetime to evaluate; defaults to `datetime.now()`. The clock
            is read once so every comparison sees the same instant.

    Returns:
        The time of `now` while a session is open; `end_time_am` during the lunch
        break; `end_time_pm` before the open, after the close and on weekends.
    """
    if now is None:
        now = dt.datetime.now()

    # Saturday (5) and Sunday (6): report the close.
    if now.weekday() > 4:
        return end_time_pm

    current_time = now.time()
    if current_time < start_time_am:
        return end_time_pm
    if current_time < end_time_am:
        return current_time
    if current_time < start_time_pm:
        return end_time_am
    if current_time < end_time_pm:
        return current_time
    return end_time_pm

# One-cell table holding the "last updated" label: the date of the newest
# intraday bar combined with the session-aware wall-clock time.
session_bounds = (dt.time(9, 00), dt.time(11, 30), dt.time(13, 00), dt.time(15, 00))
time_update = get_update_time(*session_bounds)
date_time_update = dt.datetime.combine(current_time.date(), time_update)
update_label = f"Cập nhât: {date_time_update.strftime('%d/%m/%Y %H:%M:%S')}"
update_time = pd.DataFrame({'date': [update_label]})

#### Đường trung bình

In [8]:
# Rolling highs/lows, volume averages, price moving averages and trend flags
# for every EOD stock frame.
def _add_rolling_levels(frame):
    """Return `frame` (oldest row first) with rolling high/low, volume-MA and price-MA columns."""
    price_windows = (5, 20, 60, 120, 240, 480)
    volume_windows = (5, 20, 60, 120)

    new_cols = {}
    for window in price_windows:
        new_cols[f'high{window}'] = frame['high'].rolling(window=window, min_periods=1).max()
        new_cols[f'low{window}'] = frame['low'].rolling(window=window, min_periods=1).min()
    for window in volume_windows:
        # Shifted one row so the average covers the sessions before the current one.
        new_cols[f'ma{window}_V'] = frame['volume'].rolling(window=window, min_periods=1).mean().shift(1)
    for window in price_windows:
        new_cols[f'ma{window}'] = frame['close'].rolling(window=window, min_periods=1).mean()
    return frame.assign(**new_cols)


def _add_eod_trend_flags(frame):
    """Return `frame` with trend_Np = 1 where close is above the previous row's (highN + lowN) / 2."""
    flags = {
        f'trend_{window}p': (
            frame['close'] > ((frame[f'high{window}'] + frame[f'low{window}'])/2).shift(1)
        ).astype(int)
        for window in (5, 20, 60, 120, 240, 480)
    }
    return frame.assign(**flags)


# Compute on oldest-first rows, then restore the newest-first ordering.
eod_stock_dict = {
    code: _add_eod_trend_flags(
        _add_rolling_levels(frame.sort_values(by=['date'], ascending=True).reset_index(drop=True))
    ).sort_values(by=['date'], ascending=False).reset_index(drop=True)
    for code, frame in eod_stock_dict.items()
}

In [9]:
# Copy the rolling highs/lows from the first (newest) EOD row onto every
# intraday frame, then derive the same trend flags intraday.
eod_level_cols = ['high5', 'low5', 'high20', 'low20', 'high60', 'low60',
                  'high120','low120', 'high240', 'low240', 'high480', 'low480']
itd_stock_dict = {
    code: frame.assign(**eod_stock_dict[code][eod_level_cols].iloc[0])
    for code, frame in itd_stock_dict.items()
}


def _add_itd_trend_flags(frame):
    """Return `frame` with trend_Np = 1 where close is above the previous row's (highN + lowN) / 2."""
    flags = {
        f'trend_{window}p': (
            frame['close'] > ((frame[f'high{window}'] + frame[f'low{window}'])/2).shift(1)
        ).astype(int)
        for window in (5, 20, 60, 120, 240, 480)
    }
    return frame.assign(**flags)


# Compute on oldest-first rows, then restore the newest-first ordering.
itd_stock_dict = {
    code: _add_itd_trend_flags(
        frame.sort_values(by=['date'], ascending=True).reset_index(drop=True)
    ).sort_values(by=['date'], ascending=False).reset_index(drop=True)
    for code, frame in itd_stock_dict.items()
}

In [10]:
def _drop_stocks(stocks_to_drop):
    """Remove the given stocks from the EOD dict and, where present, from the ITD dict."""
    for stock in stocks_to_drop:
        # A stock may have no intraday file; that is not an error.
        itd_stock_dict.pop(stock, None)
        eod_stock_dict.pop(stock)


# Drop stocks whose newest EOD row is not the latest session date.
latest_session = date_series['date'].iloc[0]
delete_stock = [stock for stock, df in eod_stock_dict.items() if df['date'].iloc[0] != latest_session]
_drop_stocks(delete_stock)

# Drop stocks with a faulty close price of 0.
delete_stock = [stock for stock, df in eod_stock_dict.items() if df['close'].min() == 0]
_drop_stocks(delete_stock)

# Liquidity ratio (volume / ma5_V) and 20-session average market cap.
for df in eod_stock_dict.values():
    df['liquid_ratio'] = df['volume'] / (df['ma5_V'])
    # The newest session may be partial, so its baseline is scaled by the elapsed
    # share of the day. Written through the frame itself: a chained
    # `df[col].iloc[0] = ...` does not update the frame under Copy-on-Write.
    df.iloc[0, df.columns.get_loc('liquid_ratio')] = df['volume'].iloc[0] / ((df['ma5_V']).iloc[0]*current_time_percent)
    df['cap'] = df['cap'][::-1].rolling(window=20).mean()[::-1]

#### Phân nhóm cổ phiếu

In [11]:
stock_classification = pd.read_excel('data/t2m_classification.xlsx')
stock_classification = stock_classification[stock_classification['stock'].isin(list(eod_stock_dict.keys()))]

# First trading day of the current month (date_series is newest first).
latest_date = date_series['date'].iloc[0]
month_start = pd.Timestamp(latest_date.year, latest_date.month, 1)
first_day_of_month = date_series[date_series['date'] >= month_start]['date'].iloc[-1]

# Reference price and cap per stock: the row of the month's first session when
# the stock has one, otherwise its newest row. Keyed by stock so the values can
# be joined by name instead of by position.
price_by_stock = {}
cap_by_stock = {}
for stock, df in eod_stock_dict.items():
    month_open_rows = df[df['date'] == first_day_of_month]
    reference_rows = month_open_rows if len(month_open_rows) > 0 else df
    price_by_stock[stock] = reference_rows['close'].iloc[0].item()
    cap_by_stock[stock] = reference_rows['cap'].iloc[0].item()
price_arr = list(price_by_stock.values())
cap_arr = list(cap_by_stock.values())

# Market-cap table. Mapping on 'stock' keeps each price/cap on the right row
# even when the Excel order differs from the order the files were loaded in.
vonhoa_classification_df = stock_classification.copy()
vonhoa_classification_df['price'] = vonhoa_classification_df['stock'].map(price_by_stock)
vonhoa_classification_df['cap'] = vonhoa_classification_df['stock'].map(cap_by_stock)

# Thresholds are multiples of 1/10,000 of the summed cap of all EOD stocks.
cap_coef = sum(cap_arr)/10000


def _marketcap_group(cap, price):
    """Classify one stock as 'small', 'mid', 'large' or 'penny' from its cap and price."""
    in_10_to_20 = (cap >= 10*cap_coef) and (cap < 20*cap_coef)
    if ((cap > cap_coef) and (cap < 10*cap_coef)) or (in_10_to_20 and price < 10):
        return 'small'
    if (in_10_to_20 and price >= 10) or ((cap >= 20*cap_coef) and (cap < 100*cap_coef)):
        return 'mid'
    if cap >= 100*cap_coef:
        return 'large'
    return 'penny'


vonhoa_classification_df['marketcap_group'] = [
    _marketcap_group(cap, price)
    for cap, price in zip(vonhoa_classification_df['cap'], vonhoa_classification_df['price'])
]

stock_classification = pd.concat([stock_classification, vonhoa_classification_df['marketcap_group']], axis=1)

In [12]:
# Per-stock lookups for the three grouping dimensions.
classification_by_stock = stock_classification.set_index('stock')
stock_by_industry = classification_by_stock['industry_name'].to_dict()
stock_by_perform = classification_by_stock['industry_perform'].to_dict()
stock_by_marketcap = classification_by_stock['marketcap_group'].to_dict()

# Empty containers, filled below: group name -> {stock -> frame}.
eod_all_stock, itd_all_stock = {}, {}
eod_industry_name, itd_industry_name = {}, {}
eod_industry_perform, itd_industry_perform = {}, {}
eod_marketcap_group, itd_marketcap_group = {}, {}

def create_mapping(stock_dict, category_dict):
    """Group `stock_dict` entries by category.

    Args:
        stock_dict: mapping of stock -> value.
        category_dict: mapping of category -> iterable of stocks.

    Returns:
        {category: {stock: stock_dict[stock]}} containing only the stocks that
        exist in `stock_dict`; categories with no match map to an empty dict.
    """
    return {
        category: {member: stock_dict[member] for member in members if member in stock_dict}
        for category, members in category_dict.items()
    }

# Categories to build: sorted unique industries and performance groups, and the
# four fixed market-cap groups.
unique_industries = np.unique(list(stock_by_industry.values()))
unique_performs = np.unique(list(stock_by_perform.values()))
unique_marketcaps = ['large', 'mid', 'small', 'penny']


def _split_by_category(stock_dict, stock_to_category, categories):
    """Return {category: {stock: frame}} for the stocks of each category present in `stock_dict`."""
    return {
        category: {
            stock: stock_dict[stock]
            for stock, assigned in stock_to_category.items()
            if assigned == category and stock in stock_dict
        }
        for category in categories
    }


# Whole market: shallow copies of the per-stock dicts.
itd_all_stock['all_stock'] = dict(itd_stock_dict)
eod_all_stock['all_stock'] = dict(eod_stock_dict)

# By industry.
eod_industry_name.update(_split_by_category(eod_stock_dict, stock_by_industry, unique_industries))
itd_industry_name.update(_split_by_category(itd_stock_dict, stock_by_industry, unique_industries))

# By industry performance group.
eod_industry_perform.update(_split_by_category(eod_stock_dict, stock_by_perform, unique_performs))
itd_industry_perform.update(_split_by_category(itd_stock_dict, stock_by_perform, unique_performs))

# By market-cap group.
eod_marketcap_group.update(_split_by_category(eod_stock_dict, stock_by_marketcap, unique_marketcaps))
itd_marketcap_group.update(_split_by_category(itd_stock_dict, stock_by_marketcap, unique_marketcaps))


In [13]:
# All group codes: the whole market followed by each industry, performance
# group and market-cap group that occurs in the classification table.
group_stock_list = [
    'all_stock',
    *stock_classification['industry_name'].unique().tolist(),
    *stock_classification['industry_perform'].unique().tolist(),
    *stock_classification['marketcap_group'].unique().tolist(),
]

In [14]:
# Slicer table for the stock groups: display name, display order and group,
# all looked up from the group code.
group_codes = pd.DataFrame({'name': group_stock_list})['name']
group_slicer_df = pd.DataFrame({'name': group_codes}).assign(
    order=group_codes.map(order_map_dict),
    group=group_codes.map(group_map_dict),
    name=group_codes.map(name_map_dict),
)

#### Biểu đồ cấu trúc sóng

In [15]:
import pandas as pd

def transform_ms(stock_group, dates=None, all_stock_rows=960, group_rows=60):
    """Compute, per group, the share of stocks whose trend flag is set on each row.

    Args:
        stock_group: mapping of group name -> {stock -> DataFrame} where every
            frame carries the columns trend_5p ... trend_480p.
        dates: iterable of dates forming the output rows; defaults to the
            notebook-level `date_series['date']`.
        all_stock_rows: rows kept for the 'all_stock' group.
        group_rows: rows kept for every other group.

    Returns:
        A new dict of group name -> DataFrame with 'date' plus one column per
        trend, sorted newest first and truncated to the row limit. A group with
        no stocks yields 0.0 in every trend column instead of raising.

    Note:
        Stock rows are matched to `dates` by index position, not by date value.
        NOTE(review): this assumes every stock frame has the same row order as
        `dates` - confirm that no stock has missing sessions.
    """
    if dates is None:
        dates = date_series['date']
    dates_df = pd.DataFrame(list(dates), columns=['date'])
    trend_cols = ['trend_5p', 'trend_20p', 'trend_60p', 'trend_120p', 'trend_240p', 'trend_480p']

    # The input frames are only read, so no copy of `stock_group` is needed.
    result = {}
    for group_name, stocks in stock_group.items():
        group_trends = dates_df.copy()

        for trend in trend_cols:
            if not stocks:
                # pd.concat([]) raises; an empty group has no stock in any trend.
                group_trends[trend] = 0.0
                continue
            # One column per stock; rows a stock does not have count as 0.
            trend_data = pd.concat([frame[trend] for frame in stocks.values()], axis=1).fillna(0)
            group_trends[trend] = trend_data.sum(axis=1) / len(stocks)

        row_limit = all_stock_rows if group_name == 'all_stock' else group_rows
        result[group_name] = group_trends.sort_values('date', ascending=False).iloc[:row_limit]
    return result


In [16]:
# Market-structure (MS) tables for each family of stock groups.
all_stock_ms, industry_name_ms, industry_perform_ms, marketcap_group_ms = (
    transform_ms(group_family)
    for group_family in (eod_all_stock, eod_industry_name, eod_industry_perform, eod_marketcap_group)
)

In [17]:
# Stack every MS table into one frame, tagging each with its group code.
# Frames are collected first and concatenated once instead of growing the
# result inside the loop.
ms_frames = []
for item in [all_stock_ms, industry_name_ms, industry_perform_ms, marketcap_group_ms]:
    for group, df in item.items():
        df['name'] = group
        ms_frames.append(df)
market_ms = pd.concat(ms_frames, axis=0) if ms_frames else pd.DataFrame()

# Replace group codes with their display names.
market_ms['name'] = market_ms['name'].map(name_map_dict)

#### Điểm dòng tiền từng cổ phiếu

- Điểm dòng tiền EOD

In [18]:
# Keep only the first 60 rows (frames are newest first) of every EOD stock
# frame and of the date range.
recent_sessions = 60
eod_stock_dict = {
    code: frame.head(recent_sessions).reset_index(drop=True)
    for code, frame in eod_stock_dict.items()
}
date_series = date_series.head(recent_sessions)

In [19]:
def score_calculation(df):
    """Return the raw money-flow score for every row of `df`.

    `df` must provide the columns high, low, close, close_prev, volume,
    ma5_prev and ma5_V. With turnover = (volume * close) / (ma5_prev * ma5_V):

        score = position * |close - close_prev| / close_prev * turnover * 100
                + turnover / 100

    where position = ((close - low) - (high - close)) / (high - low).

    Rows with high == low have no defined position (0/0); they fall back to the
    turnover term alone. pandas arithmetic yields NaN rather than raising
    ZeroDivisionError, so this case is handled explicitly. Any remaining NaN
    (for example a missing close_prev) becomes 0.
    """
    price_range = df['high'] - df['low']
    close_position = ((df['close'] - df['low']) - (df['high'] - df['close'])) / price_range
    # Flat bar: treat the close position as neutral so only the turnover term remains.
    close_position = close_position.where(price_range != 0, 0)

    turnover_term = ((df['volume']*df['close']) / (df['ma5_prev'] * df['ma5_V']))/100
    directional_term = (close_position *
                        abs((df['close'] - df['close_prev'])) / df['close_prev'] *
                        (df['volume']*df['close']) / (df['ma5_prev'] * df['ma5_V'])) * 100

    result = directional_term + turnover_term
    return result.fillna(0)

In [20]:
# Columns needed to score each stock and to filter the money-flow universe.
score_input_cols = ['stock', 'date', 'high', 'low', 'close', 'volume', 'liquid_ratio', 'cap',
                    'ma5_V', 'ma20_V', 'ma60_V', 'ma120_V', 'ma5']
raw_eod_score_dict = {}
for stock, eod_frame in eod_stock_dict.items():
    # Work on an explicit copy so the added columns never touch eod_stock_dict.
    scored = eod_frame[score_input_cols].copy()

    # Frames are newest first, so shift(-1) brings up the previous session's value.
    scored['ma5_prev'] = scored['ma5'].shift(-1)
    scored['close_prev'] = scored['close'].shift(-1)

    scored['raw_score'] = score_calculation(scored)
    # Scale the newest (possibly partial) session by the elapsed share of the day.
    # Assigned through the frame: chained `df[col].iloc[0] = ...` is a no-op under
    # Copy-on-Write.
    raw_score_pos = scored.columns.get_loc('raw_score')
    scored.iloc[0, raw_score_pos] = scored['raw_score'].iloc[0].item() / current_time_percent

    scored['highest_price'] = scored['close'][::-1].rolling(window=40, min_periods=1).max()[::-1]
    scored['lowest_volume60'] = scored['volume'][::-1].rolling(window=60, min_periods=1).min().shift(1)[::-1]
    scored['mean_volume20'] = scored['volume'][::-1].rolling(window=20, min_periods=1).mean().shift(1)[::-1]

    raw_eod_score_dict[stock] = scored


def _passes_money_flow_filter(df):
    """True when the stock's row on the month's first session meets every volume/price floor.

    A stock with no row on that date passes, because `.all()` of an empty
    selection is True.
    """
    month_open = df[df['date'] == first_day_of_month]
    return all([
        (month_open['ma5_V'] >= 50000).all(),
        (month_open['ma20_V'] >= 50000).all(),
        (month_open['ma60_V'] >= 50000).all(),
        (month_open['ma120_V'] >= 50000).all(),
        (month_open['lowest_volume60'] > 0).all(),
        (month_open['mean_volume20'] >= 50000).all(),
        (month_open['close'] > month_open['highest_price'] * 0.382).all()
    ])


# Money-flow universe: stocks passing the filter, reduced to the output columns.
eod_score_dict = {
    stock: df[['stock', 'date', 'close', 'low', 'high', 'volume', 'liquid_ratio', 'raw_score', 'cap']].copy()
    for stock, df in raw_eod_score_dict.items()
    if _passes_money_flow_filter(df)
}

stock_classification_filtered = stock_classification[stock_classification['stock'].isin(eod_score_dict.keys())].reset_index(drop=True)

In [21]:
# Classification labels per stock, looked up once instead of scanning the
# classification table for every column of every stock.
eod_label_cols = ['industry_name', 'industry_perform', 'stock_perform', 'marketcap_group', 't2m_select']
eod_labels_by_stock = stock_classification_filtered.set_index('stock')[eod_label_cols].to_dict('index')

for stock in eod_score_dict.keys():
    score_frame = eod_score_dict[stock]
    stock_labels = eod_labels_by_stock[stock]

    score_frame['t0_score'] = score_frame['raw_score']

    # t5_score: mean of the last 5 sessions, computed on oldest-first rows.
    score_frame.sort_values('date', ascending=True, inplace=True)
    score_frame['t5_score'] = score_frame['t0_score'].rolling(window=5, min_periods=1).mean()
    score_frame.sort_values('date', ascending=False, inplace=True)

    for label_col in eod_label_cols:
        score_frame[label_col] = stock_labels[label_col]

In [22]:
def _score_and_rank(score_column):
    """Return (scores, ranks) for one score column across all stocks.

    `scores` has one column per stock, aligned on the date_series index, with
    missing values set to 0. `ranks` ranks the stocks within each row,
    highest score = rank 1, ties sharing the lowest rank.
    """
    scores = pd.DataFrame(
        {stock: df[score_column] for stock, df in eod_score_dict.items()},
        index=date_series.index,
    ).fillna(0)
    return scores, scores.rank(ascending=False, method='min', axis=1)


# T5 ranking and price changes (frames are newest first, hence the reversals).
t5_scores, t5_ranks = _score_and_rank('t5_score')
for stock, df in eod_score_dict.items():
    df['price_change'] = df['close'][::-1].pct_change()[::-1]
    df['value_change'] = df['close'][::-1].diff()[::-1]
    df['rank'] = t5_ranks[stock]
    # Positive when the stock moved up the ranking versus the previous session.
    df['rank_change'] = df['rank'].shift(-1) - df['rank']

# T0 ranking.
t0_scores, ranking_group = _score_and_rank('t0_score')
group_score = pd.concat([date_series, t0_scores], axis=1)

# top_count: number of the last 20 sessions in which the T0 rank was in the top 10%.
top_rank_cutoff = len(stock_classification_filtered)*0.1
for stock, df in eod_score_dict.items():
    df['rank_t0'] = ranking_group[stock]
    in_top = (df['rank_t0'] <= top_rank_cutoff).astype(int)
    df['top_count'] = in_top[::-1].rolling(window=20).sum()[::-1]

eod_score_dict = {k: v.drop(columns=['raw_score']) for k, v in eod_score_dict.items()}

In [23]:
# Summary table: the first (newest) row of every stock, best t0_score first.
score_list = [df.iloc[0] for df in eod_score_dict.values()]

eod_score_df = pd.DataFrame(score_list).sort_values('t0_score', ascending=False).reset_index(drop=True)
eod_score_df = eod_score_df.fillna('')


def _liquidity_bucket(ratio):
    """Label a liquidity ratio; anything not below 2 (including NaN) is '>200%'."""
    # The first bound is 0.5 so the '<50%' label matches its range.
    if ratio < 0.5:
        return '<50%'
    if ratio < 1:
        return '50%-100%'
    if ratio < 1.5:
        return '100%-150%'
    if ratio < 2:
        return '150%-200%'
    return '>200%'


def _rank_bucket(rank):
    """Label a rank; anything not within 250 (including NaN) is '>250'."""
    if rank <= 50:
        return '1-50'
    if rank <= 150:
        return '51-150'
    if rank <= 250:
        return '151-250'
    return '>250'


liquid_bucket_order = {'<50%': 1, '50%-100%': 2, '100%-150%': 3, '150%-200%': 4, '>200%': 5}
rank_bucket_order = {'1-50': 1, '51-150': 2, '151-250': 3, '>250': 4}

# Filter columns: money in ('Tiền vào') / money out ('Tiền ra'), then buckets.
eod_score_df['filter_t0'] = eod_score_df['t0_score'].apply(lambda x: 'Tiền vào' if x >= 0 else 'Tiền ra')
eod_score_df['filter_t5'] = eod_score_df['t5_score'].apply(lambda x: 'Tiền vào' if x >= 0 else 'Tiền ra')
eod_score_df['filter_liquid'] = eod_score_df['liquid_ratio'].apply(_liquidity_bucket)
eod_score_df['order_filter_liquid'] = eod_score_df['filter_liquid'].map(liquid_bucket_order)
eod_score_df['filter_rank'] = eod_score_df['rank'].apply(_rank_bucket)
eod_score_df['order_filter_rank'] = eod_score_df['filter_rank'].map(rank_bucket_order)

# Replace group codes with their display names.
eod_score_df['industry_name'] = eod_score_df['industry_name'].map(name_map_dict)
eod_score_df['industry_perform'] = eod_score_df['industry_perform'].map(name_map_dict)
eod_score_df['marketcap_group'] = eod_score_df['marketcap_group'].map(name_map_dict)

- Điểm dòng tiền ITD

In [24]:
# Requires date_series and itd_stock_dict from the cells above.
# HSX stocks are scored from 09:15 of the latest session; other stocks from
# the start of that date.
session_date = date_series['date'].iloc[0]
hsx_itd_start = pd.Timestamp(session_date.replace(hour=9, minute=15, second=0, microsecond=0))

# Stocks in the filtered universe, and the stocks listed on HSX.
filtered_stocks = stock_classification_filtered['stock'].tolist()
hsx_stocks = stock_classification[stock_classification['exchange'] == 'HSX']['stock'].tolist()
filtered_stock_set = set(filtered_stocks)
hsx_stock_set = set(hsx_stocks)

# Filter first, then copy only the rows that are kept, rather than deep-copying
# every intraday frame up front.
itd_score_dict = {
    k: v.loc[v['date'] >= (hsx_itd_start if k in hsx_stock_set else session_date)].copy()
    for k, v in itd_stock_dict.items() if k in filtered_stock_set
}

for stock, df in itd_score_dict.items():

    # Baselines from the newest EOD row; the 5-session average volume is scaled
    # by the elapsed share of the day for each slot.
    df['ma5_V'] = time_percent['percent']*(raw_eod_score_dict[stock]['ma5_V'].iloc[0])
    df['ma5_prev'] = raw_eod_score_dict[stock]['ma5_prev'].iloc[0]
    df['close_prev'] = raw_eod_score_dict[stock]['close_prev'].iloc[0]
    df['cap'] = raw_eod_score_dict[stock]['cap'].iloc[0]

    # Running high/low/volume from the earliest slot up to each slot (rows are newest first).
    df['high'] = df['high'][::-1].cummax()[::-1]
    df['low'] = df['low'][::-1].cummin()[::-1]
    df['volume'] = df['volume'][::-1].cumsum()[::-1]
    df['liquid_ratio'] = df['volume']/df['ma5_V']

    # The row labelled 0 takes the EOD figures of the latest session.
    # NOTE(review): if label 0 was filtered out above, .loc appends a new row
    # here - confirm that case cannot occur.
    df.loc[0, 'volume'] = raw_eod_score_dict[stock]['volume'].iloc[0]
    df.loc[0, 'close'] = raw_eod_score_dict[stock]['close'].iloc[0]
    df.loc[0, 'low'] = raw_eod_score_dict[stock]['low'].iloc[0]
    df.loc[0, 'high'] = raw_eod_score_dict[stock]['high'].iloc[0]

    df['raw_score'] = score_calculation(df)

In [25]:
# Classification labels per stock, looked up once instead of scanning the
# classification table for every column of every stock.
itd_label_cols = ['industry_name', 'industry_perform', 'stock_perform', 'marketcap_group', 't2m_select']
itd_labels_by_stock = stock_classification_filtered.set_index('stock')[itd_label_cols].to_dict('index')

for stock in itd_score_dict.keys():
    score_frame = itd_score_dict[stock]
    stock_labels = itd_labels_by_stock[stock]
    # 'open' of the first (newest) EOD row is the base for the intraday change.
    session_open_price = eod_stock_dict[stock]['open'].iloc[0]

    score_frame['t0_score'] = score_frame['raw_score']
    score_frame['price_change'] = (score_frame['close'] - session_open_price)/session_open_price

    for label_col in itd_label_cols:
        score_frame[label_col] = stock_labels[label_col]

In [26]:
# Reduce every intraday score frame to the output columns, in this order.
itd_output_cols = [
    'stock', 'date', 'close', 'volume', 't0_score', 'liquid_ratio',
    'industry_name', 'industry_perform', 'stock_perform', 'marketcap_group',
    't2m_select', 'price_change',
]
itd_score_dict = {stock: frame[itd_output_cols] for stock, frame in itd_score_dict.items()}

#### Dòng tiền trong tuần và trong tháng

In [27]:
def fill_month_flow(series):
    """Anchor a series at 0 and forward-fill its interior gaps.

    The first element is set to 0. Every NaN that has a valid value somewhere
    after it is replaced by the closest preceding value. NaNs after the last
    valid value are left untouched, so trailing gaps stay empty.

    The previous implementation looked ahead with ``series[i:-1]``, which
    ignores the last element: a gap sitting right before a valid final value
    was never filled. The look-ahead now includes the last element.

    Parameters
    ----------
    series : pandas.Series
        Values to fill. The input is not modified.

    Returns
    -------
    pandas.Series
        A filled copy. Series shorter than two elements are returned unchanged.
    """
    new_series = series.copy()
    if len(new_series) < 2:
        return new_series

    new_series.iloc[0] = 0

    # Positions (not labels) of the valid values in the *original* series.
    valid_positions = series.notna().to_numpy().nonzero()[0]
    if valid_positions.size == 0:
        return new_series

    # Only gaps located before the last valid value are filled.
    last_valid_pos = int(valid_positions[-1])
    head = new_series.iloc[:last_valid_pos]
    new_series.iloc[:last_valid_pos] = head.ffill().to_numpy()
    return new_series

- Tính toán cho từng cổ phiếu

In [28]:
# Wide table of T0 scores: the rows of date_series plus one column per stock.
all_stock_list = stock_classification_filtered['stock'].tolist()
stock_score_df = date_series.copy()

for stock, df in eod_score_dict.items():
    # Column assignment aligns each score series on the row index of date_series.
    stock_score_df[stock] = df['t0_score']

# Calendar keys used to slice the table by week and by month.
score_dates = stock_score_df['date'].dt
stock_score_df['week'] = score_dates.strftime('%U-%Y')
stock_score_df['month'] = score_dates.strftime('%m-%Y')
stock_score_df['week_day'] = score_dates.day_name()
stock_score_df['day_num'] = score_dates.day

In [29]:
# Build the weekly score tables
week_day_index = {'Monday': 0,'Tuesday': 1,'Wednesday': 2,'Thursday': 3,'Friday': 4,'Saturday': 5,'Sunday': 6}
week_day_dict = {'Monday': "Thứ 2",'Tuesday': "Thứ 3",'Wednesday': "Thứ 4",'Thursday': "Thứ 5",'Friday': "Thứ 6"}

# Empty frame indexed by the five trading weekdays; concatenating with it adds a
# row for every weekday that is absent from a week's data.
week_day_skeleton = pd.DataFrame({'week_day': ['Monday','Tuesday','Wednesday','Thursday','Friday']}).set_index('week_day')

# Distinct week keys in row order; the first two are used (IndexError if fewer exist).
week_keys = stock_score_df['week'].unique()

week_score_dict = {}
for i in range(2):
    week_df = (stock_score_df[stock_score_df['week'] == week_keys[i]]
               .drop(columns=['date','week','month','day_num'])
               .set_index('week_day'))
    week_df = pd.concat([week_day_skeleton, week_df], axis=1).reset_index()
    week_df['id'] = f'w{i+1}'

    # Long format (week_day, stock, value), then one value column named after the week id.
    week_df = week_df.melt(id_vars=['week_day', 'id'], value_vars=all_stock_list, var_name='stock', value_name='value')
    week_score_dict[f'week_{i+1}'] = week_df.pivot_table(index=['week_day', 'stock'], columns='id', values='value', aggfunc='first').reset_index()

# Side-by-side comparison of the two weeks
stock_score_week = week_score_dict['week_1'].merge(week_score_dict['week_2'], on=['week_day','stock'], how='outer')
stock_score_week['day_index'] = stock_score_week['week_day'].map(week_day_index)
# Weekday names are replaced by their Vietnamese labels (no label exists for Saturday/Sunday).
stock_score_week['week_day'] = stock_score_week['week_day'].map(week_day_dict)
stock_score_week = stock_score_week.sort_values('day_index')

In [30]:
# Build the monthly cumulative score tables
# Distinct month keys in row order; the first two are used (IndexError if fewer exist).
month_keys = stock_score_df['month'].unique()

month_score_dict = {}
for i in range(2):
    month_df = (stock_score_df[stock_score_df['month'] == month_keys[i]]
                .drop(columns=['date','week','month','week_day'])
                .set_index('day_num'))

    # Skeleton with day numbers 0..31 so that every month table has the same rows.
    temp_df = pd.DataFrame(list(range(0, 32))).rename(columns={0:'day_num'}).set_index('day_num')
    month_df = pd.concat([temp_df, month_df], axis=1).reset_index()
    columns_list = month_df.columns

    # columns_list[0] is 'day_num'; every other column holds one stock's scores.
    for column in columns_list[1:]:
        cumulative = month_df[column].cumsum()
        first_valid = cumulative.first_valid_index()
        # The row just before the first score is set to 0. This is done on the
        # standalone series: the former chained `df[col].iloc[k] = 0` has no
        # effect under pandas Copy-on-Write. Columns without any score are skipped
        # instead of failing on `None - 1`.
        if first_valid is not None:
            cumulative.iloc[first_valid - 1] = 0
        month_df[column] = fill_month_flow(cumulative)

    month_df['id'] = f'm{i+1}'

    # Long format (day_num, stock, value), then one value column named after the month id.
    month_df = month_df.melt(id_vars=['day_num', 'id'], value_vars=all_stock_list, var_name='stock', value_name='value')
    month_score_dict[f'month_{i+1}'] = month_df.pivot_table(index=['day_num', 'stock'], columns='id', values='value', aggfunc='first').reset_index()

# Side-by-side comparison of the two months
stock_score_month = month_score_dict['month_1'].merge(month_score_dict['month_2'], on=['day_num','stock'], how='outer')

#### Chỉ số kĩ thuật

In [31]:
def calculate_ta_df(price_df):
    """Return a copy of the OHLCV columns with calendar period keys added.

    Parameters
    ----------
    price_df : pandas.DataFrame
        Must contain 'stock', 'date' (datetime), 'open', 'high', 'low',
        'close' and 'volume'. Other columns are ignored.

    Returns
    -------
    pandas.DataFrame
        The seven base columns plus 'week' (a '%Y-%U' string) and 'month',
        'quarter', 'year' (pandas periods).
    """
    base_columns = ['stock', 'date', 'open', 'high', 'low', 'close', 'volume']
    ta_df = price_df[base_columns].copy()

    dates = ta_df['date'].dt
    ta_df['week'] = dates.strftime('%Y-%U')
    for frame, freq in (('month', 'M'), ('quarter', 'Q'), ('year', 'Y')):
        ta_df[frame] = dates.to_period(freq)
    return ta_df

In [32]:
def calculate_candle_ta_df(ta_df, input_type):
    """Add period open / previous-period high and low levels, and the distance of 'close' to them.

    For each frame in week, month, quarter, year:

    * ``{frame}_last_low`` / ``{frame}_last_high``: min 'low' / max 'high' of the
      rows whose period equals the *second* distinct value of the frame column.
    * ``{frame}_open``: 'open' of the last row whose period equals the *first*
      distinct value of the frame column.

    Given the newest-first ordering applied when the data is loaded, these are
    presumably the previous period's range and the current period's opening
    price — confirm against the caller.

    Unlike the previous version, the level columns are always created: when the
    table does not contain enough distinct periods they hold NaN instead of being
    omitted, so downstream column selections no longer fail with KeyError.

    Parameters
    ----------
    ta_df : pandas.DataFrame
        Output of ``calculate_ta_df`` (needs open/high/low/close and the four
        period columns).
    input_type : str
        'stock' gives relative distances ``(close - level) / level``; 'index'
        gives absolute distances ``close - level``. Any other value adds no
        ``from_*`` columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``ta_df`` with the level and ``from_*`` columns appended.
    """
    ta_df_copy = ta_df.copy()
    missing = float('nan')
    time_frames = ['week', 'month', 'quarter', 'year']

    for frame in time_frames:
        periods = ta_df_copy[frame].unique()

        if len(periods) > 1:
            in_second_period = ta_df_copy[frame] == periods[1]
            last_low = ta_df_copy.loc[in_second_period, 'low'].min()
            last_high = ta_df_copy.loc[in_second_period, 'high'].max()
        else:
            last_low = last_high = missing

        if len(periods) > 0:
            in_first_period = ta_df_copy[frame] == periods[0]
            period_open = ta_df_copy.loc[in_first_period, 'open'].iloc[-1]
        else:
            period_open = missing

        ta_df_copy[f'{frame}_last_low'] = last_low
        ta_df_copy[f'{frame}_last_high'] = last_high
        ta_df_copy[f'{frame}_open'] = period_open

    # Distance of every close to each reference level.
    for frame in time_frames:
        for suf in ['last_high', 'last_low', 'open']:
            level = ta_df_copy[f'{frame}_{suf}']
            if input_type == 'stock':
                ta_df_copy[f'from_{frame}_{suf}'] = (ta_df_copy['close'] - level) / level
            elif input_type == 'index':
                ta_df_copy[f'from_{frame}_{suf}'] = ta_df_copy['close'] - level

    return ta_df_copy

In [33]:
def calculate_fibo_ta_df(ta_df, input_type):
    """Add Fibonacci retracement levels per time frame and the distance of 'close' to them.

    For month, quarter and year, the range is taken over the rows whose period
    is one of the first two distinct values of the frame column (in row order):
    ``{frame}_high`` is the max 'high' and ``{frame}_low`` the min 'low'. The
    levels are ``high - (high - low) * r`` for r in 0.382, 0.5, 0.618.

    Parameters
    ----------
    ta_df : pandas.DataFrame
        Output of ``calculate_ta_df``.
    input_type : str
        'stock' gives ``(close - level) / abs(level)``; 'index' gives
        ``close - level``. Any other value adds no ``from_*`` columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``ta_df`` with the range, level and ``from_*`` columns appended.
    """
    ta_df_copy = ta_df.copy()
    time_frames = ('month', 'quarter', 'year')
    retracements = (('382', 0.382), ('500', 0.5), ('618', 0.618))

    # Row masks selecting the first two distinct periods of each frame.
    in_range = {}
    for frame in time_frames:
        first_two_periods = ta_df_copy[frame].unique()[:2].tolist()
        in_range[frame] = ta_df_copy[frame].isin(first_two_periods)

    for frame in time_frames:
        ta_df_copy[f'{frame}_high'] = ta_df_copy.loc[in_range[frame], 'high'].max()
    for frame in time_frames:
        ta_df_copy[f'{frame}_low'] = ta_df_copy.loc[in_range[frame], 'low'].min()

    for frame in time_frames:
        top = ta_df_copy[f'{frame}_high']
        span = top - ta_df_copy[f'{frame}_low']
        for tag, ratio in retracements:
            ta_df_copy[f'{frame}_fibo_{tag}'] = top - span*ratio

    if input_type == 'stock' or input_type == 'index':
        for frame in time_frames:
            for tag, _ in retracements:
                level = ta_df_copy[f'{frame}_fibo_{tag}']
                distance = ta_df_copy['close'] - level
                if input_type == 'stock':
                    # Stocks are compared in relative terms.
                    distance = distance/abs(level)
                ta_df_copy[f'from_{frame}_fibo_{tag}'] = distance

    return ta_df_copy

In [34]:
def calculate_pivot_ta_df(ta_df, input_type):
    """Add pivot levels per time frame and the distance of 'close' to them.

    For month, quarter and year, the rows whose period equals the *second*
    distinct value of the frame column are used (presumably the previous period,
    given the newest-first ordering applied at load time — confirm):

    * ``{frame}_high`` = max 'high', ``{frame}_low`` = min 'low',
      ``{frame}_close`` = 'close' of the first of those rows;
    * ``{frame}_pivot`` = (high + low + close) / 3.

    When the table holds fewer than two distinct periods for a frame, the three
    inputs are set to None, which makes the pivot and its distance NaN. The
    previous bare ``except:`` clauses produced the same fallback but also hid
    unrelated failures such as a missing column; those now propagate.

    Parameters
    ----------
    ta_df : pandas.DataFrame
        Output of ``calculate_ta_df``.
    input_type : str
        'index' gives ``close - pivot``; 'stock' gives
        ``(close - pivot) / abs(pivot)``. Any other value adds no ``from_*`` columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``ta_df`` with the input, pivot and ``from_*`` columns appended.
    """
    ta_df_copy = ta_df.copy()
    time_frames = ('month', 'quarter', 'year')

    # Rows of the second distinct period of each frame, or None when there is no such period.
    previous_rows = {}
    for frame in time_frames:
        periods = ta_df_copy[frame].unique()
        if len(periods) > 1:
            previous_rows[frame] = ta_df_copy[ta_df_copy[frame] == periods[1]]
        else:
            previous_rows[frame] = None

    for frame in time_frames:
        rows = previous_rows[frame]
        ta_df_copy[f'{frame}_high'] = None if rows is None else rows['high'].max()
    for frame in time_frames:
        rows = previous_rows[frame]
        ta_df_copy[f'{frame}_low'] = None if rows is None else rows['low'].min()
    for frame in time_frames:
        rows = previous_rows[frame]
        # An empty selection has no first row; keep the None fallback in that case.
        ta_df_copy[f'{frame}_close'] = None if rows is None or rows.empty else rows['close'].iloc[0]

    for frame in time_frames:
        ta_df_copy[f'{frame}_pivot'] = (ta_df_copy[f'{frame}_high'] + ta_df_copy[f'{frame}_low'] + ta_df_copy[f'{frame}_close'])/3

    if input_type == 'index':
        for frame in time_frames:
            ta_df_copy[f'from_{frame}_pivot'] = (ta_df_copy['close'] - ta_df_copy[f'{frame}_pivot'])

    if input_type == 'stock':
        for frame in time_frames:
            pivot = ta_df_copy[f'{frame}_pivot']
            ta_df_copy[f'from_{frame}_pivot'] = (ta_df_copy['close'] - pivot)/abs(pivot)

    return ta_df_copy

In [35]:
def calculate_ma_ta_df(ta_df,input_type):
    """Add moving averages of 'close' and the distance of 'close' to each of them.

    The averages (windows 5, 20, 60, 120, 240, 480; ``min_periods=1``) are
    computed on the reversed series and reversed back, so the value of a row
    averages that row and the rows *after* it in the table.

    Parameters
    ----------
    ta_df : pandas.DataFrame
        Table containing a 'close' column.
    input_type : str
        'stock' gives ``(close - ma) / ma``; 'index' gives ``close - ma``.
        Any other value adds no ``from_*`` columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``ta_df`` with ``ma{window}`` columns and ``from_{frame}_ma{window}``
        columns (5/20 -> month, 60/120 -> quarter, 240/480 -> year).
    """
    ta_df_copy = ta_df.copy()
    windows_by_frame = (('month', (5, 20)), ('quarter', (60, 120)), ('year', (240, 480)))

    reversed_close = ta_df_copy['close'][::-1]
    for _, windows in windows_by_frame:
        for window in windows:
            rolling_mean = reversed_close.rolling(window=window, min_periods=1).mean()
            ta_df_copy[f'ma{window}'] = rolling_mean[::-1]

    if input_type == 'stock' or input_type == 'index':
        for frame, windows in windows_by_frame:
            for window in windows:
                average = ta_df_copy[f'ma{window}']
                gap = ta_df_copy['close'] - average
                if input_type == 'stock':
                    # Stocks are compared in relative terms.
                    gap = gap/average
                ta_df_copy[f'from_{frame}_ma{window}'] = gap

    return ta_df_copy

In [36]:
def transform_ta_df(ta_df,ta_name):
    """Reshape the first row of a wide indicator table into a long table.

    Parameters
    ----------
    ta_df : pandas.DataFrame
        Output of one of the ``calculate_*_ta_df`` functions; only its first
        row is read.
    ta_name : str
        One of 'candle', 'fibo', 'pivot', 'ma'; selects which columns are read.

    Returns
    -------
    pandas.DataFrame
        One row per indicator level and time frame (month, quarter, year) with
        columns 'stock', 'name', 'value' (formatted with two decimals when
        numeric), 'from', 'order', 'id' (time frame) and 'ta_name'.
    """
    ma_windows = {'month': (5, 20), 'quarter': (60, 120), 'year': (240, 480)}
    long_tables = []

    for time_frame in ['month','quarter','year']:
        if ta_name == 'candle':
            parts = ['open', 'last_high', 'last_low']
            value_cols = [f'{time_frame}_{part}' for part in parts]
            from_cols = [f'from_{time_frame}_{part}' for part in parts]
            labels = ['Open','Last High','Last Low']
        elif ta_name == 'fibo':
            levels = ['382', '500', '618']
            value_cols = [f'{time_frame}_fibo_{level}' for level in levels]
            from_cols = [f'from_{time_frame}_fibo_{level}' for level in levels]
            labels = ['Fibo 0.382', 'Fibo 0.500', 'Fibo 0.618']
        elif ta_name == 'pivot':
            value_cols = [f'{time_frame}_pivot']
            from_cols = [f'from_{time_frame}_pivot']
            labels = ['Pivot']
        elif ta_name == 'ma':
            windows = ma_windows[time_frame]
            value_cols = [f'ma{window}' for window in windows]
            from_cols = [f'from_{time_frame}_ma{window}' for window in windows]
            labels = [f'MA{window}' for window in windows]

        first_row = ta_df[['stock'] + value_cols + from_cols].iloc[:1]
        # Column 0 is 'stock'; the level values come next, then their distances.
        split_at = 1 + len(value_cols)
        level_values = first_row.iloc[0,1:split_at].tolist()
        level_distances = first_row.iloc[0,split_at:].tolist()

        # Pivot has a single level and uses a fixed order of 3; other indicators are numbered from 1.
        if ta_name == 'pivot':
            display_order = 3
        else:
            display_order = list(range(1, len(labels) + 1))

        long_df = pd.DataFrame({'stock':first_row['stock'].item(),'name': labels,'value': level_values,'from': level_distances, 'order': display_order})
        long_df['id'] = time_frame
        long_df['ta_name'] = ta_name
        long_df['value'] = long_df['value'].apply(lambda x: '{:.2f}'.format(x) if isinstance(x, (int, float)) else x)
        long_tables.append(long_df)

    return pd.concat(long_tables, axis=0)

def concat_ta_df(df,input_type):
    """Compute every technical indicator table for one price table.

    Parameters
    ----------
    df : pandas.DataFrame
        Price table accepted by ``calculate_ta_df``.
    input_type : str
        Forwarded to the ``calculate_*_ta_df`` functions ('stock' or 'index').

    Returns
    -------
    dict
        ``'concat_ta_df'``: the long tables produced by ``transform_ta_df``,
        stacked in the order candle, fibo, pivot, ma.
        ``'ta_dict'``: the wide tables under 'df_candle', 'df_pivot', 'df_ma'
        and 'df_fibo'.
    """
    ta_df = calculate_ta_df(df)

    # key in the result -> (ta_name understood by transform_ta_df, calculator)
    calculators = {
        'df_candle': ('candle', calculate_candle_ta_df),
        'df_pivot': ('pivot', calculate_pivot_ta_df),
        'df_ma': ('ma', calculate_ma_ta_df),
        'df_fibo': ('fibo', calculate_fibo_ta_df),
    }

    raw_tables = {}
    for key, (_, calculator) in calculators.items():
        raw_tables[key] = calculator(ta_df, input_type)

    long_tables = {}
    for key, (ta_name, _) in calculators.items():
        long_tables[ta_name] = transform_ta_df(raw_tables[key], ta_name)

    stacked = pd.concat([long_tables[ta_name] for ta_name in ('candle', 'fibo', 'pivot', 'ma')], axis=0)

    return {'concat_ta_df': stacked, 'ta_dict': raw_tables}

- Tính toán chỉ số kĩ thuật cho cổ phiếu

In [37]:
# Technical indicators of every stock:
#   ta_stock_df   - long tables of all stocks stacked together
#   ta_stock_dict - wide indicator tables, keyed by stock
ta_stock_dict = {}
ta_stock_frames = []

for stock, df in eod_all_stock['all_stock'].items():
    temp_ta_dict = concat_ta_df(df.copy(), 'stock')
    ta_stock_frames.append(temp_ta_dict['concat_ta_df'])
    ta_stock_dict[stock] = temp_ta_dict['ta_dict']

# Concatenate once: growing a DataFrame inside the loop copies all accumulated rows on every pass.
ta_stock_df = pd.concat(ta_stock_frames, axis=0) if ta_stock_frames else pd.DataFrame()

#### Page 3: Phân tích nhóm ngành

- Biểu đồ đường thể hiện index các nhóm cổ phiếu

In [38]:
def calculate_index(stock_group, name):
    """Build a synthetic index series for one group of stocks.

    Works on a deep copy of the module-level ``price_index_date_series``. Each
    member's 'close' is aligned on that frame's row index and turned into a
    percentage change computed on the reversed column. The row-wise sum of the
    changes is divided by the number of members, scaled by 1000 (x100, then
    x10), accumulated from the last row upwards and offset by 1000.

    Parameters
    ----------
    stock_group : dict
        Maps a group name to a dict of ``{stock: price DataFrame}``.
    name : hashable
        Key of the group to compute.

    Returns
    -------
    pandas.Series
        The index values, named 'index_value'.
    """
    members = stock_group[name]
    index_frame = copy.deepcopy(price_index_date_series)

    for stock, df in members.items():
        index_frame[stock] = df['close']
        aligned_close = index_frame[stock]
        index_frame[stock] = aligned_close[::-1].pct_change()[::-1]

    # Column 0 is the date; all remaining columns are per-stock changes.
    summed_change = index_frame.iloc[:,1:].sum(axis=1)
    scaled_change = ((summed_change/len(members))*100)*10
    index_value = scaled_change[::-1].cumsum()[::-1] + 1000

    return index_value.rename('index_value')

In [39]:
# Dates of the VNINDEX table, restricted to those after January 1st of
# (year of the first row - 2).
price_index_date_series = pd.DataFrame(eod_index_dict['VNINDEX']['date'])
previous_year = price_index_date_series['date'].iloc[0].year - 2
price_index_date_series = price_index_date_series.loc[price_index_date_series['date'] > pd.Timestamp(year=previous_year, month=1, day=1)]
temp_df = price_index_date_series.copy()

# One synthetic index column per group, for each of the three classifications.
for group_dict in (eod_industry_name, eod_industry_perform, eod_marketcap_group):
    for group in group_dict:
        temp_df[group] = calculate_index(group_dict, group)

temp_df = temp_df.melt(id_vars=['date'], var_name='group_name', value_name='value')
# Group codes are replaced by their full display names.
temp_df['group_name'] = temp_df['group_name'].map(name_map_dict)

# Keep the first 200 rows of every group; concatenate once instead of growing
# the frame inside the loop.
group_frames = [temp_df[temp_df['group_name']==group_name].iloc[:200]
                for group_name in temp_df['group_name'].unique()]
group_stock_price_index = pd.concat(group_frames) if group_frames else temp_df.iloc[:0].copy()

group_stock_price_index.columns = ['date', 'group_name', 'value']

#### Page 4: Phân tích cổ phiếu

- Biểu đồ giá cổ phiếu

In [40]:
# Closing prices of every classified stock, in long format, limited to the
# first 200 rows of each stock.
price_wide_df = pd.DataFrame(eod_index_dict['VNINDEX']['date'])
all_stock_tables = eod_all_stock['all_stock']
for stock in stock_classification_filtered['stock'].tolist():
    # Column assignment aligns each close series on the row index of the VNINDEX dates.
    price_wide_df[stock] = all_stock_tables[stock]['close']

price_long_df = price_wide_df.melt(id_vars=['date'], var_name='stock', value_name='value')
stock_price_chart_df = price_long_df.groupby('stock').head(200).reset_index(drop=True)

- Biểu đồ dòng tiền và thanh khoản T0

In [41]:
# Intraday T0 score and liquidity ratio of every stock, aligned on the rows of itd_series.
liquidity_frames = []
for stock, df in itd_score_dict.items():
    # Left merge keeps every row of itd_series, even those without data for the stock.
    temp_df = itd_series.merge(df, on='date', how='left')
    temp_df['stock'] = stock
    liquidity_frames.append(temp_df[['stock', 'date','t0_score','liquid_ratio']])

# Concatenate once: growing a DataFrame inside the loop copies all accumulated rows on every pass.
stock_liquidty_score_t0 = pd.concat(liquidity_frames, axis=0) if liquidity_frames else pd.DataFrame()

- Diễn biến xếp hạng dòng tiền cổ phiếu, tương quan dòng tiền, hệ số thanh khoản

In [42]:
# Per-stock table over the first 20 rows of date_series: price, liquidity, score,
# ranks and the cumulative score / price changes.
power_frames = []
for stock, df in eod_score_dict.items():
    temp_df = date_series.iloc[:20].copy()
    temp_df['stock'] = stock
    for column in ['close', 'liquid_ratio', 't0_score', 'rank', 'rank_t0']:
        temp_df[column] = df[column]

    # Flags for ranks below 10% / above 90% of the largest rank_t0 in this window.
    # The maximum is computed once instead of once per row.
    rank_t0_max = temp_df['rank_t0'].max()
    temp_df['top_rank_check'] = (temp_df['rank_t0'] < rank_t0_max*0.1).astype('int64')
    temp_df['bot_rank_check'] = (temp_df['rank_t0'] > rank_t0_max*0.9).astype('int64')

    # Running totals accumulated from the last row upwards; the score total is
    # taken relative to the last row's score and scaled by 1/100.
    temp_df['score_change'] = (temp_df['t0_score'][::-1].cumsum()[::-1] - temp_df['t0_score'].iloc[-1])/100
    temp_df['price_change'] = temp_df['close'][::-1].pct_change()[::-1].fillna(0)[::-1].cumsum()[::-1]

    power_frames.append(temp_df)

# Concatenate once: growing a DataFrame inside the loop copies all accumulated rows on every pass.
stock_score_power_df = pd.concat(power_frames, axis=0) if power_frames else pd.DataFrame()

#### Page 5: Bộ lọc cổ phiếu

In [43]:
def _latest_ta_rows(ta_key, from_columns):
    """Stack the first row of every stock's `ta_key` table into one frame (one row per stock)."""
    first_rows = {key: tables[ta_key][['stock'] + from_columns].iloc[0]
                  for key, tables in ta_stock_dict.items()}
    return pd.DataFrame(first_rows).T.reset_index(drop=True)


def _trend_label(row, lower_frame, frame):
    """Label the trend of `frame` from its Fibonacci distances and the lower frame's last high/low."""
    from_last_high = row[f'from_{lower_frame}_last_high']
    from_fibo_382 = row[f'from_{frame}_fibo_382']
    if (from_last_high >= 0) & (from_fibo_382 >= 0):
        return 'Tăng mạnh'
    if (from_last_high < 0) & (from_fibo_382 >= 0):
        return 'Tăng'
    from_fibo_618 = row[f'from_{frame}_fibo_618']
    if (from_fibo_618 >= 0) & (from_fibo_382 < 0):
        return 'Trung lập'
    if (row[f'from_{lower_frame}_last_low'] >= 0) & (from_fibo_618 < 0):
        return 'Giảm'
    # Reached when no condition above holds, including when the inputs are NaN.
    return 'Giảm mạnh'


# First row of each indicator table, one row per stock.
stock_candle_df = _latest_ta_rows(
    'df_candle',
    [f'from_{frame}_{part}'
     for frame in ('week', 'month', 'quarter', 'year')
     for part in ('open', 'last_high', 'last_low')])

stock_pivot_df = _latest_ta_rows(
    'df_pivot',
    ['from_month_pivot', 'from_quarter_pivot', 'from_year_pivot'])

stock_ma_df = _latest_ta_rows(
    'df_ma',
    ['from_month_ma5', 'from_month_ma20',
     'from_quarter_ma60', 'from_quarter_ma120',
     'from_year_ma240', 'from_year_ma480'])

stock_fibo_df = _latest_ta_rows(
    'df_fibo',
    [f'from_{frame}_fibo_{level}'
     for frame in ('month', 'quarter', 'year')
     for level in ('382', '500', '618')])

# Attach the four indicator tables to the score table, one left merge at a time.
filter_stock_df = eod_score_df
for indicator_df in (stock_candle_df, stock_pivot_df, stock_ma_df, stock_fibo_df):
    filter_stock_df = filter_stock_df.merge(indicator_df, on='stock', how='left')

# Each trend compares the frame's Fibonacci levels with the range of the frame below it.
for frame, lower_frame in (('month', 'week'), ('quarter', 'month'), ('year', 'quarter')):
    filter_stock_df[f'{frame}_trend'] = filter_stock_df.apply(_trend_label, axis=1, args=(lower_frame, frame))

#### Lưu vào SQL

In [44]:
from urllib.parse import quote_plus

from sqlalchemy import create_engine, text, MetaData

# Database connection settings. Every value can be overridden with an environment
# variable; the fallbacks reproduce the previous hard-coded configuration.
# NOTE(security): the fallback password is still stored in this notebook. Set
# T2M_DB_PASSWORD, rotate the credential and remove the fallback.
username = os.environ.get('T2M_DB_USER', 'twan')
password = os.environ.get('T2M_DB_PASSWORD', 'chodom')
database = os.environ.get('T2M_DB_NAME', 't2m')
host = os.environ.get('T2M_DB_HOST', 'localhost')
# host = '14.225.192.30'
port = os.environ.get('T2M_DB_PORT', '3306')
# Credentials are URL-encoded so that characters such as '@' or '/' cannot corrupt the URL.
engine = create_engine(f"mysql+pymysql://{quote_plus(username)}:{quote_plus(password)}@{host}:{port}/{database}")

# # Drop all of the old tables
# conn = engine.connect()
# metadata = MetaData()
# metadata.reflect(bind=engine)
# for table in reversed(metadata.sorted_tables):
#     table.drop(engine)
# conn.close()

def save_dataframe_to_sql(df, table_name, engine):
    """Write `df` to `table_name`, going through a temporary table.

    The frame is first written to ``temp_{table_name}`` (replacing any existing
    table of that name). The target table is then dropped and the temporary
    table renamed to take its place. The DataFrame index is stored as a column
    for every table except 'stock_price_chart_df'.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to store.
    table_name : str
        Name of the destination table. It is interpolated into SQL as-is, so
        it must be a trusted identifier.
    engine : sqlalchemy.engine.Engine
        Connection used for the write and the table swap.
    """
    temp_table_name = f"temp_{table_name}"
    index = table_name != 'stock_price_chart_df'

    df.to_sql(name=temp_table_name, con=engine, if_exists='replace', index=index)
    with engine.begin() as connection:
        connection.execute(text(f"DROP TABLE IF EXISTS {table_name}"))
        connection.execute(text(f"ALTER TABLE {temp_table_name} RENAME TO {table_name}"))

# Store the DataFrames in the database
tables_to_save = [
    (market_ms, 'market_ms'),
    (group_stock_price_index, 'group_stock_price_index'),
    (stock_price_chart_df, 'stock_price_chart_df'),
    (ta_stock_df, 'stock_ta_df'),
    (stock_liquidty_score_t0, 'stock_liquidty_score_t0'),
    (stock_score_week, 'stock_score_week'),
    (stock_score_month, 'stock_score_month'),
    (stock_score_power_df, 'stock_score_power_df'),
    (filter_stock_df, 'filter_stock_df'),
]
for frame_to_save, sql_table_name in tables_to_save:
    save_dataframe_to_sql(frame_to_save, sql_table_name, engine)