In [1]:
# 한국M1, 한국M2, 미국M1, 미국M2, 생산자물가지수, 전산업생산지수, 
# 경상수지, 미국소비자물가지수 컬럼은 2025년 5월까지의 컬럼만 존재

In [2]:
import pandas as pd

In [3]:
df11 = pd.read_excel('협의통화(M1).xlsx')
df11.columns=['기준년월','한국(M1)조원','한국(M1)변동%']
df11['기준년월'] = pd.to_datetime(df11['기준년월'], format='%Y-%m')

prev_m1 = df11.loc[df11['기준년월'] == '2025-04-01', '한국(M1)조원'].values[0]

change_pct = -0.83
new_m1 = round(prev_m1 * (1 + change_pct / 100), 4)

new_row = pd.DataFrame({
    '기준년월': [pd.to_datetime('2025-05-01')],
    '한국(M1)조원': [new_m1],
    '한국(M1)변동%': [change_pct]
})

df11 = pd.concat([df11, new_row], ignore_index=True)

print(df11.tail())


          기준년월   한국(M1)조원  한국(M1)변동%
660 2025-01-01  1277.5434       0.60
661 2025-02-01  1282.0857       0.40
662 2025-03-01  1279.5412      -0.20
663 2025-04-01  1272.4983      -0.60
664 2025-05-01  1261.9366      -0.83


In [4]:
df12 = pd.read_excel('광의통화(M2).xlsx')
df12.columns=['기준년월','한국(M2)조원','한국(M2)변동%']
df12['기준년월'] = pd.to_datetime(df12['기준년월'], format='%Y-%m')

prev_value = df12.loc[df12['기준년월'] == '2025-04-01', '한국(M2)조원'].values[0]

change_pct = 0.69
new_value = round(prev_value * (1 + change_pct / 100), 4)

new_row = pd.DataFrame({
    '기준년월': [pd.to_datetime('2025-05-01')],
    '한국(M2)조원': [new_value],
    '한국(M2)변동%': [change_pct]
})

df12 = pd.concat([df12, new_row], ignore_index=True)

print(df12.tail())

          기준년월   한국(M2)조원  한국(M2)변동%
468 2025-01-01  4203.8015       0.50
469 2025-02-01  4231.5708       0.70
470 2025-03-01  4227.6921      -0.10
471 2025-04-01  4235.8187       0.20
472 2025-05-01  4265.0458       0.69


In [5]:
df23 = pd.read_csv('경제정책불확실성지수.csv')
df23['기준년월'] = pd.to_datetime(df23['기준년월'], format='%Y-%m')

fill_values = [145.885, 194.465, 136.72, 124.96]
nan_indices = df23[df23['한국EPU'].isna()].index[-4:]
for idx, value in zip(nan_indices, fill_values):
    df23.loc[idx, '한국EPU'] = value

from sklearn.linear_model import LinearRegression
import numpy as np

df23 = df23.sort_values('기준년월').reset_index(drop=True)

feature_cols = ['일본EPU', '중국EPU', '한국EPU', '미국EPU']
target_cols = ['글로벌EPU_명목GDP기준', '글로벌EPU_PPP기준']

regression_forecast = {}

for target in target_cols:
    train_df = df23.loc[df23['기준년월'] <= '2025-04-01'].dropna(subset=feature_cols + [target])
    
    X_train = train_df[feature_cols].values
    y_train = train_df[target].values

    model = LinearRegression()
    model.fit(X_train, y_train)

    pred_df = df23[df23['기준년월'].isin(['2025-05-01', '2025-06-01'])].copy()
    X_pred = pred_df[feature_cols].values

    y_pred = model.predict(X_pred)
    regression_forecast[target] = y_pred

print("\n📌 다중 회귀 기반 예측 결과:")
for i, date in enumerate(['2025-05-01', '2025-06-01']):
    print(f"{date} ▶ 명목GDP기준: {regression_forecast['글로벌EPU_명목GDP기준'][i]:.2f}, PPP기준: {regression_forecast['글로벌EPU_PPP기준'][i]:.2f}")

idx_missing = df23[df23['글로벌EPU_명목GDP기준'].isna()].index

df23.loc[idx_missing[0], '글로벌EPU_명목GDP기준'] = regression_forecast['글로벌EPU_명목GDP기준'][0]
df23.loc[idx_missing[1], '글로벌EPU_명목GDP기준'] = regression_forecast['글로벌EPU_명목GDP기준'][1]
df23.loc[idx_missing[0], '글로벌EPU_PPP기준'] = regression_forecast['글로벌EPU_PPP기준'][0]
df23.loc[idx_missing[1], '글로벌EPU_PPP기준'] = regression_forecast['글로벌EPU_PPP기준'][1]


📌 다중 회귀 기반 예측 결과:
2025-05-01 ▶ 명목GDP기준: 466.11, PPP기준: 452.09
2025-06-01 ▶ 명목GDP기준: 329.91, PPP기준: 322.83


In [6]:
# 환율
df = pd.read_csv('미환율데이터.csv')
df = df.drop('거래량', axis=1)
df.columns = ['Date','미환율_종가','미환율_시가','미환율_고가','미환율_저가','미환율_변동%']
df['Date'] = pd.to_datetime(df['Date'])

# 시장데이터
df1 = pd.read_csv('WTI유 선물 과거 데이터.csv')
df1.columns=['Date','WTI유_종가','WTI유_시가','WTI유_고가','WTI유_저가','WTI유_거래량','WTI유_변동%']
df1['Date'] = pd.to_datetime(df1['Date'])

df2 = pd.read_csv('금 선물 과거 데이터.csv')
df2.columns=['Date','금_종가','금_시가','금_고가','금_저가','금_거래량','금_변동%']
df2['Date'] = pd.to_datetime(df2['Date'])

df3 = pd.read_csv('S&P 500 과거 데이터.csv')
df3 = df3.drop('거래량', axis=1)
df3.columns=['Date','S&P500_종가','S&P500_시가','S&P500_고가','S&P500_저가','S&P500_변동%']
df3['Date'] = pd.to_datetime(df3['Date'])

df4 = pd.read_csv('다우존스 과거 데이터.csv')
df4.columns=['Date','다우존스_종가','다우존스_시가','다우존스_고가','다우존스_저가','다우존스_거래량','다우존스_변동%']
df4['Date'] = pd.to_datetime(df4['Date'])

df5 = pd.read_csv('상해종합 과거 데이터.csv')
df5.columns=['Date','상해종합_종가','상해종합_시가','상해종합_고가','상해종합_저가','상해종합_거래량','상해종합_변동%']
df5['Date'] = pd.to_datetime(df5['Date'])

df6 = pd.read_csv('닛케이 과거 데이터.csv')
df6 = df6.drop('거래량', axis=1)
df6.columns=['Date','닛케이_종가','닛케이_시가','닛케이_고가','닛케이_저가','닛케이_변동%']
df6['Date'] = pd.to_datetime(df6['Date'])

df7 = pd.read_csv('코스피지수 과거 데이터.csv')
df7.columns=['Date','코스피_종가','코스피_시가','코스피_고가','코스피_저가','코스피_거래량','코스피_변동%']
df7['Date'] = pd.to_datetime(df7['Date'])

df8 = pd.read_csv('나스닥종합지수 과거 데이터.csv')
df8.columns=['Date','나스닥_종가','나스닥_시가','나스닥_고가','나스닥_저가','나스닥_거래량','나스닥_변동%']
df8['Date'] = pd.to_datetime(df8['Date'])

df9 = pd.read_csv('S&P 500 VIX 선물 과거 데이터.csv')
df9.columns=['Date','VIX_종가','VIX_시가','VIX_고가','VIX_저가','VIX_거래량','VIX_변동%']
df9['Date'] = pd.to_datetime(df9['Date'])

df10 = pd.read_csv('국내주식매수량관련데이터.csv', encoding='cp949')
df10.columns=['Date','시가총액_전체','시가총액_외국인보유','시가총액_비율','주식수_전체','주식수_외국인보유','주식수_비율']
df10['Date'] = pd.to_datetime(df10['Date'])

# 거시경제지표
# 5
# df11 = pd.read_excel('협의통화(M1).xlsx')
# df11.columns=['기준년월','한국(M1)조원','한국(M1)변동%']
# df11['기준년월'] = pd.to_datetime(df11['기준년월'], format='%Y-%m')

# 5
# df12 = pd.read_excel('광의통화(M2).xlsx')
# df12.columns=['기준년월','한국(M2)조원','한국(M2)변동%']
# df12['기준년월'] = pd.to_datetime(df12['기준년월'], format='%Y-%m')

# 5
df13 = pd.read_excel('미국 통화지표.xlsx')
df13.columns=['기준년월','미국(M1)십억달러','미국(M2)십억달러']
df13['기준년월'] = pd.to_datetime(df13['기준년월'], format='%Y-%m')

df14 = pd.read_csv('소비자지수0901~2506.csv', encoding='cp949')
df14.columns=['기준년월','소비자심리지수']
df14['기준년월'] = df14['기준년월'].astype(str)
df14['기준년월'] = df14['기준년월'].str.replace(r'(^\d{4})\.1$', r'\1.10', regex=True)
df14['기준년월'] = pd.to_datetime(df14['기준년월'], format='%Y.%m')

# 5
df15 = pd.read_csv('생산자물가지수0901~2505.csv', encoding='cp949')
df15.columns=['기준년월','생산자물가지수']
df15['기준년월'] = df15['기준년월'].astype(str)
df15['기준년월'] = df15['기준년월'].str.replace(r'(^\d{4})\.1$', r'\1.10', regex=True)
df15['기준년월'] = pd.to_datetime(df15['기준년월'], format='%Y.%m')

# 5
df16 = pd.read_csv('전산업생산지수0901~2505.csv', encoding='cp949')
df16.columns=['기준년월','산업생산지수']
df16['기준년월'] = df16['기준년월'].astype(str)
df16['기준년월'] = df16['기준년월'].str.replace(r'(^\d{4})\.1$', r'\1.10', regex=True)
df16['기준년월'] = pd.to_datetime(df16['기준년월'], format='%Y.%m')

df17 = pd.read_csv('대한민국외환보유액0901~2506.csv')
df17['기준년월'] = df17['기준년월'].astype(str)
df17['기준년월'] = pd.to_datetime(df17['기준년월'], format='%Y%m')

df18 = pd.read_csv('CD금리_국고채0901~2507.csv')
df18.columns=['Date','CD금리(91일)','국고채(3년)']
df18['Date'] = pd.to_datetime(df18['Date'], format='%Y/%m/%d')

df19 = pd.read_csv('한미기준금리.csv')
df19.columns=['Date','한국정책금리','미국정책금리']
df19['Date'] = pd.to_datetime(df19['Date'], format='%Y-%m-%d')

# 5
df20 = pd.read_csv('경상수지0901~2505.csv', encoding='cp949')
df20['기준년월'] = df20['기준년월'].astype(str)
df20['기준년월'] = df20['기준년월'].str.replace(r'(^\d{4})\.1$', r'\1.10', regex=True)
df20['기준년월'] = pd.to_datetime(df20['기준년월'], format='%Y.%m')

# 5
df21 = pd.read_csv('미국CPI.csv')
df21['기준년월'] = pd.to_datetime(df21['기준년월'], format='%Y-%m')
df21.columns=['기준년월','미국소비자물가지수']

# 웹데이터
df22 = pd.read_csv('구글검색량.csv')
df22['기준년월'] = pd.to_datetime(df22['기준년월'], format='%Y-%m')

# df23 = pd.read_csv('경제정책불확실성지수.csv')
# df23['기준년월'] = pd.to_datetime(df23['기준년월'], format='%Y-%m')

# 인호가 만든 파생변수
df24 = pd.read_csv('결측제거_환율파생변수.csv')
df24['Date'] = pd.to_datetime(df24['Date'])
bool_cols = df24.select_dtypes(include='bool').columns
df24[bool_cols] = df24[bool_cols].astype(int)

# ✅ 기준이 되는 환율 데이터
df_base = df.copy()

# ✅ 1. 일별 데이터 병합 (df1 ~ df10) + (df18, df19)
daily_dfs = [df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df18, df19, df24]
for df_ in daily_dfs:
    df_base = pd.merge(df_base, df_, on='Date', how='left')

# ✅ 2. 월별 데이터 병합을 위한 '기준년월' 컬럼 생성
df_base['기준년월'] = df_base['Date'].dt.to_period('M').astype(str)
df_base['기준년월'] = pd.to_datetime(df_base['기준년월'], format='%Y-%m')

# ✅ 3. 월별 데이터 병합 (df11 ~ df21)
monthly_dfs = [df11, df12, df13, df14, df15, df16, df17, df20, df21]
for df_ in monthly_dfs:
    df_base = pd.merge(df_base, df_, on='기준년월', how='left')

# ✅ 4. 웹데이터 병합 (df22, df23)
df_base = pd.merge(df_base, df22, on='기준년월', how='left')
df_base = pd.merge(df_base, df23, on='기준년월', how='left')

# ✅ 5. 필요 시 기준년월 제거
# df_base.drop('기준년월', axis=1, inplace=True)

def convert_volume(val):
    try:
        val = str(val).replace(',', '').strip()
        if val.endswith('M'):
            return float(val[:-1]) * 1e6
        elif val.endswith('B'):
            return float(val[:-1]) * 1e9
        elif val.endswith('K'):
            return float(val[:-1]) * 1e3
        else:
            return float(val)
    except:
        return np.nan

volume_columns = [col for col in df_base.columns if '거래량' in col]

for col in volume_columns:
    df_base[col] = df_base[col].apply(convert_volume)

# 1. object 타입 컬럼 중 숫자형으로 변환 가능한 것 선택
obj_cols = df_base.select_dtypes(include='object').columns

# 2. 수치형으로 변환 시도 (콤마, % 제거 포함)
for col in obj_cols:
    df_base[col] = (
        df_base[col]
        .astype(str)
        .str.replace(',', '', regex=False)  # 쉼표 제거
        .str.replace('%', '', regex=False)  # % 기호 제거
        .replace('-', '')                  # 음수 아닌 '-' 빈값 처리
    )
    # 숫자로 변환 (에러 시 NaN)
    df_base[col] = pd.to_numeric(df_base[col], errors='coerce')

# 무한대 결측으로 치환
df_base.replace([np.inf, -np.inf], np.nan, inplace=True)

# 2025 6월, 7월 제외
# cutoff_date = pd.to_datetime('2025-05-31')
cutoff_date = pd.to_datetime('2019-12-31')
df_base = df_base[df_base['Date'] <= cutoff_date].reset_index(drop=True)


In [7]:
# ✅ 제거할 컬럼 리스트
cols_to_drop = ['Close', 'Open', 'High', 'Low', '미환율_변동%_y']
df_base = df_base.drop(columns=cols_to_drop, errors='ignore')

# ✅ 컬럼명 변경
df_base = df_base.rename(columns={'미환율_변동%_x': '미환율_변동%'})


In [14]:
df_base.to_csv('어쩌면최종데이터.csv', index=False)

In [8]:
# ✅ 결측값 개수 및 비율 계산
null_df = pd.DataFrame({
    '결측값 개수': df_base.isnull().sum(),
    '결측 비율 (%)': df_base.isnull().mean() * 100
})

# ✅ 'alpha'가 컬럼명에 포함된 것 중에서만 결측 비율 5% 이상인 컬럼 필터링
alpha_cols = [col for col in df_base.columns if 'alpha' in col]
alpha_null_df = null_df.loc[alpha_cols]
drop_cols = alpha_null_df[alpha_null_df['결측 비율 (%)'] >= 5].index.tolist()

# ✅ 해당 alpha 컬럼들만 삭제
df_base = df_base.drop(columns=drop_cols)

# ✅ 남은 결측값 전체 삭제 (단, alpha 외 컬럼 포함)
df_base = df_base.dropna()


In [9]:
df_base = df_base.sort_values(by='Date').reset_index(drop=True)
df_base.Date

0      2009-09-09
1      2009-09-10
2      2009-09-11
3      2009-09-14
4      2009-09-15
          ...    
1580   2019-12-19
1581   2019-12-24
1582   2019-12-26
1583   2019-12-27
1584   2019-12-30
Name: Date, Length: 1585, dtype: datetime64[ns]

# 모델링

In [40]:
# 상승 / 하락및보합

In [54]:
import pandas as pd
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, classification_report, confusion_matrix

# ✅ Seed 고정
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()

# ✅ Focal Loss 정의
def focal_loss(gamma=2., alpha=0.5):
    def focal_loss_fixed(y_true, y_pred):
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -tf.reduce_mean(alpha * tf.pow(1. - pt_1, gamma) * tf.math.log(pt_1)) \
               -tf.reduce_mean((1 - alpha) * tf.pow(pt_0, gamma) * tf.math.log(1. - pt_0))
    return focal_loss_fixed

# ✅ 하이퍼파라미터 범위 설정
change_cut = 5
alpha_list = np.round(np.arange(0.01, 1.0, 0.1), 2)       # 성능 중심 구간
threshold_list = np.round(np.arange(0.1, 1.0, 0.1), 2)    # 유효 범위 압축
weight_list = np.round(np.arange(6.0, 16.0, 1.0), 2)      # class_weight_1

# ✅ 결과 저장
final_results = []

# 🎯 데이터 준비
df = df_base.copy()
df['next_day_close'] = df['미환율_종가'].shift(-1)
df['change'] = df['next_day_close'] - df['미환율_종가']
df['target'] = (df['change'] >= change_cut).astype(int)
df = df.dropna().reset_index(drop=True)

X = df.drop(columns=['Date', '기준년월', 'return', 'return_future', 'target', 'next_day_close', 'change'], errors='ignore')
y = df['target'].values

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# 🎯 시퀀스 구성
seq_length = 10
X_seq, y_seq = [], []
for i in range(seq_length, len(X_scaled)):
    X_seq.append(X_scaled[i-seq_length:i])
    y_seq.append(y[i])
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

# 🎯 train/test split
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)

# ✅ 튜닝 시작
for class_weight_1 in weight_list:
    class_weights = {0: 1.0, 1: class_weight_1}
    
    for alpha in alpha_list:
        # 모델 구성
        inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
        x = LSTM(64, return_sequences=True)(inputs)
        x = LSTM(32)(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        model = Model(inputs, outputs)

        model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=alpha), metrics=['accuracy'])

        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        model.fit(X_train, y_train,
                  validation_data=(X_test, y_test),
                  epochs=100,
                  batch_size=32,
                  callbacks=[early_stop],
                  verbose=0,
                  class_weight=class_weights)

        y_proba = model.predict(X_test).flatten()

        for threshold in threshold_list:
            y_pred = (y_proba > threshold).astype(int)
            macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
            final_results.append({
                'alpha': alpha,
                'threshold': threshold,
                'class_weight_1': class_weight_1,
                'macro_f1': macro_f1
            })

# ✅ 최적 조합 선택
df_results = pd.DataFrame(final_results)
best_row = df_results.loc[df_results['macro_f1'].idxmax()]
best_alpha = best_row['alpha']
best_threshold = best_row['threshold']
best_weight = best_row['class_weight_1']

# ✅ 최적 조합 재적용
inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
x = LSTM(64, return_sequences=True)(inputs)
x = LSTM(32)(x)
x = Dropout(0.2)(x)
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)

model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=best_alpha), metrics=['accuracy'])

model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          epochs=100,
          batch_size=32,
          callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)],
          verbose=0,
          class_weight={0: 1.0, 1: best_weight})

# ✅ 최종 예측 및 평가
y_proba = model.predict(X_test).flatten()
y_pred = (y_proba > best_threshold).astype(int)

print(f"\n✅ [최종 조합 적용 결과]")
print(f"change_cut: {change_cut}, alpha: {best_alpha}, threshold: {best_threshold:.4f}, class_weight_1: {best_weight}, macro f1-score: {f1_score(y_test, y_pred, average='macro'):.4f}")
print("\n📊 분류 리포트:")
print(classification_report(y_test, y_pred))
print("🧩 혼동 행렬:")
print(confusion_matrix(y_test, y_pred))


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━

In [None]:
# 하락 / 상승및보합

In [56]:
import pandas as pd
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, classification_report, confusion_matrix

# ✅ Seed 고정
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()

# ✅ Focal Loss 정의
def focal_loss(gamma=2., alpha=0.5):
    def focal_loss_fixed(y_true, y_pred):
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -tf.reduce_mean(alpha * tf.pow(1. - pt_1, gamma) * tf.math.log(pt_1)) \
               -tf.reduce_mean((1 - alpha) * tf.pow(pt_0, gamma) * tf.math.log(1. - pt_0))
    return focal_loss_fixed

# :흰색_확인_표시: 하이퍼파라미터 범위 설정
change_cut = 5
alpha_list = np.round(np.arange(0.01, 1.0, 0.1), 2)        # 성능 중심 구간
threshold_list = np.round(np.arange(0.1, 1.0, 0.1), 2)   # 유효 범위 압축
weight_list = np.round(np.arange(6.0, 16.0, 1.0), 2)      # class_weight_1 (편집됨) 

# ✅ 결과 저장
final_results = []

# 🎯 데이터 준비
df = df_base.copy()
df['next_day_close'] = df['미환율_종가'].shift(-1)
df['change'] = df['next_day_close'] - df['미환율_종가']
df['target'] = (df['change'] <= -change_cut).astype(int)
df = df.dropna().reset_index(drop=True)

X = df.drop(columns=['Date', '기준년월', 'return', 'return_future', 'target', 'next_day_close', 'change'], errors='ignore')
y = df['target'].values

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# 🎯 시퀀스 구성
seq_length = 10
X_seq, y_seq = [], []
for i in range(seq_length, len(X_scaled)):
    X_seq.append(X_scaled[i-seq_length:i])
    y_seq.append(y[i])
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

# 🎯 train/test split
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)

# ✅ 튜닝 시작
for class_weight_1 in weight_list:
    class_weights = {0: 1.0, 1: class_weight_1}
    
    for alpha in alpha_list:
        # 모델 구성
        inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
        x = LSTM(64, return_sequences=True)(inputs)
        x = LSTM(32)(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        model = Model(inputs, outputs)

        model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=alpha), metrics=['accuracy'])

        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        model.fit(X_train, y_train,
                  validation_data=(X_test, y_test),
                  epochs=100,
                  batch_size=32,
                  callbacks=[early_stop],
                  verbose=0,
                  class_weight=class_weights)

        y_proba = model.predict(X_test).flatten()

        for threshold in threshold_list:
            y_pred = (y_proba > threshold).astype(int)
            macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
            final_results.append({
                'alpha': alpha,
                'threshold': threshold,
                'class_weight_1': class_weight_1,
                'macro_f1': macro_f1
            })

# ✅ 최적 조합 선택
df_results = pd.DataFrame(final_results)
best_row = df_results.loc[df_results['macro_f1'].idxmax()]
best_alpha = best_row['alpha']
best_threshold = best_row['threshold']
best_weight = best_row['class_weight_1']

# ✅ 최적 조합 재적용
inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
x = LSTM(64, return_sequences=True)(inputs)
x = LSTM(32)(x)
x = Dropout(0.2)(x)
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)

model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=best_alpha), metrics=['accuracy'])

model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          epochs=100,
          batch_size=32,
          callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)],
          verbose=0,
          class_weight={0: 1.0, 1: best_weight})

# ✅ 최종 예측 및 평가
y_proba = model.predict(X_test).flatten()
y_pred = (y_proba > best_threshold).astype(int)

print(f"\n✅ [최종 조합 적용 결과]")
print(f"change_cut: {change_cut}, alpha: {best_alpha}, threshold: {best_threshold:.4f}, class_weight_1: {best_weight}, macro f1-score: {f1_score(y_test, y_pred, average='macro'):.4f}")
print("\n📊 분류 리포트:")
print(classification_report(y_test, y_pred))
print("🧩 혼동 행렬:")
print(confusion_matrix(y_test, y_pred))


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━

In [None]:
# 성능개선 -> 상승 / 하락및보합

In [None]:
# change_cut = 0

In [11]:
import pandas as pd
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, classification_report, confusion_matrix

# ✅ Seed 고정
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()

# ✅ Focal Loss 정의
def focal_loss(gamma=2., alpha=0.5):
    def focal_loss_fixed(y_true, y_pred):
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -tf.reduce_mean(alpha * tf.pow(1. - pt_1, gamma) * tf.math.log(pt_1)) \
               -tf.reduce_mean((1 - alpha) * tf.pow(pt_0, gamma) * tf.math.log(1. - pt_0))
    return focal_loss_fixed

# :흰색_확인_표시: 하이퍼파라미터 범위 설정
change_cut = 0
alpha_list = np.round(np.arange(0.01, 1.0, 0.1), 2)        # 성능 중심 구간
threshold_list = np.round(np.arange(0.1, 1.0, 0.1), 2)   # 유효 범위 압축
weight_list = np.round(np.arange(6.0, 16.0, 1.0), 2)      # class_weight_1 (편집됨) 

# ✅ 결과 저장
final_results = []

# 🎯 데이터 준비
df = df_base.copy()
df['next_day_close'] = df['미환율_종가'].shift(-1)
df['change'] = df['next_day_close'] - df['미환율_종가']
df['target'] = (df['change'] <= -change_cut).astype(int)
df = df.dropna().reset_index(drop=True)

X = df.drop(columns=['Date', '기준년월', 'return', 'return_future', 'target', 'next_day_close', 'change'], errors='ignore')
y = df['target'].values

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# 🎯 시퀀스 구성
seq_length = 10
X_seq, y_seq = [], []
for i in range(seq_length, len(X_scaled)):
    X_seq.append(X_scaled[i-seq_length:i])
    y_seq.append(y[i])
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

# 🎯 train/test split
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)

# ✅ 튜닝 시작
for class_weight_1 in weight_list:
    class_weights = {0: 1.0, 1: class_weight_1}
    
    for alpha in alpha_list:
        # 모델 구성
        inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
        x = LSTM(64, return_sequences=True)(inputs)
        x = LSTM(32)(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        model = Model(inputs, outputs)

        model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=alpha), metrics=['accuracy'])

        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        model.fit(X_train, y_train,
                  validation_data=(X_test, y_test),
                  epochs=100,
                  batch_size=32,
                  callbacks=[early_stop],
                  verbose=0,
                  class_weight=class_weights)

        y_proba = model.predict(X_test).flatten()

        for threshold in threshold_list:
            y_pred = (y_proba > threshold).astype(int)
            macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
            final_results.append({
                'alpha': alpha,
                'threshold': threshold,
                'class_weight_1': class_weight_1,
                'macro_f1': macro_f1
            })

# ✅ 최적 조합 선택
df_results = pd.DataFrame(final_results)
best_row = df_results.loc[df_results['macro_f1'].idxmax()]
best_alpha = best_row['alpha']
best_threshold = best_row['threshold']
best_weight = best_row['class_weight_1']

# ✅ 최적 조합 재적용
inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
x = LSTM(64, return_sequences=True)(inputs)
x = LSTM(32)(x)
x = Dropout(0.2)(x)
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)

model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=best_alpha), metrics=['accuracy'])

model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          epochs=100,
          batch_size=32,
          callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)],
          verbose=0,
          class_weight={0: 1.0, 1: best_weight})

# ✅ 최종 예측 및 평가
y_proba = model.predict(X_test).flatten()
y_pred = (y_proba > best_threshold).astype(int)

print(f"\n✅ [최종 조합 적용 결과]")
print(f"change_cut: {change_cut}, alpha: {best_alpha}, threshold: {best_threshold:.4f}, class_weight_1: {best_weight}, macro f1-score: {f1_score(y_test, y_pred, average='macro'):.4f}")
print("\n📊 분류 리포트:")
print(classification_report(y_test, y_pred))
print("🧩 혼동 행렬:")
print(confusion_matrix(y_test, y_pred))


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━

In [17]:
import pandas as pd
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, classification_report, confusion_matrix

# ✅ Seed 고정
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()

# ✅ Focal Loss 정의
def focal_loss(gamma=2., alpha=0.5):
    def focal_loss_fixed(y_true, y_pred):
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -tf.reduce_mean(alpha * tf.pow(1. - pt_1, gamma) * tf.math.log(pt_1)) \
               -tf.reduce_mean((1 - alpha) * tf.pow(pt_0, gamma) * tf.math.log(1. - pt_0))
    return focal_loss_fixed

# :흰색_확인_표시: 하이퍼파라미터 범위 설정
change_cut = 0
alpha_list = np.round(np.arange(0.01, 1.0, 0.1), 2)        # 성능 중심 구간
threshold_list = np.round(np.arange(0.1, 1.0, 0.1), 2)   # 유효 범위 압축
weight_list = np.round(np.arange(6.0, 16.0, 1.0), 2)      # class_weight_1 (편집됨) 

# ✅ 결과 저장
final_results = []

# 🎯 데이터 준비
df = df_base.copy()
df['next_day_close'] = df['미환율_종가'].shift(-1)
df['change'] = df['next_day_close'] - df['미환율_종가']
df['target'] = (df['change'] <= -change_cut).astype(int)
df = df.dropna().reset_index(drop=True)

X = df.drop(columns=['Date', '기준년월', 'return', 'return_future', 'target', 'next_day_close', 'change'], errors='ignore')
y = df['target'].values

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# 🎯 시퀀스 구성
seq_length = 10
X_seq, y_seq = [], []
for i in range(seq_length, len(X_scaled)):
    X_seq.append(X_scaled[i-seq_length:i])
    y_seq.append(y[i])
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

# 🎯 train/test split
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)

# ✅ 튜닝 시작
for class_weight_1 in weight_list:
    class_weights = {0: 1.0, 1: class_weight_1}
    
    for alpha in alpha_list:
        # 모델 구성
        inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
        x = LSTM(64, return_sequences=True)(inputs)
        x = LSTM(32)(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        model = Model(inputs, outputs)

        model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=alpha), metrics=['accuracy'])

        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        model.fit(X_train, y_train,
                  validation_data=(X_test, y_test),
                  epochs=100,
                  batch_size=32,
                  callbacks=[early_stop],
                  verbose=0,
                  class_weight=class_weights)

        y_proba = model.predict(X_test).flatten()

        for threshold in threshold_list:
            y_pred = (y_proba > threshold).astype(int)
            macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
            final_results.append({
                'alpha': alpha,
                'threshold': threshold,
                'class_weight_1': class_weight_1,
                'macro_f1': macro_f1
            })

# ✅ 최적 조합 선택
df_results = pd.DataFrame(final_results)
best_row = df_results.loc[df_results['macro_f1'].idxmax()]
best_alpha = best_row['alpha']
best_threshold = best_row['threshold']
best_weight = best_row['class_weight_1']

# ✅ 최적 조합 재적용
inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
x = LSTM(64, return_sequences=True)(inputs)
x = LSTM(32)(x)
x = Dropout(0.2)(x)
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)

model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=best_alpha), metrics=['accuracy'])

model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          epochs=100,
          batch_size=32,
          callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)],
          verbose=0,
          class_weight={0: 1.0, 1: best_weight})

# ✅ 최종 예측 및 평가
y_proba = model.predict(X_test).flatten()
y_pred = (y_proba > best_threshold).astype(int)

print(f"\n✅ [최종 조합 적용 결과]")
print(f"change_cut: {change_cut}, alpha: {best_alpha}, threshold: {best_threshold:.4f}, class_weight_1: {best_weight}, macro f1-score: {f1_score(y_test, y_pred, average='macro'):.4f}")
print("\n📊 분류 리포트:")
print(classification_report(y_test, y_pred))
print("🧩 혼동 행렬:")
print(confusion_matrix(y_test, y_pred))


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [11]:
import pandas as pd
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, classification_report, confusion_matrix

# ✅ Seed 고정
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()

# ✅ Focal Loss 정의
def focal_loss(gamma=2., alpha=0.5):
    def focal_loss_fixed(y_true, y_pred):
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -tf.reduce_mean(alpha * tf.pow(1. - pt_1, gamma) * tf.math.log(pt_1)) \
               -tf.reduce_mean((1 - alpha) * tf.pow(pt_0, gamma) * tf.math.log(1. - pt_0))
    return focal_loss_fixed

# :흰색_확인_표시: 하이퍼파라미터 범위 설정
change_cut = 0
alpha_list = np.round(np.arange(0.01, 1.0, 0.1), 2)        # 성능 중심 구간
threshold_list = np.round(np.arange(0.1, 1.0, 0.1), 2)   # 유효 범위 압축
weight_list = np.round(np.arange(6.0, 16.0, 1.0), 2)      # class_weight_1 (편집됨) 

# ✅ 결과 저장
final_results = []

# 🎯 데이터 준비
df = df_base.copy()
df['next_day_close'] = df['미환율_종가'].shift(-1)
df['change'] = df['next_day_close'] - df['미환율_종가']
df['target'] = (df['change'] <= -change_cut).astype(int)
df = df.dropna().reset_index(drop=True)

X = df.drop(columns=['Date', '기준년월', 'return', 'return_future', 'target', 'next_day_close', 'change'], errors='ignore')
y = df['target'].values

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# 🎯 시퀀스 구성
seq_length = 10
X_seq, y_seq = [], []
for i in range(seq_length, len(X_scaled)):
    X_seq.append(X_scaled[i-seq_length:i])
    y_seq.append(y[i])
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

# 🎯 train/test split
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)

# ✅ 튜닝 시작
for class_weight_1 in weight_list:
    class_weights = {0: 1.0, 1: class_weight_1}
    
    for alpha in alpha_list:
        # 모델 구성
        inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
        x = LSTM(64, return_sequences=True)(inputs)
        x = LSTM(32)(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        model = Model(inputs, outputs)

        model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=alpha), metrics=['accuracy'])

        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        model.fit(X_train, y_train,
                  validation_data=(X_test, y_test),
                  epochs=100,
                  batch_size=32,
                  callbacks=[early_stop],
                  verbose=0,
                  class_weight=class_weights)

        y_proba = model.predict(X_test).flatten()

        for threshold in threshold_list:
            y_pred = (y_proba > threshold).astype(int)
            macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
            final_results.append({
                'alpha': alpha,
                'threshold': threshold,
                'class_weight_1': class_weight_1,
                'macro_f1': macro_f1
            })

# ✅ 최적 조합 선택
df_results = pd.DataFrame(final_results)
best_row = df_results.loc[df_results['macro_f1'].idxmax()]
best_alpha = best_row['alpha']
best_threshold = best_row['threshold']
best_weight = best_row['class_weight_1']

# ✅ 최적 조합 재적용
inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
x = Bidirectional(LSTM(64, return_sequences=True))(inputs)   # 🔁 양방향
x = LSTM(64, return_sequences=True)(x)                        # 🔁 추가 시계열 추출
x = LSTM(32)(x)                                               # 🔁 마지막 요약
x = Dropout(0.3)(x)                                           # 🔽 과적합 방지
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)

model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=best_alpha), metrics=['accuracy'])

model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          epochs=100,
          batch_size=32,
          callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)],
          verbose=0,
          class_weight={0: 1.0, 1: best_weight})

# ✅ 최종 예측 및 평가
y_proba = model.predict(X_test).flatten()
y_pred = (y_proba > best_threshold).astype(int)

print(f"\n✅ [최종 조합 적용 결과]")
print(f"change_cut: {change_cut}, alpha: {best_alpha}, threshold: {best_threshold:.4f}, class_weight_1: {best_weight}, macro f1-score: {f1_score(y_test, y_pred, average='macro'):.4f}")
print("\n📊 분류 리포트:")
print(classification_report(y_test, y_pred))
print("🧩 혼동 행렬:")
print(confusion_matrix(y_test, y_pred))


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [12]:
import pandas as pd
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Layer
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, classification_report, confusion_matrix

# ✅ Seed 고정
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()

# ✅ Focal Loss 정의
def focal_loss(gamma=2., alpha=0.5):
    def loss(y_true, y_pred):
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -tf.reduce_mean(alpha * tf.pow(1. - pt_1, gamma) * tf.math.log(pt_1)) \
               -tf.reduce_mean((1 - alpha) * tf.pow(pt_0, gamma) * tf.math.log(1. - pt_0))
    return loss

# ✅ Attention Layer 정의
class Attention(Layer):
    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros")
        super().build(input_shape)

    def call(self, x):
        e = tf.keras.backend.tanh(tf.keras.backend.dot(x, self.W) + self.b)
        a = tf.keras.backend.softmax(e, axis=1)
        output = x * a
        return tf.keras.backend.sum(output, axis=1)

# ✅ 하이퍼파라미터 범위 설정
change_cut = 0
alpha_list = np.round(np.arange(0.1, 1.0, 0.1), 2)
threshold_list = np.round(np.arange(0.1, 0.91, 0.05), 2)
weight_list = np.round(np.arange(6.0, 16.0, 1.0), 2)

# ✅ 결과 저장
final_results = []

# ✅ 데이터 준비
df = df_base.copy()
df['next_day_close'] = df['미환율_종가'].shift(-1)
df['change'] = df['next_day_close'] - df['미환율_종가']
df['target'] = (df['change'] <= -change_cut).astype(int)
df = df.dropna().reset_index(drop=True)

X = df.drop(columns=['Date', '기준년월', 'return', 'return_future', 'target', 'next_day_close', 'change'], errors='ignore')
y = df['target'].values

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# ✅ 시퀀스 구성
seq_length = 10
X_seq, y_seq = [], []
for i in range(seq_length, len(X_scaled)):
    X_seq.append(X_scaled[i-seq_length:i])
    y_seq.append(y[i])
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

# ✅ train/test split
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)

# ✅ 튜닝 시작
for class_weight_1 in weight_list:
    class_weights = {0: 1.0, 1: class_weight_1}
    
    for alpha in alpha_list:
        # ✅ 모델 구성 (Attention 포함)
        inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
        x = LSTM(64, return_sequences=True)(inputs)
        x = LSTM(32, return_sequences=True)(x)
        x = Attention()(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        model = Model(inputs, outputs)

        model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=alpha), metrics=['accuracy'])

        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        model.fit(X_train, y_train,
                  validation_data=(X_test, y_test),
                  epochs=100,
                  batch_size=32,
                  callbacks=[early_stop],
                  verbose=0,
                  class_weight=class_weights)

        y_proba = model.predict(X_test).flatten()

        for threshold in threshold_list:
            y_pred = (y_proba > threshold).astype(int)
            macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
            final_results.append({
                'alpha': alpha,
                'threshold': threshold,
                'class_weight_1': class_weight_1,
                'macro_f1': macro_f1
            })

# ✅ 최적 조합 선택
df_results = pd.DataFrame(final_results)
best_row = df_results.loc[df_results['macro_f1'].idxmax()]
best_alpha = best_row['alpha']
best_threshold = best_row['threshold']
best_weight = best_row['class_weight_1']

# ✅ 최적 조합 적용해 모델 재학습
inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
x = LSTM(64, return_sequences=True)(inputs)
x = LSTM(32, return_sequences=True)(x)
x = Attention()(x)
x = Dropout(0.3)(x)
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)

model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=best_alpha), metrics=['accuracy'])

model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          epochs=100,
          batch_size=32,
          callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)],
          verbose=0,
          class_weight={0: 1.0, 1: best_weight})

# ✅ 최종 예측 및 평가
y_proba = model.predict(X_test).flatten()
y_pred = (y_proba > best_threshold).astype(int)

print(f"\n✅ [최종 조합 적용 결과]")
print(f"change_cut: {change_cut}, alpha: {best_alpha}, threshold: {best_threshold:.4f}, class_weight_1: {best_weight}, macro f1-score: {f1_score(y_test, y_pred, average='macro'):.4f}")
print("\n📊 분류 리포트:")
print(classification_report(y_test, y_pred))
print("🧩 혼동 행렬:")
print(confusion_matrix(y_test, y_pred))



[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━

In [13]:
import pandas as pd
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Layer
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, classification_report, confusion_matrix

# ✅ Seed 고정
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()

# ✅ Focal Loss 정의
def focal_loss(gamma=2., alpha=0.5):
    def loss(y_true, y_pred):
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -tf.reduce_mean(alpha * tf.pow(1. - pt_1, gamma) * tf.math.log(pt_1)) \
               -tf.reduce_mean((1 - alpha) * tf.pow(pt_0, gamma) * tf.math.log(1. - pt_0))
    return loss

# ✅ Attention Layer 정의
class Attention(Layer):
    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros")
        super().build(input_shape)

    def call(self, x):
        e = tf.keras.backend.tanh(tf.keras.backend.dot(x, self.W) + self.b)
        a = tf.keras.backend.softmax(e, axis=1)
        output = x * a
        return tf.keras.backend.sum(output, axis=1)

def add_technical_indicators(df, price_col='미환율_종가'):
    df = df.copy()

    # ✅ 이동평균 (MA)
    df['MA5'] = df[price_col].rolling(window=5).mean()
    df['MA10'] = df[price_col].rolling(window=10).mean()
    df['MA20'] = df[price_col].rolling(window=20).mean()

    # ✅ 이동표준편차 (Volatility)
    df['STD5'] = df[price_col].rolling(window=5).std()
    df['STD10'] = df[price_col].rolling(window=10).std()

    # ✅ RSI (상대강도지수)
    delta = df[price_col].diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(window=14).mean()
    avg_loss = loss.rolling(window=14).mean()
    rs = avg_gain / (avg_loss + 1e-6)
    df['RSI'] = 100 - (100 / (1 + rs))

    # ✅ MACD
    ema12 = df[price_col].ewm(span=12, adjust=False).mean()
    ema26 = df[price_col].ewm(span=26, adjust=False).mean()
    df['MACD'] = ema12 - ema26
    df['MACD_signal'] = df['MACD'].ewm(span=9, adjust=False).mean()
    df['MACD_hist'] = df['MACD'] - df['MACD_signal']

    # ✅ Bollinger Band
    df['Bollinger_upper'] = df['MA20'] + 2 * df['STD10']
    df['Bollinger_lower'] = df['MA20'] - 2 * df['STD10']

    # ✅ Stochastic Oscillator
    low14 = df[price_col].rolling(window=14).min()
    high14 = df[price_col].rolling(window=14).max()
    df['%K'] = (df[price_col] - low14) / (high14 - low14 + 1e-6) * 100
    df['%D'] = df['%K'].rolling(window=3).mean()

    # ✅ 1일, 3일 수익률
    df['return_1d'] = df[price_col].pct_change(1)
    df['return_3d'] = df[price_col].pct_change(3)

    return df

# 기술적 지표 생성 적용
df_base = add_technical_indicators(df_base)

# 결측값 처리 (지표 생성 초기에는 NaN 발생)
df_base = df_base.dropna().reset_index(drop=True)

# ✅ 하이퍼파라미터 범위 설정
change_cut = 0
alpha_list = np.round(np.arange(0.1, 1.0, 0.1), 2)
threshold_list = np.round(np.arange(0.1, 0.91, 0.05), 2)
weight_list = np.round(np.arange(6.0, 16.0, 1.0), 2)

# ✅ 결과 저장
final_results = []

# ✅ 데이터 준비
df = df_base.copy()
df['next_day_close'] = df['미환율_종가'].shift(-1)
df['change'] = df['next_day_close'] - df['미환율_종가']
df['target'] = (df['change'] <= -change_cut).astype(int)
df = df.dropna().reset_index(drop=True)

X = df.drop(columns=['Date', '기준년월', 'return', 'return_future', 'target', 'next_day_close', 'change'], errors='ignore')
y = df['target'].values

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# ✅ 시퀀스 구성
seq_length = 10
X_seq, y_seq = [], []
for i in range(seq_length, len(X_scaled)):
    X_seq.append(X_scaled[i-seq_length:i])
    y_seq.append(y[i])
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

# ✅ train/test split
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)

# ✅ 튜닝 시작
for class_weight_1 in weight_list:
    class_weights = {0: 1.0, 1: class_weight_1}
    
    for alpha in alpha_list:
        # ✅ 모델 구성 (Attention 포함)
        inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
        x = LSTM(64, return_sequences=True)(inputs)
        x = LSTM(32, return_sequences=True)(x)
        x = Attention()(x)
        x = Dropout(0.3)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        model = Model(inputs, outputs)

        model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=alpha), metrics=['accuracy'])

        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        model.fit(X_train, y_train,
                  validation_data=(X_test, y_test),
                  epochs=100,
                  batch_size=32,
                  callbacks=[early_stop],
                  verbose=0,
                  class_weight=class_weights)

        y_proba = model.predict(X_test).flatten()

        for threshold in threshold_list:
            y_pred = (y_proba > threshold).astype(int)
            macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
            final_results.append({
                'alpha': alpha,
                'threshold': threshold,
                'class_weight_1': class_weight_1,
                'macro_f1': macro_f1
            })

# ✅ 최적 조합 선택
df_results = pd.DataFrame(final_results)
best_row = df_results.loc[df_results['macro_f1'].idxmax()]
best_alpha = best_row['alpha']
best_threshold = best_row['threshold']
best_weight = best_row['class_weight_1']

# ✅ 최적 조합 적용해 모델 재학습
inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
x = LSTM(64, return_sequences=True)(inputs)
x = LSTM(32, return_sequences=True)(x)
x = Attention()(x)
x = Dropout(0.3)(x)
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)

model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=best_alpha), metrics=['accuracy'])

model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          epochs=100,
          batch_size=32,
          callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)],
          verbose=0,
          class_weight={0: 1.0, 1: best_weight})

# ✅ 최종 예측 및 평가
y_proba = model.predict(X_test).flatten()
y_pred = (y_proba > best_threshold).astype(int)

print(f"\n✅ [최종 조합 적용 결과]")
print(f"change_cut: {change_cut}, alpha: {best_alpha}, threshold: {best_threshold:.4f}, class_weight_1: {best_weight}, macro f1-score: {f1_score(y_test, y_pred, average='macro'):.4f}")
print("\n📊 분류 리포트:")
print(classification_report(y_test, y_pred))
print("🧩 혼동 행렬:")
print(confusion_matrix(y_test, y_pred))


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 82ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 81ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━