# 알파 생성

In [59]:
import numpy as np
import pandas as pd
import yfinance as yf

In [60]:
# Restore the pandas display settings used below to their library defaults.
for _display_option in (
    'display.max_rows',
    'display.max_columns',
    'display.max_colwidth',
    'display.width',
):
    pd.reset_option(_display_option)


In [61]:
def alpha_values(df):
    df.columns=['Date','Close','Open','High','Low','Change']
    
    df['Date']=pd.to_datetime(df['Date'],format='%Y- %m- %d')
    
    df=df.set_index('Date')
    df=df.sort_index()
    
    # 1) 단순 수익률 & 로그 수익률
    df['return']     = df['Close'].pct_change()
    df['log_return'] = np.log(df['Close'] / df['Close'].shift(1))
    
    # 2) Lag 특성 (1~5일)
    for lag in range(1, 6):
        df[f'lag_{lag}'] = df['log_return'].shift(lag)
    
    # 3) 이동평균 & 이동표준편차 (SMA, STD)
    ma_windows = [5, 10, 20, 60]
    for w in ma_windows:
        df[f'sma_{w}'] = df['Close'].rolling(window=w).mean()
        df[f'std_{w}'] = df['Close'].rolling(window=w).std()
    
    # 4) 볼린저 밴드 (20일 기준, +-2σ)
    df['bb_upper'] = df['sma_20'] + 2 * df['std_20']
    df['bb_lower'] = df['sma_20'] - 2 * df['std_20']
    df['bb_width'] = df['bb_upper'] - df['bb_lower']
    
    # 5) 연환산 변동성
    df['volatility_20'] = df['log_return'].rolling(20).std() * np.sqrt(252)
    
    # 6) 모멘텀 지표
    mom_windows = [5, 10, 20]
    for w in mom_windows:
        df[f'momentum_{w}'] = df['Close'] - df['Close'].shift(w)
    
    # 7) RSI (14일 기본)
    def compute_rsi(series, window=14):
        """Relative Strength Index built from simple rolling means.

        Args:
            series: price series to evaluate.
            window: number of rows averaged for gains and for losses.

        Returns:
            Series of ``100 - 100 / (1 + avg_gain / avg_loss)``; rows without
            a full window of differences are NaN.
        """
        change = series.diff()
        # clip() keeps the leading NaN from diff(), so the warm-up rows stay NaN.
        mean_up = change.clip(lower=0).rolling(window).mean()
        mean_down = (-change.clip(upper=0)).rolling(window).mean()
        strength = mean_up / mean_down
        return 100 - 100 / (1 + strength)
    
    df['rsi_14'] = compute_rsi(df['Close'], 14)
    
    # 8) MACD & Signal
    ema_short = df['Close'].ewm(span=12, adjust=False).mean()
    ema_long  = df['Close'].ewm(span=26, adjust=False).mean()
    df['macd']     = ema_short - ema_long
    df['macd_sig'] = df['macd'].ewm(span=9, adjust=False).mean()
    df['macd_hist']= df['macd'] - df['macd_sig']
    
    # 9) 스토캐스틱 오실레이터 (%K, %D)
    low_min  = df['Close'].rolling(14).min()
    high_max = df['Close'].rolling(14).max()
    df['sto_k'] = (df['Close'] - low_min) / (high_max - low_min) * 100
    df['sto_d'] = df['sto_k'].rolling(3).mean()
    
    from pandas_datareader import data as pdr
    
    # 1) FRED에서 US M1 불러오기 (시계열: 월간)
    m1 = pdr.DataReader('MYAGM1EZM196N', 'fred',
                        start=df.index.min(),
                        end=df.index.max())
    m1.rename(columns={'MYAGM1EZM196N': 'M1'}, inplace=True)
    
    # 2) 날짜 인덱스 맞추고 결측은 직전값으로 채우기
    m1.index = pd.to_datetime(m1.index)
    # 영업일 기준으로 재색인 & ffill
    m1 = m1.reindex(df.index, method='ffill')
    
    # 3) df에 합치기
    df = df.join(m1)
    
    # 4) –1 × 롤링 상관계수 계산 (윈도우=10)
    df['neg_rank_corr_10_M1'] = -(
        df['Close'].rank()
          .rolling(window=10)
          .corr(df['M1'].rank())
    )
    
    # M1 로그수익률
    df['M1_log_ret'] = np.log(df['M1'] / df['M1'].shift(1))
    
    # –1 × rank corr on returns
    df['neg_rank_corr_10_M1_ret'] = -(
        df['Close'].rank()
          .rolling(10)
          .corr(df['M1_log_ret'].rank())
    )
    
    
    col = df['neg_rank_corr_10_M1_ret']
    total = len(col)
    
    # NaN 개수
    num_nan = col.isna().sum()
    # ±inf 개수
    num_inf = np.isinf(col).sum()
    
    # 비율 계산
    ratio_nan = num_nan / total
    ratio_inf = num_inf / total
    ratio_total = (num_nan + num_inf) / total
    
    mo = df[['Close','M1']].resample('M').last()
    mo['neg_corr_3M'] = -(
      mo['Close'].rank().rolling(3).corr(mo['M1'].rank())
    )
    
    
    # 만약 M1을 거래량 대용으로 쓰고 있다면 먼저 컬럼 리네임
    df.rename(columns={'M1': 'Volume'}, inplace=True)
    
    # 1) 1일 차분(delta)
    delta_close  = df['Close'].diff(1)
    delta_volume = df['Volume'].diff(1)
    
    # 2) 차분값을 rank로 변환한 뒤 elementwise 곱하고 -1 곱하기
    df['neg_rank_mul_delta'] = - (
        delta_close.rank() *
        delta_volume.rank()
    )
    
    # 10일 롤링 –rank corr
    df['neg_rank_corr_10'] = -(
        df['Close'].rank()
              .rolling(window=10)
              .corr(df['Volume'].rank())
    )
    
    
    # 1) Low 컬럼에 대한 전체 순위 계산
    low_rank = df['Low'].rank()
    
    # 2) 9일 윈도우 내에서 ‘현재(마지막) 값의 순위’를 비율로 계산하는 함수
    def ts_rank(s, window):
        """Rolling rank of the newest value inside each window, as a fraction.

        Each full window is ranked (pandas default: ties share the average
        rank) and the last row's rank is divided by the number of rows in the
        window.
        """
        def _latest_rank_share(win):
            ranks = win.rank()
            return ranks.iloc[-1] / len(win)

        return s.rolling(window).apply(_latest_rank_share, raw=False)
    
    # 3) ts_rank(rank(Low), 9) 계산 후 –1 곱해서 새로운 컬럼에 저장
    df['neg_ts_rank_low_9'] = - ts_rank(low_rank, 9)
    
    # —————————————— helper functions ——————————————
    def ts_max(s, window):
        """Return the rolling maximum of ``s`` over ``window`` rows."""
        windowed = s.rolling(window=window)
        return windowed.max()
    
    def ts_rank(s, window):
        """Relative rank of the last value within each rolling window.

        The newest value's rank among the rows of its window is divided by
        the window's row count, giving a fraction up to 1.
        """
        def _relative_rank_of_last(chunk):
            position = chunk.rank().iloc[-1]
            return position / len(chunk)

        roller = s.rolling(window)
        return roller.apply(_relative_rank_of_last, raw=False)
    
    def delay(s, period):
        """Return ``s`` shifted forward by ``period`` rows (value from ``period`` rows earlier)."""
        lagged = s.shift(periods=period)
        return lagged
    
    # —————————————— alpha5 ——————————————
    # alpha5 = rank(open - ts_max(open,5)) ** rank(volume)
    df['alpha5'] = (
        (df['Open'] - ts_max(df['Open'], 5)).rank()
        ** df['Volume'].rank()
    )
    
    # —————————————— alpha6 ——————————————
    # alpha6 = -1 * corr(open, volume, 10)
    df['alpha6'] = - df['Open'].rolling(window=10).corr(df['Volume'])
    
    # —————————————— alpha7 ——————————————
    # alpha7 = - ts_rank(low,3) if volume != 0 else 0
    r7 = ts_rank(df['Low'], 3)
    df['alpha7'] = np.where(df['Volume'] != 0, -r7, 0)
    
    # —————————————— alpha8 ——————————————
    # alpha8 = -1 * rank( ts_max( rank(corr(volume, low,5)), 3 ) )
    corr_vl = df['Volume'].rolling(window=5).corr(df['Low'])
    rank_corr = corr_vl.rank()
    tsmax_rc3 = ts_max(rank_corr, 3)
    df['alpha8'] = - tsmax_rc3.rank()
    
    # —————————————— alpha9 ——————————————
    # alpha9 = rank(open - delay(open,1)) + rank(delay(open,1) - delay(open,2))
    r1 = (df['Open'] - delay(df['Open'], 1)).rank()
    r2 = (delay(df['Open'], 1) - delay(df['Open'], 2)).rank()
    df['alpha9'] = r1 + r2
    
    # ————— helper functions —————
    def delta(s, period):
        """Return each value minus the value ``period`` rows earlier."""
        change = s.diff(periods=period)
        return change
    
    def ts_min(s, window):
        """Return the rolling minimum of ``s`` over ``window`` rows."""
        windowed = s.rolling(window=window)
        return windowed.min()
    
    def ts_max(s, window):
        """Rolling maximum: the largest value among the last ``window`` rows."""
        roller = s.rolling(window=window)
        return roller.max()
    
    def ts_sum(s, window):
        """Return the rolling sum of ``s`` over ``window`` rows."""
        windowed = s.rolling(window=window)
        return windowed.sum()
    
    def sign(s):
        """Return the element-wise sign of ``s`` via ``numpy.sign``."""
        signs = np.sign(s)
        return signs
    
    def rank(s):
        """Return the percentile rank of each value of ``s``, in (0, 1].

        Formulas in this function use ranks as fractions (for example
        ``rank(...) > 0.5`` and ``1 - rank(...)``), so the rank is normalised
        by the number of non-NaN observations instead of being returned as a
        raw ordinal position (1..N). Ties share the average rank and NaN
        inputs stay NaN.
        """
        return s.rank(pct=True)
    
    def correlation(s1, s2, window):
        """Return the rolling correlation of ``s1`` with ``s2`` over ``window`` rows."""
        windowed = s1.rolling(window=window)
        return windowed.corr(other=s2)
    
    def covariance(s1, s2, window):
        """Return the rolling covariance of ``s1`` with ``s2`` over ``window`` rows."""
        windowed = s1.rolling(window=window)
        return windowed.cov(other=s2)
    
    
    # — ensure necessary columns — 
    # (if you haven’t already renamed Volume & computed VWAP)
    # df.rename(columns={'M1': 'Volume'}, inplace=True)
    if 'VWAP' not in df.columns:
        df['VWAP'] = (df['High'] + df['Low'] + df['Close']) / 3
    
    
    # ————— alpha10 —————
    # –1 * rank(abs(close – open))
    df['alpha10'] = - rank((df['Close'] - df['Open']).abs())
    
    
    # ————— alpha11 —————
    # ( rank(ts_max(vwap–close,3)) + rank(ts_min(vwap–close,3)) ) * rank(delta(volume,3))
    p = df['VWAP'] - df['Close']
    df['alpha11'] = (
        rank(ts_max(p, 3)) +
        rank(ts_min(p, 3))
    ) * rank(delta(df['Volume'], 3))
    
    
    # ————— alpha12 —————
    # sign(delta(volume,1)) * (–1 * delta(close,1))
    df['alpha12'] = sign(delta(df['Volume'], 1)) * (- delta(df['Close'], 1))
    
    
    # ————— alpha13 —————
    # –1 * rank(covariance(rank(close), rank(volume),5))
    cov_cv = covariance(rank(df['Close']), rank(df['Volume']), 5)
    df['alpha13'] = - rank(cov_cv)
    
    
    # ————— alpha14 —————
    # –1 * rank(delta(return,3)) * correlation(open, volume,10)
    # (your returns column is named 'return' from pct_change())
    df['alpha14'] = - rank(delta(df['return'], 3)) * correlation(df['Open'], df['Volume'], 10)
    
    
    # ————— alpha15 —————
    # –1 * ts_sum( rank(correlation(rank(high), rank(volume),3)), 3 )
    corr_hv3 = correlation(rank(df['High']), rank(df['Volume']), 3)
    df['alpha15'] = - ts_sum(rank(corr_hv3), 3)
    
    
    # ————— alpha16 —————
    # –1 * rank(covariance(rank(high), rank(volume),5))
    cov_hv5 = covariance(rank(df['High']), rank(df['Volume']), 5)
    df['alpha16'] = - rank(cov_hv5)
    
    
    # ————— helper functions —————
    def delta(s, period):
        """Difference between each row and the row ``period`` steps before it."""
        return s.diff(periods=period)
    
    def ts_rank(s, window):
        """Rolling rank of the newest value inside each window, as a fraction.

        Every full window is ranked and the rank of its last row is divided
        by the number of rows in that window.
        """
        def _newest_rank_fraction(win):
            newest_rank = win.rank().iloc[-1]
            return newest_rank / len(win)

        return s.rolling(window).apply(_newest_rank_fraction, raw=False)
    
    def ts_sum(s, window):
        """Rolling sum: total of the last ``window`` rows of ``s``."""
        roller = s.rolling(window=window)
        return roller.sum()
    
    def ts_min(s, window):
        """Rolling minimum: the smallest value among the last ``window`` rows."""
        roller = s.rolling(window=window)
        return roller.min()
    
    def ts_max(s, window):
        """Return the highest value seen in each rolling window of ``window`` rows."""
        highest = s.rolling(window=window).max()
        return highest
    
    def stddev(s, window):
        """Return the rolling standard deviation of ``s`` over ``window`` rows (pandas default ``ddof``)."""
        windowed = s.rolling(window=window)
        return windowed.std()
    
    def rank(s):
        """Return the percentile rank of each value of ``s``, in (0, 1].

        Formulas in this function use ranks as fractions (for example
        ``rank(...) > 0.5`` and ``1 - rank(...)``), so the rank is normalised
        by the number of non-NaN observations instead of being returned as a
        raw ordinal position (1..N). Ties share the average rank and NaN
        inputs stay NaN.
        """
        return s.rank(pct=True)
    
    def correlation(s1, s2, window):
        """Rolling correlation between ``s1`` and ``s2`` using ``window`` rows."""
        roller = s1.rolling(window=window)
        return roller.corr(other=s2)
    
    def sign(s):
        """Element-wise sign of ``s`` as computed by ``numpy.sign``."""
        result = np.sign(s)
        return result
    
    def delay(s, period):
        """Lag ``s`` by ``period`` rows."""
        return s.shift(periods=period)
    
    # — ensure VWAP & adv20 exist —
    if 'VWAP' not in df.columns:
        df['VWAP'] = (df['High'] + df['Low'] + df['Close']) / 3
    df['adv20'] = df['Volume'].rolling(20).mean()
    
    # ————— alpha17 —————
    # (-1 * rank(ts_rank(close,10))) * rank(Δ² close) * rank(ts_rank(volume/adv20,5))
    df['alpha17'] = (
        - rank(ts_rank(df['Close'], 10))
        * rank(delta(delta(df['Close'], 1), 1))
        * rank(ts_rank(df['Volume'] / df['adv20'], 5))
    )
    
    # ————— alpha18 —————
    # -1 * rank(stddev(|close-open|,5) + (close-open) + corr(close, open,10))
    expr18 = (
        stddev((df['Close'] - df['Open']).abs(), 5)
        + (df['Close'] - df['Open'])
        + correlation(df['Close'], df['Open'], 10)
    )
    df['alpha18'] = - rank(expr18)
    
    # ————— alpha19 —————
    # -1 * sign((close - delay(close,7)) + Δ close(7)) * (1 + rank(1 + ts_sum(returns,250)))
    sign19 = sign((df['Close'] - delay(df['Close'], 7)) + delta(df['Close'], 7))
    sum250 = ts_sum(df['return'], 250)
    df['alpha19'] = -1 * sign19 * (1 + rank(1 + sum250))
    
    # ————— alpha20 —————
    # (-1 * rank(open - delay(high,1))) * rank(open - delay(close,1)) * rank(open - delay(low,1))
    r20a = - rank(df['Open'] - delay(df['High'], 1))
    r20b = rank(df['Open'] - delay(df['Close'], 1))
    r20c = rank(df['Open'] - delay(df['Low'], 1))
    df['alpha20'] = r20a * r20b * r20c
    
    # ————— alpha21 —————
    # 삼중 조건문
    ma8 = ts_sum(df['Close'], 8) / 8
    ma2 = ts_sum(df['Close'], 2) / 2
    sd8 = stddev(df['Close'], 8)
    vol_rel = df['Volume'] / df['adv20']
    
    df['alpha21'] = np.where(
        (ma8 + sd8) < ma2, -1,
        np.where(
            ma2 < (ma8 - sd8), 1,
            np.where((vol_rel >= 1), 1, -1)
        )
    )
    
    # ————— alpha22 —————
    # -1 * Δ(corr(high, volume,5), 5) * rank(stddev(close,20))
    corr5_hv = correlation(df['High'], df['Volume'], 5)
    df['alpha22'] = - delta(corr5_hv, 5) * rank(stddev(df['Close'], 20))
    
    # ————— alpha23 —————
    # ts_sum(high,20)/20 < high → -Δ high(2) else 0
    df['alpha23'] = np.where(
        (ts_sum(df['High'], 20) / 20) < df['High'],
        - delta(df['High'], 2),
        0
    )
    
    # ————— alpha24 —————
    # 조건문: Δ(ma100,100)/delay(close,100) <= 0.05
    ma100 = ts_sum(df['Close'], 100) / 100
    frac = delta(ma100, 100) / delay(df['Close'], 100)
    df['alpha24'] = np.where(
        frac <= 0.05,
        - (df['Close'] - ts_min(df['Close'], 100)),
        - delta(df['Close'], 3)
    )
    
    # ————— alpha25 —————
    # rank((-1 * returns) * adv20 * VWAP * (high - close))
    expr25 = (-1 * df['return']) * df['adv20'] * df['VWAP'] * (df['High'] - df['Close'])
    df['alpha25'] = rank(expr25)
    
    # ————— alpha26 —————
    # -1 * ts_max(corr(ts_rank(volume,5), ts_rank(high,5),5), 3)
    corr_vh5 = correlation(ts_rank(df['Volume'], 5), ts_rank(df['High'], 5), 5)
    df['alpha26'] = - ts_max(corr_vh5, 3)
    
    # ————— helper functions —————
    def delta(s, period):
        """Return the ``period``-row difference of ``s``."""
        differenced = s.diff(periods=period)
        return differenced
    
    def rank(s):
        """Return the percentile rank of each value of ``s``, in (0, 1].

        Formulas in this function use ranks as fractions (for example
        ``rank(...) > 0.5`` and ``1 - rank(...)``); with raw ordinal ranks
        (1..N) such a threshold is true for every non-NaN row. The rank is
        therefore normalised by the number of non-NaN observations. Ties share
        the average rank and NaN inputs stay NaN.
        """
        return s.rank(pct=True)
    
    def ts_sum(s, window):
        """Sum of ``s`` over each rolling window of ``window`` rows."""
        total = s.rolling(window=window).sum()
        return total
    
    def ts_min(s, window):
        """Return the lowest value seen in each rolling window of ``window`` rows."""
        lowest = s.rolling(window=window).min()
        return lowest
    
    def ts_max(s, window):
        """Maximum of ``s`` over each rolling window of ``window`` rows."""
        peak = s.rolling(window=window).max()
        return peak
    
    def ts_rank(s, window):
        """Fractional rank of the most recent value within each rolling window.

        The last row of every full window is ranked against the rows of that
        window; the rank is divided by the window's row count.
        """
        def _rank_of_last(win):
            return win.rank().iloc[-1] / len(win)

        roller = s.rolling(window)
        return roller.apply(_rank_of_last, raw=False)
    
    def stddev(s, window):
        """Rolling standard deviation of ``s`` over ``window`` rows (pandas default ``ddof``)."""
        roller = s.rolling(window=window)
        return roller.std()
    
    def correlation(s1, s2, window):
        """Correlation of ``s1`` and ``s2`` computed over rolling windows of ``window`` rows."""
        rolling_corr = s1.rolling(window=window).corr(other=s2)
        return rolling_corr
    
    def sign(s):
        """Return ``numpy.sign`` applied element-wise to ``s``."""
        signed = np.sign(s)
        return signed
    
    def delay(s, period):
        """Return the value of ``s`` from ``period`` rows earlier."""
        shifted = s.shift(periods=period)
        return shifted
    
    def scale(s):
        """Standardise ``s`` to zero mean and unit standard deviation.

        The mean and standard deviation are taken over the whole series.
        """
        # NOTE(review): this is a z-score. The published "101 Formulaic Alphas"
        # operator scale(x, a) rescales x so that sum(|x|) == a -- confirm
        # which definition is intended before relying on sign(scale(...)).
        centre = s.mean()
        spread = s.std()
        return (s - centre) / spread
    
    def decay_linear(s, period):
        """Linearly weighted rolling average of ``s`` over ``period`` rows.

        Weights run 1, 2, ..., ``period`` from the oldest to the newest row
        of each window and are normalised by their sum.
        """
        weights = np.arange(1, period + 1)

        def _weighted_mean(win):
            return np.dot(win, weights) / weights.sum()

        return s.rolling(period).apply(_weighted_mean, raw=True)
    
    def product(*args):
        """Multiply the arguments, or take a rolling product over a window.

        ``product(series, d)`` with an integer ``d`` returns, for each row,
        the product of the last ``d`` values of ``series`` (NaN until ``d``
        rows are available). Previously this case multiplied the series by
        the constant ``d``; the alpha formulas that call ``product(x, d)``
        pass a window length, so the rolling product is what they describe.
        For ``d == 1`` both interpretations give the same result.

        Any other combination of arguments is multiplied element-wise from
        left to right, as before.
        """
        if len(args) == 2:
            values, window = args
            is_window = (
                isinstance(window, (int, np.integer))
                and not isinstance(window, bool)
                and hasattr(values, 'rolling')
            )
            if is_window:
                return values.rolling(int(window)).apply(np.prod, raw=True)
        res = args[0]
        for arr in args[1:]:
            res = res * arr
        return res
    
    # ensure VWAP & adv20 존재
    if 'VWAP' not in df.columns:
        df['VWAP'] = (df['High'] + df['Low'] + df['Close']) / 3
    df['adv20'] = df['Volume'].rolling(20).mean()
    
    # ————— alpha27 —————
    # if rank(ts_sum(corr(rank(volume),rank(vwap),6),2)/2) > 0.5 then -1 else 1
    tmp27 = ts_sum(correlation(rank(df['Volume']), rank(df['VWAP']), 6), 2) / 2
    df['alpha27'] = np.where(rank(tmp27) > 0.5, -1, 1)
    
    # ————— alpha28 —————
    # scale(corr(adv20, low,5) + (high+low)/2 - close)
    expr28 = correlation(df['adv20'], df['Low'], 5) + (df['High'] + df['Low'])/2 - df['Close']
    df['alpha28'] = scale(expr28)
    
    # ————— alpha29 —————
    # ts_min(product(rank(rank(scale(log(ts_sum(ts_min(rank(rank(-1*rank(delta(close-1,5))),2),1))))),1),5)
    # + ts_rank(delay(-1*returns,6),5)
    part29 = ts_sum(ts_min(rank(rank(-1 * rank(delta(df['Close']-1, 5)))), 2), 1)
    part29 = np.log(part29).pipe(scale).pipe(rank).pipe(rank)
    prod29 = product(rank(part29), 1)
    df['alpha29'] = ts_min(prod29, 5) + ts_rank(delay(-1 * df['return'], 6), 5)
    
    # ————— alpha30 —————
    # ((1-rank(sign(Δ1+Δ2+Δ3))) * ts_sum(volume,5)) / ts_sum(volume,20)
    sigs = sign(df['Close'] - delay(df['Close'],1)) \
           + sign(delay(df['Close'],1) - delay(df['Close'],2)) \
           + sign(delay(df['Close'],2) - delay(df['Close'],3))
    df['alpha30'] = ((1 - rank(sigs)) * ts_sum(df['Volume'], 5)) / ts_sum(df['Volume'], 20)
    
    # ————— alpha31 —————
    # rank(rank(rank(decay_linear(-1*rank(rank(delta(close,10))),10)))) 
    # + rank(-delta(close,3)) + sign(scale(corr(adv20,low,12)))
    part31 = decay_linear(-1 * rank(rank(delta(df['Close'], 10))), 10)
    df['alpha31'] = rank(rank(rank(part31))) \
                    + rank(-delta(df['Close'], 3)) \
                    + sign(scale(correlation(df['adv20'], df['Low'], 12)))
    
    # ————— alpha32 —————
    # scale((ts_sum(close,7)/7)-close) + 20*scale(corr(vwap,delay(close,5),230))
    df['alpha32'] = scale(ts_sum(df['Close'], 7)/7 - df['Close']) \
                    + 20 * scale(correlation(df['VWAP'], delay(df['Close'], 5), 230))
    
    # ————— alpha33 —————
    # rank(-((1-open/close)**1))
    df['alpha33'] = rank(-((1 - df['Open']/df['Close'])**1))
    
    # ————— alpha34 —————
    # rank((1-rank(stddev(returns,2)/stddev(returns,5))) + (1-rank(delta(close,1))))
    expr34 = (1 - rank(stddev(df['return'], 2) / stddev(df['return'], 5))) \
             + (1 - rank(delta(df['Close'], 1)))
    df['alpha34'] = rank(expr34)
    
    # ————— alpha35 —————
    # ts_rank(volume,32)*(1-ts_rank(close+high-low,16))*(1-ts_rank(returns,32))
    df['alpha35'] = ts_rank(df['Volume'], 32) \
                    * (1 - ts_rank(df['Close'] + df['High'] - df['Low'], 16)) \
                    * (1 - ts_rank(df['return'], 32))
    
    # ————— alpha36 —————
    # 2.21*rank(corr(close-open,delay(volume,1),15)) + 0.7*rank(open-close)
    # +0.73*rank(ts_rank(delay(-1*returns,6),5)) + rank(abs(corr(vwap,adv20,6)))
    # +0.6*rank(((ts_sum(close,200)/200-open)*(close-open)))
    term1 = rank(correlation(df['Close']-df['Open'], delay(df['Volume'],1), 15)) * 2.21
    term2 = rank(df['Open'] - df['Close']) * 0.7
    term3 = rank(ts_rank(delay(-1 * df['return'], 6), 5)) * 0.73
    term4 = rank(correlation(df['VWAP'], df['adv20'], 6).abs())
    term5 = rank((ts_sum(df['Close'], 200)/200 - df['Open']) * (df['Close'] - df['Open'])) * 0.6
    df['alpha36'] = term1 + term2 + term3 + term4 + term5
    
    # ————— alpha37 —————
    # rank(corr(delay(open-close,1), close,200)) + rank(open-close)
    df['alpha37'] = rank(correlation(delay(df['Open']-df['Close'],1), df['Close'], 200)) \
                    + rank(df['Open'] - df['Close'])
    
    # — helper functions —
    def delta(s, period):
        """Change in ``s`` relative to ``period`` rows earlier."""
        moved = s.diff(periods=period)
        return moved
    
    def rank(s):
        """Return the percentile rank of each value of ``s``, in (0, 1].

        Formulas in this function use ranks as fractions (for example
        ``1 - rank(...)`` and ``rank(...) > 0.5``), so the rank is normalised
        by the number of non-NaN observations instead of being returned as a
        raw ordinal position (1..N). Ties share the average rank and NaN
        inputs stay NaN.
        """
        return s.rank(pct=True)
    
    def ts_rank(s, window):
        """Rank of the latest value within each rolling window, as a fraction.

        For every full window the rows are ranked and the last row's rank is
        divided by the number of rows in the window.
        """
        def _fraction_for_last(win):
            ranked = win.rank()
            return ranked.iloc[-1] / len(win)

        return s.rolling(window).apply(_fraction_for_last, raw=False)
    
    def decay_linear(s, period):
        """Rolling average of ``s`` with linearly increasing weights.

        Within each window of ``period`` rows the oldest row gets weight 1
        and the newest gets weight ``period``; the weighted sum is divided by
        the sum of the weights.
        """
        weights = np.arange(1, period + 1)

        def _linear_average(values):
            return np.dot(values, weights) / weights.sum()

        roller = s.rolling(period)
        return roller.apply(_linear_average, raw=True)
    
    def ts_sum(s, window):
        """Return the total of ``s`` across each rolling window of ``window`` rows."""
        roller = s.rolling(window=window)
        summed = roller.sum()
        return summed
    
    def stddev(s, window):
        """Standard deviation of ``s`` over each rolling window of ``window`` rows (pandas default ``ddof``)."""
        deviation = s.rolling(window=window).std()
        return deviation
    
    def correlation(s1, s2, window):
        """Return the correlation of ``s1`` and ``s2`` within each rolling window of ``window`` rows."""
        left = s1.rolling(window=window)
        return left.corr(other=s2)
    
    # — ensure returns column exists —
    # If your pct_change return is in 'return', alias it:
    df['returns'] = df.get('returns', df['return'])
    
    # — alpha38 —
    # -1 * rank(ts_rank(Close,10)) * rank(Close/Open)
    df['alpha38'] = -1 * rank(ts_rank(df['Close'], 10)) * rank(df['Close'] / df['Open'])
    
    # — alpha39 —
    # -1 * rank(Δ7(Close) * (1 - rank(decay_linear(Volume/adv20,9)))) * (1 + rank(ts_sum(returns,250)))
    term39 = delta(df['Close'], 7) * (1 - rank(decay_linear(df['Volume'] / df['adv20'], 9)))
    df['alpha39'] = -1 * rank(term39) * (1 + rank(ts_sum(df['returns'], 250)))
    
    # — alpha40 —
    # -1 * rank(stddev(High,10)) * corr(High, Volume,10)
    df['alpha40'] = -1 * rank(stddev(df['High'], 10)) * correlation(df['High'], df['Volume'], 10)
    
    # — alpha41 —
    # sqrt(High * Low) - VWAP
    df['alpha41'] = (df['High'] * df['Low'])**0.5 - df['VWAP']
    
    # — alpha42 —
    # rank(VWAP - Close) / rank(VWAP + Close)
    df['alpha42'] = rank(df['VWAP'] - df['Close']) / rank(df['VWAP'] + df['Close'])
    
    # — alpha43 —
    # ts_rank(Volume/adv20,20) * ts_rank(-Δ7(Close),8)
    df['alpha43'] = ts_rank(df['Volume'] / df['adv20'], 20) * ts_rank(-1 * delta(df['Close'], 7), 8)
    
    # — alpha44 —
    # -1 * corr(High, rank(Volume),5)
    df['alpha44'] = -1 * correlation(df['High'], rank(df['Volume']), 5)
    
    # — alpha48 재계산 예시 —
    num48 = (
        correlation(delta(df['Close'], 1),
                    delta(df['Close'].shift(1), 1), 250)
        * delta(df['Close'], 1)
    ) / df['Close']
    den48 = ts_sum((delta(df['Close'], 1) / df['Close'].shift(1)) ** 2, 250)
    
    # global neutralize: 전체 평균을 빼줌
    glob_neut = num48 - num48.mean()
    
    # alpha48 재계산
    df['alpha48'] = glob_neut / den48
    
    # alpha45
    df['alpha45'] = -1 * (
        rank(ts_sum(delay(df['Close'], 5), 20) / 20)
        * correlation(df['Close'], df['Volume'], 2)
        * rank(correlation(ts_sum(df['Close'], 5), ts_sum(df['Close'], 20), 2))
    )
    
    # alpha46
    expr46 = ((delay(df['Close'], 20) - delay(df['Close'], 10)) / 10
              - (delay(df['Close'], 10) - df['Close']) / 10)
    df['alpha46'] = np.where(
        expr46 > 0.25, -1,
        np.where(expr46 < 0, 1, -1 * (df['Close'] - delay(df['Close'], 1)))
    )
    
    # alpha47
    df['alpha47'] = (
        ((rank(1 / df['Close']) * df['Volume']) / df['adv20'])
        * ((df['High'] * rank(df['High'] - df['Close'])) / (ts_sum(df['High'], 5) / 5))
    ) - rank(df['VWAP'] - delay(df['VWAP'], 5))
    
    # alpha49
    expr49 = ((delay(df['Close'], 20) - delay(df['Close'], 10)) / 10
              - (delay(df['Close'], 10) - df['Close']) / 10)
    df['alpha49'] = np.where(
        expr49 < -0.1, 1,
        -1 * (df['Close'] - delay(df['Close'], 1))
    )
    
    # alpha50
    df['alpha50'] = -1 * ts_max(
        rank(correlation(rank(df['Volume']), rank(df['VWAP']), 5)),
        5
    )
    
    # 1) FRED에서 미국 실질 GDP 불러오기 (분기별)
    gdp = pdr.DataReader('NAEXKP01EZQ657S', 'fred',
                         start=df.index.min(),
                         end=df.index.max())
    gdp.rename(columns={'NAEXKP01EZQ657S': 'cap'}, inplace=True)
    
    # 2) 분기→일별(영업일) 인덱스로 재색인하고 이전 값으로 채우기
    gdp = gdp.reindex(df.index, method='ffill')
    
    # 3) df에 ‘cap’ 컬럼으로 합치기
    df = df.join(gdp['cap'])
    
    # 4) α56 다시 계산
    #    -1 * rank( ts_sum(returns,10) / ts_sum(ts_sum(returns,2),3) ) * rank(returns * cap)
    df['alpha56'] = (
        - rank(ts_sum(df['returns'], 10) / ts_sum(ts_sum(df['returns'], 2), 3))
        * rank(df['returns'] * df['cap'])
    )
    
    # ts_argmax 정의 (각 윈도우에서 최대값 위치 반환)
    def ts_argmax(s, window):
        """Position of the maximum inside each rolling window.

        The position is 0-based, counted from the oldest row of the window;
        when the maximum occurs more than once the first occurrence is
        reported.
        """
        roller = s.rolling(window)
        return roller.apply(np.argmax, raw=True)
    
    # 그 뒤에 alpha57 재계산
    df['alpha57'] = -1 * (df['Close'] - df['VWAP']) \
                    / decay_linear(rank(ts_argmax(df['Close'], 30)), 2)
    
    # num58: α58의 분자 계산 부분 (원래 코드를 그대로 이용)
    num58 = correlation(
                delta(df['Close'], 1),
                delta(df['Close'].shift(1), 1),
                int(3.92795)
            )
    num58 = decay_linear(num58, int(7.89291))
    
    # 글로벌 중립화: 전체 평균 빼기
    glob_neut58 = num58 - num58.mean()
    
    # α58 재계산
    df['alpha58'] = -1 * ts_rank(glob_neut58, int(5.50322))
    
    
    # 1) weighted_vwap 정의 (여기선 사실 VWAP 그대로지만, 계수만 반영)
    weighted_vwap = df['VWAP'] * 0.728317 + df['VWAP'] * (1 - 0.728317)
    
    # 2) 원래 α59 식 분자: corr(weighted_vwap, Volume, window≈4.25) → Int로 변환
    num59 = correlation(weighted_vwap, df['Volume'], window=int(4.25197))
    
    # 3) decay_linear 적용 (period≈16.23 → Int로)
    decay_num59 = decay_linear(num59, period=int(16.2289))
    
    # 4) 글로벌 중립화 (전체 평균 빼기)
    glob_neut59 = decay_num59 - decay_num59.mean()
    
    # 5) 최종 α59 (–1× ts_rank(..., window≈8.20))
    df['alpha59'] = -1 * ts_rank(glob_neut59, window=int(8.19648))
    
    # α51
    df['alpha51'] = np.where(
        ((df['Close'].shift(20) - df['Close'].shift(10)) / 10
         - (df['Close'].shift(10) - df['Close']) / 10) < -0.05,
        1,
        -1 * (df['Close'] - df['Close'].shift(1))
    )
    
    # α52
    df['alpha52'] = (
        (- ts_min(df['Low'], 5)
         + delay(ts_min(df['Low'], 5), 5))
        * rank((ts_sum(df['returns'], 240) - ts_sum(df['returns'], 20)) / 220)
        * ts_rank(df['Volume'], 5)
    )
    
    # α53
    df['alpha53'] = -1 * delta(
        ((df['Close'] - df['Low']) - (df['High'] - df['Close']))
        / (df['Close'] - df['Low']),
        9
    )
    
    # α54
    df['alpha54'] = (
        -1 * (df['Low'] - df['Close']) * df['Open']**5
    ) / (
        (df['Low'] - df['High']) * df['Close']**5
    )
    
    # α55
    expr55 = (df['Close'] - ts_min(df['Low'], 12)) \
             / (ts_max(df['High'], 12) - ts_min(df['Low'], 12))
    df['alpha55'] = -1 * correlation(
        rank(expr55),
        rank(df['Volume']),
        6
    )
    
    
    
    # α60
    term60 = (((df['Close'] - df['Low']) - (df['High'] - df['Close']))
              / (df['High'] - df['Low'])) * df['Volume']
    df['alpha60'] = -1 * (
        2 * scale(rank(term60))
        - scale(rank(ts_argmax(df['Close'], 10)))
    )
    
    df['adv180'] = df['Volume'].rolling(180).mean()
    df['adv120'] = df['Volume'].rolling(120).mean()
    df['adv60']  = df['Volume'].rolling(60).mean()
    df['adv50']  = df['Volume'].rolling(50).mean()
    df['adv15']  = df['Volume'].rolling(15).mean()
    
    
    # global neutralize: 전체 평균 빼기
    def global_neutralize(s):
        """Demean ``s`` by subtracting the mean of the whole series."""
        overall_mean = s.mean()
        return s - overall_mean
    
    # α61 (변경 없음)
    df['alpha61'] = (
        rank(df['VWAP'] - ts_min(df['VWAP'], 16))
        < rank(correlation(df['VWAP'], df['adv180'], 17))
    ).astype(int)
    
    # α62 (변경 없음)
    df['alpha62'] = (
        (rank(correlation(df['VWAP'], ts_sum(df['adv20'], 22), 9))
         < rank((rank(df['Open']) + rank(df['Open']))
                < (rank((df['High'] + df['Low'])/2) + rank(df['High']))))
        * -1
    )
    
    # α63 (global neutralize 적용)
    df['alpha63'] = -(
        rank(decay_linear(delta(global_neutralize(df['Close']), 2), 8))
        - rank(decay_linear(
            correlation(
                df['VWAP'] * 0.318108 + df['Open'] * (1 - 0.318108),
                ts_sum(df['adv180'], 37),
                13
            ),
            12
        ))
    )
    
    # α64 (변경 없음)
    df['alpha64'] = -(
        rank(correlation(
            ts_sum(df['Open'] * 0.178404 + df['Low'] * (1 - 0.178404), 12),
            ts_sum(df['adv120'], 12),
            16
        ))
        < rank(delta(
            (df['High'] + df['Low'])/2 * 0.178404
            + df['VWAP'] * (1 - 0.178404),
            3
        ))
    ).astype(int)
    
    # α65 (변경 없음)
    df['alpha65'] = -(
        rank(correlation(
            df['Open'] * 0.00817205 + df['VWAP'] * (1 - 0.00817205),
            ts_sum(df['adv60'], 8),
            6
        ))
        < rank(df['Open'] - ts_min(df['Open'], 13))
    ).astype(int)
    
    # α66 (변경 없음)
    df['alpha66'] = -(
        rank(decay_linear(delta(df['VWAP'], 3), 7))
        + ts_rank(decay_linear(
            ((df['Low'] * 0.96633 + df['Low'] * (1 - 0.96633)) - df['VWAP'])
            / (df['Open'] - (df['High'] + df['Low'])/2),
            11
        ), 6)
    )
    
    # α67 (global neutralize 적용)
    df['alpha67'] = - rank(df['High'] - ts_min(df['High'], 2)) \
                    * rank(correlation(
                        global_neutralize(df['VWAP']),
                        global_neutralize(df['adv20']),
                        6
                    ))
    
    # α68 (변경 없음)
    df['alpha68'] = -(
        ts_rank(correlation(rank(df['High']), rank(df['adv15']), 8), 13)
        < rank(delta(df['Close'] * 0.518371 + df['Low'] * (1 - 0.518371), 1))
    ).astype(int)
    
    # α69 (global neutralize 적용)
    df['alpha69'] = - (
        rank(ts_max(delta(global_neutralize(df['VWAP']), 2), 4))
        * ts_rank(correlation(
            df['Close'] * 0.490655 + df['VWAP'] * (1 - 0.490655),
            df['adv20'],
            4
        ), 9)
    )
    
    # α70 (global neutralize 적용)
    df['alpha70'] = - (
        rank(delta(df['VWAP'], 1))
        * ts_rank(correlation(
            global_neutralize(df['Close']),
            df['adv50'],
            17
        ), 17)
    )
    
    # 1) adv 컬럼 일괄 생성
    for w in [5, 10, 15, 20, 30, 40, 50, 60, 81, 120, 150, 180]:
        df[f'adv{w}'] = df['Volume'].rolling(window=w).mean()
    
    # 2) 그 다음 α71–α80 코드 실행
    # (이전에 짜둔 α71–α80 스니펫을 그대로 붙여넣으면 KeyError 없이 계산됩니다)
    
    
    # — adv 컬럼 확인/생성 (필요한 advX)
    for w in [15,30,40,50,60,81,120,150,180]:
        df[f'adv{w}'] = df['Volume'].rolling(w).mean()
    
    # — α73 (period 인자 둘 다 명시)
    df['alpha73'] = -1 * np.maximum(
        # decay_linear(delta(VWAP, 4.72775), 2.91864)
        rank(
            decay_linear(
                delta(df['VWAP'], int(4.72775)),
                int(2.91864)
            )
        ),
        # ts_rank(decay_linear(… , 3.33829), 16.7411)
        ts_rank(
            decay_linear(
                -1 * delta(
                    df['Open'] * 0.147155 + df['Low'] * (1 - 0.147155),
                    int(2.03608)
                ),
                int(3.33829)
            ),
            int(16.7411)
        )
    )
    
    
    # 0) 필요한 adv 컬럼 미리 생성
    for w in [10,15,20,30,40,50,60,81,120,150,180]:
        df[f'adv{w}'] = df['Volume'].rolling(int(w)).mean()
    
    # 1) 글로벌 중립화 함수
    def global_neutralize(s):
        """Centre ``s`` on zero using its full-series mean."""
        return s.sub(s.mean())
    
    # 2) α70
    df['alpha70'] = -1 * (
        rank(delta(df['VWAP'], int(1.29456)))
        * ts_rank(
            correlation(
                global_neutralize(df['Close']),
                df['adv50'],
                int(17.8256)
            ),
            int(17.9171)
        )
    )
    
    # 3) α71
    df['alpha71'] = np.maximum(
        ts_rank(
            decay_linear(
                correlation(
                    ts_rank(df['Close'], int(3.43976)),
                    ts_rank(df['adv180'], int(12.0647)),
                    int(18.0175)
                ),
                int(4.20501)
            ),
            int(15.6948)
        ),
        ts_rank(
            decay_linear(
                rank(df['Low'] + df['Open'] - 2*df['VWAP']),
                int(16.4662)
            ),
            int(4.4388)
        )
    )
    
    # 4) α72
    df['alpha72'] = (
        rank(
            decay_linear(
                correlation(
                    (df['High'] + df['Low'])/2,
                    df['adv40'],
                    int(8.93345)
                ),
                int(10.1519)
            )
        ) /
        rank(
            decay_linear(
                correlation(
                    ts_rank(df['VWAP'], int(3.72469)),
                    ts_rank(df['Volume'], int(18.5188)),
                    int(6.86671)
                ),
                int(2.95011)
            )
        )
    )
    
    # 5) α73
    df['alpha73'] = -1 * np.maximum(
        rank(decay_linear(delta(df['VWAP'], int(4.72775)), int(2.91864))),
        ts_rank(
            decay_linear(
                -1 * delta(
                    df['Open']*0.147155 + df['Low']*(1-0.147155),
                    int(2.03608)
                ),
                int(3.33829)
            ),
            int(16.7411)
        )
    )
    
    # 6) α74
    df['alpha74'] = (
        rank(correlation(df['Close'], ts_sum(df['adv30'], int(37.4843)), int(15.1365)))
        <
        rank(correlation(
            rank(df['High']*0.0261661 + df['VWAP']*(1-0.0261661)),
            rank(df['Volume']),
            int(11.4791)
        ))
    ) * -1
    
    # 7) α75
    df['alpha75'] = (
        rank(correlation(df['VWAP'], df['Volume'], int(4.24304)))
        <
        rank(correlation(rank(df['Low']), rank(df['adv50']), int(12.4413)))
    )
    
    # 8) α76
    df['alpha76'] = -1 * np.maximum(
        rank(decay_linear(delta(df['VWAP'], int(1.24383)), int(11.8259))),
        ts_rank(
            decay_linear(
                ts_rank(
                    correlation(
                        global_neutralize(df['Low']),
                        df['adv81'],
                        int(8.14941)
                    ),
                    int(19.569)
                ),
                int(17.1543)
            ),
            int(19.383)
        )
    )
    
    # 9) α77
    df['alpha77'] = np.minimum(
        rank(decay_linear(((df['High']+df['Low'])/2 + df['High'] - df['VWAP'] - df['High']), int(20.0451))),
        rank(decay_linear(correlation((df['High']+df['Low'])/2, df['adv40'], int(3.1614)), int(5.64125)))
    )
    
    # α78 (수정)
    df['alpha78'] = (
        rank(
            correlation(
                ts_sum(df['Low'] * 0.352233 + df['VWAP'] * (1 - 0.352233), int(19.7428)),
                ts_sum(df['adv40'], int(19.7428)),
                int(6.83313)
            )
        )
        *
        rank(
            correlation(
                rank(df['VWAP']),
                rank(df['Volume']),
                int(5.77492)
            )
        )
    )
    
    # 11) α79
    # rank(delta(global_neutralize(Close*0.60733 + Open*(1 - 0.60733)), 1))
    #   < rank(correlation(ts_rank(VWAP, 3), ts_rank(adv150, 9), 14))
    # The expression is evaluated a single time: running the identical formula
    # twice only repeats the rolling computations and overwrites the same column.
    df['alpha79'] = (
        rank(
            delta(
                global_neutralize(df['Close'] * 0.60733 + df['Open'] * (1 - 0.60733)),
                int(1.23438)
            )
        )
        < rank(
            correlation(
                ts_rank(df['VWAP'], int(3.60973)),
                ts_rank(df['adv150'], int(9.18637)),
                int(14.6644)
            )
        )
    )
    
    # α80
    df['alpha80'] = -1 * (
        rank(
            sign(
                delta(
                    global_neutralize(df['Open'] * 0.868128 + df['High'] * (1 - 0.868128)),
                    int(4.04545)
                )
            )
        )
        * ts_rank(
            correlation(df['High'], df['adv10'], int(5.11456)),
            int(5.53756)
        )
    )
    
    # α81
    df['alpha81'] = -1 * (
        rank(
            np.log(
                product(
                    rank(
                        rank(
                            correlation(
                                df['VWAP'],
                                ts_sum(df['adv10'], int(49.6054)),
                                int(8.47743)
                            )
                        ) ** 4
                    ),
                    int(14.9655)
                )
            )
        )
        < rank(
            correlation(
                rank(df['VWAP']),
                rank(df['Volume']),
                int(5.07914)
            )
        )
    )
    
    # α82
    df['alpha82'] = -1 * np.minimum(
        rank(
            decay_linear(
                delta(
                    global_neutralize(df['Volume']),
                    int(1.46063)
                ),
                int(14.8717)
            )
        ),
        ts_rank(
            decay_linear(
                correlation(
                    global_neutralize(df['Volume']),
                    df['Open'] * 0.634196 + df['Open'] * (1 - 0.634196),
                    int(17.4842)
                ),
                int(6.92131)
            ),
            int(13.4283)
        )
    )
    
    # α83
    num83 = (
        rank(
            delay(
                (df['High'] - df['Low']) / (ts_sum(df['Close'], 5) / 5),
                int(2)
            )
        )
        * rank(df['Volume'].rank())
    )
    den83 = ((df['High'] - df['Low']) / (ts_sum(df['Close'], 5) / 5)) / (df['VWAP'] - df['Close'])
    df['alpha83'] = num83 / den83
    
    def signedpower(base, exponent):
        """Raise the magnitude of ``base`` to ``exponent`` and re-apply the sign of ``base``."""
        magnitude = np.abs(base)
        powered = magnitude ** exponent
        return np.sign(base) * powered
    
    # …이후에 α84 계산 부분…
    df['alpha84'] = signedpower(
        ts_rank(df['VWAP'] - ts_max(df['VWAP'], int(15.3217)), int(20.7127)),
        delta(df['Close'], int(4.96796))
    )
    
    
    # α85
    df['alpha85'] = (
        rank(
            correlation(
                df['High'] * 0.876703 + df['Close'] * (1 - 0.876703),
                df['adv30'],
                int(9.61331)
            )
        )
        * rank(
            correlation(
                ts_rank((df['High'] + df['Low']) / 2, int(3.70596)),
                ts_rank(df['Volume'], int(10.1595)),
                int(7.11408)
            )
        )
    )
    
    # α86
    df['alpha86'] = -1 * (
        ts_rank(
            correlation(
                df['Close'],
                ts_sum(df['adv20'], int(14.7444)),
                int(6.00049)
            ),
            int(20.4195)
        )
        < rank(df['Open'] + df['Close'] - (df['VWAP'] + df['Open']))
    )
    
    # α87
    tmp87a = rank(
        decay_linear(
            delta(
                df['Close'] * 0.369701 + df['VWAP'] * (1 - 0.369701),
                int(1.91233)
            ),
            int(2.65461)
        )
    )
    tmp87b = ts_rank(
        decay_linear(
            abs(
                correlation(
                    global_neutralize(df['adv81']),
                    df['Close'],
                    int(13.4132)
                )
            ),
            int(4.89768)
        ),
        int(14.4535)
    )
    df['alpha87'] = -1 * np.maximum(tmp87a, tmp87b)
    
    # α88
    df['alpha88'] = np.minimum(
        rank(
            decay_linear(
                rank(df['Open']) + rank(df['Low']) - rank(df['High']) - rank(df['Close']),
                int(8.06882)
            )
        ),
        ts_rank(
            decay_linear(
                correlation(
                    ts_rank(df['Close'], int(8.44728)),
                    ts_rank(df['adv60'], int(20.6966)),
                    int(8.01266)
                ),
                int(6.65053)
            ),
            int(2.61957)
        )
    )
    
    # α89
    df['alpha89'] = (
        ts_rank(
            decay_linear(
                correlation(df['Low'], df['adv10'], int(6.94279)),
                int(5.51607)
            ),
            int(3.79744)
        )
        - ts_rank(
            decay_linear(
                delta(global_neutralize(df['VWAP']), int(3.48158)),
                int(10.1466)
            ),
            int(15.3012)
        )
    )
    
    # — helper functions —
    def delta(s: pd.Series, period: int) -> pd.Series:
        """Difference between each value and the one ``int(period)`` rows earlier."""
        lag = int(period)
        return s.diff(lag)

    def delay(s: pd.Series, period: int) -> pd.Series:
        """Value of ``s`` from ``int(period)`` rows earlier."""
        lag = int(period)
        return s.shift(lag)

    def rank(s: pd.Series) -> pd.Series:
        """Rank of each value within the whole series (pandas ``rank`` defaults)."""
        ranked = s.rank()
        return ranked

    def ts_sum(s: pd.Series, window: int) -> pd.Series:
        """Rolling sum over ``int(window)`` rows."""
        roller = s.rolling(int(window))
        return roller.sum()

    def ts_max(s: pd.Series, window: int) -> pd.Series:
        """Rolling maximum over ``int(window)`` rows."""
        roller = s.rolling(int(window))
        return roller.max()

    def ts_min(s: pd.Series, window: int) -> pd.Series:
        """Rolling minimum over ``int(window)`` rows."""
        roller = s.rolling(int(window))
        return roller.min()

    def ts_rank(s: pd.Series, window: int) -> pd.Series:
        """Rank of the newest value inside each rolling window, divided by the window length."""
        def _last_rank_fraction(chunk):
            # chunk is a Series (raw=False), so .rank() and .iloc are available
            return chunk.rank().iloc[-1] / len(chunk)

        return s.rolling(int(window)).apply(_last_rank_fraction, raw=False)

    def ts_argmax(s: pd.Series, window: int) -> pd.Series:
        """Zero-based position of the maximum inside each rolling window."""
        return s.rolling(int(window)).apply(np.argmax, raw=True)

    def ts_argmin(s: pd.Series, window: int) -> pd.Series:
        """Zero-based position of the minimum inside each rolling window."""
        return s.rolling(int(window)).apply(np.argmin, raw=True)

    def decay_linear(s: pd.Series, period: int) -> pd.Series:
        """Rolling weighted mean with weights 1..int(period); the newest row gets the largest weight."""
        span = int(period)
        weights = np.arange(1, span + 1)

        def _weighted_mean(values):
            return np.dot(values, weights) / weights.sum()

        return s.rolling(span).apply(_weighted_mean, raw=True)

    def correlation(s1: pd.Series, s2: pd.Series, window: int) -> pd.Series:
        """Rolling correlation of ``s1`` with ``s2`` over ``int(window)`` rows."""
        roller = s1.rolling(int(window))
        return roller.corr(s2)

    def global_neutralize(s: pd.Series) -> pd.Series:
        """Subtract the mean of the whole series from every value."""
        centre = s.mean()
        return s - centre

    def product(*args) -> pd.Series:
        """Multiply all positional arguments together, left to right."""
        first, rest = args[0], args[1:]
        acc = first
        for factor in rest:
            acc = acc * factor
        return acc
    
    # — ensure adv columns exist —
    for w in [5,10,15,20,30,40,50,60,81,120,150,180]:
        df[f'adv{w}'] = df['Volume'].rolling(int(w)).mean()
    # α90
    df['alpha90'] = -1 * (
        rank(df['Close'] - ts_max(df['Close'], int(4.66719))) *
        ts_rank(
            correlation(global_neutralize(df['adv40']), df['Low'], int(5.38375)),
            int(3.21856)
        )
    )
    
    # α91
    df['alpha91'] = -1 * (
        ts_rank(
            decay_linear(
                decay_linear(
                    correlation(global_neutralize(df['Close']), df['Volume'], int(9.74928)),
                    int(16.398)
                ),
                int(3.83219)
            ),
            int(4.8667)
        )
        - rank(
            decay_linear(
                correlation(df['VWAP'], df['adv30'], int(4.01303)),
                int(2.6809)
            )
        )
    )
    
    # α92
    df['alpha92'] = np.minimum(
        ts_rank(
            decay_linear(
                ((df['High'] + df['Low']) / 2 + df['Close']) < (df['Low'] + df['Open']),
                int(14.7221)
            ),
            int(18.8683)
        ),
        ts_rank(
            decay_linear(
                correlation(
                    rank(df['Low']),
                    rank(df['adv30']),
                    int(7.58555)
                ),
                int(6.94024)
            ),
            int(6.80584)
        )
    )
    
    # α93
    df['alpha93'] = (
        ts_rank(
            decay_linear(
                correlation(global_neutralize(df['VWAP']), df['adv81'], int(17.4193)),
                int(19.848)
            ),
            int(7.54455)
        )
        /
        rank(
            decay_linear(
                delta(df['Close'] * 0.524434 + df['VWAP'] * (1 - 0.524434), int(2.77377)),
                int(16.2664)
            )
        )
    )
    
    # α94
    df['alpha94'] = -1 * (
        rank(df['VWAP'] - ts_min(df['VWAP'], int(11.5783))) *
        ts_rank(
            correlation(
                ts_rank(df['VWAP'], int(19.6462)),
                ts_rank(df['adv60'], int(4.02992)),
                int(18.0926)
            ),
            int(2.70756)
        )
    )
    
    # α95
    df['alpha95'] = (
        rank(df['Open'] - ts_min(df['Open'], int(12.4105))) <
        ts_rank(
            rank(
                correlation(
                    ts_sum((df['High'] + df['Low']) / 2, int(19.1351)),
                    ts_sum(df['adv40'], int(19.1351)),
                    int(12.8742)
                )
            ) ** 5,
            int(11.7584)
        )
    )
    
    # α96
    df['alpha96'] = -1 * np.maximum(
        ts_rank(
            decay_linear(
                correlation(rank(df['VWAP']), rank(df['Volume']), int(3.83878)),
                int(4.16783)
            ),
            int(8.38151)
        ),
        ts_rank(
            decay_linear(
                ts_argmax(
                    correlation(
                        ts_rank(df['Close'], int(7.45404)),
                        ts_rank(df['adv60'], int(4.13242)),
                        int(3.65459)
                    ),
                    int(12.6556)
                ),
                int(14.0365)
            ),
            int(13.4143)
        )
    )
    
    # α97
    df['alpha97'] = -1 * (
        rank(
            decay_linear(
                delta(global_neutralize(df['Low'] * 0.721001 + df['VWAP'] * (1 - 0.721001)), int(3.3705)),
                int(20.4523)
            )
        )
        - ts_rank(
            decay_linear(
                ts_rank(
                    correlation(
                        ts_rank(df['Low'], int(7.87871)),
                        ts_rank(df['adv60'], int(17.255)),
                        int(4.97547)
                    ),
                    int(18.5925)
                ),
                int(15.7152)
            ),
            int(6.71659)
        )
    )
    
    # α98
    df['alpha98'] = (
        rank(
            decay_linear(
                correlation(df['VWAP'], ts_sum(df['adv5'], int(26.4719)), int(4.58418)),
                int(7.18088)
            )
        )
        - rank(
            decay_linear(
                ts_rank(
                    ts_argmin(
                        correlation(rank(df['Open']), rank(df['adv15']), int(20.8187)),
                        int(8.62571)
                    ),
                    int(6.95668)
                ),
                int(8.07206)
            )
        )
    )
    
    # — helper functions —
    def delta(s, period):
        """Difference between each value and the one ``int(period)`` rows earlier."""
        lag = int(period)
        return s.diff(lag)

    def ts_sum(s, w):
        """Rolling sum over ``int(w)`` rows."""
        roller = s.rolling(int(w))
        return roller.sum()

    def ts_max(s, w):
        """Rolling maximum over ``int(w)`` rows."""
        roller = s.rolling(int(w))
        return roller.max()

    def ts_min(s, w):
        """Rolling minimum over ``int(w)`` rows."""
        roller = s.rolling(int(w))
        return roller.min()

    def ts_rank(s, w):
        """Rank of the newest value inside each rolling window, divided by the window length."""
        def _last_rank_fraction(chunk):
            return chunk.rank().iloc[-1] / len(chunk)

        return s.rolling(int(w)).apply(_last_rank_fraction, raw=False)

    def ts_argmin(s, w):
        """Zero-based position of the minimum inside each rolling window."""
        return s.rolling(int(w)).apply(np.argmin, raw=True)

    def correlation(s1, s2, w):
        """Rolling correlation of ``s1`` with ``s2`` over ``int(w)`` rows."""
        roller = s1.rolling(int(w))
        return roller.corr(s2)

    def decay_linear(s, p):
        """Rolling weighted mean with weights 1..int(p); the newest row gets the largest weight."""
        span = int(p)
        weights = np.arange(1, span + 1)

        def _weighted_mean(values):
            return np.dot(values, weights) / weights.sum()

        return s.rolling(span).apply(_weighted_mean, raw=True)

    def rank(s):
        """Rank of each value within the whole series (pandas ``rank`` defaults)."""
        ranked = s.rank()
        return ranked

    def scale(s):
        """Standardise ``s``: subtract its mean and divide by its standard deviation."""
        centred = s - s.mean()
        return centred / s.std()

    def global_neutralize(s):
        """Subtract the mean of the whole series from every value."""
        centre = s.mean()
        return s - centre
    
    # — ensure adv5, adv15, adv20, adv30, adv40, adv50, adv60, adv81 exist —
    for w in [5,15,20,30,40,50,60,81]:
        df[f'adv{w}'] = df['Volume'].rolling(int(w)).mean()
    
    # — α98 —
    df['alpha98'] = (
        rank(
            decay_linear(
                correlation(df['VWAP'], ts_sum(df['adv5'], 26), 4),
                7
            )
        )
        - rank(
            decay_linear(
                ts_rank(
                    ts_argmin(
                        correlation(rank(df['Open']), rank(df['adv15']), 20),
                        8
                    ),
                    7
                ),
                8
            )
        )
    )
    
    # — α99 —
    df['alpha99'] = -1 * (
        rank(
            correlation(
                ts_sum((df['High']+df['Low'])/2, 20),
                ts_sum(df['adv60'], 20),
                9
            )
        )
        < rank(correlation(df['Low'], df['Volume'], 6))
    )
    
    # — α100 (global-neutralize 대신 subindustry 제거) —
    # 1) 원본 factor
    f1 = (((df['Close']-df['Low']) - (df['High']-df['Close']))/(df['High']-df['Low'])) * df['Volume']
    # 2) 변동분 neutralized globally
    g1 = global_neutralize(rank(f1))
    g2 = global_neutralize(
            correlation(df['Close'], rank(df['adv20']), 5)
            - rank(ts_argmin(df['Close'], 30))
         )
    df['alpha100'] = -1 * ((1.5 * scale(g1) - scale(g2)) * (df['Volume']/df['adv20']))
    
    # — α101 —
    df['alpha101'] = (df['Close'] - df['Open']) / ((df['High'] - df['Low']) + 0.001)
    
    df.rename(columns={
        'neg_rank_corr_10_M1_ret': 'alpha1',
        'neg_rank_mul_delta':     'alpha2',
        'neg_rank_corr_10':       'alpha3',
        'neg_ts_rank_low_9':      'alpha4'
    }, inplace=True)
    
    # 두 컬럼 삭제
    df.drop(columns=['neg_rank_corr_10_M1', 'M1_log_ret'], inplace=True)

    
    return df


In [62]:
import cloudpickle
# Persist the alpha_values function object itself to disk via cloudpickle.
pickle_target = open("alpha_values.pkl", "wb")
with pickle_target as f:
    cloudpickle.dump(alpha_values, f)

# 실시간

In [63]:
def real_times(symbol, days=600):
    """Download recent daily OHLC prices for ``symbol`` through yfinance.

    Parameters
    ----------
    symbol : str
        Ticker passed to ``yf.Ticker``, e.g. ``'EURKRW=X'``.
    days : int, default 600
        Look-back length; the request asks for ``days + 3`` daily bars.

    Returns
    -------
    pandas.DataFrame
        Frame with a fresh integer index and the columns
        ``Date, Close, Open, High, Low, Change`` in that order. ``Date`` holds
        the calendar date of each bar converted with ``pd.to_datetime`` and
        ``Change`` is the row-over-row difference of ``Close`` (NaN on the
        first row).
    """
    ticker = yf.Ticker(symbol)
    history = ticker.history(period=f"{days+3}d", interval="1d")

    # Select the price columns first and copy afterwards, so the column
    # assignments below write to an independent frame instead of a slice of
    # the downloaded data (which triggers pandas' SettingWithCopyWarning).
    df = history[["Open", "High", "Low", "Close"]].copy()

    # Keep only the calendar date of each bar, then drop the original index.
    df["Date"] = df.index.date
    df.reset_index(drop=True, inplace=True)

    # Day-over-day change of the closing price.
    df["Change"] = df["Close"].diff()

    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

    df = df.reindex(columns=['Date', 'Close', 'Open', 'High', 'Low', 'Change'])
    return df

In [64]:
# Try the downloader on the 'EURKRW=X' symbol; as the last expression of the
# cell, the returned frame is what the notebook displays below.
real_times('EURKRW=X')

Unnamed: 0,Date,Close,Open,High,Low,Change
0,2023-04-06,1433.130005,1434.369995,1439.109985,1431.930054,
1,2023-04-07,1438.910034,1438.910034,1439.229980,1408.800049,5.780029
2,2023-04-10,1439.119995,1439.119995,1439.119995,1433.000000,0.209961
3,2023-04-11,1439.069946,1439.069946,1443.349976,1435.369995,-0.050049
4,2023-04-12,1439.699951,1439.699951,1456.180054,1439.579956,0.630005
...,...,...,...,...,...,...
595,2025-07-22,1615.589966,1615.359985,1623.520020,1610.000000,-0.180054
596,2025-07-23,1620.390015,1619.920044,1620.609985,1610.579956,4.800049
597,2025-07-24,1617.300049,1616.579956,1616.630005,1606.050049,-3.089966
598,2025-07-25,1612.349976,1611.739990,1622.280029,1609.939941,-4.950073


In [65]:
import cloudpickle
# Persist the real_times function object itself to disk via cloudpickle.
pickle_target = open("real_times.pkl", "wb")
with pickle_target as f:
    cloudpickle.dump(real_times, f)

# 리얼타임 알파

In [66]:
def real_times_alphas(symbol,days=600):
    
    ticker = yf.Ticker(symbol)
    df = ticker.history(period=f"{days+3}d", interval="1d")
    
    df = df.copy()
    df = df[["Open", "High", "Low", "Close"]]
    df.columns = [f"{col}" for col in df.columns]
    df["Date"] = df.index.date
    df.reset_index(drop=True, inplace=True)
    # 변동량 계산
    df["Change"] = df["Close"].diff()
    
    df['Date']=pd.to_datetime(df['Date'],format='%Y-%m-%d')
    
    df=df.reindex(columns=['Date','Close','Open','High','Low','Change'])
    
    df=df.set_index('Date')
    df=df.sort_index()
    
    # 1) 단순 수익률 & 로그 수익률
    df['return']     = df['Close'].pct_change()
    df['log_return'] = np.log(df['Close'] / df['Close'].shift(1))
    
    # 2) Lag 특성 (1~5일)
    for lag in range(1, 6):
        df[f'lag_{lag}'] = df['log_return'].shift(lag)
    
    # 3) 이동평균 & 이동표준편차 (SMA, STD)
    ma_windows = [5, 10, 20, 60]
    for w in ma_windows:
        df[f'sma_{w}'] = df['Close'].rolling(window=w).mean()
        df[f'std_{w}'] = df['Close'].rolling(window=w).std()
    
    # 4) 볼린저 밴드 (20일 기준, +-2σ)
    df['bb_upper'] = df['sma_20'] + 2 * df['std_20']
    df['bb_lower'] = df['sma_20'] - 2 * df['std_20']
    df['bb_width'] = df['bb_upper'] - df['bb_lower']
    
    # 5) 연환산 변동성
    df['volatility_20'] = df['log_return'].rolling(20).std() * np.sqrt(252)
    
    # 6) 모멘텀 지표
    mom_windows = [5, 10, 20]
    for w in mom_windows:
        df[f'momentum_{w}'] = df['Close'] - df['Close'].shift(w)
    
    # 7) RSI (14일 기본)
    def compute_rsi(series, window=14):
        """RSI built from simple rolling means of the up-moves and down-moves of ``series``."""
        step = series.diff()
        # Split each step into its upward part and the magnitude of its downward part
        ups = step.clip(lower=0)
        downs = -step.clip(upper=0)
        mean_up = ups.rolling(window).mean()
        mean_down = downs.rolling(window).mean()
        strength = mean_up / mean_down
        return 100 - 100 / (1 + strength)
    
    df['rsi_14'] = compute_rsi(df['Close'], 14)
    
    # 8) MACD & Signal
    ema_short = df['Close'].ewm(span=12, adjust=False).mean()
    ema_long  = df['Close'].ewm(span=26, adjust=False).mean()
    df['macd']     = ema_short - ema_long
    df['macd_sig'] = df['macd'].ewm(span=9, adjust=False).mean()
    df['macd_hist']= df['macd'] - df['macd_sig']
    
    # 9) 스토캐스틱 오실레이터 (%K, %D)
    low_min  = df['Close'].rolling(14).min()
    high_max = df['Close'].rolling(14).max()
    df['sto_k'] = (df['Close'] - low_min) / (high_max - low_min) * 100
    df['sto_d'] = df['sto_k'].rolling(3).mean()
    
    from pandas_datareader import data as pdr
    
    # 1) FRED에서 US M1 불러오기 (시계열: 월간)
    m1 = pdr.DataReader('MYAGM1EZM196N', 'fred',
                        start=df.index.min(),
                        end=df.index.max())
    m1.rename(columns={'MYAGM1EZM196N': 'M1'}, inplace=True)
    
    # 2) 날짜 인덱스 맞추고 결측은 직전값으로 채우기
    m1.index = pd.to_datetime(m1.index)
    # 영업일 기준으로 재색인 & ffill
    m1 = m1.reindex(df.index, method='ffill')
    
    # 3) df에 합치기
    df = df.join(m1)
    
    # 4) –1 × 롤링 상관계수 계산 (윈도우=10)
    df['neg_rank_corr_10_M1'] = -(
        df['Close'].rank()
          .rolling(window=10)
          .corr(df['M1'].rank())
    )
    
    # M1 로그수익률
    df['M1_log_ret'] = np.log(df['M1'] / df['M1'].shift(1))
    
    # –1 × rank corr on returns
    df['neg_rank_corr_10_M1_ret'] = -(
        df['Close'].rank()
          .rolling(10)
          .corr(df['M1_log_ret'].rank())
    )
    
    
    col = df['neg_rank_corr_10_M1_ret']
    total = len(col)
    
    # NaN 개수
    num_nan = col.isna().sum()
    # ±inf 개수
    num_inf = np.isinf(col).sum()
    
    # 비율 계산
    ratio_nan = num_nan / total
    ratio_inf = num_inf / total
    ratio_total = (num_nan + num_inf) / total
    
    mo = df[['Close','M1']].resample('M').last()
    mo['neg_corr_3M'] = -(
      mo['Close'].rank().rolling(3).corr(mo['M1'].rank())
    )
    
    
    # 만약 M1을 거래량 대용으로 쓰고 있다면 먼저 컬럼 리네임
    df.rename(columns={'M1': 'Volume'}, inplace=True)
    
    # 1) 1일 차분(delta)
    delta_close  = df['Close'].diff(1)
    delta_volume = df['Volume'].diff(1)
    
    # 2) 차분값을 rank로 변환한 뒤 elementwise 곱하고 -1 곱하기
    df['neg_rank_mul_delta'] = - (
        delta_close.rank() *
        delta_volume.rank()
    )
    
    # 10일 롤링 –rank corr
    df['neg_rank_corr_10'] = -(
        df['Close'].rank()
              .rolling(window=10)
              .corr(df['Volume'].rank())
    )
    
    
    # 1) Low 컬럼에 대한 전체 순위 계산
    low_rank = df['Low'].rank()
    
    # 2) 9일 윈도우 내에서 ‘현재(마지막) 값의 순위’를 비율로 계산하는 함수
    def ts_rank(s, window):
        """Rank of the newest value inside each rolling window, divided by the window length."""
        def _last_rank_fraction(chunk):
            return chunk.rank().iloc[-1] / len(chunk)

        roller = s.rolling(window)
        return roller.apply(_last_rank_fraction, raw=False)
    
    # 3) ts_rank(rank(Low), 9) 계산 후 –1 곱해서 새로운 컬럼에 저장
    df['neg_ts_rank_low_9'] = - ts_rank(low_rank, 9)
    
    # —————————————— helper functions ——————————————
    def ts_max(s, window):
        """Rolling maximum over ``window`` rows."""
        roller = s.rolling(window)
        return roller.max()

    def ts_rank(s, window):
        """Relative rank of the newest value inside each window (rank / window length)."""
        def _last_rank_fraction(chunk):
            return chunk.rank().iloc[-1] / len(chunk)

        roller = s.rolling(window)
        return roller.apply(_last_rank_fraction, raw=False)

    def delay(s, period):
        """Value of ``s`` from ``period`` rows earlier."""
        shifted = s.shift(period)
        return shifted
    
    # —————————————— alpha5 ——————————————
    # alpha5 = rank(open - ts_max(open,5)) ** rank(volume)
    df['alpha5'] = (
        (df['Open'] - ts_max(df['Open'], 5)).rank()
        ** df['Volume'].rank()
    )
    
    # —————————————— alpha6 ——————————————
    # alpha6 = -1 * corr(open, volume, 10)
    df['alpha6'] = - df['Open'].rolling(window=10).corr(df['Volume'])
    
    # —————————————— alpha7 ——————————————
    # alpha7 = - ts_rank(low,3) if volume != 0 else 0
    r7 = ts_rank(df['Low'], 3)
    df['alpha7'] = np.where(df['Volume'] != 0, -r7, 0)
    
    # —————————————— alpha8 ——————————————
    # alpha8 = -1 * rank( ts_max( rank(corr(volume, low,5)), 3 ) )
    corr_vl = df['Volume'].rolling(window=5).corr(df['Low'])
    rank_corr = corr_vl.rank()
    tsmax_rc3 = ts_max(rank_corr, 3)
    df['alpha8'] = - tsmax_rc3.rank()
    
    # —————————————— alpha9 ——————————————
    # alpha9 = rank(open - delay(open,1)) + rank(delay(open,1) - delay(open,2))
    r1 = (df['Open'] - delay(df['Open'], 1)).rank()
    r2 = (delay(df['Open'], 1) - delay(df['Open'], 2)).rank()
    df['alpha9'] = r1 + r2
    
    # ————— helper functions —————
    def delta(s, period):
        """Difference between each value and the one ``period`` rows earlier."""
        changed = s.diff(period)
        return changed

    def ts_min(s, window):
        """Rolling minimum over ``window`` rows."""
        roller = s.rolling(window)
        return roller.min()

    def ts_max(s, window):
        """Rolling maximum over ``window`` rows."""
        roller = s.rolling(window)
        return roller.max()

    def ts_sum(s, window):
        """Rolling sum over ``window`` rows."""
        roller = s.rolling(window)
        return roller.sum()

    def sign(s):
        """Element-wise sign (-1, 0 or 1) via ``np.sign``."""
        signs = np.sign(s)
        return signs

    def rank(s):
        """Rank of each value within the whole series (pandas ``rank`` defaults)."""
        ranked = s.rank()
        return ranked

    def correlation(s1, s2, window):
        """Rolling correlation of ``s1`` with ``s2`` over ``window`` rows."""
        roller = s1.rolling(window)
        return roller.corr(s2)

    def covariance(s1, s2, window):
        """Rolling covariance of ``s1`` with ``s2`` over ``window`` rows."""
        roller = s1.rolling(window)
        return roller.cov(s2)
    
    
    # — ensure necessary columns — 
    # (if you haven’t already renamed Volume & computed VWAP)
    # df.rename(columns={'M1': 'Volume'}, inplace=True)
    if 'VWAP' not in df.columns:
        df['VWAP'] = (df['High'] + df['Low'] + df['Close']) / 3
    
    
    # ————— alpha10 —————
    # –1 * rank(abs(close – open))
    df['alpha10'] = - rank((df['Close'] - df['Open']).abs())
    
    
    # ————— alpha11 —————
    # ( rank(ts_max(vwap–close,3)) + rank(ts_min(vwap–close,3)) ) * rank(delta(volume,3))
    p = df['VWAP'] - df['Close']
    df['alpha11'] = (
        rank(ts_max(p, 3)) +
        rank(ts_min(p, 3))
    ) * rank(delta(df['Volume'], 3))
    
    
    # ————— alpha12 —————
    # sign(delta(volume,1)) * (–1 * delta(close,1))
    df['alpha12'] = sign(delta(df['Volume'], 1)) * (- delta(df['Close'], 1))
    
    
    # ————— alpha13 —————
    # –1 * rank(covariance(rank(close), rank(volume),5))
    cov_cv = covariance(rank(df['Close']), rank(df['Volume']), 5)
    df['alpha13'] = - rank(cov_cv)
    
    
    # ————— alpha14 —————
    # –1 * rank(delta(return,3)) * correlation(open, volume,10)
    # (your returns column is named 'return' from pct_change())
    df['alpha14'] = - rank(delta(df['return'], 3)) * correlation(df['Open'], df['Volume'], 10)
    
    
    # ————— alpha15 —————
    # –1 * ts_sum( rank(correlation(rank(high), rank(volume),3)), 3 )
    corr_hv3 = correlation(rank(df['High']), rank(df['Volume']), 3)
    df['alpha15'] = - ts_sum(rank(corr_hv3), 3)
    
    
    # ————— alpha16 —————
    # –1 * rank(covariance(rank(high), rank(volume),5))
    cov_hv5 = covariance(rank(df['High']), rank(df['Volume']), 5)
    df['alpha16'] = - rank(cov_hv5)
    
    
    # ————— helper functions —————
    def delta(s, period):
        """Difference between each value and the one ``period`` rows earlier."""
        changed = s.diff(period)
        return changed

    def ts_rank(s, window):
        """Rank of the newest value inside each rolling window, divided by the window length."""
        def _last_rank_fraction(chunk):
            return chunk.rank().iloc[-1] / len(chunk)

        roller = s.rolling(window)
        return roller.apply(_last_rank_fraction, raw=False)

    def ts_sum(s, window):
        """Rolling sum over ``window`` rows."""
        roller = s.rolling(window)
        return roller.sum()

    def ts_min(s, window):
        """Rolling minimum over ``window`` rows."""
        roller = s.rolling(window)
        return roller.min()

    def ts_max(s, window):
        """Rolling maximum over ``window`` rows."""
        roller = s.rolling(window)
        return roller.max()

    def stddev(s, window):
        """Rolling standard deviation over ``window`` rows (pandas default ddof)."""
        roller = s.rolling(window)
        return roller.std()

    def rank(s):
        """Rank of each value within the whole series (pandas ``rank`` defaults)."""
        ranked = s.rank()
        return ranked

    def correlation(s1, s2, window):
        """Rolling correlation of ``s1`` with ``s2`` over ``window`` rows."""
        roller = s1.rolling(window)
        return roller.corr(s2)

    def sign(s):
        """Element-wise sign (-1, 0 or 1) via ``np.sign``."""
        signs = np.sign(s)
        return signs

    def delay(s, period):
        """Value of ``s`` from ``period`` rows earlier."""
        shifted = s.shift(period)
        return shifted
    
    # — ensure VWAP & adv20 exist —
    if 'VWAP' not in df.columns:
        df['VWAP'] = (df['High'] + df['Low'] + df['Close']) / 3
    df['adv20'] = df['Volume'].rolling(20).mean()
    
    # ————— alpha17 —————
    # (-1 * rank(ts_rank(close,10))) * rank(Δ² close) * rank(ts_rank(volume/adv20,5))
    df['alpha17'] = (
        - rank(ts_rank(df['Close'], 10))
        * rank(delta(delta(df['Close'], 1), 1))
        * rank(ts_rank(df['Volume'] / df['adv20'], 5))
    )
    
    # ————— alpha18 —————
    # -1 * rank(stddev(|close-open|,5) + (close-open) + corr(close, open,10))
    expr18 = (
        stddev((df['Close'] - df['Open']).abs(), 5)
        + (df['Close'] - df['Open'])
        + correlation(df['Close'], df['Open'], 10)
    )
    df['alpha18'] = - rank(expr18)
    
    # ————— alpha19 —————
    # -1 * sign((close - delay(close,7)) + Δ close(7)) * (1 + rank(1 + ts_sum(returns,250)))
    sign19 = sign((df['Close'] - delay(df['Close'], 7)) + delta(df['Close'], 7))
    sum250 = ts_sum(df['return'], 250)
    df['alpha19'] = -1 * sign19 * (1 + rank(1 + sum250))
    
    # ————— alpha20 —————
    # (-1 * rank(open - delay(high,1))) * rank(open - delay(close,1)) * rank(open - delay(low,1))
    r20a = - rank(df['Open'] - delay(df['High'], 1))
    r20b = rank(df['Open'] - delay(df['Close'], 1))
    r20c = rank(df['Open'] - delay(df['Low'], 1))
    df['alpha20'] = r20a * r20b * r20c
    
    # ————— alpha21 —————
    # 삼중 조건문
    ma8 = ts_sum(df['Close'], 8) / 8
    ma2 = ts_sum(df['Close'], 2) / 2
    sd8 = stddev(df['Close'], 8)
    vol_rel = df['Volume'] / df['adv20']
    
    df['alpha21'] = np.where(
        (ma8 + sd8) < ma2, -1,
        np.where(
            ma2 < (ma8 - sd8), 1,
            np.where((vol_rel >= 1), 1, -1)
        )
    )
    
    # ————— alpha22 —————
    # -1 * Δ(corr(high, volume,5), 5) * rank(stddev(close,20))
    corr5_hv = correlation(df['High'], df['Volume'], 5)
    df['alpha22'] = - delta(corr5_hv, 5) * rank(stddev(df['Close'], 20))
    
    # ————— alpha23 —————
    # ts_sum(high,20)/20 < high → -Δ high(2) else 0
    df['alpha23'] = np.where(
        (ts_sum(df['High'], 20) / 20) < df['High'],
        - delta(df['High'], 2),
        0
    )
    
    # ————— alpha24 —————
    # 조건문: Δ(ma100,100)/delay(close,100) <= 0.05
    ma100 = ts_sum(df['Close'], 100) / 100
    frac = delta(ma100, 100) / delay(df['Close'], 100)
    df['alpha24'] = np.where(
        frac <= 0.05,
        - (df['Close'] - ts_min(df['Close'], 100)),
        - delta(df['Close'], 3)
    )
    
    # ————— alpha25 —————
    # rank((-1 * returns) * adv20 * VWAP * (high - close))
    expr25 = (-1 * df['return']) * df['adv20'] * df['VWAP'] * (df['High'] - df['Close'])
    df['alpha25'] = rank(expr25)
    
    # ————— alpha26 —————
    # -1 * ts_max(corr(ts_rank(volume,5), ts_rank(high,5),5), 3)
    corr_vh5 = correlation(ts_rank(df['Volume'], 5), ts_rank(df['High'], 5), 5)
    df['alpha26'] = - ts_max(corr_vh5, 3)
    
    # ————— helper functions —————
    def delta(s, period):
        """Difference between each value and the one ``period`` rows earlier."""
        changed = s.diff(period)
        return changed

    def rank(s):
        """Rank of each value within the whole series (pandas ``rank`` defaults)."""
        ranked = s.rank()
        return ranked

    def ts_sum(s, window):
        """Rolling sum over ``window`` rows."""
        roller = s.rolling(window)
        return roller.sum()

    def ts_min(s, window):
        """Rolling minimum over ``window`` rows."""
        roller = s.rolling(window)
        return roller.min()

    def ts_max(s, window):
        """Rolling maximum over ``window`` rows."""
        roller = s.rolling(window)
        return roller.max()

    def ts_rank(s, window):
        """Rank of the newest value inside each rolling window, divided by the window length."""
        def _last_rank_fraction(chunk):
            return chunk.rank().iloc[-1] / len(chunk)

        roller = s.rolling(window)
        return roller.apply(_last_rank_fraction, raw=False)

    def stddev(s, window):
        """Rolling standard deviation over ``window`` rows (pandas default ddof)."""
        roller = s.rolling(window)
        return roller.std()

    def correlation(s1, s2, window):
        """Rolling correlation of ``s1`` with ``s2`` over ``window`` rows."""
        roller = s1.rolling(window)
        return roller.corr(s2)

    def sign(s):
        """Element-wise sign (-1, 0 or 1) via ``np.sign``."""
        signs = np.sign(s)
        return signs

    def delay(s, period):
        """Value of ``s`` from ``period`` rows earlier."""
        shifted = s.shift(period)
        return shifted

    def scale(s):
        """Standardise ``s``: subtract its mean and divide by its standard deviation."""
        centred = s - s.mean()
        return centred / s.std()

    def decay_linear(s, period):
        """Rolling weighted mean with weights 1..period; the newest row gets the largest weight."""
        ramp = np.arange(1, period+1)

        def _weighted_mean(values):
            return np.dot(values, ramp) / ramp.sum()

        return s.rolling(period).apply(_weighted_mean, raw=True)

    def product(*args):
        """Multiply all positional arguments together, left to right."""
        first, rest = args[0], args[1:]
        acc = first
        for factor in rest:
            acc = acc * factor
        return acc
    
    # ensure VWAP & adv20 존재
    if 'VWAP' not in df.columns:
        df['VWAP'] = (df['High'] + df['Low'] + df['Close']) / 3
    df['adv20'] = df['Volume'].rolling(20).mean()
    
    # ————— alpha27 —————
    # if rank(ts_sum(corr(rank(volume),rank(vwap),6),2)/2) > 0.5 then -1 else 1
    tmp27 = ts_sum(correlation(rank(df['Volume']), rank(df['VWAP']), 6), 2) / 2
    df['alpha27'] = np.where(rank(tmp27) > 0.5, -1, 1)
    
    # ————— alpha28 —————
    # scale(corr(adv20, low,5) + (high+low)/2 - close)
    expr28 = correlation(df['adv20'], df['Low'], 5) + (df['High'] + df['Low'])/2 - df['Close']
    df['alpha28'] = scale(expr28)
    
    # ————— alpha29 —————
    # ts_min(product(rank(rank(scale(log(ts_sum(ts_min(rank(rank(-1*rank(delta(close-1,5))),2),1))))),1),5)
    # + ts_rank(delay(-1*returns,6),5)
    part29 = ts_sum(ts_min(rank(rank(-1 * rank(delta(df['Close']-1, 5)))), 2), 1)
    part29 = np.log(part29).pipe(scale).pipe(rank).pipe(rank)
    prod29 = product(rank(part29), 1)
    df['alpha29'] = ts_min(prod29, 5) + ts_rank(delay(-1 * df['return'], 6), 5)
    
    # ————— alpha30 —————
    # ((1-rank(sign(Δ1+Δ2+Δ3))) * ts_sum(volume,5)) / ts_sum(volume,20)
    sigs = sign(df['Close'] - delay(df['Close'],1)) \
           + sign(delay(df['Close'],1) - delay(df['Close'],2)) \
           + sign(delay(df['Close'],2) - delay(df['Close'],3))
    df['alpha30'] = ((1 - rank(sigs)) * ts_sum(df['Volume'], 5)) / ts_sum(df['Volume'], 20)
    
    # ————— alpha31 —————
    # rank(rank(rank(decay_linear(-1*rank(rank(delta(close,10))),10)))) 
    # + rank(-delta(close,3)) + sign(scale(corr(adv20,low,12)))
    part31 = decay_linear(-1 * rank(rank(delta(df['Close'], 10))), 10)
    df['alpha31'] = rank(rank(rank(part31))) \
                    + rank(-delta(df['Close'], 3)) \
                    + sign(scale(correlation(df['adv20'], df['Low'], 12)))
    
    # ————— alpha32 —————
    # scale((ts_sum(close,7)/7)-close) + 20*scale(corr(vwap,delay(close,5),230))
    df['alpha32'] = scale(ts_sum(df['Close'], 7)/7 - df['Close']) \
                    + 20 * scale(correlation(df['VWAP'], delay(df['Close'], 5), 230))
    
    # ————— alpha33 —————
    # rank(-((1-open/close)**1))
    df['alpha33'] = rank(-((1 - df['Open']/df['Close'])**1))
    
    # ————— alpha34 —————
    # rank((1-rank(stddev(returns,2)/stddev(returns,5))) + (1-rank(delta(close,1))))
    expr34 = (1 - rank(stddev(df['return'], 2) / stddev(df['return'], 5))) \
             + (1 - rank(delta(df['Close'], 1)))
    df['alpha34'] = rank(expr34)
    
    # ————— alpha35 —————
    # ts_rank(volume,32)*(1-ts_rank(close+high-low,16))*(1-ts_rank(returns,32))
    df['alpha35'] = ts_rank(df['Volume'], 32) \
                    * (1 - ts_rank(df['Close'] + df['High'] - df['Low'], 16)) \
                    * (1 - ts_rank(df['return'], 32))
    
    # ————— alpha36 —————
    # 2.21*rank(corr(close-open,delay(volume,1),15)) + 0.7*rank(open-close)
    # +0.73*rank(ts_rank(delay(-1*returns,6),5)) + rank(abs(corr(vwap,adv20,6)))
    # +0.6*rank(((ts_sum(close,200)/200-open)*(close-open)))
    term1 = rank(correlation(df['Close']-df['Open'], delay(df['Volume'],1), 15)) * 2.21
    term2 = rank(df['Open'] - df['Close']) * 0.7
    term3 = rank(ts_rank(delay(-1 * df['return'], 6), 5)) * 0.73
    term4 = rank(correlation(df['VWAP'], df['adv20'], 6).abs())
    term5 = rank((ts_sum(df['Close'], 200)/200 - df['Open']) * (df['Close'] - df['Open'])) * 0.6
    df['alpha36'] = term1 + term2 + term3 + term4 + term5
    
    # ————— alpha37 —————
    # rank(corr(delay(open-close,1), close,200)) + rank(open-close)
    df['alpha37'] = rank(correlation(delay(df['Open']-df['Close'],1), df['Close'], 200)) \
                    + rank(df['Open'] - df['Close'])
    
    # — helper functions —
    def delta(s, period):
        """Return the change in ``s`` over ``period`` rows (``s`` minus its lagged self)."""
        return s.diff(periods=period)

    def rank(s):
        """Rank the values of ``s`` over the whole series (ties share the average rank).

        NOTE(review): this yields ordinal ranks (1..N), not a 0-1 percentile;
        formulas in this file such as ``1 - rank(x)`` may expect the latter --
        confirm.
        """
        return s.rank(method="average", ascending=True)

    def ts_rank(s, window):
        """Rolling rank of the newest value inside its trailing ``window``.

        The newest observation's rank within the window is divided by the
        window length, so a value that is the window maximum maps to 1.0.
        """
        def _newest_rank_share(win):
            # ``win`` is a pandas Series because raw=False is used below.
            return win.rank().iloc[-1] / len(win)

        return s.rolling(window).apply(_newest_rank_share, raw=False)

    def decay_linear(s, period):
        """Rolling weighted mean with weights 1..``period`` (newest row weighted most)."""
        ramp = np.arange(1, period + 1)
        ramp_total = ramp.sum()

        def _weighted_mean(values):
            return np.dot(values, ramp) / ramp_total

        return s.rolling(period).apply(_weighted_mean, raw=True)

    def ts_sum(s, window):
        """Rolling sum of ``s`` over ``window`` rows."""
        return s.rolling(window=window).sum()

    def stddev(s, window):
        """Rolling standard deviation of ``s`` over ``window`` rows."""
        return s.rolling(window=window).std()

    def correlation(s1, s2, window):
        """Rolling correlation between ``s1`` and ``s2`` over ``window`` rows."""
        return s1.rolling(window=window).corr(other=s2)
    
    # — ensure returns column exists —
    # If your pct_change return is in 'return', alias it:
    df['returns'] = df.get('returns', df['return'])
    
    # — alpha38 —
    # -1 * rank(ts_rank(Close,10)) * rank(Close/Open)
    df['alpha38'] = -1 * rank(ts_rank(df['Close'], 10)) * rank(df['Close'] / df['Open'])
    
    # — alpha39 —
    # -1 * rank(Δ7(Close) * (1 - rank(decay_linear(Volume/adv20,9)))) * (1 + rank(ts_sum(returns,250)))
    term39 = delta(df['Close'], 7) * (1 - rank(decay_linear(df['Volume'] / df['adv20'], 9)))
    df['alpha39'] = -1 * rank(term39) * (1 + rank(ts_sum(df['returns'], 250)))
    
    # — alpha40 —
    # -1 * rank(stddev(High,10)) * corr(High, Volume,10)
    df['alpha40'] = -1 * rank(stddev(df['High'], 10)) * correlation(df['High'], df['Volume'], 10)
    
    # — alpha41 —
    # sqrt(High * Low) - VWAP
    df['alpha41'] = (df['High'] * df['Low'])**0.5 - df['VWAP']
    
    # — alpha42 —
    # rank(VWAP - Close) / rank(VWAP + Close)
    df['alpha42'] = rank(df['VWAP'] - df['Close']) / rank(df['VWAP'] + df['Close'])
    
    # — alpha43 —
    # ts_rank(Volume/adv20,20) * ts_rank(-Δ7(Close),8)
    df['alpha43'] = ts_rank(df['Volume'] / df['adv20'], 20) * ts_rank(-1 * delta(df['Close'], 7), 8)
    
    # — alpha44 —
    # -1 * corr(High, rank(Volume),5)
    df['alpha44'] = -1 * correlation(df['High'], rank(df['Volume']), 5)
    
    # — alpha48 재계산 예시 —
    num48 = (
        correlation(delta(df['Close'], 1),
                    delta(df['Close'].shift(1), 1), 250)
        * delta(df['Close'], 1)
    ) / df['Close']
    den48 = ts_sum((delta(df['Close'], 1) / df['Close'].shift(1)) ** 2, 250)
    
    # global neutralize: 전체 평균을 빼줌
    glob_neut = num48 - num48.mean()
    
    # alpha48 재계산
    df['alpha48'] = glob_neut / den48
    
    # alpha45
    df['alpha45'] = -1 * (
        rank(ts_sum(delay(df['Close'], 5), 20) / 20)
        * correlation(df['Close'], df['Volume'], 2)
        * rank(correlation(ts_sum(df['Close'], 5), ts_sum(df['Close'], 20), 2))
    )
    
    # alpha46
    expr46 = ((delay(df['Close'], 20) - delay(df['Close'], 10)) / 10
              - (delay(df['Close'], 10) - df['Close']) / 10)
    df['alpha46'] = np.where(
        expr46 > 0.25, -1,
        np.where(expr46 < 0, 1, -1 * (df['Close'] - delay(df['Close'], 1)))
    )
    
    # alpha47
    df['alpha47'] = (
        ((rank(1 / df['Close']) * df['Volume']) / df['adv20'])
        * ((df['High'] * rank(df['High'] - df['Close'])) / (ts_sum(df['High'], 5) / 5))
    ) - rank(df['VWAP'] - delay(df['VWAP'], 5))
    
    # alpha49
    expr49 = ((delay(df['Close'], 20) - delay(df['Close'], 10)) / 10
              - (delay(df['Close'], 10) - df['Close']) / 10)
    df['alpha49'] = np.where(
        expr49 < -0.1, 1,
        -1 * (df['Close'] - delay(df['Close'], 1))
    )
    
    # alpha50
    df['alpha50'] = -1 * ts_max(
        rank(correlation(rank(df['Volume']), rank(df['VWAP']), 5)),
        5
    )
    
    # 1) FRED에서 미국 실질 GDP 불러오기 (분기별)
    gdp = pdr.DataReader('NAEXKP01EZQ657S', 'fred',
                         start=df.index.min(),
                         end=df.index.max())
    gdp.rename(columns={'NAEXKP01EZQ657S': 'cap'}, inplace=True)
    
    # 2) 분기→일별(영업일) 인덱스로 재색인하고 이전 값으로 채우기
    gdp = gdp.reindex(df.index, method='ffill')
    
    # 3) df에 ‘cap’ 컬럼으로 합치기
    df = df.join(gdp['cap'])
    
    # 4) α56 다시 계산
    #    -1 * rank( ts_sum(returns,10) / ts_sum(ts_sum(returns,2),3) ) * rank(returns * cap)
    df['alpha56'] = (
        - rank(ts_sum(df['returns'], 10) / ts_sum(ts_sum(df['returns'], 2), 3))
        * rank(df['returns'] * df['cap'])
    )
    
    # ts_argmax 정의 (각 윈도우에서 최대값 위치 반환)
    def ts_argmax(s, window):
        """Rolling position of the maximum inside each trailing ``window``.

        The position is 0-based from the oldest row of the window; when the
        maximum occurs more than once the first occurrence is reported.
        """
        def _first_max_offset(values):
            # ``values`` is a NumPy array because raw=True is used below.
            return values.argmax()

        return s.rolling(window).apply(_first_max_offset, raw=True)
    
    # 그 뒤에 alpha57 재계산
    df['alpha57'] = -1 * (df['Close'] - df['VWAP']) \
                    / decay_linear(rank(ts_argmax(df['Close'], 30)), 2)
    
    # num58: α58의 분자 계산 부분 (원래 코드를 그대로 이용)
    num58 = correlation(
                delta(df['Close'], 1),
                delta(df['Close'].shift(1), 1),
                int(3.92795)
            )
    num58 = decay_linear(num58, int(7.89291))
    
    # 글로벌 중립화: 전체 평균 빼기
    glob_neut58 = num58 - num58.mean()
    
    # α58 재계산
    df['alpha58'] = -1 * ts_rank(glob_neut58, int(5.50322))
    
    
    # 1) weighted_vwap 정의 (여기선 사실 VWAP 그대로지만, 계수만 반영)
    weighted_vwap = df['VWAP'] * 0.728317 + df['VWAP'] * (1 - 0.728317)
    
    # 2) 원래 α59 식 분자: corr(weighted_vwap, Volume, window≈4.25) → Int로 변환
    num59 = correlation(weighted_vwap, df['Volume'], window=int(4.25197))
    
    # 3) decay_linear 적용 (period≈16.23 → Int로)
    decay_num59 = decay_linear(num59, period=int(16.2289))
    
    # 4) 글로벌 중립화 (전체 평균 빼기)
    glob_neut59 = decay_num59 - decay_num59.mean()
    
    # 5) 최종 α59 (–1× ts_rank(..., window≈8.20))
    df['alpha59'] = -1 * ts_rank(glob_neut59, window=int(8.19648))
    
    # α51
    df['alpha51'] = np.where(
        ((df['Close'].shift(20) - df['Close'].shift(10)) / 10
         - (df['Close'].shift(10) - df['Close']) / 10) < -0.05,
        1,
        -1 * (df['Close'] - df['Close'].shift(1))
    )
    
    # α52
    df['alpha52'] = (
        (- ts_min(df['Low'], 5)
         + delay(ts_min(df['Low'], 5), 5))
        * rank((ts_sum(df['returns'], 240) - ts_sum(df['returns'], 20)) / 220)
        * ts_rank(df['Volume'], 5)
    )
    
    # α53
    df['alpha53'] = -1 * delta(
        ((df['Close'] - df['Low']) - (df['High'] - df['Close']))
        / (df['Close'] - df['Low']),
        9
    )
    
    # α54
    df['alpha54'] = (
        -1 * (df['Low'] - df['Close']) * df['Open']**5
    ) / (
        (df['Low'] - df['High']) * df['Close']**5
    )
    
    # α55
    expr55 = (df['Close'] - ts_min(df['Low'], 12)) \
             / (ts_max(df['High'], 12) - ts_min(df['Low'], 12))
    df['alpha55'] = -1 * correlation(
        rank(expr55),
        rank(df['Volume']),
        6
    )
    
    
    
    # α60
    term60 = (((df['Close'] - df['Low']) - (df['High'] - df['Close']))
              / (df['High'] - df['Low'])) * df['Volume']
    df['alpha60'] = -1 * (
        2 * scale(rank(term60))
        - scale(rank(ts_argmax(df['Close'], 10)))
    )
    
    df['adv180'] = df['Volume'].rolling(180).mean()
    df['adv120'] = df['Volume'].rolling(120).mean()
    df['adv60']  = df['Volume'].rolling(60).mean()
    df['adv50']  = df['Volume'].rolling(50).mean()
    df['adv15']  = df['Volume'].rolling(15).mean()
    
    
    # global neutralize: 전체 평균 빼기
    def global_neutralize(s):
        """Demean ``s`` by subtracting its mean over the whole series."""
        series_mean = s.mean()
        return s.sub(series_mean)
    
    # α61 (변경 없음)
    df['alpha61'] = (
        rank(df['VWAP'] - ts_min(df['VWAP'], 16))
        < rank(correlation(df['VWAP'], df['adv180'], 17))
    ).astype(int)
    
    # α62 (변경 없음)
    df['alpha62'] = (
        (rank(correlation(df['VWAP'], ts_sum(df['adv20'], 22), 9))
         < rank((rank(df['Open']) + rank(df['Open']))
                < (rank((df['High'] + df['Low'])/2) + rank(df['High']))))
        * -1
    )
    
    # α63 (global neutralize 적용)
    df['alpha63'] = -(
        rank(decay_linear(delta(global_neutralize(df['Close']), 2), 8))
        - rank(decay_linear(
            correlation(
                df['VWAP'] * 0.318108 + df['Open'] * (1 - 0.318108),
                ts_sum(df['adv180'], 37),
                13
            ),
            12
        ))
    )
    
    # α64 (변경 없음)
    df['alpha64'] = -(
        rank(correlation(
            ts_sum(df['Open'] * 0.178404 + df['Low'] * (1 - 0.178404), 12),
            ts_sum(df['adv120'], 12),
            16
        ))
        < rank(delta(
            (df['High'] + df['Low'])/2 * 0.178404
            + df['VWAP'] * (1 - 0.178404),
            3
        ))
    ).astype(int)
    
    # α65 (변경 없음)
    df['alpha65'] = -(
        rank(correlation(
            df['Open'] * 0.00817205 + df['VWAP'] * (1 - 0.00817205),
            ts_sum(df['adv60'], 8),
            6
        ))
        < rank(df['Open'] - ts_min(df['Open'], 13))
    ).astype(int)
    
    # α66 (변경 없음)
    df['alpha66'] = -(
        rank(decay_linear(delta(df['VWAP'], 3), 7))
        + ts_rank(decay_linear(
            ((df['Low'] * 0.96633 + df['Low'] * (1 - 0.96633)) - df['VWAP'])
            / (df['Open'] - (df['High'] + df['Low'])/2),
            11
        ), 6)
    )
    
    # α67 (global neutralize 적용)
    df['alpha67'] = - rank(df['High'] - ts_min(df['High'], 2)) \
                    * rank(correlation(
                        global_neutralize(df['VWAP']),
                        global_neutralize(df['adv20']),
                        6
                    ))
    
    # α68 (변경 없음)
    df['alpha68'] = -(
        ts_rank(correlation(rank(df['High']), rank(df['adv15']), 8), 13)
        < rank(delta(df['Close'] * 0.518371 + df['Low'] * (1 - 0.518371), 1))
    ).astype(int)
    
    # α69 (global neutralize 적용)
    df['alpha69'] = - (
        rank(ts_max(delta(global_neutralize(df['VWAP']), 2), 4))
        * ts_rank(correlation(
            df['Close'] * 0.490655 + df['VWAP'] * (1 - 0.490655),
            df['adv20'],
            4
        ), 9)
    )
    
    # α70 (global neutralize 적용)
    df['alpha70'] = - (
        rank(delta(df['VWAP'], 1))
        * ts_rank(correlation(
            global_neutralize(df['Close']),
            df['adv50'],
            17
        ), 17)
    )
    
    # 1) adv 컬럼 일괄 생성
    for w in [5, 10, 15, 20, 30, 40, 50, 60, 81, 120, 150, 180]:
        df[f'adv{w}'] = df['Volume'].rolling(window=w).mean()
    
    # 2) 그 다음 α71–α80 코드 실행
    # (이전에 짜둔 α71–α80 스니펫을 그대로 붙여넣으면 KeyError 없이 계산됩니다)
    
    
    # — adv 컬럼 확인/생성 (필요한 advX)
    for w in [15,30,40,50,60,81,120,150,180]:
        df[f'adv{w}'] = df['Volume'].rolling(w).mean()
    
    # — α73 (period 인자 둘 다 명시)
    df['alpha73'] = -1 * np.maximum(
        # decay_linear(delta(VWAP, 4.72775), 2.91864)
        rank(
            decay_linear(
                delta(df['VWAP'], int(4.72775)),
                int(2.91864)
            )
        ),
        # ts_rank(decay_linear(… , 3.33829), 16.7411)
        ts_rank(
            decay_linear(
                -1 * delta(
                    df['Open'] * 0.147155 + df['Low'] * (1 - 0.147155),
                    int(2.03608)
                ),
                int(3.33829)
            ),
            int(16.7411)
        )
    )
    
    
    # 0) 필요한 adv 컬럼 미리 생성
    for w in [10,15,20,30,40,50,60,81,120,150,180]:
        df[f'adv{w}'] = df['Volume'].rolling(int(w)).mean()
    
    # 1) 글로벌 중립화 함수
    def global_neutralize(s):
        """Return ``s`` centred on zero: each value minus the full-series mean."""
        offset = s.mean()
        return s.sub(offset)
    
    # 2) α70
    df['alpha70'] = -1 * (
        rank(delta(df['VWAP'], int(1.29456)))
        * ts_rank(
            correlation(
                global_neutralize(df['Close']),
                df['adv50'],
                int(17.8256)
            ),
            int(17.9171)
        )
    )
    
    # 3) α71
    df['alpha71'] = np.maximum(
        ts_rank(
            decay_linear(
                correlation(
                    ts_rank(df['Close'], int(3.43976)),
                    ts_rank(df['adv180'], int(12.0647)),
                    int(18.0175)
                ),
                int(4.20501)
            ),
            int(15.6948)
        ),
        ts_rank(
            decay_linear(
                rank(df['Low'] + df['Open'] - 2*df['VWAP']),
                int(16.4662)
            ),
            int(4.4388)
        )
    )
    
    # 4) α72
    df['alpha72'] = (
        rank(
            decay_linear(
                correlation(
                    (df['High'] + df['Low'])/2,
                    df['adv40'],
                    int(8.93345)
                ),
                int(10.1519)
            )
        ) /
        rank(
            decay_linear(
                correlation(
                    ts_rank(df['VWAP'], int(3.72469)),
                    ts_rank(df['Volume'], int(18.5188)),
                    int(6.86671)
                ),
                int(2.95011)
            )
        )
    )
    
    # 5) α73
    df['alpha73'] = -1 * np.maximum(
        rank(decay_linear(delta(df['VWAP'], int(4.72775)), int(2.91864))),
        ts_rank(
            decay_linear(
                -1 * delta(
                    df['Open']*0.147155 + df['Low']*(1-0.147155),
                    int(2.03608)
                ),
                int(3.33829)
            ),
            int(16.7411)
        )
    )
    
    # 6) α74
    df['alpha74'] = (
        rank(correlation(df['Close'], ts_sum(df['adv30'], int(37.4843)), int(15.1365)))
        <
        rank(correlation(
            rank(df['High']*0.0261661 + df['VWAP']*(1-0.0261661)),
            rank(df['Volume']),
            int(11.4791)
        ))
    ) * -1
    
    # 7) α75
    df['alpha75'] = (
        rank(correlation(df['VWAP'], df['Volume'], int(4.24304)))
        <
        rank(correlation(rank(df['Low']), rank(df['adv50']), int(12.4413)))
    )
    
    # 8) α76
    df['alpha76'] = -1 * np.maximum(
        rank(decay_linear(delta(df['VWAP'], int(1.24383)), int(11.8259))),
        ts_rank(
            decay_linear(
                ts_rank(
                    correlation(
                        global_neutralize(df['Low']),
                        df['adv81'],
                        int(8.14941)
                    ),
                    int(19.569)
                ),
                int(17.1543)
            ),
            int(19.383)
        )
    )
    
    # 9) α77
    df['alpha77'] = np.minimum(
        rank(decay_linear(((df['High']+df['Low'])/2 + df['High'] - df['VWAP'] - df['High']), int(20.0451))),
        rank(decay_linear(correlation((df['High']+df['Low'])/2, df['adv40'], int(3.1614)), int(5.64125)))
    )
    
    # α78 (수정)
    df['alpha78'] = (
        rank(
            correlation(
                ts_sum(df['Low'] * 0.352233 + df['VWAP'] * (1 - 0.352233), int(19.7428)),
                ts_sum(df['adv40'], int(19.7428)),
                int(6.83313)
            )
        )
        *
        rank(
            correlation(
                rank(df['VWAP']),
                rank(df['Volume']),
                int(5.77492)
            )
        )
    )
    
    # 11) α79
    df['alpha79'] = (
        rank(delta(global_neutralize(df['Close']*0.60733 + df['Open']*(1-0.60733)), int(1.23438)))
        <
        rank(correlation(ts_rank(df['VWAP'], int(3.60973)), ts_rank(df['adv150'], int(9.18637)), int(14.6644)))
    )
    
    # α79
    df['alpha79'] = (
        rank(
            delta(
                global_neutralize(df['Close'] * 0.60733 + df['Open'] * (1 - 0.60733)),
                int(1.23438)
            )
        )
        < rank(
            correlation(
                ts_rank(df['VWAP'], int(3.60973)),
                ts_rank(df['adv150'], int(9.18637)),
                int(14.6644)
            )
        )
    )
    
    # α80
    df['alpha80'] = -1 * (
        rank(
            sign(
                delta(
                    global_neutralize(df['Open'] * 0.868128 + df['High'] * (1 - 0.868128)),
                    int(4.04545)
                )
            )
        )
        * ts_rank(
            correlation(df['High'], df['adv10'], int(5.11456)),
            int(5.53756)
        )
    )
    
    # α81
    df['alpha81'] = -1 * (
        rank(
            np.log(
                product(
                    rank(
                        rank(
                            correlation(
                                df['VWAP'],
                                ts_sum(df['adv10'], int(49.6054)),
                                int(8.47743)
                            )
                        ) ** 4
                    ),
                    int(14.9655)
                )
            )
        )
        < rank(
            correlation(
                rank(df['VWAP']),
                rank(df['Volume']),
                int(5.07914)
            )
        )
    )
    
    # α82
    df['alpha82'] = -1 * np.minimum(
        rank(
            decay_linear(
                delta(
                    global_neutralize(df['Volume']),
                    int(1.46063)
                ),
                int(14.8717)
            )
        ),
        ts_rank(
            decay_linear(
                correlation(
                    global_neutralize(df['Volume']),
                    df['Open'] * 0.634196 + df['Open'] * (1 - 0.634196),
                    int(17.4842)
                ),
                int(6.92131)
            ),
            int(13.4283)
        )
    )
    
    # α83
    num83 = (
        rank(
            delay(
                (df['High'] - df['Low']) / (ts_sum(df['Close'], 5) / 5),
                int(2)
            )
        )
        * rank(df['Volume'].rank())
    )
    den83 = ((df['High'] - df['Low']) / (ts_sum(df['Close'], 5) / 5)) / (df['VWAP'] - df['Close'])
    df['alpha83'] = num83 / den83
    
    def signedpower(base, exponent):
        """Raise the magnitude of ``base`` to ``exponent`` while keeping its sign.

        Computes ``sign(base) * |base| ** exponent`` element-wise.
        """
        magnitude = np.abs(base)
        direction = np.sign(base)
        return direction * (magnitude ** exponent)
    
    # …이후에 α84 계산 부분…
    df['alpha84'] = signedpower(
        ts_rank(df['VWAP'] - ts_max(df['VWAP'], int(15.3217)), int(20.7127)),
        delta(df['Close'], int(4.96796))
    )
    
    
    # α85
    df['alpha85'] = (
        rank(
            correlation(
                df['High'] * 0.876703 + df['Close'] * (1 - 0.876703),
                df['adv30'],
                int(9.61331)
            )
        )
        * rank(
            correlation(
                ts_rank((df['High'] + df['Low']) / 2, int(3.70596)),
                ts_rank(df['Volume'], int(10.1595)),
                int(7.11408)
            )
        )
    )
    
    # α86
    df['alpha86'] = -1 * (
        ts_rank(
            correlation(
                df['Close'],
                ts_sum(df['adv20'], int(14.7444)),
                int(6.00049)
            ),
            int(20.4195)
        )
        < rank(df['Open'] + df['Close'] - (df['VWAP'] + df['Open']))
    )
    
    # α87
    tmp87a = rank(
        decay_linear(
            delta(
                df['Close'] * 0.369701 + df['VWAP'] * (1 - 0.369701),
                int(1.91233)
            ),
            int(2.65461)
        )
    )
    tmp87b = ts_rank(
        decay_linear(
            abs(
                correlation(
                    global_neutralize(df['adv81']),
                    df['Close'],
                    int(13.4132)
                )
            ),
            int(4.89768)
        ),
        int(14.4535)
    )
    df['alpha87'] = -1 * np.maximum(tmp87a, tmp87b)
    
    # α88
    df['alpha88'] = np.minimum(
        rank(
            decay_linear(
                rank(df['Open']) + rank(df['Low']) - rank(df['High']) - rank(df['Close']),
                int(8.06882)
            )
        ),
        ts_rank(
            decay_linear(
                correlation(
                    ts_rank(df['Close'], int(8.44728)),
                    ts_rank(df['adv60'], int(20.6966)),
                    int(8.01266)
                ),
                int(6.65053)
            ),
            int(2.61957)
        )
    )
    
    # α89
    df['alpha89'] = (
        ts_rank(
            decay_linear(
                correlation(df['Low'], df['adv10'], int(6.94279)),
                int(5.51607)
            ),
            int(3.79744)
        )
        - ts_rank(
            decay_linear(
                delta(global_neutralize(df['VWAP']), int(3.48158)),
                int(10.1466)
            ),
            int(15.3012)
        )
    )
    
    # — helper functions —
    def delta(s: pd.Series, period: int) -> pd.Series:
        """Change in ``s`` over ``period`` rows; ``period`` is truncated to an int."""
        lag = int(period)
        return s.diff(periods=lag)

    def delay(s: pd.Series, period: int) -> pd.Series:
        """``s`` lagged by ``period`` rows; ``period`` is truncated to an int."""
        lag = int(period)
        return s.shift(periods=lag)

    def rank(s: pd.Series) -> pd.Series:
        """Rank of each value over the whole series (ties share the average rank)."""
        return s.rank(method="average", ascending=True)

    def ts_sum(s: pd.Series, window: int) -> pd.Series:
        """Rolling sum over ``int(window)`` rows."""
        size = int(window)
        return s.rolling(window=size).sum()

    def ts_max(s: pd.Series, window: int) -> pd.Series:
        """Rolling maximum over ``int(window)`` rows."""
        size = int(window)
        return s.rolling(window=size).max()

    def ts_min(s: pd.Series, window: int) -> pd.Series:
        """Rolling minimum over ``int(window)`` rows."""
        size = int(window)
        return s.rolling(window=size).min()

    def ts_rank(s: pd.Series, window: int) -> pd.Series:
        """Rolling rank of the newest value in its window, divided by the window length."""
        def _newest_rank_share(win):
            # ``win`` is a pandas Series because raw=False is used below.
            return win.rank().iloc[-1] / len(win)

        return s.rolling(int(window)).apply(_newest_rank_share, raw=False)

    def ts_argmax(s: pd.Series, window: int) -> pd.Series:
        """Rolling 0-based position of the (first) maximum inside each window."""
        def _first_max_offset(values):
            return values.argmax()

        return s.rolling(int(window)).apply(_first_max_offset, raw=True)

    def ts_argmin(s: pd.Series, window: int) -> pd.Series:
        """Rolling 0-based position of the (first) minimum inside each window."""
        def _first_min_offset(values):
            return values.argmin()

        return s.rolling(int(window)).apply(_first_min_offset, raw=True)

    def decay_linear(s: pd.Series, period: int) -> pd.Series:
        """Rolling weighted mean with weights 1..``int(period)`` (newest row weighted most)."""
        span = int(period)
        ramp = np.arange(1, span + 1)

        def _weighted_mean(values):
            return np.dot(values, ramp) / ramp.sum()

        return s.rolling(span).apply(_weighted_mean, raw=True)

    def correlation(s1: pd.Series, s2: pd.Series, window: int) -> pd.Series:
        """Rolling correlation of ``s1`` with ``s2`` over ``int(window)`` rows."""
        size = int(window)
        return s1.rolling(window=size).corr(other=s2)

    def global_neutralize(s: pd.Series) -> pd.Series:
        """Demean ``s`` using its mean over the whole series."""
        return s.sub(s.mean())

    def product(*args) -> pd.Series:
        """Multiply all positional arguments together, left to right.

        NOTE(review): earlier call sites in this file pass an integer window as
        the second argument (e.g. ``product(x, 1)``); under this definition that
        would scale ``x`` by the integer rather than take a rolling product --
        confirm which meaning is intended.
        """
        running = args[0]
        for factor in args[1:]:
            running = running * factor
        return running
    
    # — ensure adv columns exist —
    for w in [5,10,15,20,30,40,50,60,81,120,150,180]:
        df[f'adv{w}'] = df['Volume'].rolling(int(w)).mean()
    # α90
    df['alpha90'] = -1 * (
        rank(df['Close'] - ts_max(df['Close'], int(4.66719))) *
        ts_rank(
            correlation(global_neutralize(df['adv40']), df['Low'], int(5.38375)),
            int(3.21856)
        )
    )
    
    # α91
    df['alpha91'] = -1 * (
        ts_rank(
            decay_linear(
                decay_linear(
                    correlation(global_neutralize(df['Close']), df['Volume'], int(9.74928)),
                    int(16.398)
                ),
                int(3.83219)
            ),
            int(4.8667)
        )
        - rank(
            decay_linear(
                correlation(df['VWAP'], df['adv30'], int(4.01303)),
                int(2.6809)
            )
        )
    )
    
    # α92
    df['alpha92'] = np.minimum(
        ts_rank(
            decay_linear(
                ((df['High'] + df['Low']) / 2 + df['Close']) < (df['Low'] + df['Open']),
                int(14.7221)
            ),
            int(18.8683)
        ),
        ts_rank(
            decay_linear(
                correlation(
                    rank(df['Low']),
                    rank(df['adv30']),
                    int(7.58555)
                ),
                int(6.94024)
            ),
            int(6.80584)
        )
    )
    
    # α93
    df['alpha93'] = (
        ts_rank(
            decay_linear(
                correlation(global_neutralize(df['VWAP']), df['adv81'], int(17.4193)),
                int(19.848)
            ),
            int(7.54455)
        )
        /
        rank(
            decay_linear(
                delta(df['Close'] * 0.524434 + df['VWAP'] * (1 - 0.524434), int(2.77377)),
                int(16.2664)
            )
        )
    )
    
    # α94
    df['alpha94'] = -1 * (
        rank(df['VWAP'] - ts_min(df['VWAP'], int(11.5783))) *
        ts_rank(
            correlation(
                ts_rank(df['VWAP'], int(19.6462)),
                ts_rank(df['adv60'], int(4.02992)),
                int(18.0926)
            ),
            int(2.70756)
        )
    )
    
    # α95
    df['alpha95'] = (
        rank(df['Open'] - ts_min(df['Open'], int(12.4105))) <
        ts_rank(
            rank(
                correlation(
                    ts_sum((df['High'] + df['Low']) / 2, int(19.1351)),
                    ts_sum(df['adv40'], int(19.1351)),
                    int(12.8742)
                )
            ) ** 5,
            int(11.7584)
        )
    )
    
    # α96
    df['alpha96'] = -1 * np.maximum(
        ts_rank(
            decay_linear(
                correlation(rank(df['VWAP']), rank(df['Volume']), int(3.83878)),
                int(4.16783)
            ),
            int(8.38151)
        ),
        ts_rank(
            decay_linear(
                ts_argmax(
                    correlation(
                        ts_rank(df['Close'], int(7.45404)),
                        ts_rank(df['adv60'], int(4.13242)),
                        int(3.65459)
                    ),
                    int(12.6556)
                ),
                int(14.0365)
            ),
            int(13.4143)
        )
    )
    
    # α97
    df['alpha97'] = -1 * (
        rank(
            decay_linear(
                delta(global_neutralize(df['Low'] * 0.721001 + df['VWAP'] * (1 - 0.721001)), int(3.3705)),
                int(20.4523)
            )
        )
        - ts_rank(
            decay_linear(
                ts_rank(
                    correlation(
                        ts_rank(df['Low'], int(7.87871)),
                        ts_rank(df['adv60'], int(17.255)),
                        int(4.97547)
                    ),
                    int(18.5925)
                ),
                int(15.7152)
            ),
            int(6.71659)
        )
    )
    
    # α98
    df['alpha98'] = (
        rank(
            decay_linear(
                correlation(df['VWAP'], ts_sum(df['adv5'], int(26.4719)), int(4.58418)),
                int(7.18088)
            )
        )
        - rank(
            decay_linear(
                ts_rank(
                    ts_argmin(
                        correlation(rank(df['Open']), rank(df['adv15']), int(20.8187)),
                        int(8.62571)
                    ),
                    int(6.95668)
                ),
                int(8.07206)
            )
        )
    )
    
    # — helper functions —
    def delta(s, period):
        """Change in ``s`` over ``int(period)`` rows."""
        return s.diff(periods=int(period))

    def ts_sum(s, w):
        """Rolling sum over ``int(w)`` rows."""
        return s.rolling(window=int(w)).sum()

    def ts_max(s, w):
        """Rolling maximum over ``int(w)`` rows."""
        return s.rolling(window=int(w)).max()

    def ts_min(s, w):
        """Rolling minimum over ``int(w)`` rows."""
        return s.rolling(window=int(w)).min()

    def ts_rank(s, w):
        """Rolling rank of the newest value in its window, divided by the window length."""
        def _newest_rank_share(win):
            # ``win`` is a pandas Series because raw=False is used below.
            return win.rank().iloc[-1] / len(win)

        return s.rolling(int(w)).apply(_newest_rank_share, raw=False)

    def ts_argmin(s, w):
        """Rolling 0-based position of the (first) minimum inside each window."""
        def _first_min_offset(values):
            return values.argmin()

        return s.rolling(int(w)).apply(_first_min_offset, raw=True)

    def correlation(s1, s2, w):
        """Rolling correlation of ``s1`` with ``s2`` over ``int(w)`` rows."""
        return s1.rolling(window=int(w)).corr(other=s2)

    def decay_linear(s, p):
        """Rolling weighted mean with weights 1..``int(p)`` (newest row weighted most)."""
        span = int(p)
        ramp = np.arange(1, span + 1)

        def _weighted_mean(values):
            return np.dot(values, ramp) / ramp.sum()

        return s.rolling(span).apply(_weighted_mean, raw=True)

    def rank(s):
        """Rank of each value over the whole series (ties share the average rank)."""
        return s.rank(method="average", ascending=True)

    def scale(s):
        """Standardise ``s``: subtract its mean and divide by its standard deviation.

        NOTE(review): this is a z-score; the "101 Formulaic Alphas" definition
        of ``scale`` rescales so that sum(|x|) equals 1 -- confirm which one
        the alphas below are meant to use.
        """
        centred = s - s.mean()
        return centred / s.std()

    def global_neutralize(s):
        """Demean ``s`` using its mean over the whole series."""
        return s.sub(s.mean())
    
    # — ensure adv5, adv15, adv20, adv30, adv40, adv50, adv60, adv81 exist —
    for w in [5,15,20,30,40,50,60,81]:
        df[f'adv{w}'] = df['Volume'].rolling(int(w)).mean()
    
    # — α98 —
    df['alpha98'] = (
        rank(
            decay_linear(
                correlation(df['VWAP'], ts_sum(df['adv5'], 26), 4),
                7
            )
        )
        - rank(
            decay_linear(
                ts_rank(
                    ts_argmin(
                        correlation(rank(df['Open']), rank(df['adv15']), 20),
                        8
                    ),
                    7
                ),
                8
            )
        )
    )
    
    # — α99 —
    df['alpha99'] = -1 * (
        rank(
            correlation(
                ts_sum((df['High']+df['Low'])/2, 20),
                ts_sum(df['adv60'], 20),
                9
            )
        )
        < rank(correlation(df['Low'], df['Volume'], 6))
    )
    
    # — α100 (subindustry neutralization replaced by global neutralization: subtract the full-series mean) —
    # 1) 원본 factor
    f1 = (((df['Close']-df['Low']) - (df['High']-df['Close']))/(df['High']-df['Low'])) * df['Volume']
    # 2) 변동분 neutralized globally
    g1 = global_neutralize(rank(f1))
    g2 = global_neutralize(
            correlation(df['Close'], rank(df['adv20']), 5)
            - rank(ts_argmin(df['Close'], 30))
         )
    df['alpha100'] = -1 * ((1.5 * scale(g1) - scale(g2)) * (df['Volume']/df['adv20']))
    
    # — α101 —
    df['alpha101'] = (df['Close'] - df['Open']) / ((df['High'] - df['Low']) + 0.001)
    
    df.rename(columns={
        'neg_rank_corr_10_M1_ret': 'alpha1',
        'neg_rank_mul_delta':     'alpha2',
        'neg_rank_corr_10':       'alpha3',
        'neg_ts_rank_low_9':      'alpha4'
    }, inplace=True)
    
    # 두 컬럼 삭제
    df.drop(columns=['neg_rank_corr_10_M1', 'M1_log_ret'], inplace=True)

    
    return df


In [67]:
real_times_alphas('EURKRW=X')

  mo = df[['Close','M1']].resample('M').last()


Unnamed: 0_level_0,Close,Open,High,Low,Change,return,log_return,lag_1,lag_2,lag_3,...,alpha92,alpha93,alpha94,alpha95,alpha96,alpha97,alpha98,alpha99,alpha100,alpha101
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-04-06,1433.130005,1434.369995,1439.109985,1431.930054,,,,,,,...,,,,False,,,,0,,-0.172678
2023-04-07,1438.910034,1438.910034,1439.229980,1408.800049,5.780029,0.004033,0.004025,,,,...,,,,False,,,,0,,0.000000
2023-04-10,1439.119995,1439.119995,1439.119995,1433.000000,0.209961,0.000146,0.000146,0.004025,,,...,,,,False,,,,0,,0.000000
2023-04-11,1439.069946,1439.069946,1443.349976,1435.369995,-0.050049,-0.000035,-0.000035,0.000146,0.004025,,...,,,,False,,,,0,,0.000000
2023-04-12,1439.699951,1439.699951,1456.180054,1439.579956,0.630005,0.000438,0.000438,-0.000035,0.000146,0.004025,...,,,,False,,,,0,,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-22,1615.589966,1615.359985,1623.520020,1610.000000,-0.180054,-0.000111,-0.000111,-0.000402,0.003433,0.001447,...,,,,False,,,,0,,0.017009
2025-07-23,1620.390015,1619.920044,1620.609985,1610.579956,4.800049,0.002971,0.002967,-0.000111,-0.000402,0.003433,...,,,,False,,,,0,,0.046852
2025-07-24,1617.300049,1616.579956,1616.630005,1606.050049,-3.089966,-0.001907,-0.001909,0.002967,-0.000111,-0.000402,...,,,,False,,,,0,,0.068056
2025-07-25,1612.349976,1611.739990,1622.280029,1609.939941,-4.950073,-0.003061,-0.003065,-0.001909,0.002967,-0.000111,...,,,,False,,,,0,,0.049427


In [68]:
import cloudpickle
# Serialize the real_times_alphas function object itself (not the result of
# calling it) into real_times_alphas.pkl in the working directory.
with open("real_times_alphas.pkl", mode="wb") as pkl_file:
    pkl_file.write(cloudpickle.dumps(real_times_alphas))

# 실시간 지수

In [69]:
import yfinance as yf
import pandas as pd

def euro_indicator(days=600):
    """Download daily OHLCV for three European indices and join them by date.

    The symbols ``^GDAXI``, ``^STOXX50E`` and ``^FCHI`` are fetched through
    ``yfinance`` (network access required) and their columns are prefixed
    with ``DAX_``, ``EUROSTOXX50_`` and ``CAC_`` respectively.

    Args:
        days: Base length of the look-back window. The request sent to
            ``yfinance`` covers ``days + 12`` daily bars (presumably a buffer
            for rows lost in the inner join -- TODO confirm). The result is
            NOT trimmed to ``days`` rows.

    Returns:
        pandas.DataFrame with a timezone-naive, midnight-normalised ``Date``
        column followed by ``<NAME>_Open/High/Low/Close/Volume`` for each
        index. Only dates present for all three indices are kept.
    """
    tickers = {
        "DAX": "^GDAXI",
        "EUROSTOXX50": "^STOXX50E",
        "CAC": "^FCHI",
    }
    price_fields = ["Open", "High", "Low", "Close", "Volume"]

    df_merged = None
    for name, symbol in tickers.items():
        df = yf.Ticker(symbol).history(period=f"{days+12}d", interval="1d")
        df = df.reset_index()

        # Reduce the timestamp to a plain calendar date BEFORE merging.
        # Timezone-aware keys are matched as instants, so exchanges in
        # different UTC offsets would never line up; joining on the local
        # trading date is what the final table is meant to express.
        df["Date"] = (
            pd.to_datetime(df["Date"]).dt.tz_localize(None).dt.normalize()
        )

        df = df[["Date"] + price_fields].rename(
            columns={field: f"{name}_{field}" for field in price_fields}
        )

        if df_merged is None:
            df_merged = df
        else:
            df_merged = pd.merge(df_merged, df, on="Date", how="inner")

    return df_merged

In [70]:
# Fetch the merged index table with the default days=600 and print its
# column / non-null / dtype summary via DataFrame.info().
euro_indicator().info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 16 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Date                600 non-null    datetime64[ns]
 1   DAX_Open            600 non-null    float64       
 2   DAX_High            600 non-null    float64       
 3   DAX_Low             600 non-null    float64       
 4   DAX_Close           600 non-null    float64       
 5   DAX_Volume          600 non-null    int64         
 6   EUROSTOXX50_Open    600 non-null    float64       
 7   EUROSTOXX50_High    600 non-null    float64       
 8   EUROSTOXX50_Low     600 non-null    float64       
 9   EUROSTOXX50_Close   600 non-null    float64       
 10  EUROSTOXX50_Volume  600 non-null    int64         
 11  CAC_Open            600 non-null    float64       
 12  CAC_High            600 non-null    float64       
 13  CAC_Low             600 non-null    float64       

In [71]:
import cloudpickle
# Serialize the euro_indicator function object itself (not the result of
# calling it) into euro_indicator.pkl in the working directory.
with open("euro_indicator.pkl", mode="wb") as pkl_file:
    pkl_file.write(cloudpickle.dumps(euro_indicator))

In [72]:
# Attach the European index columns to the EURKRW=X frame by matching on
# 'Date'; merge defaults to an inner join, so only dates present on both
# sides remain. The merged frame is the cell's displayed value.
real_times_alphas('EURKRW=X').merge(euro_indicator(), on='Date')

  mo = df[['Close','M1']].resample('M').last()


Unnamed: 0,Date,Close,Open,High,Low,Change,return,log_return,lag_1,lag_2,...,EUROSTOXX50_Open,EUROSTOXX50_High,EUROSTOXX50_Low,EUROSTOXX50_Close,EUROSTOXX50_Volume,CAC_Open,CAC_High,CAC_Low,CAC_Close,CAC_Volume
0,2023-04-06,1433.130005,1434.369995,1439.109985,1431.930054,,,,,,...,4295.549805,4313.740234,4293.850098,4309.450195,28509400,7328.569824,7344.339844,7312.609863,7324.750000,67271300
1,2023-04-11,1439.069946,1439.069946,1443.349976,1435.369995,-0.050049,-0.000035,-0.000035,0.000146,0.004025,...,4312.660156,4343.919922,4312.660156,4333.290039,30115400,7381.770020,7403.669922,7370.089844,7390.279785,73186100
2,2023-04-12,1439.699951,1439.699951,1456.180054,1439.579956,0.630005,0.000438,0.000438,-0.000035,0.000146,...,4332.330078,4374.229980,4323.669922,4334.029785,30179800,7396.810059,7463.669922,7379.790039,7396.939941,63577700
3,2023-04-13,1452.790039,1453.449951,1457.040039,1437.260010,13.090088,0.009092,0.009051,0.000438,-0.000035,...,4334.669922,4363.609863,4334.669922,4363.240234,28715800,7450.759766,7485.589844,7450.759766,7480.830078,59146200
4,2023-04-14,1437.060059,1437.020020,1440.479980,1430.209961,-15.729980,-0.010827,-0.010886,0.009051,0.000438,...,4364.089844,4397.580078,4363.310059,4390.750000,35527500,7500.770020,7533.410156,7491.430176,7519.609863,71857700
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,2025-07-22,1615.589966,1615.359985,1623.520020,1610.000000,-0.180054,-0.000111,-0.000111,-0.000402,0.003433,...,5332.319824,5332.319824,5277.850098,5290.479980,16474700,7775.040039,7782.660156,7726.740234,7744.410156,47029700
572,2025-07-23,1620.390015,1619.920044,1620.609985,1610.579956,4.800049,0.002971,0.002967,-0.000111,-0.000402,...,5305.589844,5371.350098,5305.589844,5344.250000,33305200,7839.709961,7869.959961,7816.270020,7850.430176,66792400
573,2025-07-24,1617.300049,1616.579956,1616.630005,1606.050049,-3.089966,-0.001907,-0.001909,0.002967,-0.000111,...,5365.919922,5396.310059,5337.970215,5355.200195,26211600,7873.500000,7898.029785,7797.279785,7818.279785,74863300
574,2025-07-25,1612.349976,1611.739990,1622.280029,1609.939941,-4.950073,-0.003061,-0.003065,-0.001909,0.002967,...,5342.660156,5362.229980,5317.540039,5352.160156,20673400,7782.390137,7851.459961,7759.589844,7834.580078,59054700
