In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from prophet import Prophet
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

train = pd.read_csv('./train.csv')
# 결과를 저장하기 위한 dataframe 생성
results_df = pd.DataFrame(columns=['종목코드', 'final_return'])

# train 데이터에 존재하는 독립적인 종목코드 추출
unique_codes = train['종목코드'].unique()

# 과거 데이터를 기반으로 데이터프레임을 구성하는 함수
def create_features(df, n_days):
    df_new = df.copy()
    for i in range(1, n_days+1):
        df_new[f'종가_lag_{i}'] = df_new['종가'].shift(i)
    df_new.dropna(inplace=True)
    return df_new

# 각 종목코드에 대해서 모델 학습 및 추론 반복
for code in tqdm(unique_codes):
    
    # 학습 데이터 생성
    train_data = train[train['종목코드'] == code][['일자', '종가']]
    train_data['일자'] = pd.to_datetime(train_data['일자'], format='%Y%m%d')
    train_data.set_index('일자', inplace=True)
    
    # Prophet 모델 학습 및 추론
    df = train_data.reset_index().rename(columns={'일자':'ds', '종가':'y'})
    model_prophet = Prophet()
    model_prophet.fit(df)
    future = model_prophet.make_future_dataframe(periods=15)
    forecast = model_prophet.predict(future)
    final_return_prophet = (forecast['yhat'].values[-1] - forecast['yhat'].values[-15]) / forecast['yhat'].values[-15]
    
    # Gradient Boosting 모델 학습 및 추론
    train_data = create_features(train_data, 5)
    X = train_data.drop('종가', axis=1).values
    y = train_data['종가'].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    model_gb = XGBRegressor(n_estimators=100, learning_rate=0.1)
    model_gb.fit(X_train, y_train)
    predictions = []
    X_last = X_test[-1, :]
    for _ in range(15):
        pred = model_gb.predict(X_last.reshape(1, -1))
        predictions.append(pred[0])
        X_last = np.roll(X_last, -1)
        X_last[-1] = pred
    final_return_gb = (predictions[-1] - predictions[0]) / predictions[0]
    
    # 두 모델의 결과를 앙상블
    final_return = (final_return_prophet + final_return_gb) / 2
    
    # 결과 저장
    results_df = results_df.append({'종목코드': code, 'final_return': final_return}, ignore_index=True)

# 각 순위를 중복없이 생성
results_df['순위'] = results_df['final_return'].rank(method='first', ascending=False).astype('int')
results_df


  0%|          | 0/2000 [00:00<?, ?it/s]01:09:41 - cmdstanpy - INFO - Chain [1] start processing
01:09:41 - cmdstanpy - INFO - Chain [1] done processing
  results_df = results_df.append({'종목코드': code, 'final_return': final_return}, ignore_index=True)
  0%|          | 1/2000 [00:00<23:24,  1.42it/s]01:09:42 - cmdstanpy - INFO - Chain [1] start processing
01:09:42 - cmdstanpy - INFO - Chain [1] done processing
  results_df = results_df.append({'종목코드': code, 'final_return': final_return}, ignore_index=True)
  0%|          | 2/2000 [00:01<21:10,  1.57it/s]01:09:42 - cmdstanpy - INFO - Chain [1] start processing
01:09:43 - cmdstanpy - INFO - Chain [1] done processing
  results_df = results_df.append({'종목코드': code, 'final_return': final_return}, ignore_index=True)
  0%|          | 3/2000 [00:02<22:20,  1.49it/s]01:09:43 - cmdstanpy - INFO - Chain [1] start processing
01:09:43 - cmdstanpy - INFO - Chain [1] done processing
  results_df = results_df.append({'종목코드': code, 'final_return': final_

Unnamed: 0,종목코드,final_return,순위
0,A060310,-0.145520,1991
1,A095570,-0.010874,1641
2,A006840,0.053218,97
3,A054620,-0.007791,1556
4,A265520,0.010359,762
...,...,...,...
1995,A189980,0.001354,1171
1996,A000540,-0.000077,1251
1997,A003280,-0.010531,1635
1998,A037440,0.053064,98


In [3]:
sample_submission = pd.read_csv('./sample_submission.csv')
sample_submission

Unnamed: 0,종목코드,순위
0,A000020,1
1,A000040,2
2,A000050,3
3,A000070,4
4,A000080,5
...,...,...
1995,A375500,1996
1996,A378850,1997
1997,A383220,1998
1998,A383310,1999


In [4]:
baseline_submission = sample_submission[['종목코드']].merge(results_df[['종목코드', '순위']], on='종목코드', how='left')
baseline_submission

Unnamed: 0,종목코드,순위
0,A000020,1711
1,A000040,1030
2,A000050,840
3,A000070,538
4,A000080,1531
...,...,...
1995,A375500,1206
1996,A378850,155
1997,A383220,396
1998,A383310,1446


In [5]:
baseline_submission.to_csv('ProphetGBEnsenble_submission.csv', index=False)