In [2]:
%pip install prophet

Collecting prophet
  Downloading prophet-1.1.4-py3-none-macosx_10_9_x86_64.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting cmdstanpy>=1.0.4
  Downloading cmdstanpy-1.1.0-py3-none-any.whl (83 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.2/83.2 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
Collecting importlib-resources
  Downloading importlib_resources-6.0.0-py3-none-any.whl (31 kB)
Installing collected packages: importlib-resources, cmdstanpy, prophet
  Attempting uninstall: cmdstanpy
    Found existing installation: cmdstanpy 0.9.5
    Uninstalling cmdstanpy-0.9.5:
      Successfully uninstalled cmdstanpy-0.9.5
Successfully installed cmdstanpy-1.1.0 importlib-resources-6.0.0 prophet-1.1.4

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mn

In [2]:
from prophet import Prophet

Importing plotly failed. Interactive plots will not work.


In [3]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [4]:
train = pd.read_csv('./train.csv')

In [5]:
results_df = pd.DataFrame(columns=['종목코드', 'final_return'])
results_df


Unnamed: 0,종목코드,final_return


In [6]:
# train 데이터에 존재하는 독립적인 종목코드 추출
unique_codes = train['종목코드'].unique()
unique_codes

array(['A060310', 'A095570', 'A006840', ..., 'A003280', 'A037440',
       'A238490'], dtype=object)

In [7]:
# 각 종목코드에 대해서 모델 학습 및 추론 반복
for code in tqdm(unique_codes):
    
    # 학습 데이터 생성
    train_close = train[train['종목코드'] == code][['일자', '종가']]
    train_close.columns = ['ds', 'y']  # Prophet requires the time column to be 'ds' and the value column to be 'y'
    train_close['ds'] = pd.to_datetime(train_close['ds'], format='%Y%m%d')

    # 모델 선언 및 학습
    model = Prophet(daily_seasonality=True) 
    model.fit(train_close)
    
    # 추론
    future = model.make_future_dataframe(periods=15)  # we want to predict the next 15 days
    forecast = model.predict(future)
    predictions = forecast['yhat'][-15:]  # we only want the last 15 predictions

    # 최종 수익률 계산
    final_return = (predictions.iloc[-1] - predictions.iloc[0]) / predictions.iloc[0]
    
    # 결과 저장
    results_df = pd.concat([results_df, pd.DataFrame({'종목코드': [code], 'final_return': [final_return]})], ignore_index=True)

# 각 순위를 중복없이 생성
results_df['순위'] = results_df['final_return'].rank(method='first', ascending=False).astype('int')
results_df

  0%|          | 0/2000 [00:00<?, ?it/s]23:06:04 - cmdstanpy - INFO - Chain [1] start processing
23:06:04 - cmdstanpy - INFO - Chain [1] done processing
  0%|          | 1/2000 [00:00<31:12,  1.07it/s]23:06:05 - cmdstanpy - INFO - Chain [1] start processing
23:06:05 - cmdstanpy - INFO - Chain [1] done processing
  0%|          | 2/2000 [00:01<21:01,  1.58it/s]23:06:05 - cmdstanpy - INFO - Chain [1] start processing
23:06:05 - cmdstanpy - INFO - Chain [1] done processing
  0%|          | 3/2000 [00:01<17:17,  1.93it/s]23:06:05 - cmdstanpy - INFO - Chain [1] start processing
23:06:06 - cmdstanpy - INFO - Chain [1] done processing
  0%|          | 4/2000 [00:02<15:25,  2.16it/s]23:06:06 - cmdstanpy - INFO - Chain [1] start processing
23:06:06 - cmdstanpy - INFO - Chain [1] done processing
  0%|          | 5/2000 [00:02<14:34,  2.28it/s]23:06:06 - cmdstanpy - INFO - Chain [1] start processing
23:06:06 - cmdstanpy - INFO - Chain [1] done processing
  0%|          | 6/2000 [00:02<14:11,  2.3

Unnamed: 0,종목코드,final_return,순위
0,A060310,-0.001471,1213
1,A095570,-0.032898,1911
2,A006840,0.020361,355
3,A054620,-0.009749,1583
4,A265520,0.010894,617
...,...,...,...
1995,A189980,0.005083,864
1996,A000540,0.001473,1041
1997,A003280,-0.016741,1756
1998,A037440,-0.002140,1255


In [8]:
# 주식별로 15일 동안의 예상 수익률 계산
results_df['expected_return'] = results_df['final_return'].apply(lambda x: (x + 1) ** (1/15) - 1)

# 상위 200개 주식과 하위 200개 주식 선택
top_200 = results_df.sort_values(by='final_return', ascending=False).head(200)
bottom_200 = results_df.sort_values(by='final_return', ascending=False).tail(200)

# 롱 포지션과 숏 포지션에 대한 예상 수익률 계산
long_return = top_200['expected_return'].mean()
short_return = bottom_200['expected_return'].mean()

# 포트폴리오 전체의 예상 수익률 계산
portfolio_return = (long_return * 200 - short_return * 200) / 400

# 롱 포지션과 숏 포지션에 대한 수익률의 표준편차 계산
long_std = top_200['expected_return'].std()
short_std = bottom_200['expected_return'].std()

# 포트폴리오 전체의 수익률 표준편차 계산
portfolio_std = ((long_std ** 2 * 200 + short_std ** 2 * 200) / 400) ** 0.5

# 샤프 비율 계산
sharpe_ratio = portfolio_return / portfolio_std

sharpe_ratio

0.5858071249681575

In [15]:
sample_submission = pd.read_csv('./sample_submission.csv')
sample_submission

Unnamed: 0,종목코드,순위
0,A000020,1
1,A000040,2
2,A000050,3
3,A000070,4
4,A000080,5
...,...,...
1995,A375500,1996
1996,A378850,1997
1997,A383220,1998
1998,A383310,1999


In [16]:
baseline_submission = sample_submission[['종목코드']].merge(results_df[['종목코드', '순위']], on='종목코드', how='left')
baseline_submission

Unnamed: 0,종목코드,순위
0,A000020,1323
1,A000040,412
2,A000050,1339
3,A000070,664
4,A000080,1699
...,...,...
1995,A375500,1602
1996,A378850,712
1997,A383220,967
1998,A383310,566


In [17]:
baseline_submission.to_csv('prophet_submission.csv', index=False)