In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import LabelEncoder

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

from datetime import datetime, date, timedelta

import warnings
warnings.filterwarnings('ignore')

import FinanceDataReader as fdr

# 시각화 사용자 설정
from matplotlib import rcParams
sns.set_style('whitegrid')
%matplotlib inline

from sklearn.preprocessing import MinMaxScaler
from xgboost import plot_importance as xgb_plot_importance
from lightgbm import plot_importance as lgb_plot_importance

from keras.models import load_model
import joblib
import talib
import yfinance as yf

In [2]:
# Normalization
def scale(df, method='minmax', makeDf=True) :
    """Fit a scaler on ``df`` and return it together with the scaled data.

    Args:
        df: DataFrame to scale.
        method: 'minmax' selects MinMaxScaler, 'standard' selects StandardScaler.
        makeDf: When True, the scaled array is wrapped in a DataFrame that
            reuses the columns and index values of ``df``.

    Returns:
        Tuple ``(scaler, output)`` holding the fitted scaler and the scaled data.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if method == 'minmax' :
        scaler = MinMaxScaler()
    elif method == 'standard' :
        # Imported here because the notebook's import cell only brings in MinMaxScaler.
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
    else :
        # Fail with a clear message instead of an UnboundLocalError on `scaler`.
        raise ValueError(f"unknown scaling method: {method!r} (expected 'minmax' or 'standard')")
    output = scaler.fit_transform(df)
    if makeDf :
        output = pd.DataFrame(output, columns=df.columns, index=list(df.index.values))
    return (scaler, output)

In [3]:
def loadFeatureList(preDate, num_step = 5) :
    """Assemble the most recent ``num_step`` rows of model input features.

    The features are the ``Change`` column of Samsung Electronics (005930)
    and of several market indicators, plus Samsung's min-max scaled trading
    volume. They are meant for predicting Samsung's change on the business
    day after ``preDate``.

    Args:
        preDate: Reference date string whose first four characters are the
            year (the prompt in this notebook asks for ``YYYY-MM-DD``).
        num_step: Number of trailing rows to return.

    Returns:
        A DataFrame indexed like the Samsung price rows with the columns
        samsung, usdkrw, sox, sp500, dji, vix, hangseng, micron, treasury_5y
        and vol (in that order), or ``None`` after printing a message when
        any source returned no rows.
    """
    print("*** 데이터 불러오는 중 ... ***")
    errMsg = "기준일에 해당하는 데이터가 없어 예측할 수 없습니다."

    startYear = str(int(preDate[0:4]) - 1)
    # Fetch more rows than num_step; presumably so the forward fill below has
    # earlier values to draw on -- TODO confirm.
    window = num_step + 10

    def _fdr_tail(symbol) :
        # Last `window` rows from FinanceDataReader for one symbol.
        return fdr.DataReader(symbol, startYear, preDate)[-window:]

    def _yf_tail(ticker) :
        # Last `window` rows from yfinance with a derived `Change` column.
        # NOTE(review): relies on yfinance returning an 'Adj Close' column; newer
        # releases may adjust prices by default and omit it -- confirm against
        # the installed version.
        # NOTE(review): yfinance documents `end` as exclusive, so these series may
        # stop one day before the FinanceDataReader ones -- confirm this is intended.
        frame = yf.download(ticker, start=startYear+'-01-01', end=preDate)
        frame['Change'] = frame['Adj Close'].pct_change()
        return frame[-window:]

    samsung = _fdr_tail('005930')
    usdkrw = _fdr_tail('USD/KRW')
    sox = _fdr_tail('SOXX')
    dji = _fdr_tail('DJI')
    hangseng = _fdr_tail('HSI')
    micron = _fdr_tail('MU')

    treasury_5y = _yf_tail("^FVX")

    # Alternative source used in the author's home environment:
    #sp500 = fdr.DataReader('US500', startYear, preDate)[(-1) * (num_step+10):]
    #vix = fdr.DataReader('VIX', startYear, preDate)[(-1) * (num_step+10):]
    # Source used on the networked PC:
    sp500 = _yf_tail("^GSPC")
    vix = _yf_tail("^VIX")

    # Check for missing data BEFORE scaling: fitting the scaler on an empty
    # frame raises, which would bypass the friendly message below.
    if any(x.empty for x in [treasury_5y, micron, hangseng, vix, dji, sp500, sox, usdkrw, samsung]) :
        print(errMsg)
        return None

    vol = pd.DataFrame(index=samsung.index)
    vol['Close'] = samsung['Volume']
    vol = scale(vol)[1]

    # Column order matters: downstream transforms consume the frame positionally.
    compare_Change = pd.DataFrame(index = samsung.index)
    compare_Change['samsung'] = samsung.Change
    compare_Change['usdkrw'] = usdkrw.Change
    compare_Change['sox'] = sox.Change
    compare_Change['sp500'] = sp500.Change
    compare_Change['dji'] = dji.Change
    compare_Change['vix'] = vix.Change
    compare_Change['hangseng'] = hangseng.Change
    compare_Change['micron'] = micron.Change
    compare_Change['treasury_5y'] = treasury_5y.Change
    compare_Change['vol'] = vol.Close
    # Rows are aligned on Samsung's index, so sources without a row for a given
    # date yield NaN; carry the previous value forward.
    compare_Change = compare_Change.ffill()
    compare_Change = compare_Change[-num_step:]

    # Disabled experiment kept for reference: additional per-stock features.
    '''
    others = [
        ('SK하이닉스', '000660'),
        ('삼성전자우', '005935'),
        ('오리온', '271560'),
        ('DB하이텍', '000990'),
        ('메리츠증권', '008560'),
        ('넥센타이어', '002350'),
        ('대교', '019680'),
        ('인천도시가스', '034590'),
        ('미래에셋 미국 시니어론 100 ETN', '520021'),
        ('삼성 Alerian 에너지인프라 MLP ETN', '530033'),
        ('KINDEX S&P아시아TOP50', '277540'),
        ('KODEX 미국채10년선물', '308620'),
        ('미래에셋 인버스 S&P500 ETN(H)', '590011'),
        ('미래에셋 인버스 전기전자 Core5 ETN', '520005'),
        ('하나마이크론', '067310'),
        ('디바이스이엔지', '187870'),
        ('국전약품', '307750')
        ]
    for stock in others :
        el = fdr.DataReader(stock[1], preDate, preDate)
        if el.empty :
            print(errMsg)
            return None
        compare_Change[stock[0]] = el.Change
    '''
    return compare_Change

In [4]:
def dataToPCA(data, filePath='./') :
    """Project the feature frame onto the components of a saved PCA model.

    Args:
        data: DataFrame of features to transform; its index is reused for the result.
        filePath: Prefix prepended verbatim to 'pca_fit_classifier.pkl'
            (so a directory must include its trailing separator).

    Returns:
        DataFrame with columns PCA_0 .. PCA_7 and the index of ``data``, or
        ``None`` (after printing the reason) when loading or transforming fails.
    """
    n_components = 8
    columns = ['PCA_'+str(x) for x in range(0, n_components)]
    try :
        # NOTE(security): joblib.load unpickles arbitrary objects -- only load trusted files.
        pca_fit = joblib.load(filePath + 'pca_fit_classifier.pkl')
        principalComponents = pca_fit.transform(data)
        principalDf = pd.DataFrame(data=principalComponents, columns = columns, index=data.index)
    except Exception as exc :
        # Catch Exception rather than everything so Ctrl-C / kernel interrupts
        # still propagate, and surface the cause instead of hiding it.
        print("데이터 변환 시 문제가 발생하였습니다.")
        print(f"({type(exc).__name__}: {exc})")
        return None
    return principalDf

In [5]:
def predictFromML(preDate, data, filePath='./') :
    """Run every available saved classifier on the latest row and print UP/DOWN.

    Args:
        preDate: Reference date; accepted for interface symmetry, not used here.
        data: Feature frame; only its last row is passed to the models.
        filePath: Prefix prepended verbatim to '<model name>.pkl'.

    Returns:
        List of ``(model_name, display_label, prediction)`` tuples, one per
        model that could be loaded, in the order of the internal model list.
    """
    up = "오를 가능성이 있습니다."
    no_up = "떨어질 가능성이 있습니다."
    pred_list = []

    model_list = [
        ('KNeighborsClassifier', "KNN 모델"),
        ('SVC', "SVM 모델"),
        ('RandomForestClassifier', "랜덤 포레스트 모델"),
        ('GradientBoostingClassifier', "그레디언트 부스팅 모델"),
        ('XGBClassifier', "XGB 모델"),
        ('LGBMClassifier', "LGBM 모델"),
        ('VotingClassifier_hard', "Voting hard 모델"),
        ('VotingClassifier_soft', "Voting soft 모델")
    ]
    # Only the most recent row is scored; slice once instead of on every iteration.
    latest = data[-1:]
    for name, label in model_list :
        try :
            # NOTE(security): joblib.load unpickles arbitrary objects -- only load trusted files.
            classifier = joblib.load(filePath + name + '.pkl')
        except FileNotFoundError :
            # A model that was never saved is skipped quietly (best-effort, as before).
            continue
        except Exception as exc :
            # Any other load failure (corrupt file, version mismatch, ...) is
            # reported so it is not mistaken for a missing model.
            print(f"{label} : 모델을 불러오지 못해 건너뜁니다. ({type(exc).__name__}: {exc})")
            continue
        pred = classifier.predict(latest)
        pred_list.append((name, label, pred))

    for _, label, pred in pred_list :
        isUp = up if pred==1 else no_up
        print(f"{label} : {isUp}")
    print()
    return pred_list

In [6]:
def makeLSTMData(data, num_step = 5):
    """Reshape a feature frame into a single-sample batch.

    Args:
        data: DataFrame whose values are laid out as rows of features.
        num_step: Number of rows expected in ``data``.

    Returns:
        ndarray of shape ``(1, num_step, number of columns)``.
    """
    feature_count = len(data.columns)
    values = np.array(data)
    return values.reshape((1, num_step, feature_count))

def predictFromDL(preDate, data, filePath='./') :
    """Run the saved deep-learning model(s) on the feature window and print UP/DOWN.

    Args:
        preDate: Reference date; accepted for interface symmetry, not used here.
        data: Feature frame; it is reshaped by ``makeLSTMData`` with that
            function's default ``num_step``, so the row count must match it.
        filePath: Prefix prepended verbatim to '<model name>.h5'.

    Returns:
        List of ``(model_name, display_label, prediction)`` tuples where the
        prediction is the argmax over axis 1 of the model output.
    """
    up = "오를 가능성이 있습니다."
    no_up = "떨어질 가능성이 있습니다."
    pred_list = []

    model_list = [
        ('LSTM', "딥러닝 모델"),
    ]
    # Reshape once, outside the loop. Rebinding `data` inside the loop would hand
    # an ndarray (which has no `.columns`) to makeLSTMData from the second model on.
    x_batch = makeLSTMData(data)
    for name, label in model_list :
        dlmodel = load_model(filePath + name + '.h5')
        predicted = dlmodel.predict(x_batch)
        pred = np.argmax(predicted, axis=1)
        pred_list.append((name, label, pred))

    for _, label, pred in pred_list :
        isUp = up if pred==1 else no_up
        print(f"{label} : {isUp}")
    print()
    return pred_list

In [7]:
def predict() :
    """Interactively predict whether Samsung's close goes UP or DOWN.

    Prompts for a reference date (empty input means yesterday), loads the
    features, applies the saved PCA, and prints the machine-learning and
    deep-learning predictions. Stops early with a message when the date is
    malformed or when loading or transforming the data fails.

    Returns:
        None. All results are printed.
    """
    print("*** 삼성전자 주가 변화율(종가 기준) 예측 ***")
    # Strip so that stray whitespace is not mistaken for a date.
    preDate = input("기준일을 입력하세요(해당일의 다음 영업일에 대해 예측) EX) YYYY-MM-DD : \n").strip()
    if preDate == "" :
        print("! 입력값이 없어 어제일자를 기준일로 잡습니다 !\n")
        preDate=(date.today() - timedelta(1)).isoformat()
    else :
        try :
            # Validate and normalise to YYYY-MM-DD before anything slices the string.
            preDate = datetime.strptime(preDate, "%Y-%m-%d").date().isoformat()
        except ValueError :
            print("! 기준일 형식이 올바르지 않습니다 (YYYY-MM-DD) !")
            print("종료.")
            return

    ori_data = loadFeatureList(preDate)
    if ori_data is None :
        print("종료.")
        return
    data = dataToPCA(ori_data)
    if data is None :
        print("종료.")
        return
    print("\n*** 기준일 다음 영업일에 삼성전자 종가의 UP/DOWN 예측하기 *** \n")

    print("*** [삼성전자 주가 변화율 UP/DOWN] 머신러닝 예측결과 ***")
    predictFromML(preDate, data)

    print("*** [삼성전자 주가 변화율 UP/DOWN] 딥러닝 예측결과 ***")
    predictFromDL(preDate, data)
    return

In [10]:
# Run the interactive prediction: prompts for a reference date, then prints each model's UP/DOWN call.
predict()

*** 삼성전자 주가 변화율(종가 기준) 예측 ***
기준일을 입력하세요(해당일의 다음 영업일에 대해 예측) EX) YYYY-MM-DD : 
2021-02-04
*** 데이터 불러오는 중 ... ***
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

*** 기준일 다음 영업일에 삼성전자 종가의 UP/DOWN 예측하기 *** 

*** [삼성전자 주가 변화율 UP/DOWN] 머신러닝 예측결과 ***
KNN 모델 : 오를 가능성이 있습니다.
SVM 모델 : 오를 가능성이 있습니다.
랜덤 포레스트 모델 : 떨어질 가능성이 있습니다.
그레디언트 부스팅 모델 : 떨어질 가능성이 있습니다.
XGB 모델 : 떨어질 가능성이 있습니다.
LGBM 모델 : 떨어질 가능성이 있습니다.

*** [삼성전자 주가 변화율 UP/DOWN] 딥러닝 예측결과 ***
딥러닝 모델 : 오를 가능성이 있습니다.



In [8]:
# Sanity check: fetch Samsung Electronics (005930) prices from 2021-02-03 onward
fdr.DataReader('005930', "2021-02-03")

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-02-03,84800,85400,83400,84600,22112205,0.00237
2021-02-04,83500,83800,82100,82500,24171688,-0.024823
2021-02-05,83100,84000,82500,83500,18036835,0.012121
2021-02-08,83800,84200,83000,83000,15338765,-0.005988
2021-02-09,84000,84800,82700,82700,20898332,-0.003614
2021-02-10,82600,82600,81600,81600,23025766,-0.013301
2021-02-15,83800,84500,83300,84200,23529706,0.031863
2021-02-16,84500,86000,84200,84900,20483100,0.008314
2021-02-17,83900,84200,83000,83200,18307735,-0.020024
2021-02-18,83200,83600,82100,82100,21327683,-0.013221
