In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings(action='ignore')
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM,Dropout,Bidirectional
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


In [2]:
def create_dataset(signal_data, look_back=1):
    dataX, dataY = [], []
    for i in range(len(signal_data)-look_back):
        dataX.append(signal_data[i:(i+look_back), 0])
        dataY.append(signal_data[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

In [15]:
def univariate(data, look_back):
    signal_data =data
    # 데이터 전처리
    scaler = MinMaxScaler(feature_range=(0, 1))
    signal_data = scaler.fit_transform(signal_data)

    # 데이터 분리
    train = signal_data[0:120]
    #val = signal_data[800:1200]
    test = signal_data[120:]

    # 데이터셋 생성
    x_train, y_train = create_dataset(train, look_back)
    #x_val, y_val = create_dataset(val, look_back)
    x_test, y_test = create_dataset(test, look_back)

    x_train = np.reshape(x_train,(x_train.shape[0],look_back,1)) #(size, timestamp,feature)
    x_test = np.reshape(x_test,(x_test.shape[0],look_back,1))
    
    return x_train, x_test, y_train, y_test,scaler, look_back

In [4]:
def run_bi(x_train, x_test, y_train, y_test,look_back, scaler,box):

    model = Sequential()
    model.add(Bidirectional(LSTM(box, input_shape=(look_back, 1)))) #(timestep,feature)
    model.add(Dropout(0.1))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    early_stop = EarlyStopping(monitor='loss', patience=3, verbose=1)

    model.fit(x_train, y_train, epochs=100, batch_size=5, verbose=1, callbacks=[early_stop])
    score = model.evaluate(x_test,y_test,batch_size=5)
    pre =model.predict(x_test)
    #sc_pre = scaler.inverse_transform(pre)
    ans = y_test
    
    return score, pre, ans

In [34]:
def run_stateful(x_train, x_test, y_train, y_test, look_back, scaler,box):
    model = Sequential()
    model.add(LSTM(box, batch_input_shape=(1, look_back, 1), stateful=True))
    model.add(Dropout(0.1))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    early_stop = EarlyStopping(monitor='loss', patience=3, verbose=1)

    model.fit(x_train, y_train, epochs=100, batch_size=1, verbose=1, callbacks=[early_stop])
    score = model.evaluate(x_test,y_test,batch_size=1)
    pre =model.predict(x_test,batch_size=1) #모든 batch_size 바꾸면 error
    #sc_pre = scaler.inverse_transform(pre)
    ans = y_test
    
    return score, pre, ans

In [27]:
def run_stack(x_train, x_test, y_train, y_test,look_back, scaler,box,box2):

    model = Sequential()
    model.add(LSTM(box, input_shape=(look_back, 1), return_sequences=True))#(timestep,feature)
    model.add(LSTM(box2))
    model.add(Dropout(0.1))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    early_stop = EarlyStopping(monitor='loss', patience=3, verbose=1)

    model.fit(x_train, y_train, epochs=100, batch_size=5, verbose=1, callbacks=[early_stop])
    score = model.evaluate(x_test,y_test,batch_size=5)
    pre =model.predict(x_test)
    #sc_pre = scaler.inverse_transform(pre)
    ans = y_test
    
    return score, pre, ans

In [39]:
def run_stateful_stack(x_train, x_test, y_train, y_test, look_back, scaler,box1,box2):
    model = Sequential()
    model.add(LSTM(box1, batch_input_shape=(1, look_back, 1), stateful=True,return_sequences=True))
    model.add(Dropout(0.1))
    model.add(LSTM(box2, batch_input_shape=(1, look_back, 1), stateful=True))
    model.add(Dropout(0.1))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    early_stop = EarlyStopping(monitor='loss', patience=3, verbose=1)

    model.fit(x_train, y_train, epochs=100, batch_size=1, verbose=1, callbacks=[early_stop])
    score = model.evaluate(x_test,y_test,batch_size=1)
    pre =model.predict(x_test,batch_size=1) #모든 batch_size 바꾸면 error
    #sc_pre = scaler.inverse_transform(pre)
    ans = y_test
    
    return score, pre, ans

In [6]:
PATH ='C:/Users/ckdrj/Google 드라이브/baseball/data/total/'
#PATH = 'C:/Users/LIM CHANG GEON/Google 드라이브/baseball/data/total'
hiter16 = pd.read_csv(PATH+"2020빅콘테스트_스포츠투아이_제공데이터_팀타자_2016.csv")
pitcher16 = pd.read_csv(PATH+"2020빅콘테스트_스포츠투아이_제공데이터_팀투수_2016.csv")

### usage
univariate(data_feature,look_back)
run_function select (~,box) or (~,box1,box2)

In [40]:
x_train, x_test, y_train, y_test,scaler,look_back = univariate(pitcher16[["R"]],10)
score, pre, ans = run_stateful(x_train, x_test, y_train, y_test,look_back, scaler,10)
#score, pre, ans = run_bi(x_train, x_test, y_train, y_test,look_back, scaler,10)
#score, pre, ans = run_stack(x_train, x_test, y_train, y_test,look_back, scaler,10,10)
score, pre, ans = run_stateful_stack(x_train, x_test, y_train, y_test,look_back, scaler,10,10)
sc_pre=scaler.inverse_transform(pre)
sc_ans=scaler.inverse_transform(ans.reshape(ans.shape[0],1))
print(score, sc_pre,sc_ans)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 00007: early stopping
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 00006: early stopping
0.043084419763101324 [[4.156399 ]
 [4.1444526]
 [4.066629 ]
 ...
 [4.0863366]
 [4.1136   ]
 [4.1166763]] [[2.]
 [1.]
 [3.]
 ...
 [4.]
 [5.]
 [8.]]
