In [2]:
import pandas as pd
import numpy as np

from keras.preprocessing import sequence

# train data
# Directory prefix for the input CSVs; the per-game cells append
# '<game>_<series_name>.csv' to it when calling pd.read_csv.
training_data_path = '../../datasets/train_data/series/'
# Directory prefix for the '<game>_rnn.csv' accuracy tables written by the per-game cells.
result_path = '../../datasets/results/'
def series_features(series,maxlen=20):
    """Convert a Series of space-separated numeric strings into an integer matrix.

    Every token is parsed as a float, multiplied by 100000 and truncated with
    ``int``. The resulting variable-length lists are passed to
    ``sequence.pad_sequences`` with ``maxlen`` (all other padding options left
    at their Keras defaults), and 40000 is then added to every element of the
    padded array -- padded positions included.

    Parameters
    ----------
    series : pandas.Series of str
        One space-separated list of numbers per row.
    maxlen : int
        Sequence length handed to ``pad_sequences``.

    Returns
    -------
    The padded array returned by ``pad_sequences``, shifted by 40000.
    """
    def _scaled_tokens(text):
        # Split on single spaces and scale each value to an integer.
        return [int(float(token)*100000) for token in text.split(' ')]

    scaled_rows = series.apply(_scaled_tokens).tolist()
    padded = sequence.pad_sequences(scaled_rows, maxlen=maxlen)
    return padded + 40000

# Series variants to evaluate; each name is the suffix of a
# '<game>_<series_name>.csv' input file and becomes one row of the result table.
series_names = ['unbiased','hourly_corrected','daily_corrected','global_corrected']
# Sequence length passed to series_features (forwarded to pad_sequences).
maxlen = 20

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [7]:
# Seeds NumPy's global RNG only.
# NOTE(review): the training output reports a TensorFlow backend; confirm whether
# the backend needs its own seed for the reported accuracies to be repeatable.
np.random.seed(1337)  # for reproducibility

from keras.datasets import mnist
from keras.utils import np_utils
from keras.layers import Dense, Embedding,Flatten
from keras.models import Sequential
from keras.layers import LSTM
from keras.optimizers import Adam

def LSTM_regression(X,y,val_rate=0.3):
    """Train a small LSTM regressor and return its loss on the held-out rows.

    The first ``val_rate`` fraction of the rows (in their given order, no
    shuffling) is held out; the remaining rows are used for training. The
    held-out rows serve both as Keras ``validation_data`` during fitting and
    as the evaluation set for the returned score.

    Parameters
    ----------
    X : 2-D numpy array, shape (n_samples, n_features)
        Feature matrix; each row is presented to the LSTM as a single
        timestep carrying ``n_features`` values.
    y : array-like, length n_samples
        Regression targets.
    val_rate : float
        Fraction of rows held out for validation/evaluation.

    Returns
    -------
    The value returned by ``model.evaluate`` (mean squared error).
    """
    # Use the caller's split fraction rather than a hard-coded 0.3.
    length_val = int(len(y)*val_rate)

    X_train, y_train = X[length_val:,:],y[length_val:]
    X_test, y_test =X[:length_val,:],y[:length_val]

    # Derive the feature width from the data instead of assuming 20 columns.
    n_features = X.shape[1]
    X_train = np.reshape(X_train, (X_train.shape[0],1, n_features))
    X_test = np.reshape(X_test, (X_test.shape[0],1, n_features))
    print(X_train.shape)

    # build RNN model
    model = Sequential()
    # The timestep dimension is fixed to 1 so that Flatten below receives a
    # fully defined (1, 50) shape.
    model.add(LSTM(50, input_shape=(1, n_features), return_sequences=True))
    model.add(Flatten())
    model.add(Dense(1))

    # optimizer (rmsprop with its default settings)
    model.compile(optimizer='rmsprop',
                  loss='mean_squared_error')

    # training
    model.fit(X_train, y_train,epochs=3,validation_data=(X_test, y_test))
    score = model.evaluate(X_test, y_test)

    print('test cost: ', score)
    return score





def LSTM_classifer(X,y,val_rate=0.3):
    """Train an Embedding + LSTM classifier and return its hold-out accuracy.

    The first ``val_rate`` fraction of the rows (in their given order, no
    shuffling) is held out; the remaining rows are used for training. The
    held-out rows serve both as Keras ``validation_data`` during fitting and
    as the evaluation set for the returned accuracy.

    Parameters
    ----------
    X : 2-D numpy array, shape (n_samples, sequence_length)
        Integer token ids fed to the Embedding layer; every id must be in
        ``[0, 200000)``.
    y : 1-D array-like, length n_samples
        Class labels. Any set of sortable label values is accepted; they are
        remapped internally to ``0..num_classes-1``.
    val_rate : float
        Fraction of rows held out for validation/evaluation.

    Returns
    -------
    Accuracy reported by ``model.evaluate`` on the held-out rows.
    """
    LR = 0.01
    vocab_size = 200000  # Embedding input_dim: upper bound (exclusive) on token ids
    # Use the caller's split fraction rather than a hard-coded 0.3.
    length_val = int(len(y)*val_rate)

    # to_categorical needs labels in 0..num_classes-1; remap so that label sets
    # such as {1, 2} or {-1, 0, 1} are encoded correctly as well.
    classes, y_index = np.unique(y, return_inverse=True)
    num_classes = len(classes)

    X_train, y_train = X[length_val:,:],y_index[length_val:]
    X_test, y_test =X[:length_val,:],y_index[:length_val]

    # data pre-processing: one-hot encode the class indices
    y_train = np_utils.to_categorical(y_train, num_classes=num_classes)
    y_test = np_utils.to_categorical(y_test, num_classes=num_classes)

    # build RNN model
    model = Sequential()
    model.add(Embedding(vocab_size, 128))
    model.add(LSTM(128, dropout=0.6, recurrent_dropout=0.2))
    model.add(Dense(num_classes, activation='softmax'))

    # optimizer
    model.compile(optimizer=Adam(LR),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # training
    model.fit(X_train, y_train,epochs=3,validation_data=(X_test, y_test))
    score, acc = model.evaluate(X_test, y_test)

    print('test cost: ', score, 'test accuracy: ', acc)
    return acc

In [8]:
# Train one classifier per (section, target) pair for every series variant of
# this game and save the hold-out accuracies as a table
# (rows: series variants, columns: targets).
game_name = 'soccer'
rows = []  # one accuracy Series per series variant
for series_name in series_names:
    series_df = pd.read_csv(training_data_path+game_name+'_'+series_name+'.csv')
    sec1 = series_features(series_df['sec1'],maxlen=maxlen)
    sec2 = series_features(series_df['sec2'],maxlen=maxlen)
    sec3 = series_features(series_df['sec3'],maxlen=maxlen)

    # The home-win label is shared by three models; convert it once.
    home = series_df['home'].apply(int).values

    # Fresh dict per variant so a row can never carry over values from the previous one.
    dic = {}
    dic['sec1_home_win'] = LSTM_classifer(sec1,home,val_rate=0.3)
    dic['sec2_home_win'] = LSTM_classifer(sec2,home,val_rate=0.3)
    dic['sec3_home_win'] = LSTM_classifer(sec3,home,val_rate=0.3)
    dic['sec1_direction'] = LSTM_classifer(sec1,series_df['direction_2'].apply(int).values,val_rate=0.3)
    dic['sec2_direction'] = LSTM_classifer(sec2,series_df['direction_3'].apply(int).values,val_rate=0.3)
    rows.append(pd.Series(dic, name=series_name))

# Build the frame once: DataFrame.append is deprecated (removed in pandas 2.0)
# and re-copies the whole frame on every call.
df = pd.DataFrame(rows)
df.to_csv(result_path+game_name+'_rnn.csv')



Train on 2693 samples, validate on 1153 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.6729930563404161 test accuracy:  0.6027753686036427
Train on 2693 samples, validate on 1153 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.6736141476079059 test accuracy:  0.6209887250650477
Train on 2693 samples, validate on 1153 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.6657459262341701 test accuracy:  0.6287944492627927
Train on 2693 samples, validate on 1153 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  1.0988322565685844 test accuracy:  0.40763226366001737
Train on 2693 samples, validate on 1153 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  1.0784699340954307 test accuracy:  0.424978317432784
Train on 2693 samples, validate on 1153 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.8100055453318259 test accuracy:  0.5680832610581092
Train on 2693 samples, validate on 1153 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.8861460805663004 test accuracy:  0.552471812

In [116]:
# Train one classifier per (section, target) pair for every series variant of
# this game and save the hold-out accuracies as a table
# (rows: series variants, columns: targets).
game_name = 'iceball'
rows = []  # one accuracy Series per series variant
for series_name in series_names:
    series_df = pd.read_csv(training_data_path+game_name+'_'+series_name+'.csv')
    sec1 = series_features(series_df['sec1'],maxlen=maxlen)
    sec2 = series_features(series_df['sec2'],maxlen=maxlen)
    sec3 = series_features(series_df['sec3'],maxlen=maxlen)

    # The home-win label is shared by three models; convert it once.
    home = series_df['home'].apply(int).values

    # Fresh dict per variant so a row can never carry over values from the previous one.
    dic = {}
    dic['sec1_home_win'] = LSTM_classifer(sec1,home,val_rate=0.3)
    dic['sec2_home_win'] = LSTM_classifer(sec2,home,val_rate=0.3)
    dic['sec3_home_win'] = LSTM_classifer(sec3,home,val_rate=0.3)
    dic['sec1_direction'] = LSTM_classifer(sec1,series_df['direction_2'].apply(int).values,val_rate=0.3)
    dic['sec2_direction'] = LSTM_classifer(sec2,series_df['direction_3'].apply(int).values,val_rate=0.3)
    rows.append(pd.Series(dic, name=series_name))

# Build the frame once: DataFrame.append is deprecated (removed in pandas 2.0)
# and re-copies the whole frame on every call.
df = pd.DataFrame(rows)
df.to_csv(result_path+game_name+'_rnn.csv')



Train on 4513 samples, validate on 1934 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.7183501133011441 test accuracy:  0.520165460124504
Train on 4513 samples, validate on 1934 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.7177997690691943 test accuracy:  0.5351602895553258
Train on 4513 samples, validate on 1934 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.7023355323501556 test accuracy:  0.5501551189553282
Train on 4513 samples, validate on 1934 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  1.0898112519697172 test accuracy:  0.42244053774560497
Train on 4513 samples, validate on 1934 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.9559883385311115 test accuracy:  0.4519131333406364
Train on 4513 samples, validate on 1934 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.7267230182054001 test accuracy:  0.5646328852119958
Train on 4513 samples, validate on 1934 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.759063213601354 test accuracy:  0.5289555326

In [9]:
# Train one classifier per (section, target) pair for every series variant of
# this game and save the hold-out accuracies as a table
# (rows: series variants, columns: targets).
game_name = 'baseball'
rows = []  # one accuracy Series per series variant
for series_name in series_names:
    series_df = pd.read_csv(training_data_path+game_name+'_'+series_name+'.csv')
    sec1 = series_features(series_df['sec1'],maxlen=maxlen)
    sec2 = series_features(series_df['sec2'],maxlen=maxlen)
    sec3 = series_features(series_df['sec3'],maxlen=maxlen)

    # The home-win label is shared by three models; convert it once.
    home = series_df['home'].apply(int).values

    # Fresh dict per variant so a row can never carry over values from the previous one.
    dic = {}
    dic['sec1_home_win'] = LSTM_classifer(sec1,home,val_rate=0.3)
    dic['sec2_home_win'] = LSTM_classifer(sec2,home,val_rate=0.3)
    dic['sec3_home_win'] = LSTM_classifer(sec3,home,val_rate=0.3)
    dic['sec1_direction'] = LSTM_classifer(sec1,series_df['direction_2'].apply(int).values,val_rate=0.3)
    dic['sec2_direction'] = LSTM_classifer(sec2,series_df['direction_3'].apply(int).values,val_rate=0.3)
    rows.append(pd.Series(dic, name=series_name))

# Build the frame once: DataFrame.append is deprecated (removed in pandas 2.0)
# and re-copies the whole frame on every call.
df = pd.DataFrame(rows)
df.to_csv(result_path+game_name+'_rnn.csv')



Train on 7844 samples, validate on 3361 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.6949673214685133 test accuracy:  0.5400178518298125
Train on 7844 samples, validate on 3361 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.6895238728758765 test accuracy:  0.5367450163641774
Train on 7844 samples, validate on 3361 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.6958428791008404 test accuracy:  0.5522166022017256
Train on 7844 samples, validate on 3361 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.970812259703582 test accuracy:  0.46712288009520975
Train on 7844 samples, validate on 3361 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.8788316547072597 test accuracy:  0.49300803332341564
Train on 7844 samples, validate on 3361 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.7044874943639863 test accuracy:  0.5319845284141624
Train on 7844 samples, validate on 3361 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
test cost:  0.6990276298967013 test accuracy:  0.52543885