In [None]:
import numpy as np
import pandas as pd

from tslearn.neural_network import TimeSeriesMLPRegressor, TimeSeriesMLPClassifier
from tslearn.svm import TimeSeriesSVR, TimeSeriesSVC

from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

from sktime.classification.interval_based import TimeSeriesForest
from sktime.classification.shapelet_based import MrSEQLClassifier
from sktime.transformations.panel.compose import ColumnConcatenator
from sktime.regression.compose import TimeSeriesForestRegressor
from sktime.utils.data_processing import from_3d_numpy_to_nested

# Locations of the input CSV files (relative to the working directory).
train_features = "train_features.csv"
train_labels = "train_labels.csv"
test_features = "test_features.csv"
sample_file = "sample.csv"


def _load_numeric_csv(path, first_col):
    """Read a comma-separated numeric file and keep columns ``first_col`` onward.

    The first line of the file is skipped as a header row.
    """
    table = np.loadtxt(path, delimiter=',', skiprows=1)
    return table[:, first_col:]


#data set
# The leading columns are dropped (two for the feature files, one for the
# label file) -- presumably identifier/time columns, TODO confirm.
trainx = _load_numeric_csv(train_features, 2)
trainy = _load_numeric_csv(train_labels, 1)

testx = _load_numeric_csv(test_features, 2)

# Two independent copies of the sample file; later cells overwrite their
# columns 1..15 with predictions, one frame per model family.
testy_1 = pd.read_csv(sample_file)
testy_2 = pd.read_csv(sample_file)
#TODO: handle the header rows properly by loading everything with pd.read_csv


In [None]:
#simple impute

def impute(x, steps=12, n_cols=35):
    """Mean-impute missing values separately within each block of ``steps`` rows.

    The rows of ``x`` are processed in consecutive, non-overlapping blocks of
    ``steps`` rows. Within a block:

    1. every column that is NaN in all of the block's rows is filled with 0;
    2. the remaining NaNs in the first ``n_cols`` columns are replaced by the
       mean of that column over the block.

    Parameters
    ----------
    x : numpy.ndarray
        2-D array of shape (n_rows, n_columns). It is not modified.
    steps : int, default 12
        Number of consecutive rows that form one block.
    n_cols : int, default 35
        Only columns ``0:n_cols`` are mean-imputed. Columns past that limit
        keep their NaNs unless they are entirely NaN within a block.

    Returns
    -------
    numpy.ndarray
        A new array with the same shape as ``x``.

    Raises
    ------
    ValueError
        If ``steps`` is not positive.

    Notes
    -----
    Rows after the last complete block (when ``n_rows`` is not a multiple of
    ``steps``) are returned unchanged.
    """
    if steps <= 0:
        raise ValueError("steps must be a positive integer")

    # Work on a copy so the caller's array is never modified in place.
    x_trans = np.array(x)
    imputer = SimpleImputer(missing_values=np.nan, strategy='mean')

    for i in range(x_trans.shape[0] // steps):
        # Basic slicing yields a view, so writes below land in x_trans.
        block = x_trans[i * steps:(i + 1) * steps]

        # Fill all-NaN columns with 0 first: the imputer drops columns that
        # have no observed value, which would change the width of its output
        # and break the assignment back into the block.
        all_nan = np.isnan(block).all(axis=0)
        block[:, all_nan] = 0

        block[:, :n_cols] = imputer.fit_transform(block[:, :n_cols])

    return x_trans

# Impute the training and test features independently (each with the default
# block size), replacing the raw arrays, then show the training shape.
trainx, testx = impute(trainx), impute(testx)
print(trainx.shape)

In [None]:
#data rearrange

# Author's note on the data: 18995 cases, 15 features (dimensions) and 12 time points.
# tslearn input format: 3-D array (n_samples, n_timesteps, n_dimensions)
# sktime input format: nested pd.DataFrame, rows -> instances,
#                      cols -> dimensions, cells -> time series

# Stack every 12 consecutive rows into one sample: (rows, features) becomes
# (rows // 12, 12, features). NOTE: the arrays are overwritten, so this cell
# is only valid on the 2-D output of the imputation cell.
trainx = trainx.reshape(trainx.shape[0] // 12, 12, trainx.shape[1])
testx = testx.reshape(testx.shape[0] // 12, 12, testx.shape[1])

# The sktime converter is given the last two axes swapped, i.e. arrays laid out
# as (n_samples, n_dimensions, n_timesteps).
trainx_sk = from_3d_numpy_to_nested(trainx.swapaxes(1, 2))
testx_sk = from_3d_numpy_to_nested(testx.swapaxes(1, 2))

# Sanity check of both input formats.
print(trainx.shape)
print(trainx_sk.shape)

In [None]:
#classifier
# Stages of the sktime pipeline: concatenate the columns of the nested frame,
# then classify with a 200-tree time series forest.
# NOTE(review): the forest is built without a random_state, so its predictions
# may differ between runs -- consider seeding it.
steps = [
    ("concatenate", ColumnConcatenator()),
    ("classify", TimeSeriesForest(n_estimators=200)),
]

# One pair of models per label column 0..10 of trainy.
for i in range(11):
    y = trainy[:, i]

    # tslearn MLP on the 3-D array. (A TimeSeriesSVC with a "gak" kernel was
    # the alternative tried here.)
    mlp = TimeSeriesMLPClassifier(hidden_layer_sizes=(64, 64), random_state=0)
    mlp.fit(trainx, y)
    # predict_proba has shape (N, k) for k classes; column 1 is kept --
    # assumes binary 0/1 labels with both classes present, TODO confirm.
    pred_1 = mlp.predict_proba(testx)[:, 1]

    # sktime forest pipeline on the nested DataFrame.
    clf = Pipeline(steps)
    clf.fit(trainx_sk, y)
    pred_2 = clf.predict_proba(testx_sk)[:, 1]

    # Column 0 of the sample frames is skipped, hence the +1 offset.
    testy_1[testy_1.columns[i + 1]] = pred_1
    testy_2[testy_2.columns[i + 1]] = pred_2
    print("iteration {} done".format(i + 1))


In [25]:
#regressor
# One MLP regressor per target column 11..14 of trainy.
# Only the tslearn MLP is fitted in this cell (a TimeSeriesSVR with a "gak"
# kernel was the alternative tried), so BOTH output frames receive the MLP
# predictions for these columns.

for i in range(11, 15):
    y = trainy[:, i]
    # random_state is fixed, as for the classifier MLP, so that the regression
    # columns are reproducible across runs.
    mlp = TimeSeriesMLPRegressor(
        hidden_layer_sizes=(64, 64),
        learning_rate_init=0.001,
        max_iter=2000,
        random_state=0,
    )
    mlp.fit(trainx, y)

    pred_1 = mlp.predict(testx)
    # Column 0 of the sample frames is skipped, hence the +1 offset.
    testy_1[testy_1.columns[i + 1]] = pred_1
    testy_2[testy_2.columns[i + 1]] = pred_1
    print("iteration {} done".format(i + 1))


iteration 12 done
iteration 13 done
iteration 14 done
iteration 15 done


In [26]:
#write data
# Save each prediction frame without its index, floats written with three decimals.
for frame, out_path in (
    (testy_1, 'prediction_nn.csv'),
    (testy_2, 'prediction_sk_tsf.csv'),
):
    frame.to_csv(out_path, index=False, float_format='%.3f')