In [1]:
from keras.models import Sequential
from keras.layers import Convolution1D, MaxPooling1D,AveragePooling1D, Dense, Conv1D
from keras.optimizers import RMSprop, Adam, SGD, Nadam
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Dense, Dropout, Activation, Flatten
import util.DataUtil as data_util
from dataLayer.DataLayer import DataLayer
import util.Constants as Constants
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split


class oneD_CNN():

    def __init__(self):
        self.model = self.create_model()
        self.data_layer = DataLayer()
        self.data = self.data_layer.get_data_with_indicators()
#         self.class_target = self.data_layer.get_classification_target()
        self.regression_target = self.data_layer.get_regression_target()
        self.prepared_data = ""

    def prepare_data(self):
        data = self.data
        target = self.regression_target
        print (data.shape , target.shape)
        #x_train, y_train, x_test, y_test = data_util.split_train_testy_test(data,target,split_train_ratio =  Constants.SPLIT_TRAIN_RATIO)
        x_train, y_train, x_test, y_test = train_test_split(data, target, test_size = 0.2)
        print ("number of training samples", str(y_train.shape))
        print ("validation samples", str(y_test.shape))
        x_train = np.asarray(x_train)
        x_test = np.asarray(x_test)
        x_train = np.reshape(x_train,(x_train.shape[0],x_train.shape[1],1))
        x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
        return x_train, y_train, x_test, y_test

    def create_model(self):
        model = Sequential()
        model.add(Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=(35, 1)))
        model.add(BatchNormalization())
        model.add(AveragePooling1D(pool_size=2))
        model.add(Dropout(0.2))

        model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
        model.add(BatchNormalization())
        model.add(AveragePooling1D(pool_size=2))
        model.add(Dropout(0.2))

        model.add(Flatten())
        model.add(Dense(1, activation='linear'))
        return model

    def compile_model(self,model):
        model.compile(loss="mae", optimizer="adam", metrics=["accuracy"])

    def train_model(self, x_train, y_train, x_test, y_test,model):
        print('x_train', x_train.shape, 'y_train',y_train.shape,  'x_test', x_test.shape, 'y_test', y_test.shape)
        print('train model', x_train.shape, y_train.shape)
        batch_size = 32
        epochs = 100

        model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1,
                       validation_data=(x_test, y_test))
 

    def accuracy_metrics(self,model,y_test,x_test):
        
        pred_df = pd.DataFrame(columns=['actual', 'predicted'])
        
        pred_df['actual'] = y_test
        pred_df['predicted'] = model.predict(x_test)
        pred_df['true_positive'] = np.zeros(len(pred_df))
        pred_df['false_positive'] = np.zeros(len(pred_df))
        pred_df['true_negative'] = np.zeros(len(pred_df))
        pred_df['false_negative'] = np.zeros(len(pred_df))
        for i in range(len(pred_df)):
            if (pred_df.iloc[i, 0] < 0) and (pred_df.iloc[i, 1] < 0):
                pred_df.iloc[i, 4] = 1
            elif (pred_df.iloc[i, 0] > 0) and (pred_df.iloc[i, 1] > 0):
                pred_df.iloc[i, 2] = 1
            elif (pred_df.iloc[i, 0] < 0) and (pred_df.iloc[i, 1] > 0):
                pred_df.iloc[i, 3] = 1
            elif (pred_df.iloc[i, 0] > 0) and (pred_df.iloc[i, 1] < 0):
                pred_df.iloc[i, 5] = 1
        print('The number of true positives is {}'.format(sum(pred_df.loc[:, 'true_positive'])))
        print('The number of false positives is {}'.format(sum(pred_df.loc[:, 'false_positive'])))
        print('The number of true negatives is {}'.format(sum(pred_df.loc[:, 'true_negative'])))
        print('The number of false negatives is {}'.format(sum(pred_df.loc[:, 'false_negative'])))

    def run_model(self):
        x_train, y_train, x_test, y_test = self.prepare_data()
        model = self.create_model()
        self.compile_model(model)
        print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
        self.train_model(x_train, y_train, x_test, y_test,model)
        self.accuracy_metrics(model,y_test,x_test)


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
model=oneD_CNN()
model.run_model()

sma called
output size sma (2710, 2)
output size ema (2710, 1)
output size (2710, 1)
output size (2710, 2)
output size (2710, 1)
output size (2710, 3)
output size (2710, 3)
output size (2710, 1)
output size (2710, 1)
output size (2710, 1)
indicators  (2710, 27) type indicator <class 'pandas.core.frame.DataFrame'>
shape =  (2710, 32)
shape 1 =  (2458, 32)
df shape (2457, 35)
data [[ 7.70676966e-01  0.00000000e+00 -5.21064673e-14  8.99248566e+00
   1.13247944e+03  1.10798000e+03  1.08348056e+03  5.04009710e+01
   4.60693313e+01  4.33163963e+00  6.69971439e+04  6.89229811e+01
   4.14856898e+01 -5.09427296e-03  1.32180125e-02  5.50934673e-02
  -3.01947690e-02  3.56654173e-01 -1.76010772e-02  3.69627519e-02
   2.70636160e-03  3.25087681e-03 -1.42432916e-02 -5.69879426e-03
   1.47451866e+00  4.51373537e-04  8.39547913e-03  5.97988299e-03
  -3.12468941e-05  4.51373537e-04  9.56068879e-04 -4.01773044e-04
   3.21394709e-03  3.13780272e-03  2.76989336e-03]
 [ 7.31144892e-01  1.45149649e+01  4.83

ValueError: Found input variables with inconsistent numbers of samples: [2455, 2456]

In [13]:
model.pred

<__main__.oneD_CNN at 0x7f3dab9056a0>