In [1]:
import numpy as np
import tensorflow as tf
import sys
import time
from datetime import timedelta
import tensorflow.contrib.keras as kr
from sklearn import metrics
from sklearn.model_selection import KFold

import moxing as mox
mox.file.shift('os', 'mox')

INFO:root:Using MoXing-v1.14.1-ddfd6c9a
INFO:root:Using OBS-Python-SDK-3.1.2


In [2]:
# Labelled corpus; opened by dataset_split(), which reads one sample per line.
trainDataPath = "s3://corpus-2/dataset/corpus_5_new.txt"
# Pre-trained GloVe vectors; passed to loadGloVe() to build the vocabulary and embedding matrix.
vocabPath = "s3://corpus-text-classification1/data/glove.6B.100d.txt"

In [3]:
# Split configurations keyed by split name; each value is handed to dataset_split().
# A list is unpacked there as [num, pi]: `num` is the number of groups and `pi` is the
# index of the ':'-separated header field that holds the sample's 1-based group id.
# A falsy value (the "random" entry) selects the un-grouped code path instead.
split_info = {
    "random": False,
    "expert": [20, 4],
    "bundle": [920, 1],
    "table": [37, 3]
}


def _as_object_pairs(samples):
    """Pack ``[tokens, label]`` pairs into an ``(n, 2)`` object array.

    The array is filled cell by cell so NumPy never tries to broadcast the
    variable-length token lists (recent NumPy versions raise on such ragged
    input when it is passed straight to ``np.asarray``).
    """
    pairs = np.empty((len(samples), 2), dtype=object)
    for row, (tokens, label) in enumerate(samples):
        pairs[row, 0] = tokens
        pairs[row, 1] = label
    return pairs


def dataset_split(info, path=None):
    """Read the labelled corpus and return it shuffled, optionally grouped.

    Every non-blank line has the form ``"<f0>:<f1>:... tok tok ..."``: the
    first whitespace-separated item is a ':'-separated header whose field 0 is
    the integer label, the remaining items are the sample's tokens.

    Parameters
    ----------
    info : list or falsy
        ``[num, pi]`` to build ``num`` groups, where header field ``pi`` holds
        the sample's 1-based group id. Any falsy value disables grouping.
    path : str, optional
        File to read. Defaults to the notebook-level ``trainDataPath``.

    Returns
    -------
    list of numpy.ndarray or numpy.ndarray
        Grouped: a list of ``num`` object arrays of shape ``(n_i, 2)`` holding
        ``[tokens, label]`` rows, shuffled both within and across groups.
        Un-grouped: a single shuffled ``(n, 2)`` object array.

    Raises
    ------
    ValueError
        If a sample's group id falls outside ``1..num``.
    """
    source = trainDataPath if path is None else path

    # Parse once; blank lines are skipped instead of failing on `tokens[0]`.
    records = []
    with open(source, "r", encoding="utf-8") as fp:
        for line in fp:
            tokens = line.split()
            if not tokens:
                continue
            records.append((tokens[0].split(":"), tokens[1:]))

    if info:
        num, pi = info
        groups = [[] for _ in range(num)]
        for header, content in records:
            index = int(header[pi]) - 1
            # Without this check an id of 0 would wrap to -1 and silently
            # land in the last group.
            if not 0 <= index < num:
                raise ValueError(
                    "group id %s (header field %d) is outside 1..%d"
                    % (header[pi], pi, num))
            groups[index].append([content, int(header[0])])

        for i in range(num):
            np.random.shuffle(groups[i])
            groups[i] = _as_object_pairs(groups[i])

        # Shuffle group order as well so consecutive KFold slices are random groups.
        np.random.shuffle(groups)
        return groups

    samples = [[content, int(header[0])] for header, content in records]
    np.random.shuffle(samples)
    return _as_object_pairs(samples)


def mergeData(data_x, data_y):
    """Stack per-group feature and label arrays along the first axis.

    Parameters
    ----------
    data_x, data_y : sequence of array-like
        One array per group; ``data_x[i]`` and ``data_y[i]`` belong together.
        A list or a 1-D object array of arrays are both accepted.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The concatenated features and the concatenated labels.

    Raises
    ------
    ValueError
        If no group is given or the two sequences differ in length.
    """
    if len(data_x) == 0:
        raise ValueError("mergeData needs at least one group to merge")
    if len(data_x) != len(data_y):
        raise ValueError(
            "data_x and data_y must contain the same number of groups "
            "(%d != %d)" % (len(data_x), len(data_y)))

    # A single concatenate copies each group once; growing the result with
    # np.r_ inside a loop re-copies everything merged so far on every step.
    merge_x = np.concatenate([np.asarray(part) for part in data_x], axis=0)
    merge_y = np.concatenate([np.asarray(part) for part in data_y], axis=0)
    return merge_x, merge_y


def train_split_data(train_data, split_type, epochs=50):
    """Cross-validate the notebook-level ``classifier`` on one split of the data.

    Relies on these notebook globals: ``kf`` (the fold splitter),
    ``classifier``, ``process_file``, ``mergeData``, ``word_to_id``,
    ``num_classes``, ``seq_length`` and ``categories``.

    Parameters
    ----------
    train_data : list of numpy.ndarray or numpy.ndarray
        Output of ``dataset_split``. For grouped splits a list of ``(n_i, 2)``
        arrays (folds are formed over whole groups); for ``"random"`` a single
        ``(n, 2)`` array (folds are formed over samples).
    split_type : str
        Name of the split; ``"random"`` selects the sample-level path.
    epochs : int, optional
        Maximum number of epochs passed to ``classifier.train`` for each fold.

    Returns
    -------
    list
        The value returned by ``classifier.train`` for each fold.

    Raises
    ------
    ValueError
        If a grouped split contains no groups.
    """
    print(split_type)

    if split_type != "random":
        # Keep the per-group arrays in plain lists: groups differ in size, so
        # stacking them into one ndarray is not reliable across NumPy versions.
        group_x, group_y = [], []
        for group in train_data:
            x_group, y_group = process_file(
                group[:, 0], group[:, 1], word_to_id, num_classes, seq_length)
            group_x.append(x_group)
            group_y.append(y_group)
        if not group_x:
            raise ValueError("train_data contains no groups")

        # Group count, sizes of the first two groups (when present), sequence length.
        print(len(group_x), *[len(g) for g in group_x[:2]], len(group_x[0][0]))

        n_units = len(group_x)

        def fold_arrays(indices):
            # Merge the selected groups into one feature / label array pair.
            return mergeData([group_x[i] for i in indices],
                             [group_y[i] for i in indices])
    else:
        tx, ty = process_file(
            train_data[:, 0], train_data[:, 1], word_to_id, num_classes, seq_length)
        print(len(tx), len(tx[0]), len(tx[1]))

        n_units = len(tx)

        def fold_arrays(indices):
            return tx[indices], ty[indices]

    test_acc = []
    # The splitter only needs the number of units, so it is given an index range.
    for fold_id, (train_i, test_i) in enumerate(kf.split(np.arange(n_units)), start=1):
        print("Fold: ", fold_id)
        train_x, train_y = fold_arrays(train_i)
        test_x, test_y = fold_arrays(test_i)
        test_acc.append(classifier.train(
            X_train=train_x,
            y_train=train_y,
            X_eval=test_x,
            y_eval=test_y,
            categories=categories,
            epochs=epochs
        ))

    print(test_acc)
    # mean, population std, sample std (ddof=1), variance of the fold scores
    print("%s, %s, %s, %s" % (np.mean(test_acc),np.std(test_acc),np.std(test_acc,ddof=1),np.var(test_acc)))
    return test_acc

In [4]:
def loadGloVe(filename):
    """Load GloVe vectors from a text file.

    Each line is expected to be ``"<word> <v1> <v2> ..."`` separated by single
    spaces. Lines without any vector component (including blank lines) are
    skipped.

    Parameters
    ----------
    filename : str
        Path of the GloVe text file (UTF-8).

    Returns
    -------
    (list of str, list of list of float)
        ``vocab`` and ``embd``; ``embd[i]`` is the vector of ``vocab[i]``.
    """
    vocab = []
    embd = []
    print('Loading GloVe!')
    # NOTE(review): no reserved padding / unknown-word row is added here. The
    # notebook maps vocab[0] to id 0 while process_file pads sequences with 0,
    # so padding positions share the embedding of the first vocabulary word.
    # Adding a dedicated row requires knowing the embedding size up front.
    with open(filename, 'r', encoding='utf-8') as glove_file:
        # Stream the file line by line: it is large, and the context manager
        # closes the handle even if a line fails to parse.
        for line in glove_file:
            row = line.strip().split(' ')
            if len(row) < 2:
                continue
            vocab.append(row[0])
            embd.append([float(value) for value in row[1:]])
    print('Completed!')
    return vocab,embd


def process_file(contents, labels, word_to_id, num_classes, pad_max_length):
    """Turn tokenised samples into fixed-length id sequences and one-hot labels.

    Parameters
    ----------
    contents : sequence of sequence of str
        Tokens of each sample.
    labels : sequence of int
        1-based class label of each sample.
    word_to_id : dict
        Token -> integer id; tokens missing from it are dropped.
    num_classes : int
        Width of the one-hot label rows.
    pad_max_length : int
        Length every id sequence is padded / truncated to.

    Returns
    -------
    (x_pad, y_pad)
        The results of ``pad_sequences`` and ``to_categorical`` respectively.
    """
    # Map every known token to its id; out-of-vocabulary tokens are skipped.
    data_id = [
        [word_to_id[token] for token in contents[idx] if token in word_to_id]
        for idx in range(len(contents))
    ]
    # Labels are shifted to start at 0, as to_categorical expects.
    label_id = [labels[idx] - 1 for idx in range(len(contents))]

    # pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre',
    #               truncating='pre', value=0.)
    #   sequences:  nested list of ints/floats
    #   maxlen:     target length; longer sequences are cut, shorter ones padded
    #   padding:    'pre' or 'post' - pad at the start or at the end
    #   truncating: 'pre' or 'post' - cut from the start or from the end
    #   value:      fill value used for padding
    # Only maxlen is passed here, so the 'pre' defaults and value 0 apply.
    # Reference: https://blog.csdn.net/TH_NUM/article/details/80904900
    x_pad = kr.preprocessing.sequence.pad_sequences(data_id, pad_max_length)

    # to_categorical(y, num_classes=None, dtype='float32') converts integer
    # labels (starting at 0) into one-hot rows of width num_classes.
    # Reference: https://blog.csdn.net/nima1994/article/details/82468965
    y_pad = kr.utils.to_categorical(label_id, num_classes=num_classes)
    return x_pad, y_pad

In [5]:
# Class names, in label order; their count fixes the width of the one-hot labels.
categories = [
    'Retrieve Value',
    'Filter',
    'Compute Derived Value',
    'Find Extremum',
    'Sort',
    'Determine Range',
    'Characterize Distribution',
    'Find Anomalies',
    'Cluster',
    'Correlate',
]
num_classes = len(categories)

# Vocabulary and vectors come from the GloVe file; a word's id is its row index.
vocab, embd = loadGloVe(vocabPath)
vocab_size, embedding_dim = len(vocab), len(embd[0])
embedding = np.asarray(embd)
word_to_id = {token: index for index, token in enumerate(vocab)}

print(len(embedding),embedding_dim,vocab_size)

# Fixed sequence length fed to the model (37 was used for TREC).
seq_length = 41

Loading GloVe!
Completed!
400000 100 400000


In [6]:
class Classifier:
    """Softmax classifier built on top of a feature-extracting model (TF1 graph mode).

    ``model`` is any callable that maps the input tensor to a 2-D feature
    tensor and offers a ``summary()`` method. ``compile`` builds the graph
    once; ``train`` then opens its own session per call.

    NOTE(review): the same graph is run for training, evaluation and
    prediction. If the model applies dropout unconditionally (as
    KimConvolutionalModel in this notebook does), dropout is therefore also
    active in ``_eval`` and ``predict``. Confirm whether that is intended.
    """

    def __init__(self, model, input_length, output_length):
        """Store the model and the input / output widths; no graph is built yet."""
        self.model = model
        self.input_length = input_length
        self.output_length = output_length

    def compile(self, batch_size=32):
        """Build the input pipeline, the model, the output layer and the training ops.

        Data is fed through two placeholders into a batched dataset with an
        initializable iterator, so the same graph can be pointed at training
        or evaluation arrays by re-running the iterator initializer.
        """
        self._ds_x = tf.placeholder(tf.float32, [None, self.input_length])
        self._ds_y = tf.placeholder(tf.float32, [None, self.output_length])

        ds = tf.data.Dataset.from_tensor_slices((self._ds_x, self._ds_y))
        ds = ds.batch(batch_size)

        self._ds_it = ds.make_initializable_iterator()
        self._input, self._labels = self._ds_it.get_next()

        self._features = self.model(self._input)
        self._output = _create_dense_layer(self._features, self.output_length)
        # Built once here so predict() does not add a new softmax op to the
        # graph for every batch it evaluates.
        self._probabilities = tf.nn.softmax(self._output)

        self._create_acc_computations()
        self._create_backpropagation()

    def _create_acc_computations(self):
        """Create the prediction (argmax) and batch-accuracy nodes."""
        self._predictions = tf.argmax(self._output, 1)
        labels = tf.argmax(self._labels, 1)
        self._accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self._predictions, labels), 'float32'))

    def _create_backpropagation(self):
        """Create the mean cross-entropy loss and the Adam training op."""
        losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=self._output,
            labels=self._labels)
        self._loss = tf.reduce_mean(losses)

        optimizer = tf.train.AdamOptimizer(0.001)
        global_step = tf.Variable(0, name="global_step", trainable=False)
        grads_and_vars = optimizer.compute_gradients(self._loss)

        self._train_op = optimizer.apply_gradients(
            grads_and_vars, global_step=global_step)

    def summary(self):
        """Print the input shape, the model's own summary and the output shape."""
        print('input:', self._input.shape)
        self.model.summary()
        print('output:', self._output.shape)

    def train(self, X_train, y_train, X_eval, y_eval, categories, epochs=20, require_improve=3):
        """Train in a fresh session and return the accuracy on the evaluation set.

        All variables are re-initialised at the start of every call, so
        consecutive calls (one per cross-validation fold) do not share weights.
        Training stops early once the validation accuracy has not improved for
        more than ``require_improve`` epochs. The classification report and the
        confusion matrix are printed, and the accuracy returned, whether or not
        early stopping was triggered.

        Parameters
        ----------
        X_train, y_train : array-like
            Training inputs and one-hot labels.
        X_eval, y_eval : array-like
            Evaluation inputs and one-hot labels.
        categories : list of str
            Display name of each class, in class-id order.
        epochs : int
            Maximum number of passes over the training data.
        require_improve : int
            Patience of the early-stopping rule, in epochs.

        Returns
        -------
        float
            ``sklearn.metrics.accuracy_score`` of the final model on the
            evaluation set.
        """
        session = tf.Session()
        try:
            session.run(tf.global_variables_initializer())
            session.run(tf.local_variables_initializer())

            best_val_acc = 0.0
            last_improved = 0

            for e in range(epochs):
                start_time = time.time()
                loss, acc = self._train(X_train, y_train, session)
                duration = time.time() - start_time

                val_loss, val_acc = self._eval(X_eval, y_eval, session)

                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    last_improved = e
                    improved_str = '*'
                else:
                    improved_str = ''

                output = 'Epoch: {:>1}, Train Loss: {:>6.4}, Train Acc: {:>6.2%}, Val Loss: {:>6.4}, Val Acc: {:>6.2%}, Time: {:.2f}s {}'
                print(output.format(e + 1, loss, acc, val_loss, val_acc, duration, improved_str))

                if e - last_improved > require_improve:
                    print("No optimization for a long time, auto-stopping...")
                    break

            # Evaluated after the loop so a result exists even when the
            # early-stopping condition never fires.
            return self._report(X_eval, y_eval, categories, session)
        finally:
            session.close()

    def _report(self, X_eval, y_eval, categories, session):
        """Print per-class metrics and the confusion matrix; return the accuracy."""
        y_test_cls = np.argmax(y_eval, 1)  # true class ids
        y_test_pred_cls = np.argmax(self.predict(X_eval, session), 1)
        accuracy = metrics.accuracy_score(y_test_cls, y_test_pred_cls)

        # Passing the full label list keeps `target_names` aligned even when
        # an evaluation fold happens to miss one of the classes.
        class_ids = list(range(len(categories)))

        # classification_report: text table with precision, recall and F1 per class.
        print("Precision, Recall and F1-Score...")
        print(metrics.classification_report(
            y_test_cls, y_test_pred_cls, labels=class_ids, target_names=categories))

        # confusion_matrix: rows are true classes, columns are predicted classes.
        print("Confusion Matrix...")
        print(metrics.confusion_matrix(y_test_cls, y_test_pred_cls, labels=class_ids))
        return accuracy

    def _train(self, X_train, y_train, session):
        """Run one epoch of optimisation; return the mean batch loss and accuracy."""
        session.run(
            fetches=self._ds_it.initializer,
            feed_dict={
                self._ds_x: X_train,
                self._ds_y: y_train
            })
        batch_losses, batch_accs = [], []
        while True:
            try:
                _, vloss, vacc = session.run(
                    fetches=[self._train_op, self._loss, self._accuracy])
            except tf.errors.OutOfRangeError:
                # The iterator is exhausted: the epoch is over.
                break
            batch_losses.append(vloss)
            batch_accs.append(vacc)

        return np.mean(batch_losses), np.mean(batch_accs)

    def _eval(self, X_val, y_val, session):
        """Return the sample-weighted mean loss and accuracy over ``X_val``."""
        session.run(
            fetches=self._ds_it.initializer,
            feed_dict={
                self._ds_x: X_val,
                self._ds_y: y_val
            })

        loss, acc = 0, 0
        while True:
            try:
                batch_labels, vloss, vacc = session.run(
                    fetches=[self._labels, self._loss, self._accuracy])
            except tf.errors.OutOfRangeError:
                break
            # Weight by batch size so a short final batch does not skew the mean.
            loss += vloss * len(batch_labels)
            acc += vacc * len(batch_labels)

        return loss / len(X_val), acc / len(X_val)

    def predict(self, X, session):
        """Return the class probabilities of every row of ``X`` as a list of lists."""
        session.run(self._ds_it.initializer,
                    feed_dict={
                        self._ds_x: X,
                        # The pipeline requires labels; they are not used by
                        # the probability node, so zeros are fed as a dummy.
                        self._ds_y: np.zeros((len(X), self.output_length))
                    })

        pred = list()
        while True:
            try:
                batch_probabilities = session.run(self._probabilities)
            except tf.errors.OutOfRangeError:
                break
            pred.extend(batch_probabilities.tolist())

        return pred

def _create_dense_layer(x, output_length):
    '''Creates a dense layer
    '''
    input_size = x.shape[1].value
    W = tf.Variable(
        initial_value=tf.truncated_normal(
            shape=[input_size, output_length],
            stddev=0.1))
    b = tf.Variable(
        initial_value=tf.truncated_normal(
            shape=[output_length]))

    dense = tf.nn.xw_plus_b(x, W, b)

    return dense

In [7]:
class KimConvolutionalModel:
    '''
    Implementation proposal of: https://arxiv.org/pdf/1408.5882.pdf

    Pipeline built by ``__call__``: embedding lookup (one channel per
    embedding table) -> one convolution per filter size -> sum over the
    embedding channels -> max-over-time pooling -> concatenation -> dropout.
    '''
    def __init__(self,
        embeddings_configuration,
        conv_configurations = [(3, 100), (4, 100), (5, 100)],
        drop_rate           = 0.5):
        '''Constructor.
        # Parameters:
        embeddings_configuration: List of embeddings configuration. Each
            configuration is a pair of the form (embedding, trainable).
            `embedding` is a numpy array and `trainable` is a boolean that
            indicates whether that embedding is trainable or not.
        conv_configurations: List of pairs. Each pair represents a
            convolution configuration. Each configuration determines the
            size and number of each filter.
        drop_rate: Fraction of features dropped by the final dropout layer;
            must satisfy 0 <= drop_rate < 1.
        # Raises:
        ValueError if `drop_rate` is outside [0, 1).
        '''
        if not 0.0 <= drop_rate < 1.0:
            raise ValueError(
                'drop_rate must be in [0, 1), got %r' % (drop_rate,))

        # Copy both lists: the default `conv_configurations` object is shared
        # by every call that omits the argument, so storing it by reference
        # would let one instance's edits leak into all later instances.
        self._embeddings_configuration = list(embeddings_configuration)
        self._conv_configurations = list(conv_configurations)
        self._drop_rate = drop_rate

    def __call__(self, input):
        '''Builds the graph on top of `input` and returns the feature tensor.'''
        # One lookup per embedding table, stacked on axis 1 as channels.
        self._embeddings_tf = tf.stack(
            values = [
                self._create_embedding_layer(e, input)
                for e in self._embeddings_configuration],
            axis = 1
        )

        self._convolutions_tf = self._create_convolutional_layers(
            self._conv_configurations, self._embeddings_tf)

        self._add_tf = self._create_add_layers(self._convolutions_tf)

        self._poolings_tf = self._create_maxpooling_layer(
            self._add_tf)

        self._reshape_tf = self._create_reshape_layer(self._poolings_tf)
        # tf.nn.dropout expects the probability of KEEPING a unit, which is
        # the complement of the drop rate (identical only at 0.5).
        self._dropout_tf = tf.nn.dropout(
            self._reshape_tf,
            keep_prob = 1.0 - self._drop_rate)

        return self._dropout_tf

    def summary(self):
        '''Prints the static shape of every intermediate tensor.'''
        print('embedding:', str(self._embeddings_tf.shape))
        for c in self._convolutions_tf:
            print('conv:', str(c.shape))
        for a in self._add_tf:
            print('add:', str(a.shape))
        for p in self._poolings_tf:
            print('pool:', str(p.shape))
        print('reshape:', str(self._reshape_tf.shape))

    def _create_embedding_layer(self, embedding_configuration, input_x):
        '''Creates one embedding table and looks `input_x` up in it.
        # Parameters:
        embedding_configuration: Pair (initial matrix, trainable flag).
        input_x: Tensor of word ids; it is cast to int32 before the lookup.
        '''
        return tf.nn.embedding_lookup(
            params = tf.Variable(
                initial_value = embedding_configuration[0],
                trainable     = embedding_configuration[1]),
            ids = tf.cast(input_x, 'int32')
        )

    def _create_convolutional_layers(self, configuration, input_embedding):
        '''Creates the convolutional layers.
        # Parameters:
        configuration: A list. It must be of the form
            [(filter_size, num_filters), ...]
        # Returns:
        A list of tensorflow nodes. Each node 'i' computes the configuration 'i'.
        '''
        convolutions = []
        for filter_height, num_filters in configuration:
            # Filters span the full embedding width (axis 3 of the input),
            # so each one slides along the sequence axis only.
            filter_width = input_embedding.shape[3].value
            filter_shape = [1, filter_height, filter_width, num_filters]

            # Create weights and bias
            W = tf.Variable(
                initial_value=tf.truncated_normal(
                    shape=filter_shape,
                    stddev=0.1))
            b = tf.Variable(
                initial_value=tf.truncated_normal(
                    shape=[num_filters]))

            conv = tf.nn.conv2d(
                input=input_embedding,
                filter=W,
                strides=[1, 1, 1, 1],
                padding="VALID")
            bias = tf.nn.bias_add(conv, b)
            h = tf.nn.relu(bias)
            convolutions.append(h)

        return convolutions

    def _create_add_layers(self, convolutions):
        '''Sums every convolution output over axis 1 (the embedding channels),
        keeping that axis with size 1.
        '''
        return [
            tf.reduce_sum(
                input_tensor = c,
                axis=1,
                keepdims=True)
            for c in convolutions
        ]

    def _create_maxpooling_layer(self, tensors):
        '''Creates the maxpooling layer. Computes maxpooling on each node
        # Parameters:
        tensors: List of tensorflow nodes.
        # Returns:
        A list of tensorflow nodes. Each node 'i' computes the maxpooling of node 'i'
        '''
        return [
            tf.reshape(
                # The pooling window covers all of axis 2, so one maximum per
                # filter remains; the reshape then drops the size-1 axes.
                tensor = tf.nn.max_pool(
                    value=t,
                    ksize=[1, 1, t.shape[2], 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID'),
                shape = [-1, t.shape[3]]
            )
            for t in tensors
        ]

    def _create_reshape_layer(self, tensors):
        '''Concatenates the pooled features of all filter sizes along axis 1.
        '''
        return tf.concat(tensors, axis=1)

In [8]:
# The GloVe matrix is cast to float32 before it becomes the initial value of
# the embedding variable.
word_vector = embedding.astype('float32')

# A single embedding table; the `True` flag marks it as trainable.
model = KimConvolutionalModel(embeddings_configuration=[(word_vector, True)])

classifier = Classifier(model=model, input_length=seq_length, output_length=num_classes)

# Build the graph once; every later train() call reuses it.
classifier.compile(batch_size=32)
classifier.summary()

input: (?, 41)
embedding: (?, 1, 41, 100)
conv: (?, 1, 39, 100)
conv: (?, 1, 38, 100)
conv: (?, 1, 37, 100)
add: (?, 1, 39, 100)
add: (?, 1, 38, 100)
add: (?, 1, 37, 100)
pool: (?, 100)
pool: (?, 100)
pool: (?, 100)
reshape: (?, 300)
output: (?, 10)


In [9]:
# Grouped splits evaluated by this cell. This redefinition replaces the
# split_info from the earlier cell; the "random" entry is left out here.
split_info = {
    # "random": False,
    "expert": [20, 4],
    "bundle": [920, 1],
    "table": [37, 3]
}

# `kf` is read as a global by train_split_data(). KFold is created without
# shuffling; the data itself is shuffled inside dataset_split().
kf = KFold(n_splits=10)
# One list of per-fold scores for each split type, in iteration order.
test_acc_split = []
for split_type,info in split_info.items():
    train_data = dataset_split(info)
    test_acc_split.append(train_split_data(train_data, split_type))

expert
20 935 475 41
Fold:  1
Epoch: 1, Train Loss:  1.779, Train Acc: 45.09%, Val Loss:  1.428, Val Acc: 54.26%, Time: 8.13s *
Epoch: 2, Train Loss: 0.7172, Train Acc: 78.52%, Val Loss:  1.232, Val Acc: 62.13%, Time: 3.48s *
Epoch: 3, Train Loss:  0.444, Train Acc: 86.71%, Val Loss:  1.154, Val Acc: 66.45%, Time: 3.48s *
Epoch: 4, Train Loss:   0.29, Train Acc: 91.63%, Val Loss:   1.15, Val Acc: 67.16%, Time: 3.46s *
Epoch: 5, Train Loss: 0.2078, Train Acc: 94.00%, Val Loss:   1.14, Val Acc: 68.94%, Time: 3.46s *
Epoch: 6, Train Loss: 0.1498, Train Acc: 95.63%, Val Loss:  1.208, Val Acc: 68.01%, Time: 3.46s 
Epoch: 7, Train Loss:  0.108, Train Acc: 96.96%, Val Loss:  1.216, Val Acc: 69.72%, Time: 3.46s *
Epoch: 8, Train Loss: 0.09492, Train Acc: 97.21%, Val Loss:  1.183, Val Acc: 70.07%, Time: 3.45s *
Epoch: 9, Train Loss: 0.07364, Train Acc: 97.95%, Val Loss:  1.283, Val Acc: 69.29%, Time: 3.48s 
Epoch: 10, Train Loss: 0.06433, Train Acc: 98.09%, Val Loss:  1.413, Val Acc: 67.94%, Ti

Epoch: 1, Train Loss:  1.723, Train Acc: 46.39%, Val Loss:  1.368, Val Acc: 58.27%, Time: 3.78s *
Epoch: 2, Train Loss: 0.7225, Train Acc: 77.62%, Val Loss:  1.343, Val Acc: 61.66%, Time: 3.52s *
Epoch: 3, Train Loss: 0.4337, Train Acc: 87.29%, Val Loss:  1.485, Val Acc: 62.87%, Time: 3.51s *
Epoch: 4, Train Loss: 0.2838, Train Acc: 91.55%, Val Loss:   1.39, Val Acc: 65.05%, Time: 3.51s *
Epoch: 5, Train Loss: 0.2015, Train Acc: 94.08%, Val Loss:  1.367, Val Acc: 65.94%, Time: 3.52s *
Epoch: 6, Train Loss: 0.1553, Train Acc: 95.48%, Val Loss:  1.374, Val Acc: 66.18%, Time: 3.51s *
Epoch: 7, Train Loss: 0.1095, Train Acc: 96.84%, Val Loss:  1.428, Val Acc: 67.39%, Time: 3.52s *
Epoch: 8, Train Loss: 0.09259, Train Acc: 97.30%, Val Loss:  1.433, Val Acc: 69.73%, Time: 3.52s *
Epoch: 9, Train Loss: 0.0749, Train Acc: 97.80%, Val Loss:   1.57, Val Acc: 68.28%, Time: 3.53s 
Epoch: 10, Train Loss: 0.06158, Train Acc: 98.34%, Val Loss:  1.754, Val Acc: 65.62%, Time: 3.52s 
Epoch: 11, Train Lo

Epoch: 6, Train Loss: 0.1431, Train Acc: 95.97%, Val Loss:  1.822, Val Acc: 61.64%, Time: 3.52s *
Epoch: 7, Train Loss: 0.1154, Train Acc: 96.77%, Val Loss:  1.678, Val Acc: 63.52%, Time: 3.51s *
Epoch: 8, Train Loss: 0.08839, Train Acc: 97.56%, Val Loss:  1.784, Val Acc: 63.93%, Time: 3.53s *
Epoch: 9, Train Loss: 0.06989, Train Acc: 97.93%, Val Loss:  1.941, Val Acc: 61.48%, Time: 3.55s 
Epoch: 10, Train Loss: 0.05882, Train Acc: 98.34%, Val Loss:  2.092, Val Acc: 62.38%, Time: 3.54s 
Epoch: 11, Train Loss: 0.05453, Train Acc: 98.38%, Val Loss:  2.093, Val Acc: 62.13%, Time: 3.52s 
Epoch: 12, Train Loss: 0.04706, Train Acc: 98.61%, Val Loss:  2.208, Val Acc: 62.79%, Time: 3.52s 
No optimization for a long time, auto-stopping...
Precision, Recall and F1-Score...
                           precision    recall  f1-score   support

           Retrieve Value       0.39      0.52      0.45       115
                   Filter       0.41      0.53      0.46       110
    Compute Derived Valu

Epoch: 1, Train Loss:  1.755, Train Acc: 46.04%, Val Loss:  1.339, Val Acc: 58.78%, Time: 3.70s *
Epoch: 2, Train Loss: 0.7373, Train Acc: 77.12%, Val Loss:  1.177, Val Acc: 65.30%, Time: 3.50s *
Epoch: 3, Train Loss: 0.4505, Train Acc: 86.54%, Val Loss:    1.1, Val Acc: 67.71%, Time: 3.59s *
Epoch: 4, Train Loss: 0.2929, Train Acc: 91.35%, Val Loss:  1.102, Val Acc: 69.16%, Time: 3.51s *
Epoch: 5, Train Loss: 0.2164, Train Acc: 93.89%, Val Loss:   1.09, Val Acc: 70.21%, Time: 3.53s *
Epoch: 6, Train Loss: 0.1533, Train Acc: 95.85%, Val Loss:  1.156, Val Acc: 70.37%, Time: 3.51s *
Epoch: 7, Train Loss: 0.1252, Train Acc: 96.38%, Val Loss:  1.302, Val Acc: 68.68%, Time: 3.51s 
Epoch: 8, Train Loss: 0.09313, Train Acc: 97.40%, Val Loss:  1.318, Val Acc: 69.00%, Time: 3.54s 
Epoch: 9, Train Loss: 0.07708, Train Acc: 97.66%, Val Loss:  1.234, Val Acc: 71.42%, Time: 3.52s *
Epoch: 10, Train Loss: 0.05927, Train Acc: 98.30%, Val Loss:  1.286, Val Acc: 70.77%, Time: 3.51s 
Epoch: 11, Train Lo

Epoch: 1, Train Loss:  2.486, Train Acc: 30.51%, Val Loss:  1.632, Val Acc: 49.42%, Time: 3.65s *
Epoch: 2, Train Loss:  1.146, Train Acc: 63.44%, Val Loss:  1.392, Val Acc: 57.84%, Time: 3.47s *
Epoch: 3, Train Loss: 0.7613, Train Acc: 76.65%, Val Loss:  1.345, Val Acc: 62.73%, Time: 3.46s *
Epoch: 4, Train Loss: 0.5483, Train Acc: 83.78%, Val Loss:  1.313, Val Acc: 63.96%, Time: 3.47s *
Epoch: 5, Train Loss: 0.3928, Train Acc: 88.66%, Val Loss:  1.273, Val Acc: 66.19%, Time: 3.46s *
Epoch: 6, Train Loss: 0.2988, Train Acc: 90.99%, Val Loss:  1.293, Val Acc: 66.98%, Time: 3.48s *
Epoch: 7, Train Loss:  0.216, Train Acc: 93.81%, Val Loss:  1.414, Val Acc: 66.33%, Time: 3.48s 
Epoch: 8, Train Loss: 0.1604, Train Acc: 95.57%, Val Loss:  1.351, Val Acc: 68.13%, Time: 3.56s *
Epoch: 9, Train Loss:   0.13, Train Acc: 96.52%, Val Loss:  1.464, Val Acc: 67.91%, Time: 3.50s 
Epoch: 10, Train Loss: 0.1031, Train Acc: 97.21%, Val Loss:  1.577, Val Acc: 68.20%, Time: 3.48s *
Epoch: 11, Train Loss

Epoch: 2, Train Loss:  1.134, Train Acc: 63.45%, Val Loss:  1.615, Val Acc: 52.51%, Time: 3.45s *
Epoch: 3, Train Loss: 0.7512, Train Acc: 76.74%, Val Loss:  1.558, Val Acc: 58.04%, Time: 3.45s *
Epoch: 4, Train Loss:  0.535, Train Acc: 84.10%, Val Loss:  1.518, Val Acc: 60.07%, Time: 3.47s *
Epoch: 5, Train Loss: 0.3858, Train Acc: 88.63%, Val Loss:  1.638, Val Acc: 59.05%, Time: 3.44s 
Epoch: 6, Train Loss: 0.2788, Train Acc: 92.34%, Val Loss:   1.64, Val Acc: 60.95%, Time: 3.47s *
Epoch: 7, Train Loss: 0.2096, Train Acc: 94.46%, Val Loss:  1.719, Val Acc: 61.45%, Time: 3.46s *
Epoch: 8, Train Loss: 0.1614, Train Acc: 95.58%, Val Loss:  1.816, Val Acc: 60.65%, Time: 3.46s 
Epoch: 9, Train Loss: 0.1311, Train Acc: 96.28%, Val Loss:  1.842, Val Acc: 60.80%, Time: 3.47s 
Epoch: 10, Train Loss: 0.1037, Train Acc: 97.26%, Val Loss:  1.896, Val Acc: 62.04%, Time: 3.45s *
Epoch: 11, Train Loss: 0.08411, Train Acc: 97.65%, Val Loss:  2.025, Val Acc: 61.75%, Time: 3.47s 
Epoch: 12, Train Loss

Epoch: 1, Train Loss:  2.533, Train Acc: 28.40%, Val Loss:  1.623, Val Acc: 45.00%, Time: 3.64s *
Epoch: 2, Train Loss:  1.173, Train Acc: 62.88%, Val Loss:  1.327, Val Acc: 58.86%, Time: 3.47s *
Epoch: 3, Train Loss:  0.789, Train Acc: 75.69%, Val Loss:  1.253, Val Acc: 62.36%, Time: 3.47s *
Epoch: 4, Train Loss: 0.5733, Train Acc: 83.03%, Val Loss:    1.2, Val Acc: 63.79%, Time: 3.47s *
Epoch: 5, Train Loss: 0.4173, Train Acc: 87.93%, Val Loss:  1.248, Val Acc: 65.14%, Time: 3.55s *
Epoch: 6, Train Loss: 0.3047, Train Acc: 91.09%, Val Loss:  1.311, Val Acc: 66.21%, Time: 3.49s *
Epoch: 7, Train Loss: 0.2368, Train Acc: 93.32%, Val Loss:   1.33, Val Acc: 65.71%, Time: 3.47s 
Epoch: 8, Train Loss: 0.1692, Train Acc: 95.36%, Val Loss:  1.355, Val Acc: 66.07%, Time: 3.45s 
Epoch: 9, Train Loss: 0.1367, Train Acc: 96.31%, Val Loss:  1.429, Val Acc: 66.93%, Time: 3.48s *
Epoch: 10, Train Loss: 0.1088, Train Acc: 96.95%, Val Loss:  1.533, Val Acc: 66.93%, Time: 3.45s 
Epoch: 11, Train Loss:

Epoch: 1, Train Loss:  1.879, Train Acc: 43.26%, Val Loss:   1.76, Val Acc: 45.56%, Time: 3.35s *
Epoch: 2, Train Loss: 0.7737, Train Acc: 76.36%, Val Loss:  1.536, Val Acc: 54.93%, Time: 3.20s *
Epoch: 3, Train Loss: 0.4666, Train Acc: 85.83%, Val Loss:  1.549, Val Acc: 56.01%, Time: 3.18s *
Epoch: 4, Train Loss: 0.2993, Train Acc: 91.29%, Val Loss:   1.63, Val Acc: 56.63%, Time: 3.21s *
Epoch: 5, Train Loss: 0.2111, Train Acc: 94.19%, Val Loss:  1.657, Val Acc: 58.50%, Time: 3.22s *
Epoch: 6, Train Loss: 0.1523, Train Acc: 95.63%, Val Loss:  1.697, Val Acc: 59.41%, Time: 3.30s *
Epoch: 7, Train Loss: 0.1128, Train Acc: 96.75%, Val Loss:  1.866, Val Acc: 57.50%, Time: 3.21s 
Epoch: 8, Train Loss: 0.09509, Train Acc: 97.36%, Val Loss:  2.015, Val Acc: 56.97%, Time: 3.17s 
Epoch: 9, Train Loss: 0.07375, Train Acc: 98.01%, Val Loss:  1.984, Val Acc: 58.29%, Time: 3.19s 
Epoch: 10, Train Loss: 0.0612, Train Acc: 98.34%, Val Loss:  2.045, Val Acc: 59.29%, Time: 3.19s 
No optimization for a

Epoch: 1, Train Loss:  1.831, Train Acc: 44.35%, Val Loss:  1.573, Val Acc: 48.13%, Time: 3.59s *
Epoch: 2, Train Loss: 0.7872, Train Acc: 75.86%, Val Loss:  1.307, Val Acc: 58.13%, Time: 3.43s *
Epoch: 3, Train Loss: 0.4837, Train Acc: 85.79%, Val Loss:  1.249, Val Acc: 63.17%, Time: 3.42s *
Epoch: 4, Train Loss: 0.3291, Train Acc: 90.36%, Val Loss:  1.303, Val Acc: 62.43%, Time: 3.43s 
Epoch: 5, Train Loss: 0.2299, Train Acc: 93.42%, Val Loss:  1.421, Val Acc: 61.20%, Time: 3.41s 
Epoch: 6, Train Loss: 0.1651, Train Acc: 95.28%, Val Loss:  1.467, Val Acc: 62.12%, Time: 3.41s 
Epoch: 7, Train Loss: 0.1223, Train Acc: 96.57%, Val Loss:  1.505, Val Acc: 62.00%, Time: 3.41s 
No optimization for a long time, auto-stopping...
Precision, Recall and F1-Score...
                           precision    recall  f1-score   support

           Retrieve Value       0.72      0.42      0.53       168
                   Filter       0.46      0.56      0.50       201
    Compute Derived Value       

Epoch: 11, Train Loss: 0.05274, Train Acc: 98.50%, Val Loss:  1.703, Val Acc: 61.00%, Time: 3.76s 
No optimization for a long time, auto-stopping...
Precision, Recall and F1-Score...
                           precision    recall  f1-score   support

           Retrieve Value       0.36      0.12      0.18        34
                   Filter       0.39      0.41      0.40        32
    Compute Derived Value       0.64      0.67      0.65        63
            Find Extremum       1.00      0.82      0.90        77
                     Sort       0.67      0.62      0.64        26
          Determine Range       0.30      0.72      0.42        29
Characterize Distribution       0.37      0.36      0.36        45
           Find Anomalies       0.73      0.33      0.46        33
                  Cluster       0.72      0.60      0.65        30
                Correlate       0.66      0.90      0.76        49

                micro avg       0.59      0.59      0.59       418
           

In [None]:
# The saved output of this cell is the "random" run, but the loop used to
# iterate whatever `split_info` the kernel happened to hold. The configuration
# is now named here so that a fresh Restart & Run All produces that run.
# A falsy value makes dataset_split() return the un-grouped, shuffled corpus.
random_split_info = {"random": False}

kf = KFold(n_splits=10)
test_acc_split = []
for split_type, info in random_split_info.items():
    train_data = dataset_split(info)
    test_acc_split.append(train_split_data(train_data, split_type))

random
14035 41 41
Fold:  1
Epoch: 1, Train Loss:  1.721, Train Acc: 48.15%, Val Loss: 0.9305, Val Acc: 71.30%, Time: 54.91s *
Epoch: 2, Train Loss:  0.659, Train Acc: 79.91%, Val Loss: 0.5716, Val Acc: 82.69%, Time: 54.80s *
Epoch: 3, Train Loss: 0.3863, Train Acc: 88.56%, Val Loss: 0.4205, Val Acc: 87.32%, Time: 54.21s *
Epoch: 4, Train Loss: 0.2544, Train Acc: 92.69%, Val Loss: 0.3696, Val Acc: 89.46%, Time: 54.69s *
Epoch: 5, Train Loss: 0.1841, Train Acc: 94.77%, Val Loss:  0.306, Val Acc: 90.60%, Time: 55.56s *
Epoch: 6, Train Loss: 0.1327, Train Acc: 96.26%, Val Loss: 0.3011, Val Acc: 90.81%, Time: 54.52s *
Epoch: 7, Train Loss: 0.1044, Train Acc: 97.15%, Val Loss: 0.2641, Val Acc: 92.17%, Time: 54.46s *
Epoch: 8, Train Loss: 0.08492, Train Acc: 97.56%, Val Loss: 0.2606, Val Acc: 91.95%, Time: 54.39s 
Epoch: 9, Train Loss: 0.06751, Train Acc: 98.17%, Val Loss: 0.2519, Val Acc: 93.23%, Time: 54.23s *
Epoch: 10, Train Loss: 0.05726, Train Acc: 98.41%, Val Loss: 0.2443, Val Acc: 93

Epoch: 1, Train Loss:  1.709, Train Acc: 48.65%, Val Loss: 0.8859, Val Acc: 72.15%, Time: 53.89s *
Epoch: 2, Train Loss: 0.6522, Train Acc: 80.17%, Val Loss: 0.5756, Val Acc: 83.05%, Time: 53.17s *
Epoch: 3, Train Loss: 0.3933, Train Acc: 88.60%, Val Loss: 0.4061, Val Acc: 87.39%, Time: 54.41s *
Epoch: 4, Train Loss: 0.2686, Train Acc: 92.38%, Val Loss: 0.3286, Val Acc: 90.03%, Time: 49.41s *
Epoch: 5, Train Loss: 0.1823, Train Acc: 94.98%, Val Loss: 0.3236, Val Acc: 90.31%, Time: 54.95s *
Epoch: 6, Train Loss: 0.1359, Train Acc: 96.19%, Val Loss: 0.2738, Val Acc: 91.67%, Time: 53.75s *
Epoch: 7, Train Loss: 0.1077, Train Acc: 96.96%, Val Loss: 0.2733, Val Acc: 91.81%, Time: 54.28s *
Epoch: 8, Train Loss: 0.08884, Train Acc: 97.48%, Val Loss: 0.2216, Val Acc: 94.23%, Time: 53.74s *
Epoch: 9, Train Loss: 0.07443, Train Acc: 97.81%, Val Loss: 0.2339, Val Acc: 92.74%, Time: 54.85s 
Epoch: 10, Train Loss: 0.05679, Train Acc: 98.38%, Val Loss: 0.2421, Val Acc: 93.87%, Time: 54.81s 
Epoch: 1

Epoch: 1, Train Loss:  1.627, Train Acc: 50.55%, Val Loss: 0.9099, Val Acc: 70.14%, Time: 53.98s *
Epoch: 2, Train Loss: 0.6371, Train Acc: 80.31%, Val Loss:  0.591, Val Acc: 81.40%, Time: 53.24s *
Epoch: 3, Train Loss: 0.3915, Train Acc: 88.41%, Val Loss: 0.3932, Val Acc: 86.60%, Time: 53.93s *
Epoch: 4, Train Loss: 0.2603, Train Acc: 92.18%, Val Loss: 0.3294, Val Acc: 89.09%, Time: 53.47s *
Epoch: 5, Train Loss: 0.1804, Train Acc: 94.80%, Val Loss: 0.2738, Val Acc: 91.80%, Time: 54.73s *
Epoch: 6, Train Loss:  0.136, Train Acc: 96.07%, Val Loss: 0.2636, Val Acc: 91.59%, Time: 54.16s 
Epoch: 7, Train Loss: 0.1057, Train Acc: 97.04%, Val Loss: 0.2308, Val Acc: 93.01%, Time: 54.03s *
Epoch: 8, Train Loss: 0.08638, Train Acc: 97.60%, Val Loss: 0.2151, Val Acc: 93.16%, Time: 54.15s *
Epoch: 9, Train Loss: 0.06926, Train Acc: 98.07%, Val Loss: 0.1964, Val Acc: 93.80%, Time: 53.71s *
Epoch: 10, Train Loss: 0.05776, Train Acc: 98.25%, Val Loss: 0.1891, Val Acc: 94.58%, Time: 53.81s *
Epoch: 

Epoch: 17, Train Loss:  0.033, Train Acc: 99.03%, Val Loss: 0.2975, Val Acc: 93.16%, Time: 53.95s 
Epoch: 18, Train Loss: 0.02896, Train Acc: 99.07%, Val Loss: 0.2561, Val Acc: 94.30%, Time: 53.95s 
Epoch: 19, Train Loss: 0.02432, Train Acc: 99.22%, Val Loss: 0.2479, Val Acc: 95.01%, Time: 54.58s *
Epoch: 20, Train Loss: 0.02423, Train Acc: 99.30%, Val Loss: 0.2445, Val Acc: 95.22%, Time: 53.91s *
Epoch: 21, Train Loss: 0.02087, Train Acc: 99.37%, Val Loss: 0.2955, Val Acc: 94.30%, Time: 53.84s 
Epoch: 22, Train Loss: 0.02574, Train Acc: 99.20%, Val Loss: 0.2114, Val Acc: 95.65%, Time: 54.00s *
Epoch: 23, Train Loss: 0.01908, Train Acc: 99.36%, Val Loss: 0.2734, Val Acc: 94.65%, Time: 54.04s 
Epoch: 24, Train Loss: 0.01963, Train Acc: 99.45%, Val Loss:  0.303, Val Acc: 94.08%, Time: 48.68s 
Epoch: 25, Train Loss: 0.02071, Train Acc: 99.38%, Val Loss: 0.2925, Val Acc: 94.44%, Time: 54.33s 
Epoch: 26, Train Loss: 0.02078, Train Acc: 99.36%, Val Loss: 0.2674, Val Acc: 95.08%, Time: 53.78s

Epoch: 1, Train Loss:  1.768, Train Acc: 46.95%, Val Loss:   1.67, Val Acc: 50.64%, Time: 54.48s *
Epoch: 2, Train Loss: 0.7039, Train Acc: 78.40%, Val Loss:  1.658, Val Acc: 55.14%, Time: 54.53s *
Epoch: 3, Train Loss: 0.4165, Train Acc: 87.73%, Val Loss:  1.649, Val Acc: 57.80%, Time: 55.26s *
Epoch: 4, Train Loss: 0.2687, Train Acc: 92.45%, Val Loss:  1.773, Val Acc: 59.08%, Time: 53.96s *
Epoch: 5, Train Loss: 0.1826, Train Acc: 94.74%, Val Loss:  1.907, Val Acc: 59.73%, Time: 53.71s *
Epoch: 6, Train Loss: 0.1387, Train Acc: 96.12%, Val Loss:  1.997, Val Acc: 59.57%, Time: 54.64s 
Epoch: 7, Train Loss: 0.1061, Train Acc: 96.99%, Val Loss:  2.077, Val Acc: 59.32%, Time: 55.07s 
Epoch: 8, Train Loss: 0.08586, Train Acc: 97.62%, Val Loss:  2.093, Val Acc: 61.41%, Time: 54.15s *
Epoch: 9, Train Loss: 0.06717, Train Acc: 98.15%, Val Loss:  2.311, Val Acc: 58.28%, Time: 54.46s 
Epoch: 10, Train Loss: 0.05434, Train Acc: 98.59%, Val Loss:   2.25, Val Acc: 60.05%, Time: 54.37s 
Epoch: 11,

Epoch: 7, Train Loss: 0.1107, Train Acc: 96.99%, Val Loss:  1.707, Val Acc: 66.03%, Time: 52.83s 
Epoch: 8, Train Loss: 0.09013, Train Acc: 97.32%, Val Loss:  1.605, Val Acc: 68.01%, Time: 52.35s *
Epoch: 9, Train Loss: 0.06861, Train Acc: 98.03%, Val Loss:  1.677, Val Acc: 66.69%, Time: 52.79s 
Epoch: 10, Train Loss: 0.06077, Train Acc: 98.34%, Val Loss:  1.788, Val Acc: 66.81%, Time: 46.03s 
Epoch: 11, Train Loss: 0.04691, Train Acc: 98.69%, Val Loss:  1.782, Val Acc: 69.03%, Time: 52.66s *
Epoch: 12, Train Loss: 0.03792, Train Acc: 98.83%, Val Loss:  1.984, Val Acc: 68.49%, Time: 52.30s 
Epoch: 13, Train Loss: 0.0403, Train Acc: 98.68%, Val Loss:  1.959, Val Acc: 66.09%, Time: 52.07s 
Epoch: 14, Train Loss: 0.03804, Train Acc: 98.76%, Val Loss:  2.026, Val Acc: 66.45%, Time: 53.15s 
Epoch: 15, Train Loss: 0.0331, Train Acc: 98.99%, Val Loss:  2.121, Val Acc: 68.67%, Time: 52.39s 
No optimization for a long time, auto-stopping...
Precision, Recall and F1-Score...
                    

Epoch: 1, Train Loss:  1.666, Train Acc: 48.52%, Val Loss:  1.562, Val Acc: 51.06%, Time: 54.12s *
Epoch: 2, Train Loss: 0.7032, Train Acc: 78.77%, Val Loss:  1.416, Val Acc: 57.36%, Time: 54.70s *
Epoch: 3, Train Loss: 0.4258, Train Acc: 87.64%, Val Loss:  1.464, Val Acc: 59.25%, Time: 49.70s *
Epoch: 4, Train Loss: 0.2888, Train Acc: 91.68%, Val Loss:  1.552, Val Acc: 59.33%, Time: 54.42s *
