In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import csv

# Hyper-parameters and paths shared by the Model class and the training script.
# The per-layer lists are consumed in lockstep with zip() inside Model, so the
# lists belonging to one layer type are expected to have the same length.
params = {
    'num_classes': 12,

    # CNN parameters (one list entry per convolutional layer)
    'use_cnn': False,
    'num_filters': [8, 16, 32, 32, 64, 128],
    'filter_size': [3, 3, 3, 3, 3, 3],
    'cnn_batch_norm' : [True, True, True, True, True, True],
    # A pool size of 1 means "no max-pooling after this layer".
    'pool_sizes': [2, 2, 2, 2, 1, 1],
    # NOTE: despite the "keep_prob" name, Model passes these values to
    # tf.layers.dropout as the drop `rate`; a falsy value (0) skips dropout.
    'cnn_dropout_keep_prob': [0, 0.2, 0.3, 0.3, 0.3, 0.3],

    # Dense (fully connected) parameters (one list entry per hidden layer)
    'use_fc': False,
    # NOTE(review): 1028 may be a typo for 1024 — confirm before changing.
    'fc_hidden_units': [1028, 512, 256],
    'fc_batch_norm': [True, True, True],
    # NOTE: also passed to tf.layers.dropout as the drop `rate`, not a keep probability.
    'fc_dropout_keep_prob': [0.4, 0.3, 0.3],

    # RNN (LSTM) parameters (one list entry per stacked LSTM layer)
    'use_rnn': True,
    'rnn_n_hiddens': [1024, 512],
    # Used as DropoutWrapper(output_keep_prob=...) while training; Model replaces
    # these with ones when is_training is False.
    'rnn_dropout_keep_prob': [0.7, 0.7],

    # Global Average Pooling
    'use_gap': False,

    'learning_rate': 0.001,
    'activation': tf.nn.relu,
    'batch_size': 128,
    'epochs': 15,
    # Spatial size of one input spectrogram; also used for the input placeholder.
    'height': 128,
    'width': 100,
    # When 'l2_reg' is true, 'lambda' scales the L2 penalty added to the loss.
    'l2_reg': False,
    'lambda': 0.01,
    # Directory and base file name for summaries, checkpoints and CSV outputs.
    'model_path': './6conv/',
    'model_file': '6conv'
}


class Model:
    """Build the classifier graph described by a ``params`` dictionary.

    Depending on the ``use_*`` flags the input is passed, in this order,
    through a stack of convolutional blocks, an LSTM (single or stacked),
    global average pooling and fully connected layers. A final dense layer
    named ``out`` produces ``num_classes`` logits. Everything is created
    inside the variable scope ``name``.

    Note on dropout values: ``cnn_dropout_keep_prob`` and
    ``fc_dropout_keep_prob`` are handed to ``tf.layers.dropout`` as its
    ``rate`` argument, while ``rnn_dropout_keep_prob`` is handed to
    ``DropoutWrapper`` as ``output_keep_prob``.
    """

    # (attribute name, params key) pairs copied verbatim by __init__, in order.
    _PARAM_FIELDS = (
        ('num_classes', 'num_classes'),
        ('use_cnn', 'use_cnn'),
        ('num_filters', 'num_filters'),
        ('filter_sizes', 'filter_size'),
        ('cnn_batch_norm', 'cnn_batch_norm'),
        ('pool_sizes', 'pool_sizes'),
        ('cnn_dropout_keep_prob', 'cnn_dropout_keep_prob'),
        ('use_fc', 'use_fc'),
        ('fc_hidden_units', 'fc_hidden_units'),
        ('fc_batch_norm', 'fc_batch_norm'),
        ('fc_dropout_keep_prob', 'fc_dropout_keep_prob'),
        ('use_rnn', 'use_rnn'),
        ('rnn_n_hiddens', 'rnn_n_hiddens'),
        ('rnn_dropout_keep_prob', 'rnn_dropout_keep_prob'),
        ('use_gap', 'use_gap'),
        ('learning_rate', 'learning_rate'),
        ('activation', 'activation'),
        ('height', 'height'),
        ('width', 'width'),
        ('model_path', 'model_path'),
    )

    def __init__(self, params, name):
        """Copy the hyper-parameters from ``params`` and remember the scope name.

        Args:
            params: dictionary holding every key listed in ``_PARAM_FIELDS``.
            name: variable scope under which ``get_logits`` creates the layers.

        Raises:
            KeyError: if ``params`` lacks one of the required keys.
        """
        for attribute, key in self._PARAM_FIELDS:
            setattr(self, attribute, params[key])

        # 1-based layer indices; they become part of the layer names.
        self.idx_convolutional_layers = range(1, len(self.filter_sizes) + 1)
        self.idx_fc_layers = range(1, len(self.fc_hidden_units) + 1)
        self.idx_rnn_layers = range(1, len(self.rnn_n_hiddens) + 1)

        self.name = name

    def convolutional_layers(self, X, is_training=True, reuse=False):
        """Apply the conv -> [batch norm] -> activation -> [dropout] -> [max-pool] blocks.

        Args:
            X: input tensor fed to the first ``tf.layers.conv2d``.
            is_training: forwarded to batch normalization and dropout.
            reuse: forwarded to the conv and batch-norm layers to share variables.

        Returns:
            The output tensor of the last block (``X`` itself if there are no layers).
        """
        net = X
        layer_specs = zip(self.idx_convolutional_layers,
                          self.num_filters,
                          self.filter_sizes,
                          self.cnn_batch_norm,
                          self.pool_sizes,
                          self.cnn_dropout_keep_prob)

        for idx, n_filters, kernel, with_bn, pool, drop_rate in layer_specs:
            net = tf.layers.conv2d(net,
                                   filters=n_filters,
                                   kernel_size=kernel,
                                   strides=1,
                                   padding='SAME',
                                   name='CONV' + str(idx),
                                   reuse=reuse)

            if with_bn:
                net = tf.layers.batch_normalization(net,
                                                    training=is_training,
                                                    name='BN' + str(idx),
                                                    reuse=reuse)

            net = self.activation(net)

            # A falsy value (0) disables dropout for this layer; the value is
            # the fraction of units dropped.
            if drop_rate:
                net = tf.layers.dropout(net, rate=drop_rate, training=is_training)

            # A pool size of 1 would be a no-op, so pooling is skipped.
            if pool != 1:
                net = tf.layers.max_pooling2d(net, pool_size=pool, strides=pool, padding='SAME')

        return net

    def fc_layers(self, X, is_training=True, reuse=False):
        """Apply the dense -> [batch norm] -> activation -> [dropout] blocks.

        Args:
            X: input tensor fed to the first ``tf.layers.dense``.
            is_training: forwarded to batch normalization and dropout.
            reuse: forwarded to the dense and batch-norm layers to share variables.

        Returns:
            The output tensor of the last block (``X`` itself if there are no layers).
        """
        net = X
        layer_specs = zip(self.idx_fc_layers,
                          self.fc_hidden_units,
                          self.fc_batch_norm,
                          self.fc_dropout_keep_prob)

        for idx, n_units, with_bn, drop_rate in layer_specs:
            suffix = str(idx)
            net = tf.layers.dense(net, units=n_units, reuse=reuse, name='FC' + suffix)

            if with_bn:
                net = tf.layers.batch_normalization(net,
                                                    training=is_training,
                                                    name='fc_BN' + suffix,
                                                    reuse=reuse)

            net = self.activation(net)

            # A falsy value (0) disables dropout for this layer.
            if drop_rate:
                net = tf.layers.dropout(net,
                                        rate=drop_rate,
                                        training=is_training,
                                        name='fc_dropout' + suffix)

        return net

    def rnn_layers(self, inputs, is_training=True, reuse=False):
        """Run the LSTM stack over ``inputs`` and return the last time step.

        Args:
            inputs: tensor passed to ``tf.nn.dynamic_rnn`` (batch-major).
            is_training: when false, every output keep probability becomes 1.
            reuse: forwarded to each ``BasicLSTMCell`` to share variables.

        Returns:
            The RNN output at the final time step.
        """
        keep_probs = (self.rnn_dropout_keep_prob if is_training
                      else np.ones_like(self.rnn_dropout_keep_prob))

        def build_cell(n_hidden, keep_prob):
            lstm = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, reuse=reuse)
            return tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=keep_prob)

        if len(self.idx_rnn_layers) == 1:
            # A single layer is used directly, without a MultiRNNCell wrapper.
            cell = build_cell(self.rnn_n_hiddens[0], keep_probs[0])
        else:
            layer_specs = zip(self.idx_rnn_layers, self.rnn_n_hiddens, keep_probs)
            cell = tf.nn.rnn_cell.MultiRNNCell(
                [build_cell(n_hidden, keep_prob) for _, n_hidden, keep_prob in layer_specs])

        outputs, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)

        # Move the time axis to the front and pick its last entry.
        return tf.transpose(outputs, [1, 0, 2])[-1]

    def get_reshaped_cnn_to_rnn(self, inputs):
        """Turn a [batch, height, width, channels] tensor into an RNN sequence.

        The width axis becomes the sequence axis and the height and channel
        axes are merged, giving [batch, width, height * channels].
        """
        static_shape = inputs.get_shape().as_list()
        height = static_shape[1]
        width = static_shape[2]
        channels = static_shape[3]

        width_major = tf.transpose(inputs, [0, 2, 1, 3])
        return tf.reshape(width_major, [-1, width, height * channels])

    def get_logits(self, X, is_training=True, reuse=False):
        """Assemble the enabled sub-networks and return the class logits.

        Args:
            X: input tensor.
            is_training: forwarded to every sub-network.
            reuse: set to true to share variables with an earlier call.

        Returns:
            Output of the final dense layer with ``num_classes`` units.
        """
        with tf.variable_scope(self.name):
            net = X

            if self.use_cnn:
                net = self.convolutional_layers(net, is_training, reuse)

            if self.use_rnn:
                sequence = self.get_reshaped_cnn_to_rnn(net)
                net = self.rnn_layers(sequence, is_training, reuse)

            if self.use_gap:
                static_shape = net.get_shape().as_list()

                # Pool over the full (height, width) extent.
                net = tf.layers.average_pooling2d(net,
                                                  pool_size=(static_shape[1], static_shape[2]),
                                                  strides=1,
                                                  padding='VALID')

                # Flatten for the dense layers that follow.
                net = tf.layers.flatten(net)

            if self.use_fc:
                if not self.use_gap:
                    net = tf.layers.flatten(net)
                net = self.fc_layers(net, is_training, reuse)

            logits = tf.layers.dense(net, units=self.num_classes, reuse=reuse, name='out')

        return logits
    

def train_parser(serialized_example):
    """Decode one serialized training example into ``(spectrum, label)``.

    The record is parsed with a fixed-length float32 feature ``spectrum`` of
    128 * 100 values and a fixed-length int64 feature ``label`` of 12 values.
    """
    feature_spec = {
        "spectrum": tf.FixedLenFeature([128 * 100], tf.float32),
        "label": tf.FixedLenFeature([12], tf.int64),
    }
    example = tf.parse_single_example(serialized_example, feature_spec)

    return example['spectrum'], example['label']
        
    
def test_parser(serialized_example):
    """Decode one serialized test example and return its ``spectrum`` feature.

    Test records carry no label; only the fixed-length float32 feature
    ``spectrum`` of 128 * 100 values is parsed.
    """
    feature_spec = {
        "spectrum": tf.FixedLenFeature([128 * 100], tf.float32),
    }
    example = tf.parse_single_example(serialized_example, feature_spec)

    return example['spectrum']

  return f(*args, **kwds)


In [3]:
# Build the input pipelines, the model graph (training, evaluation and
# prediction heads share one set of variables), the summaries, the saver and
# the session used by the training cell.
tf.reset_default_graph()

test_data_dir = "../data/tfrecords/test_final.tfrecord"
train_data_dir = "../data/tfrecords/train_end.tfrecord"
eval_data_dir = "../data/tfrecords/eval_end.tfrecord"

# --- Input pipelines -------------------------------------------------------
train_dataset = tf.data.TFRecordDataset(train_data_dir).map(train_parser)
train_dataset = train_dataset.shuffle(700000, seed=1, reshuffle_each_iteration=True)
train_dataset = train_dataset.batch(params['batch_size'])

eval_dataset = tf.data.TFRecordDataset(eval_data_dir).map(train_parser)
eval_dataset = eval_dataset.shuffle(700000, seed=1, reshuffle_each_iteration=True)
eval_dataset = eval_dataset.batch(params['batch_size'])

test_dataset = tf.data.TFRecordDataset(test_data_dir).map(test_parser)
test_dataset = test_dataset.batch(params['batch_size'])

# tf.data.Iterator replaces the deprecated tf.contrib.data.Iterator alias and
# lives in the same tf.data module as TFRecordDataset used above.
train_itr = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
eval_itr = tf.data.Iterator.from_structure(eval_dataset.output_types, eval_dataset.output_shapes)
test_itr = tf.data.Iterator.from_structure(test_dataset.output_types, test_dataset.output_shapes)

spec, label = train_itr.get_next()
eval_spec, eval_label = eval_itr.get_next()
test_spec = test_itr.get_next()

# Shapes come from params so they stay in sync with the placeholders below.
image_shape = [-1, params['height'], params['width'], 1]

eval_label = tf.reshape(eval_label, [-1, params['num_classes']])
eval_label = tf.cast(eval_label, tf.int64)

spec = tf.reshape(spec, image_shape)
spec = tf.cast(spec, tf.float32)

eval_spec = tf.reshape(eval_spec, image_shape)
eval_spec = tf.cast(eval_spec, tf.float32)

test_spec = tf.reshape(test_spec, image_shape)
test_spec = tf.cast(test_spec, tf.float32)

train_init_op = train_itr.make_initializer(train_dataset)
eval_init_op = eval_itr.make_initializer(eval_dataset)
test_init_op = test_itr.make_initializer(test_dataset)

# --- Model -----------------------------------------------------------------
# `name` is both the model's variable scope and the scope used to collect
# UPDATE_OPS below, so the two must stay identical.
name = 'model'
model = Model(params, name)

with tf.device('/gpu:0'):
    X = tf.placeholder(tf.float32, [None, params['height'], params['width'], 1])
    Y = tf.placeholder(tf.float32, [None, params['num_classes']])
    global_step = tf.Variable(0, trainable = False, name = 'global_step')

    logits_train = model.get_logits(X)

    loss = tf.losses.softmax_cross_entropy(Y, logits_train)

    # Optional L2 regularization over the LSTM weights (biases excluded).
    if params['l2_reg']:
        tv = tf.trainable_variables()
        l2 = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv if 'lstm' in v.name and 'bias' not in v.name])
        loss = loss + (params['lambda'] * l2)

    # Print every trainable variable and record its histogram. ':' is not a
    # legal summary-name character; replacing it up front yields the same tag
    # TensorFlow would fall back to, without the "is illegal" log line.
    for v in tf.trainable_variables():
        tf.summary.histogram('Var_{}'.format(v.name.replace(':', '_')), v)
        print(v)

    # Ops registered under UPDATE_OPS (e.g. by batch normalization) must run
    # together with the optimizer step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=name)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdamOptimizer(params['learning_rate']).minimize(loss, global_step=global_step)

    # Evaluation head (shares variables with the training head).
    logits_eval = model.get_logits(X, is_training=False, reuse=True)
    predict_proba_ = tf.nn.softmax(logits_eval)
    prediction = tf.argmax(predict_proba_, 1)
    # tf.metrics.accuracy returns (value, update_op) backed by local variables.
    accuracy = tf.metrics.accuracy(tf.argmax(Y, 1), prediction)

    # Prediction head used for the test set.
    logits_test = model.get_logits(X, is_training=False, reuse=True)
    test_predict_proba_ = tf.nn.softmax(logits_test)
    test_prediction = tf.argmax(test_predict_proba_, 1)

    # TensorBoard scalar summaries. The accuracy summary is attached to the
    # metric's update op, so evaluating `merged` also updates the metric.
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('accuracy', accuracy[1])

    merged = tf.summary.merge_all()

# Checkpoint saver over all global variables.
saver = tf.train.Saver(tf.global_variables())

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)

sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options))

sess.run(tf.global_variables_initializer())
# Local variables hold the running totals of the accuracy metric.
sess.run(tf.local_variables_initializer())

INFO:tensorflow:Summary name Var_model/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0 is illegal; using Var_model/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel_0 instead.
<tf.Variable 'model/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0' shape=(1152, 4096) dtype=float32_ref>
INFO:tensorflow:Summary name Var_model/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0 is illegal; using Var_model/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias_0 instead.
<tf.Variable 'model/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0' shape=(4096,) dtype=float32_ref>
INFO:tensorflow:Summary name Var_model/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel:0 is illegal; using Var_model/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel_0 instead.
<tf.Variable 'model/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel:0' shape=(1536, 2048) dtype=float32_ref>
INFO:tensorflow:Summary name Var_model/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias:0 is illegal; using Var_model/rnn/multi_rnn_cell/cell_1/basic

In [None]:
# Train the model, save a checkpoint, run inference on the test set and write
# the submission and class-probability CSV files.
summary_writer = tf.summary.FileWriter(params['model_path'], sess.graph)


def _run_training_pass(init_op, spec_op, label_op, acc_history):
    """Run one optimisation pass over a dataset.

    Args:
        init_op: initializer that points the iterator at the dataset.
        spec_op: tensor yielding a batch of spectrograms.
        label_op: tensor yielding the matching batch of labels.
        acc_history: list that receives the running accuracy after each step.

    Returns:
        The loss of the last processed batch, or None if the dataset was empty.
    """
    sess.run(init_op)
    last_cost = None

    while True:
        step = sess.run(global_step)

        try:
            batch_spec, batch_label = sess.run([spec_op, label_op])
        except tf.errors.OutOfRangeError:
            break

        # `merged` already evaluates the accuracy update op through its scalar
        # summary, so the updated value is fetched in the same call instead of
        # a second sess.run that would repeat the forward pass and count the
        # batch twice.
        _, last_cost, summ, running_acc = sess.run(
            [optimizer, loss, merged, accuracy[1]],
            feed_dict={X: batch_spec, Y: batch_label})

        acc_history.append(running_acc)
        summary_writer.add_summary(summ, step)

        if step % 500 == 0:
            print('step: {}, cost: {}'.format(step, last_cost))

    return last_cost


try:
    for epoch in range(params['epochs']):
        acc = []

        c = _run_training_pass(train_init_op, spec, label, acc)

        # NOTE(review): the eval split is also used for optimisation here, so
        # no held-out metric is produced. This looks deliberate (final fit on
        # all labelled data) — confirm. For a real evaluation, run only
        # `accuracy` on (eval_spec, eval_label) without `optimizer`.
        eval_cost = _run_training_pass(eval_init_op, eval_spec, eval_label, acc)
        if eval_cost is not None:
            c = eval_cost

        # The accuracy metric's local variables are never reset, so the values
        # in `acc` are running averages since the start of training.
        print('epoch: {}, cost : {}, train_acc: {}'.format(epoch, c, np.mean(acc)))
finally:
    # Flush and release the event file even if training is interrupted.
    summary_writer.close()

saver.save(sess, params['model_path'] + params['model_file'] + '.ckpt', global_step=sess.run(global_step))

print("Model is saved.")

# Single pass over the test set collecting both class indices and class
# probabilities; batches are gathered in lists and concatenated once.
sess.run(test_init_op)

prediction_batches = []
proba_batches = []

while True:
    try:
        test_spec_ = sess.run(test_spec)
    except tf.errors.OutOfRangeError:
        break

    batch_prediction, batch_proba = sess.run([test_prediction, test_predict_proba_],
                                             feed_dict={X: test_spec_})
    prediction_batches.append(batch_prediction)
    proba_batches.append(batch_proba)

if prediction_batches:
    predict = np.concatenate(prediction_batches)
    predict_proba = np.concatenate(proba_batches, axis=0)
else:
    # Empty test set: keep the dtypes/shapes the code below relies on.
    predict = np.array([], dtype=np.int64)
    predict_proba = np.empty((0, params['num_classes']), dtype=np.float32)

print(np.bincount(predict))
print(len(predict))

class_names = ['down', 'go', 'left', 'no', 'off', 'on', 'right', 'silence', 'stop', 'unknown', 'up', 'yes']

# File names are taken from the sample submission and paired with the
# predictions by position.
df = pd.read_csv("./sub/sample_submission.csv")
files = df['fname']

if len(files) != len(predict):
    raise ValueError('sample submission lists {} files but {} predictions were produced'
                     .format(len(files), len(predict)))

# newline='' lets the csv module control line endings (avoids blank rows on Windows).
with open(params['model_path'] + 'sub_' + params['model_file'] + '.csv', 'w', newline='') as f:
    csv_writer = csv.DictWriter(f, fieldnames=['fname', 'label'])

    csv_writer.writeheader()

    for fname, class_index in zip(files, predict):
        csv_writer.writerow({'fname': fname, 'label': class_names[class_index]})

print("Submission file is created.")

# Class probabilities, one row per test file.
print(predict_proba.shape)

# NOTE(review): the frame is indexed by file name but written with
# index=False, so the file names do not appear in the CSV — confirm this is
# what downstream consumers expect before changing it.
pp = pd.DataFrame(predict_proba, index = files)
pp.to_csv(params['model_path'] + 'proba_' + params['model_file'] + '.csv', index = False)

print("Proba file is created.")

print("Finish.")