In [None]:
import tensorflow as tf
import numpy as np
import os
import pypianoroll as pr
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.python.saved_model import tag_constants

In [None]:
def loadDataSet(folder):
    """Parse every file in `folder` and return the scaled piano rolls.

    Args:
        folder: path of a directory whose files are handed to `pr.parse`.

    Returns:
        A list with one array per file: the piano roll of the file's first
        track divided by 128.
    """
    rawData = []
    # os.listdir yields entries in arbitrary order; sort them so that the
    # position of each piece in the returned list is reproducible across runs.
    for filename in sorted(os.listdir(folder)):
        filepath = os.path.join(folder, filename)
        if not os.path.isfile(filepath):
            # Sub-directories cannot be parsed.
            continue
        rawData.append(pr.parse(filepath).tracks[0].pianoroll / 128.)
    return rawData

In [None]:
def prepareMiniBatches(rawData, Tx = 1024):
    """Cut every piece into zero-padded slices of exactly `Tx` time steps.

    Args:
        rawData: iterable of 2-D arrays whose first axis is split into slices.
        Tx: number of rows per slice.

    Returns:
        `np.array` built from the list of all slices, in input order. The last
        slice of each piece is padded with zeros at the end up to `Tx` rows.
        Pieces with zero rows contribute no slice.
    """
    mbs = []
    for dat in rawData:
        m = dat.shape[0]
        if m == 0:
            # Nothing to slice; np.split would fail on zero sections.
            continue
        nslices = (m + Tx - 1) // Tx  # ceil(m / Tx)
        # Pad only at the end of the first axis so the length becomes nslices * Tx.
        temp = np.pad(dat, ((0, nslices * Tx - m), (0, 0)), 'constant', constant_values=0)
        mbs.extend(np.split(temp, nslices, axis=0))
    return np.array(mbs)

In [None]:
# Read the jazz pieces, then cut them into fixed-length slices (Tx is the
# function's default, spelled out here for readability).
rawData = loadDataSet(folder='./data/jazz')
mbs = prepareMiniBatches(rawData=rawData, Tx=1024)

In [None]:
# Network input: 1.0 wherever a slice has a positive value, 0.0 elsewhere.
X = np.where(mbs > 0, 1.0, 0.0)
# Regression target: the scaled slices themselves.
Y = mbs

In [None]:
def buildDenseNet(inputs, struct, keepProb=1., training=False):
    """Stack dense layers, each followed by a dropout layer, on top of `inputs`.

    Args:
        inputs: tensor fed to the first dense layer.
        struct: either a sequence of unit counts (every layer then uses
            'relu') or a sequence of `(units, activation)` pairs. An empty
            sequence returns `inputs` unchanged.
        keepProb: probability of keeping a unit. It is converted to the drop
            rate `1 - keepProb` that `tf.layers.Dropout` expects.
        training: forwarded to every dropout layer call. With the default
            `False` the dropout layers are called exactly as before (no
            explicit training mode requested).

    Returns:
        The output tensor of the last dropout layer.
    """
    if len(struct) == 0:
        return inputs
    if not isinstance(struct[0], (list, tuple)):
        struct = [(u, 'relu') for u in struct]
    densors = [tf.layers.Dense(u, activation=a, kernel_initializer=tf.contrib.layers.xavier_initializer()) for u, a in struct]
    # tf.layers.Dropout takes the fraction to drop, not the fraction to keep.
    dropouts = [tf.layers.Dropout(rate=1. - keepProb) for _ in struct]
    X = inputs
    for densor, dropout in zip(densors, dropouts):
        X = densor(X)
        X = dropout(X, training=training)
    return X

In [None]:
class SimpleRNN():
    """Dense encoder -> optional RNN encoder -> RNN core -> dense decoder (TF 1.x graph API).

    Usage: construct, call `prepare()` to build the graph and `init_tf()` to
    open a session and initialise the variables, then use `train()` and
    `predict()`. A graph written by `simpleSave()` can be restored with
    `loadModel()` instead of `prepare()` / `init_tf()`.

    `keep_prob` is handed to the graph as a Python constant when the graph is
    built, so the same value is in effect for `train` and `predict`.
    """

    def __init__(self, input_size=128, structEncode = [512, 256, 128], useRNNEncode = True, useGRU=False,
                 rnn_unit=256, structDecode = [(512, 'relu'), (256, 'relu'), (128, 'sigmoid')], 
                 output_size=128, keep_prob=1, alphaZero = 10):
        """Store the hyper-parameters; no TensorFlow object is created here.

        Args:
            input_size: size of the last axis of the input placeholder.
            structEncode: layer spec passed to `buildDenseNet` for the encoder,
                or None to feed the raw inputs to the next stage.
            useRNNEncode: add a recurrent layer inside the "encode" scope.
            useGRU: use GRU cells instead of basic RNN cells.
            rnn_unit: number of units of every recurrent cell.
            structDecode: layer spec passed to `buildDenseNet` for the decoder.
            output_size: size of the last axis of the target placeholder.
            keep_prob: keep probability for the recurrent dropout wrappers,
                also forwarded to `buildDenseNet`.
            alphaZero: weight of the loss term computed on empty positions.
        """
        self.useRNNEncode = useRNNEncode
        self.input_size = input_size
        # Copy the layer specs so that instances never share (and cannot
        # accidentally mutate) the default list objects of the signature.
        self.structEncode = None if structEncode is None else list(structEncode)
        self.rnn_unit = rnn_unit
        self.structDecode = None if structDecode is None else list(structDecode)
        self.output_size = output_size
        self.keep_prob = keep_prob
        self.alphaZero = alphaZero
        self.useGRU = useGRU

    def _makeCell(self):
        """Create one recurrent cell (GRU or basic RNN) wrapped with dropout."""
        if self.useGRU:
            cell = tf.contrib.rnn.GRUCell(self.rnn_unit)
        else:
            # RNNCell is only the abstract base class; BasicRNNCell is the
            # concrete vanilla recurrent cell.
            cell = tf.contrib.rnn.BasicRNNCell(self.rnn_unit)
        return tf.contrib.rnn.DropoutWrapper(cell,
                                             input_keep_prob=self.keep_prob,
                                             output_keep_prob=self.keep_prob)

    def prepare(self):
        """Reset the default graph and build placeholders, network, cost and optimizer."""
        # reset graph
        tf.reset_default_graph()

        # Placeholders are created in this fixed order because loadModel looks
        # them up by their auto-generated names (Placeholder, Placeholder_1, ...).
        self.inputs = tf.placeholder(tf.float32, (None, None, self.input_size))
        self.y_true = tf.placeholder(tf.float32, (None, None, self.output_size))

        # length of each piece of music
        self.seq_len = tf.placeholder(tf.int32, [None])
        self.learning_rate = tf.placeholder(tf.float32, shape=[])

        # build encode net
        with tf.variable_scope("encode"):
            if self.structEncode is None:
                rnnInputs = self.inputs
            else:
                rnnInputs = buildDenseNet(self.inputs, self.structEncode, self.keep_prob)

            if self.useRNNEncode:
                rnn_cell = self._makeCell()
                # link rnn cells
                self.encode, rnn_state = tf.nn.dynamic_rnn(rnn_cell, rnnInputs,
                                                           sequence_length=self.seq_len,
                                                           dtype=tf.float32)
            else:
                # Without the recurrent encoder the core consumes the dense
                # encoder output (or the raw inputs) directly.
                self.encode = rnnInputs

        # rnn cell with dropouts
        with tf.variable_scope("core"):
            self.rnn_cell = self._makeCell()

            # link rnn cells
            self.rnn_out, self.rnn_state = tf.nn.dynamic_rnn(self.rnn_cell,
                                                             self.encode,
                                                             sequence_length=self.seq_len,
                                                             dtype=tf.float32)

        # build output layers
        with tf.variable_scope("output"):
            self.y_pred = buildDenseNet(self.rnn_out, self.structDecode, self.keep_prob)

        self.defNewCost()

        # optimizer
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)

    def defNewCost(self):
        """Define `self.cost`: weighted sum of the mean squared error on empty and on active positions.

        Positions where the input equals 0 form the "zero" term (weighted by
        `alphaZero`); the input itself is used as the weight mask of the
        "one" term.
        """
        notes = self.inputs
        emptyNotes = tf.cast(tf.equal(self.inputs, 0), tf.float32)

        # Clamp the denominators to 1 so that a batch without any note (or
        # without any empty position) yields a zero term instead of NaN.
        # The clamp leaves 0/1 masks unchanged whenever at least one position is set.
        nNotes = tf.maximum(tf.reduce_sum(notes), 1.)
        nZeros = tf.maximum(tf.reduce_sum(emptyNotes), 1.)

        # loss
        squreDiff = tf.square(self.y_true - self.y_pred)
        zeroLoss = tf.reduce_sum(tf.multiply(emptyNotes, squreDiff)) / nZeros
        oneLoss = tf.reduce_sum(tf.multiply(notes, squreDiff)) / nNotes
        # loadModel fetches this tensor by the name 'add:0'; keep this `+` the
        # only top-level addition created while building the graph.
        self.cost = zeroLoss * self.alphaZero + oneLoss

    def train(self, X, Y, batch = 512, epochs=100, learning_rate=0.001):
        """Run mini-batch training and log the last batch cost of every epoch.

        Args:
            X: array indexed as `X[idx, :, :]`, fed to the input placeholder.
            Y: array indexed as `Y[idx, :, :]`, fed to the target placeholder.
            batch: maximum number of sequences per mini-batch.
            epochs: number of passes over the data.
            learning_rate: value fed to the learning-rate placeholder.

        Returns:
            List with the cost of the last mini-batch of each epoch.

        Raises:
            ValueError: if `X` contains no sequence.
        """
        m = X.shape[0]
        Tx = X.shape[1]
        if m == 0:
            raise ValueError("train() needs at least one sequence")

        nBatch = (m + batch - 1) // batch
        history = []

        # Do the training loop
        for epoch in range(epochs):
            # Draw a fresh permutation every epoch so batches differ between epochs.
            idxs = np.array_split(np.random.permutation(m), nBatch)
            for idx in idxs:
                _, cost = self.sess.run([self.optimizer, self.cost],
                                        feed_dict={
                                            self.inputs: X[idx, :, :],
                                            self.y_true: Y[idx, :, :],
                                            # every sequence uses the full length Tx
                                            self.seq_len: [Tx] * len(idx),
                                            self.learning_rate: learning_rate
                                        })
            print(epoch, cost)
            self.trainingLog(epoch, cost)
            history.append(cost)
        return history

    def predict(self, x):
        """Evaluate the network on `x`.

        Args:
            x: array fed to the input placeholder; axis 0 is the batch and
                axis 1 the sequence length used for every item.

        Returns:
            A one-element list holding the evaluated `y_pred`.
        """
        m = x.shape[0]
        t_x = x.shape[1]
        seq_len = [t_x] * m

        return self.sess.run([self.y_pred],
                             feed_dict={
                                 self.inputs: x,
                                 self.seq_len: seq_len
                             })

    def loadModel(self, folder):
        """Load a SavedModel from `folder` into a new graph and session.

        Tensors are looked up by hard-coded names; these presumably match a
        graph built by `prepare()` with a three-layer decoder ending in a
        sigmoid - verify when the architecture was changed before saving.
        """
        graph = tf.Graph()
        with graph.as_default():
            self.sess = tf.Session()
            tf.saved_model.loader.load(
                self.sess,
                [tag_constants.SERVING],
                folder,
            )
            self.y_pred = graph.get_tensor_by_name('output/dense_2/Sigmoid:0')
            self.inputs = graph.get_tensor_by_name('Placeholder:0')
            self.y_true = graph.get_tensor_by_name('Placeholder_1:0')
            self.seq_len = graph.get_tensor_by_name('Placeholder_2:0')
            try:
                # Fourth placeholder created by prepare(); needed by train().
                self.learning_rate = graph.get_tensor_by_name('Placeholder_3:0')
            except KeyError:
                # Graph without that placeholder: prediction still works.
                self.learning_rate = None

            self.cost = graph.get_tensor_by_name('add:0')
            self.optimizer = graph.get_operation_by_name('Adam')

    def init_tf(self):
        """Open a session on the default graph and initialise all variables."""
        self.sess = tf.Session()

        # Run the initialization
        self.sess.run(tf.global_variables_initializer())

    def closeSess(self):
        """Close the current session; `train` and `predict` need an open one."""
        self.sess.close()

    def simpleSave(self, filename):
        """Export the session's graph as a SavedModel at `filename`."""
        tf.saved_model.simple_save(self.sess, filename,
                                   inputs={"inputs": self.inputs},
                                   outputs={"outputs": self.y_pred})

    def trainingLog(self, epoch, cost):
        """Append one `epoch, cost` line to 'trainingLog.txt'."""
        with open('trainingLog.txt', 'a') as f:
            f.write(str(epoch)+', '+ str(cost))
            f.write('\n')

In [None]:
# Build the network with GRU cells, the recurrent encoder enabled and
# keep_prob=1. Decoder override left commented out in the original cell:
# structDecode=[(128, 'sigmoid')]
model = SimpleRNN(
    alphaZero=0.01,
    useRNNEncode=True,
    useGRU=True,
    keep_prob=1,
)
model.prepare()
model.init_tf()

In [None]:
# Train on the first `nSongs` slices only.
nSongs = 3072
# NOTE(review): `pred_y` receives whatever `train` returns; no predictions
# are computed in this cell.
pred_y = model.train(
    X[:nSongs],
    Y[:nSongs],
    batch=128,
    epochs=100,
    learning_rate=0.001,
)

In [None]:
def summaryStats(y_pred, y_true):
    """Separate predictions at note positions from predictions at empty positions.

    Both arrays are flattened first. A position counts as a note when the
    true value is > 0 and as empty when the true value is exactly 0.

    Args:
        y_pred: array of predicted values.
        y_true: array of true values, same number of elements as `y_pred`.

    Returns:
        Tuple `(pred_vel, true_vel, pred_zeros)`: predicted and true values at
        note positions, and predicted values at empty positions.
    """
    flat_pred = y_pred.reshape(-1)
    flat_true = y_true.reshape(-1)

    note_mask = flat_true > 0
    empty_mask = flat_true == 0

    return flat_pred[note_mask], flat_true[note_mask], flat_pred[empty_mask]
    
    

In [None]:
# Two consecutive slices for a qualitative check of the trained model.
# NOTE(review): with nSongs = 3072 in the training cell, indices 2072-2073
# belong to the data the model was trained on.
X_sample = X[2072:2074, :, :]
Y_sample = Y[2072:2074, :, :]
# predict returns a one-element list; take the array and rescale by 128 to
# undo the division applied when the data was loaded. Inference runs once:
# the earlier extra predict call discarded its result.
y_pred = model.predict(X_sample)[0] * 128

In [None]:
# Statistics for the first sample only, with the target rescaled by 128 to
# match `y_pred`.
pred_vel, true_vel, pred_zeros = summaryStats(
    y_pred=y_pred[0],
    y_true=Y_sample[0] * 128,
)

In [None]:
# Signed prediction error at the note positions.
diff = np.subtract(pred_vel, true_vel)

In [None]:
# Distribution of the prediction error at note positions.
sns.distplot(a=diff)

In [None]:
# True vs. predicted values at note positions. x and y are passed by keyword
# because newer seaborn releases no longer accept them positionally.
sns.regplot(x=true_vel, y=pred_vel)

In [None]:
# Distribution of the values predicted at empty positions.
sns.distplot(a=pred_zeros, bins=128)

In [None]:
# Predict velocities for a classical piece and write the result as MIDI.
# This must run while the session is still open, so the session is closed at
# the very end of this cell instead of before it.
pr_classical = pr.parse('./data/classical/beethoven_opus10_2_format0.mid')
# Binary note mask of the first track with a leading batch axis of size 1.
pr_input = (pr_classical.tracks[0].pianoroll > 0).reshape(1, -1, 128) * 1.
# predict returns a one-element list; take the prediction array itself.
pr_transferred = model.predict(pr_input)[0]
# Keep predicted values only where the original piece has a note.
pr_transferred = pr_transferred * pr_input
# Rescale by 128 and clip: rounding can reach 128, above the MIDI maximum 127.
t = np.clip((pr_transferred * 128).round(), 0, 127)
pr_classical.tracks[0].pianoroll = t.reshape(-1, 128)
pr_classical.write('./transfered_2.mid')

# Release the session only after the last prediction has been made.
model.closeSess()