In [1]:
import numpy as np
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
from keras import metrics

import random # for random
import sys # for printing

from pathlib import Path

Using TensorFlow backend.


## Variables for the notebook

In [2]:
# Vocabulary: two-way lookup between characters and integer ids.
# `index` is the next unused id while the vocabulary is being built.
char_to_idx, idx_to_char = {}, {}
index = 0

# Corpus text and the training windows derived from it, in three forms:
# raw characters, integer ids, and one-hot arrays.
# NOTE: `ouputSeq` (sic) keeps its spelling because later cells refer to it.
bufferText = ''
inputSeq, ouputSeq = [], []
inputSeqIdx, outputSeqIdx = [], []
inputSeqOH, outputSeqOH = [], []

# Windowing parameters: characters per window, and the shift setting
# (sequenceShift is declared here but not read by the visible cells).
sequenceLen, sequenceShift = 20, 1

In [3]:
# read all abc files in folder
# The paths are sorted so that character ids are assigned in the same order on
# every machine (directory listing order is otherwise filesystem-dependent),
# and all state is rebuilt from scratch so re-running this cell does not
# append the corpus to itself.
abcPaths = sorted(p for p in Path('../data/processed').glob('*.abc') if p.is_file())

char_to_idx, idx_to_char = {}, {}
textChunks = []
for filePath in abcPaths:
    # NOTE(review): files are opened with the platform default encoding, as before.
    with filePath.open('r') as abc:
        text = abc.read()
    # give every previously unseen character the next free id
    for c in text:
        if c not in char_to_idx:
            newIdx = len(char_to_idx)
            char_to_idx[c] = newIdx
            idx_to_char[newIdx] = c
    textChunks.append(text)

# `index` keeps its old meaning: the next unused id (== vocabulary size)
index = len(char_to_idx)
# join once instead of growing the string line by line
bufferText = ''.join(textChunks)

In [4]:
# extract inputSeq and outputSeq
# Every input window holds sequenceLen characters; its target is the same
# window moved one character to the right, so the model learns to predict the
# next character at each timestep.
# Both lists are rebuilt (not appended to) so that re-running this cell does
# not duplicate the training windows.
windowStarts = range(len(bufferText) - sequenceLen)  # empty when the text is too short
inputSeq = [bufferText[start: start + sequenceLen] for start in windowStarts]
ouputSeq = [bufferText[start + 1: start + sequenceLen + 1] for start in windowStarts]
print(len(inputSeq), len(ouputSeq))

98101 98101


In [5]:
# convert to index
# Map each character of each window to its integer id.
# The lists are rebuilt (not appended to) so that re-running this cell does
# not duplicate rows before one-hot encoding.
inputSeqIdx = [[char_to_idx[c] for c in window] for window in inputSeq]
outputSeqIdx = [[char_to_idx[c] for c in window] for window in ouputSeq]

In [6]:
# convert to one hot
# One class per distinct character in the vocabulary.
vocabSize = len(char_to_idx)
inputSeqOH, outputSeqOH = (
    to_categorical(seqIdx, num_classes=vocabSize)
    for seqIdx in (inputSeqIdx, outputSeqIdx)
)

print(inputSeqOH.shape, outputSeqOH.shape)

(98101, 20, 52) (98101, 20, 52)


### Instruction
- We want two LSTM cells of dimension 512, with the second LSTM cell taking as input the output of the first cell. We want both cells to output sequences (i.e. we're interested in output at every timestep and not just at the end).
- Add Dropout with keep probability 80% for the LSTM cells.
- Add a dense layer of dimension size equal to the number of unique characters in your text. This layer converts the LSTM output of dimension 512 into one unnormalized score (logit) per character, indicating how likely the output is to be that character (e.g. 'c' or 'k').
- Apply softmax to the dense layer to convert the values into probabilities the output should be each character.
- Calculate categorical cross entropy loss between the predicted softmax probabilities and labels.
- Apply an optimizer, e.g. RMSProp, with an appropriate learning rate.

In [7]:
# Model constants
KEEP_PROB = 0.8              # fraction of units kept by dropout
LEARNING_RATE = 0.01         # learning rate for the optimizer
DROPOUT = 1 - KEEP_PROB      # drop rate, i.e. the complement of KEEP_PROB

In [15]:
# Two stacked LSTM layers followed by a per-timestep softmax over the vocabulary.
#
# Dense(n) is a regular densely-connected layer whose output has dimension n.
# Both LSTM layers need return_sequences=True so that an output is produced at
# every timestep rather than only the last one
# (see https://github.com/keras-team/keras/issues/7403).

# building model
model = Sequential()
# first cell: the only layer that declares the input shape
# (timesteps, vocabulary size)
model.add(LSTM(512, input_shape=(sequenceLen, len(char_to_idx)), dropout=DROPOUT, return_sequences=True))
# second cell: consumes the first cell's 512-dimensional output sequence; its
# input shape is inferred from the previous layer, so none is passed here
model.add(LSTM(512, dropout=DROPOUT, return_sequences=True))
# one score per character at every timestep, turned into probabilities by softmax
model.add(Dense(len(char_to_idx)))
model.add(Activation('softmax'))

# optimizer
optimizer = RMSprop(lr=LEARNING_RATE)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])

In [21]:
# train model
# Mini-batch size and number of passes over the training windows.
BATCH_SIZE = 128
EPOCHS = 2
model.fit(x=inputSeqOH, y=outputSeqOH, batch_size=BATCH_SIZE, epochs=EPOCHS)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x14516ae48>

In [33]:
# generate a sequence of characters note
def generate_seq(model, mapping, seq_length, seed_note, n_chars, nprobs=3):
    """Extend ``seed_note`` by ``n_chars`` characters sampled from ``model``.

    At every step the last ``seq_length`` characters are one-hot encoded and
    passed to ``model.predict``. The prediction at the final timestep is
    reduced to its ``nprobs`` most probable characters, and one of them is
    drawn with probability proportional to its predicted value.

    Args:
        model: object with ``predict(batch)`` that accepts an array of shape
            ``(1, seq_length, len(mapping))`` and returns per-timestep
            probabilities indexable as ``result[0][-1]``.
        mapping: dict from character to integer id. Decoding uses the inverse
            of this dict, so the function does not depend on any global table.
        seq_length: number of timesteps fed to the model (must be >= 1).
        seed_note: starting text; every character must be a key of ``mapping``.
        n_chars: number of characters to generate.
        nprobs: how many of the most probable characters to sample from
            (must be >= 1; the default of 3 matches the previous hard-coded value).

    Returns:
        ``seed_note`` followed by the ``n_chars`` generated characters.

    Raises:
        ValueError: if ``seq_length`` or ``nprobs`` is smaller than 1.
        KeyError: if ``seed_note`` contains a character missing from ``mapping``.
    """
    if seq_length < 1:
        raise ValueError('seq_length must be at least 1')
    if nprobs < 1:
        raise ValueError('nprobs must be at least 1')

    inverse_mapping = {idx: char for char, idx in mapping.items()}
    n_classes = len(mapping)
    timesteps = np.arange(seq_length)

    in_text = seed_note
    # generate a fixed number of characters
    for _ in range(n_chars):
        # only the trailing window reaches the model, so encode just that part
        window = [mapping[char] for char in in_text[-seq_length:]]
        # left-pad short seeds with id 0, the same layout the previous
        # pad_sequences(..., maxlen=seq_length, truncating='pre') call produced
        padded = [0] * (seq_length - len(window)) + window
        # one hot encode
        encoded = np.zeros((1, seq_length, n_classes), dtype='float32')
        encoded[0, timesteps, padded] = 1.0

        # weighted sampling among the nprobs most probable characters.
        # Sampling indices (not probability values) keeps characters with
        # equal probabilities distinguishable.
        probs = np.asarray(model.predict(encoded))[0][-1]
        top = np.argsort(probs)[::-1][:nprobs]
        chosen = random.choices(top.tolist(), weights=probs[top].tolist(), k=1)[0]
        in_text += inverse_mapping[chosen]

    return in_text

In [34]:
# Sample a tune body from the trained model, starting from a short seed.
generatedSeq = generate_seq(model, char_to_idx, sequenceLen, 'FFF|D', 384)
print(generatedSeq)

# Drop empty lines but keep every line break between non-empty lines.
# (Replacing '\n\n' with '' glued neighbouring music lines together whenever a
# run of newlines had even length.)
strippedGeneratedSeq = '\n'.join(line for line in generatedSeq.split('\n') if line)
print(strippedGeneratedSeq)

# append to template
TEMPLATE = '''
X: 1
T: custom generated
S:Trad
L:1/8
M:6/8
K:C
{}

'''
# make sure the target folder exists before writing the tune
outputPath = Path('../output/abc/generated_abc.abc')
outputPath.parent.mkdir(parents=True, exist_ok=True)
with outputPath.open('w') as f:
    f.write(TEMPLATE.format(strippedGeneratedSeq))

FFF|D3 ||


D|"G"G2G "D7"AGF|"G"G3 -G2::
B/2c/2|"G"d2g gfg|"C"e2g "G7"fgf|"C"e3 "D7"d2c|"G"B2B BAB|"C"c3 c2:|


|:E|"A"E2E EAB|"Am"c2A A^GA|"D7"def "G"gdB|"D7"cAF "G"G2:|


D|"G"DGB dBG|"D7"FGA DEF|"G"G3 -G2::
B/2c/2|"G"d2d dBc|"G"dge "D7"d2c|"G"BAG G2:|
|:B/2c/2|"G"d^cd "Em"edB|"D"def "C"gfe|"G"ded "D7"e2f|"G"g3 "Em"gfg|"D"afd "A7"gec|"Bm"dff "E7/g+"efg|\
"A"a3 ||


|E|"D"F3 "A7/e"FED|
FFF|D3 ||
D|"G"G2G "D7"AGF|"G"G3 -G2::
B/2c/2|"G"d2g gfg|"C"e2g "G7"fgf|"C"e3 "D7"d2c|"G"B2B BAB|"C"c3 c2:|
|:E|"A"E2E EAB|"Am"c2A A^GA|"D7"def "G"gdB|"D7"cAF "G"G2:|
D|"G"DGB dBG|"D7"FGA DEF|"G"G3 -G2::
B/2c/2|"G"d2d dBc|"G"dge "D7"d2c|"G"BAG G2:|
|:B/2c/2|"G"d^cd "Em"edB|"D"def "C"gfe|"G"ded "D7"e2f|"G"g3 "Em"gfg|"D"afd "A7"gec|"Bm"dff "E7/g+"efg|\
"A"a3 ||
|E|"D"F3 "A7/e"FED|


In [35]:
'''
# it does not work as the current directory is diff from jupyter notebook relative path
import subprocess
import os
cwd = os.getcwd()
print(cwd)
subprocess.call("read_abc.py ../abc/generated_abc.abc")
'''
import os
# generate out.wav
%run -i PySynth/read_abc.py "../output/abc/generated_abc.abc"
# move out.wav to ../wav 
os.rename("out.wav", "../output/wav/generated.wav")

C 8
[['f4', 8.0], ['f4', 8.0], ['f4', 8.0], ['d4*', 2.6666666666666665], ['d4', 8.0], ['g4*', 4.0], ['g4', 8.0], ['a4', 8.0], ['g4', 8.0], ['f4', 8.0], ['g4*', 2.6666666666666665], ['g4', 4.0], ['f4', 8.0], ['f4', 8.0], ['f4', 8.0], ['d4*', 2.6666666666666665], ['d4', 8.0], ['g4*', 4.0], ['g4', 8.0], ['a4', 8.0], ['g4', 8.0], ['f4', 8.0], ['g4*', 2.6666666666666665], ['g4', 4.0], ['b4', 4.0], ['c5', 4.0], ['d5*', 4.0], ['g5', 8.0], ['g5', 8.0], ['f5', 8.0], ['g5', 8.0], ['e5*', 4.0], ['g5', 8.0], ['f5', 8.0], ['g5', 8.0], ['f5', 8.0], ['e5*', 2.6666666666666665], ['d5', 4.0], ['c5', 8.0], ['b4*', 4.0], ['b4', 8.0], ['b4', 8.0], ['a4', 8.0], ['b4', 8.0], ['c5*', 2.6666666666666665], ['c5', 4.0], ['b4', 4.0], ['c5', 4.0], ['d5*', 4.0], ['g5', 8.0], ['g5', 8.0], ['f5', 8.0], ['g5', 8.0], ['e5*', 4.0], ['g5', 8.0], ['f5', 8.0], ['g5', 8.0], ['f5', 8.0], ['e5*', 2.6666666666666665], ['d5', 4.0], ['c5', 8.0], ['b4*', 4.0], ['b4', 8.0], ['b4', 8.0], ['a4', 8.0], ['b4', 8.0], ['c5*', 2.6666666

In [20]:
# Persisting a trained model: currently disabled (all statements are commented out).
# When enabled, this saves the Keras model to an .h5 file and pickles both
# vocabulary lookup tables next to it, so that generation can later decode
# predictions with the same character ids the model was trained on.
from pickle import dump

#model.save('../model/2ep_jigs_nodense.h5')
#dump(char_to_idx, open('../model/2ep_jigs_nodense_c2i.pkl', 'wb'))
#dump(idx_to_char, open('../model/2ep_jigs_nodense_i2c.pkl', 'wb'))

In [14]:
# Restoring a previously saved model: currently disabled (all statements are
# commented out). When enabled, this replaces `model`, `char_to_idx` and
# `idx_to_char` with the saved versions.
# NOTE(review): unpickling can execute arbitrary code — only load .pkl files
# that were produced by this notebook.
from pickle import load
from keras.models import load_model
# loading model, pickle
#model = load_model('../model/4ep_jigs.h5')
#char_to_idx = load(open('../model/4ep_jigs_c2i.pkl', 'rb'))
#idx_to_char = load(open('../model/4ep_jigs_i2c.pkl', 'rb'))

ModuleNotFoundError: No module named 'keras.model'