## Convert Folk RNN samples to vectors

In [26]:
# Load the Folk RNN ABC dump. Only every fifth line (indices 4, 9, 14, ...)
# is used, and from each of those only the text after the first "DMaj".
songs = []
with open('./../../Data/Experiments_Data/folk_rnn_abc.txt', 'r') as f:
    data = f.readlines()

for i, line in enumerate(data):
    if i % 5 != 4:
        continue
    # Raises IndexError if the line does not contain "DMaj".
    songs.append(line.split("DMaj",1)[1])

In [51]:
def clean(s):
    """Return *s* with every occurrence of the unwanted characters deleted.

    The removed characters are ``| : space 1 > ( ) / z _ = < ^``. Note that
    the digit ``1`` is among them, so ``"12"`` becomes ``"2"``. Newlines and
    all other characters are left untouched.
    """
    unwanted = set('|: 1>()/z_=<^')
    return "".join(ch for ch in s if ch not in unwanted)

def remove_nums(s):
    """Expand digit repeat counts in a cleaned ABC string into a token list.

    The string is scanned one character at a time while a "pending" token is
    built up:

    * a letter emits the pending token (which may be ``''``) and starts a
      new pending token with that letter;
    * a digit emits the pending token that many times and resets the pending
      token to ``''``; each digit is handled on its own, so multi-digit
      numbers are not read as one count;
    * any other character (e.g. ``'``, ``,``, ``#``) is appended to the
      pending token.

    Args:
        s: The string to expand. An empty string yields an empty list.

    Returns:
        A list of string tokens. It can contain ``''`` placeholders, which
        callers are expected to filter out.

    NOTE: whatever is still pending when the input ends is *not* emitted.
    For a line ending in a digit followed by a newline this discards only
    the newline, but a final note with no explicit count is dropped as well.
    This long-standing behaviour is kept as is.
    """
    new_s = []
    if not s:
        # Guard: indexing s[0] on an empty string would raise IndexError.
        return new_s

    temp = s[0]
    for ch in s[1:]:
        if ch.isalpha():
            new_s.append(temp)
            temp = ch
        elif ch.isdigit():
            new_s.extend([temp] * int(ch))
            temp = ''
        else:
            temp += ch
    return new_s

def convert_8ths_to_16ths(lst):
    """Return a new list in which every non-empty entry of *lst* appears twice.

    Entries equal to ``''`` are dropped; the order of the remaining entries
    is preserved, each one immediately followed by its duplicate.
    """
    return [token for token in lst if token != '' for _ in (0, 1)]
    
def convert_to_DMaj(s):
    """Shift every note token up one scale degree (C major -> D major).

    Tokens use ABC pitch spelling, where octaves run from C to B:
    ``C, .. B,`` then ``C .. B`` then ``c .. b`` then ``c' .. b'``. Moving up
    one degree therefore keeps G -> A inside the same octave and crosses
    into the next octave only at B -> C (``B`` -> ``c``, ``b`` -> ``c'``).

    The previous hand-written table placed the octave break between G and A
    (``G`` -> ``a``) and none between B and C (``B`` -> ``C``). That moved
    those notes an octave too high or too low. Deriving the mapping from one
    ordered ladder rules out that kind of slip.

    Args:
        s: An iterable of note tokens (a list of strings, or a string of
            single-character tokens).

    Returns:
        A list with each token replaced by the next degree on the ladder.

    Raises:
        KeyError: For any token outside ``C,`` .. ``b'`` (this includes
            ``''`` and tokens carrying accidentals such as ``'B#'``).
    """
    degrees = 'CDEFGAB'
    ladder = (
        [name + ',' for name in degrees]            # C, .. B,
        + list(degrees)                             # C  .. B
        + list(degrees.lower())                     # c  .. b
        + [name + "'" for name in degrees.lower()]  # c' .. b'
        + ["c''"]                                   # target for b'
    )
    # Pair every rung with the rung directly above it.
    c_to_d = dict(zip(ladder, ladder[1:]))
    return [c_to_d[x] for x in s]
    
def convert_to_notes(song):
    """Convert note tokens to MIDI note numbers.

    Empty-string tokens are skipped. In this table an unmarked ``C`` or ``F``
    maps to the sharpened pitch (e.g. ``'C'`` -> 61, ``'F'`` -> 66),
    presumably reflecting the D-major key signature of the data, while a
    ``#`` suffix raises a note by one semitone (``'B#'`` -> 72).

    Two entries were corrected so that they follow the table's own
    12-semitones-per-octave pattern:

    * ``'E,'`` was 53; one octave below ``'E'`` (64) is 52.
    * ``"B#'"`` was 72, duplicating ``'B#'``; one octave above it is 84.

    Args:
        song: An iterable of note tokens.

    Returns:
        A list of integers, one per non-empty token.

    Raises:
        KeyError: If a non-empty token is not in the table.
    """
    chars_as_num = {
        # octave below the upper-case octave (comma suffix)
        'E,': 52,
        'F,': 54,
        'G,': 55,
        'A,': 57,
        'A#,': 58,
        'B,': 59,
        # upper-case octave
        'C': 61,
        'D': 62,
        'D#': 63,
        'E': 64,
        'E#': 65,
        'F': 66,
        'G': 67,
        'A': 69,
        'A#': 70,
        'B': 71,
        'B#': 72,
        # lower-case octave
        'c': 73,
        'd': 74,
        'd#': 75,
        'e': 76,
        'e#': 77,
        'f': 78,
        'g': 79,
        'a': 81,
        'b': 83,
        # octave above the lower-case octave (apostrophe suffix)
        "B#'": 84,
        "c'": 85,
        "d'": 86,
        "e'": 88,
        "f'": 90,
        "g'": 91,
        "a'": 93,
        "b'": 95,
    }
    return [chars_as_num[x] for x in song if x != '']


In [57]:
import numpy as np

def process(all_songs):
    """Turn raw ABC song strings into fixed-length note vectors.

    Each song is passed through ``clean`` -> ``remove_nums`` ->
    ``convert_to_notes``. Only songs whose resulting note count lies strictly
    between 240 and 270 are kept, and every kept song is brought to exactly
    256 notes:

    * longer songs are cut after the first 256 notes;
    * shorter songs are padded by repeating their final note.

    Returns a NumPy array built from the kept vectors (one row of 256 notes
    per song; an empty array if no song qualifies).
    """
    kept = []
    for raw in all_songs:
        notes = convert_to_notes(remove_nums(clean(raw)))
        count = len(notes)

        # Discard songs too far from the target length.
        if not 240 < count < 270:
            continue

        if count > 256:
            notes = notes[:256]
        elif count < 256:
            notes.extend([notes[-1]] * (256 - count))

        kept.append(np.array(notes))
    return np.array(kept)

In [58]:
# Run the whole pipeline over the loaded tunes and show the result's shape.
lst = process(songs)
print(lst.shape)

(811, 256)


In [59]:
# Persist the note matrix to disk; np.save appends ".npy" to the file name
# when it is missing.
# NOTE(review): the input was read from './../../Data/Experiments_Data/' but
# this writes to './../Experiments_Data/' — confirm the target directory.
np.save('./../Experiments_Data/folkRnn_data',lst)

In [385]:
# Sanity check: look at the type of a single row of the result.
type(lst[0])

numpy.ndarray

In [11]:
# Inspect the raw ABC text of the second extracted tune.
songs[1]

" b2e2e4B2e2f2e2|d4d2e2d2B2A4|B2d2e2f2g2f2g2a2|b2B#'2d'2b2a4a2||b2e2e4B2e2f2e2|d4d2e2d2B2A4|B2d2e2f2g2f2g2a2|b2B#'2d'2b2a4a2||b2d'2d'4b2d'2d'4|b2e'2e'4b2e'2e'4|b2d'2d'4b2d'2d'4|e'2b2d'2b2a4f2a2||b2d'2d'4b2d'2d'4|b2e'2e'4b2e'2e'2d'2|b4b2a2a2b2d'2e'2|b2d'2b2a2a2f2a2\n"