In [1]:
import tensorflow as tf
import os
import random
import math
import sys
import numpy as np
import matplotlib.pyplot as plt
import librosa

In [2]:
_NUM_SHARDS = 5
_DATA_DIR = "./"

In [3]:
def get_filenames_and_classes(dataset_dir, train_val):
    wave_root = os.path.join(dataset_dir, train_val)
    directories = []
    class_names = []
#     print (wave_root)
#     print (os.listdir(wave_root))
    for filename in os.listdir(wave_root):
        path = os.path.join(wave_root, filename)
        if os.path.isdir(path):
            directories.append(path)
            class_names.append(filename)

    photo_filenames = []
#     print (directories)
#     print (class_names)
    for directory in directories:
        for filename in os.listdir(directory):
#             print (filename)
            path = os.path.join(directory, filename)
            photo_filenames.append(path)

    return photo_filenames, sorted(class_names)

wave_filenames, class_names = get_filenames_and_classes(_DATA_DIR, "9th Wonder Kit/")

In [4]:
wave_filenames[0]

'./9th Wonder Kit/Percussions/Tab_04.wav'

In [48]:
class WaveReader(object):
    
    def __init__(self):
        record_defaults = [tf.constant([], dtype=tf.float32)]
        # Initializes function that decodes 1 channel png data.
        self._decode_csv_data = tf.placeholder(dtype=tf.string)
        self._decode_csv = tf.decode_csv(self._decode_csv_data, record_defaults=record_defaults)

    def read_wave_dims(self, sess, wave_arr):
        wave = self._decode_csv(sess, wave_arr)
        return wave.shape
    #decode string image data to int image data
    def decode_png(self, sess, wave_arr):
        wave = sess.run(self._decode_csv,
                         feed_dict={self._decode_csv_data: wave_arr})
        return wave.shape

# make tensorflow record file name
def get_dataset_filename(dataset_dir, split_name, shard_id):
    output_filename = 'wave_%s_%05d-of-%05d.tfrecord' % (
        split_name, shard_id, _NUM_SHARDS)
    return os.path.join(dataset_dir, output_filename)

# Creates a TensorFlow Record Feature with value as a 64 bit integer.
def int64_feature(values):
    # if value is not tuple or list, make value to list
    if not isinstance(values, (tuple, list)):
        values = [values]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

# # Creates a TensorFlow Record Feature with value as a byte array
def bytes_feature(values):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))

# #define tesnorflow record format
def wave_to_tfexample(wave_arr, wave_format, class_id):
    return tf.train.Example(features=tf.train.Features(feature={
      './9th_recode/encoded': bytes_feature(wave_arr),
      './9th_recode/format': bytes_feature(wave_format),
      './9th_recode/class/label': int64_feature(class_id),
#       './9th_recode/height': int64_feature(height),
#       './9th_recode/width': int64_feature(width),
    }))

def convert_dataset(split_name, filenames, class_names_to_ids, dataset_dir):
    assert split_name in ['train', 'validation']
    
    num_per_shard = int(math.ceil(len(filenames) / float(_NUM_SHARDS)))

    print ("file len: ",len(filenames), "num per shard: ", num_per_shard)

    with tf.Graph().as_default():
        wave_reader = WaveReader()

        with tf.Session('') as sess:

            for shard_id in range(_NUM_SHARDS):
                output_filename = get_dataset_filename(
                    dataset_dir, split_name, shard_id)

                #print (output_filename)
                
                #write tensorflow record file
                #create tfrecord_write
                with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
                    start_ndx = shard_id * num_per_shard
                    end_ndx = min((shard_id + 1) * num_per_shard, len(filenames))
                    for i in range(start_ndx, end_ndx):
                        sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                            i + 1, len(filenames), shard_id))
                        sys.stdout.flush()

                        # Read the filename:
                        wave_arr, sr = librosa.load(filenames[i])
                        print(wave_arr.shape)
                        S = librosa.feature.melspectrogram(wave_arr, sr=sr, n_mels=128)
                        log_S = librosa.logamplitude(S, ref_power=np.max)
#                         wave_arr = tf.gfile.FastGFile(filenames[i], 'rb').read()
#                         height, width = wave_reader.read_wave_dims(sess, log_S)
#                         print(height)
#                         print(width)
                        print(log_S.shape)
                        class_name = os.path.basename(os.path.dirname(filenames[i]))
                        class_id = class_names_to_ids[class_name]
                        
                        #make tensorflow record object and write
                        example = wave_to_tfexample(
                            log_S, b'array', class_id)
                        tfrecord_writer.write(example.SerializeToString())

    sys.stdout.write('\n')
    sys.stdout.flush()

random.shuffle(wave_filenames)
# random.shuffle(validation_filenames)

class_names_to_ids = dict(zip(class_names, range(len(class_names))))
print (class_names_to_ids)

convert_dataset('train', wave_filenames, class_names_to_ids, _DATA_DIR)

{'Kicks': 3, 'Shakers': 5, 'Snares': 6, 'Percussions': 4, 'Cymbals': 1, 'Claps': 0, 'Hi-Hats': 2}
file len:  521 num per shard:  105
>> Converting image 1/521 shard 0(9923,)
(128, 20)


TypeError: array([[-26.15721425, -25.19977996, -28.86151974, ..., -80.        ,
        -80.        , -80.      has type numpy.ndarray, but expected one of: bytes