# Create Dataset

In [120]:
import os

import tensorflow as tf

import magenta

from magenta.pipelines import pipeline
from magenta.protobuf import music_pb2
from magenta.protobuf import generator_pb2

import arrangement_create_dataset
import arrangement_model

from constants import *

### Run data through the following pipelines

The fields below are inside `performance_rnn_create_dataset`

In [122]:
def run_pipeline_graph():
    """Build the dataset-creation pipeline and run it serially over TFRECORD_FILE.

    The pipeline is obtained from ``arrangement_create_dataset.get_pipeline``
    using the ``arrangement_model.default_configs[CONFIG]`` configuration, and
    OUTPUT_DIR is passed to the runner as the output directory.
    """
    dataset_pipeline = arrangement_create_dataset.get_pipeline(
        min_events=32,
        max_events=512,
        eval_ratio=EVAL_RATIO,
        config=arrangement_model.default_configs[CONFIG])

    # Read the input records as the pipeline's declared input type.
    input_records = pipeline.tf_record_iterator(
        TFRECORD_FILE, dataset_pipeline.input_type)

    # Run the pipeline on the data source and write the results to a directory.
    pipeline.run_pipeline_serial(dataset_pipeline, input_records, OUTPUT_DIR)

In [123]:
# Run the dataset-creation pipeline defined above; tensorflow logs its statistics as INFO output.
run_pipeline_graph()

INFO:tensorflow:

Completed.

INFO:tensorflow:Processed 2 inputs total. Produced 43 outputs.
INFO:tensorflow:DAGPipeline_PerformanceExtractor_eval_performance_lengths_in_seconds:
  [20,30): 1
  [30,40): 7
INFO:tensorflow:DAGPipeline_PerformanceExtractor_eval_performances_discarded_more_than_1_program: 0
INFO:tensorflow:DAGPipeline_PerformanceExtractor_eval_performances_discarded_too_short: 0
INFO:tensorflow:DAGPipeline_PerformanceExtractor_eval_performances_truncated: 0
INFO:tensorflow:DAGPipeline_PerformanceExtractor_eval_performances_truncated_timewise: 0
INFO:tensorflow:DAGPipeline_PerformanceExtractor_training_performance_lengths_in_seconds:
  [10,20): 35
INFO:tensorflow:DAGPipeline_PerformanceExtractor_training_performances_discarded_more_than_1_program: 0
INFO:tensorflow:DAGPipeline_PerformanceExtractor_training_performances_discarded_too_short: 0
INFO:tensorflow:DAGPipeline_PerformanceExtractor_training_performances_truncated: 0
INFO:tensorflow:DAGPipeline_PerformanceExtractor_t

### Helpful functions

#### IO

Reading the .tfrecord file without defining a computational graph.

In [125]:
# Path of the training-set .tfrecord file inside OUTPUT_DIR.
# os.path.join keeps the path valid whether or not OUTPUT_DIR ends with a separator
# (plain string concatenation silently produced a wrong path without one).
training_performances_dir = os.path.join(OUTPUT_DIR, 'training_performances.tfrecord')

In [134]:
# Load every SequenceExample from the training .tfrecord file into memory.
records = list(
    pipeline.tf_record_iterator(training_performances_dir, tf.train.SequenceExample))

# Handy inspection snippets:
# len(records)
# records[0].feature_lists.feature_list['inputs'].feature

#### Investigate the encoding

In [157]:
from magenta.models.performance_rnn import performance_model
from magenta.pipelines import note_sequence_pipelines

# Model configurations keyed by name; only the 'performance' setup is defined here.
default_configs = dict(
    performance=performance_model.PerformanceRnnConfig(
        generator_pb2.GeneratorDetails(
            id='performance', description='Performance RNN'),
        magenta.music.OneHotEventSequenceEncoderDecoder(
            magenta.music.PerformanceOneHotEncoding()),
        tf.contrib.training.HParams(
            batch_size=64,
            rnn_layer_sizes=[512, 512, 512],
            dropout_keep_prob=1.0,
            clip_norm=3,
            learning_rate=0.001)))

config = default_configs['performance']

# Stand-alone instances of the individual pipeline stages, so each one can be
# applied by hand in the cells below.
quantizer_instance = note_sequence_pipelines.Quantizer(
    steps_per_second=config.steps_per_second,
    name='Quantizer_jupyter')
perf_extractor_instance = arrangement_create_dataset.PerformanceExtractor(
    min_events=32,
    max_events=512,
    num_velocity_bins=config.num_velocity_bins)
encoder_pipeline_instance = arrangement_create_dataset.EncoderPipeline(
    config,
    name='EncoderPipeline_jupyter')




In [181]:
# Iterate over every NoteSequence in TFRECORD_FILE; each pass overwrites
# note_sequence1, so only the last record read is kept for the steps below.
for record in pipeline.tf_record_iterator(TFRECORD_FILE, music_pb2.NoteSequence):
    note_sequence1 = record
# note_sequence1

In [186]:
# Apply the Quantizer stage to the raw NoteSequence and keep the first element of its output.
note_sequence2 = quantizer_instance.transform(note_sequence1)[0]
# note_sequence2

In [253]:
# Apply the PerformanceExtractor stage to the quantized sequence and keep the
# first element of its output.
note_sequence3 = perf_extractor_instance.transform(note_sequence2)[0]

# converts Performance to NoteSequence proto
# (nothing is displayed here: only a cell's last expression is echoed)
note_sequence3.to_sequence()

# returns the event at position 0
note_sequence3[0]

# iterating over the Performance yields its events
# for i, event in enumerate(note_sequence3):
#     print(event)
#     if i > 25:
#         break

# Print the extractor's first collected statistic under the given label.
print(perf_extractor_instance.get_stats()[0]._pretty_print('performance_lengths_in_bars'))

performance_lengths_in_bars: 0


In [210]:
# Apply the EncoderPipeline stage to the extracted Performance and keep the first element of its output.
note_sequence4 = encoder_pipeline_instance.transform(note_sequence3)[0]
# note_sequence4

#### Determining which position is 1 in the one-hot vector

In [206]:
# Each entry is (event_type, min_value, max_value). The ranges are laid out
# back-to-back, in this order, along the one-hot vector.
# NOTE(review): types 1/2/3 presumably correspond to Magenta's NOTE_ON /
# NOTE_OFF / TIME_SHIFT performance events -- confirm.
_event_ranges = [
    (1, 1, 127),
    (2, 1, 127),
    (3, 1, 100)
]

def encode_event(event):
    """Return the position that is 1 in the one-hot vector for ``event``.

    Args:
        event: An indexable ``(event_type, event_value)`` pair.

    Returns:
        The zero-based index: the combined size of all ranges that precede the
        event's type in ``_event_ranges`` plus the value's offset within its
        own range.

    Raises:
        ValueError: If the event type is not listed in ``_event_ranges``, or if
            the value lies outside the range declared for its type.
    """
    event_type, event_value = event[0], event[1]
    offset = 0
    for range_type, min_value, max_value in _event_ranges:
        if event_type == range_type:
            # An out-of-range value would otherwise map onto another type's
            # slots (or past the end of the vector) without any warning.
            if not min_value <= event_value <= max_value:
                raise ValueError(
                    'Event value %s out of range [%s, %s] for event type %s'
                    % (event_value, min_value, max_value, event_type))
            return offset + event_value - min_value
        offset += max_value - min_value + 1

    # Previously the function fell through and returned None here.
    raise ValueError('Unknown event type: %s' % event_type)

encode_event((3, 100))

353

#### Investigate time-shifting