In [2]:
import os
import platform
import sys

# The MIDI-DDSP checkout sits two levels up on every platform; only the
# location of the DDSP playground checkout differs between Windows and others.
midi_ddsp_module_path = os.path.abspath('../../')
ddsp_module_path = os.path.abspath(
    '../../../ddsp-playground-2/'
    if platform.system() == 'Windows'
    else '../../../ddsp/ddsp-playground-2/'
)

def apply_module_path(module_path):
    """Append ``module_path`` to ``sys.path`` unless it is already listed.

    Prints the path being considered and then reports whether it was
    appended or left alone.

    Args:
        module_path: Directory to make importable.
    """
    print(f"module_path={module_path}")
    if module_path in sys.path:
        # Already registered: report it and leave sys.path untouched.
        print(f"do not appending {module_path} to sys.path")
        return
    sys.path.append(module_path)
    print(f"appending {module_path} to sys.path")

# Make the local MIDI-DDSP and DDSP checkouts importable.
for local_module_path in (midi_ddsp_module_path, ddsp_module_path):
    apply_module_path(local_module_path)

# `sys` is already imported at the top of this cell, so it is not re-imported here.
if platform.system() != 'Windows':
    # NOTE(review): absolute, user-specific site-packages directory. This only
    # resolves on the machine it was written for; consider moving it to a
    # configurable setting.
    sparsenet_module_path_abs = '/ssd003/home/burakovr/projects/vova/envs/main/lib/python3.8/site-packages/'
    apply_module_path(sparsenet_module_path_abs)

import midi_ddsp

module_path=E:\Code\Projects\TimbreTransfer\midi-ddsp-main
do not appending E:\Code\Projects\TimbreTransfer\midi-ddsp-main to sys.path
module_path=E:\Code\Projects\TimbreTransfer\ddsp-playground-2
do not appending E:\Code\Projects\TimbreTransfer\ddsp-playground-2 to sys.path


In [3]:
#  Copyright 2022 The MIDI-DDSP Authors.
#  #
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  #
#      http://www.apache.org/licenses/LICENSE-2.0
#  #
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""Training code for Synthesis Generator."""

import tensorflow as tf
import time
import os
import sys
import logging
import argparse
import IPython

from keras.utils.layer_utils import print_summary
from livelossplot import PlotLosses

from midi_ddsp.data_handling.get_dataset import get_dataset
from midi_ddsp.utils.training_utils import print_hparams, set_seed, \
    save_results, str2bool
from midi_ddsp.utils.summary_utils import write_tensorboard_audio
#                          from midi_ddsp.hparams_synthesis_generator import hparams as hp
from midi_ddsp.hparams_synthesis_generator import hparams_debug as hp
from midi_ddsp.modules.recon_loss import ReconLossHelper
from midi_ddsp.modules.gan_loss import GANLossHelper
from midi_ddsp.modules.get_synthesis_generator import get_synthesis_generator, \
    get_fake_data_synthesis_generator
from midi_ddsp.modules.discriminator import Discriminator

from ddsp.colab.notebook_utils import play, specplot

# Dataset

In [4]:
# Load the training and evaluation datasets together with their reported
# lengths; the training data is requested with a single repeat.
(
    training_data,
    length_training_data,
    evaluation_data,
    length_evaluation_data,
) = get_dataset(hp, training_data_repeats=1)

In [5]:
# Pull one batch from each split. `eval_sample_batch` is the input used to
# call the models further down; `train_sample_batch` is not read again in the
# visible part of the notebook.
eval_sample_batch = next(iter(evaluation_data))
train_sample_batch = next(iter(training_data))
# NOTE(review): this goes through the root logger, so the message presumably
# only shows up if logging has been configured for INFO level — confirm.
logging.info('Data loaded! Data size: %s', str(length_training_data))

In [6]:
# Draw one training batch; its 'f0_hz' entry is later fed to the toy models.
training_example = next(iter(training_data))
# `play` comes from ddsp.colab.notebook_utils; presumably renders an audio
# player for the batch's audio — TODO confirm.
play(training_example['audio'])

# Model

## Functional API

In [7]:

# Toy functional-API model used to exercise SavedModel export. It maps an
# f0 contour of 1000 frames x 1 channel to a 1000-way sigmoid output.
f0_hz = tf.keras.layers.Input(shape=(1000, 1), name='f0_hz')

# Five stacked Conv1D layers with kernel size 64 and ReLU activations. The
# input shape is fully defined by the Input layer above, so no `input_shape`
# argument is passed to the first convolution (the previous 4-D value
# disagreed with the actual 3-D input).
x = f0_hz
for conv_filters in (1, 4, 16, 4, 1):
    x = tf.keras.layers.Conv1D(conv_filters, 64, activation='relu')(x)

# Flatten, re-add a trailing axis, then take the global max over the
# flattened axis before the dense heads.
x = tf.keras.layers.Flatten()(x)
x = tf.expand_dims(x, axis=-1)

x = tf.keras.layers.GlobalMaxPooling1D()(x)

onsets = tf.keras.layers.Dense(1000, activation='sigmoid')(x)
# NOTE: the `offsets` head is built but not wired into the model below. To
# expose both heads, pass outputs={'onsets': onsets, 'offsets': offsets}.
offsets = tf.keras.layers.Dense(1000, activation='sigmoid')(x)

model_func_api = tf.keras.Model(inputs=f0_hz, outputs=onsets)

In [8]:
# Run one forward pass on a real f0 batch, then print the layer summary.
model_func_api(training_example['f0_hz'])
model_func_api.summary(expand_nested=True)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 f0_hz (InputLayer)          [(None, 1000, 1)]         0         
                                                                 
 conv1d (Conv1D)             (None, 937, 1)            65        
                                                                 
 conv1d_1 (Conv1D)           (None, 874, 4)            260       
                                                                 
 conv1d_2 (Conv1D)           (None, 811, 16)           4112      
                                                                 
 conv1d_3 (Conv1D)           (None, 748, 4)            4100      
                                                                 
 conv1d_4 (Conv1D)           (None, 685, 1)            257       
                                                                 
 flatten (Flatten)           (None, 685)               0     

## Subclassing API

In [9]:

class SubclassedModel(tf.keras.Model):
    """Subclassing-API counterpart of the functional toy model.

    Applies five Conv1D layers, flattens, re-adds a trailing axis, takes a
    global max pool and finishes with a 1000-unit sigmoid dense layer whose
    output is returned as the onsets prediction.
    """

    def __init__(self):
        super().__init__()
        conv1d = tf.keras.layers.Conv1D
        # Layers are created in the order they are applied in `call`.
        self.conv1 = conv1d(1, 64, activation='relu')
        self.conv2 = conv1d(4, 64, activation='relu')
        self.conv3 = conv1d(16, 64, activation='relu')
        self.conv4 = conv1d(4, 64, activation='relu')
        self.conv5 = conv1d(1, 64, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.expand_dims = tf.keras.layers.Lambda(
            lambda tensor: tf.expand_dims(tensor, axis=-1))
        self.global_max_pooling = tf.keras.layers.GlobalMaxPooling1D()
        self.onsets = tf.keras.layers.Dense(1000, activation='sigmoid')

    def call(self, inputs):
        """Run ``inputs`` through every layer in order and return the result."""
        pipeline = (
            self.conv1,
            self.conv2,
            self.conv3,
            self.conv4,
            self.conv5,
            self.flatten,
            self.expand_dims,
            self.global_max_pooling,
            self.onsets,
        )
        hidden = inputs
        for layer in pipeline:
            hidden = layer(hidden)
        return hidden


model_subclassing_api = SubclassedModel()

In [10]:
# Call the subclassed model once on a real f0 batch, then print its summary.
model_subclassing_api(training_example['f0_hz'])
model_subclassing_api.summary(expand_nested=True)

Model: "subclassed_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_5 (Conv1D)           multiple                  65        
                                                                 
 conv1d_6 (Conv1D)           multiple                  260       
                                                                 
 conv1d_7 (Conv1D)           multiple                  4112      
                                                                 
 conv1d_8 (Conv1D)           multiple                  4100      
                                                                 
 conv1d_9 (Conv1D)           multiple                  257       
                                                                 
 flatten_1 (Flatten)         multiple                  0         
                                                                 
 lambda (Lambda)             multiple             

# Saving

In [11]:
# Root directory (relative to the working directory) under which the two toy
# models are saved in the following cells.
save_dir = 'HowToSaveToSavedModelFormat'

# Saving to SavedModel

In [12]:
# Save the functional-API model under `save_dir`. The recorded log shows an
# "assets" directory being written at this path.
model_func_api.save(f'{save_dir}/model_func_api')





INFO:tensorflow:Assets written to: HowToSaveToSavedModelFormat/model_func_api\assets


INFO:tensorflow:Assets written to: HowToSaveToSavedModelFormat/model_func_api\assets


In [13]:
# Save the subclassed model under `save_dir`. The recorded log shows an
# "assets" directory being written at this path.
model_subclassing_api.save(f'{save_dir}/model_subclassing_api')



INFO:tensorflow:Assets written to: HowToSaveToSavedModelFormat/model_subclassing_api\assets


INFO:tensorflow:Assets written to: HowToSaveToSavedModelFormat/model_subclassing_api\assets


# SynthCoder

In [14]:
from midi_ddsp.modules.model import SynthCoder
from midi_ddsp.modules.ddsp_inference import MelF0LDEncoder, FCHarmonicDecoder, Cnn8

# Assemble a stand-alone SynthCoder (encoder + decoder) from the values in `hp`.
cnn = Cnn8(pool_size=(1, 2))
encoder = MelF0LDEncoder(cnn, hp.nhid, hp.sample_rate, hp.win_length,
                         hp.hop_length, hp.n_fft, hp.num_mels,
                         hp.fmin)
# NOTE(review): `nhramonic` is the attribute name as read from `hp`; it must
# match the hparams definition, so confirm there before "fixing" the spelling.
decoder = FCHarmonicDecoder(hp.nhramonic, hp.nnoise)
# NOTE(review): `use_f0_ld` is not read anywhere in the visible notebook.
use_f0_ld = False

synthcoder = SynthCoder(encoder, decoder)
# Call the model once on an evaluation batch before asking for the summary
# (presumably needed so the subclassed model is built — TODO confirm).
synthcoder(eval_sample_batch)

synthcoder.summary()

Model: "synth_coder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mel_f0ld_encoder (MelF0LDEn  multiple                 7054912   
 coder)                                                          
                                                                 
 fc_harmonic_decoder (FCHarm  multiple                 64638     
 onicDecoder)                                                    
                                                                 
Total params: 7,119,550
Trainable params: 7,115,710
Non-trainable params: 3,840
_________________________________________________________________


In [15]:
# Save the (untrained) SynthCoder; the same path is read back by
# tf.keras.models.load_model in the next cell.
synthcoder_save_path = 'test_synthcoder_saving/synthcoder_untrained'
synthcoder.save(synthcoder_save_path)

















INFO:tensorflow:Assets written to: test_synthcoder_saving/synthcoder_untrained\assets


INFO:tensorflow:Assets written to: test_synthcoder_saving/synthcoder_untrained\assets


In [16]:
# Reload the SynthCoder from the path it was just saved to.
synthcoder_restored = tf.keras.models.load_model(synthcoder_save_path)





In [20]:
# NOTE(review): this re-runs the original in-memory `synthcoder`, not the
# `synthcoder_restored` object loaded in the previous cell, and the recorded
# output of this cell is `KeyError: 'audio_synth'`. It is not clear from the
# notebook which object produced that error — confirm whether
# `synthcoder_restored` was meant here.
synthcoder(eval_sample_batch)

synthcoder.summary()

KeyError: 'audio_synth'

# Full Model

In [7]:
# Debug-scale overrides applied to the shared `hp` object, in this order.
# Earlier experiments used data_dir='../data/' and batch_size=10.
hparam_overrides = {
    'data_dir': '../../data/',
    'train_synth_coder_first': False,
    'training_epochs': 1,
    'log_interval': 1,
    'checkpoint_save_interval': 1,
    'eval_interval': 1,
    'synth_coder_training_epochs': 1,
    'batch_size': 1,
    'reverb_length': 16000,
}
for hparam_name, hparam_value in hparam_overrides.items():
    setattr(hp, hparam_name, hparam_value)

experiment_name = "experiment_6"

In [8]:
# Build the full model from `hp`, call it once on an evaluation batch, and
# print its summary.
midi_ddsp_model = get_synthesis_generator(hp)
midi_ddsp_model(eval_sample_batch)
midi_ddsp_model.summary()

debug: DAGLayer(<class 'ddsp.processors.ProcessorGroup'>) received input={'f0_hz': <tf.Tensor: shape=(6, 1000, 1), dtype=float32, numpy=
array([[[491.4859 ],
        [491.57083],
        [491.71805],
        ...,
        [439.45294],
        [439.41977],
        [439.48596]],

       [[587.99713],
        [586.93384],
        [585.87054],
        ...,
        [  0.     ],
        [  0.     ],
        [  0.     ]],

       [[890.00385],
        [889.8843 ],
        [889.96106],
        ...,
        [503.2882 ],
        [503.71463],
        [504.00864]],

       [[572.3636 ],
        [572.15906],
        [571.9542 ],
        ...,
        [373.732  ],
        [373.81238],
        [373.90674]],

       [[316.42538],
        [316.4605 ],
        [316.4141 ],
        ...,
        [373.20862],
        [373.60303],
        [373.6359 ]],

       [[293.9236 ],
        [293.9281 ],
        [293.85327],
        ...,
        [370.82068],
        [370.9535 ],
        [370.99457]]], dtype=float32)>, 

In [8]:
# Attempt to save the full model.
# NOTE: the recorded run of this cell fails with a StagingError whose root
# cause is "ValueError: Cannot infer argument `num` from shape (None,)",
# raised from tf.signal.irfft inside the "filtered_noise" layer.
midi_ddsp_model_save_dir = 'test_midi_ddsp_model_saving'
midi_ddsp_model.save(midi_ddsp_model_save_dir)

debug: DAGLayer(<class 'ddsp.processors.ProcessorGroup'>) received input={'f0_hz': <tf.Tensor 'f0_hz:0' shape=(None, 1000, 1) dtype=float32>, 'amplitudes': <tf.Tensor 'midi_expression_ae/synth_coder/fc_harmonic_decoder/dense_2/BiasAdd:0' shape=(None, 1000, 1) dtype=float32>, 'harmonic_distribution': <tf.Tensor 'midi_expression_ae/synth_coder/fc_harmonic_decoder/dense_3/BiasAdd:0' shape=(None, 1000, 60) dtype=float32>, 'noise_magnitudes': <tf.Tensor 'midi_expression_ae/synth_coder/fc_harmonic_decoder/dense_4/BiasAdd:0' shape=(None, 1000, 65) dtype=float32>}
batch_size=Tensor("midi_expression_ae/filtered_noise/strided_slice_1:0", shape=(), dtype=int32), audio_shape=Tensor("midi_expression_ae/filtered_noise/Shape_2:0", shape=(2,), dtype=int32), audio=Tensor("midi_expression_ae/filtered_noise/random_uniform:0", shape=(None, 64000), dtype=float32), batch_size_ir=Tensor("midi_expression_ae/filtered_noise/strided_slice_4:0", shape=(), dtype=int32), ir_shape=Tensor("midi_expression_ae/filtered

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: <gast.gast.Expr object at 0x000001F9BBE741F0>


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: <gast.gast.Expr object at 0x000001F9BBE741F0>


StagingError: in user code:

    File "e:\code\projects\coursework\ddsp-playground\ddsp-playground-2\lib\site-packages\keras\saving\saving_utils.py", line 138, in _wrapped_model  *
        outputs = model(*args, **kwargs)
    File "e:\code\projects\coursework\ddsp-playground\ddsp-playground-2\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler  **
        raise e.with_traceback(filtered_tb) from None

    StagingError: Exception encountered when calling layer "midi_expression_ae" (type MIDIExpressionAE).
    
    in user code:
    
        File "E:\Code\Projects\TimbreTransfer\midi-ddsp-main\midi_ddsp\modules\model.py", line 196, in call  *
            synth_params, control_params, synth_audio = self.run_synth_coder(features, training=training)
        File "E:\Code\Projects\TimbreTransfer\midi-ddsp-main\midi_ddsp\modules\model.py", line 115, in run_synth_coder  *
            control_params = self.processor_group.get_controls(synth_params, verbose=False)
        File "E:\Code\Projects\TimbreTransfer\ddsp-playground-2\ddsp\processors.py", line 147, in get_controls  *
            return super().call(inputs, **kwargs)
        File "E:\Code\Projects\TimbreTransfer\ddsp-playground-2\ddsp\dags.py", line 131, in call  *
            return self.run_dag(inputs, **kwargs)
        File "e:\code\projects\coursework\ddsp-playground\ddsp-playground-2\lib\site-packages\gin\config.py", line 237, in gin_wrapper  *
            return fn(*new_args, **new_kwargs)
        File "E:\Code\Projects\TimbreTransfer\ddsp-playground-2\ddsp\dags.py", line 174, in run_dag  *
            module_outputs = module(*inputs, return_outputs_dict=True, **kwargs)
        File "e:\code\projects\coursework\ddsp-playground\ddsp-playground-2\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "C:\Users\vovab\AppData\Local\Temp\__autograph_generated_filegyt6z8d_.py", line 37, in tf__call
            signal = ag__.converted_call(ag__.ld(self).get_signal, (), dict(**ag__.ld(controls)), fscope)
        File "C:\Users\vovab\AppData\Local\Temp\__autograph_generated_filenndub05_.py", line 15, in tf__get_signal
            retval_ = ag__.converted_call(ag__.ld(core).frequency_filter, (ag__.ld(signal), ag__.ld(magnitudes)), dict(window_size=ag__.ld(self).window_size), fscope)
        File "C:\Users\vovab\AppData\Local\Temp\__autograph_generated_file8zijvwwx.py", line 14, in tf__frequency_filter
            retval_ = ag__.converted_call(ag__.ld(fft_convolve), (ag__.ld(audio), ag__.ld(impulse_response)), dict(padding=ag__.ld(padding)), fscope)
        File "C:\Users\vovab\AppData\Local\Temp\__autograph_generated_file585a6mxv.py", line 65, in tf__fft_convolve
            audio_frames_out = ag__.converted_call(ag__.ld(tf).signal.irfft, (ag__.ld(audio_ir_fft),), None, fscope)
    
        ValueError: Exception encountered when calling layer "filtered_noise" (type FilteredNoise).
        
        in user code:
        
            File "E:\Code\Projects\TimbreTransfer\ddsp-playground-2\ddsp\processors.py", line 64, in call  *
                signal = self.get_signal(**controls)
            File "E:\Code\Projects\TimbreTransfer\ddsp-playground-2\ddsp\synths.py", line 198, in get_signal  *
                return core.frequency_filter(signal, magnitudes, window_size=self.window_size)
            File "E:\Code\Projects\TimbreTransfer\ddsp-playground-2\ddsp\core.py", line 1673, in frequency_filter  *
                return fft_convolve(audio, impulse_response, padding=padding)
            File "E:\Code\Projects\TimbreTransfer\ddsp-playground-2\ddsp\core.py", line 1488, in fft_convolve  *
                audio_frames_out = tf.signal.irfft(audio_ir_fft)
        
            ValueError: Cannot infer argument `num` from shape (None,)
        
        
        Call arguments received by layer "filtered_noise" (type FilteredNoise):
          • args=('tf.Tensor(shape=(None, 1000, 65), dtype=float32)',)
          • return_outputs_dict=True
          • kwargs={'training': 'False'}
    
    
    Call arguments received by layer "midi_expression_ae" (type MIDIExpressionAE):
      • features={'audio': 'tf.Tensor(shape=(None, 64000), dtype=float32)', 'f0_confidence': 'tf.Tensor(shape=(None, 1000), dtype=float32)', 'f0_hz': 'tf.Tensor(shape=(None, 1000, 1), dtype=float32)', 'instrument_id': 'tf.Tensor(shape=(None,), dtype=int32)', 'loudness_db': 'tf.Tensor(shape=(None, 1000, 1), dtype=float32)', 'note_active_frame_indices': 'tf.Tensor(shape=(None, 1000, 128), dtype=float32)', 'note_active_velocities': 'tf.Tensor(shape=(None, 1000, 128), dtype=float32)', 'note_offsets': 'tf.Tensor(shape=(None, 128000), dtype=float32)', 'note_onsets': 'tf.Tensor(shape=(None, 128000), dtype=float32)', 'power_db': 'tf.Tensor(shape=(None, 1000), dtype=float32)', 'recording_id': 'tf.Tensor(shape=(None,), dtype=string)', 'midi': 'tf.Tensor(shape=(None, 1000), dtype=int64)', 'onsets': 'tf.Tensor(shape=(None, 1000), dtype=int64)', 'offsets': 'tf.Tensor(shape=(None, 1000), dtype=int64)'}
      • training=False
      • run_synth_coder_only=None


In [16]:
# Scratch cell: inspect what tf.shape returns for a small 2x3 tensor.
t = tf.constant([[1.0, 3.0, 4.0], [1.0, 3.0, 4.0]])
#t = core.tf_float32(t)
print(t)
print(tf.shape(t))
# Shape of the shape vector: a one-element tensor holding the rank of `t`.
print(tf.shape(tf.shape(t)))
print(tf.shape(tf.shape(t)) == 2)

# The comparison yields a one-element boolean tensor; per the recorded output
# it is accepted as an `if` condition and the branch is taken.
if tf.shape(tf.shape(t)) == tf.constant(2):
    print('True')
#print(tf.shape(tf.shape(t))

tf.Tensor(
[[1. 3. 4.]
 [1. 3. 4.]], shape=(2, 3), dtype=float32)
tf.Tensor([2 3], shape=(2,), dtype=int32)
tf.Tensor([2], shape=(1,), dtype=int32)
tf.Tensor([ True], shape=(1,), dtype=bool)
True


In [20]:
# Build a float32 zero column with one row per entry along the first axis of `t`.
# NOTE(review): the recorded output has shape (3, 1), but `t` as defined in
# the cell above has 2 rows — the output was presumably produced with a
# different `t` (stale kernel state); re-run to confirm.
dry_mask = tf.zeros([int(tf.shape(t)[0]), 1], tf.float32)
print(dry_mask)

tf.Tensor(
[[0.]
 [0.]
 [0.]], shape=(3, 1), dtype=float32)


In [9]:
# Scratch check: tf.constant does not accept None entries. The recorded run
# raises ValueError on the first line, so the two lines after it never execute.
q = tf.constant([None, 3, 2])
q0 = q[0]
print(q0)

ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.

# Saving as TFLite model

In [None]:
# Turn on verbose TF logging for the conversion.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)

converter = tf.lite.TFLiteConverter.from_keras_model(midi_ddsp_model)

# Op sets the converter is allowed to target. Optimizations and a
# representative dataset are intentionally left unset here.
ops_set = tf.lite.OpsSet
converter.target_spec.supported_ops = [
    ops_set.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8,
    ops_set.TFLITE_BUILTINS,
    ops_set.SELECT_TF_OPS,
]

tflite_model = converter.convert()

In [None]:
import pathlib

# Write the converted model next to the full-model export. The path is built
# from the `midi_ddsp_model_save_dir` variable; previously the f-string had no
# placeholder and pointed at a literal "midi_ddsp_model_save_dir" directory.
log_dir_tflite = f'{midi_ddsp_model_save_dir}/tflite.tflite'

tflite_model_file = pathlib.Path(log_dir_tflite)
# The export directory may not exist yet (for example when the SavedModel
# export did not complete), so create it before writing.
tflite_model_file.parent.mkdir(parents=True, exist_ok=True)
tflite_model_file.write_bytes(tflite_model)