In [None]:
import tensorflow_hub as hub
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import ddsp
import ddsp.training

# TF Hub handles of the two SPICE model versions compared in this notebook.
SPICE_V2_HANDLE = "https://tfhub.dev/google/spice/2"
SPICE_V1_HANDLE = "https://tfhub.dev/google/spice/1"

# `model` is SPICE version 2, `model1` is SPICE version 1.
model = hub.load(SPICE_V2_HANDLE)
model1 = hub.load(SPICE_V1_HANDLE)

# One period of a sine wave, 128 samples (8 ms at 16 kHz) long, i.e. 125 Hz.
# `endpoint=False` leaves out the +pi sample: sin(+pi) equals sin(-pi), so
# including it would repeat the zero crossing at every seam once the period
# is tiled below, and the result would no longer be a clean sinusoid.
wave = np.sin(np.linspace(-np.pi, np.pi, 128, endpoint=False)).astype(np.float32)

# 16 such periods back to back (2048 samples).
waves = np.tile(wave, 16)
# Plot the synthetic test signal so it can be checked by eye. The trailing
# ';' keeps the notebook from echoing the text repr of the last call.
plt.plot(waves)
plt.title("Synthetic test tone")
plt.xlabel("Sample index")
plt.ylabel("Amplitude");

In [None]:

# Run the model. One would use real singing as input; here the synthetic
# waveform `waves` is used for testing.
# The tensor is called `test_audio` rather than `input` so that the built-in
# input() function is not shadowed for the rest of the session.
test_audio = tf.constant(waves)
output = model.signatures["serving_default"](test_audio)
pitches = output["pitch"]
# Take a single entry (index 2) of the pitch output for the sanity check.
some_pitch = pitches[2]

def output2hz(pitch_output):
  """Convert raw SPICE pitch output values to frequencies in Hz.

  The model output is first mapped linearly onto a (fractional) CQT bin
  index; the bin index is then converted to Hz on a scale of 12 bins per
  octave that starts at 10 Hz.

  Args:
    pitch_output: Scalar, array or tensor of values taken from the model's
      "pitch" output.

  Returns:
    The corresponding frequencies in Hz (same shape as `pitch_output`).
  """
  # Calibration constants of the linear output -> CQT-bin mapping.
  slope, offset = 63.07, 25.58
  lowest_hz = 10.0
  bins_in_octave = 12.0

  bin_index = pitch_output * slope + offset
  octaves_above_lowest = 1.0 * bin_index / bins_in_octave
  return lowest_hz * 2.0 ** octaves_above_lowest

# Sanity check: the test tone repeats every 128 samples at 16 kHz, so the
# printed estimate should be close to 125 Hz.
test_tone_hz = output2hz(some_pitch)
print(test_tone_hz)

In [None]:
from ddsp.colab.colab_utils import (audio_bytes_to_np)
sample_rate = 16000

# Read the encoded audio file as raw bytes. The context manager closes the
# file handle as soon as the bytes have been read.
with open("../data/audio/violin/II. Double.mp3", "rb") as audio_file:
  wav_bytes = audio_file.read()

# Decode the bytes into a NumPy array. The rate is passed explicitly so the
# decoded audio and the sample-count arithmetic below use the same value.
audio = audio_bytes_to_np(wav_bytes, sample_rate=sample_rate)

# Keep only the first 4 seconds.
# Assumes `audio` is 1-D (mono): the slice is taken along the first axis.
audio = audio[:sample_rate * 4]

# NOTE: no batch axis is added here. If a consumer needs 2-D input, use
# `audio[np.newaxis, :]` when `len(audio.shape) == 1`.

In [None]:
# Show the names of the entries returned by the "serving_default" signature
# call stored in `output`.
output.keys()

In [None]:
from codetiming import Timer

In [None]:
# Time f0 estimation on `audio` through ddsp.spectral_ops.compute_f0.
# The results are kept in `crepe_f0_hz` / `crepe_f0_confidence`.
# NOTE(review): assumes the installed ddsp version's compute_f0 accepts a
# `crepe_model` keyword — confirm against its signature.
with Timer():
  crepe_f0_hz, crepe_f0_confidence = ddsp.spectral_ops.compute_f0(
    audio,
    frame_rate=32,
    crepe_model="tiny",
  )

In [None]:
import crepe

# Time a direct crepe.predict call on the same `audio` clip. Only the f0 and
# confidence results are kept; the other two return values are discarded.
with Timer():
  # Compute f0 with crepe.
  _, f0_hz, f0_confidence, _ = crepe.predict(
      audio,
      sr=sample_rate,
      viterbi=True,
      # NOTE(review): step_size is presumably in milliseconds, while the ddsp
      # call uses frame_rate=32 — confirm the two hop sizes are meant to match.
      step_size=32,
      center=False,
      model_capacity="tiny",
      verbose=0)

In [None]:
# Time SPICE v2 (`model`) on the `audio` clip; the tensor conversion is part
# of the timed region.
with Timer():
  # Called `audio_tensor` rather than `input` so that the built-in input()
  # function is not shadowed.
  audio_tensor = tf.constant(audio)
  output = model.signatures["serving_default"](audio_tensor)
  pitches = output["pitch"]


In [None]:
# Time SPICE v1 (`model1`) on the `audio` clip; the tensor conversion is part
# of the timed region.
with Timer():
  # Called `audio_tensor` rather than `input` so that the built-in input()
  # function is not shadowed.
  audio_tensor = tf.constant(audio)
  output1 = model1.signatures["serving_default"](audio_tensor)
  pitches1 = output1["pitch"]


In [None]:
# Overlay the three f0 tracks. Each curve is plotted against its own frame
# index, and is labelled so the estimators can be told apart.
plt.plot(crepe_f0_hz, label="CREPE (tiny, via ddsp)")

plt.plot(output2hz(output["pitch"]), label="SPICE v2")

plt.plot(output2hz(output1["pitch"]), label="SPICE v1")

plt.title("f0 estimates: CREPE vs. SPICE")
plt.xlabel("Frame index")
plt.ylabel("Estimated f0 (Hz)")
# The trailing ';' keeps the notebook from echoing the legend's text repr.
plt.legend();

In [None]:
# Display the f0 values returned by ddsp.spectral_ops.compute_f0.
crepe_f0_hz

In [None]:
# Shape of the "pitch" entry in `output`. When the notebook is run top to
# bottom, `output` was last assigned by the SPICE v2 (`model`) timing cell.
output["pitch"].shape