In [1]:
import tensorflow as tf

In [10]:
# End-to-end demo: synthetic audio -> STFT -> magnitude spectrogram ->
# mel spectrogram -> log-mel spectrogram -> MFCCs.
batch_size, num_samples, sample_rate = 32, 32000, 16000.0

# A float32 Tensor of shape [batch_size, num_samples] standing in for mono PCM
# audio (2 seconds per example at 16 kHz). Real PCM would lie in [-1, 1];
# Gaussian noise is unbounded, which is fine for a shape/pipeline demo.
pcm = tf.random.normal([batch_size, num_samples], dtype=tf.float32)
print('pcm.shape:', pcm.shape)

# A 1024-point STFT with frames of 64 ms (1024 samples at 16 kHz) and 75%
# overlap (hop of 256 samples). The result is complex-valued with
# fft_length // 2 + 1 = 513 frequency bins per frame.
stfts = tf.signal.stft(pcm, frame_length=1024, frame_step=256,
                       fft_length=1024)
# Fixed: this line previously referenced the undefined name `stfs`, which
# raised NameError and aborted the rest of the cell.
print('stfts.shape:', stfts.shape)

# Magnitude spectrogram: drop the phase, keep the per-bin amplitude.
spectrograms = tf.abs(stfts)
print('spectrograms.shape:', spectrograms.shape)

# Warp the linear scale spectrograms into the mel-scale.
num_spectrogram_bins = stfts.shape[-1]
print('num_spectrogram_bins:', num_spectrogram_bins)
lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 80
linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
  num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
  upper_edge_hertz)
# Contract the last (frequency) axis of the spectrograms with the first axis
# of the weight matrix, mapping 513 linear bins onto 80 mel bins.
mel_spectrograms = tf.tensordot(
  spectrograms, linear_to_mel_weight_matrix, 1)
# tensordot may lose static shape information; restore it explicitly as
# [..., frames, num_mel_bins].
mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
  linear_to_mel_weight_matrix.shape[-1:]))

# Compute a stabilized log to get log-magnitude mel-scale spectrograms.
# The 1e-6 offset avoids log(0) for empty mel bins.
log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

# Compute MFCCs from log_mel_spectrograms and take the first 13.
mfccs = tf.signal.mfccs_from_log_mel_spectrograms(
  log_mel_spectrograms)[..., :13]

In [12]:
# Inspect the static shape of the synthetic audio batch: [batch_size, num_samples].
pcm.shape

TensorShape([32, 32000])

In [18]:
# Display the complex-valued STFT tensor (shape, dtype, and a truncated view of
# the values). NOTE(review): this prints a large array; `stfts.shape` would be
# enough if only the dimensions are of interest.
stfts

<tf.Tensor: shape=(32, 122, 513), dtype=complex64, numpy=
array([[[  2.7641501 +0.0000000e+00j,  -4.2238817 +1.8097206e+01j,
           5.0274076 -1.0866295e+01j, ...,  14.134159  +3.7399616e+00j,
           6.3263397 -1.9564638e+00j, -12.886061  +0.0000000e+00j],
        [ 18.539774  +0.0000000e+00j, -17.900503  -2.4766321e+00j,
           4.3188024 -2.2069745e+00j, ...,  -8.099928  +1.5051771e+01j,
           3.8473516 -5.9248137e+00j,  -5.144312  +0.0000000e+00j],
        [  3.2243512 +0.0000000e+00j,   5.224023  -1.3674271e+01j,
          -6.1701565 +1.8731731e+01j, ...,   0.44927025-1.4230042e+01j,
           0.51571274+4.0971909e+00j,  -2.0999396 +0.0000000e+00j],
        ...,
        [ 17.836533  +0.0000000e+00j, -11.681706  +2.5913982e+00j,
          13.90275   +7.4866610e+00j, ...,  -6.1093817 -3.2028389e+01j,
         -10.948555  +2.7037197e+01j,  26.435879  +0.0000000e+00j],
        [ 12.15531   +0.0000000e+00j,  -8.661075  -7.6899195e+00j,
          11.118704  -7.1450224e+0

In [13]:
# Number of frequency bins per STFT frame (last dimension of `stfts`).
num_spectrogram_bins

513

In [17]:
# Inspect the log-mel spectrogram shape; the last dimension is num_mel_bins.
log_mel_spectrograms.shape

TensorShape([32, 122, 80])