In [1]:
import os
import shlex
import subprocess
import tempfile
import wave

import numpy as np
from scipy.io import wavfile

# Conversion between types
This section and the ones that follow require the [SoX package](http://sox.sourceforge.net/) (Sound eXchange). On macOS, install it with
```
brew install sox
```

In [2]:
def wav_to_data(wav_filepath):
  """Load the samples of a WAV file.

  Args:
    wav_filepath: path of the WAV file to read.

  Returns:
    The sample array produced by `scipy.io.wavfile.read`. The sample rate
    that is read alongside it is dropped.
  """
  # wavfile.read yields (sample_rate, samples); only the samples are kept.
  return wavfile.read(wav_filepath)[1]

In [3]:
def sphere_to_data(sphere_filepath):
  """Decode a NIST SPHERE file into an array of audio samples.

  The file is converted to a temporary WAV file with the SoX command-line
  tool, read back with `wav_to_data`, and the temporary file is removed.

  Args:
    sphere_filepath: path of the SPHERE (.sph) file to decode.

  Returns:
    The sample data returned by `wav_to_data` for the converted file.

  Raises:
    ValueError: if SoX is not installed, if SoX fails to convert the file,
      or if the converted file cannot be read.
  """
  sox_hint = "Converting SPHERE files requires SoX installed. For Max OS, run 'brew install sox'."

  # A private temp file (rather than one fixed, shared path) means a failed
  # conversion can never hand back the samples left over from an earlier call.
  fd, tmp_filepath = tempfile.mkstemp(suffix='.wav')
  os.close(fd)
  try:
    try:
      # Argument list, no shell: paths with spaces or shell metacharacters
      # reach SoX verbatim.
      completed = subprocess.run(
          ['sox', '-t', 'sph', os.fspath(sphere_filepath), tmp_filepath],
          stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
    except FileNotFoundError as err:
      # The `sox` executable itself could not be found.
      raise ValueError(sox_hint) from err
    if completed.returncode != 0:
      details = completed.stderr.decode(errors='replace').strip()
      raise ValueError('SoX failed to convert %r: %s' % (sphere_filepath, details))
    return wav_to_data(tmp_filepath)
  except ValueError:
    raise
  except Exception as err:
    # Callers have always seen ValueError for every failure; keep that
    # contract while preserving the underlying cause.
    raise ValueError('Could not decode SPHERE file %r: %s' % (sphere_filepath, err)) from err
  finally:
    if os.path.exists(tmp_filepath):
      os.remove(tmp_filepath)
    
def sphere_to_bytes(sphere_filepath):
  """Return the raw payload of a NIST SPHERE file, without its header.

  A SPHERE file starts with the line `NIST_1A` followed by a line holding
  the header size in bytes. When that preamble is present, the declared
  size is skipped; otherwise the conventional 1024-byte header is assumed.

  Args:
    sphere_filepath: path of the SPHERE (.sph) file to read.

  Returns:
    bytes: everything that follows the header (empty if the file is shorter
    than the header).
  """
  default_header_size = 1024
  preamble_size = 16  # 8 bytes of magic line + 8 bytes of size line

  with open(sphere_filepath, 'rb') as f:
    preamble = f.read(preamble_size)
    header_size = default_header_size
    if preamble.startswith(b'NIST_1A\n'):
      size_field = preamble[8:preamble_size].strip()
      # Only trust a size that is numeric and at least covers the preamble.
      if size_field.isdigit() and int(size_field) >= preamble_size:
        header_size = int(size_field)
    f.seek(header_size)
    data_in_bytes = f.read()
  return data_in_bytes

# Play audio files

In [4]:
PLAY_CMD = 'play' # For Mac OS with SoX installed

def play_audio_from_file(audio_filename, cmd=PLAY_CMD):
  """Play an audio file with an external command-line player.

  Args:
    audio_filename: path of the audio file to play.
    cmd: shell command used as the player; the file path is appended to it
      as its last argument.
  """
  # Quote the path so names containing spaces or shell metacharacters are
  # passed to the player as one argument instead of being split or
  # interpreted by the shell. `cmd` itself is still handed to the shell
  # unchanged, so commands with their own options keep working.
  os.system(cmd + ' ' + shlex.quote(os.fspath(audio_filename)))

In [5]:
def play_audio_from_bytes(data_in_bytes, nchannels=1, sampwidth=2,
                          framerate=16000, comptype='NONE',
                          compname='not compressed'):
  """Play raw PCM audio by wrapping it in a temporary WAV file.

  Args:
    data_in_bytes: the audio frames, as `bytes` or as any object that
      `bytes()` can convert (e.g. an array exposing its buffer).
    nchannels: number of audio channels.
    sampwidth: sample width in bytes.
    framerate: sampling frequency in Hz.
    comptype: compression type passed to `wave` ('NONE' is the only type
      the `wave` module supports).
    compname: human-readable compression name passed to `wave`.
  """
  if not isinstance(data_in_bytes, bytes):
    data_in_bytes = bytes(data_in_bytes)

  # A unique temp file avoids clashing with other code (or other notebooks)
  # that write to a fixed path under /tmp.
  fd, tmp_filepath = tempfile.mkstemp(suffix='.wav')
  os.close(fd)
  try:
    with wave.open(tmp_filepath, 'w') as f:
      # Set audio file parameters
      f.setcomptype(comptype, compname)
      f.setframerate(framerate)
      f.setnchannels(nchannels)
      f.setsampwidth(sampwidth)

      # Write data. writeframes() records the correct frame count in the
      # header, so no explicit setnframes() call is needed.
      f.writeframes(data_in_bytes)

    play_audio_from_file(tmp_filepath)
  finally:
    # Always delete the temp file, even if writing or playback failed.
    if os.path.exists(tmp_filepath):
      os.remove(tmp_filepath)

In [6]:
# Paths of the two example recordings, both in NIST SPHERE format.
filename1, filename2 = 'example1.sph', 'example2.sph'

In [7]:
# Decode the first SPHERE example into a sample array (conversion goes through SoX).
data1 = sphere_to_data(filename1)
data1  # bare expression: shown as the cell output

array([ 1, -1,  2, ..., -1, -5, -8], dtype=int16)

In [8]:
# Decode the second SPHERE example the same way.
data2 = sphere_to_data(filename2)
data2  # bare expression: shown as the cell output

array([-1,  0,  1, ...,  3,  1, -2], dtype=int16)

In [9]:
# Play the first recording with the default parameters (mono, 2-byte samples,
# 16000 Hz). The file's own sample rate is discarded by wav_to_data, so this
# assumes the recording really is 16 kHz -- TODO confirm.
play_audio_from_bytes(data1)

In [10]:
# Play the second recording with the same default parameters.
play_audio_from_bytes(data2)

In [11]:
# Combine 2 audio files: overlay them sample by sample.
# The shorter recording is zero-padded, so either one may be the longer.
mix_len = max(data1.size, data2.size)
# Sum in int32 so loud passages cannot wrap around the int16 range ...
mixed = np.zeros(mix_len, dtype=np.int32)
mixed[:data1.size] += data1
mixed[:data2.size] += data2
# ... then clip back to int16, the 2-byte sample format the player expects.
int16_range = np.iinfo(np.int16)
play_audio_from_bytes(np.clip(mixed, int16_range.min, int16_range.max).astype(np.int16))

In [12]:
# Play an interval: samples 10000 to 19999 of the first recording, i.e.
# 0.625 s to 1.25 s at the player's default 16000 Hz framerate.
play_audio_from_bytes(data1[10000:20000])

# Convert WAV to AutoDLDataset (TFRecord)

In [13]:
import data_pb2

In [15]:
# Build a DenseValue message holding a single value, as a quick check that
# data_pb2 (presumably the generated protobuf module -- confirm) is usable.
# NOTE(review): `haha` is a placeholder name; give it a descriptive one once
# this cell feeds the actual conversion.
haha = data_pb2.DenseValue(value=[0.9])

In [17]:
# Re-display the decoded samples of the first recording (same array as above).
data1

array([ 1, -1,  2, ..., -1, -5, -8], dtype=int16)