# H1 Zoom RAW to WAV

This code converts a 4-channel raw file created with a Zoom H1 recorder into two stereo WAV files.

The following assumptions are made:
- Input file is sampled at 44.1kHz
- Input file is encoded as signed 16-bit little endian
- Input file contains 2 different _stereo_ signals, interleaved.
- Output files are 2 stereo WAV files, sampled at 44.1kHz, with 16-bit resolution.

To de-interleave the two signals, two different segment sizes are employed, which keep alternating across the whole signal as follows:
- signal1_segmentsize1
- signal2_segmentsize1
- signal1_segmentsize2 
- signal2_segmentsize2
- signal1_segmentsize1
- etc.

Requirements
----

- librosa 0.7.2
- ffmpeg 4.2.2

In [None]:
%matplotlib notebook
import IPython
import librosa
import numpy as np
import soundfile
import subprocess
import matplotlib.pyplot as plt

from librosa import display

# Audio format of the RAW input; SR is also the sample rate of the written WAVs.
SR = 44100           # Sampling Rate (in Hz)
ENC = "s16le"        # Encoding (for FFMPEG)
# Number of leading samples (per channel of the intermediate WAV) that are
# skipped before de-interleaving starts.
OFFSET_SAMP = 269728 # Offset in samples
# The two alternating segment lengths. gen_indeces multiplies them by 2
# because it assumes stereo data.
SEGMENT_SIZE1 = 131072  # Size for segment type 1 (in samples)
SEGMENT_SIZE2 = 139264  # Size for segment type 2 (in samples)

# Paths
# NOTE: RAW_FILE / WAV_FILE* are machine-specific absolute paths; adjust them
# before running. TMP_FILE is relative to the current working directory.
RAW_FILE = "/Users/onieto/Downloads/bm.raw"
WAV_FILE1 = "/Users/onieto/Downloads/bm1.wav"
WAV_FILE2 = "/Users/onieto/Downloads/bm2.wav"
TMP_FILE = "tmp.wav"

In [None]:
def raw_to_interleaved_wav():
    """
    Convert to WAV from Zoom's RAW format using FFMPEG.

    RAW_FILE is read as headerless 2-channel audio with encoding ENC at
    SR Hz, and written to TMP_FILE (an existing file is overwritten).
    Only the first 300 seconds of the input are read.
    If more than 2 channels, result will be interleaved.
    """
    # Build the argument list directly instead of formatting one string and
    # splitting it on spaces: a path containing a space would otherwise be
    # broken into several ffmpeg arguments.
    cmd = [
        "ffmpeg", "-y",
        "-f", ENC,
        "-ss", "0", "-t", "300",
        "-ar", "{:.1f}k".format(SR / 1000),
        "-ac", "2",
        "-i", RAW_FILE,
        TMP_FILE,
    ]
    subprocess.call(cmd)

# Convert RAW to WAV using FFMPEG (produces the intermediate file TMP_FILE)
raw_to_interleaved_wav()

In [7]:
def make_even(x):
    """Return `x` with an even length, dropping the last element if needed."""
    if len(x) % 2:
        # Odd length: discard the trailing element.
        return x[:-1]
    return x

def to_mono(x):
    """Flatten `x` into a 1-D copy in column-major (Fortran) order.

    For a (channels, frames) array this interleaves the channels:
    ch0[0], ch1[0], ch0[1], ch1[1], ...
    """
    interleaved = x.flatten(order="F")
    return interleaved

def to_stereo(x):
    """Reshape `x` into two rows using column-major (Fortran) order.

    For an interleaved 1-D signal this puts the even-indexed samples in
    row 0 and the odd-indexed samples in row 1.
    """
    two_rows = x.reshape(2, -1, order="F")
    return two_rows

def gen_indeces(N, inc1=SEGMENT_SIZE1, inc2=SEGMENT_SIZE2):
    """Generate the split indices used for the slicing.

    The step between consecutive indices follows a repeating pattern:
        - two segment1s
        - two segment2s
        - two segment1s
        - ...
    Each step is twice the segment size, assuming stereo.

    Indices are produced until one reaches or exceeds `N`; that final
    index is included in the returned list.
    """
    # One full cycle of segment sizes.
    pattern = (inc1, inc1, inc2, inc2)
    boundaries = []
    position = 0
    step = 0
    while position < N:
        position = position + pattern[step] * 2
        boundaries.append(position)
        step = (step + 1) % 4
    return boundaries
    
def interleaved_wav_to_wavs():
    """De-interleave the two signals stored in TMP_FILE.

    Loads TMP_FILE, skips the first OFFSET_SAMP samples of every channel,
    splits the flattened signal at the positions given by gen_indeces and
    assigns the resulting chunks alternately to the two output signals.

    Returns a tuple with the two signals, each reshaped by to_stereo into
    two rows.
    """
    # Load without down-mixing; the returned sample rate is not needed.
    audio, _ = librosa.load(TMP_FILE, sr=SR, mono=False)

    # Drop the leading offset and interleave the channels into one 1-D signal
    flat = to_mono(audio[:, OFFSET_SAMP:])
    chunks = np.array_split(flat, gen_indeces(len(flat)))

    # Even-numbered chunks form the first signal, odd-numbered the second;
    # each is trimmed to an even length so it can be reshaped into two rows.
    first = np.concatenate(chunks[0::2])
    first = make_even(first)
    second = np.concatenate(chunks[1::2])
    second = make_even(second)

    return to_stereo(first), to_stereo(second)
    
# Deinterleave
x1, x2 = interleaved_wav_to_wavs()

# Save wavs
# x1 / x2 have two rows (one per channel); transpose them so that each row is
# one frame and each column one channel before writing.
soundfile.write(file=WAV_FILE1, data=x1.T, samplerate=SR)
soundfile.write(file=WAV_FILE2, data=x2.T, samplerate=SR)

# Sandbox

The code below is most likely useless.

In [None]:
# 269727 -> 269728 # 2nd change
# 531871 -> 531872 # 4th change
# 1072543 -> 1072544 # 8th change 
# 3374495 -> 3374495 # 17th (?) change
# 4455839 -> 4455840 # 
# 4857247 -> 4857247 # Nth change - Beginning of mic
# 16620959 -> 16620960 # Nth change - Beginning of jack
# 16752031 -> 16752032 # Nth change - Beginning of mic (next segment!)
# 16891295 -> 16891296 # Nth change - Beginning of jack (next segment!)

# 18914719 -> 18914720 # Nth change - Beginning of mic
# 19053983 -> 19053984 # Nth change - Beginning of jack (next segment!)

In [None]:
# Half the distance between the "2nd change" and "4th change" marks.
# Evaluates to 131072.0, i.e. SEGMENT_SIZE1.
(531871 - 269727) / 2

In [None]:
# What is left of the "4th change" mark after removing four segments of
# 131072 samples. Evaluates to 7583.
531871 - 131072 * 4

In [None]:
# A quarter of the distance between the "4th change" and "8th change" marks.
# Evaluates to 135168.0, the mean of SEGMENT_SIZE1 and SEGMENT_SIZE2.
(1072543 - 531871) / 4

In [None]:
# A sixth of the distance between the "2nd change" and "8th change" marks.
# Evaluates to roughly 133802.67.
(1072543 - 269727) / 6

In [None]:
# Jack part length (evaluates to 131072, i.e. SEGMENT_SIZE1):
16752031 - 16620959

In [None]:
# Mic part length (evaluates to 139264, i.e. SEGMENT_SIZE2):
16891295 - 16752031

In [None]:
# Mic part length, measured at a later segment (again 139264)
19053983 - 18914719