In [1]:
import os
import pickle
import glob
import skimage
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
def get_data(data_dir, sample_start=0, sample_end=100):
    """Stack per-file amplitude strips from ``data_dir`` into one 2-D array.

    Every ``*.pkl`` file in ``data_dir`` is unpickled and indexed as a 2-D
    array (``fft_data[:, sample_start:sample_end]``). For each row, the
    maximum over the selected column window is taken, giving one 1-D
    amplitude strip per file. Strips are ordered by the coordinates encoded
    in the file name and linearly resampled to a common length so they can be
    stacked.

    File names must look like ``[continuous_]<xmin>_<xmax>_<y>.pkl`` where the
    three fields parse as floats.

    Parameters
    ----------
    data_dir : str
        Directory that is searched (non-recursively) for ``*.pkl`` files.
    sample_start, sample_end : int, optional
        Half-open column window ``[sample_start, sample_end)`` over which the
        per-row maximum is taken. The default is ``0..100``.
        NOTE(review): an earlier comment mentioned 28 kHz sitting around
        samples 8000-10000, which does not match the defaults -- confirm the
        intended window with the caller.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_files, target_size)`` where ``target_size`` is the
        median strip length truncated to an integer.

    Raises
    ------
    RuntimeError
        If no ``*.pkl`` file is found in ``data_dir``.
    ValueError
        If a file name does not contain exactly three ``_``-separated floats.
    """
    fnames = sorted(glob.glob(os.path.join(data_dir, "*.pkl")))
    print('Found %s records' % len(fnames))
    if not fnames:
        raise RuntimeError('No Data Found')

    # One (xmin, xmax, y, amplitudes) tuple per file.
    strips = []
    for fname in fnames:
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this function at directories whose contents you trust.
        with open(fname, 'rb') as f:
            fft_data = pickle.load(f)

        # Collapse the selected column window to a single peak value per row.
        amplitudes = fft_data[:, sample_start:sample_end].max(axis=1)

        name = os.path.basename(fname).replace('.pkl', '').replace('continuous_', '')
        xmin, xmax, y = (float(coord) for coord in name.split('_'))
        strips.append((xmin, xmax, y, amplitudes))

    # Order by the coordinates only. Sorting the raw tuples would fall through
    # to comparing the amplitude arrays whenever two files share coordinates,
    # which raises "truth value of an array is ambiguous". The sort is stable,
    # so tied files keep their file-name order.
    strips.sort(key=lambda strip: strip[:3])
    ampdata = [strip[-1] for strip in strips]

    # Use the median strip length as the common length to resample to.
    median_size = np.median([len(x) for x in ampdata])
    print('Median number of records in continguous strip: %s' % str(median_size))
    # np.median returns a float, but np.linspace requires an integer count.
    target_size = int(median_size)

    # Resample on a normalised [0, 1] axis. Linear interpolation is invariant
    # under an affine change of the x axis, so this matches interpolating over
    # [xmin, xmax] while staying valid when xmin >= xmax (np.interp needs
    # increasing sample points).
    target_pos = np.linspace(0.0, 1.0, target_size)
    resized_ampdata = [np.interp(target_pos, np.linspace(0.0, 1.0, len(d)), d)
                       for d in ampdata]

    return np.array(resized_ampdata)

In [None]:
# Run get_data on one data directory; as the last expression in the cell,
# the returned array is shown as the cell output.
get_data(data_dir='../data/1551236003')