In [None]:
import numpy as np
import tqdm
from scipy import misc
import pandas as pd
import os.path
import matplotlib.pyplot as plt, matplotlib as mpl
%matplotlib inline

In [None]:
import loadData, os

In [None]:
# List the dataset zip archives present in the behavioralCloning data directory.
ls /home/tsbertalan/data2/behavioralCloning/*.zip

In [None]:
# Build one generator over the three recorded datasets, then collect the
# response arrays it yields into a single stacked array (one row block per item).
dataGenerator = loadData.CenterOnlyDataGenerator([
    os.path.join(loadData.HOME, 'data2', 'behavioralCloning', zipName)
    for zipName in ('data_provided.zip', 'mouseForward.zip', 'mouseReverse.zip')
])
gen = dataGenerator.generate(stopOnEnd=True, epochSubsampling=.2)
# Each yielded item is an (inputs, responses) pair; only the responses are kept.
Yunbalanced = np.vstack([
    responses
    for (_, responses) in tqdm.tqdm_notebook(gen)
])

In [None]:
import random
# Log column keys holding the left, center, and right camera image paths (in that order).
_LCR = ('left', 'center', 'right')

# Superseded approach, kept commented out for reference: it produces two big spikes from the left/right-camera samples of the zero-angle rows.

# class DeemphasizedZeroDataGenerator(loadData.DataGenerator):
#     zeroKeepProbability = .07
#     sideCameraChance = .6
    
#     rowsPerSample = 1
#     def sample(self, validation=False):
#         # Could be really inefficient; we read 3*n images, where n depends on the number of failed draws!
#         # In practice, though, we don't seem to fail too much.
#         p = lambda: random.uniform(0, 1)
#         while True:
#             X, Y = super().sample(validation=validation)
#             if p() < self.sideCameraChance:
#                 i = random.choice([0, 2])
#                 x, y = X[i], Y[i]
#                 break
#             elif p() < self.zeroKeepProbability:
#                 x, y = X[1], Y[1]
#                 break
#         return x.reshape((1, *x.shape)), y

class DeemphasizedZeroDataGenerator(loadData.DataGenerator):
    """Data generator that thins out rows whose first response variable is zero.

    A row with a nonzero first response is always accepted; a row where it is
    exactly zero is accepted only with probability ``zeroKeepProbability``.
    """

    # Probability of accepting a row whose first response variable equals zero.
    zeroKeepProbability = .07

    def sampleRow(self, validation=False):
        """Step through the log rows until one is accepted, then return it.

        Parameters
        ----------
        validation : bool
            Key selecting which index list / cursor pair to use
            (``self._indices[validation]`` and ``self._state[validation]``).

        Returns
        -------
        (lcrImagePaths, response)
            ``lcrImagePaths`` is a list with the log entries for the 'left',
            'center' and 'right' columns of the accepted row; ``response`` is a
            NumPy array with that row's entries for ``self.responseKeys``.
        """
        accepted = False
        while not accepted:
            # Look up the row label at the current cursor position.
            order = self._indices[validation]
            cursor = self._state[validation]
            rowLabel = order[cursor]

            # Advance the cursor, wrapping back to the start after the last entry.
            atLastEntry = cursor == len(order) - 1
            self._state[validation] = 0 if atLastEntry else cursor + 1

            # Image paths for the three cameras, and the requested response values.
            lcrImagePaths = [self.log.at[rowLabel, camera] for camera in _LCR]
            response = np.array([self.log.at[rowLabel, name] for name in self.responseKeys])

            # Nonzero first response: always keep. Zero: keep only on a lucky draw.
            # The `or` short-circuits, so a random number is drawn only for zero rows.
            accepted = response[0] != 0 or np.random.uniform(0, 1) < self.zeroKeepProbability

        return lcrImagePaths, response

In [None]:
# Rebind `dataGenerator` to the zero-deemphasizing sampler, reading only the provided dataset.
dataGenerator = DeemphasizedZeroDataGenerator(os.path.join(
    loadData.HOME, 'data2', 'behavioralCloning', 'data_provided.zip',
))

In [None]:
# Draw 3000 samples from the zero-deemphasizing generator and stack the second
# element of each sample (the responses) into one array.
Ybalanced = np.vstack([dataGenerator.sample()[1] for _ in tqdm.tqdm_notebook(range(3000))])

# Overlay the two normalized histograms so their shapes can be compared directly.
fig, ax = plt.subplots()
# `density=True` replaces the old `normed=True` keyword, which was removed from
# `Axes.hist` in matplotlib 3.1 and raises an error on current versions.
kw = dict(bins=64, alpha=.25, density=True)
ubN, ubBins, ubPatches = ax.hist(Yunbalanced, label='full distribution', color='red', **kw)
# The y-axis is clipped below, so report the (off-scale) peak height in the legend instead.
ubPatches[0].set_label('full distribution\n(max normed count is %.3g)' % max(ubN))
ax.hist(Ybalanced, label='zero-centered kept w.p. %s' % dataGenerator.zeroKeepProbability, color='blue', **kw)
# ax.set_xlim(-.25, .25)
ax.set_ylim(0, 4)
ax.legend(fontsize=12, loc='right')
ax.set_xlabel('turn angle [rad]')
ax.set_ylabel('normed count')

# Make sure the output directory exists; savefig does not create it and would
# otherwise fail with FileNotFoundError on a fresh checkout.
os.makedirs('doc', exist_ok=True)
# The probability is embedded in the file name with '.' spelled as 'p' (e.g. 0p07).
fig.savefig('doc/zeroCenteredKeptWP%s.png' % str(dataGenerator.zeroKeepProbability).replace('.', 'p'))