# Dependencies

There is a [known issue](https://github.com/biocore/songbird/issues/47) with the OpenMP library on OS X and TensorFlow. Unfortunately, Jupyter gives no indication that something is wrong, but if you try to train from the command line you'll see the error.

Running the following command seems to fix things:

```
$ conda install nomkl
```

If you get the [following error](https://github.com/numpy/numpy/issues/12744):

```text
cannot import name '_validate_lengths'
```

Then try updating scikit-image:

```bash
$ conda install -c conda-forge scikit-image
```

# Build model

In [None]:
import numpy as np

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Feed-forward binary classifier over 8 input features: two 512-unit ReLU
# layers, each followed by dropout at rate 0.5, then one sigmoid output unit.
model = Sequential()
model.add(Dense(512, input_dim=8, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy pairs with the single sigmoid output; accuracy is
# tracked alongside the loss during training and evaluation.
compile_options = dict(
    loss='binary_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
model.compile(**compile_options)

# Print the layer-by-layer overview of the network.
model.summary()

# Load data

In [None]:
import glob
import pickle
import numpy as np

# Maximum number of vectors drawn per label (1 and 0) from each file.
N_SAMPLES = 20000

# Per-file chunks are collected in lists and concatenated once at the end;
# appending to a growing array inside the loop would re-copy all data gathered
# so far on every iteration. The empty seed chunks keep the result well-formed
# (shape (0, 8) / length 0) when no file matches, and the (0, 8) seed also
# makes concatenation fail loudly if a file does not have 8 columns.
feature_chunks = [np.empty((0, 8))]
label_chunks = [np.empty(0)]

for path in sorted(glob.glob("*/training.p")):
    print("Processing", path, "...")

    # x is the training data for this sample, y is the labels.
    # NOTE: unpickling can execute arbitrary code; only load training.p
    # files that come from a trusted source.
    with open(path, "rb") as f:
        x, y = pickle.load(f)

    # sample up to N_SAMPLES signal and N_SAMPLES background vectors
    for i in [1, 0]:

        # x_i is the training data with the label i
        x_i = x[y == i]
        if len(x_i) == 0:
            continue

        # Drawing without replacement cannot return more rows than exist,
        # so cap the request instead of letting np.random.choice raise.
        n_take = min(N_SAMPLES, len(x_i))
        if n_take < N_SAMPLES:
            print("  only", n_take, "vectors with label", i,
                  "available; using all of them")

        indices = np.random.choice(len(x_i), n_take, replace=False)
        feature_chunks.append(x_i[indices, :])
        label_chunks.append(np.full(n_take, i, dtype='float64'))

# X is the training input, Y is the labels
X = np.concatenate(feature_chunks, axis=0)
Y = np.concatenate(label_chunks)

# Rescale the features by 1/255 (presumably they are 8-bit values, which
# would map them into [0, 1] -- confirm against the code that writes
# training.p).
X = X.astype('float32') / 255
print("\nRead", len(X), "vectors and labels.")

# Prepare data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out part of the data for validation/testing, using train_test_split's
# default proportions and shuffling.
split_parts = train_test_split(X, Y)
X_train, X_test, y_train, y_test = split_parts

# Report the shapes of the two feature matrices.
for caption, features in (('x_train shape:', X_train),
                          ('x_test shape:', X_test)):
    print(caption, features.shape)

# Train

In [None]:
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

import os

# Where the best model seen so far is written during training.
checkpoint_path = 'models/checkpoint.h5'

# Make sure the target directory exists before training starts; otherwise the
# first checkpoint save can fail on a fresh checkout (older Keras releases
# write straight to the path without creating parent directories).
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Stop once the validation loss has not improved for 200 consecutive epochs.
stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=200)

# Keep only the model with the highest validation accuracy.
# NOTE(review): newer Keras releases report the metric compiled as 'accuracy'
# under 'val_accuracy' rather than 'val_acc'; if the log says the monitored
# value is unavailable, switch the monitor name accordingly.
checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_acc', mode='max',
                             verbose=1, save_best_only=True)

# NOTE(review): the held-out split is used both for validation here and for
# the final score, so the reported test accuracy is optimistic.
history = model.fit(
    X_train, y_train,
    verbose=2, shuffle=True,
    batch_size=128, epochs=4000,
    callbacks=[stop, checkpoint],
    validation_data=(X_test, y_test)
)

# Score

In [None]:
# Reload the best checkpoint written during training and score it on the
# held-out split.
best_model_path = 'models/checkpoint.h5'
model = keras.models.load_model(best_model_path)

# evaluate() returns the loss followed by the compiled metric (accuracy).
scores = model.evaluate(X_test, y_test, batch_size=128)
loss, accuracy = scores

for caption, value in (('Test loss:', loss), ('Test accuracy:', accuracy)):
    print(caption, value)