# Dependencies

There is a [known issue](https://github.com/biocore/songbird/issues/47) with the OpenMP library on OS X when used with TensorFlow. Unfortunately, Jupyter gives no indication that something is wrong, but if you try to train from the command line you'll see the error.

Running the following command seems to fix things:

```
$ conda install nomkl
```

# Build model

In [5]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Feed-forward binary classifier: 8 input features, two 512-unit ReLU hidden
# layers (each followed by dropout at rate 0.5), and one sigmoid output unit.
model = Sequential()
model.add(Dense(512, input_dim=8, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Optimise binary cross-entropy with RMSprop and report accuracy as a metric.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 512)               4608      
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 513       
Total params: 267,777
Trainable params: 267,777
Non-trainable params: 0
_________________________________________________________________


# Load data

In [6]:
import glob
import pickle
import numpy as np

# X is the complete training input, y is the labels.
# The per-file arrays are collected in lists and joined once at the end, which
# avoids re-copying the whole accumulated array on every iteration. Each list
# starts with an empty float64 array so the result keeps the (0, 8) / (0,)
# shape when no data files are found.
X_parts, y_parts = [np.empty((0, 8))], [np.empty((0,))]

for path in sorted(glob.glob("*/data.p")):
    print("Processing", path, "...")

    # X_i is the training data for this sample, y_i is its labels.
    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # data files you produced yourself or otherwise trust.
    with open(path, "rb") as data_file:  # closes the handle once loaded
        X_i, y_i = pickle.load(data_file)
    X_parts.append(X_i)
    y_parts.append(y_i)

X = np.concatenate(X_parts, axis=0)
y = np.concatenate(y_parts, axis=0)

# Drop the per-file references so the kernel does not keep a second copy of
# the data alive after concatenation.
del X_parts, y_parts

print("\nRead", len(X), "vectors and labels.")

Processing C4 black car panel/data.p ...
Processing C4 metal panel/data.p ...
Processing C4 red glossy paper/data.p ...
Processing C4 white car panel/data.p ...
Processing C4 wood panel/data.p ...
Processing DSYP60-not-C4/data.p ...

Read 7036951 vectors and labels.


# Prepare data

In [7]:
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test partition (and therefore the reported scores)
# is reproducible across kernel restarts.
SPLIT_SEED = 42

# Convert to float32 and divide by 255.
# NOTE: this rebinds X, so re-running this cell without re-running the load
# cell first would divide by 255 a second time.
X = X.astype('float32') / 255
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=SPLIT_SEED
)

print('x_train shape:', X_train.shape)
print('x_test shape:', X_test.shape)

x_train shape: (5277713, 8)
x_test shape: (1759238, 8)


# Train

In [None]:
# Fit for 5 epochs with mini-batches of 128 samples, evaluating on the
# (X_test, y_test) split as validation data; the value returned by fit() is
# kept in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=128,
    verbose=1,
)

Train on 5277713 samples, validate on 1759238 samples
Epoch 1/5
Epoch 2/5

# Score

In [11]:
# Evaluate on the held-out split in batches of 128; the result unpacks into
# the loss followed by the accuracy metric.
test_scores = model.evaluate(X_test, y_test, batch_size=128)
loss, accuracy = test_scores

print('Test loss:', loss)
print('Test accuracy:', accuracy)

Test loss: 0.05271610697151234
Test accuracy: 0.9879072643951529


# Save model

In [13]:
import os

# Make sure the target directory exists so saving does not fail on a fresh
# checkout where "models/" has not been created yet.
os.makedirs("models", exist_ok=True)

# Persist the trained model to an HDF5 file.
model.save("models/c4-neural-network.h5")