In [None]:
% run utils.ipynb

In [None]:
import torch.utils.data as data_utils
from sklearn.metrics import accuracy_score, log_loss
from sklearn.preprocessing import StandardScaler
from tensorboardX import SummaryWriter

In [None]:
# Side length (in pixels) used when reshaping each flattened band into a
# (1, size, size) image tensor in `tensorify`.
size = 75

In [None]:
# Load the records from train.json into a DataFrame; later cells read its
# `band_1` and `is_iceberg` columns.
df = pd.read_json('train.json')
# Preview the first rows as the cell output.
df.head()

In [None]:
# Fit one global mean/std over every pixel value of band_1, feeding the scaler
# one record at a time as a single-feature column.
# Note: this runs over all rows of `df`, i.e. before the train/test split below.
s = StandardScaler()
for band in df["band_1"]:
    pixel_column = np.asarray(band).reshape(-1, 1)
    s.partial_fit(pixel_column)

In [None]:
def tensorify(col):
    """Turn a column of flattened bands into a scaled float image tensor.

    Parameters
    ----------
    col : pandas.Series
        Each entry is a flat sequence of ``size * size`` pixel values.

    Returns
    -------
    torch.FloatTensor
        Tensor of shape ``(len(col), 1, size, size)`` whose values have been
        standardised with the module-level scaler ``s``.
    """
    pixels = np.stack(col.values)
    # `s` was fitted on a single feature column (every pixel is one sample), so
    # present the data in that same (n_pixels, 1) layout. Passing the
    # (n_rows, size*size) matrix directly fails the feature-count check in
    # scikit-learn releases that validate it.
    scaled = s.transform(pixels.reshape(-1, 1))
    return torch.from_numpy(scaled.reshape(-1, 1, size, size)).float()

In [None]:
# Random ~90/10 train/test split via a boolean row mask.
# A dedicated, seeded generator keeps the split identical across kernel
# restarts without touching NumPy's global random state.
split_rng = np.random.RandomState(0)
split_train_test = split_rng.rand(len(df)) < 0.9
train = df[split_train_test]
test = df[~split_train_test]

In [None]:
# Inputs: scaled band_1 images for each partition.
train_x, test_x = (tensorify(part.band_1) for part in (train, test))

# Targets: is_iceberg labels as float column vectors of shape (n_rows, 1).
train_y, test_y = (
    torch.from_numpy(part.is_iceberg.values).float().unsqueeze(1)
    for part in (train, test)
)

In [None]:
# Training hyper-parameters.
num_epochs = 1
batch_size = 25
learning_rate = 5e-5

# Use the GPU when one is visible to PyTorch; echo the flag as the cell output.
cuda = torch.cuda.is_available()
cuda

In [None]:
# Wrap each (inputs, labels) pair in a shuffled mini-batch loader.
train_loader, test_loader = (
    data_utils.DataLoader(
        data_utils.TensorDataset(features, labels),
        batch_size=batch_size,
        shuffle=True,
    )
    for features, labels in ((train_x, train_y), (test_x, test_y))
)

In [None]:
class Net(nn.Module):
    """Two-stage convolutional classifier for single-channel 75x75 images.

    Each stage is Conv(5x5, padding 2) -> ReLU -> MaxPool(2), so the spatial
    size goes 75 -> 37 -> 18 and the flattened feature vector has
    ``32 * 18 * 18`` entries.

    ``forward`` returns raw, unnormalised class scores (logits) of shape
    ``(batch, 2)``. No softmax is applied here because ``nn.CrossEntropyLoss``
    applies log-softmax internally; feeding it already-normalised
    probabilities squashes the loss range and weakens the gradients. Taking
    ``argmax`` over the logits yields the same predicted class as over the
    softmax probabilities.
    """

    def __init__(self):
        super(Net, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            #nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            #nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        # NOTE(review): there is no non-linearity between the two Linear
        # layers, so this head is equivalent to a single affine map.
        self.fc = nn.Sequential(
            nn.Linear(32 * 18 * 18, 100),
            nn.Linear(100, 2),
        )

    def forward(self, x):
        """Map a ``(batch, 1, 75, 75)`` tensor to ``(batch, 2)`` logits."""
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension for the linear head.
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [None]:
# Model and loss.
net = Net()
criterion = nn.CrossEntropyLoss()

# Move the model (and loss) to the GPU *before* building the optimizer, as the
# torch.optim documentation recommends, so the optimizer is guaranteed to hold
# the parameters that are actually trained.
if cuda:
    net.cuda()
    criterion.cuda()

optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

# TensorBoard event writer (default log directory).
writer = SummaryWriter()

# Per-epoch loss history, filled by the training loop and plotted afterwards.
epoch_train_loss = []
epoch_test_loss = []

In [None]:
# Train and evaluate once per epoch, recording the mean per-sample loss of
# each pass.
# NOTE(review): the epoch count is hard-coded here; the `num_epochs` setting
# defined earlier is not used.
for e in range(20):

    # ---- training pass ----
    net.train()
    train_loss = 0.0
    train_seen = 0

    for batch_id, (x, y) in enumerate(tqdm(train_loader, desc='Training')):
        x = Variable(x)
        # Labels arrive as (batch, 1) floats. view(-1) always yields a 1-D
        # target, whereas squeeze() would collapse a final batch of one sample
        # to a scalar.
        y = Variable(y).long().view(-1)

        if cuda:
            x = x.cuda()
            y = y.cuda()

        optimizer.zero_grad()

        outputs = net(x)
        loss = criterion(outputs, y)

        # The criterion returns the batch *mean*; weight it by the batch size
        # so that dividing by the sample count below gives a true per-sample
        # average. `.numpy().item()` extracts the scalar whether the loss
        # tensor is 0-dim or has a single element.
        batch_n = x.size(0)
        train_loss += loss.data.cpu().numpy().item() * batch_n
        train_seen += batch_n

        loss.backward()
        optimizer.step()

    # ---- evaluation pass (no parameter updates) ----
    net.eval()
    test_loss = 0.0
    test_seen = 0

    for batch_id, (x, y) in enumerate(tqdm(test_loader, desc='Testing')):
        x = Variable(x)
        y = Variable(y).long().view(-1)

        if cuda:
            x = x.cuda()
            y = y.cuda()

        outputs = net(x)
        loss = criterion(outputs, y)

        batch_n = x.size(0)
        test_loss += loss.data.cpu().numpy().item() * batch_n
        test_seen += batch_n

    # Mean loss per sample; max(..., 1) guards against an empty loader.
    train_loss /= max(train_seen, 1)
    test_loss /= max(test_seen, 1)

    # Log the normalised training loss (previously the raw running sum).
    writer.add_scalar('data/scalar1', train_loss, e)

    epoch_train_loss.append(train_loss)
    epoch_test_loss.append(test_loss)
    print('Training loss: {:.4f}'.format(train_loss))
    print('Testing  loss: {:.4f}'.format(test_loss))

# NOTE(review): this passes the last model *output* to add_graph; newer
# tensorboardX releases expect a model input as the second argument — confirm
# against the installed version.
writer.add_graph(net, outputs)

In [None]:
# Training vs. test loss per epoch, with labelled axes so the figure can be
# read on its own.
fig, ax = plt.subplots()
ax.plot(epoch_train_loss, label='train')
ax.plot(epoch_test_loss, label='test')
ax.set(xlabel='epoch', ylabel='loss', title='Loss per epoch')
ax.legend();

In [None]:
# Accuracy on the training set: collect the predicted class (argmax over the
# two outputs) and the true label for every sample.
preds = []
targets = []

net.eval()
for batch_id, (x, y) in enumerate(tqdm(train_loader, desc='Training')):
    x = Variable(x)
    # view(-1) keeps the labels 1-D even when the last batch holds a single
    # sample; squeeze() would produce a scalar that extend() cannot iterate.
    y = Variable(y).long().view(-1)

    if cuda:
        x = x.cuda()
        y = y.cuda()

    outputs = net(x)
    preds.extend(outputs.data.cpu().numpy().argmax(axis=1))
    targets.extend(y.data.cpu().numpy())

accuracy_score(targets, preds)

In [None]:
# `preds` holds the argmax class indices gathered in the evaluation loop, so
# this is the log loss of hard 0/1 predictions rather than of probabilities.
# NOTE(review): log_loss is normally given class probabilities — confirm that
# scoring hard labels is intended.
log_loss(targets, preds)

In [None]:
# Accuracy on the held-out test set: collect the predicted class (argmax over
# the two outputs) and the true label for every sample.
preds = []
targets = []

net.eval()
for batch_id, (x, y) in enumerate(tqdm(test_loader, desc='Testing')):
    x = Variable(x)
    # view(-1) keeps the labels 1-D even when the last batch holds a single
    # sample; squeeze() would produce a scalar that extend() cannot iterate.
    y = Variable(y).long().view(-1)

    if cuda:
        x = x.cuda()
        y = y.cuda()

    outputs = net(x)
    preds.extend(outputs.data.cpu().numpy().argmax(axis=1))
    targets.extend(y.data.cpu().numpy())

accuracy_score(targets, preds)

In [None]:
# `preds` holds the argmax class indices gathered in the evaluation loop, so
# this is the log loss of hard 0/1 predictions rather than of probabilities.
# NOTE(review): log_loss is normally given class probabilities — confirm that
# scoring hard labels is intended.
log_loss(targets, preds)