In [None]:
% run 1-datasource.ipynb

In [None]:
% run 4-graph-utils.ipynb

In [None]:
import torch.utils.data as data_utils
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss
from sklearn.preprocessing import StandardScaler
from tensorboardX import SummaryWriter
import skimage.transform as transform
import networkx as nx
from scipy import spatial
import scipy.sparse.csgraph

In [None]:
# Preview the first rows of `icebergs`. The frame is not created in this notebook;
# presumably it comes from the `1-datasource.ipynb` run above — TODO confirm.
icebergs.head()

In [None]:
# Build the grid with the external `grid` helper (not defined in this notebook;
# presumably from 4-graph-utils.ipynb) and show its shape.
# 38 is the side length: the graph signal built later samples 38 * 38 values.
gr = grid(38)
gr.shape

In [None]:
# Boolean mask that is True wherever `distance(gr, k=3)` is strictly positive.
# NOTE(review): `distance` is defined elsewhere; presumably it returns a
# k-nearest-neighbour distance matrix over the grid nodes — confirm.
mask = distance(gr, k=3) > 0
# Show the sparsity pattern of the top-left 40 x 40 corner (trailing `;` hides the repr).
plt.spy(mask[:40, :40]);

In [None]:
# Coarsen the adjacency mask with the external `coarsen` helper (3 levels, no
# self connections). It returns the list of graphs and the node permutation that
# is later applied to every signal.
# `csr_matrix` is taken from the public `scipy.sparse` namespace: the
# `scipy.sparse.csr` submodule path is a deprecated private location of the same class.
graphs, perm = coarsen(sp.sparse.csr_matrix(mask), levels=3, self_connections=False)

In [None]:
# Number of entries in the permutation returned by `coarsen`.
len(perm)

In [None]:
# Normalised graph Laplacian (normed=True) for every graph except the last one
# in `graphs` (presumably the coarsest level — confirm against `coarsen`).
laps = [sp.sparse.csgraph.laplacian(g, normed=True) for g in graphs[:-1]]

In [None]:
# Number of nodes of the first Laplacian. Later cells use it as the padded
# length of each graph signal and as the width of the first linear layer's input.
n = laps[0].shape[0]
n

In [None]:
def _band_to_graph_signal(band):
    """Convert one flattened 75 x 75 band into a padded, permuted graph signal."""
    # Rescale the 75 x 75 image by a factor of 0.5.
    small = transform.rescale(band.reshape(75, 75), 0.5, mode='constant')
    # Sample the image at the grid positions.
    # NOTE(review): this assumes `gr` is laid out so that reshape(2, -1) yields
    # (row indices, column indices) — confirm against `grid`.
    sampled = small[tuple(gr.reshape(2, -1))]
    # Zero-pad from 38 * 38 values up to the n nodes of the coarsened graph.
    padded = np.r_[sampled, np.zeros(n - 38 * 38)]
    # Reorder the nodes with the permutation produced by the coarsening.
    return padded[perm]


# Add a `graph` column holding the graph signal derived from band_1.
icebergs = icebergs.assign(graph=icebergs.band_1.apply(_band_to_graph_signal))

In [None]:
# Hold out 10% of the rows for testing, stratified on the label so both splits
# keep the same class balance. The fixed random_state makes the split (and hence
# every number computed from it) identical after Restart & Run All.
train, test = train_test_split(
    icebergs, test_size=0.1, stratify=icebergs.is_iceberg, random_state=0
)

In [None]:
# One standardiser per representation: raw band_1, raw band_2 and the graph signal.
# NOTE(review): only scaler_graph is used to transform data in the cells visible
# in this notebook; scaler_1 / scaler_2 are fitted but not applied here.
scaler_1 = StandardScaler()
scaler_2 = StandardScaler()
scaler_graph = StandardScaler()

In [None]:
# Fit the scalers incrementally on the training rows only, one row at a time;
# each row is presented as a single sample of shape (1, n_features).
for row_band_1, row_band_2, row_graph in zip(train.band_1, train.band_2, train.graph):
    scaler_1.partial_fit(row_band_1.reshape(1, -1))
    scaler_2.partial_fit(row_band_2.reshape(1, -1))
    scaler_graph.partial_fit(row_graph.reshape(1, -1))

In [None]:
# Inputs: stack the per-row graph signals into a 2-D array, standardise them with
# the scaler fitted on the training rows, and convert to float tensors.
train_x = torch.from_numpy(scaler_graph.transform(np.stack(train.graph))).float()
test_x = torch.from_numpy(scaler_graph.transform(np.stack(test.graph))).float()
# Targets: the is_iceberg labels as a column of shape (N, 1), cast to long.
train_y = torch.from_numpy(train.is_iceberg.values.reshape(-1, 1)).long()
test_y = torch.from_numpy(test.is_iceberg.values.reshape(-1, 1)).long()

In [None]:
# Run configuration.
# Use the GPU whenever PyTorch reports one as available.
cuda = torch.cuda.is_available()
# NOTE(review): num_epochs is not referenced by any later cell visible in this
# notebook; the training cell iterates over a hard-coded range(20).
num_epochs = 1
# Mini-batch size shared by the train and test data loaders.
batch_size = 25
# Learning rate handed to the Adam optimizer.
learning_rate = 0.0001
cuda

In [None]:
# Mini-batch loaders over the (input, target) tensor pairs.
# Both loaders reshuffle on every pass (shuffle=True), including the test loader.
train_loader = data_utils.DataLoader(data_utils.TensorDataset(train_x, train_y), batch_size=batch_size, shuffle=True)
test_loader = data_utils.DataLoader(data_utils.TensorDataset(test_x, test_y), batch_size=batch_size, shuffle=True)

In [None]:
# Apply the external `fourier` helper to the dense first Laplacian and wrap the
# result as a float Variable that is excluded from gradient computation.
# NOTE(review): `fourier` is defined elsewhere; presumably it returns the graph
# Fourier basis (Laplacian eigenvectors) as a NumPy array — confirm.
f = Variable(torch.from_numpy(fourier(laps[0].todense())).float(), requires_grad=False)
f

In [None]:
class GraphConv(nn.Module):
    """Spectral graph convolution followed by a ReLU.

    The input signal is projected onto the basis ``fourier``, every spectral
    coefficient is scaled by one learned weight per filter, the result is
    projected back with the transposed basis, a learned per-node bias is added
    and a ReLU is applied.

    Args:
        fourier: square ``n x n`` basis matrix; stored as-is (not a parameter).
        n_filter: number of output filters.
    """

    def __init__(self, fourier, n_filter):
        super(GraphConv, self).__init__()

        self.u = fourier
        self.n = len(fourier)
        self.n_filter = n_filter

        # One weight per (spectral coefficient, filter) and one bias per node.
        self.weight = nn.Parameter(torch.Tensor(self.n, n_filter))
        self.bias = nn.Parameter(torch.Tensor(self.n))

        # Both parameters are drawn uniformly from [-1/sqrt(n_filter), 1/sqrt(n_filter)].
        bound = 1. / np.sqrt(n_filter)
        for param in (self.weight, self.bias):
            param.data.uniform_(-bound, bound)

    def forward(self, x):
        """Map a ``samples x n`` signal to a ``samples x n_filter x n`` activation."""
        # Forward transform: samples x n
        spectral = x @ self.u

        # Filter: (n x f x 1) @ (n x 1 x samples) -> n x f x samples
        filtered = self.weight.unsqueeze(2) @ spectral.t().unsqueeze(1)

        # Inverse transform, done on a flattened (samples * f) x n view
        flat = filtered.permute(2, 1, 0).contiguous().view(-1, self.n)
        restored = (flat @ self.u.t()).view(-1, self.n_filter, self.n)

        # The per-node bias broadcasts over samples and filters.
        return F.relu(restored + self.bias)

In [None]:
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, x):
        batch = x.size(0)
        return x.view(batch, -1)

In [None]:
class ProbabilityNLLLoss(nn.Module):
    """Negative log-likelihood for a network whose output is already a probability.

    ``nn.CrossEntropyLoss`` applies ``log_softmax`` to its input, so feeding it the
    output of the trailing ``nn.Softmax`` layer normalises twice, which bounds the
    loss and flattens its gradients. This criterion takes the log of the
    probabilities directly, so the network can keep emitting probabilities for
    ``log_loss`` / ``argmax`` downstream.
    """

    def forward(self, probabilities, target):
        # Clamp first so a probability that underflowed to 0 cannot produce -inf.
        return F.nll_loss(torch.log(probabilities.clamp(min=1e-12)), target)


# Graph convolution (10 filters) -> flatten -> two dense layers -> class probabilities.
net = nn.Sequential(
    GraphConv(f.cuda() if cuda else f, 10),
    nn.ReLU(),
    Flatten(),
    nn.Linear(n * 10, 200),
    nn.ReLU(),
    nn.Linear(200, 2),
    nn.Softmax(1),
)
criterion = ProbabilityNLLLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
writer = SummaryWriter()

if cuda:
    net.cuda()
    criterion.cuda()

# Trace the graph with a single training example. The example is moved to the
# GPU only when the network lives there, so the cell also runs on CPU-only hosts.
example_input = Variable(train_x.narrow(0, 0, 1))
if cuda:
    example_input = example_input.cuda()
writer.add_graph(net, net(example_input))

# Per-epoch loss histories filled by the training cell.
epoch_train_loss = []
epoch_test_loss = []

In [None]:
def loop(loader, training=False):
    """Run one pass over ``loader`` with the global ``net`` and ``criterion``.

    Args:
        loader: iterable yielding ``(x, y)`` mini-batches of tensors.
        training: when true, back-propagate each batch loss and step the
            global ``optimizer``; otherwise the weights are left untouched.

    Returns:
        A tuple ``(targets, predictions, running_loss)``: the targets and the raw
        network outputs as NumPy arrays (in iteration order), and the loss summed
        over all samples, so dividing it by the dataset size gives the mean loss.
    """
    running_loss = 0
    targets = []
    predictions = []

    for x, y in loader:
        x = Variable(x)
        # Flatten the targets to 1-D explicitly: a bare squeeze() would also drop
        # the batch dimension when the final batch holds a single sample.
        y = Variable(y).view(-1)

        if cuda:
            x = x.cuda()
            y = y.cuda()

        outputs = net(x)
        loss = criterion(outputs, y)

        if training:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # view(-1)[0] reads the scalar whether the loss is a 0-dim or a 1-element tensor.
        batch_loss = float(loss.data.cpu().view(-1)[0])
        # The criterion is assumed to return the batch mean (the default reduction);
        # weight it by the batch size so the total is a per-sample sum.
        running_loss += batch_loss * x.size(0)
        targets.extend(y.data.cpu().numpy())
        predictions.extend(outputs.data.cpu().numpy())

    return np.array(targets), np.array(predictions), running_loss

In [None]:
# Train for 20 epochs, evaluating on the held-out split after each one.
for e in range(20):

    train_targets, train_preds, train_loss = loop(train_loader, training=True)
    # Normalise by the split size before logging, so TensorBoard shows the same
    # values that are printed and plotted and both curves share one scale.
    train_loss /= train.shape[0]
    writer.add_scalar('data/train_loss', train_loss, e)
    writer.add_scalar('data/train_accuracy', accuracy_score(train_targets, train_preds.argmax(axis=1)), e)

    test_targets, test_preds, test_loss = loop(test_loader, training=False)
    test_loss /= test.shape[0]
    writer.add_scalar('data/test_loss', test_loss, e)
    writer.add_scalar('data/test_accuracy', accuracy_score(test_targets, test_preds.argmax(axis=1)), e)

    epoch_train_loss.append(train_loss)
    epoch_test_loss.append(test_loss)

    print('Training loss: {:.4f}'.format(train_loss))
    print('Testing  loss: {:.4f}'.format(test_loss))

In [None]:
# Plot the per-epoch train and test losses collected by the training cell.
plt.plot(epoch_train_loss, label='train')
plt.plot(epoch_test_loss, label='test')
# Trailing `;` hides the legend object's repr.
plt.legend();

In [None]:
# Pass the training set through the network once more without weight updates
# (training defaults to False) and report accuracy of the arg-max class.
train_targets, train_preds, train_loss = loop(train_loader)
accuracy_score(train_targets, train_preds.argmax(axis=1))

In [None]:
# Log loss of the network outputs (the final Softmax layer's values) on the training set.
log_loss(train_targets, train_preds)

In [None]:
# Same evaluation on the held-out test set: no weight updates, arg-max accuracy.
test_targets, test_preds, test_loss = loop(test_loader)
accuracy_score(test_targets, test_preds.argmax(axis=1))

In [None]:
# Log loss of the network outputs (the final Softmax layer's values) on the test set.
log_loss(test_targets, test_preds)