In [None]:
% run 1-datasource.ipynb

In [None]:
% run 4-graph-utils.ipynb

In [None]:
import os
# Set CUDA_VISIBLE_DEVICES to '1' for this process (hard-coded GPU index;
# adjust it for the machine the notebook runs on).
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [None]:
import torch.utils.data as data_utils
import torchvision.datasets as dset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss
from sklearn.preprocessing import StandardScaler
from tensorboardX import SummaryWriter
import networkx as nx
from scipy import spatial
import scipy.sparse.csgraph
from timeit import default_timer as timer

In [None]:
# Side length of the square pixel grid; gr_size * gr_size is used further
# down as the number of real pixels per image.
gr_size = 28
# `grid` is not defined in this notebook section (presumably it comes from
# 4-graph-utils) -- assumed to return one coordinate row per grid cell; confirm.
gr = grid(gr_size)
gr.shape

In [None]:
# Boolean mask that is True wherever `distance` returns a positive entry.
# NOTE(review): `distance` is defined elsewhere; k=8 presumably selects the
# 8 nearest neighbours of every grid point -- confirm against its definition.
mask = distance(gr, k=8, metric='euclidean') > 0
# Visual sanity check: sparsity pattern of the top-left 40 x 40 corner.
plt.spy(mask[:40, :40]);

In [None]:
# Coarsen the pixel graph. `coarsen` is defined outside this section; it
# returns the list of graphs plus `perm` (presumably the node re-ordering of
# the finest graph -- confirm against its definition).
# `sp.sparse.csr_matrix` is the public constructor; the `sp.sparse.csr`
# sub-namespace used previously is private and deprecated in recent SciPy.
graphs, perm = coarsen(sp.sparse.csr_matrix(mask), levels=3, self_connections=False)

In [None]:
# Number of entries in the permutation returned by `coarsen`.
len(perm)

In [None]:
# Normalised graph Laplacian of every graph except the last entry of `graphs`.
laps = [sp.sparse.csgraph.laplacian(g, normed=True) for g in graphs[:-1]]

In [None]:
# Node count of the first Laplacian; images are zero-padded to this length
# further down.
n = laps[0].shape[0]
n

In [None]:
def fourier(laplacian):
    """Return the eigenvector matrix of `laplacian`.

    The decomposition is delegated to ``sp.linalg.eigh``; only its second
    return value (the eigenvectors, one per column) is kept and the
    eigenvalues are discarded.
    """
    return sp.linalg.eigh(laplacian)[1]

In [None]:
# Dense float copy of the first Laplacian (laps[0]) as a torch tensor.
l0 = torch.from_numpy(laps[0].todense()).float()
l0.size()

In [None]:
# Dense float copy of the second Laplacian (laps[1]) as a torch tensor.
l1 = torch.from_numpy(laps[1].todense()).float()
l1.size()

In [None]:
# Dense float copy of the third Laplacian (laps[2]) as a torch tensor.
l2 = torch.from_numpy(laps[2].todense()).float()
l2.size()

In [None]:
# Eigenvector basis of laps[0] (see `fourier`) as a float torch tensor.
f0 = torch.from_numpy(fourier(laps[0].todense())).float()
f0.size()

In [None]:
# Eigenvector basis of laps[1] (see `fourier`) as a float torch tensor.
f1 = torch.from_numpy(fourier(laps[1].todense())).float()
f1.size()

In [None]:
# Eigenvector basis of laps[2] (see `fourier`) as a float torch tensor.
f2 = torch.from_numpy(fourier(laps[2].todense())).float()
f2.size()

In [None]:
# MNIST is stored under ./data. Only the training split requests a download;
# the test split is then read from the same folder.
data_folder = './data'
train = dset.MNIST(data_folder, train=True, download=True)
test = dset.MNIST(data_folder, train=False)

In [None]:
def _to_graph_signals(images, perm, n_nodes):
    """Flatten images and place every pixel on its node of the coarsened graph.

    NOTE(review): this assumes `coarsen` follows the convention of Defferrard
    et al.'s reference implementation: the returned graphs are already
    re-ordered, and `perm[j]` is the index of the original pixel that lives on
    node `j` (indices >= number of pixels denote padding nodes). Merely
    appending zeros, as done previously, leaves the pixels in raster order, so
    signal and Laplacian disagree and MaxPool1d pools unrelated nodes.

    Args:
        images: iterable of 2-D images (anything `np.asarray` accepts).
        perm: sequence of length `n_nodes` with source pixel indices.
        n_nodes: number of nodes of the finest graph.

    Returns:
        Float tensor of shape (len(images), n_nodes); padding nodes are 0.
    """
    pixels = np.stack([np.asarray(img).reshape(-1) for img in images])
    order = np.asarray(perm)
    assert len(order) == n_nodes, 'perm must hold one source index per graph node'

    signals = np.zeros((pixels.shape[0], n_nodes), dtype=np.float32)
    is_pixel = order < pixels.shape[1]  # False for the padding nodes
    signals[:, is_pixel] = pixels[:, order[is_pixel]]
    return torch.from_numpy(signals)


train_x, train_y = zip(*train)
test_x, test_y = zip(*test)

train_x = _to_graph_signals(train_x, perm, n)
test_x = _to_graph_signals(test_x, perm, n)
train_y = torch.from_numpy(np.array(train_y)).long()
test_y = torch.from_numpy(np.array(test_y)).long()

train_x.shape, train_y.shape

In [None]:
# Shapes of the prepared test inputs and labels.
test_x.shape, test_y.shape

In [None]:
class GraphConv(nn.Module):
    """Single-input-channel spectral graph filter bank.

    The input is multiplied by the basis `fourier`, scaled per basis column
    with one learned coefficient per filter, multiplied by the transposed
    basis and offset by a learned per-node bias.

    Args:
        fourier: square basis tensor (n x n).
        n_filter: number of filters (output channels).
    """

    def __init__(self, fourier, n_filter):
        super().__init__()

        self.n = fourier.size(0)
        self.n_filter = n_filter
        self.u = Variable(fourier, requires_grad=False)  # fixed basis, not trained

        self.weight = nn.Parameter(torch.Tensor(self.n, n_filter))
        self.bias = nn.Parameter(torch.Tensor(self.n))

        # Both parameters share the same uniform range, derived from n_filter.
        bound = 1. / np.sqrt(self.weight.size(1))
        for param in (self.weight, self.bias):
            param.data.uniform_(-bound, bound)

    def forward(self, x):
        """Map `x` (samples x n) to a tensor of shape samples x n_filter x n."""
        # Into the basis: one row per basis column.
        coeffs = x.matmul(self.u).t()  # n x samples

        # Per-column scaling, broadcast over filters via a batched product.
        scaled = self.weight.unsqueeze(2).matmul(coeffs.unsqueeze(1))  # n x f x samples

        # Back out of the basis, one (sample, filter) pair per row.
        rows = scaled.transpose(0, 2).contiguous().view(-1, self.n)  # (samples * f) x n
        restored = rows.matmul(self.u.t()).view(-1, self.n_filter, self.n)  # samples x f x n

        return restored + self.bias

In [None]:
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, x):
        batch = x.size(0)
        return x.view(batch, -1)

In [None]:
class GraphFourierConv(nn.Module):
    """Multi-channel spectral graph convolution in a fixed basis.

    Every input channel is multiplied by `fourier_basis`, scaled with one
    learned coefficient per (basis column, output channel), multiplied by the
    transposed basis, and the results are summed over the input channels.
    The coefficients are shared by all input channels.

    Args:
        fourier_basis: square basis tensor (n x n); kept fixed.
        in_channels: number of input channels.
        out_channels: number of output channels.
        bias: if True, add one learned offset per output channel.
    """

    def __init__(self, fourier_basis, in_channels, out_channels, bias=True):
        super().__init__()

        self.n = fourier_basis.size(0)
        self.in_channels = in_channels
        self.out_channels = out_channels

        assert fourier_basis.size(1) == self.n
        self.u = Variable(fourier_basis, requires_grad=False) # n x n
        self.ut = self.u.t()

        self.weight = nn.Parameter(torch.Tensor(self.n, self.out_channels, 1)) # n x out x 1

        if bias:
            self.bias = nn.Parameter(torch.Tensor(self.out_channels, 1))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Draw weight (and bias) uniformly from +-1/sqrt(out_channels)."""
        stdv = 1. / np.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)

        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, x):
        """Map `x` (batch x in x n) to a tensor of shape batch x out x n."""
        # fourier
        out = x.view(-1, self.n) # (batch * in) x n
        out = out @ self.u # (batch * in) x n

        # filter: batched (out x 1) @ (1 x (batch * in)) product per basis column
        out = out.t().unsqueeze(1) # n x 1 x (batch * in)
        out = self.weight @ out # n x out x (batch * in)

        # un-fourier
        out = out.transpose(0, 2).contiguous() # (batch * in) x out x n
        out = out.view(-1, self.n) # (batch * in * out) x n
        out = out @ self.ut # (batch * in * out) x n
        out = out.view(-1, self.in_channels, self.out_channels, self.n) # batch x in x out x n

        # sum in dim + bias
        out = out.sum(1) # batch x out x n
        if self.bias is not None:
            out = out + self.bias # (out x 1) broadcasts over batch and nodes

        return out

    def __repr__(self):
        # Report whether a bias exists instead of dumping the parameter tensor.
        return '{}(fourier_basis, {}, {}, bias={})'.format(
            self.__class__.__name__, self.in_channels, self.out_channels, self.bias is not None
        )

In [None]:
class GraphChebyConv(nn.Module):
    """Graph convolution built from a Chebyshev recurrence on the Laplacian.

    The Laplacian is rescaled to ``2 / lmax * L - I`` with ``lmax = 2``. The
    input is expanded into ``k`` terms ``T_0 = x``, ``T_1 = L~ x``,
    ``T_j = 2 L~ T_{j-1} - T_{j-2}``, and one linear map mixes all
    (input channel, term) pairs into the output channels.

    Args:
        laplacian: square tensor (n x n). NOTE(review): lmax is fixed to 2,
            which presumably relies on a normalised Laplacian -- confirm.
        in_channels: number of input channels.
        out_channels: number of output channels.
        k: number of Chebyshev terms (>= 1).
        bias: if True, add one learned offset per output channel.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """

    def __init__(self, laplacian, in_channels, out_channels, k=25, bias=True):
        super().__init__()

        if k < 1:
            raise ValueError('k must be at least 1, got {}'.format(k))

        self.n = laplacian.size(0)
        self.k = k  # was hard-coded to 25, silently ignoring the argument
        self.in_channels = in_channels
        self.out_channels = out_channels

        assert laplacian.size(1) == self.n
        lmax = 2
        # Build the identity with the same tensor type as the Laplacian so the
        # layer works on CPU as well as on GPU.
        identity = torch.eye(self.n).type_as(laplacian)
        self.l = Variable(2 / lmax * laplacian - identity, requires_grad=False) # n x n

        self.weight = nn.Parameter(torch.Tensor(self.in_channels * self.k, self.out_channels)) # (in * k) x out

        if bias:
            self.bias = nn.Parameter(torch.Tensor(self.out_channels, 1))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Draw weight (and bias) uniformly from +-1/sqrt(out_channels)."""
        stdv = 1. / np.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)

        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, x):
        """Map `x` (batch x in x m) to a tensor of shape batch x out x m.

        A 2-D input (batch x m) is accepted when ``in_channels == 1``.
        """
        batch = x.size(0)

        # chebyshev recurrence, one column per (sample, input channel)
        t_prev = x.view(-1, self.n).t() # m x (batch * in)
        terms = [t_prev]
        if self.k > 1:
            t_curr = self.l @ t_prev
            terms.append(t_curr)
            for _ in range(2, self.k):
                # Scale the small product instead of the n x n matrix.
                t_prev, t_curr = t_curr, 2 * (self.l @ t_curr) - t_prev
                terms.append(t_curr)

        out = torch.stack(terms) # k x m x (batch * in)

        # filter: bring (in, k) next to each other and mix them in one matmul
        out = out.view(self.k, self.n, batch, self.in_channels) # k x m x batch x in
        out = out.permute(2, 1, 3, 0).contiguous() # batch x m x in x k
        out = out.view(batch * self.n, self.in_channels * self.k) # (batch * m) x (in * k)

        out = out @ self.weight # (batch * m) x out
        out = out.view(batch, self.n, self.out_channels) # batch x m x out
        out = out.transpose(1, 2) # batch x out x m

        if self.bias is not None:
            out = out + self.bias # (out x 1) broadcasts over batch and nodes

        return out

    def __repr__(self):
        # Report whether a bias exists instead of dumping the parameter tensor.
        return '{}(laplacian, {}, {}, k={}, bias={})'.format(
            self.__class__.__name__, self.in_channels, self.out_channels, self.k, self.bias is not None
        )

In [None]:
# Global flag read by the model setup and by `loop` to decide whether
# tensors are moved to the GPU.
cuda = torch.cuda.is_available()
cuda

In [None]:
# Mini-batch size shared by both loaders (also read by `loop` for its
# progress output).
batch_size = 100
# Both loaders reshuffle their data on every pass.
train_loader = data_utils.DataLoader(data_utils.TensorDataset(train_x, train_y), batch_size=batch_size, shuffle=True)
test_loader = data_utils.DataLoader(data_utils.TensorDataset(test_x, test_y), batch_size=batch_size, shuffle=True)

In [None]:
# Number of output channels of the two graph convolutions.
conv1_dim = 20
conv2_dim = 40

# The network ends in raw class scores: nn.CrossEntropyLoss applies
# log-softmax itself, so an extra nn.Softmax layer would normalise twice and
# flatten the gradients. argmax-based predictions are unaffected.
net = nn.Sequential(
#    GraphConv(f.cuda() if cuda else f, conv1_dim),
#    GraphFourierConv(f0.cuda() if cuda else f0, 1, conv1_dim, bias=False),
    GraphChebyConv(l0.cuda() if cuda else l0, 1, conv1_dim, bias=False),
#    nn.BatchNorm2d(conv1_dim),
    nn.MaxPool1d(4),  # node axis shrinks by 4; the next layer uses l2 accordingly
    nn.ReLU(),

#    GraphFourierConv(f2.cuda() if cuda else f2, conv1_dim, conv2_dim, bias=False),
    GraphChebyConv(l2.cuda() if cuda else l2, conv1_dim, conv2_dim, bias=False),
#    nn.BatchNorm2d(conv2_dim),
    nn.MaxPool1d(4),
    nn.ReLU(),

    Flatten(),
#    nn.Dropout(0.5),
    nn.Linear(len(f2) // 4 * conv2_dim, 512),
#    nn.Linear(len(f1) // 4 * conv2_dim, 512),
    nn.Linear(512, 10),
)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.0005, weight_decay=0.005)
writer = SummaryWriter()

if cuda:
    net.cuda()
    criterion.cuda()

# Trace the graph with a single sample; only move it to the GPU when one is
# actually in use, otherwise this cell crashes on CPU-only machines.
graph_sample = Variable(train_x[0].unsqueeze(0))
if cuda:
    graph_sample = graph_sample.cuda()
# NOTE(review): the second argument is the model OUTPUT, as in the original
# call; newer tensorboardX releases may expect the model input instead --
# confirm against the installed version.
writer.add_graph(net, net(graph_sample))

# Per-epoch loss history, filled by the training cell and plotted afterwards.
epoch_train_loss = []
epoch_test_loss = []

In [None]:
def loop(net, loader, training=False):
    """Run one pass over `loader`, optionally updating the weights.

    Relies on the notebook globals `cuda`, `criterion` and `optimizer`.

    Args:
        net: the model; it is switched to train or eval mode to match
            `training`.
        loader: iterable of (inputs, labels) batches.
        training: if True, back-propagate and step the optimizer per batch.

    Returns:
        Tuple ``(targets, predictions, running_loss, duration)``:
        the labels and raw model outputs as numpy arrays, the loss summed over
        all samples (divide by the dataset size for the mean), and the
        elapsed wall-clock time in seconds.
    """
    # Mode-dependent layers (dropout, batch norm) must follow the phase.
    net.train(training)

    running_loss = 0
    seen = 0
    targets = []
    predictions = []
    start = timer()

    for batch_id, (x, y) in enumerate(loader):
        x = Variable(x)
        y = Variable(y)

        if cuda:
            x = x.cuda()
            y = y.cuda()

        outputs = net(x)
        loss = criterion(outputs, y)

        if training:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # `criterion` is created with its defaults in this notebook, i.e. it
        # returns the batch MEAN; weight it by the batch size so that
        # running_loss is a per-sample sum and callers can divide by the
        # number of samples. float(... .sum()) works for both 1-element and
        # 0-dim loss tensors.
        n_batch = x.size(0)
        running_loss += float(loss.data.sum()) * n_batch
        seen += n_batch

        targets.extend(y.data.cpu().numpy())
        predictions.extend(outputs.data.cpu().numpy())

        if training and (batch_id + 1) % 10 == 0:
            # Running mean loss per sample seen so far.
            print(running_loss / seen, end='\r')

    return np.array(targets), np.array(predictions), running_loss, (timer() - start)

In [None]:
# Train for five epochs, evaluating on the test set after each one.
for e in range(5):

    train_targets, train_preds, train_loss, train_duration = loop(net, train_loader, training=True)
    test_targets, test_preds, test_loss, test_duration = loop(net, test_loader, training=False)

    # Normalise by the dataset sizes BEFORE logging, so the TensorBoard
    # curves use the same scale as the printed and plotted values and the
    # train and test curves are comparable despite different dataset sizes.
    train_loss /= len(train)
    test_loss /= len(test)

    train_acc = accuracy_score(train_targets, train_preds.argmax(axis=1))
    test_acc = accuracy_score(test_targets, test_preds.argmax(axis=1))

    writer.add_scalar('data/train_loss', train_loss, e)
    writer.add_scalar('data/train_accuracy', train_acc, e)
    writer.add_scalar('data/test_loss', test_loss, e)
    writer.add_scalar('data/test_accuracy', test_acc, e)

    epoch_train_loss.append(train_loss)
    epoch_test_loss.append(test_loss)

    print('Training {:.4f} {:.2f}% Testing {:.4f} {:.2f}% Duration {:.2f}s {:.2f}s'.format(
        train_loss, train_acc * 100, test_loss, test_acc * 100, train_duration, test_duration
    ))

In [None]:
# Loss per epoch for both splits, as collected by the training cell.
plt.plot(epoch_train_loss, label='train')
plt.plot(epoch_test_loss, label='test')
plt.legend();

In [None]:
# Pass over the training set without weight updates (`training` defaults to
# False); show accuracy and elapsed time.
train_targets, train_preds, train_loss, train_duration = loop(net, train_loader)
accuracy_score(train_targets, train_preds.argmax(axis=1)), train_duration

In [None]:
# Predicted class index per training sample (argmax over the model outputs).
train_preds.argmax(axis=1)

In [None]:
# Pass over the test set without weight updates; show accuracy and elapsed
# time.
test_targets, test_preds, test_loss, test_duration = loop(net, test_loader)
accuracy_score(test_targets, test_preds.argmax(axis=1)), test_duration