In [1]:
import imp
import importlib

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import utils.movie_readin as mru
import utils.plotutils as plu

In [None]:
# ---- movie parameters ----
# NOTE(review): absolute, user-specific path; the notebook only runs on a
# machine where this exact file exists.
movie_filepath = '/home/vasha/research/datasets/stationary_motion/pixel2xlmomentlens/full_framerate/purple_willows_120.mp4_120fps.mp4'
pixel_patchsize, frame_patchsize = 12, 120   # patch edge in pixels, frames per patch
maxframes = 960                              # handed to mru.get_movie by NaturalMovieDataset

# Manual check of the movie reader, kept for reference:
#willows = mru.get_movie(movie_filepath, pixel_patchsize, frame_patchsize,
#                          normalize_patch=False, normalize_movie=True, encoding='mp4')
#test = willows[10,0,:,:]
#plt.imshow(test)

# ---- hyperparameters ----
num_epochs = 5000
batch_size = 100
learning_rate = 1e-5
N_TEST_IMG = 5
lambda_activation = lambda_biophysical = 0.01

# ---- model parameters ----
conv_width = 7
compression = 10
# One hidden node per `compression` input values of a single patch.
patch_volume = pixel_patchsize**2 * frame_patchsize
hidden_nodes = int(patch_volume / compression)
print(f'Training model:{pixel_patchsize}^2 pixels by {frame_patchsize} frames, to {hidden_nodes} hidden nodes for {compression}x compression')


# make it a Pytorch dataset (inherits from Dataset)
class NaturalMovieDataset(Dataset):
    """Dataset of stationary natural-movie patches.

    All patches are produced by ``mru.get_movie`` when the dataset is
    constructed and kept on ``self.movies``; indexing returns one patch as a
    ``torch`` tensor.
    """

    def __init__(self, movie_filepath, pixel_patchsize, frame_patchsize,
                 normalize_patch=False, normalize_movie=True, encoding='mp4'):
        """
        Args:
            movie_filepath (string): Path to the movie file
            pixel_patchsize (int): Number of pixels on the edge of a patch
            frame_patchsize (int): Number of frames in the movie
            normalize_patch (bool): Forwarded to ``mru.get_movie``
            normalize_movie (bool): Forwarded to ``mru.get_movie``
            encoding (string): Forwarded to ``mru.get_movie``

        The frame limit is read from the notebook-level ``maxframes``.
        """
        # Forward the caller's options; previously these three arguments were
        # accepted but silently replaced by hard-coded values.
        self.movies = mru.get_movie(movie_filepath, pixel_patchsize, maxframes, frame_patchsize,
                                    normalize_patch=normalize_patch,
                                    normalize_movie=normalize_movie,
                                    encoding=encoding,
                                    crop=True)

    def __len__(self):
        """Number of patches (length of the leading axis of ``self.movies``)."""
        return len(self.movies)

    def __getitem__(self, idx):
        """Return patch ``idx`` as a tensor sharing memory with the NumPy array."""
        # torch.from_numpy already yields a Tensor; the deprecated Variable
        # wrapper added nothing on top of it.
        return torch.from_numpy(self.movies[idx, :, :, :])

# Building the dataset reads and patches the whole movie, so reuse
# `movie_dataset` when an earlier run of this cell already left it in the
# kernel. Delete `movie_dataset` (or restart) after changing movie parameters.
try:
    movie_dataset
except NameError:
    movie_dataset = NaturalMovieDataset(movie_filepath, pixel_patchsize, frame_patchsize,
                              normalize_patch=False, normalize_movie=True, encoding='mp4')

# The loader itself is cheap, so always rebuild it. Previously it was created
# only in the `except` branch, so a changed `batch_size` was ignored whenever
# the cached dataset was reused.
train_loader = DataLoader(movie_dataset, batch_size=batch_size,
                          shuffle=True, num_workers=4)

Training model:12^2 pixels by 120 frames, to 1728 hidden nodes for 10x compression
normalizing movie...
making patches...
(64240, 12, 12, 120)


In [None]:
class AEC(nn.Module):
    """Single-layer 3-D convolutional autoencoder.

    The encoder is a ``Conv3d`` (1 input channel -> ``n_hidden`` channels)
    followed by ReLU; the decoder is a ``ConvTranspose3d`` with the same
    kernel size mapping back to 1 channel. Both use stride 1.

    Args:
        kernel_frames (int, optional): First kernel dimension. Defaults to the
            notebook-level ``conv_width``.
        patch_edge (int, optional): Second and third kernel dimensions.
            Defaults to the notebook-level ``pixel_patchsize``.
        n_hidden (int, optional): Number of encoder output channels. Defaults
            to the notebook-level ``hidden_nodes``.

    NOTE(review): the loader cell's output shows (64240, 12, 12, 120),
    presumably the patch array shape, and the training loop only inserts a
    channel axis. With that layout the ``kernel_frames`` dimension lines up
    with the first 12-sized axis rather than the 120-sized one. Confirm the
    intended axis order.
    """

    def __init__(self, kernel_frames=None, patch_edge=None, n_hidden=None):
        super().__init__()

        # Fall back to the notebook globals so that plain `AEC()` keeps
        # building exactly the model it built before.
        if kernel_frames is None:
            kernel_frames = conv_width
        if patch_edge is None:
            patch_edge = pixel_patchsize
        if n_hidden is None:
            n_hidden = hidden_nodes

        temporal_conv_kernel_size = (int(kernel_frames), int(patch_edge), int(patch_edge))

        self.tconv = nn.Conv3d(1,
                               n_hidden,
                               kernel_size=temporal_conv_kernel_size,
                               stride=1)
        # Pass the same plain tuple to the decoder. np.transpose on a 1-D
        # sequence does not reorder anything; it only turned the tuple into a
        # NumPy array.
        self.tdeconv = nn.ConvTranspose3d(n_hidden,
                                          1,
                                          kernel_size=temporal_conv_kernel_size,
                                          stride=1)

    def encode(self, x):
        """Return the ReLU-rectified convolution of ``x``."""
        return F.relu(self.tconv(x))

    def decode(self, z):
        """Map activations ``z`` back to input space with the transposed convolution."""
        return self.tdeconv(z)

    def forward(self, x):
        """Return ``(activations, reconstruction)`` for the batch ``x``."""
        activations = self.encode(x)
        decoded = self.decode(activations)
        return activations, decoded
    
# our model
# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AEC()
if device.type == "cuda":
    model.cuda()

# Plain SGD with a small momentum term over every model parameter.
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate, momentum=0.1)
#scheduler = MultiStepLR(optimizer, milestones=[30,80], gamma=0.1)

def loss_func(x, xhat, activations, activation_weight=None):
    """Reconstruction error plus a weighted activation penalty.

    Args:
        x: Input batch.
        xhat: Reconstruction of ``x``; must broadcast against ``x``.
        activations: Encoder activations to penalise.
        activation_weight (float, optional): Multiplier applied to the
            activations before summing. Defaults to the notebook-level
            ``lambda_activation``.

    Returns:
        Scalar tensor: ``mean((x - xhat)**2) + sum(activations * activation_weight)``.

    NOTE(review): the reconstruction term is a mean while the penalty is a
    sum, so the penalty's relative size grows with the number of activation
    elements. Confirm this is intended.
    """
    if activation_weight is None:
        activation_weight = lambda_activation
    recon_loss = ((x - xhat) ** 2).mean()
    activation_loss = torch.sum(activations * activation_weight)
    return recon_loss + activation_loss

# One entry per optimisation step (i.e. per batch), stored detached.
loss_history = []
print('Training Epochs:', end='')
for i in range(num_epochs):
    for movie in train_loader:
        # Add the channel axis Conv3d expects, cast to float and move the
        # batch to wherever the model lives. An unconditional .cuda() here
        # fails on CPU-only machines.
        movie = torch.unsqueeze(movie, 1).float().to(device)
        optimizer.zero_grad()
        acts, recon = model(movie)
        loss = loss_func(movie, recon, acts)
        loss.backward()
        optimizer.step()
        # Keep only the detached value: appending `loss` itself would hold on
        # to every step's autograd graph for the whole run.
        loss_history.append(loss.detach())
    # Progress marker: the epoch index every 10th epoch, '*' otherwise.
    if i % 10 == 0:
        print(f'{i}', end='')
    else:
        print('*', end='')
print('Done!')
        

Training Epochs:

In [None]:
# Take the first three tensors yielded by model.parameters(), in order, and
# drop singleton axes from the first and third.
mp = model.parameters()
inw, bias, outw = next(mp), next(mp), next(mp)
inw = inw.squeeze()
outw = outw.squeeze()

for tensor in (inw, bias, outw):
    print(tensor.shape)

In [None]:
# Pick up edits to utils/plotutils.py without restarting the kernel.
# importlib.reload is the supported replacement for the deprecated imp.reload.
importlib.reload(plu)
print('Kernels:')
# Show the receptive fields of kernels 5 through 9.
for i in range(5, 10):
    plu.plot_temporal_rf(inw, i, vectorize=False)

In [None]:
# Convert each recorded loss to a plain Python float. The builtin float()
# handles both tensors and floats; np.float no longer exists in current NumPy.
loss_evolution = [float(loss) for loss in loss_history]

# Raw loss per optimisation step.
plt.plot(loss_evolution)
plt.xlabel('training step')
plt.ylabel('loss')
plt.show()

# Same curve on a log scale.
plt.plot(np.log(loss_evolution))
plt.xlabel('training step')
plt.ylabel('log(loss)')
plt.show()

In [None]:
print('Movies:')
# `movie` and `recon` are whatever the training loop assigned last, i.e. the
# final batch and its reconstruction.
originals = np.squeeze(movie)
reconstructions = np.squeeze(recon)
for example in range(5):
    plu.plot_movies_recons(originals, reconstructions, example)
plt.show()


In [None]:
def visualize_aec():
    """Show one training patch next to the autoencoder's reconstruction of it.

    Draws a batch from ``train_loader``, prepares it the same way the training
    loop does (channel axis added, cast to float, moved to ``device``), runs
    it through ``model`` and plots the first example of the batch.

    Uses only matplotlib. The previous version relied on ``torchvision`` and
    ``convert_image_np``, neither of which is defined in this notebook, and
    plotted the encoder output under the 'Recon Images' title.
    """
    with torch.no_grad():
        # The loader yields plain tensors (no labels), so take the whole batch.
        batch = next(iter(train_loader))
        batch = torch.unsqueeze(batch, 1).float().to(device)
        _, recon_batch = model(batch)

    # First example, channel axis dropped, back on the CPU for matplotlib.
    original = batch[0, 0].cpu().numpy()
    rebuilt = recon_batch[0, 0].cpu().numpy()

    # imshow needs a 2-D image, so show a single slice along the last axis.
    # NOTE(review): assumes the last axis indexes frames (the loader cell's
    # output shows (64240, 12, 12, 120)) - confirm.
    f, axarr = plt.subplots(1, 2)
    axarr[0].imshow(original[..., 0])
    axarr[0].set_title('Dataset Images')

    axarr[1].imshow(rebuilt[..., 0])
    axarr[1].set_title('Recon Images')


visualize_aec()
plt.ioff()
plt.show()

In [None]:
# NOTE(review): `torchvision` is not imported in the notebook's import cell and
# `input_tensor` is not assigned at notebook scope in any visible cell, so this
# line raises NameError on a fresh kernel. Looks like leftover scratch work;
# confirm whether it can be removed.
torchvision.utils.make_grid(input_tensor)

In [None]:
# NOTE(review): this cell does not match the rest of the notebook and cannot
# run as written on a fresh kernel:
#   * `train_data`, `EPOCH` and `autoencoder` are not defined in any visible cell;
#   * `loss_func` is called with two arguments, but the `loss_func` defined
#     earlier requires `x`, `xhat` and `activations`;
#   * each batch is unpacked as `(x, b_label)`, while
#     `NaturalMovieDataset.__getitem__` returns a single tensor.
# The 28*28 reshapes suggest it was copied from an MNIST-style autoencoder
# example - confirm whether it should be adapted or removed.

# initialize figure
f, a = plt.subplots(2, N_TEST_IMG, figsize=(5, 2))
plt.ion()   # continuously plot

# original data (first row) for viewing
view_data = train_data.train_data[:N_TEST_IMG].view(-1, 28*28).type(torch.FloatTensor)/255.
for i in range(N_TEST_IMG):
    a[0][i].imshow(np.reshape(view_data.data.numpy()[i], (28, 28)), cmap='gray'); a[0][i].set_xticks(()); a[0][i].set_yticks(())

    
    
for epoch in range(EPOCH):
    for step, (x, b_label) in enumerate(train_loader):
        b_x = x.view(-1, 28*28)   # batch x, shape (batch, 28*28)
        b_y = x.view(-1, 28*28)   # batch y, shape (batch, 28*28)

        encoded, decoded = autoencoder(b_x)

        loss = loss_func(decoded, b_y)      # mean square error
        optimizer.zero_grad()               # clear gradients for this training step
        loss.backward()                     # backpropagation, compute gradients
        optimizer.step()                    # apply gradients

        # every 100 steps: report the loss and redraw the second row
        if step % 100 == 0:
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy())

            # plotting decoded image (second row)
            _, decoded_data = autoencoder(view_data)
            for i in range(N_TEST_IMG):
                a[1][i].clear()
                a[1][i].imshow(np.reshape(decoded_data.data.numpy()[i], (28, 28)), cmap='gray')
                a[1][i].set_xticks(()); a[1][i].set_yticks(())
            plt.draw(); plt.pause(0.05)

plt.ioff()
plt.show()

In [None]:
def train(epoch):
    """Run one pass over ``train_loader``, minimising NLL loss with ``optimizer``.

    ``epoch`` is used only in the progress message, which is printed every
    500th batch.

    NOTE(review): this expects ``(data, target)`` batches and a model whose
    output suits ``F.nll_loss``; the ``train_loader`` built in this notebook
    yields single tensors and ``AEC.forward`` returns a tuple - confirm this
    function is still meant to be used here.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        if step % 500 != 0:
            continue
        # Progress: samples seen so far / dataset size (percent of batches done).
        seen = step * len(inputs)
        total = len(train_loader.dataset)
        percent = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, total, percent, loss.item()))
#
# A simple test procedure to measure the STN's performance on MNIST.
#

def test():
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    Runs without gradient tracking and with the model in eval mode. The loss
    is the per-sample NLL averaged over the whole test dataset; accuracy
    counts predictions whose arg-max class equals the target.

    NOTE(review): ``test_loader`` is not defined in any visible cell of this
    notebook - confirm where it is meant to come from.
    """
    with torch.no_grad():
        model.eval()
        test_loss = 0
        correct = 0
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)

            # Sum (not average) the batch loss so the division below yields a
            # per-sample mean. `reduction='sum'` replaces the deprecated
            # `size_average=False`.
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # Index of the max log-probability along the class axis.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(test_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
              .format(test_loss, correct, len(test_loader.dataset),
                      100. * correct / len(test_loader.dataset)))
    

In [None]:
# Start over with a freshly initialised autoencoder, on the GPU when available.
# NOTE(review): `optimizer` was built from the previous model's parameters and
# is not rebuilt here - confirm it is re-created before training this model.
model = AEC()
model = model.cuda() if torch.cuda.is_available() else model