# Transfer learning compared to meta learning 

Meta learning prepares a model to interpolate generally over a submanifold in the parameter space, 
but the submanifold dimension is practically small relative the parameter space dimension, 
because each one must be engineered. 
Alternatively, transfer learning effectively contributes significant samples sizes to some dimensions 
and usually arrives with a little bias. 
Here, we show (HYPOTHESIZE) that meta learning is competitive near its submanifold, 
but sufficiently-abstracted transfer learning ultimately produces greater generality beyond the submanifold, 
at least when bias is sufficiently low. 
Ultimately, it's a trade-off between meta learning's effective submanifold, 
and the relevance of a data abstraction produced by transfer learning. 
If the abstraction is coherent and small, it should be more general. 
When done correctly, packing greater volumes of data into smaller dimensional spaces lends to greater generality.

In [4]:
import torch
import torch.nn as nn 

MNIST_DIM = 28 
LEARNING_RATE = 1e-3 
EMBEDDING_DIM = 20 

class BaseLayer(nn.Module): 
    def __init__(self,
            abstraction_dimension=20):
        self.abstraction_dimension = abstraction_dimension 
        self.fc1 = nn.Linear(MNIST_DIM*MNIST_DIM, self.abstraction_dimension) 
        self.relu1 = nn.LeakyReLU() 
        self.fc2 = nn.Linear(self.abstraction_dimension, EMBEDDING_DIM) 
        pass 
    def forward(self, 
            x): 
        x = self.fc1(x) 
        x = self.relu1(x) 
        x = self.fc2(x) 
        return x 
    pass 

class AutoEncoder(nn.Module): 
    def __init__(self,
            abstraction_dimension=20): 
        self.abstraction_dimension = abstraction_dimension 
        self.base_layer = BaseLayer(abstraction_dimension=self.abstraction_dimension) 
        self.relu1 = nn.LeakyReLU()
        self.fc1 = nn.Linear(EMBEDDING_DIM, self.abstraction_dimension) 
        self.relu2 = nn.LeakyReLU() 
        self.fc2 = nn.Linear(self.abstraction_dimension, MNIST_DIM*MNIST_DIM) 
        self.optimizer = torch.optim.Adam(self.parameters(), lr=LEARNING_RATE) 
        pass 
        def forward(self,
                x):
            x = self.base_layer(x) 
            x = self.relu1(x) 
            x = self.fc1(x) 
            x = self.relu2(x) 
            x = self.fc2(x) 
            x = torch.sigmoid(x) 
            return x 
    pass 

    class Classifier(nn.Module): 
        def __init__(self,
                abstraction_dimension=20, 
                base_layer_transfer=None, 
                n_labels=10): 
            self.abstraction_dimension = abstraction_dimension 
            self.n_labels=10 
            self.base_layer = BaseLayer(abstraction_dimension=self.abstraction_dimension) 
            self.relu1 = nn.LeakyReLU() 
            self.fc1 = nn.Linear(EMBEDDING_DIM, self.n_labels) 
            if base_layer_transfer is not None: 
                ## TODO copy params 
                pass 
            pass 
        def forward(self, 
                x): 
            x = self.base_layer(x) 
            x = self.relu1(x) 
            x = self.fc1(x) 
            x = torch.softmax(x) 
            return x 
        pass 

## TODO experimental cases: 
## Meta learning: fit model to linear interpolations of {0,1,2,3,4,5,6,7,8} making a 9-dim sub-manifold. 
## Illustrate effectiveness on fake, new, within-submanifold digits like p*2 + (1-p)*5 but ineffectiveness with 9. 
## Transfer learning: show how optimal abstraction dim on {0,1,2,3,4,5,6,7,8} results in greater effectiveness with 9. 

In [12]:
from torchvision import datasets

dataset1 = datasets.MNIST('../../data', train=True, download=True) 
dataset2 = datasets.MNIST('../../data', train=False) 

image, label = dataset1[0]

print(type(image)) 
print(image.shape) 
print(type(label))
print(label)

image

<class 'PIL.Image.Image'>


AttributeError: shape

In [20]:
from torch.nn.functional import one_hot 

one_hot(torch.tensor([3,4]), num_classes=10)

tensor([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]])

In [17]:
from torchvision.transforms.functional import pil_to_tensor 

pil_to_tensor(image)/256. 

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,