In [39]:
import numpy as np
from numpy.random import rand
import matplotlib.pyplot as plt

import jax.numpy as jnp
from jax import jit, vmap

import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow.keras as keras

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras import optimizers

import gc
from numba import cuda

In [40]:
# This is how the triangular lattice data is generated. You may find it helpful to generate some
# of your own data
class Ising_tri():
    ''' Metropolis Monte Carlo sampler for an Ising model on a periodic
    N x N grid in which every site has six neighbours. '''
    def __init__(self, size, temp):
        self.N = int(size)
        self.temp = temp

    ## monte carlo moves
    def mcmove(self, config, N, beta):
        ''' Perform N*N single-spin Metropolis updates on `config`.

        Each update picks a site uniformly at random (sites may repeat).
        `config` is modified in place and is also returned. '''
        for _outer in range(N):
            for _inner in range(N):
                row = np.random.randint(0, N)  # select a row
                col = np.random.randint(0, N)  # select a column
                spin = config[row, col]        # current state at (row, col)

                # Besides the four axis neighbours, each site couples to one
                # extra site in each adjacent row: one column to the right on
                # odd rows, one column to the left on even rows.
                diag_col = (col + 1) % N if row % 2 else (col - 1) % N
                next_row = (row + 1) % N
                prev_row = (row - 1) % N
                nb = (config[next_row, col] + config[next_row, diag_col]
                      + config[row, (col + 1) % N]
                      + config[prev_row, col] + config[prev_row, diag_col]
                      + config[row, (col - 1) % N])

                cost = 2 * spin * nb
                # A negative cost is always accepted; otherwise the flip is
                # accepted with probability exp(-cost*beta). `or` short-circuits,
                # so rand() is drawn only when cost >= 0.
                if cost < 0 or rand() < np.exp(-cost * beta):
                    spin = spin * -1
                config[row, col] = spin
        return config

    def simulate(self):
        ''' Start from a random +/-1 configuration, apply 81 calls of
        `mcmove` at beta = 1/temp, and return the resulting configuration. '''
        size = self.N
        config = 2 * np.random.randint(2, size=(size, size)) - 1
        msrmnt = 81
        for _ in range(msrmnt):
            self.mcmove(config, size, 1.0 / self.temp)
        return config

You can import the four-temperature data for the square and triangular lattices as follows:

In [41]:
N = 250
nx, ny = 32, 32

# One file-name template per temperature; the position in the list is the class label.
sq_file_templates = [
    "./square_T1/square_T1/{:03d}",
    "./square_T2/square_T2/{:03d}",
    "./square_T3/square_T3/{:03d}",
    "./square_T4/square_T4/{:03d}",
]

# Samples of class k occupy rows k*N .. (k+1)*N - 1.
Xsq = np.ndarray((4*N, nx, ny, 1))
ysq = np.ndarray(4*N)

for i in np.arange(N):
    for label, template in enumerate(sq_file_templates):
        row = i + label*N
        Xsq[row] = np.loadtxt(template.format(i), delimiter=",").reshape(nx, ny, 1)
        ysq[row] = label

# Fixed random_state keeps the 80/20 split the same on every run.
Xsq_train, Xsq_test, ysq_train, ysq_test = train_test_split(
    Xsq, ysq, test_size=0.2, random_state=0
)

In [42]:
N = 250
nx, ny = 32, 32

# One file-name template per temperature; the position in the list is the class label.
tri_file_templates = [
    "./triangle_T1/triangle_T1/{:03d}",
    "./triangle_T2/triangle_T2/{:03d}",
    "./triangle_T3/triangle_T3/{:03d}",
    "./triangle_T4/triangle_T4/{:03d}",
]

# Samples of class k occupy rows k*N .. (k+1)*N - 1.
Xtri = np.ndarray((4*N, nx, ny, 1))
ytri = np.ndarray(4*N)

for i in np.arange(N):
    for label, template in enumerate(tri_file_templates):
        row = i + label*N
        Xtri[row] = np.loadtxt(template.format(i), delimiter=",").reshape(nx, ny, 1)
        ytri[row] = label

# Fixed random_state keeps the 80/20 split the same on every run.
Xtri_train, Xtri_test, ytri_train, ytri_test = train_test_split(
    Xtri, ytri, test_size=0.2, random_state=0
)

Make sure you know the shape of the data.

In [43]:
# Report the array shapes, square lattice first and triangular lattice second.
for heading, features, labels in (
    ("Shape of training data:", (Xsq_train, Xtri_train), (ysq_train, ytri_train)),
    ("Shape of test data:", (Xsq_test, Xtri_test), (ysq_test, ytri_test)),
):
    print(heading)
    print(*(arr.shape for arr in features))
    print(*(arr.shape for arr in labels))

Shape of training data:
(800, 32, 32, 1) (800, 32, 32, 1)
(800,) (800,)
Shape of test data:
(200, 32, 32, 1) (200, 32, 32, 1)
(200,) (200,)


### (a) Train a fully connected neural network to do the classification on both datasets. Then, train a convolutional neural network to do the classification on both datasets. Make a table of your performance numbers for both models and upload these numbers. This, together with your code, should be uploaded to the course website when you turn in your homework.

The temperatures for the square lattice are $T = 1.5, 2.1, 2.4, 3.5$; for the triangular lattice they are $T = 2.5, 3.2, 3.8, 5$.


Solution to (a):

In [44]:
class small_FNN:
    """Factory for a small fully connected (dense) softmax classifier."""

    @staticmethod
    def build(width, height, depth, num_classes, channels_first=False,
              hidden_units=(256, 128, 64, 32, 16), dropout_rate=0.2):
        """Build an uncompiled Sequential dense network.

        Args:
            width, height, depth: spatial size and channel count of one sample.
            num_classes: number of units in the softmax output layer.
            channels_first: if true the expected sample shape is
                (depth, height, width); otherwise (height, width, depth).
            hidden_units: width of each hidden Dense layer, in order.
            dropout_rate: rate of the Dropout layer that follows each hidden
                Dense layer.

        Returns:
            The constructed, uncompiled Sequential model.
        """
        if channels_first:
            inputShape = (depth, height, width)
        else:
            inputShape = (height, width, depth)

        model = Sequential()
        # Declaring the input shape here (as SimpleCNN does on its first layer)
        # builds the model immediately, so summary() works before training and
        # wrongly shaped data is rejected instead of silently defining the shape.
        model.add(Flatten(input_shape=inputShape))

        # Each hidden stage is Dense(relu) -> Dropout -> BatchNormalization.
        for units in hidden_units:
            model.add(Dense(units, activation='relu'))
            model.add(Dropout(dropout_rate))
            model.add(BatchNormalization())

        model.add(Dense(num_classes, activation="softmax"))

        return model

In [45]:
def train_model(input_model, train_data, train_lbls, test_data, 
                test_lbls, num_classes, input_shape, hyperparams):
    """Compile `input_model` and fit it, validating on the test split.

    The model is compiled and trained IN PLACE: the returned model is the very
    object passed as `input_model`, not a copy. Calling this function twice
    with the same model object continues training the same weights.

    Args:
        input_model: Keras model to compile and train.
        train_data, test_data: feature arrays, passed to `fit` unchanged.
        train_lbls, test_lbls: integer class labels; one-hot encoded here.
        num_classes: number of classes used for the one-hot encoding.
        input_shape: accepted for call-site compatibility; not used.
        hyperparams: indexable as (learning rate, epochs, batch size).

    Returns:
        (H, model): the object returned by `fit`, and the trained model.
    """
    # Create categorical (one-hot) labels for the categorical cross-entropy loss.
    y_train = keras.utils.to_categorical(train_lbls, num_classes)
    y_test = keras.utils.to_categorical(test_lbls, num_classes)

    # Set hyperparameters
    INIT_LR = hyperparams[0]  # learning rate
    EPOCHS = hyperparams[1]   # number of epochs
    BS = hyperparams[2]       # batch size
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    OPT = optimizers.Adagrad(learning_rate=INIT_LR)

    # Compile the model
    model = input_model
    model.compile(loss='categorical_crossentropy', optimizer=OPT, metrics=['accuracy'])

    H = model.fit(train_data, y_train, validation_data=(test_data, y_test),
                  epochs=EPOCHS, batch_size=BS)

    return H, model
    

In [46]:
# Channels-last sample shape: (height, width, channels).
input_shape = (32, 32, 1)
num_classes = 4
# build() assembles the channels-last shape as (height, width, depth), so axis 0
# of input_shape is the height and axis 1 the width.
fnn_model = small_FNN.build(height=input_shape[0], width=input_shape[1], depth=input_shape[2],
                   num_classes=num_classes)

In [47]:
# hyperparams should be a tuple of: INIT_LR, EPOCHS, BS
FNN_hyperparams = (0.01, 25, 32)
H_sq_FNN, sq_FNN_model = train_model(fnn_model, Xsq_train, ysq_train, Xsq_test, ysq_test, num_classes, input_shape, FNN_hyperparams)
H_tri_FNN, tri_FNN_model = train_model(fnn_model, Xtri_train, ytri_train, Xtri_test, ytri_test, num_classes, input_shape, FNN_hyperparams)

Train on 800 samples, validate on 200 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Train on 800 samples, validate on 200 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


### (b) Train a convolutional neural network to do the classification, on both datasets. Make a table of your performance numbers for (a) and (b). 
Try to optimize the performance of your models and compare the result.

solution to (b):

In [48]:
class SimpleCNN:
    """Factory for a small three-convolution softmax classifier."""

    @staticmethod
    def build(width, height, depth, num_classes, channels_first=False):
        """Build an uncompiled Sequential CNN.

        Layer order: Conv2D(32, 5x5) -> MaxPool -> Conv2D(64, 3x3) -> MaxPool
        -> Conv2D(64, 3x3) -> Dropout -> Flatten -> Dropout -> Dense(64)
        -> Dense(num_classes, softmax).

        Args:
            width, height, depth: spatial size and channel count of one sample.
            num_classes: number of units in the softmax output layer.
            channels_first: if true the expected sample shape is
                (depth, height, width); otherwise (height, width, depth).

        Returns:
            The constructed, uncompiled Sequential model.
        """
        # The data format must reach the convolution and pooling layers too;
        # changing only the input shape would leave them treating the last
        # axis as channels.
        if channels_first:
            inputShape = (depth, height, width)
            data_format = "channels_first"
        else:
            inputShape = (height, width, depth)
            data_format = "channels_last"

        model = Sequential()

        model.add(Conv2D(32, (5, 5), activation='relu', data_format=data_format,
                         input_shape=inputShape))
        model.add(MaxPooling2D((2, 2), data_format=data_format))
        model.add(Conv2D(64, (3, 3), activation='relu', data_format=data_format))
        model.add(MaxPooling2D((2, 2), data_format=data_format))
        model.add(Conv2D(64, (3, 3), activation='relu', data_format=data_format))
        model.add(Dropout(0.25))

        model.add(Flatten())
        model.add(Dropout(0.4))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(num_classes, activation='softmax'))

        # return the constructed network architecture
        return model

In [49]:
# Channels-last sample shape: (height, width, channels).
input_shape = (32, 32, 1)
num_classes = 4

# hyperparams tuple: INIT_LR, EPOCHS, BS
CNN_hyperparams = (0.01, 50, 64)

# train_model trains the object it is given and returns that same object, so
# each dataset gets its own network. Sharing one model would make sq_CNN_model
# and tri_CNN_model the same object, already trained on the triangle data.
cnn_model = SimpleCNN.build(height=input_shape[0], width=input_shape[1], depth=input_shape[2],
                   num_classes=num_classes)
H_sq_CNN, sq_CNN_model = train_model(cnn_model, Xsq_train, ysq_train, Xsq_test, ysq_test,
                                     num_classes, input_shape, CNN_hyperparams)

tri_cnn_model = SimpleCNN.build(height=input_shape[0], width=input_shape[1], depth=input_shape[2],
                   num_classes=num_classes)
H_tri_CNN, tri_CNN_model = train_model(tri_cnn_model, Xtri_train, ytri_train, Xtri_test, ytri_test,
                                       num_classes, input_shape, CNN_hyperparams)

Train on 800 samples, validate on 200 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Train on 800 samples, validate on 200 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50


Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### (c) We have provided a test set of 10 spin configurations for each of the two problems. The spin configurations are not necessarily at the temperatures of the training sets. Calculate your best estimate of the temperature of each of these spin configurations. Upload your results to Kaggle.
[Hint: A direct fingerprint of temperature is the distribution of spin up
and down, because you can imagine that the spins fluctuate more violently
at higher temperature. Although the method you used in homework 2 can also work, you may be interested in trying to take the distribution into account when you
build the model to estimate temperature, and see if you can make use of this extra information. This may help you win the
Kaggle competition. It is totally fine if you find that the distribution information is not helpful. Note also that a CNN kind of does this. One possibility is that you may want a CNN that captures enough distribution information.]

Solution to (c)

### (d) *Transfer Learning*.  
As we emphasized in class, one can freeze the training of the bottom layers of a network and retrain the top part of the network to adapt to a new situation. Use the CNN that you trained on the square lattice data to do transfer learning on the triangular lattice data. How does the performance compare to that of the direct methods? Add the performance numbers for transfer learning to your table from Part (a). Note that the training time and the number of training examples needed for transfer learning are far less than those for direct optimization. For example, are 50 triangle examples sufficient for the re-training process? Use your transfer learning result to predict the transition temperature of the triangular lattice Ising model, as demonstrated in this [Nature Physics](https://www-nature-com.ezp-prod1.hul.harvard.edu/articles/nphys4035.pdf) publication.

As a guideline, you may like to just change the last `Dense` layer with `softmax` activation when you do the transfer learning. Other choices are also OK.

Solution to (d):

In [51]:
# Use the model returned by the square-lattice CNN training call as the base for
# transfer learning.
# NOTE(review): train_model returns the same object it was given, so this is a
# square-only model only if that object was not trained on other data afterwards.
base_model = sq_CNN_model

In [53]:
# List the base model's layers to decide which leading layers to reuse.
base_model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 28, 28, 32)        832       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 12, 12, 64)        18496     
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 4, 4, 64)          36928     
_________________________________________________________________
dropout_22 (Dropout)         (None, 4, 4, 64)          0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 1024)             

In [66]:
# Reuse the first five layers of the base model (Conv2D, MaxPooling2D, Conv2D,
# MaxPooling2D, Conv2D in base_model.summary()) as a fixed feature extractor.
base_layers = base_model.layers[0:5]

# Freeze the reused layers so that only the new head below is trained; without
# this every weight is updated and the run is a full fine-tune, not transfer
# learning. These are the same layer objects that base_model holds, so the flag
# also applies to base_model the next time it is compiled.
for layer in base_layers:
    layer.trainable = False

# New classification head, trained from scratch on the new data.
trainable_layers = [
     Flatten(),
     Dropout(0.25),
     Dense(32, activation='relu'),
     Dense(num_classes, activation='softmax')
]

In [69]:
# Stack the reused base layers and the new head into a single Sequential model.
trans_model = keras.Sequential([*base_layers, *trainable_layers])
trans_model.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 28, 28, 32)        832       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 12, 12, 64)        18496     
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 4, 4, 64)          36928     
_________________________________________________________________
flatten_11 (Flatten)         (None, 1024)              0         
_________________________________________________________________
dropout_25 (Dropout)         (None, 1024)            

In [70]:
input_shape = (32, 32, 1)
num_classes = 4
# (learning rate, epochs, batch size)
hyperparams = (0.01, 25, 32)

# Train the transfer model on the triangular-lattice split.
H_trans, trained_trans_model = train_model(
    input_model=trans_model,
    train_data=Xtri_train,
    train_lbls=ytri_train,
    test_data=Xtri_test,
    test_lbls=ytri_test,
    num_classes=num_classes,
    input_shape=input_shape,
    hyperparams=hyperparams,
)

Train on 800 samples, validate on 200 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
