biostat93
diff --git a/‎MLiP-week09/09 Advanced Topic in ANN.ipynb
Lines changed: 1662 additions & 0 deletions b/‎MLiP-week09/09 Advanced Topic in ANN.ipynb
Lines changed: 1662 additions & 0 deletions
diff --git a/‎MLiP-week09/data_utils.py
Lines changed: 79 additions & 0 deletions b/‎MLiP-week09/data_utils.py
Lines changed: 79 additions & 0 deletions
diff --git a/‎MLiP-week09/fc_net.py
Lines changed: 146 additions & 0 deletions b/‎MLiP-week09/fc_net.py
Lines changed: 146 additions & 0 deletions
diff --git a/‎MLiP-week09/imgs/09-Augmentation.jpg
119 KB b/‎MLiP-week09/imgs/09-Augmentation.jpg
119 KB
diff --git a/‎MLiP-week09/imgs/09-Batch_normalization.jpg
140 KB b/‎MLiP-week09/imgs/09-Batch_normalization.jpg
140 KB
diff --git a/‎MLiP-week09/imgs/09-Different_update_rules.gif
591 KB b/‎MLiP-week09/imgs/09-Different_update_rules.gif
591 KB
diff --git a/‎MLiP-week09/imgs/09-Dropout.jpg
131 KB b/‎MLiP-week09/imgs/09-Dropout.jpg
131 KB
diff --git a/‎MLiP-week09/imgs/09-SGD_problem1.jpg
75.8 KB b/‎MLiP-week09/imgs/09-SGD_problem1.jpg
75.8 KB
diff --git a/‎MLiP-week09/imgs/09-SGD_problem2.jpg
97.3 KB b/‎MLiP-week09/imgs/09-SGD_problem2.jpg
97.3 KB
diff --git a/‎MLiP-week09/imgs/09-color_jitter.jpg
65.2 KB b/‎MLiP-week09/imgs/09-color_jitter.jpg
65.2 KB
diff --git a/‎MLiP-week09/imgs/09-crop.jpg
116 KB b/‎MLiP-week09/imgs/09-crop.jpg
116 KB
diff --git a/‎MLiP-week09/imgs/09-flip.jpg
95.2 KB b/‎MLiP-week09/imgs/09-flip.jpg
95.2 KB
diff --git a/‎MLiP-week09/imgs/09-momentum.jpg
102 KB b/‎MLiP-week09/imgs/09-momentum.jpg
102 KB
diff --git a/‎MLiP-week09/imgs/09-three-layer-NN.jpg
39.9 KB b/‎MLiP-week09/imgs/09-three-layer-NN.jpg
39.9 KB
diff --git a/‎MLiP-week09/imgs/09-two-layer-NN.jpg
29.6 KB b/‎MLiP-week09/imgs/09-two-layer-NN.jpg
29.6 KB
diff --git a/‎MLiP-week09/imgs/09_batch_norm.jpg
68 KB b/‎MLiP-week09/imgs/09_batch_norm.jpg
68 KB
diff --git a/‎MLiP-week09/imgs/09_batch_norm.png
79.5 KB b/‎MLiP-week09/imgs/09_batch_norm.png
79.5 KB
diff --git a/‎MLiP-week09/imgs/algorithm-main-pic.png
61.6 KB b/‎MLiP-week09/imgs/algorithm-main-pic.png
61.6 KB
diff --git a/‎MLiP-week09/imgs/next_week.png
10.4 KB b/‎MLiP-week09/imgs/next_week.png
10.4 KB
@@ -0,0 +1,79 @@
+from six.moves import cPickle as pickle
+import numpy as np
+import os
+from scipy.misc import imread
+import platform
+
def load_pickle(f):
    """
    Unpickle one object from the open binary file object ``f``.

    Under Python 3 the stream is decoded with ``encoding='latin1'`` so that
    byte strings written by a Python 2 pickler can be read back; under
    Python 2 a plain ``pickle.load`` is used.

    NOTE: unpickling can execute arbitrary code, so only feed this function
    files that come from a trusted source.

    Raises:
    - ValueError: if the interpreter's major version is neither 2 nor 3.
    """
    version = platform.python_version_tuple()
    major = version[0]
    if major == '3':
        return pickle.load(f, encoding='latin1')
    if major == '2':
        return pickle.load(f)
    raise ValueError("invalid python version: {}".format(version))
+
def load_CIFAR_batch(filename):
    """
    Load a single pickled CIFAR batch file.

    Inputs:
    - filename: Path of the batch file; it is opened in binary mode and
      unpickled with load_pickle. The resulting dictionary must provide the
      keys 'data' and 'labels'.

    Returns a tuple (X, Y):
    - X: Float array of shape (N, 32, 32, 3), i.e. the stored rows reshaped
      to (N, 3, 32, 32) and transposed so the channel axis comes last.
    - Y: Array built from the 'labels' entry.
    """
    # Only the unpickling needs the file handle; close it before the
    # (comparatively expensive) array manipulation.
    with open(filename, 'rb') as f:
        datadict = load_pickle(f)

    # -1 lets NumPy infer the number of images from the data instead of
    # insisting on exactly 10000 rows, so truncated or custom-sized batches
    # load as well.
    X = datadict['data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    Y = np.array(datadict['labels'])
    return X, Y
+
def load_CIFAR10(ROOT):
    """
    Load the full CIFAR-10 dataset stored under the directory ROOT.

    The training set is the concatenation of the files 'data_batch_1'
    through 'data_batch_5'; the test set comes from 'test_batch'. Every file
    is read with load_CIFAR_batch.

    Returns a tuple (Xtr, Ytr, Xte, Yte) of training images, training labels,
    test images and test labels.
    """
    train_images, train_labels = [], []
    for batch_id in range(1, 6):
        batch_path = os.path.join(ROOT, 'data_batch_%d' % (batch_id, ))
        images, labels = load_CIFAR_batch(batch_path)
        train_images.append(images)
        train_labels.append(labels)

    # Stitch the five training batches together along the first axis.
    Xtr = np.concatenate(train_images)
    Ytr = np.concatenate(train_labels)

    test_path = os.path.join(ROOT, 'test_batch')
    Xte, Yte = load_CIFAR_batch(test_path)
    return Xtr, Ytr, Xte, Yte
+
+
def get_CIFAR10_data(cifar10_dir, num_training=49000, num_validation=1000, num_test=1000,
                     subtract_mean=True):
    """
    Load CIFAR-10 from disk and split / preprocess it for training.

    Inputs:
    - cifar10_dir: Directory handed to load_CIFAR10.
    - num_training: Number of leading training examples kept as the
      training split.
    - num_validation: Number of examples, taken right after the training
      split, used as the validation split.
    - num_test: Number of leading test examples kept.
    - subtract_mean: If True, the mean image of the training split is
      subtracted from all three splits.

    Returns a dictionary with the keys 'X_train', 'y_train', 'X_val',
    'y_val', 'X_test' and 'y_test'. The image arrays are returned with the
    channel axis moved to position 1.
    """
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Pick each split with an explicit index array. The validation split is
    # taken first because it is carved out of the not-yet-truncated training
    # data.
    val_idx = np.arange(num_training, num_training + num_validation)
    train_idx = np.arange(num_training)
    test_idx = np.arange(num_test)

    X_val, y_val = X_train[val_idx], y_train[val_idx]
    X_train, y_train = X_train[train_idx], y_train[train_idx]
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    # Center every split on the mean image of the training split only.
    if subtract_mean:
        mean_image = X_train.mean(axis=0)
        for split in (X_train, X_val, X_test):
            split -= mean_image

    # Move the channel axis in front of the two spatial axes; .copy()
    # materializes the transposed layout as a fresh array.
    channels_first = (0, 3, 1, 2)
    X_train = X_train.transpose(channels_first).copy()
    X_val = X_val.transpose(channels_first).copy()
    X_test = X_test.transpose(channels_first).copy()

    data = {}
    data['X_train'], data['y_train'] = X_train, y_train
    data['X_val'], data['y_val'] = X_val, y_val
    data['X_test'], data['y_test'] = X_test, y_test
    return data
@@ -0,0 +1,146 @@
+import numpy as np
+from layers import *
+
+
class FullyConnectedNet(object):
    """
    A fully-connected neural network with an arbitrary number of hidden layers,
    ReLU nonlinearities, and a softmax loss function. Dropout and batch
    normalization are optional. For a network with L layers, the architecture
    is

    {affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax

    where the {...} block is repeated L - 1 times.

    Learnable parameters are stored in the self.params dictionary under the
    keys 'W<i>' and 'b<i>' (plus 'gamma<i>' and 'beta<i>' for batch-normalized
    hidden layers), with layers numbered from 1, so that an external solver
    can update them.
    """

    def __init__(self, hidden_dims, input_dim=3*32*32, num_classes=10,
                 dropout=0, use_batchnorm=False, reg=0.0,
                 weight_scale=1e-2, dtype=np.float32, seed=None):
        """
        Initialize a new FullyConnectedNet.

        Inputs:
        - hidden_dims: A sequence (list or tuple) of integers giving the size
          of each hidden layer.
        - input_dim: An integer giving the size of the input.
        - num_classes: An integer giving the number of classes to classify.
        - dropout: Scalar between 0 and 1 giving dropout strength. If dropout=0
          then the network does not use dropout at all.
        - use_batchnorm: Whether or not the network should use batch
          normalization.
        - reg: Scalar giving L2 regularization strength.
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - dtype: A numpy data-type object; all parameters are cast to this
          data-type and inputs are cast to it in loss().
        - seed: If not None, this value is stored in the dropout parameter
          dictionary under 'seed' so the dropout layers can be made
          deterministic (useful for gradient checking).
        """
        self.use_batchnorm = use_batchnorm
        self.use_dropout = dropout > 0
        self.reg = reg
        self.num_layers = 1 + len(hidden_dims)
        self.dtype = dtype
        self.params = {}

        # list(...) allows hidden_dims to be any sequence; concatenating a
        # tuple directly onto a list would raise TypeError.
        dims = [input_dim] + list(hidden_dims) + [num_classes]
        for i in range(1, self.num_layers + 1):
            fan_in, fan_out = dims[i - 1], dims[i]
            self.params['W%d' % i] = weight_scale * np.random.randn(fan_in, fan_out)
            self.params['b%d' % i] = np.zeros(fan_out)
            # Only hidden layers are batch-normalized; the final affine layer
            # feeds the softmax directly.
            if self.use_batchnorm and i < self.num_layers:
                self.params['gamma%d' % i] = np.ones(fan_out)
                self.params['beta%d' % i] = np.zeros(fan_out)

        # One dropout_param dictionary is shared by every dropout layer; it
        # carries the mode (train / test), the dropout value and, optionally,
        # the seed.
        self.dropout_param = {}
        if self.use_dropout:
            self.dropout_param = {'mode': 'train', 'p': dropout}
            if seed is not None:
                self.dropout_param['seed'] = seed

        # Each batch normalization layer gets its own bn_param dictionary:
        # self.bn_params[0] belongs to the first hidden layer, self.bn_params[1]
        # to the second, and so on.
        self.bn_params = []
        if self.use_batchnorm:
            self.bn_params = [{'mode': 'train'} for _ in range(self.num_layers - 1)]

        # Cast all parameters to the requested data-type.
        self.params = {name: value.astype(dtype)
                       for name, value in self.params.items()}

    def loss(self, X, y=None):
        """
        Compute loss and gradients for a minibatch, or class scores when no
        labels are given.

        Inputs:
        - X: Array of input data. It is cast to self.dtype and passed to the
          first affine layer (the accepted shape is whatever affine_forward
          from the layers module supports).
        - y: Labels forwarded to softmax_loss. If None, the network runs in
          test mode.

        Returns:
        If y is None, the scores produced by the final affine layer.

        Otherwise a tuple (loss, grads):
        - loss: The softmax loss plus 0.5 * reg * sum(W * W) for the weight
          matrix of every affine layer.
        - grads: Dictionary with the same keys as self.params, mapping each
          parameter to the gradient of the loss with respect to it.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Dropout and batch normalization behave differently during training
        # and testing, so push the current mode into their parameter dicts.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        last = self.num_layers

        # ---- forward pass -------------------------------------------------
        # caches[i] holds whatever layer i's backward functions need: a dict
        # of sub-layer caches for hidden layers, the raw affine cache for the
        # final layer.
        caches = {}
        h = X
        for i in range(1, last):
            W, b = self.params['W%d' % i], self.params['b%d' % i]
            layer_cache = {}
            if self.use_batchnorm:
                gamma, beta = self.params['gamma%d' % i], self.params['beta%d' % i]
                h, layer_cache['affine'] = affine_forward(h, W, b)
                h, layer_cache['bn'] = batchnorm_forward(h, gamma, beta, self.bn_params[i - 1])
                h, layer_cache['relu'] = relu_forward(h)
            else:
                h, layer_cache['affine_relu'] = affine_relu_forward(h, W, b)
            if self.use_dropout:
                h, layer_cache['dropout'] = dropout_forward(h, self.dropout_param)
            caches[i] = layer_cache

        scores, caches[last] = affine_forward(
            h, self.params['W%d' % last], self.params['b%d' % last])

        # In test mode only the scores are needed.
        if mode == 'test':
            return scores

        # ---- backward pass ------------------------------------------------
        grads = {}
        loss, dout = softmax_loss(scores, y)

        dout, grads['W%d' % last], grads['b%d' % last] = affine_backward(dout, caches[last])
        for i in range(last - 1, 0, -1):
            layer_cache = caches[i]
            # Undo the sub-layers in the reverse of their forward order.
            if self.use_dropout:
                dout = dropout_backward(dout, layer_cache['dropout'])
            if self.use_batchnorm:
                dout = relu_backward(dout, layer_cache['relu'])
                dout, grads['gamma%d' % i], grads['beta%d' % i] = batchnorm_backward(dout, layer_cache['bn'])
                dout, grads['W%d' % i], grads['b%d' % i] = affine_backward(dout, layer_cache['affine'])
            else:
                dout, grads['W%d' % i], grads['b%d' % i] = affine_relu_backward(dout, layer_cache['affine_relu'])

        # ---- L2 regularization --------------------------------------------
        # Penalize the weights of every affine layer, including the final
        # one; stopping at num_layers - 1 would leave the output layer's
        # weights unregularized.
        for i in range(1, last + 1):
            W = self.params['W%d' % i]
            loss += 0.5 * self.reg * np.sum(W * W)
            grads['W%d' % i] += self.reg * W

        return loss, grads