## Connect Google Drive and GPU


In [12]:
%reset

# connect google drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# connect colab gpu
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Once deleted, variables cannot be recovered. Proceed (y/[n])? y
Mounted at /content/drive
Fri Dec 17 06:01:37 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   54C    P0    40W / 250W |   1191MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                              

## Import Needed Libraries, Paramaters and Functions

In [13]:
import sys
import time
import os.path
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.utils import prune
import torchvision
import matplotlib.pyplot as plt
from torch.utils.mobile_optimizer import optimize_for_mobile
from scipy import stats
from sklearn.utils import shuffle

SEED = 10
WINDOW_SIZE = 128
FEATURE_SIZE = 9
LABEL_SIZE = 6
BATCH_SIZE = 32
PATH = '/content/drive/MyDrive/CNNPaper'
TRAIN_LOADER_PATH = PATH + '/model/final/loader/train_loader'
VALID_LOAER_PATH = PATH + '/model/final/loader/valid_loader'
TEST_LOADER_PATH = PATH + '/model/final/loader/test_loader'
TRAIN_DATA_DIR_PATH = PATH + '/data/UCIHAR/train/'
TEST_DATA_DIR_PATH = PATH + '/data/UCIHAR/test/'
TRAIN_DATA_PATH = PATH + '/data/UCIHAR/train_data.cvs'
TEST_DATA_PATH = PATH + '/data/UCIHAR/test_data.cvs'

In [14]:
def read_data(file_path, TYPE):
  """
    Read data from file_path
    Paramater:
      file_path: str
    Return:
      a DataFrame with the data and labels 
  """
  if os.path.isfile(TRAIN_DATA_PATH) and TYPE == 'train':
    print("Start reading data ...")
    df = pd.read_csv(TRAIN_DATA_PATH)
    print("Finish reading data ...")
  elif os.path.isfile(TEST_DATA_PATH) and TYPE == 'test':
    print("Start reading data ...")
    df = pd.read_csv(TEST_DATA_PATH)
    print("Finish reading data ...")
  else:
    print("Start reading data ...")
    x_accel = pd.read_csv(file_path + 'Inertial Signals/body_acc_x_'+TYPE+'.txt', header=None, names=['x-accel'])
    y_accel = pd.read_csv(file_path + 'Inertial Signals/body_acc_y_'+TYPE+'.txt', header=None, names=['y-accel'])
    z_accel = pd.read_csv(file_path + 'Inertial Signals/body_acc_z_'+TYPE+'.txt', header=None, names=['z-accel'])
    x_gyro = pd.read_csv(file_path + 'Inertial Signals/body_gyro_x_'+TYPE+'.txt', header=None, names=['x-gyro'])
    y_gyro = pd.read_csv(file_path + 'Inertial Signals/body_gyro_y_'+TYPE+'.txt', header=None, names=['y-gyro'])
    z_gyro = pd.read_csv(file_path + 'Inertial Signals/body_gyro_z_'+TYPE+'.txt', header=None, names=['z-gyro'])
    x_gyro_total = pd.read_csv(file_path + 'Inertial Signals/total_acc_x_'+TYPE+'.txt', header=None, names=['total-x-gyro'])
    y_gyro_total = pd.read_csv(file_path + 'Inertial Signals/total_acc_y_'+TYPE+'.txt', header=None, names=['total-y-gyro'])
    z_gyro_total = pd.read_csv(file_path + 'Inertial Signals/total_acc_z_'+TYPE+'.txt', header=None, names=['total-z-gyro'])
    activity = pd.read_csv(file_path + 'y_'+TYPE+'.txt', header=None, names=['activity'])
    length = len(x_accel)
    df = pd.DataFrame(columns=['user', 'activity', 'timestamp', 'x-accel', 'y-accel', 'z-accel', 'total-x-gyro', 'total-y-gyro', 'total-z-gyro'])
    
    for i in range(length):
      x_acc_data = x_accel['x-accel'][i].split()
      y_acc_data = y_accel['y-accel'][i].split()
      z_acc_data = z_accel['z-accel'][i].split()
      x_gyro_data = x_gyro['x-gyro'][i].split()
      y_gyro_data = y_gyro['y-gyro'][i].split()
      z_gyro_data = z_gyro['z-gyro'][i].split()
      total_gyro_x_data = x_gyro_total['total-x-gyro'][i].split()
      total_gyro_y_data = y_gyro_total['total-y-gyro'][i].split()
      total_gyro_z_data = z_gyro_total['total-z-gyro'][i].split()
      activity_data = activity['activity'][i]
      print("index " + str(i))
      size = len(x_acc_data)
      for j in range(size):
        df = df.append({'user': i, 'activity': activity_data, 'timestamp': j, 
                                'x-accel': float(x_acc_data[j]),
                                'y-accel': float(y_acc_data[j]),
                                'z-accel': float(z_acc_data[j]),
                                'x-gyro': float(x_gyro_data[j]),
                                'y-gyro': float(y_gyro_data[j]),
                                'z-gyro': float(z_gyro_data[j]),
                                'total-x-gyro': float(total_gyro_x_data[j]),
                                'total-y-gyro': float(total_gyro_y_data[j]),
                                'total-z-gyro': float(total_gyro_z_data[j])}, ignore_index=True)
    print("Finish reading data ...")
    if TYPE == 'train':
      df.to_csv('train_data.cvs', index=False)
    else:
      df.to_csv('test_data.cvs', index=False)
  return df

def feature_normalize(data):
  """
    Normalize the feature data
    Paramater:
      data: a list of floats
    Return:
      a list of floats with normalized data
  """
  mu = np.mean(data, axis=0)
  sigma = np.std(data, axis=0)
  return (data - mu) / sigma

def dataset_normalize(dataset):
  """
    Normalize the whole dataset
    Paramater:
      dataset: a DataFrame with the data and labels 
    Return:
      a DataFrame with the normalized data and labels 
  """
  dataset.dropna(axis=0, how='any', inplace=True)
  print("Normalizing x-accel ...")
  dataset['x-accel'] = feature_normalize(dataset['x-accel'])
  print("Normalizing y-accel ...")
  dataset['y-accel'] = feature_normalize(dataset['y-accel'])
  print("Normalizing z-accel ...")
  dataset['z-accel'] = feature_normalize(dataset['z-accel'])
  print("Normalizing x-gyro ...")
  dataset['x-gyro'] = feature_normalize(dataset['x-gyro'])
  print("Normalizing y-gyro ...")
  dataset['y-gyro'] = feature_normalize(dataset['y-gyro'])
  print("Normalizing z-gyro ...")
  dataset['z-gyro'] = feature_normalize(dataset['z-gyro'])
  print("Normalizing total-x-gyro ...")
  dataset['total-x-gyro'] = feature_normalize(dataset['total-x-gyro'])
  print("Normalizing total-y-gyro ...")
  dataset['total-x-gyro'] = feature_normalize(dataset['total-x-gyro'])
  print("Normalizing total-z-gyro ...")
  dataset['total-x-gyro'] = feature_normalize(dataset['total-x-gyro'])
  return dataset

def dataset_segmentation(data):
  """
    Dataset segmentation according the window size
    Paramater:
      data: a list of floats
    Return:
      segments and labels 
  """
  print("Start segmentation with window size: ", WINDOW_SIZE)
  segments = np.empty((0, WINDOW_SIZE, FEATURE_SIZE))
  labels = np.empty((0))
  size = data['timestamp'].count()
  for start in range(0, size, WINDOW_SIZE):
      x1 = data["x-accel"][start:start+WINDOW_SIZE]
      y1 = data["y-accel"][start:start+WINDOW_SIZE]
      z1 = data["z-accel"][start:start+WINDOW_SIZE]
      x2 = data["x-gyro"][start:start+WINDOW_SIZE]
      y2 = data["y-gyro"][start:start+WINDOW_SIZE]
      z2 = data["z-gyro"][start:start+WINDOW_SIZE]
      x3 = data["total-x-gyro"][start:start+WINDOW_SIZE]
      y3 = data["total-y-gyro"][start:start+WINDOW_SIZE]
      z3 = data["total-z-gyro"][start:start+WINDOW_SIZE]
      if len(data["timestamp"][start:start+WINDOW_SIZE]) == WINDOW_SIZE:
        segments = np.vstack([segments, np.dstack([x1,y1,z1,x2,y2,z2,x3,y3,z3])])
        labels = np.append(labels, stats.mode(data["activity"][start:start+WINDOW_SIZE])[0][0])
  labels = np.asarray(pd.get_dummies(labels), dtype = np.int8)
  segments = segments.reshape(len(segments), FEATURE_SIZE, WINDOW_SIZE)
  print("Finish segmentation ...")
  return segments, labels

def train_valid_test_split(segments, classes, test_x, test_y, k_fold):
  """
    Split train, valid and test datase
    Paramater:
      segments: a list of input data
      classes: a list of classes data
      k: k fold cross validation
    Return:
      segments and labels 
  """
  print("Start dataset split... ")
  seg_len = len(segments)
  idx_val = [0, int(seg_len/5*1), int(seg_len/5*2), int(seg_len/5*3), int(seg_len/5*4), seg_len]
  train_range1 = range(0, idx_val[k_fold])
  valid_range = range(idx_val[k_fold], idx_val[k_fold+1])
  train_range2 = range(idx_val[k_fold+1], seg_len)

  train_x = np.concatenate((segments[train_range1], segments[train_range2]), axis=0)
  train_y = np.concatenate((classes[train_range1], classes[train_range2]), axis=0)
  valid_x = segments[valid_range]
  valid_y = classes[valid_range]

  # get train data
  train_data = []
  for i in range(len(train_x)):
    train_data.append([train_x[i], train_y[i]])
  
  # get valid data
  valid_data = []
  for i in range(len(valid_x)):
    valid_data.append([valid_x[i], valid_y[i]])
  
  # get test data
  test_data = []
  for i in range(len(test_x)):
    test_data.append([test_x[i], test_y[i]])
  print(len(train_data))
  print(len(valid_data))
  print(len(test_data))

  # generate DataLoader for each dataset
  trainloader = torch.utils.data.DataLoader(train_data, shuffle=True, batch_size=BATCH_SIZE)
  validloader = torch.utils.data.DataLoader(valid_data, shuffle=True, batch_size=BATCH_SIZE)
  testloader = torch.utils.data.DataLoader(test_data, shuffle=True, batch_size=BATCH_SIZE)
  
  print("Finish dataset split... ")
  return trainloader, validloader, testloader


## Load and Save Train, Test, Valid Dataset

In [None]:
train_dataset = dataset_normalize(read_data(TRAIN_DATA_DIR_PATH, 'train'))
test_dataset = dataset_normalize(read_data(TEST_DATA_DIR_PATH, 'test'))
segments, classes = dataset_segmentation(train_dataset)
test_x, test_y = dataset_segmentation(test_dataset)
np.random.seed(SEED)
total_x, total_y = shuffle(segments, classes)
print(len(test_x))
print(len(test_y))
print(len(total_x))
print(len(total_y))

In [None]:
cross_valid_range = 5

for k in range(cross_valid_range):
  print("Start spliting for k = " + str(k))
  trainloader, validloader, testloader = train_valid_test_split(total_x, total_y, test_x, test_y, k)
  CROSS_TRAIN_LOADER_PATH = TRAIN_LOADER_PATH + str(k) + '.pkl'
  CROSS_VALID_LOADER_PATH = VALID_LOAER_PATH + str(k) + '.pkl'
  CROSS_TEST_LOADER_PATH = TEST_LOADER_PATH + str(k) + '.pkl'
  torch.save(trainloader, CROSS_TRAIN_LOADER_PATH)
  torch.save(validloader, CROSS_VALID_LOADER_PATH)
  torch.save(testloader, CROSS_TEST_LOADER_PATH)
  print("Finish data loading...")

## Load CNN Model and Other Helper Functions


In [51]:
KERNAL_SIZE = 5
LEARNING_RATE = 0.00015
NODE_SIZE = 128

k = 4 # modify this number for k-fold cross validation

CROSS_TRAIN_LOADER_PATH = TRAIN_LOADER_PATH + str(k) + '.pkl'
CROSS_VALID_LOADER_PATH = VALID_LOAER_PATH + str(k) + '.pkl'
CROSS_TEST_LOADER_PATH = TEST_LOADER_PATH + str(k) + '.pkl'
trainloader = torch.load(CROSS_TRAIN_LOADER_PATH)
validloader = torch.load(CROSS_VALID_LOADER_PATH)
testloader = torch.load(CROSS_TEST_LOADER_PATH)

class CNN(nn.Module):
  def __init__(self):
    super(CNN, self).__init__()

    # Convolutional Layers
    self.features = nn.Sequential(
      nn.Conv1d(FEATURE_SIZE, NODE_SIZE, kernel_size=KERNAL_SIZE, bias=False),
      nn.ReLU(),
      nn.Conv1d(NODE_SIZE, NODE_SIZE, kernel_size=KERNAL_SIZE, bias=False),
      nn.ReLU(),
      nn.Conv1d(NODE_SIZE, NODE_SIZE, kernel_size=KERNAL_SIZE, bias=False),
      nn.ReLU(),
      nn.Conv1d(NODE_SIZE, NODE_SIZE, kernel_size=KERNAL_SIZE, bias=False),
      nn.ReLU(),
      nn.Conv1d(NODE_SIZE, NODE_SIZE, kernel_size=KERNAL_SIZE, bias=False),
      nn.ReLU(),
    )
  
    self.fc1 = nn.Linear(NODE_SIZE*(WINDOW_SIZE-5*(KERNAL_SIZE-1)), 100)
    self.fc2 = nn.Linear(100, LABEL_SIZE)
    self.max = nn.Softmax(dim=1)

  def forward(self, x):
    x = self.features(x)
    x = x.view(x.shape[0], -1)
    x = F.relu(self.fc1(x))
    x = self.fc2(x)
    x = self.max(x)
    return x

def train_save_CNN_model(TYPE, EPOCH_SIZE):
  # manually set random seed
  torch.backends.cudnn.deterministic = True
  torch.manual_seed(SEED)

  # set gpu device
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  net = CNN().double().to(device)

  # pick the criterion and optimizer
  criterion = nn.MultiLabelSoftMarginLoss()
  optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

  print("Learning rate %.5f, batch size %d, node size %d, kernal size %d" % (LEARNING_RATE, BATCH_SIZE, NODE_SIZE, KERNAL_SIZE))

  # initialization
  train_acc_list = []
  val_acc_list = []
  test_acc_list = []
  accuray = 0

  # start to train with epoches
  for epoch in range(EPOCH_SIZE):
    running_loss = 0.0
    train_total = 0
    train_correct = 0
    valid_total = 0
    valid_correct = 0
    test_total = 0
    test_correct = 0

    # for the training dataset
    for i, data in enumerate(trainloader, 0):
      inputs, labels = data
      inputs, labels = inputs.cuda(0), labels.cuda(0)
      optimizer.zero_grad()
      outputs = net(inputs)
      train_total += labels.size(0)
      train_correct += (torch.max(outputs, 1)[1] == torch.max(labels, 1)[1]).sum().item()
      loss = criterion(outputs, labels)
      if TYPE == 'l0_norm':
        # add group lasso regularization
        lgl = 1e-10
        cnt = torch.tensor([0]).cuda(0)
        for name, param in net.named_parameters():
          if "features" in name:
            cnt = cnt + param.detach().nonzero().size(0)
            #cnt = cnt + len(param.detach()[param.detach() > 1e-2]) + len(param.detach()[param.detach() < -1e-2])
        loss = loss + lgl * cnt
      elif TYPE == 'l1_norm':
        # add group lasso regularization
        lgl = 1e-8
        regularization = torch.tensor([0]).cuda(0)
        for name, param in net.named_parameters():
          if "features" in name:
            regularization = regularization + torch.norm(param, 1)
        loss = loss + lgl * regularization
      elif TYPE == 'l2_norm':
        lgl = 1e-8
        regularization = torch.tensor([0]).cuda(0)
        for name, param in net.named_parameters():
          if "features" in name:
            regularization = regularization + torch.norm(param)
        loss = loss + lgl * regularization
      elif TYPE == 'group_lasso':
        # add group lasso regularization
        lgl = 1e-8
        regularization = torch.tensor([0]).cuda(0)
        for name, param in net.named_parameters():
          if "features" in name:
            for i in range(param.shape[0]):
              regularization = regularization + torch.norm(param[i,:,:])
        loss = loss + lgl * regularization
      elif TYPE == 'l1_group_lasso':
        lgl = 1e-8
        alpha = 0.5
        group_lasso_regularization = torch.tensor([0]).cuda(0)
        lasso_regularization = torch.tensor([0]).cuda(0)
        for name, param in net.named_parameters():
          if "features" in name:
            for i in range(param.shape[0]):
              group_lasso_regularization = group_lasso_regularization + torch.norm(param[i,:,:])
            lasso_regularization = lasso_regularization + torch.norm(param, 1)
        loss = loss + (1-alpha) * lgl * group_lasso_regularization + alpha * lgl * lasso_regularization
      elif TYPE == 'l0_group_lasso':
        #lgl = 0.000001
        #alpha = 0.90
        l0 = 1e-12
        lg = 0.4*1e-10
        cnt = torch.tensor([0]).cuda(0)
        group_lasso_regularization = torch.tensor([0]).cuda(0)
        lasso_regularization = torch.tensor([0]).cuda(0)
        for name, param in net.named_parameters():
          if "features" in name:
            for i in range(param.shape[0]):
              group_lasso_regularization = group_lasso_regularization + torch.norm(param[i,:,:])
            cnt += param.detach().nonzero().size(0)
            #cnt = cnt + len(param.detach()[param.detach() > 2*1e-2]) + len(param.detach()[param.detach() < 2*-1e-2])
            #lasso_regularization = lasso_regularization + torch.norm(param, 0)
        loss = loss + lg * group_lasso_regularization + l0 * cnt
        #loss = loss + (1-alpha) * lgl * group_lasso_regularization + alpha * lgl * lasso_regularization
      loss.backward()
      optimizer.step()
      running_loss += loss.item()

    # for the validation dataset
    for data in validloader:
      inputs, labels = data
      inputs, labels = inputs.cuda(0), labels.cuda(0)
      outputs = net(inputs)
      _, predicted = torch.max(outputs.data, 1)
      valid_total += labels.size(0)
      valid_correct += (predicted == torch.max(labels, 1)[1]).sum().item()
    
    # for the test dataset
    for data in testloader:
      inputs, labels = data
      inputs, labels = inputs.cuda(0), labels.cuda(0)
      outputs = net(inputs)
      _, predicted = torch.max(outputs.data, 1)
      test_total += labels.size(0)
      test_correct += (predicted == torch.max(labels, 1)[1]).sum().item()
    
    # obtain the results for training, validation, test dataset
    train_acc = 100 * train_correct / train_total
    valid_acc = 100 * valid_correct / valid_total
    test_acc = 100 * test_correct / test_total
    train_acc_list.append(train_acc)
    val_acc_list.append(valid_acc)
    test_acc_list.append(test_acc)
    print("epoch %d, loss %.3f, train acc %.2f%%, valid acc %.2f%%, test acc %.2f%%" % (epoch+1, running_loss, train_acc, valid_acc, test_acc))
    
    # save the best model
    if valid_acc >= accuray:
      accuray = valid_acc
      torch.save(net, PATH + '/model/' + TYPE + str(k) + ".ptl")
      torch.jit.save(torch.jit.script(net), PATH + '/model/' + TYPE + str(k) + "_git.ptl")
    
  return train_acc_list, val_acc_list, test_acc_list

## Results for CNN Model

In [18]:
TYPE = 'no_penalty'
EPOCH_SIZE = 150
train_acc, valid_acc, test_acc = train_save_CNN_model(TYPE, EPOCH_SIZE)

Learning rate 0.00010, batch size 32, node size 128, kernal size 5
epoch 1, loss 131.752, train acc 44.65%, valid acc 62.61%, test acc 61.79%
epoch 2, loss 126.765, train acc 64.24%, valid acc 66.28%, test acc 63.90%
epoch 3, loss 124.801, train acc 70.65%, valid acc 74.03%, test acc 71.50%
epoch 4, loss 123.413, train acc 75.21%, valid acc 74.64%, test acc 71.26%
epoch 5, loss 122.908, train acc 77.18%, valid acc 75.39%, test acc 72.38%
epoch 6, loss 122.684, train acc 77.74%, valid acc 75.73%, test acc 71.29%
epoch 7, loss 122.587, train acc 78.13%, valid acc 76.82%, test acc 73.67%
epoch 8, loss 120.063, train acc 86.63%, valid acc 89.26%, test acc 85.68%
epoch 9, loss 118.766, train acc 90.95%, valid acc 90.14%, test acc 86.29%
epoch 10, loss 118.303, train acc 92.23%, valid acc 91.50%, test acc 86.63%
epoch 11, loss 118.146, train acc 92.79%, valid acc 91.71%, test acc 87.61%
epoch 12, loss 117.924, train acc 93.54%, valid acc 92.32%, test acc 86.83%
epoch 13, loss 117.693, train 

In [None]:
TYPE = 'l0_norm'
EPOCH_SIZE = 150
train_acc, valid_acc, test_acc = train_save_CNN_model(TYPE, EPOCH_SIZE)

Learning rate 0.00010, batch size 32, node size 128, kernal size 5
epoch 1, loss 131.758, train acc 44.65%, valid acc 62.61%, test acc 61.79%
epoch 2, loss 126.771, train acc 64.24%, valid acc 66.28%, test acc 63.90%
epoch 3, loss 124.807, train acc 70.65%, valid acc 74.03%, test acc 71.50%
epoch 4, loss 123.419, train acc 75.21%, valid acc 74.64%, test acc 71.26%
epoch 5, loss 122.914, train acc 77.18%, valid acc 75.39%, test acc 72.38%
epoch 6, loss 122.690, train acc 77.74%, valid acc 75.73%, test acc 71.29%
epoch 7, loss 122.593, train acc 78.13%, valid acc 76.82%, test acc 73.67%
epoch 8, loss 120.069, train acc 86.63%, valid acc 89.26%, test acc 85.68%
epoch 9, loss 118.772, train acc 90.95%, valid acc 90.14%, test acc 86.29%
epoch 10, loss 118.309, train acc 92.23%, valid acc 91.50%, test acc 86.63%
epoch 11, loss 118.152, train acc 92.79%, valid acc 91.71%, test acc 87.61%
epoch 12, loss 117.930, train acc 93.54%, valid acc 92.32%, test acc 86.83%
epoch 13, loss 117.699, train 

In [None]:
TYPE = 'l1_norm'
EPOCH_SIZE = 150
train_acc, valid_acc, test_acc = train_save_CNN_model(TYPE, EPOCH_SIZE)

Learning rate 0.00010, batch size 32, node size 128, kernal size 5
epoch 1, loss 131.725, train acc 44.43%, valid acc 69.75%, test acc 66.30%
epoch 2, loss 125.565, train acc 68.78%, valid acc 72.94%, test acc 67.80%
epoch 3, loss 124.587, train acc 71.84%, valid acc 75.32%, test acc 69.09%
epoch 4, loss 123.367, train acc 75.74%, valid acc 80.83%, test acc 76.86%
epoch 5, loss 121.551, train acc 81.75%, valid acc 85.38%, test acc 80.52%
epoch 6, loss 119.788, train acc 87.71%, valid acc 89.46%, test acc 84.15%
epoch 7, loss 118.870, train acc 90.60%, valid acc 91.16%, test acc 85.14%
epoch 8, loss 118.458, train acc 91.92%, valid acc 91.09%, test acc 85.85%
epoch 9, loss 118.139, train acc 92.96%, valid acc 91.64%, test acc 87.24%
epoch 10, loss 118.101, train acc 93.06%, valid acc 91.77%, test acc 86.66%
epoch 11, loss 117.814, train acc 93.91%, valid acc 92.52%, test acc 87.68%
epoch 12, loss 117.797, train acc 93.93%, valid acc 92.79%, test acc 87.34%
epoch 13, loss 117.607, train 

In [None]:
TYPE = 'l2_norm'
EPOCH_SIZE = 150
train_acc, valid_acc, test_acc = train_save_CNN_model(TYPE, EPOCH_SIZE)

Learning rate 0.00010, batch size 32, node size 128, kernal size 5
epoch 1, loss 131.729, train acc 44.65%, valid acc 59.89%, test acc 58.19%
epoch 2, loss 126.827, train acc 64.17%, valid acc 69.20%, test acc 65.93%
epoch 3, loss 124.475, train acc 71.91%, valid acc 74.44%, test acc 71.02%
epoch 4, loss 123.246, train acc 75.75%, valid acc 74.37%, test acc 71.12%
epoch 5, loss 122.957, train acc 77.03%, valid acc 75.59%, test acc 72.92%
epoch 6, loss 122.686, train acc 77.66%, valid acc 76.41%, test acc 73.67%
epoch 7, loss 122.476, train acc 78.37%, valid acc 76.61%, test acc 73.84%
epoch 8, loss 122.491, train acc 78.32%, valid acc 77.09%, test acc 74.45%
epoch 9, loss 122.230, train acc 79.10%, valid acc 80.90%, test acc 78.52%
epoch 10, loss 119.475, train acc 88.62%, valid acc 90.07%, test acc 85.00%
epoch 11, loss 118.447, train acc 91.80%, valid acc 90.28%, test acc 86.53%
epoch 12, loss 118.149, train acc 92.77%, valid acc 91.91%, test acc 86.26%
epoch 13, loss 117.879, train 

In [None]:
TYPE = 'group_lasso'
EPOCH_SIZE = 150
train_acc, valid_acc, test_acc = train_save_CNN_model(TYPE, EPOCH_SIZE)

Learning rate 0.00010, batch size 32, node size 128, kernal size 5
epoch 1, loss 131.698, train acc 44.77%, valid acc 61.45%, test acc 61.76%
epoch 2, loss 126.648, train acc 64.75%, valid acc 67.85%, test acc 65.15%
epoch 3, loss 124.632, train acc 71.35%, valid acc 73.96%, test acc 70.51%
epoch 4, loss 123.467, train acc 75.29%, valid acc 74.37%, test acc 71.02%
epoch 5, loss 123.070, train acc 76.57%, valid acc 75.12%, test acc 72.41%
epoch 6, loss 122.730, train acc 77.66%, valid acc 76.61%, test acc 73.43%
epoch 7, loss 122.567, train acc 78.23%, valid acc 76.27%, test acc 73.23%
epoch 8, loss 120.297, train acc 85.95%, valid acc 90.14%, test acc 85.34%
epoch 9, loss 118.791, train acc 90.95%, valid acc 90.89%, test acc 86.80%
epoch 10, loss 118.408, train acc 92.03%, valid acc 90.62%, test acc 84.29%
epoch 11, loss 118.096, train acc 92.93%, valid acc 92.52%, test acc 87.55%
epoch 12, loss 117.789, train acc 93.93%, valid acc 93.00%, test acc 88.16%
epoch 13, loss 117.699, train 

In [42]:
TYPE = 'l1_group_lasso'
EPOCH_SIZE = 150
train_acc, valid_acc, test_acc = train_save_CNN_model(TYPE, EPOCH_SIZE)

Learning rate 0.00010, batch size 32, node size 128, kernal size 5
epoch 1, loss 131.788, train acc 44.69%, valid acc 62.34%, test acc 61.55%
epoch 2, loss 126.685, train acc 64.70%, valid acc 66.15%, test acc 64.51%
epoch 3, loss 124.338, train acc 72.52%, valid acc 74.58%, test acc 70.51%
epoch 4, loss 123.346, train acc 75.46%, valid acc 75.32%, test acc 71.97%
epoch 5, loss 122.963, train acc 76.79%, valid acc 75.32%, test acc 73.43%
epoch 6, loss 122.690, train acc 77.71%, valid acc 76.55%, test acc 73.40%
epoch 7, loss 122.560, train acc 78.32%, valid acc 76.68%, test acc 73.33%
epoch 8, loss 121.311, train acc 82.32%, valid acc 88.58%, test acc 85.95%
epoch 9, loss 119.005, train acc 90.19%, valid acc 90.55%, test acc 85.71%
epoch 10, loss 118.534, train acc 91.63%, valid acc 91.16%, test acc 85.37%
epoch 11, loss 118.157, train acc 92.74%, valid acc 90.41%, test acc 86.19%
epoch 12, loss 117.870, train acc 93.78%, valid acc 93.07%, test acc 87.34%
epoch 13, loss 117.767, train 

In [52]:
TYPE = 'l0_group_lasso'
EPOCH_SIZE = 150
train_acc, valid_acc, test_acc = train_save_CNN_model(TYPE, EPOCH_SIZE)
print(train_acc)
print(valid_acc)
print(test_acc)
file_name = 'data'+str(k)+'.txt'
with open(file_name, 'w') as f:
    for i in train_acc:
      f.write("%f " % i)
    f.write('\n')
    for i in valid_acc:
      f.write("%f " % i)
    f.write('\n')
    for i in test_acc:
      f.write("%f " % i)
    f.write('\n')

Learning rate 0.00015, batch size 32, node size 128, kernal size 5
epoch 1, loss 130.469, train acc 49.04%, valid acc 72.33%, test acc 69.12%
epoch 2, loss 124.098, train acc 73.18%, valid acc 76.07%, test acc 74.01%
epoch 3, loss 122.659, train acc 78.10%, valid acc 79.88%, test acc 75.60%
epoch 4, loss 121.740, train acc 81.14%, valid acc 80.90%, test acc 77.16%
epoch 5, loss 120.759, train acc 84.19%, valid acc 85.11%, test acc 81.10%
epoch 6, loss 119.400, train acc 88.71%, valid acc 88.51%, test acc 82.46%
epoch 7, loss 118.970, train acc 89.97%, valid acc 91.03%, test acc 85.51%
epoch 8, loss 118.432, train acc 91.82%, valid acc 88.92%, test acc 84.56%
epoch 9, loss 118.192, train acc 92.54%, valid acc 91.84%, test acc 86.83%
epoch 10, loss 117.975, train acc 93.30%, valid acc 91.98%, test acc 87.41%
epoch 11, loss 117.964, train acc 93.39%, valid acc 92.93%, test acc 87.61%
epoch 12, loss 117.899, train acc 93.49%, valid acc 92.93%, test acc 87.00%
epoch 13, loss 117.677, train 

## Results after Pruning the above Models

In [56]:
PRUNE_THRESHOLD = 0.04

class ThresholdPruning(prune.BasePruningMethod):
    PRUNING_TYPE = "unstructured"

    def __init__(self, threshold):
        self.threshold = threshold

    def compute_mask(self, tensor, default_mask):
      return torch.abs(tensor) > self.threshold

PATHS = {'No penalty - 4 128 0.0001':            PATH + '/model/final/lr0.0001/no_penalty4.ptl',
         'l0 norm - 4 128 0.0001':               PATH + '/model/final/lr0.0001/l0_norm4.ptl',
         'l1 norm - 4 128 0.0001':               PATH + '/model/final/lr0.0001/l1_norm4.ptl',
         'l2 norm - 4 128 0.0001':               PATH + '/model/final/lr0.0001/l2_norm4.ptl',
         'group lasso - 4 128 0.0001':           PATH + '/model/final/lr0.0001/group_lasso4.ptl',
         'l1 group lasso - 4 128 0.0001':        PATH + '/model/final/lr0.0001/l1_group_lasso4.ptl',
         'l0 group lasso - 4 128 0.0001 (BEST)': PATH + '/model/final/lr0.0001/l0_group_lasso4.ptl',
         'l0 group lasso - 4 128 0.00005':       PATH + '/model/final/lr0.00005/l0_group_lasso4.ptl',
         'l0 group lasso - 4 128 0.00015':       PATH + '/model/final/lr0.00015/l0_group_lasso4.ptl',
         'l0 group lasso - 4 128 0.0002':        PATH + '/model/final/lr0.0002/l0_group_lasso4.ptl',
         'l0 group lasso - 4 128 0.00001':       PATH + '/model/final/lr0.00001/l0_group_lasso4.ptl',
         'l0 group lasso - 4 256 0.0001':        PATH + '/model/final/256node/l0_group_lasso4.ptl',
         'l0 group lasso - 4 64 0.0001':         PATH + '/model/final/64node/l0_group_lasso4.ptl',
         'l0 group lasso - 0 128 0.0001':        PATH + '/model/final/lr0.0001/l0_group_lasso0.ptl',
         'l0 group lasso - 1 128 0.0001':        PATH + '/model/final/lr0.0001/l0_group_lasso1.ptl',
         'l0 group lasso - 2 128 0.0001':        PATH + '/model/final/lr0.0001/l0_group_lasso2.ptl',
         'l0 group lasso - 3 128 0.0001':        PATH + '/model/final/lr0.0001/l0_group_lasso3.ptl',
        }

for name in PATHS:
  print('Here are the results for {}:'.format(name))
  # load the model
  net = torch.load(PATHS[name])

  # display the results before compressed model
  test_correct = 0
  test_total = 0
  with torch.no_grad():
    for data in testloader:
      inputs, labels = data
      inputs, labels = inputs.cuda(0), labels.cuda(0)
      outputs = net(inputs)
      _, predicted = torch.max(outputs.data, 1)
      test_total += labels.size(0)
      test_correct += (predicted == torch.max(labels, 1)[1]).sum().item()
    test_acc = 100 * test_correct / test_total
  print('Accuracy of the network on the %d test data: %.2f %% before compression' % (test_total, test_acc))

  # prune the model
  parameters_to_prune = []
  for na, child in net.features.named_children():
    if int(na) % 2 == 0:
      parameters_to_prune.append((child, "weight"))
  if name == 'l0 group lasso 256':
    prune.global_unstructured(parameters_to_prune, pruning_method=ThresholdPruning, threshold=PRUNE_THRESHOLD)
  else:
    prune.global_unstructured(parameters_to_prune, pruning_method=ThresholdPruning, threshold=PRUNE_THRESHOLD)

  # calculate the sparsity
  total_weight = 0
  total_nonzero = 0
  for na, child in net.features.named_children():
    if int(na) % 2 == 0:
      total_weight += torch.numel(child.weight)
      total_nonzero += torch.count_nonzero(child.weight)
  print('Sparity for the compressed model: %.2f %%' % (100*float(total_nonzero / total_weight)))

  # display the results after compressed model
  test_correct = 0
  test_total = 0
  with torch.no_grad():
    for data in testloader:
      inputs, labels = data
      inputs, labels = inputs.cuda(0), labels.cuda(0)
      outputs = net(inputs)
      _, predicted = torch.max(outputs.data, 1)
      test_total += labels.size(0)
      test_correct += (predicted == torch.max(labels, 1)[1]).sum().item()
    test_acc = 100 * test_correct / test_total
  print('Accuracy of the network on the %d test data: %.2f %% after compression\n' % (test_total, test_acc))

Here are the results for No penalty - 4 128 0.0001:
Accuracy of the network on the 2947 test data: 91.52 % before compression
Sparity for the compressed model: 18.68 %
Accuracy of the network on the 2947 test data: 89.21 % after compression

Here are the results for l0 norm - 4 128 0.0001:
Accuracy of the network on the 2947 test data: 91.52 % before compression
Sparity for the compressed model: 18.68 %
Accuracy of the network on the 2947 test data: 89.21 % after compression

Here are the results for l1 norm - 4 128 0.0001:
Accuracy of the network on the 2947 test data: 90.46 % before compression
Sparity for the compressed model: 18.17 %
Accuracy of the network on the 2947 test data: 88.06 % after compression

Here are the results for l2 norm - 4 128 0.0001:
Accuracy of the network on the 2947 test data: 91.01 % before compression
Sparity for the compressed model: 18.94 %
Accuracy of the network on the 2947 test data: 89.24 % after compression

Here are the results for group lasso - 4 