# Import libraries

In [None]:
import random
import gc
import json
from tqdm import tqdm
import os

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss

!pip install statsmodels
from statsmodels.tsa.stattools import adfuller, grangercausalitytests
from statsmodels.tsa.stattools import pacf

!pip install shap
import shap

!git clone https://github.com/UCMerced-ML/LC-model-compression.git
!mv ./LC-model-compression/* ./
!rm -r ./LC-model-compression
import lc
from lc.torch import ParameterTorch as Param, AsVector, AsIs
from lc.compression_types import ConstraintL0Pruning, ConstraintL1Pruning, ConstraintL1Pruning, LowRank, RankSelection, AdaptiveQuantization

# List of all parameters

In [None]:
# Data handling: mini-batch size of the training DataLoader, held-out fraction
# passed to train_test_split, and the seed shared by all RNGs and the split.
BATCH_SIZE = 16
TEST_SIZE = 0.2
SEED = 42

# Partial autocorrelation settings used by get_pacf_order: `alpha` handed to
# statsmodels' pacf and the number of lags that are computed/scanned.
PACF_ALPHA = 0.05
PACF_NLAGS = 200

# Granger-causality settings: GC_ALPHA is the p-value cut-off below which a
# weight is kept; GC_THRESHOLD is the magnitude under which a parameter is
# counted as pruned.
# NOTE(review): GC_MAXLAG is only copied into an unused local in
# check_granger_causality — the tested lag comes from get_pacf_order instead.
GC_ALPHA = 0.005
GC_MAXLAG = 10
GC_THRESHOLD = 1e-5

# Optimisation settings: SGD learning rate and weight decay, model input width,
# epochs run inside each LC L-step, and total epochs of vanilla training
# (EPOCHS // EPOCHS_PER_STEP also fixes the number of LC steps).
LEARNING_RATE = 0.005
WEIGHT_DECAY = 0
NUM_FEATURES = 33
EPOCHS_PER_STEP = 15
EPOCHS = 7500

# Differencing order (np.diff `n`) applied to the recorded weight and loss
# series in make_stationary.
WEIGHTS_DIFF_ORDER = 2
LOSS_DIFF_ORDER = 2

# The network's forward pass returns raw logits, so the loss applies the
# sigmoid itself.
loss_fn = nn.BCEWithLogitsLoss()

# Seed for reproducibility

In [None]:
# Seed Python's, NumPy's and PyTorch's global random number generators.
random.seed(SEED)
np.random.seed(SEED)
# NOTE(review): this builds a RandomState object and immediately discards it,
# so it does not influence any later random draw.
np.random.RandomState(SEED)
torch.manual_seed(SEED)
# NOTE(review): presumably meant to make cuBLAS deterministic; no call to
# torch.use_deterministic_algorithms is visible in this file — confirm whether
# one is needed for the variable to matter.
os.environ["CUBLAS_WORKSPACE_CONFIG"]=":4096:2"

# Data Preprocessing

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List every file under the Kaggle input directory and load the ionosphere CSV
# from the directory that actually contains it, then stop walking.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
    # Build the CSV path only for a directory that really holds the dataset;
    # any other directory with files in it would raise FileNotFoundError.
    if "ionosphere_data_kaggle.csv" in filenames:
        df = pd.read_csv(os.path.join(dirname, "ionosphere_data_kaggle.csv"))
        break

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# feature2 is all zeroes, so drop the column
df = df.drop(columns="feature2")

# encode the labels: 'g' becomes 0, every other value becomes 1
df['label'] = df['label'].map(lambda lbl: 0 if lbl == 'g' else 1)

# make sure every column is numeric
df = df.apply(pd.to_numeric)

# standardise every feature column (zero mean, unit sample standard deviation)
features = df.columns.tolist()
features.remove("label")
feature_frame = df[features]
mean = feature_frame.mean()
std_dev = feature_frame.std()
df[features] = (feature_frame - mean) / std_dev

# split into inputs (all but the last column) and a column vector of targets
# (the last column)
data = df.to_numpy()
x = data[:, :-1]
y = data[:, -1][:, np.newaxis]

In [None]:
# sanity check: display the shapes of the feature matrix and the label column
x.shape, y.shape

In [None]:
# train and test split
# cast to float32 before the arrays are turned into torch tensors
x = x.astype(np.float32)
y = y.astype(np.float32)

# hold out TEST_SIZE of the samples; the fixed random_state keeps the split reproducible
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=TEST_SIZE, random_state=SEED)
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

In [None]:
# load to gpu
# NOTE(review): the device is hard-coded to CUDA; there is no CPU fallback here.
device = torch.device("cuda")

# Whole train/test inputs are moved to the device once, up front.
x_train_tensor = torch.from_numpy(train_x).to(device)
x_test_tensor = torch.from_numpy(test_x).to(device)

# Matching label tensors on the same device.
y_train_tensor = torch.from_numpy(train_y).to(device)
y_test_tensor = torch.from_numpy(test_y).to(device)

In [None]:
# create dataset and dataloader
# Wrap the device-resident training tensors; batches of BATCH_SIZE are served
# in a fixed order because shuffle=False.
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Utility functions

In [None]:
def save_shap_values(shap_values, model_name):
  """Write the mean absolute SHAP value per feature to a JSON file.

  Args:
    shap_values: object exposing a ``values`` array; absolute values are
      averaged over its first axis (presumably the sample axis — confirm
      against the shap explainer output).
    model_name: path of the JSON file to write (used directly as a file path).

  The file maps the feature index (stored as a string key by JSON) to its
  mean absolute SHAP value.
  """
  values = np.abs(shap_values.values).mean(axis = 0)
  # Convert NumPy scalars to Python floats: json cannot serialise dtypes such
  # as float32.
  val_dict = {i: float(val) for i, val in enumerate(values)}

  with open(model_name, 'w') as f:
    json.dump(val_dict, f)

In [None]:
def get_pacf_order(time_data, nlags = PACF_NLAGS):
  """Pick a lag order from the partial autocorrelation of ``time_data``.

  The first ``nlags`` entries of the PACF are scanned from index 0; at the
  first index whose absolute value is below 0.05 the function returns that
  index minus one, but never less than 1. If no entry qualifies it returns 1.
  The chosen order (and ``nlags``) is printed.
  """
  print(nlags)
  # pacf is asked for confidence intervals, but only the coefficients are used.
  pacf_arr, _ = pacf(time_data, nlags, alpha = PACF_ALPHA)
  for lag in range(nlags):
    if not (abs(pacf_arr[lag]) < 0.05):
      continue
    order = max(lag - 1,1)
    print(f"pacf order is {order}")
    return order
  print(f"pacf order is 1")
  return 1

In [None]:
def check_granger_causality(weights_dict, alpha = GC_ALPHA):
  """Build keep/prune masks from Granger-causality tests against the loss.

  Args:
    weights_dict: mapping with a ``"loss"`` array plus, for every other key,
      a dict ``{weight_index: time_series_array}`` (the layout produced by
      ``make_stationary``).
    alpha: a weight is kept (mask 1) only when the largest p-value across the
      four reported tests is below this value.

  Returns:
    dict mapping each non-loss key to a list of 0/1 flags, one per weight,
    in the iteration order of that key's dict.

  The lag order is chosen once from the loss series via ``get_pacf_order``.
  Each test runs on a two-column array ``[loss, weight]``; per the statsmodels
  documentation this tests whether the second column Granger-causes the first.
  """
  tests = ["params_ftest", "ssr_ftest", "ssr_chi2test", "lrtest"]

  gc_masks = dict()
  loss_column = weights_dict["loss"].reshape(-1,1)
  order = get_pacf_order(loss_column, nlags = PACF_NLAGS)
  print(order)

  for key in weights_dict:
    if key == "loss":
      continue
    mask = []
    for time_series in tqdm(weights_dict[key].values()):
      try:
        data = np.concatenate([loss_column, time_series.reshape(-1,1)], axis = 1)
        gc_test = grangercausalitytests(data, [order], verbose = False)
        p_value = max(gc_test[order][0][test][1] for test in tests)
      except Exception:
        # Best effort: if the test cannot be computed for this series, keep
        # the weight. Only Exception is caught, so Ctrl-C still interrupts
        # the loop.
        mask.append(1)
      else:
        mask.append(1 if p_value < alpha else 0)
    gc_masks[key] = mask
  return gc_masks

In [None]:
def compare_feature_importances(file1, file2):
  """Score how much two saved feature-importance files disagree.

  Both files are JSON objects mapping a feature key to an importance value.
  For every feature the absolute rank difference (rank 0 is the largest
  value; tied values share the first matching rank) is multiplied by the
  absolute difference of the feature's share of total importance, and the
  products are summed. One line per feature is printed.

  Args:
    file1: path of the first importance file.
    file2: path of the second importance file.

  Returns:
    Tuple ``(total_diff, total_diff / number_of_features)``.

  Raises:
    AssertionError: if the files hold a different number of features.
    ValueError: if the files do not describe the same feature keys.
    ZeroDivisionError: if a file is empty or its importances sum to zero.
  """
  with open(file1, 'r') as f:
    imp1 = json.load(f)
  with open(file2, 'r') as f:
    imp2 = json.load(f)

  assert len(imp1) == len(imp2)
  if imp1.keys() != imp2.keys():
    raise ValueError("feature importance files do not contain the same feature keys")

  sort1 = sorted(imp1.values(), reverse = True)
  sum1 = sum(sort1)
  sort2 = sorted(imp2.values(), reverse = True)
  sum2 = sum(sort2)

  total_diff = 0

  # Pair the two files by feature key rather than by position, so a different
  # key order in one file cannot compare unrelated features.
  for key, val1 in imp1.items():
    val2 = imp2[key]
    rank1 = sort1.index(val1)
    rank2 = sort2.index(val2)
    diff = abs(rank1 - rank2)
    diff_prop = abs(val1/sum1 - val2/sum2)

    print(key, key, diff, diff_prop)

    total_diff += diff * diff_prop

  return total_diff, total_diff/len(sort1)

In [None]:
def evaluate_performance(net):
    """Print log-loss and accuracy of ``net`` on the train and test splits.

    ``net`` must expose ``infer(x)``; its output is scored against the NumPy
    labels with sklearn's ``log_loss`` and, thresholded at 0.5, with
    ``accuracy_score``. Nothing is returned.
    """
    def _score(inputs, targets):
        # Predictions are computed once and reused for both metrics.
        predictions = net.infer(inputs)
        return log_loss(targets, predictions), accuracy_score(targets, predictions > 0.5)

    loss_train, acc_train = _score(x_train_tensor, train_y)
    loss_test, acc_test = _score(x_test_tensor, test_y)

    print(f"loss_train {loss_train}")
    print(f"acc_train {acc_train}")
    print(f"loss_test {loss_test}")
    print(f"acc_test {acc_test}")

In [None]:
def my_l_step(lc_model, lc_penalty, step, lr=LEARNING_RATE):
    """Run one LC L-step with full-batch SGD on the whole training set.

    A fresh SGD optimizer over the trainable parameters performs
    EPOCHS_PER_STEP updates of the task loss plus ``lc_penalty()``. The mean
    objective over those updates is printed, followed by the model's
    train/test metrics.
    """
    trainable = [p for p in lc_model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(trainable, lr, weight_decay=WEIGHT_DECAY)

    print(f'\nL-step #{step} with lr: {lr:.5f}')

    num_epochs = EPOCHS_PER_STEP
    loss_total = 0
    for _ in range(num_epochs):
        logits = lc_model(x_train_tensor)
        objective = loss_fn(logits, y_train_tensor) + lc_penalty()
        optimizer.zero_grad()
        objective.backward()
        optimizer.step()
        loss_total += objective.item()

    # Skip the average when no epoch ran, to avoid dividing by zero.
    if num_epochs != 0:
      print(f'avg lc loss {loss_total / num_epochs:.5f}')
    evaluate_performance(lc_model)

In [None]:
def my_l_step_batching(lc_model, lc_penalty, step, lr=LEARNING_RATE):
    """Run one LC L-step with mini-batch SGD over ``train_loader``.

    A fresh SGD optimizer over the trainable parameters runs EPOCHS_PER_STEP
    passes over the loader, minimising the task loss plus ``lc_penalty()``.
    The objective averaged over every optimizer update is printed, followed
    by the model's train/test metrics.
    """
    params = [p for p in lc_model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr, weight_decay=WEIGHT_DECAY)

    print(f'\nL-step #{step} with lr: {lr:.5f}')

    total_loss = 0.0
    num_updates = 0
    for _ in range(EPOCHS_PER_STEP):
        for x, y in train_loader:
            y_pred = lc_model(x)
            loss = loss_fn(y_pred, y) + lc_penalty()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_updates += 1

    # Average over the number of updates actually made, so the figure is a
    # true per-update mean and is comparable with the full-batch L-step.
    if num_updates != 0:
      print(f'avg lc loss {total_loss / num_updates:.5f}')
    evaluate_performance(lc_model)

In [None]:
def make_stationary(weights_dict):
  """Difference the recorded loss and weight histories.

  Args:
    weights_dict: mapping with a ``"loss"`` sequence plus, for every other
      key, a sequence of flattened weight snapshots (one per recorded step).

  Returns:
    dict with ``"loss"`` mapped to the loss differenced LOSS_DIFF_ORDER times
    (flattened), and every other key mapped to
    ``{weight_index: series differenced WEIGHTS_DIFF_ORDER times}``.
  """
  stationary_weights = dict()
  stationary_weights["loss"] = np.diff(weights_dict["loss"], n = LOSS_DIFF_ORDER).reshape(-1)

  for key, history in weights_dict.items():
    # Skip the loss by name instead of assuming it is the last key.
    if key == "loss":
      continue
    weights = np.array(history)
    stationary_weights[key] = {
      i: np.diff(weights[:,i], n = WEIGHTS_DIFF_ORDER)
      for i in range(weights.shape[-1])
    }
  return stationary_weights

# The Model

In [None]:
class neural_network(nn.Module):
  """Fully connected binary classifier: four hidden layers of 64 ReLU units.

  ``forward`` returns raw logits (to pair with BCEWithLogitsLoss);
  ``infer`` returns sigmoid probabilities on the CPU.
  """

  def __init__(self, num_features):
    """Build the network for inputs with ``num_features`` columns."""
    super().__init__()

    self.layers = nn.Sequential(
        nn.Linear(num_features, 64),
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear(64, 1)
    )

    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Return logits for ``x``; also switches the module to training mode."""
    self.train()
    return self.layers(x)

  def infer(self, x):
    """Return probabilities for ``x`` as a detached CPU tensor.

    ``x`` may be a tensor already on the model's device or a NumPy array,
    which is cast to float32 and moved to the device the parameters live on.
    The module is left in evaluation mode.
    """
    self.eval()
    with torch.no_grad():
      if isinstance(x, np.ndarray):
        # Follow the model's own device instead of assuming CUDA.
        target_device = next(self.parameters()).device
        x = torch.from_numpy(x.astype(np.float32)).to(target_device)
      # Call the layers directly: going through forward() would flip the
      # module back to training mode.
      logits = self.layers(x)
      return self.sigmoid(logits).cpu().detach()

# More functions

In [None]:
def vanilla_training_no_batching(model):
    """Train ``model`` with full-batch SGD and record its trajectory.

    Runs EPOCHS updates on the whole training set. After every update the
    flattened value of each named parameter and the loss are appended to the
    history. Metrics are printed every 500 epochs and at the end; SHAP values
    of the trained model are plotted and saved to JSON.

    Returns:
        Tuple ``(recorded_weights, model)`` where ``recorded_weights`` maps
        each parameter name to a list of per-epoch NumPy snapshots and
        ``"loss"`` (inserted last) to the list of per-epoch loss values.
    """
    # define the optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr = LEARNING_RATE, weight_decay = WEIGHT_DECAY)

    # record the weights
    recorded_weights = {name: [] for name, _ in model.named_parameters()}
    recorded_weights["loss"] = []

    # training the vanilla model
    print("Starting Vanilla Training(no batching).....")

    for epoch in range(EPOCHS):
      y_pred = model(x_train_tensor)
      loss = loss_fn(y_pred, y_train_tensor)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      for name, parameter in model.named_parameters():
          # Copy the snapshot: for a CPU-resident parameter .numpy() shares
          # memory with the live tensor, so every stored snapshot would
          # otherwise track later updates.
          value = parameter.detach().cpu().view(-1).numpy().copy()
          recorded_weights[name].append(value)

      recorded_weights["loss"].append(loss.item())

      if (epoch+1) % 500 == 0:
          print(f"Epoch - {epoch + 1}: Loss - {loss.item()}")
          evaluate_performance(model)

    # epochs completed, evaluate the model once
    print(f"EPOCHS COMPLETED!")

    print("model performance after vanilla training(no batching)")
    evaluate_performance(model)

    shap_explainer = shap.Explainer(model.infer, train_x)
    shap_values = shap_explainer(test_x)
    shap.plots.bar(shap_values, max_display=99)

    out_file = f"unpruned_lr_{LEARNING_RATE}_wd_{WEIGHT_DECAY}_epochs_{EPOCHS}_no_batching.json"
    save_shap_values(shap_values, out_file)
    print(f"Saved unpruned model to -> {out_file}")

    return recorded_weights, model

In [None]:
def vanilla_training_batching(model):
    """Train ``model`` with mini-batch SGD and record its trajectory.

    Runs EPOCHS passes over ``train_loader``. After every epoch the flattened
    value of each named parameter and the epoch loss (the sum of the batch
    losses) are appended to the history. Metrics are printed every 500 epochs
    and at the end; SHAP values of the trained model are plotted and saved to
    JSON.

    Returns:
        Tuple ``(recorded_weights, model)`` where ``recorded_weights`` maps
        each parameter name to a list of per-epoch NumPy snapshots and
        ``"loss"`` (inserted last) to the list of per-epoch summed losses.
    """
    # define the optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr = LEARNING_RATE, weight_decay = WEIGHT_DECAY)

    # record the weights
    recorded_weights = {name: [] for name, _ in model.named_parameters()}
    recorded_weights["loss"] = []

    # training the vanilla model
    print("Starting Vanilla Training(batching).....")

    for epoch in range(EPOCHS):
      epoch_loss = 0.0

      for x, y in train_loader:
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Accumulate a plain float: adding the loss tensor itself would keep
        # every batch's autograd graph alive until the end of the epoch.
        epoch_loss += loss.item()

      for name, parameter in model.named_parameters():
          # Copy the snapshot: for a CPU-resident parameter .numpy() shares
          # memory with the live tensor, so every stored snapshot would
          # otherwise track later updates.
          value = parameter.detach().cpu().view(-1).numpy().copy()
          recorded_weights[name].append(value)

      recorded_weights["loss"].append(epoch_loss)

      if (epoch+1) % 500 == 0:
          print(f"Epoch - {epoch + 1}: Loss - {epoch_loss}")
          evaluate_performance(model)

    # epochs completed, evaluate the model once
    print(f"EPOCHS COMPLETED!")

    print("model performance after vanilla training(batching)")
    evaluate_performance(model)

    shap_explainer = shap.Explainer(model.infer, train_x)
    shap_values = shap_explainer(test_x)
    shap.plots.bar(shap_values, max_display=99)

    out_file = f"unpruned_lr_{LEARNING_RATE}_wd_{WEIGHT_DECAY}_epochs_{EPOCHS}_batching.json"
    save_shap_values(shap_values, out_file)
    print(f"Saved unpruned model to -> {out_file}")

    return recorded_weights, model

In [None]:
def gc_prune(model, recorded_weights, batching=False):
    """Zero out the parameters that fail the Granger-causality test.

    The recorded histories are differenced, every parameter series is tested
    against the loss, and each parameter tensor is multiplied by the
    resulting 0/1 mask. SHAP values of the pruned model are plotted and saved,
    its metrics are printed, and the share of parameters whose magnitude is
    below GC_THRESHOLD is reported.

    Args:
        model: trained network; it is moved to the CPU for masking and to
            CUDA afterwards.
        recorded_weights: history returned by the vanilla training function.
        batching: selects the ``batching`` / ``no_batching`` output file name.

    Returns:
        Fraction (0..1, rounded to 3 decimals) of parameters counted as pruned.
    """
    print("Starting Granger Causal Pruning.....\n\n")

    weights = make_stationary(recorded_weights)
    masks = check_granger_causality(weights)

    model.cpu()
    for name, parameter in model.named_parameters():
      mask = torch.Tensor(masks[name]).view(parameter.shape)
      parameter.data = torch.mul(mask, parameter.data)

    model.cuda()
    shap_explainer = shap.Explainer(model.infer, train_x)
    shap_values = shap_explainer(test_x)
    shap.plots.bar(shap_values, max_display=99)

    # Count near-zero parameters with tensor reductions and keep the totals
    # as plain Python ints so they print as numbers.
    pruned = 0
    total = 0
    for _, parameter in model.named_parameters():
      flat = parameter.detach().view(-1)
      pruned += int((flat.abs() < GC_THRESHOLD).sum().item())
      total += flat.numel()

    print("Model performance after gc training")
    evaluate_performance(model)
    print(f"\n\nGC Pruning completed, percentage of model pruned is: {pruned * 100 /total}")

    suffix = "batching" if batching else "no_batching"
    out_file = f"gc_lr_{LEARNING_RATE}_wd_{WEIGHT_DECAY}_epochs_{EPOCHS}_{suffix}.json"
    save_shap_values(shap_values, out_file)
    print(f"Saved gc pruned model to -> {out_file}")

    return round(pruned / total, 3)


In [None]:
def lc_no_batching(prune_total):
  """Train a fresh network with LC L0-pruning using full-batch L-steps.

  Args:
    prune_total: fraction (0..1) of the model to prune; the L0 constraint
      keeps ``int(model_size * (1 - prune_total))`` weights.

  The compressed model's SHAP values are plotted and saved to JSON, its
  metrics are printed, and the percentage of parameters whose magnitude is
  below GC_THRESHOLD is reported. Nothing is returned.
  """
  lc_model = neural_network(train_x.shape[1]).to(device)
  # NOTE(review): model_size counts every trainable parameter (biases
  # included), while only the Linear weights are put under the constraint.
  model_size = sum(p.numel() for p in lc_model.parameters() if p.requires_grad)

  mu_s = [1 for i in range(EPOCHS // EPOCHS_PER_STEP)]
  layers = [lambda x=x: getattr(x, 'weight') for x in lc_model.modules() if isinstance(x, nn.Linear)]
  compression_tasks = {
    Param(layers, device): (AsVector, ConstraintL0Pruning(kappa=int(model_size * (1 - prune_total))), 'pruning')
  }

  lc_alg = lc.Algorithm(
    model=lc_model,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=evaluate_performance      # evaluation function
  )

  lc_alg.run()

  print("LC Pruning Training Completed!")

  lc_model.cuda()
  shap_explainer = shap.Explainer(lc_model.infer, train_x)
  shap_values = shap_explainer(test_x)
  shap.plots.bar(shap_values, max_display=99)

  print("Model Performance after LC Training")
  evaluate_performance(lc_model)

  # Count near-zero parameters with tensor reductions and keep the totals as
  # plain Python ints.
  pruned = 0
  total = 0
  for _, parameter in lc_model.named_parameters():
    flat = parameter.detach().view(-1)
    pruned += int((flat.abs() < GC_THRESHOLD).sum().item())
    total += flat.numel()

  # Report a real percentage, matching the figure printed by gc_prune.
  print(f"LC Pruning completed, percentage of model pruned is: {pruned * 100 / total}")
  out_file = f"lc_lr_{LEARNING_RATE}_wd_{WEIGHT_DECAY}_epochs_{EPOCHS}_no_batching.json"
  save_shap_values(shap_values, out_file)
  print(f"Saved lc pruned model to -> {out_file}")

In [None]:
def lc_batching(prune_total):
  """Train a fresh network with LC L0-pruning using mini-batch L-steps.

  Args:
    prune_total: fraction (0..1) of the model to prune; the L0 constraint
      keeps ``int(model_size * (1 - prune_total))`` weights.

  The compressed model's SHAP values are plotted and saved to JSON, its
  metrics are printed, and the percentage of parameters whose magnitude is
  below GC_THRESHOLD is reported. Nothing is returned.
  """
  lc_model = neural_network(train_x.shape[1]).to(device)
  # NOTE(review): model_size counts every trainable parameter (biases
  # included), while only the Linear weights are put under the constraint.
  model_size = sum(p.numel() for p in lc_model.parameters() if p.requires_grad)

  mu_s = [1 for i in range(EPOCHS // EPOCHS_PER_STEP)]
  layers = [lambda x=x: getattr(x, 'weight') for x in lc_model.modules() if isinstance(x, nn.Linear)]
  compression_tasks = {
    Param(layers, device): (AsVector, ConstraintL0Pruning(kappa=int(model_size * (1 - prune_total))), 'pruning')
  }

  lc_alg = lc.Algorithm(
    model=lc_model,                            # model to compress
    compression_tasks=compression_tasks,  # specifications of compression
    l_step_optimization=my_l_step_batching,        # implementation of L-step
    mu_schedule=mu_s,                     # schedule of mu values
    evaluation_func=evaluate_performance      # evaluation function
  )

  lc_alg.run()

  print("LC Pruning Training Completed!")

  lc_model.cuda()
  shap_explainer = shap.Explainer(lc_model.infer, train_x)
  shap_values = shap_explainer(test_x)
  shap.plots.bar(shap_values, max_display=99)

  print("Model Performance after LC Training")
  evaluate_performance(lc_model)

  # Count near-zero parameters with tensor reductions and keep the totals as
  # plain Python ints.
  pruned = 0
  total = 0
  for _, parameter in lc_model.named_parameters():
    flat = parameter.detach().view(-1)
    pruned += int((flat.abs() < GC_THRESHOLD).sum().item())
    total += flat.numel()

  # Report a real percentage, matching the figure printed by gc_prune.
  print(f"LC Pruning completed, percentage of model pruned is: {pruned * 100 / total}")
  out_file = f"lc_lr_{LEARNING_RATE}_wd_{WEIGHT_DECAY}_epochs_{EPOCHS}_batching.json"
  save_shap_values(shap_values, out_file)
  print(f"Saved lc pruned model to -> {out_file}")

# Main training functions

In [None]:
def train_no_batching():
    """Run the full-batch experiment end to end.

    Trains a fresh network, prunes it with Granger causality, trains an
    LC-pruned network at the same pruning ratio, then prints how far each
    pruned model's feature importances drift from the unpruned model's.
    """
    # JSON files written by the individual stages and compared at the end
    unpruned_file = f"unpruned_lr_{LEARNING_RATE}_wd_{WEIGHT_DECAY}_epochs_{EPOCHS}_no_batching.json"
    gc_file = f"gc_lr_{LEARNING_RATE}_wd_{WEIGHT_DECAY}_epochs_{EPOCHS}_no_batching.json"
    lc_file = f"lc_lr_{LEARNING_RATE}_wd_{WEIGHT_DECAY}_epochs_{EPOCHS}_no_batching.json"

    # vanilla training of a freshly initialised model
    net = neural_network(num_features = NUM_FEATURES).to(device)
    history, net = vanilla_training_no_batching(net)

    # granger causal pruning, then LC pruning at the same ratio
    fraction_pruned = gc_prune(net, history, batching=False)
    lc_no_batching(fraction_pruned)

    # comparison against the unpruned feature importances
    print("GC Pruning Comparison")
    print(compare_feature_importances(unpruned_file, gc_file))

    print("LC Pruning Comparison")
    print(compare_feature_importances(unpruned_file, lc_file))

In [None]:
def train_batching():
    """Run the mini-batch experiment end to end.

    Trains a fresh network, prunes it with Granger causality, trains an
    LC-pruned network at the same pruning ratio, then prints how far each
    pruned model's feature importances drift from the unpruned model's.
    """
    # JSON files written by the individual stages and compared at the end
    unpruned_file = f"unpruned_lr_{LEARNING_RATE}_wd_{WEIGHT_DECAY}_epochs_{EPOCHS}_batching.json"
    gc_file = f"gc_lr_{LEARNING_RATE}_wd_{WEIGHT_DECAY}_epochs_{EPOCHS}_batching.json"
    lc_file = f"lc_lr_{LEARNING_RATE}_wd_{WEIGHT_DECAY}_epochs_{EPOCHS}_batching.json"

    # vanilla training of a freshly initialised model
    net = neural_network(num_features = NUM_FEATURES).to(device)
    history, net = vanilla_training_batching(net)

    # granger causal pruning, then LC pruning at the same ratio
    fraction_pruned = gc_prune(net, history, batching=True)
    lc_batching(fraction_pruned)

    # comparison against the unpruned feature importances
    print("GC Pruning Comparison")
    print(compare_feature_importances(unpruned_file, gc_file))

    print("LC Pruning Comparison")
    print(compare_feature_importances(unpruned_file, lc_file))

In [None]:
# Run the full-batch pipeline: vanilla training, GC pruning, LC pruning and
# the feature-importance comparison.
train_no_batching()

In [None]:
# train_batching()

# Main

In [None]:
def main():
  """Run the batched and then the full-batch experiment.

  Does nothing unless the defining module is executed as ``__main__``.
  """
  if __name__ != "__main__":
    return
  train_batching()
  train_no_batching()
In [None]:
# main()