In [None]:
!pip install semantichar==0.2


In [None]:
!git clone https://github.com/xiyuanzh/SHARE.git

In [None]:
cd ./SHARE


In [None]:
mkdir model

In [None]:
mkdir dataset


In [None]:
pwd

In [None]:
cd ./dataset

In [None]:
!unzip ./easy_imu_phone.zip -d ./

In [1]:
cd /home/keerthiv/HAR_models/SHARE/SHARE

/home/keerthiv/HAR_models/SHARE/SHARE


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
import torch
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import average_precision_score, f1_score, precision_score, recall_score, accuracy_score
from torch.nn.utils.rnn import pack_padded_sequence
from torch.cuda.amp import GradScaler, autocast

from semantichar.utils import all_label_augmentation

def DataBatch(data, label, text, l, batchsize, shuffle=True):
    
    n = data.shape[0]
    if shuffle:
        index = np.random.permutation(n)
    else:
        index = np.arange(n)
    for i in range(int(np.ceil(n/batchsize))):
        inds = index[i*batchsize : min(n,(i+1)*batchsize)]
        yield data[inds], label[inds], text[inds], l[inds]
        
def trainer(config, 
            enc, 
            dec, 
            cross_entropy, 
            optimizer, 
            tr_data, 
            tr_label, 
            tr_text, 
            len_text, 
            break_step, 
            vocab_size, 
            device
):
    """
    Train the model.
    Args:
        config: user-specified configurations.
        qat_model: encoder of the model.
        dec: decoder of the model.
        cross_entropy: loss function.
        optimizer: optimizer (default is Adam).
        tr_data, tr_label, tr_text, len_text: training data, label, label sequence, length of the label sequence. 
        break_step: length of the longest label sequence length (i.e., maximum decoding step).
        vocab_size: label name vocabulary size.
        device: cuda or cpu.
    """

    enc.train()
    dec.train()  
    scaler = GradScaler()
    total_loss = 0
    for batch_data, batch_label, batch_text, batch_len in \
        DataBatch(tr_data, tr_label, tr_text, len_text, config['batchSize']):
        
        batch_text = all_label_augmentation(batch_text, config['prob'], break_step, vocab_size)

        batch_data = batch_data.to(device)
        batch_label = batch_label.to(device)
        batch_text = batch_text.to(device)
        batch_len = batch_len.to(device)

        with autocast():
            enc_hidden = enc(batch_data)
        
            enc_hidden = enc(batch_data)
            pred, batch_text_sorted, decode_lengths, sort_ind \
                = dec(enc_hidden, batch_text, batch_len)
            
            targets = batch_text_sorted[:, 1:]

            pred, *_ = pack_padded_sequence(pred, decode_lengths, batch_first=True)
            targets, *_ = pack_padded_sequence(targets, decode_lengths, batch_first=True)

            loss = cross_entropy(pred, targets.long())

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

        total_loss += len(batch_data) * loss.item()

    total_loss /= len(tr_data)
    
    return total_loss 

  

import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import torch
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import time
import torch
import torch.nn.functional as F
from sklearn.metrics import average_precision_score, f1_score, precision_score, recall_score, accuracy_score

def evaluate(opt, 
             enc, 
             dec, 
             test_data, 
             test_label, 
             test_text, 
             test_len_text, 
             pred_dict, 
             seqs, 
             break_step, 
             class_num, 
             vocab_size, 
             device,
             load=True):
    """
    Evaluate the model.
    Args:
        opt: user-specified configurations.
        enc: encoder of the model.
        dec: decoder of the model.
        test_data, test_label, test_text, test_len_text: test data, label, label sequence, length of the label sequence.
        pred_dict: mapping from label token-id sequence to label id.
        seqs: label token-id sequence for all classes.
        break_step: length of the longest label sequence length (i.e., maximum decoding step).
        class_num: number of classes.
        vocab_size: label name vocabulary size.
        device: cuda or cpu.
        load: load saved model weights or not.
    """

    #enc.eval()
    #dec.eval()
    enc.cpu()
    dec.cpu()
    enc.eval()
    quantized_enc = torch.quantization.convert(enc, inplace=False)
    print("enc Model converted to quantized version")


    dec.eval()
    quantized_dec = torch.quantization.convert(dec, inplace=False)
    print("dec Model converted to quantized version")

    device = torch.device("cpu")
    quantized_enc.eval()
    quantized_dec.eval()
    quantized_enc.to(device)
    quantized_dec.to(device)
    #if load:
    #    enc.load_state_dict(torch.load(opt['model_path'] + opt['run_tag'] + '_enc.pth', map_location=device, weights_only=True))
    #    dec.load_state_dict(torch.load(opt['model_path'] + opt['run_tag'] + '_dec.pth', map_location=device, weights_only=True))

    hypotheses = list()
    batch_size = test_data.size(0)
    pred_whole = torch.zeros_like(test_label)
    seqs = seqs.to(device)

    total_evaluation_time = 0  # Initialize total evaluation time
    total_samples = 0  # Initialize total number of samples

    for batch_idx, (batch_data, batch_label, batch_text, batch_len) in enumerate(
        DataBatch(test_data, test_label, test_text, test_len_text, opt['batchSize'], shuffle=False)
    ):

        batch_data = batch_data.to(device)
        batch_label = batch_label.to(device)
        batch_text = batch_text.to(device)
        batch_len = batch_len.to(device)
        
        # Start timing after sending to device
        start_time = time.time()

        batch_size = batch_data.size(0)
        total_samples += batch_size  # Accumulate the number of samples
        encoder_out = quantized_enc(batch_data)  # (batch_size, enc_seq_len, encoder_dim)
        enc_seq_len = encoder_out.size(1)
        encoder_dim = encoder_out.size(2)

        encoder_out = encoder_out.unsqueeze(1).expand(batch_size, class_num, enc_seq_len, encoder_dim)
        encoder_out = encoder_out.reshape(batch_size * class_num, enc_seq_len, encoder_dim)

        k_prev_words = seqs[:, 0].unsqueeze(0).expand(batch_size, class_num).long()  # (batch_size, class_num)
        k_prev_words = k_prev_words.reshape(batch_size * class_num, 1)  # (batch_size * class_num, 1)

        h, c = quantized_dec.init_hidden_state(encoder_out)

        seq_scores = torch.zeros((batch_size, class_num)).to(device)

        for step in range(1, break_step):
            embeddings = quantized_dec.embedding(k_prev_words).squeeze(1)  # (batch_size * class_num, embed_dim)
            h, c = quantized_dec.decode_step(embeddings, (h, c))
            scores = quantized_dec.fc(h.reshape(batch_size, class_num, -1))  # (batch_size, class_num, vocab_size)
            scores = F.log_softmax(scores, dim=-1)
            k_prev_words = seqs[:, step].unsqueeze(0).expand(batch_size, class_num).long()
            for batch_i in range(batch_size):
                for class_i in range(class_num):
                    if k_prev_words[batch_i, class_i] != 0:
                        seq_scores[batch_i, class_i] += scores[batch_i, class_i, k_prev_words[batch_i, class_i]]
            k_prev_words = k_prev_words.reshape(batch_size * class_num, 1)  # (batch_size * class_num, 1)

        max_indices = seq_scores.argmax(dim=1)
        for batch_i in range(batch_size):
            max_i = max_indices[batch_i]
            seq = seqs[max_i].tolist()
            hypotheses.append([w for w in seq if w not in {0, vocab_size - 1}])
            pred_whole[batch_i + batch_idx * opt['batchSize']] = pred_dict["#".join(map(str, hypotheses[-1]))]

        # End timing for the batch
        end_time = time.time()
        batch_evaluation_time = end_time - start_time  # Calculate batch evaluation time
        total_evaluation_time += batch_evaluation_time  # Accumulate total evaluation time

        print(f'Batch {batch_idx + 1} Evaluation Time: {batch_evaluation_time:.2f} seconds')

    acc = accuracy_score(test_label.cpu().numpy(), pred_whole.cpu().numpy())
    prec = precision_score(test_label.cpu().numpy(), pred_whole.cpu().numpy(), average='macro', zero_division=0)
    rec = recall_score(test_label.cpu().numpy(), pred_whole.cpu().numpy(), average='macro', zero_division=0)
    f1 = f1_score(test_label.cpu().numpy(), pred_whole.cpu().numpy(), average='macro', zero_division=0)

    print(f'Total Evaluation Time: {total_evaluation_time:.2f} seconds')
    
    # Calculate evaluation time per batch and per sample
    eval_time_per_batch = total_evaluation_time / (batch_idx + 1)
    eval_time_per_sample = total_evaluation_time / total_samples

    print(f'Average Evaluation Time per Batch: {eval_time_per_batch:.2f} seconds')
    print(f'Average Evaluation Time per Sample: {eval_time_per_sample:.6f} seconds')

    return acc, prec, rec, f1, total_evaluation_time, eval_time_per_batch, eval_time_per_sample




In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.quantization

import semantichar.data 
from semantichar import imagebind_model
from semantichar.imagebind_model import ModalityType

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.quantization import QuantStub, DeQuantStub
import torch.quantization as quant
class CustomBatchNorm1d(nn.Module):
    def __init__(self, num_features):
        super(CustomBatchNorm1d, self).__init__()
        self.bn = nn.BatchNorm1d(num_features)

    def forward(self, x):
        # Convert batch normalization to quantization-aware operations
        if self.training:
            x = self.bn(x)
        else:
            x = torch.nn.functional.batch_norm(
                x, 
                self.bn.running_mean, 
                self.bn.running_var, 
                self.bn.weight, 
                self.bn.bias, 
                training=False
            )
        return x
    

class QuantizedLinear(nn.Module):
    def __init__(self, input_size, output_size, bias=True):
        super(QuantizedLinear, self).__init__()
        self.fc = nn.Linear(input_size, output_size, bias=bias)
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)  # Quantize input
        x = self.fc(x)     # Apply linear transformation
        x = self.dequant(x)  # Dequantize output
        return x

    def fuse_model(self):
        # Fuse the linear layer with the quantization stubs
        torch.quantization.fuse_modules(self, ['quant', 'fc', 'dequant'], inplace=True)

    @property
    def weight(self):
        return self.fc.weight

    @property
    def bias(self):
        return self.fc.bias
 
    
class CustomObserver(quant.MinMaxObserver):
    def __init__(self, *args, **kwargs):
        super(CustomObserver, self).__init__(*args, **kwargs)

class QuantizedConv1d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True):
        super(QuantizedConv1d, self).__init__()
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=bias)
        self.bn = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU()
        self.weight_fake_quant = quant.FakeQuantize.with_args(observer=quant.default_weight_observer, quant_min=-128, quant_max=127, dtype=torch.qint8, qscheme=torch.per_tensor_affine)()
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)  # Quantize input
        x = self.conv(x)   # Apply convolution
        x = self.bn(x)     # Apply batch normalization
        x = self.relu(x)   # Apply ReLU
        x = self.dequant(x)  # Dequantize output
        return x

    def fuse_model(self):
        # Fuse the conv, batch norm, and ReLU layers
        torch.quantization.fuse_modules(self, [['conv', 'bn', 'relu']], inplace=True)

    @property
    def weight(self):
        return self.conv.weight

    @property
    def bias(self):
        return self.conv.bias

    def apply_weight_fake_quant(self):
        self.weight_fake_quant(self.conv.weight)

class Encoder(nn.Module):
    def __init__(self, d_input: int, d_model: int, d_output: int, seq_len:int):
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv1d( d_input, 128, kernel_size=3, padding=(1))
        self.bn1 = nn.BatchNorm1d(128)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(128, 128, kernel_size=3, padding=(1))
        self.bn2 = nn.BatchNorm1d(128)
        self.relu2 = nn.ReLU()

        #self.conv3 = nn.Conv1d( 256, 256, kernel_size=3, padding=(1))
        #self.bn3 = nn.BatchNorm1d(256)
        #self.relu3 = nn.ReLU()
        #self.conv4 = nn.Conv1d(256, d_output, kernel_size=3, padding=(1))
        #self.bn4 = nn.BatchNorm1d(d_output)
        #self.relu4 = nn.ReLU()

        self.quant1 = quant.QuantStub()  # Quantizes the input
        self.dequant1 = quant.DeQuantStub()  # Dequantizes the output
        self.quant2 = quant.QuantStub()  # Quantizes the input
        self.dequant2 = quant.DeQuantStub()  # Dequantizes the output

        #self.quant3 = quant.QuantStub()  # Quantizes the input
        #self.dequant3 = quant.DeQuantStub()  # Dequantizes the output
        #self.quant4 = quant.QuantStub()  # Quantizes the input
        #self.dequant4 = quant.DeQuantStub()  # Dequantizes the output


    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x shape: [batch_size, seq_len, d_input]
        x = x.permute(0, 2, 1)  # [batch_size, d_input, seq_len]
        #x = x.unsqueeze(1)  # Add a dummy dimension: [batch_size, 1, d_input, seq_len]
        x = self.quant1(x)
        
        out = self.conv1(x)
        #out = self.bn1(out)
        out = self.relu1(out)
        out = self.dequant1(out)

        out = self.quant2(out)
        out = self.conv2(out)
        #out = self.bn2(out)
        out = self.relu2(out)
        out = self.dequant2(out)

        #out = self.quant3(out)
        #out = self.conv3(out)
        #out = self.bn3(out)
        #out = self.relu3(out)
        #out = self.dequant3(out)
        
        #out = self.quant4(out)
        #out = self.conv4(out)
        #out = self.bn4(out)
        #out = self.relu4(out)
        #out = self.dequant4(out)

        #out = out.squeeze(1)  # Remove the dummy dimension: [batch_size, d_output, seq_len]
        out = out.permute(0, 2, 1)  # [batch_size, seq_len, d_output]
        return out
    
class Encoder_q(nn.Module):
    def __init__(self, d_input: int, d_model: int, d_output: int, seq_len: int):
        super().__init__()
        self.layer1 = QuantizedConv1d(in_channels=d_input, out_channels=d_model, kernel_size=3, padding=1)
        self.layer2 = QuantizedConv1d(in_channels=d_model, out_channels=d_output, kernel_size=3, padding=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b, t, c = x.size()
        out = self.layer1(x.permute(0, 2, 1))
        self.layer1.apply_weight_fake_quant()  # Apply weight fake quantization for layer1
        out = self.layer2(out)
        self.layer2.apply_weight_fake_quant()  # Apply weight fake quantization for layer2
        out = out.permute(0, 2, 1)  # (b, seq_len, d_output)
        return out

class QuantizedLSTMCell(nn.Module):
    def __init__(self, input_size, hidden_size, bias=True):
        super(QuantizedLSTMCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias

        self.ih = nn.Linear(input_size, 4 * hidden_size, bias=bias)
        self.hh = nn.Linear(hidden_size, 4 * hidden_size, bias=bias)

        self.quant1 = QuantStub()
        self.dequant1 = DeQuantStub()

        self.quant2 = QuantStub()
        self.dequant2 = DeQuantStub()

        self.quant3 = QuantStub()
        self.dequant3 = DeQuantStub()


    def forward(self, input, hx):
        hx, cx = hx

        # Quantize inputs
        #input = self.quant1(input)
        hx = self.quant2(hx)
        cx = self.quant3(cx)

        # LSTM cell operations
        ih_out = self.ih(input)
        hh_out = self.hh(hx)

        # Dequantize before addition and multiplication
        #ih_out = self.dequant1(ih_out)
        hh_out = self.dequant2(hh_out)
        cx = self.dequant3(cx)

        gates = ih_out + hh_out

        i, f, g, o = gates.chunk(4, 1)

        i = torch.sigmoid(i)
        f = torch.sigmoid(f)
        g = torch.tanh(g)
        o = torch.sigmoid(o)

        cy = f * cx + i * g
        hy = o * torch.tanh(cy)

        return hy, cy

    def _init_hidden(self, batch_size, device):
        weight = next(self.parameters()).data
        return (weight.new(batch_size, self.hidden_size).zero_().to(device),
                weight.new(batch_size, self.hidden_size).zero_().to(device))
    
class Decoder(nn.Module):
    
    def __init__(self, embed_dim, decoder_dim, vocab, encoder_dim, device, dropout=0.5):
        super(Decoder, self).__init__()

        self.encoder_dim = encoder_dim
        self.embed_dim = embed_dim
        self.decoder_dim = decoder_dim
        self.vocab = vocab
        self.vocab_size = len(vocab)
        self.dropout = dropout
        self.device = device

        self.embedding = nn.Embedding(self.vocab_size, embed_dim)
        self.dropout = nn.Dropout(p=self.dropout)
        self.decode_step = nn.LSTMCell(embed_dim, decoder_dim, bias=True)
        #self.decode_step = QuantizedLSTMCell(embed_dim, decoder_dim, bias=True)
        self.init_h = nn.Linear(encoder_dim, decoder_dim)
        self.init_c = nn.Linear(encoder_dim, decoder_dim)
        #self.fc = nn.Linear(decoder_dim, self.vocab_size)
        self.fc = nn.Linear(decoder_dim, self.vocab_size)
        self.load_pretrained_embeddings()
        
        #EMBEDDING
        #self.fake_quant = quant.FakeQuantize.with_args(observer=quant.default_observer, quant_min=-128, quant_max=127, dtype=torch.qint8)       
        self.quantize_emb = QuantStub()
        self.dequantize_emb = DeQuantStub()

        #DECODER
        self.quant_h = QuantStub()
        self.dequant_h = DeQuantStub()
        self.quant_c = QuantStub()
        self.dequant_c = DeQuantStub()

        #INIT 
        self.quant_init_h = QuantStub()
        self.quant_init_c = QuantStub()
        self.dequant_init_h = QuantStub()
        self.dequant_init_c = DeQuantStub()
        
        #FC
        self.quant_fc = QuantStub()
        self.dequant_fc = DeQuantStub()

    def load_pretrained_embeddings(self):
        inputs = {
            ModalityType.TEXT: semantichar.data.load_and_transform_text(self.vocab, self.device)
        }
        model = imagebind_model.imagebind_huge(pretrained=True)
        model.eval()
        model.to(self.device)
        with torch.no_grad():
            embeddings = model(inputs)['text']
        self.embedding.weight = nn.Parameter(embeddings)
        self.fc.bias.data.fill_(0)
        self.fc.weight.data.uniform_(-0.1, 0.1)

    def init_hidden_state(self, encoder_out):

        #mean_encoder_out = encoder_out.mean(dim=1)
        mean_encoder_out = encoder_out.mean(dim=1)

        mean_encoder_out_h = self.quant_init_h(mean_encoder_out)
        mean_encoder_out_c = self.quant_init_c(mean_encoder_out)

        h = self.init_h(mean_encoder_out_h)
        c = self.init_c(mean_encoder_out_c)

        h = self.dequant_init_h(h)
        c = self.dequant_init_c(c)

        return h, c

    def forward(self, encoder_out, encoded_captions, caption_lengths):
        batch_size = encoder_out.size(0)
        encoder_dim = encoder_out.size(-1)
        vocab_size = self.vocab_size
        
        encoder_out = encoder_out.view(batch_size, -1, encoder_dim)

        caption_lengths, sort_ind = caption_lengths.squeeze(1).sort(dim=0, descending=True)
        encoder_out = encoder_out[sort_ind]
        encoded_captions = encoded_captions[sort_ind]
        embeddings = self.embedding(encoded_captions.long())
        #embeddings = self.fake_quant(embeddings)
        q_emb = self.quantize_emb(embeddings)  # Quantize the embeddings
        dq_emb = self.dequantize_emb(q_emb)  # Dequantize back to float

        h, c = self.init_hidden_state(encoder_out)

        decode_lengths = (caption_lengths - 1).tolist()

        predictions = torch.zeros(batch_size, max(decode_lengths), vocab_size).to(self.device)

        for t in range(max(decode_lengths)):
            batch_size_t = sum([l > t for l in decode_lengths])
            step_embeddings = dq_emb[:batch_size_t, t, :]
            
            h_batch = h[:batch_size_t]
            c_batch = c[:batch_size_t]
            #print( h_batch.shape)
            #print( c_batch.shape)
            h_quant = self.quant_h(h_batch)
            c_quant = self.quant_c(c_batch)
            
            #print( h_quant.shape)
            #print( c_quant.shape)

            hx = ((h_quant, c_quant))           
            #print(hx.shape) 
            h, c = self.decode_step(step_embeddings,hx)
            
            h_fc = self.quant_fc(h)
            preds = self.fc(self.dropout(h_fc))
            preds = self.dequant_fc(preds)

            predictions[:batch_size_t, t, :] = preds

        return predictions, encoded_captions, decode_lengths, sort_ind


In [4]:
import argparse
import datetime
import os
import json
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.quantization as quant
from torch.quantization.observer import MovingAverageMinMaxObserver, default_weight_observer

#from semantichar.seq2seq import Encoder, Decoder
#from semantichar.exp import trainer, evaluate
from semantichar.dataset import prepare_dataset

class CustomObserver(MovingAverageMinMaxObserver):
    def calculate_qparams(self):
        scale, _ = super().calculate_qparams()
        zero_point = torch.tensor(0, dtype=torch.int32)
        return scale, zero_point

In [5]:
pwd

'/home/keerthiv/HAR_models/SHARE/SHARE'

In [6]:
import random
import torch
import numpy as np
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.nn as nn
import torch.quantization as quant
#from custom_observer import CustomObserver  # Assuming you have a custom observer implementation
from torch.quantization.observer import default_weight_observer
import json

# Updated values for the arguments
dataset = 'easy_imu_phone'
data_path = './'
manualSeed = 1
epochs = 150
early_stopping = 50
batchSize = 16
lr = 1e-4
prob = 0.4
cuda = True
run_tag = 'test'
model_path = './model/'

# Print the updated values
print(f'dataset: {dataset}')
print(f'data_path: {data_path}')
print(f'manualSeed: {manualSeed}')
print(f'epochs: {epochs}')
print(f'early_stopping: {early_stopping}')
print(f'batchSize: {batchSize}')
print(f'lr: {lr}')
print(f'prob: {prob}')
print(f'cuda: {cuda}')
print(f'run_tag: {run_tag}')
print(f'model_path: {model_path}')

# Set random seed
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)
np.random.seed(manualSeed)

cudnn.benchmark = True

if torch.cuda.is_available() and not cuda:
    print("You have a cuda device, so you might want to run with --cuda as option")
device = torch.device("cuda:0" if cuda else "cpu")

data_root = data_path + '/dataset/' + dataset
config_file = data_path + '/configs/' + dataset + '.json'
with open(config_file, 'r') as config_file:
    data = json.load(config_file)
    label_dictionary = {int(k): v for k, v in data['label_dictionary'].items()}

tr_data = np.load(data_root + '/x_train.npy')
tr_label = np.load(data_root + '/y_train.npy')

test_data = np.load(data_root + '/x_test.npy')
test_label = np.load(data_root + '/y_test.npy')






dataset: easy_imu_phone
data_path: ./
manualSeed: 1
epochs: 150
early_stopping: 50
batchSize: 16
lr: 0.0001
prob: 0.4
cuda: True
run_tag: test
model_path: ./model/
Random Seed:  1


In [7]:
print("Available quantized engines:", torch.backends.quantized.supported_engines)

# Define quantization configurations
#default_qconfig = quant.get_default_qconfig(torch.backends.quantized.engine)
'''
# Custom qconfig enforcing per_tensor_affine
custom_qconfig = quant.QConfig(
    activation=quant.default_observer.with_args(qscheme=torch.per_tensor_affine),
    weight=quant.default_weight_observer.with_args(qscheme=torch.per_tensor_affine)
)

# Define custom QAT configuration
custom_qconfig = torch.quantization.QConfig(
    activation=MovingAverageMinMaxObserver.with_args(dtype=torch.quint8, reduce_range=True),
    weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8)
)
'''

Available quantized engines: ['qnnpack', 'none', 'onednn', 'x86', 'fbgemm']


'\n# Custom qconfig enforcing per_tensor_affine\ncustom_qconfig = quant.QConfig(\n    activation=quant.default_observer.with_args(qscheme=torch.per_tensor_affine),\n    weight=quant.default_weight_observer.with_args(qscheme=torch.per_tensor_affine)\n)\n\n# Define custom QAT configuration\ncustom_qconfig = torch.quantization.QConfig(\n    activation=MovingAverageMinMaxObserver.with_args(dtype=torch.quint8, reduce_range=True),\n    weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8)\n)\n'

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.quantization as quant
from torch.quantization.observer import MovingAverageMinMaxObserver, PerChannelMinMaxObserver

class CustomObserver(MovingAverageMinMaxObserver):
    def calculate_qparams(self):
        scale, _ = super().calculate_qparams()
        zero_point = torch.tensor(0, dtype=torch.int32)
        return scale, zero_point
    
# Check available quantized engines
print("Available quantized engines:", torch.backends.quantized.supported_engines)

# Set the quantized engine
torch.backends.quantized.engine = 'fbgemm'  # Use 'qnnpack' for ARM or mobile

# Prepare dataset and models (assuming prepare_dataset is defined elsewhere)
seq_len, dim, class_num, vocab_size, break_step, word_list, pred_dict, seqs, \
    tr_data, test_data, \
    tr_label, test_label, \
    tr_text, test_text, \
    len_text, test_len_text = prepare_dataset(tr_data, tr_label, test_data, test_label, label_dictionary)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

enc = Encoder(d_input=dim, d_model=128, d_output=128, seq_len=seq_len).to(device)
dec = Decoder(embed_dim=1024, decoder_dim=128, vocab=word_list, encoder_dim=128, device=device).to(device)
enc.eval()

#dec.qconfig = custom_qconfig
dec.qconfig = quant.get_default_qat_qconfig('fbgemm')
enc.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
'''
# Define a function to set qconfig for embedding layers
def set_qconfig_for_embedding(module, qconfig):
    if isinstance(module, nn.Embedding):
        module.qconfig = qconfig
    for child in module.children():
        set_qconfig_for_embedding(child, qconfig)
'''
# Apply the quantization configuration to the embedding layers in the decoder
enc.train()
dec.train()

# Move the models to GPU for training
enc.to(device)
dec.to(device)

# Define optimizer and loss function
params = list(enc.parameters()) + list(dec.parameters())
optimizer = optim.Adam(params, lr=1e-4)
cross_entropy = nn.CrossEntropyLoss().to(device)

Available quantized engines: ['qnnpack', 'none', 'onednn', 'x86', 'fbgemm']
load dataset
torch.Size([4953, 150, 9]) torch.Size([1320, 150, 9])


  model.load_state_dict(torch.load(".checkpoints/imagebind_huge.pth"))


In [9]:
# Configuration dictionary
config = {
    'batchSize': batchSize,
    'epochs': epochs,
    'run_tag': run_tag,
    'dataset': dataset,
    'cuda': cuda,
    'manualSeed': manualSeed,
    'data_path': data_path,
    'early_stopping': early_stopping,
    'lr': 0.0001,
    'prob': prob,
    'model_path': model_path
}



In [10]:
device = torch.device('cuda')
enc.to(device)
dec.to(device)
tr_data.to(device)
tr_label.to(device)
tr_text.to(device)
len_text.to(device)

for epoch in range(500):
    loss = trainer(
        config, # configs
        enc, # encoder
        dec, # decoder
        cross_entropy, # loss
        optimizer, # optimizer
        tr_data, # training input
        tr_label, # training labels
        tr_text, # training label text sequence
        len_text, # training label text sequence length
        break_step, # max training label text sequence length
        vocab_size, # vocabulary size
        device, # device
    )

    print("epoch: %d total loss: %.4f" % (epoch + 1, loss))


  scaler = GradScaler()
  with autocast():


epoch: 1 total loss: 1.7783
epoch: 2 total loss: 1.1715
epoch: 3 total loss: 1.0113
epoch: 4 total loss: 0.9421
epoch: 5 total loss: 0.8723
epoch: 6 total loss: 0.8284
epoch: 7 total loss: 0.7918
epoch: 8 total loss: 0.7767
epoch: 9 total loss: 0.7584
epoch: 10 total loss: 0.7293
epoch: 11 total loss: 0.7260
epoch: 12 total loss: 0.7050
epoch: 13 total loss: 0.6966
epoch: 14 total loss: 0.6863
epoch: 15 total loss: 0.6828
epoch: 16 total loss: 0.6523
epoch: 17 total loss: 0.6450
epoch: 18 total loss: 0.6401
epoch: 19 total loss: 0.6238
epoch: 20 total loss: 0.6133
epoch: 21 total loss: 0.5985
epoch: 22 total loss: 0.5966
epoch: 23 total loss: 0.6028
epoch: 24 total loss: 0.5814
epoch: 25 total loss: 0.5844
epoch: 26 total loss: 0.5707
epoch: 27 total loss: 0.5635
epoch: 28 total loss: 0.5773
epoch: 29 total loss: 0.5593
epoch: 30 total loss: 0.5546
epoch: 31 total loss: 0.5495
epoch: 32 total loss: 0.5513
epoch: 33 total loss: 0.5411
epoch: 34 total loss: 0.5254
epoch: 35 total loss: 0

In [11]:
enc

Encoder(
  (conv1): Conv1d(9, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (conv2): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU()
  (quant1): QuantStub()
  (dequant1): DeQuantStub()
  (quant2): QuantStub()
  (dequant2): DeQuantStub()
)

In [None]:
'''
import torch

def fold_batch_norm(conv, bn):
    with torch.no_grad():
        # Ensure the batch norm parameters are on the same device as the convolution weights
        if conv.weight.device != bn.weight.device:
            bn = bn.to(conv.weight.device)
        
        # Calculate scale and shift factors
        scale_factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
        shift_factor = bn.bias - bn.running_mean * scale_factor

        # Reshape scale_factor to match convolution weight dimensions for 1D convolution
        scale_factor = scale_factor.reshape([-1, 1, 1])
        
        # Apply scaling to convolution weights
        conv.weight.copy_(conv.weight * scale_factor)
        
        # Handle convolution bias
        if conv.bias is None:
            conv.bias = torch.nn.Parameter(shift_factor)
        else:
            conv.bias.copy_((conv.bias - bn.running_mean) * scale_factor.squeeze() + bn.bias)
    
    return conv

# Example usage:
enc.to('cpu')  # Ensure the model is on CPU for parameter updates
enc.conv1 = fold_batch_norm(enc.conv1, enc.bn1)
enc.conv2 = fold_batch_norm(enc.conv2, enc.bn2)

# Remove batch norm layers after folding
del enc.bn1
del enc.bn2

# Define the new forward method without BatchNorm layers
def new_forward(self, x):
    x = self.quant1(x.permute(0, 2, 1))  # Quantize the input
    x = self.conv1(x)
    x = self.relu1(x)
    x = self.dequant1(x)
    x = self.quant2(x)  # Quantize the input
    x = self.conv2(x)
    x = self.relu2(x)
    x = self.dequant2(x)
    x = x.permute(0, 2, 1)
    return x

# Attach the new forward method to the model instance
enc.forward = new_forward.__get__(enc, Encoder)

# Verify the modified forward method
print(enc.forward)

'''

In [12]:
import copy
enc.eval()
#enc = torch.quantization.fuse_modules(enc, [['conv1', 'bn1', 'relu1'], ['conv2', 'bn2', 'relu2'], ['conv3', 'bn3', 'relu3'],  ['conv4', 'bn4', 'relu4']])
enc.to('cpu')
enc.eval()
print( enc )
#quantized_enc = Encoder(9,128,128,150)
#quantized_enc = copy.deepcopy(enc)
#quantized_enc.conv1 = enc.conv1
#quantized_enc.conv2 = quant.convert(enc.conv2, inplace=False)
quantized_enc = quant.convert(enc, inplace=False)
print( quantized_enc)

Encoder(
  (conv1): Conv1d(9, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (conv2): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU()
  (quant1): QuantStub()
  (dequant1): DeQuantStub()
  (quant2): QuantStub()
  (dequant2): DeQuantStub()
)
Encoder(
  (conv1): Conv1d(9, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (conv2): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU()
  (quant1): QuantStub()
  (dequant1): DeQuantStub()
  (quant2): QuantStub()
  (dequant2): DeQuantStub()
)


In [13]:
custom_qconfig = quant.QConfig(
    activation=quant.default_observer,
    weight=quant.default_weight_observer
)
float_qparams_weight_only_qconfig = quant.float_qparams_weight_only_qconfig
# Apply the quantization configuration to the embedding layers again before conversion
#set_qconfig_for_embedding(dec, float_qparams_weight_only_qconfig)
#enc.eval()
dec.eval()
#enc.layer1.fuse_model()
#enc.layer2.fuse_model()
#enc.qconfig = default_qconfig
#enc.cpu()
dec.cpu()


# Convert models to quantized versions
#quantized_enc = quant.convert(enc, inplace=False)
#print("Encoder model converted to quantized version")

quantized_dec = quant.convert(dec, inplace=False)
#print("Decoder model converted to quantized version")

# Print the quantized models
#print(quantized_enc)
print(quantized_dec)

Decoder(
  (embedding): Embedding(37, 1024)
  (dropout): Dropout(p=0.5, inplace=False)
  (decode_step): LSTMCell(1024, 128)
  (init_h): Linear(in_features=128, out_features=128, bias=True)
  (init_c): Linear(in_features=128, out_features=128, bias=True)
  (fc): Linear(in_features=128, out_features=37, bias=True)
  (quantize_emb): QuantStub()
  (dequantize_emb): DeQuantStub()
  (quant_h): QuantStub()
  (dequant_h): DeQuantStub()
  (quant_c): QuantStub()
  (dequant_c): DeQuantStub()
  (quant_init_h): QuantStub()
  (quant_init_c): QuantStub()
  (dequant_init_h): QuantStub()
  (dequant_init_c): DeQuantStub()
  (quant_fc): QuantStub()
  (dequant_fc): DeQuantStub()
)


In [17]:
# Save the quantized models
torch.save(enc.state_dict(), model_path + run_tag + '_enc.pth')
torch.save(dec.state_dict(), model_path + run_tag + '_dec.pth')

In [18]:
# Save the quantized models
torch.save(quantized_enc.state_dict(), model_path + run_tag + 'quant_enc.pth')
torch.save(quantized_dec.state_dict(), model_path + run_tag + 'quant_dec.pth')

In [16]:
device = torch.device("cpu")
dec.eval()
dec.cpu()
enc.eval()
enc.cpu()

hypotheses = list()
batch_size = test_data.size(0)
pred_whole = torch.zeros_like(test_label)
seqs = seqs.to(device)

total_evaluation_time = 0  # Initialize total evaluation time
total_samples = 0  # Initialize total number of samples

for batch_idx, (batch_data, batch_label, batch_text, batch_len) in enumerate(
        DataBatch(test_data, test_label, test_text, test_len_text, config['batchSize'], shuffle=False)
    ):

        batch_data = batch_data.to(device)
        batch_label = batch_label.to(device)
        batch_text = batch_text.to(device)
        batch_len = batch_len.to(device)
        
        # Start timing after sending to device
        start_time = time.time()

        batch_size = batch_data.size(0)
        total_samples += batch_size  # Accumulate the number of samples
        encoder_out = quantized_enc(batch_data)  # (batch_size, enc_seq_len, encoder_dim)
        enc_seq_len = encoder_out.size(1)
        encoder_dim = encoder_out.size(2)

        encoder_out = encoder_out.unsqueeze(1).expand(batch_size, class_num, enc_seq_len, encoder_dim)
        encoder_out = encoder_out.reshape(batch_size * class_num, enc_seq_len, encoder_dim)

        k_prev_words = seqs[:, 0].unsqueeze(0).expand(batch_size, class_num).long()  # (batch_size, class_num)
        k_prev_words = k_prev_words.reshape(batch_size * class_num, 1)  # (batch_size * class_num, 1)

        h, c = quantized_dec.init_hidden_state(encoder_out)

        seq_scores = torch.zeros((batch_size, class_num)).to(device)

        for step in range(1, break_step):
            embeddings = quantized_dec.embedding(k_prev_words).squeeze(1)  # (batch_size * class_num, embed_dim)
            h, c = quantized_dec.decode_step(embeddings, (h, c))
            scores = quantized_dec.fc(h.reshape(batch_size, class_num, -1))  # (batch_size, class_num, vocab_size)
            scores = F.log_softmax(scores, dim=-1)
            k_prev_words = seqs[:, step].unsqueeze(0).expand(batch_size, class_num).long()
            for batch_i in range(batch_size):
                for class_i in range(class_num):
                    if k_prev_words[batch_i, class_i] != 0:
                        seq_scores[batch_i, class_i] += scores[batch_i, class_i, k_prev_words[batch_i, class_i]]
            k_prev_words = k_prev_words.reshape(batch_size * class_num, 1)  # (batch_size * class_num, 1)

        max_indices = seq_scores.argmax(dim=1)
        for batch_i in range(batch_size):
            max_i = max_indices[batch_i]
            seq = seqs[max_i].tolist()
            hypotheses.append([w for w in seq if w not in {0, vocab_size - 1}])
            pred_whole[batch_i + batch_idx * config['batchSize']] = pred_dict["#".join(map(str, hypotheses[-1]))]

        # End timing for the batch
        end_time = time.time()
        batch_evaluation_time = end_time - start_time  # Calculate batch evaluation time
        total_evaluation_time += batch_evaluation_time  # Accumulate total evaluation time

        print(f'Batch {batch_idx + 1} Evaluation Time: {batch_evaluation_time:.2f} seconds')

acc = accuracy_score(test_label.cpu().numpy(), pred_whole.cpu().numpy())
prec = precision_score(test_label.cpu().numpy(), pred_whole.cpu().numpy(), average='macro', zero_division=0)
rec = recall_score(test_label.cpu().numpy(), pred_whole.cpu().numpy(), average='macro', zero_division=0)
f1 = f1_score(test_label.cpu().numpy(), pred_whole.cpu().numpy(), average='macro', zero_division=0)

print(f'Total Evaluation Time: {total_evaluation_time:.2f} seconds')
    
    # Calculate evaluation time per batch and per sample
eval_time_per_batch = total_evaluation_time / (batch_idx + 1)
eval_time_per_sample = total_evaluation_time / total_samples

print(f'Average Evaluation Time per Batch: {eval_time_per_batch:.2f} seconds')
print(f'Average Evaluation Time per Sample: {eval_time_per_sample:.6f} seconds')
print('Test Acc: %.4f Macro-Prec: %.4f Macro-Rec: %.4f Macro-F1: %.4f' % (acc, prec, rec, f1))


Batch 1 Evaluation Time: 0.04 seconds
Batch 2 Evaluation Time: 0.03 seconds
Batch 3 Evaluation Time: 0.03 seconds
Batch 4 Evaluation Time: 0.03 seconds
Batch 5 Evaluation Time: 0.04 seconds
Batch 6 Evaluation Time: 0.04 seconds
Batch 7 Evaluation Time: 0.04 seconds
Batch 8 Evaluation Time: 0.03 seconds
Batch 9 Evaluation Time: 0.04 seconds
Batch 10 Evaluation Time: 0.03 seconds
Batch 11 Evaluation Time: 0.03 seconds
Batch 12 Evaluation Time: 0.03 seconds
Batch 13 Evaluation Time: 0.03 seconds
Batch 14 Evaluation Time: 0.03 seconds
Batch 15 Evaluation Time: 0.04 seconds
Batch 16 Evaluation Time: 0.03 seconds
Batch 17 Evaluation Time: 0.04 seconds
Batch 18 Evaluation Time: 0.04 seconds
Batch 19 Evaluation Time: 0.04 seconds
Batch 20 Evaluation Time: 0.04 seconds
Batch 21 Evaluation Time: 0.04 seconds
Batch 22 Evaluation Time: 0.04 seconds
Batch 23 Evaluation Time: 0.04 seconds
Batch 24 Evaluation Time: 0.04 seconds
Batch 25 Evaluation Time: 0.04 seconds
Batch 26 Evaluation Time: 0.04 sec

In [19]:
#to be run on raspberry pi 
torch.backends.quantized.engine = 'qnnpack'

enc_loaded = Encoder(d_input=dim, d_model=128, d_output=128, seq_len=seq_len).to(device)
dec_loaded = Decoder(embed_dim=1024, decoder_dim=128, vocab=word_list, encoder_dim=128, device=device).to(device)

# Prepare dataset and models (assuming prepare_dataset is defined elsewhere)
seq_len, dim, class_num, vocab_size, break_step, word_list, pred_dict, seqs, \
    tr_data, test_data, \
    tr_label, test_label, \
    tr_text, test_text, \
    len_text, test_len_text = prepare_dataset(tr_data, tr_label, test_data, test_label, label_dictionary)


# Prepare the recreated models for QAT
enc_loaded.qconfig = quant.get_default_qat_qconfig('qnnpack')
dec_loaded.qconfig = quant.get_default_qat_qconfig('qnnpack')
quant.prepare_qat(enc_loaded, inplace=True)
quant.prepare_qat(dec_loaded, inplace=True)
quant.convert(enc_loaded, inplace=True)
quant.convert(dec_loaded, inplace=True)

# Load the saved state_dicts into the recreated models
enc_loaded.load_state_dict(torch.load(model_path + run_tag + 'quant_enc.pth'))
dec_loaded.load_state_dict(torch.load(model_path + run_tag + 'quant_dec.pth'))

device = torch.device("cpu")
enc_loaded.eval()
enc_loaded.cpu()
dec_loaded.eval()
dec_loaded.cpu()

hypotheses = list()
batch_size = test_data.size(0)
pred_whole = torch.zeros_like(test_label)
seqs = seqs.to(device)

total_evaluation_time = 0  # Initialize total evaluation time
total_samples = 0  # Initialize total number of samples

for batch_idx, (batch_data, batch_label, batch_text, batch_len) in enumerate(
        DataBatch(test_data, test_label, test_text, test_len_text, config['batchSize'], shuffle=False)
    ):

        batch_data = batch_data.to(device)
        batch_label = batch_label.to(device)
        batch_text = batch_text.to(device)
        batch_len = batch_len.to(device)
        
        # Start timing after sending to device
        start_time = time.time()

        batch_size = batch_data.size(0)
        total_samples += batch_size  # Accumulate the number of samples
        encoder_out = enc_loaded(batch_data)  # (batch_size, enc_seq_len, encoder_dim)
        enc_seq_len = encoder_out.size(1)
        encoder_dim = encoder_out.size(2)

        encoder_out = encoder_out.unsqueeze(1).expand(batch_size, class_num, enc_seq_len, encoder_dim)
        encoder_out = encoder_out.reshape(batch_size * class_num, enc_seq_len, encoder_dim)

        k_prev_words = seqs[:, 0].unsqueeze(0).expand(batch_size, class_num).long()  # (batch_size, class_num)
        k_prev_words = k_prev_words.reshape(batch_size * class_num, 1)  # (batch_size * class_num, 1)

        h, c = dec_loaded.init_hidden_state(encoder_out)

        seq_scores = torch.zeros((batch_size, class_num)).to(device)

        for step in range(1, break_step):
            embeddings = dec_loaded.embedding(k_prev_words).squeeze(1)  # (batch_size * class_num, embed_dim)
            h, c = dec_loaded.decode_step(embeddings, (h, c))
            scores = dec_loaded.fc(h.reshape(batch_size, class_num, -1))  # (batch_size, class_num, vocab_size)
            scores = F.log_softmax(scores, dim=-1)
            k_prev_words = seqs[:, step].unsqueeze(0).expand(batch_size, class_num).long()
            for batch_i in range(batch_size):
                for class_i in range(class_num):
                    if k_prev_words[batch_i, class_i] != 0:
                        seq_scores[batch_i, class_i] += scores[batch_i, class_i, k_prev_words[batch_i, class_i]]
            k_prev_words = k_prev_words.reshape(batch_size * class_num, 1)  # (batch_size * class_num, 1)

        max_indices = seq_scores.argmax(dim=1)
        for batch_i in range(batch_size):
            max_i = max_indices[batch_i]
            seq = seqs[max_i].tolist()
            hypotheses.append([w for w in seq if w not in {0, vocab_size - 1}])
            pred_whole[batch_i + batch_idx * config['batchSize']] = pred_dict["#".join(map(str, hypotheses[-1]))]

        # End timing for the batch
        end_time = time.time()
        batch_evaluation_time = end_time - start_time  # Calculate batch evaluation time
        total_evaluation_time += batch_evaluation_time  # Accumulate total evaluation time

        print(f'Batch {batch_idx + 1} Evaluation Time: {batch_evaluation_time:.2f} seconds')

acc = accuracy_score(test_label.cpu().numpy(), pred_whole.cpu().numpy())
prec = precision_score(test_label.cpu().numpy(), pred_whole.cpu().numpy(), average='macro', zero_division=0)
rec = recall_score(test_label.cpu().numpy(), pred_whole.cpu().numpy(), average='macro', zero_division=0)
f1 = f1_score(test_label.cpu().numpy(), pred_whole.cpu().numpy(), average='macro', zero_division=0)

print(f'Total Evaluation Time: {total_evaluation_time:.2f} seconds')
    
    # Calculate evaluation time per batch and per sample
eval_time_per_batch = total_evaluation_time / (batch_idx + 1)
eval_time_per_sample = total_evaluation_time / total_samples

print(f'Average Evaluation Time per Batch: {eval_time_per_batch:.2f} seconds')
print(f'Average Evaluation Time per Sample: {eval_time_per_sample:.6f} seconds')
print('Test Acc: %.4f Macro-Prec: %.4f Macro-Rec: %.4f Macro-F1: %.4f' % (acc, prec, rec, f1))



  model.load_state_dict(torch.load(".checkpoints/imagebind_huge.pth"))


KeyError: tensor(0.)