In [None]:
import torch
import numpy as np
import sys

## Config and Data Generators

In [None]:
class config:
    """Run settings shared by the notebook cells: dataset paths, model sizes and training hyperparameters."""
    # Text files read line-by-line by the data generators (first line is a header that is skipped).
    data_path_train = 'dataset/Amazon/amazon_train.txt'
    data_path_test = 'dataset/Amazon/amazon_test.txt'
    # When truthy, get_default_device() picks CUDA if it is available; otherwise the CPU is used.
    GPUs = True
    num_threads = 44 # Only used when GPUs is empty string  -- NOTE(review): GPUs is a bool here and no visible cell reads num_threads; confirm it is still needed
    lr = 0.0001  # learning rate passed to the Adam optimizer
    sparsity = 0.005  # NOTE(review): not referenced by the cells shown here -- confirm whether it is still needed
    feature_dim = 135909  # width of the sparse input (rows of W1)
    n_classes = 670091  # number of output classes (columns of W2)
    n_train = 490449  # number of training rows, used to derive steps_per_epoch
    n_test = 153025  # number of test rows, used to derive the full-evaluation step count
    n_epochs = 20
    batch_size = 256
    test_batch_size = 256
    hidden_dim = 128  # width of the single hidden layer
    log_file = 'log_amz_torch_full_backward'  # opened in append mode by the training loop

In [None]:
def get_default_device():
    """Return the CUDA device when CUDA is available and config.GPUs is truthy, otherwise the CPU device."""
    # Availability is checked first so config.GPUs is only consulted on CUDA-capable machines.
    use_cuda = torch.cuda.is_available() and config.GPUs
    return torch.device("cuda" if use_cuda else "cpu")

In [None]:
import os
# If the runtime is connected to a Colab hosted runtime (detected through the
# COLAB_GPU environment variable), read the dataset from Google Drive instead
# of the local default paths set in config.
if "COLAB_GPU" in os.environ:
    config.data_path_train = '/content/drive/MyDrive/Colab Datasets/Amazon/amazon_train.txt'
    config.data_path_test = '/content/drive/MyDrive/Colab Datasets/Amazon/amazon_test.txt'
    # Drive has to be mounted at /content/drive for the two paths above to resolve.
    from google.colab import drive
    drive.mount('/content/drive')

In [None]:
# Resolve the compute device once; later cells pass it to to_device().
device = get_default_device()

In [None]:
# Display which device was selected.
device

device(type='cuda')

In [None]:
def to_device(data, device):
    """Move a tensor, or a (possibly nested) list/tuple of tensors, to `device`.

    A list or tuple input always comes back as a list; a single tensor comes
    back as the result of a non-blocking `.to(device)` call.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking = True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

In [None]:
# ! cat /content/drive/MyDrive/Colab\ Datasets/Amazon/amazon_train_sample.txt > sample.txt

In [None]:
# Module-level shortcuts for the config values; fit() and the training-schedule
# cells read `batch_size` directly.
batch_size = config.batch_size
n_classes = config.n_classes

In [None]:
def data_generator(file_name, batch_size, n_classes):
    """Yield full training batches parsed from a sparse text dataset file.

    The first line of the file is a header and is skipped. Every following
    line has the form ``label[,label...] idx:val idx:val ...`` (single spaces
    between fields).

    Args:
        file_name: path of the UTF-8 text file to read.
        batch_size: number of rows per yielded batch.
        n_classes: size of the label space. Kept for interface compatibility;
            the targets are yielded as coordinate lists, so no dense
            ``batch_size x n_classes`` buffer is allocated here.

    Yields:
        A 7-tuple ``(idxs_x, idxs_y, vals, y_batch_idxs_x, y_batch_idxs_y,
        y_batch_vals, labels_batch)`` where
        * ``idxs_x`` / ``idxs_y`` / ``vals`` are the row-in-batch, feature
          index and value of every non-zero input entry,
        * ``y_batch_idxs_x`` / ``y_batch_idxs_y`` / ``y_batch_vals`` are the
          row-in-batch, class index and weight (``1 / number of labels of the
          row``) of every target entry,
        * ``labels_batch`` holds the raw label list of each row.

    The generator makes a single pass over the file: it stops at the first
    empty line or at end of file, and a trailing partial batch is dropped.
    Nothing is yielded when ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        return
    with open(file_name, "r", encoding = "utf-8") as txt_file:
        txt_file.readline() # ignore the header
        while True:
            vals, idxs_x, idxs_y = [], [], []
            y_batch_idxs_x, y_batch_idxs_y, y_batch_vals = [], [], []
            labels_batch = []
            for row in range(batch_size):
                line = txt_file.readline().strip()
                if not line:
                    # EOF (or blank line): the incomplete batch is discarded.
                    return
                # Split once; field 0 holds the labels, the rest are idx:val pairs.
                tokens = line.split(' ')
                labels = [int(lbl) for lbl in tokens[0].split(',')]
                # Each label of a row gets an equal share of the target mass.
                y_batch_idxs_x += [row] * len(labels)
                y_batch_idxs_y += labels
                y_batch_vals += [1 / len(labels)] * len(labels)
                labels_batch.append(labels)
                # Sparse input coordinates and values of this row.
                pairs = [itm.split(':') for itm in tokens[1:]]
                idxs_x += [row] * len(pairs)
                idxs_y += [int(pair[0]) for pair in pairs]
                vals += [float(pair[1]) for pair in pairs]
            yield (idxs_x, idxs_y, vals, y_batch_idxs_x, y_batch_idxs_y, y_batch_vals, labels_batch)

In [None]:
def data_generator_tst(file_name, batch_size, n_classes):
    """Yield successive full test batches parsed from a sparse text dataset file.

    The first line of the file is a header and is skipped. Every following
    line has the form ``label[,label...] idx:val idx:val ...`` (single spaces
    between fields).

    Args:
        file_name: path of the UTF-8 text file to read.
        batch_size: number of rows per yielded batch.
        n_classes: size of the label space (unused here, kept for interface
            compatibility).

    Yields:
        A 4-tuple ``(idxs_x, idxs_y, vals, labels_batch)``: row-in-batch,
        feature index and value of every non-zero input entry, plus the raw
        label list of each row.

    The generator keeps producing batches until the first empty line or end of
    file; a trailing partial batch is dropped. Nothing is yielded when
    ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        return
    with open(file_name, "r", encoding = "utf-8") as txt_file:
        txt_file.readline() # ignore the header
        # Outer loop: one iteration per batch, so callers can call next()
        # repeatedly instead of getting StopIteration after the first batch.
        while True:
            vals, idxs_x, idxs_y = [], [], []
            labels_batch = []
            for row in range(batch_size):
                line = txt_file.readline().strip()
                if not line:
                    # EOF (or blank line): the incomplete batch is discarded.
                    return
                tokens = line.split(' ')
                labels_batch.append([int(lbl) for lbl in tokens[0].split(',')])
                pairs = [itm.split(':') for itm in tokens[1:]]
                idxs_x += [row] * len(pairs)
                idxs_y += [int(pair[0]) for pair in pairs]
                vals += [float(pair[1]) for pair in pairs]
            yield (idxs_x, idxs_y, vals, labels_batch)

## Torch Training

In [None]:
# !pip install hnswlib -q

In [None]:
import torch
import time
import numpy as np
# import hnswlib
import math
import torch.nn as nn

In [None]:
# Create the lazy training-batch generator (the file is not opened until the first next() call).
# NOTE(review): the same assignment is repeated two cells below, which replaces this object before it is used.
train_data_generator = data_generator(config.data_path_train, batch_size = config.batch_size, n_classes = config.n_classes)

In [None]:
# Training schedule derived from the config.
n_epochs = config.n_epochs
n_train = config.n_train
n_test = config.n_test
n_check = 50  # the training loop runs a quick test-set check every n_check steps
steps_per_epoch = n_train // batch_size  # counts full batches only
n_steps = config.n_epochs * steps_per_epoch  # total number of fit() calls across all epochs

In [None]:
# (Re)create the training generator so training starts from the first data line of the file.
# NOTE(review): data_generator returns at end of file, so this object provides at most one pass over the training data.
train_data_generator = data_generator(config.data_path_train, batch_size = config.batch_size, n_classes = config.n_classes)

In [None]:
# Parameters of the two-layer network: feature_dim -> hidden_dim -> n_classes.
# The random values drawn here are placeholders; the next cell overwrites them
# in place with a truncated-normal initialisation.
W1 = torch.randn(config.feature_dim, config.hidden_dim, requires_grad = True)
B1 = torch.randn(config.hidden_dim, requires_grad = True)

W2 = torch.randn(config.hidden_dim, config.n_classes, requires_grad = True)
B2 = torch.randn(config.n_classes, requires_grad = True)

In [None]:
# Re-initialise every parameter in place with a truncated normal whose std is
# 2 / sqrt(fan_in + fan_out) of the layer it belongs to (biases share their layer's std).
# NOTE(review): trunc_normal_ is called with its default cut-off bounds; confirm against the
# PyTorch docs that those defaults give the intended truncation for std values this small.
W1 = nn.init.trunc_normal_(W1, std = 2.0/math.sqrt(config.feature_dim + config.hidden_dim))
B1 = nn.init.trunc_normal_(B1, std = 2.0/math.sqrt(config.feature_dim + config.hidden_dim))
W2 = nn.init.trunc_normal_(W2, std = 2.0/math.sqrt(config.hidden_dim + config.n_classes))
B2 = nn.init.trunc_normal_(B2, std = 2.0/math.sqrt(config.hidden_dim + config.n_classes))

In [None]:
# Move the parameters to the selected device with autograd recording disabled,
# so the transfer itself does not become part of any graph.
with torch.no_grad():
    (W1, W2) = to_device((W1, W2), device)
    (B1, B2) = to_device((B1, B2), device)
# Switch gradient tracking (back) on for the tensors that will be handed to the optimizer.
W1.requires_grad = True
W2.requires_grad = True
B1.requires_grad = True
B2.requires_grad = True

In [None]:
# W1.grad = W1.new_zeros(W1.shape)
# W2.grad = W2.new_zeros(W2.shape)

In [None]:
# Inspect W1's gradient; no backward pass has been run by the cells above this point.
W1.grad

In [None]:
# Sanity check: both weight matrices should still track gradients after the device move.
print(W1.requires_grad, W2.requires_grad)

True True


In [None]:
# Adam over all four parameter tensors, using the learning rate from the config.
adam_optim = torch.optim.Adam(params = (W1, W2, B1, B2), lr = config.lr)

# Row-wise log-probabilities over the class dimension (dim 1).
log_softmax = torch.nn.LogSoftmax(dim = 1)
# Padding helpers that append one extra trailing column filled with ones / zeros.
# NOTE(review): neither helper is referenced by the cells visible in this section.
add_unity_col = torch.nn.ConstantPad1d((0, 1), value = 1.0)
add_zero_col = torch.nn.ConstantPad1d((0, 1), value = 0.0)

In [None]:
def fit(train_dg):
    """Run one optimisation step on the next training batch and return its loss.

    Args:
        train_dg: generator created by data_generator(); each item is the
            7-tuple (idxs_x, idxs_y, vals, y_batch_idxs_x, y_batch_idxs_y,
            y_batch_vals, labels_batch).

    Returns:
        The batch loss as a Python float: the mean over rows of the
        cross-entropy between the predicted log-probabilities and the
        per-row target weights.

    Raises:
        StopIteration: when `train_dg` has no batch left.
    """
    adam_optim.zero_grad()
    # Unpack all seven fields the generator yields; the raw label lists are
    # not needed for the loss.
    (idxs_x, idxs_y, vals,
     y_idxs_x, y_idxs_y, y_vals, _labels) = next(train_dg)

    # Feed forward: sparse input -> hidden ReLU layer -> class log-probabilities.
    features = to_device(
        torch.sparse_coo_tensor([idxs_x, idxs_y], vals, size = (batch_size, config.feature_dim)),
        device)
    hidden = torch.nn.functional.relu(torch.sparse.mm(features, W1) + B1)
    log_probs = log_softmax(hidden @ W2 + B2)

    # Cross-entropy against the sparse targets. Only the (row, class) entries
    # with non-zero target weight contribute, so gather just those
    # log-probabilities instead of materialising a dense target matrix.
    rows = torch.tensor(y_idxs_x, dtype = torch.long, device = log_probs.device)
    cols = torch.tensor(y_idxs_y, dtype = torch.long, device = log_probs.device)
    weights = torch.tensor(y_vals, dtype = log_probs.dtype, device = log_probs.device)
    loss = -(log_probs[rows, cols] * weights).sum() / log_probs.shape[0]

    # Back propagation
    loss.backward()

    # Update parameters
    adam_optim.step()

    return loss.item()

In [None]:
def evaluate(n_steps, test_dg):
    """Return the mean top-1 accuracy over `n_steps` batches drawn from `test_dg`.

    A row counts as correct when the highest-scoring class is one of the
    row's labels.

    Args:
        n_steps: number of batches to pull from `test_dg`.
        test_dg: generator created by data_generator_tst(); each item is
            (idxs_x, idxs_y, vals, labels_batch) for config.test_batch_size rows.

    Returns:
        The mean of the per-batch accuracies (numpy float).

    Raises:
        StopIteration: when `test_dg` runs out before `n_steps` batches.
    """
    accuracies = []
    for _ in range(n_steps):
        idxs_x, idxs_y, vals, labels_batch = next(test_dg)

        with torch.no_grad():
            features = to_device(
                torch.sparse_coo_tensor([idxs_x, idxs_y], vals, size = (config.test_batch_size, config.feature_dim)),
                device)
            hidden = torch.nn.functional.relu(torch.sparse.mm(features, W1) + B1)
            logits = hidden @ W2 + B2
            _, preds = torch.max(logits, dim = 1)

        # One device-to-host copy for the whole batch instead of one per row.
        top1 = preds.cpu().tolist()
        num_correct = sum(1 for pred, labels in zip(top1, labels_batch) if pred in labels)
        accuracies.append(num_correct / config.test_batch_size)
    return np.mean(accuracies)

In [None]:
# Main training loop. Training time is accumulated in `total_time`, excluding
# the time spent in evaluation; progress lines go to stdout and are appended
# to config.log_file.
total_time = 0
begin_time = time.time()
# Number of batches in one pass over the test file, counted in test-sized
# batches because that is what evaluate() consumes per step.
n_steps_val = n_test // config.test_batch_size
with open(config.log_file, 'a') as out:
    losses = []
    for step in range(n_steps):
        if step % n_check == 0:
            total_time += time.time() - begin_time
            test_data_generator = data_generator_tst(config.data_path_test, config.test_batch_size, config.n_classes)
            try:
                accuracy = evaluate(20, test_data_generator) #checking precision on 20 test batches
            finally:
                # Release the test file handle held by the suspended generator.
                test_data_generator.close()
            print('Step : {}  Total_Time: {}  Test_acc: {}'.format(step, total_time, accuracy), file = out)
            print('Step : {}  Total_Time: {}  Test_acc: {}'.format(step, total_time, accuracy))
            begin_time = time.time()
        if step % steps_per_epoch == (steps_per_epoch - 1):
            total_time += time.time() - begin_time
            test_data_generator = data_generator_tst(config.data_path_test, config.test_batch_size, config.n_classes)
            try:
                accuracy = evaluate(n_steps_val, test_data_generator) #checking precision on the complete test data
            finally:
                test_data_generator.close()
            print('OVERALL Step : {} Total_Time: {} Test_acc: {}'.format(step, total_time, accuracy), file = out)
            print('OVERALL Step : {} Total_Time: {} Test_acc: {}'.format(step, total_time, accuracy))
            begin_time = time.time()
        try:
            loss = fit(train_data_generator)
        except StopIteration:
            # data_generator makes a single pass over the file; start the next
            # epoch with a fresh generator and retry the step. A second
            # StopIteration (no full batch in the file) is left to propagate.
            train_data_generator = data_generator(config.data_path_train, batch_size = config.batch_size, n_classes = config.n_classes)
            loss = fit(train_data_generator)
        losses.append(loss)