In [1]:
import argparse
import os
import time
import shutil

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
     

import torchvision
import torchvision.transforms as transforms

from models import *


global best_prec
use_gpu = torch.cuda.is_available()
print('=> Building model...')
    
    
    
batch_size = 128
model_name = "VGG16_quant_proj"
model = VGG16_quant_proj()

print(model)

normalize = transforms.Normalize(mean=[0.491, 0.482, 0.447], std=[0.247, 0.243, 0.262])


train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)


test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ]))

testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)


print_freq = 100 # every 100 batches, accuracy printed. Here, each batch includes "batch_size" data points
# CIFAR10 has 50,000 training data, and 10,000 validation data.

def train(trainloader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    model.train()

    end = time.time()
    for i, (input, target) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        input, target = input.cuda(), target.cuda()

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec = accuracy(output, target)[0]
        losses.update(loss.item(), input.size(0))
        top1.update(prec.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()


        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec {top1.val:.3f}% ({top1.avg:.3f}%)'.format(
                   epoch, i, len(trainloader), batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1))

            

def validate(val_loader, model, criterion ):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
         
            input, target = input.cuda(), target.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec = accuracy(output, target)[0]
            losses.update(loss.item(), input.size(0))
            top1.update(prec.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:  # This line shows how frequently print out the status. e.g., i%5 => every 5 batch, prints out
                print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec {top1.val:.3f}% ({top1.avg:.3f}%)'.format(
                   i, len(val_loader), batch_time=batch_time, loss=losses,
                   top1=top1))

    print(' * Prec {top1.avg:.3f}% '.format(top1=top1))
    return top1.avg


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res


class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

        
def save_checkpoint(state, is_best, fdir):
    filepath = os.path.join(fdir, 'checkpoint.pth')
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, os.path.join(fdir, 'model_best.pth.tar'))


def adjust_learning_rate(optimizer, epoch):
    """For resnet, the lr starts from 0.1, and is divided by 10 at 80 and 120 epochs"""
    adjust_list = [80, 120]
    if epoch in adjust_list:
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group['lr'] * 0.1        

#model = nn.DataParallel(model).cuda()
#all_params = checkpoint['state_dict']
#model.load_state_dict(all_params, strict=False)
#criterion = nn.CrossEntropyLoss().cuda()
#validate(testloader, model, criterion)

=> Building model...
VGG_quant(
  (features): Sequential(
    (0): QuantConv2d(
      3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
      (weight_quant): weight_quantize_fn()
    )
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): QuantConv2d(
      64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
      (weight_quant): weight_quantize_fn()
    )
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): QuantConv2d(
      64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
      (weight_quant): weight_quantize_fn()
    )
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): QuantConv2d(
      128, 128, kernel_size=(3, 3), stride

In [2]:
print(model.features[27])

QuantConv2d(
  8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
  (weight_quant): weight_quantize_fn()
)


In [3]:
# This cell won't be given, but students will complete the training

lr = 0.1
weight_decay = 1e-4
epochs = 130
best_prec = 0

#model = nn.DataParallel(model).cuda()
model.cuda()
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
#cudnn.benchmark = True

if not os.path.exists('result'):
    os.makedirs('result')
fdir = 'result/'+str(model_name)
if not os.path.exists(fdir):
    os.makedirs(fdir)
        

for epoch in range(0, epochs):
    adjust_learning_rate(optimizer, epoch)

    train(trainloader, model, criterion, optimizer, epoch)
    
    # evaluate on test set
    print("Validation starts")
    prec = validate(testloader, model, criterion)

    # remember best precision and save checkpoint
    is_best = prec > best_prec
    best_prec = max(prec,best_prec)
    print('best acc: {:1f}'.format(best_prec))
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_prec': best_prec,
        'optimizer': optimizer.state_dict(),
    }, is_best, fdir)

Epoch: [0][0/391]	Time 1.734 (1.734)	Data 0.334 (0.334)	Loss 2.6055 (2.6055)	Prec 8.594% (8.594%)


KeyboardInterrupt: 

In [None]:
# HW

#  1. Train with 4 bits for both weight and activation to achieve >90% accuracy
#  2. Find x_int and w_int for the 2nd convolution layer
#  3. Check the recovered psum has similar value to the un-quantized original psum
#     (such as example 1 in W3S2)

In [3]:
PATH = "result/VGG16_quant_proj/model_best.pth.tar"
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['state_dict'])
device = torch.device("cuda") 

model.cuda()
model.eval()

test_loss = 0
correct = 0

with torch.no_grad():
    for data, target in testloader:
        data, target = data.to(device), target.to(device) # loading to GPU
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)  
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(testloader.dataset)

print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(testloader.dataset),
        100. * correct / len(testloader.dataset)))


Test set: Accuracy: 9144/10000 (91%)



In [4]:
#send an input and grap the value by using prehook like HW3
class SaveOutput: #the instance of this class serve as a hook of the "register_forward_pre_hook" function.
    def __init__(self):
        self.outputs = []
    def __call__(self, module, module_in): #to make the instance of the class callable.
        self.outputs.append(module_in)
    def clear(self):
        self.outputs = []  
        
save_output = SaveOutput()

for layer in model.modules():
    if isinstance(layer, QuantConv2d):
        print("prehooked")
        layer.register_forward_pre_hook(save_output)       ## Input for the module will be grapped     
        #hook should be a callable object
        
dataiter = iter(trainloader)
images, labels = next(dataiter)
images = images.to(device)
out = model(images)  

print("10th quant convolution's input size:", save_output.outputs[8][0].size())
print(model.features[27])

prehooked
prehooked
prehooked
prehooked
prehooked
prehooked
prehooked
prehooked
prehooked
prehooked
prehooked
prehooked
prehooked
10th quant convolution's input size: torch.Size([128, 8, 4, 4])
QuantConv2d(
  8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
  (weight_quant): weight_quantize_fn()
)


In [5]:
w_bit = 4
weight_q = model.features[27].weight_q # quantized value is stored during the training
w_alpha = model.features[27].weight_quant.wgt_alpha  # alpha is defined in your model already. bring it out here
w_delta =w_alpha.div(2**(w_bit-1)-1)   # delta can be calculated by using alpha and w_bit
weight_int = weight_q.div(w_delta) # w_int can be calculated by weight_q and w_delta
print(weight_int.size()) # you should see clean integer numbers


torch.Size([8, 8, 3, 3])


In [6]:
x_bit = 4    
x=save_output.outputs[8][0]
x_alpha  = model.features[27].act_alpha
x_delta = x_alpha.div(2**x_bit -1)

act_quant_fn = act_quantization(x_bit) # define the quantization function
x_q = act_quant_fn(x, x_alpha)         # create the quantized value for x

x_int = x_q.div(x_delta)
print(x_int.size()) # you should see clean integer numbers 

torch.Size([128, 8, 4, 4])


In [7]:
conv_int = torch.nn.Conv2d(in_channels = 8, out_channels=8, kernel_size = 3,padding=1, bias = False)
conv_int.weight = torch.nn.parameter.Parameter(weight_int)

output_int =  conv_int(x_int)      # output_int can be calculated with conv_int and x_int
relu=nn.ReLU(inplace=True)
print(output_int.size())
output_int_relu=relu(output_int)#relu first, then recover is equal to the other way around
output_recovered =output_int_relu.mul(x_delta).mul(w_delta)  # recover with x_delta and w_delta

#output_recovered=relu(output_recovered)
print(output_recovered)

torch.Size([128, 8, 4, 4])
tensor([[[[ 0.0000,  5.3497,  7.2444,  3.4327],
          [ 0.0000,  3.9008,  3.7225,  0.0000],
          [ 1.6718,  4.0346,  3.2990,  0.0000],
          [ 3.7225,  3.6779,  1.8278,  0.0000]],

         [[ 0.0000,  0.0000,  0.0000,  0.0000],
          [ 5.1714,  2.9869,  2.1845,  0.0000],
          [ 9.8970,  7.1775,  1.1145,  0.0000],
          [ 6.4197,  2.7417,  0.0000,  0.0000]],

         [[ 0.0000,  5.4389,  8.2921,  6.3528],
          [ 6.1522, 13.5972, 13.1514, 10.5434],
          [11.4350, 11.0338,  9.8078,  8.0246],
          [ 9.8524, 12.4158, 10.3874,  8.1806]],

         ...,

         [[ 0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000]],

         [[ 0.3121,  1.6049,  1.1814,  0.7802],
          [ 4.6141,  6.6426,  1.6272,  0.0000],
          [ 4.6364,  4.0792,  0.0000,  0.0000],
          [ 2.9646,  4.7479,  2.1622, 

In [8]:
#### input floating number / weight quantized version
print(save_output.outputs[9][0].size())
print(output_recovered.size())
#print(save_output.outputs[9][0])

torch.Size([128, 8, 4, 4])
torch.Size([128, 8, 4, 4])


In [9]:
output=save_output.outputs[9][0]

difference = abs( output - output_recovered )
print(difference.mean())  ## It should be small, e.g.,2.3 in my trainned model

tensor(3.8927e-07, device='cuda:0', grad_fn=<MeanBackward0>)


In [19]:
#X=save_output.outputs[8][0]
#print(X)
x_pad = torch.zeros(128, 8, 6, 6).cuda()
x_pad[:,:, 1:5, 1:5] = x_int.cuda() #input feature is 6*6
print(x_pad)

X_reshaped=torch.reshape(x_pad,(x_pad.size(0),x_pad.size(1),-1))#[batch_num, in_channel, nij]

W_reshaped=torch.reshape(weight_int,(weight_int.size(0),weight_int.size(1),-1))#[out_channel,in_channel,kij]
#[8,8,9]
#print(W_reshaped.size())
output_reshaped=torch.reshape(output_int_relu,(output_int_relu.size(0),output_int_relu.size(1),-1))#[batch_num,out_channel,o_nij]
#print(output_reshaped.size())
#print(output_reshaped)

tensor([[[[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000, 15.0000, 15.0000, 15.0000, 10.0000,  0.0000],
          [ 0.0000,  5.0000,  0.0000,  0.0000,  8.0000,  0.0000],
          [ 0.0000,  6.0000,  5.0000,  5.0000, 10.0000,  0.0000],
          [ 0.0000,  2.0000,  0.0000,  0.0000,  4.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]],

         [[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  3.0000, 15.0000,  3.0000,  0.0000],
          [ 0.0000,  0.0000, 13.0000, 15.0000, 15.0000,  0.0000],
          [ 0.0000,  0.0000, 11.0000, 15.0000, 15.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]],

         [[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
      

In [24]:
tile_id = 0 
nij = 0 # just a random number
X = X_reshaped[tile_id,:,:]#[8,36]

bit_precision = 4
file = open('activation_tile0.txt', 'w') #write to file
file.write('#time0row7[msb-lsb],time0row6[msb-lst],....,time0row0[msb-lst]#\n')
file.write('#time1row7[msb-lsb],time1row6[msb-lst],....,time1row0[msb-lst]#\n')
file.write('#................#\n')

for i in range(X.size(1)):  # time step #input feature map
    for j in range(X.size(0)): # row #8 rows (input channels)
        X_bin = '{0:04b}'.format(round(X[7-j,i].item()))
        for k in range(bit_precision):
            file.write(X_bin[k])        
        #file.write(' ')  # for visibility with blank between words, you can use
    file.write('\n')
file.close() #close file   

print(X[:,8])

tensor([15.0000,  3.0000,  0.0000, 15.0000,  0.0000, 15.0000,  0.0000, 15.0000],
       device='cuda:0', grad_fn=<SelectBackward0>)


In [23]:
tile_id = 0 
kij = 9
W = W_reshaped[:,:,:]#[out_channel,in_channel,kij]

bit_precision = 4


for k in range(W.size(2)):    
    file_name = 'weight_kij'+str(k)+'.txt'
    file = open(file_name, 'w') #write to file
    file.write('#col0row7[msb-lsb],col0row6[msb-lst],....,col0row0[msb-lst]#\n')#first line: oc1, second line: oc2....
    file.write('#col1row7[msb-lsb],col1row6[msb-lst],....,col1row0[msb-lst]#\n')
    file.write('#................#\n')
    for i in range(W.size(0)):#output channel
        for j in range(W.size(1)): #input channel#the weight of an output channel is in one line
            if (round(W[i,7-j,k].item()<0)):
                W_bin='{0:04b}'.format(round(W[i,7-j,k].item()+2**bit_precision))
            else:
                W_bin='{0:04b}'.format(round(W[i,7-j,k].item()))
            for n in range(bit_precision):
                file.write(W_bin[n])         
            #file.write(' ')  # for visibility with blank between words, you can use
        file.write('\n')
    file.close() #close file   
   


In [60]:
print(W_reshaped[:,:,0])


tensor([[ 0., -7., -0.,  0., -4., -2.,  1.,  0.],
        [-4., -1., -2.,  0., -0., -1.,  1., -4.],
        [ 7.,  2., -0.,  1.,  0., -1.,  7., -5.],
        [ 7., -4., -1.,  6., -3., -1.,  5.,  7.],
        [-7., -6.,  0.,  3.,  2., -2., -0.,  2.],
        [-0., -1., -1., -1., -1., -0., -1., -1.],
        [ 1., -1., -0., -2., -0.,  2., -2., -2.],
        [ 2., -0., -0., -1.,  4., -1.,  1.,  2.]], device='cuda:0',
       grad_fn=<SelectBackward0>)


In [25]:
tile_id = 0 
#nij = 0 # just a random number
out=output_reshaped[tile_id,:,:]#[out_channel, time step]#[out_channel,o_nij]#[8,16]

bit_precision = 16
file = open('out.txt', 'w') #write to file
file.write('#time0col7[msb-lsb],time0col6[msb-lst],....,time0col0[msb-lst]#\n')
file.write('#time1col7[msb-lsb],time1col6[msb-lst],....,time1col0[msb-lst]#\n')
file.write('#................#\n')

for i in range(out.size(1)):  # time step 
    for j in range(out.size(0)): # output channels
        if (round(out[7-j,i].item()<0)):
            o_bin = '{0:016b}'.format(round(out[7-j,i].item()+2**bit_precision))
        else:
            o_bin = '{0:016b}'.format(round(out[7-j,i].item()))
        for k in range(bit_precision):
            file.write(o_bin[k])        
        #file.write(' ')  # for visibility with blank between words, you can use
    file.write('\n')
file.close() #close file   

In [20]:
print(output_reshaped[0,:,0])

tensor([  0.0000,   9.0000,  66.9999,   0.0000,   0.0000,   0.0000, 117.0000,
          0.0000], device='cuda:0', grad_fn=<SelectBackward0>)


In [2]:
#address of the psum memory for accumulation
o_ni_dim=4
a_pad_ni_dim=6
ki_dim=3

address = torch.zeros(16, 9).cuda()

for o_nij in range(16):
    for kij in range(9):
        address[o_nij, kij] = int(o_nij/o_ni_dim)*a_pad_ni_dim + o_nij%o_ni_dim + int(kij/ki_dim)*a_pad_ni_dim + kij%ki_dim
        #print(address[o_nij, kij])

In [4]:
print(address)
index=torch.nonzero(torch.eq(address[:,2],23))
print(index)
in2=index.tolist()
print(in2[0][0])
print(address[in2[0][0],2])

#in each kij loop, the results are stored sequentially (0-35), and not all of these 36 results are effective (will be used later). 
#column: each element along one column specifies the address of the element in one  36-length group that will be accumulated. (address of effective elements)
#accumulation while execution: for each i in (0,323), first compute i//36, which is the column number of i in the address matrix.
#Then compute i%36
#for example, for the last kij, the 15th result should be added to the 14th result of the previous kij, as shown in the last two columns of the address matrix.

tensor([[ 0.,  1.,  2.,  6.,  7.,  8., 12., 13., 14.],
        [ 1.,  2.,  3.,  7.,  8.,  9., 13., 14., 15.],
        [ 2.,  3.,  4.,  8.,  9., 10., 14., 15., 16.],
        [ 3.,  4.,  5.,  9., 10., 11., 15., 16., 17.],
        [ 6.,  7.,  8., 12., 13., 14., 18., 19., 20.],
        [ 7.,  8.,  9., 13., 14., 15., 19., 20., 21.],
        [ 8.,  9., 10., 14., 15., 16., 20., 21., 22.],
        [ 9., 10., 11., 15., 16., 17., 21., 22., 23.],
        [12., 13., 14., 18., 19., 20., 24., 25., 26.],
        [13., 14., 15., 19., 20., 21., 25., 26., 27.],
        [14., 15., 16., 20., 21., 22., 26., 27., 28.],
        [15., 16., 17., 21., 22., 23., 27., 28., 29.],
        [18., 19., 20., 24., 25., 26., 30., 31., 32.],
        [19., 20., 21., 25., 26., 27., 31., 32., 33.],
        [20., 21., 22., 26., 27., 28., 32., 33., 34.],
        [21., 22., 23., 27., 28., 29., 33., 34., 35.]], device='cuda:0')
tensor([[15]], device='cuda:0')
15
tensor(23., device='cuda:0')


In [3]:
position_list=[]
for i in range(324):
    col_num=i//36
    element=i%36
    
    index=torch.nonzero(torch.eq(address[:,col_num],element)).tolist()
    
    #print(len(index))
    if (len(index)==0):
        #position=1024
        position=0
        position_list.append(position)
    else:
        index_int=index[0][0]
        if col_num==0:
            position=address[index_int,col_num]
            position_list.append(position)
        else:
            position=address[index_int,col_num-1]
            position_list.append(position)
        
print(position_list[323-35:324])
    

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, tensor(13., device='cuda:0'), tensor(14., device='cuda:0'), tensor(15., device='cuda:0'), tensor(16., device='cuda:0'), 0, 0, tensor(19., device='cuda:0'), tensor(20., device='cuda:0'), tensor(21., device='cuda:0'), tensor(22., device='cuda:0'), 0, 0, tensor(25., device='cuda:0'), tensor(26., device='cuda:0'), tensor(27., device='cuda:0'), tensor(28., device='cuda:0'), 0, 0, tensor(31., device='cuda:0'), tensor(32., device='cuda:0'), tensor(33., device='cuda:0'), tensor(34., device='cuda:0')]


In [4]:
file = open('accumulation_while_execution_add.txt', 'w') #write to file
bit_precision = 11

for i in range(len(position_list)):
        a_bin = '{0:011b}'.format(int(position_list[i]))
        for k in range(bit_precision):
            file.write(a_bin[k])
        file.write('\n')
file.close()

In [14]:
print(address[:,8])

tensor([14., 15., 16., 17., 20., 21., 22., 23., 26., 27., 28., 29., 32., 33.,
        34., 35.], device='cuda:0')


In [15]:
file = open('final_result_add.txt', 'w') #write to file
bit_precision = 11

final_result_add=address[:,8]

for i in range(len(final_result_add)):
        a_bin = '{0:011b}'.format(int(final_result_add[i]))
        for k in range(bit_precision):
            file.write(a_bin[k])
        file.write('\n')
file.close()

In [24]:
file = open('acc_address.txt', 'w') #write to file
file.write('#1st address#\n')
file.write('#2st address#\n')
file.write('#................#\n')
bit_precision = 11

for i in range(address.size(0)):
    for j in range(address.size(1)):
        a_bin = '{0:011b}'.format(int(address[i, j]))
        for k in range(bit_precision):
            file.write(a_bin[k])
        file.write('\n')
file.close()

In [14]:
offset = torch.zeros(16, 9).cuda()
for i in range(16):
    for j in range (9):
        offset[i,j]=36*j
        
print(offset)
address2=torch.add(address,offset)
print(address2)

tensor([[  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 180., 216., 252., 288.],
        [  0.,  36.,  72., 108., 144., 1

In [37]:


file = open('acc_address2.txt', 'w') #write to file
bit_precision = 11

for i in range(address2.size(0)):
    for j in range(address2.size(1)):
        a_bin = '{0:011b}'.format(int(address2[i, j]))
        for k in range(bit_precision):
            file.write(a_bin[k])
        file.write('\n')
file.close()