In [26]:
import torch
from torch.utils.cpp_extension import load
import os
import random
import numpy as np
import sys

sys.path.append("../")

os.environ['CUDA_LAUNCH_BLOCKING']="1"

def manual_seed(seed):
    np.random.seed(seed) #1
    random.seed(seed) #2
    torch.manual_seed(seed) #3
    torch.cuda.manual_seed(seed) #4.1
    torch.cuda.manual_seed_all(seed) #4.2
    torch.backends.cudnn.benchmark = False #5 
    torch.backends.cudnn.deterministic = True #6

manual_seed(42)

torch integer data flow 는 gpu에서 사용 불가

In [27]:
# int_data = torch.randint(0,255,(1,3,24,24), dtype=torch.uint8)
# weight = torch.randint(0,255,(1,3,3,3), dtype=torch.uint8)

# # b = torch.nn.functional.conv2d(int_data.cuda(), weight=weight.cuda(),stride=1)
# b = torch.nn.functional.conv2d(int_data, weight=weight,stride=1,bias=None, padding=1, dtype=torch.uint8)
# print(b.shape)

In [28]:
import int8mm_cuda
from torch.nn.modules import Module

class IntLinear(Module):
    def __init__(self, in_channels, out_channels):
        super(IntLinear,self).__init__()
        self.weight = torch.randint(-127,127,(out_channels, in_channels), dtype=torch.int8)
        # self.weight = torch.ones((out_channels, in_channels), dtype=torch.int8)

    def forward(self,x):
        # weight [OUT, IN} - > [IN, OUT]
        # input [BATCH, IN]
        y = int8mm_cuda.int8_mm(x,self.weight.transpose(1,0).contiguous())
        return y
    
    def cuda(self):
        self.weight = self.weight.cuda()
        

mm = IntLinear(4,12)
# x = torch.randint(-127,127,(1,4), dtype=torch.int8).cuda()
# x = torch.ones((1,4), dtype=torch.int8).cuda()
x = torch.tensor([[1,2,3,4]], dtype=torch.int8).cuda()
print(x.dtype, mm.weight.dtype)
with torch.no_grad():
    mm.cuda()
    y = mm(x)
print(x)
print(mm.weight)
print(y)

torch.int8 torch.int8
tensor([[1, 2, 3, 4]], device='cuda:0', dtype=torch.int8)
tensor([[  43,  -60,   21,    9],
        [  73,   32,  115,  -23],
        [-103,   74,  -43,  109],
        [ -33,  -17,   58,   55],
        [ -96,   48,   90,    5],
        [  96,  -25, -118,  -48],
        [ 120,  -66, -126,  -30],
        [  16,  104,   15,  -15],
        [ -18, -104,   10,   -7],
        [  28,  123,   61,   55],
        [  -5,  -63,  -34,   56],
        [ -38,   74,   50, -103]], device='cuda:0', dtype=torch.int8)
tensor([[  22,  390,  352,  327,  290, -500, -510,  209, -224,  677,   -9, -152]],
       device='cuda:0', dtype=torch.int32)


In [29]:
import numpy as np 
print(mm.weight.transpose(1,0),end="\n\n")
x_data = x.detach().cpu().numpy()
mm_data = mm.weight.detach().cpu().numpy()
print(f"x data - {x_data}\n")
print(f"mm data - {mm_data}\nmm Trans - {mm_data.T}\n")
y = x_data @ mm_data.T
print(f"y - {y}")

tensor([[  43,   73, -103,  -33,  -96,   96,  120,   16,  -18,   28,   -5,  -38],
        [ -60,   32,   74,  -17,   48,  -25,  -66,  104, -104,  123,  -63,   74],
        [  21,  115,  -43,   58,   90, -118, -126,   15,   10,   61,  -34,   50],
        [   9,  -23,  109,   55,    5,  -48,  -30,  -15,   -7,   55,   56, -103]],
       device='cuda:0', dtype=torch.int8)

x data - [[1 2 3 4]]

mm data - [[  43  -60   21    9]
 [  73   32  115  -23]
 [-103   74  -43  109]
 [ -33  -17   58   55]
 [ -96   48   90    5]
 [  96  -25 -118  -48]
 [ 120  -66 -126  -30]
 [  16  104   15  -15]
 [ -18 -104   10   -7]
 [  28  123   61   55]
 [  -5  -63  -34   56]
 [ -38   74   50 -103]]
mm Trans - [[  43   73 -103  -33  -96   96  120   16  -18   28   -5  -38]
 [ -60   32   74  -17   48  -25  -66  104 -104  123  -63   74]
 [  21  115  -43   58   90 -118 -126   15   10   61  -34   50]
 [   9  -23  109   55    5  -48  -30  -15   -7   55   56 -103]]

y - [[  22 -122   96   71   34   12    2  -47   32  -9

In [30]:
import int8mm_cuda
from torch.nn.modules import Module

class FloatLinear(Module):
    def __init__(self, in_channels, out_channels):
        super(FloatLinear,self).__init__()
        self.weight = torch.randn((in_channels, out_channels), dtype=torch.float)
        # self.weight = torch.ones((out_channels, in_channels), dtype=torch.int8)
        self.bias = torch.zeros((out_channels), dtype=torch.float)
    def forward(self,x):
        # weight [OUT, IN} - > [IN, OUT]
        # input [BATCH, IN]
        y = int8mm_cuda.float_mm(x,self.weight.contiguous())
        y = y+ self.bias
        return y
    
    def cuda(self):
        self.weight = self.weight.cuda()
        self.bias = self.bias.cuda()
        

mm = FloatLinear(4,12)
torch_linear = torch.nn.Linear(4,10,bias=True)

mm.weight = torch_linear.weight.data.transpose(1,0).contiguous()
mm.bias = torch_linear.bias.data.contiguous()

x = torch.tensor([[1,2,3,4]],dtype=torch.float).cuda()
print(x.dtype, mm.weight.dtype)
with torch.no_grad():
    mm.cuda()
    torch_linear.cuda()
    y = mm(x)
    trans = torch_linear(x)
print(x)
print(torch.equal(mm.weight.transpose(0,1),torch_linear.weight.data))
print(torch_linear.weight)
print(torch_linear.bias)
print(f"compare y shape")
print(y)
print(trans)

torch.float32 torch.float32
tensor([[1., 2., 3., 4.]], device='cuda:0')
True
Parameter containing:
tensor([[ 0.4578, -0.1687, -0.1773, -0.4838],
        [-0.2863,  0.1249, -0.0660, -0.3629],
        [ 0.0117, -0.3415, -0.4242, -0.2753],
        [-0.4376, -0.3184,  0.4998,  0.0944],
        [ 0.1541, -0.4663, -0.3284, -0.1664],
        [ 0.0782, -0.4400, -0.2154, -0.2993],
        [ 0.0014, -0.1861, -0.0346, -0.3388],
        [-0.3432, -0.2917, -0.1711, -0.3946],
        [ 0.4192, -0.0992,  0.4302,  0.1558],
        [-0.4234,  0.3460, -0.1376, -0.1917]], device='cuda:0',
       requires_grad=True)
Parameter containing:
tensor([-0.4150, -0.4971,  0.1431, -0.1092,  0.1947, -0.4103,  0.3712, -0.3670,
        -0.0863,  0.1044], device='cuda:0', requires_grad=True)
compare y shape
tensor([[-2.7616, -2.1834, -2.9022,  0.6936, -2.2348, -3.0557, -1.4587, -3.3856,
          2.0482, -0.8063]], device='cuda:0')
tensor([[-2.7616, -2.1834, -2.9022,  0.6936, -2.2348, -3.0557, -1.4587, -3.3856,
      

Pooling layer


In [31]:
import int8pool_cuda

class IntPool(Module):
    def __init__(self,kernel_size = 2, stride = 2, padding=0, mode=0):
        super(IntPool,self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.mode = mode
    
    def forward(self,x):
        y = int8pool_cuda.int8_pool(x,self.kernel_size, self.stride, self.padding, self.mode)
        # y = (y > 10).int()*5
        # y = y.type(torch.int8)
        return y

pool = IntPool()
x = torch.randint(0, 127,(4,32,32,4), dtype=torch.int8).cuda()

In [32]:
with torch.no_grad():
    y = pool(x)

In [33]:
print(f"x - {x.shape} \n{x}\n")
print(f"y - {y.shape}\n{y}")

x - torch.Size([4, 32, 32, 4]) 
tensor([[[[ 78,  85,  10,  48],
          [112,   8,  18,  64],
          [ 29,   5,  45, 100],
          ...,
          [123, 124, 101,  95],
          [  1,  16,   2,  40],
          [ 96,  67,  56,  97]],

         [[ 17,  25,  98,  68],
          [ 76, 122, 101,  95],
          [115,  69,  13,  88],
          ...,
          [ 98,  47,  96,  47],
          [106, 125,  79,  95],
          [ 49,  41,  20,  14]],

         [[  5,  31,  22,  83],
          [ 77, 119,  17,  13],
          [ 89,   0,  74,  74],
          ...,
          [  0, 115,  19, 118],
          [119,  72, 107,  11],
          [ 14, 126,  13,  29]],

         ...,

         [[ 67,  91,   6,  25],
          [ 27,  34, 124,  21],
          [ 41,  30,  74,  48],
          ...,
          [121,  69,  64, 103],
          [ 56, 120,  34,  28],
          [ 31,  36, 115,  53]],

         [[ 14,  20,  81,  20],
          [  9, 113,  80,  94],
          [ 63, 113,  20,  20],
          ...,
      

In [34]:
avg_pool = IntPool(mode=1)
x = torch.randint(0, 127,(4,32,32,4), dtype=torch.int8).cuda()
with torch.no_grad():
    y = pool(x)
print(f"x - {x.shape} \n{x}\n")
print(f"y - {y.shape}\n{y}")

x - torch.Size([4, 32, 32, 4]) 
tensor([[[[118,  55,  55,  16],
          [118,   4,  35,  35],
          [ 89,  77,  19,  56],
          ...,
          [ 40,  59,  59,  79],
          [105,  37,  20,  19],
          [ 68,  91,  66,  62]],

         [[ 43,  75,  77, 115],
          [122,  33,  27,  56],
          [ 33,  46,  47,  14],
          ...,
          [ 85,   9,  34, 105],
          [105, 102,  17,  76],
          [ 61, 125, 119,  13]],

         [[ 84, 111,  36,  98],
          [  6,  15, 126,  61],
          [ 72, 119,  51,   9],
          ...,
          [ 64,  71,  34,  72],
          [102, 118,  28,  70],
          [ 38,  77,  31,  34]],

         ...,

         [[109,  77,  85, 124],
          [ 31,  32, 103,  92],
          [ 36,   0,  40, 119],
          ...,
          [ 48,   9, 111,  87],
          [ 52,  86,  99,  39],
          [ 90, 119, 114,  26]],

         [[ 27,  33,   4,  99],
          [ 82, 119,  22,  89],
          [108,  56,  23,  90],
          ...,
      

pytorch conv layer parmeter shape 보기

In [35]:
import torchvision
model = torchvision.models.vgg.vgg16(pretrained=True)
print(model.features[0].weight.shape)
c = torch.nn.Conv2d(4,12,3,1,1)
print(c.weight.shape) # NCHW



torch.Size([64, 3, 3, 3])
torch.Size([12, 4, 3, 3])


In [36]:
import cutlassconv
from torch.nn.modules import Module

class IntConv2d(Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride =1, padding =1):
        super(IntConv2d,self).__init__()
        self.weight = torch.randint(-127,127,(out_channels, kernel_size, kernel_size, in_channels), dtype=torch.int8)
        self.stride = stride
        self.padding = padding

    def forward(self,x):
        # trans_weight = torch.flip(self.weight,[1,2]).transpose(0,3).contiguous()
        # trans_weight = self.weight.permute(0,2,3,1).contiguous()
        trans_weight = self.weight
        return cutlassconv.int8_conv(x,trans_weight)
    
    def cuda(self):
        self.weight = self.weight.cuda()
        
## cutlass는 16의 배수만
input_channel= 16
conv = IntConv2d(input_channel,32,3,1,1)
print(conv.weight.shape)
x = torch.randint(0,127,(1,32,32,input_channel), dtype=torch.int8).cuda()


torch.Size([32, 3, 3, 16])


In [37]:
with torch.no_grad():
    conv.cuda()
    y = conv(x)


In [38]:
import numpy as np 
print(f"x data - {x.shape} \n{x_data}\n")
print(f"conv data - {conv.weight.shape}\n{conv.weight}\n")
print(f"y data - {y.shape}\n{y}")

x data - torch.Size([1, 32, 32, 16]) 
[[1 2 3 4]]

conv data - torch.Size([32, 3, 3, 16])
tensor([[[[  53,   47,  -18,  ...,   87,  -56,  125],
          [  79,   58,   13,  ...,  -32,   32,   56],
          [-115,  -43,  -29,  ...,   45,  -96,  -55]],

         [[  74,   46,   42,  ...,   87,  -40,  -29],
          [ -96,   22,  -64,  ...,   94,   25,   51],
          [  60,  -28,   48,  ...,   77,   66,   42]],

         [[ 117,  120,  -43,  ...,    1,   72, -119],
          [  65,  -41,  -36,  ...,   14,  -78, -100],
          [ -54,   30,  -33,  ...,  -96, -103,  -94]]],


        [[[ -74,  -85,   49,  ...,   37,  118,  118],
          [ -70, -110,   82,  ...,   -7,    4, -126],
          [  22,  -35,   -7,  ...,   85,    2,   43]],

         [[  31, -104,  -15,  ...,   65,  -91,   13],
          [  67,   14,  -42,  ..., -110,  -75,   18],
          [-119,  115,  -66,  ...,  -37,   36,    1]],

         [[ -86,  125,   92,  ...,   47,   85,  -38],
          [  15,   63,  -13,  ...,

In [39]:
import int8conv_cuda
from torch.nn.modules import Module

class IntConv2d(Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride =1, padding =1):
        super(IntConv2d,self).__init__()
        # self.weight = torch.ones((out_channels, kernel_size, kernel_size, in_channels), dtype=torch.int8)
        # self.weight[:,:,:,1] = self.weight[:,:,:,1]*2
        # self.weight[2,:,:,:] = self.weight[2,:,:,:]*3

        self.weight = torch.ones((out_channels,in_channels , kernel_size, kernel_size), dtype=torch.int8)
        self.weight[:,1,:,:] = self.weight[:,1,:,:]*2
        self.weight[2,:,:,:] = self.weight[2,:,:,:]*3
        self.weight = self.weight.permute(0,2,3,1).contiguous()

        self.stride = stride
        self.padding = padding

    def forward(self,x):
        # trans_weight = torch.flip(self.weight,[1,2]).transpose(0,3).contiguous()
        # trans_weight = self.weight.permute(0,2,3,1).contiguous()
        trans_weight = self.weight
        y = int8conv_cuda.int8_conv(x,trans_weight,self.stride, self.padding,1)
        # y = (y > 127).int()*5
        # y = y.type(torch.int8)
        return y
    
    def cuda(self):
        self.weight = self.weight.cuda()
    
# cudnn은 4의 배수만
input_channel= 4
conv = IntConv2d(input_channel,8,3,1,1)
print(conv.weight.shape)
x = torch.ones((1,3,3,input_channel), dtype=torch.int8).cuda()


torch.Size([8, 3, 3, 4])


In [40]:
with torch.no_grad():
    conv.cuda()
    y = conv(x)
print(y.shape, y.device)

torch.Size([1, 3, 3, 8]) cuda:0


In [41]:
import numpy as np 
conv_data = conv.weight.detach().cpu().numpy()
x_data = x.detach().cpu().numpy()
y_data = y.detach().cpu().numpy()
print(f"x data - {x.shape} {x.stride()},{x.data} \n")
print(f"conv data - {conv.weight.shape}, {conv.weight.stride()}, {conv.weight}\n")
print(f"y data - {y.shape} {y.stride()}\n{y}")


x data - torch.Size([1, 3, 3, 4]) (36, 12, 4, 1),tensor([[[[1, 1, 1, 1],
          [1, 1, 1, 1],
          [1, 1, 1, 1]],

         [[1, 1, 1, 1],
          [1, 1, 1, 1],
          [1, 1, 1, 1]],

         [[1, 1, 1, 1],
          [1, 1, 1, 1],
          [1, 1, 1, 1]]]], device='cuda:0', dtype=torch.int8) 

conv data - torch.Size([8, 3, 3, 4]), (36, 12, 4, 1), tensor([[[[1, 2, 1, 1],
          [1, 2, 1, 1],
          [1, 2, 1, 1]],

         [[1, 2, 1, 1],
          [1, 2, 1, 1],
          [1, 2, 1, 1]],

         [[1, 2, 1, 1],
          [1, 2, 1, 1],
          [1, 2, 1, 1]]],


        [[[1, 2, 1, 1],
          [1, 2, 1, 1],
          [1, 2, 1, 1]],

         [[1, 2, 1, 1],
          [1, 2, 1, 1],
          [1, 2, 1, 1]],

         [[1, 2, 1, 1],
          [1, 2, 1, 1],
          [1, 2, 1, 1]]],


        [[[3, 6, 3, 3],
          [3, 6, 3, 3],
          [3, 6, 3, 3]],

         [[3, 6, 3, 3],
          [3, 6, 3, 3],
          [3, 6, 3, 3]],

         [[3, 6, 3, 3],
          [3, 6, 

In [42]:
test_tensor = torch.ones((1,4,3,3))
print(test_tensor)
test_tensor[:,1,:,:] = test_tensor[:,1,:,:]*2
print(test_tensor)
trans_tensor = test_tensor.permute(0,2,3,1)
print(trans_tensor[:,:,:,:])

tensor([[[[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]],

         [[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]],

         [[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]],

         [[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]]]])
tensor([[[[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]],

         [[2., 2., 2.],
          [2., 2., 2.],
          [2., 2., 2.]],

         [[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]],

         [[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]]]])
tensor([[[[1., 2., 1., 1.],
          [1., 2., 1., 1.],
          [1., 2., 1., 1.]],

         [[1., 2., 1., 1.],
          [1., 2., 1., 1.],
          [1., 2., 1., 1.]],

         [[1., 2., 1., 1.],
          [1., 2., 1., 1.],
          [1., 2., 1., 1.]]]])


torch 와 동일한 conv가 나오는지 테스트

In [61]:
torch_conv2d = torch.nn.Conv2d(4,8,3,1,1,bias=False)
print(torch_conv2d.weight.data.shape)
torch_conv2d.weight.data = torch.ones((8,4,3,3),dtype=torch.float)

torch.Size([8, 4, 3, 3])


In [62]:
import int8conv_cuda
from torch.nn.modules import Module

class FloatConv2d(Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride =1, padding =1):
        super(FloatConv2d,self).__init__()
        self.weight = torch.ones((out_channels, kernel_size, kernel_size, in_channels),dtype=torch.float).to(memory_format=torch.channels_last)
        self.stride = stride
        self.padding = padding

    def forward(self,x):
        # trans_weight = torch.flip(self.weight,[1,2]).transpose(0,3).contiguous()
        # trans_weight = self.weight.permute(0,2,3,1).contiguous()
        trans_weight = self.weight
        y = int8conv_cuda.float_conv(x,trans_weight,self.stride, self.padding,1)
        # y = (y > 127).int()*5
        # y = y.type(torch.int8)
        return y
    
    def cuda(self):
        self.weight = self.weight.cuda()
    
# cudnn은 4의 배수만
input_channel= 4
conv = FloatConv2d(input_channel,8,3,1,1)
conv.weight = torch_conv2d.weight.data.permute(0,2,3,1).contiguous()
print(conv.weight.shape, conv.weight.stride())
torch_x = torch.ones((1,input_channel,3,3),dtype=torch.float).contiguous()
x = torch_x.permute(0,2,3,1).contiguous()
print(torch_x.shape, x.shape)
print(torch.equal(torch_x.permute(0,2,3,1),x))


torch.Size([8, 3, 3, 4]) (36, 12, 4, 1)
torch.Size([1, 4, 3, 3]) torch.Size([1, 3, 3, 4])
True


In [63]:
with torch.no_grad():
    conv.cuda()
    torch_conv2d.cuda()
    x= x.cuda()
    torch_x = torch_x.cuda()
    y = conv(x)
    trans = torch_conv2d(torch_x)
print(y.shape,y.is_contiguous(memory_format=torch.channels_last))
print(trans.shape, trans.is_contiguous(memory_format=torch.channels_last))

print(torch.equal(y, trans.permute(0,2,3,1)))

torch.Size([1, 3, 3, 8]) False
torch.Size([1, 8, 3, 3]) False
True


In [64]:
import numpy as np 
conv_data = conv.weight.detach().cpu().numpy()
x_data = x.detach().cpu().numpy()
y_data = y.detach().cpu().numpy()
# print(f"x data - {x.shape},{x.data} \n")
# print(f"conv data - {conv_data.shape}, {conv_data}\n")
print(f"y data - {y.shape}\n{y[0,:,:,0]}")
print(f"trans - {trans.shape}\n{trans[:,0,:,:]}")

y data - torch.Size([1, 3, 3, 8])
tensor([[16., 24., 16.],
        [24., 36., 24.],
        [16., 24., 16.]], device='cuda:0')
trans - torch.Size([1, 8, 3, 3])
tensor([[[16., 24., 16.],
         [24., 36., 24.],
         [16., 24., 16.]]], device='cuda:0')


VGG 모델 테스트

In [16]:
import torch.nn as nn

class VGG(nn.Module):
    def __init__(
        self, features: nn.Module, num_classes: int = 100, dropout: float = 0.5) -> None:
        super().__init__()
        self.features = features
        # self.avgpool = IntPool(7,1,0,1)
        self.classifier = nn.Sequential(
            IntLinear(25088,4096),
            # RELU가 아닌 32bit을 8bit으로 변경하는 activation을 사용해야한다.
            nn.ReLU(),
            nn.Dropout(0.5),
            IntLinear(4096,4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            IntLinear(4096,num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        print(f"after feature {x.shape}")
        # x = self.avgpool(x)
        # print(f"after pooling {x.shape}")
        x = torch.flatten(x, 1)
        print(f"after flatten {x.shape} {x.dtype}")
        x = self.classifier(x)
        return x
    
    def cuda(self):
        for layer in model.modules():
            if 'Int' in str(type(layer)):
                layer.cuda()

def make_layers(cfg, batch_norm: bool = False) -> nn.Sequential:
    layers = []
    in_channels = 4
    for vs in cfg:
        for v in vs:
            v = int(v)
            conv2d = IntConv2d(in_channels, v, kernel_size=3, padding=1)
            layers += [conv2d, nn.ReLU()]
            in_channels = v
        layers += [IntPool(kernel_size=2, stride=2)]
    return nn.Sequential(*layers)


cfgs = {
    "D": [[64, 64], [128, 128], [256, 256, 256], [512, 512, 512],[512, 512, 512]],
    # "D": [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512, "M"],
}

def int_vgg(cfg: str, **kwargs) -> VGG:
    model = VGG(make_layers(cfgs[cfg]), **kwargs)
    return model

In [17]:
modules = []
before_l = []
after_l = []
hooks = []

def hook_fn(module, input, output):
    modules.append(module)
    before_l.append(input[0])
    after_l.append(output)

def add_forward_hook(net, hooks):
    for name, layer in net._modules.items():
        if isinstance(layer, nn.Sequential) or isinstance(layer, torchvision.models.vgg.VGG):
            add_forward_hook(layer, hooks)
        else:
            hook = layer.register_forward_hook(hook_fn)
            hooks.append(hook)
            
    return hooks

def remove_forward_hook(hooks):
    for i in hooks:
        i.remove()
# out = model((torch.randn(1,3,32,32)))

In [18]:
# class test_module(Module):
#     def __init__(self,num_classes= 100):
#         super(test_module,self).__init__()
#         self.layers = nn.Sequential(
#             IntLinear(512,4096),
#             nn.ReLU(),
#             nn.Dropout(0.5),
#             IntLinear(4096,4096),
#             nn.ReLU(),
#             nn.Dropout(0.5),
#             IntLinear(4096,num_classes),
#         )
#     def forward(self,x):
#         for l in self.layers:
#             x = l(x)
#             print(x.shape, x.dtype)
#         return x
#     def cuda(self):
#         for layer in model.modules():
#             if 'Int' in str(type(layer)):
#                 layer.cuda()

# model = test_module()
# x = torch.randint(-127,127,(1,512)).cuda()
# model.eval()
# model.cuda()
# print(model.layers[0].weight)
# print(x.dtype)
# with torch.no_grad():
#     y = model(x)



In [19]:
model = int_vgg("D")
model.eval()
hooks = add_forward_hook(model, hooks)
# remove hook, hook works at once
remove_forward_hook(hooks)
model.cuda()
with torch.no_grad():
    x = torch.randint(-127,127,(1,224,224,4), dtype=torch.int8).cuda()
    y = model(x)
    print(len(hooks), len(modules), len(before_l), len(after_l))
    remove_forward_hook(hooks)
    hooks=[]
print(y.dtype, y.shape, y)
    

after feature torch.Size([1, 7, 7, 512])
after flatten torch.Size([1, 25088]) torch.int8


RuntimeError: expected scalar type Char but found Int

In [None]:
i = torch.randint(-127, 127,(1,4,4,3), dtype=torch.int8).cuda()
lay = nn.Dropout(0.5)

lay.cuda()
with torch.no_grad():
    lay.eval()
    y = lay(i)
    k = nn.functional.relu(y)
print(y)
print(k)

tensor([[[[  77,   60,   40],
          [  93,   43,  -67],
          [  88,   87,  122],
          [ 114,    7,  -48]],

         [[-101,  -36,   60],
          [  -5,  -62,  -72],
          [ -30,   30,  -32],
          [  34,  101,    7]],

         [[  68,  120,   36],
          [-109,  -55,    9],
          [ -99,   61,  -12],
          [ -57,  -66,   -7]],

         [[  53,   16,   57],
          [-116,   80,  -19],
          [  -2,  -51,  -21],
          [ -72,   69,   96]]]], device='cuda:0', dtype=torch.int8)
tensor([[[[ 77,  60,  40],
          [ 93,  43,   0],
          [ 88,  87, 122],
          [114,   7,   0]],

         [[  0,   0,  60],
          [  0,   0,   0],
          [  0,  30,   0],
          [ 34, 101,   7]],

         [[ 68, 120,  36],
          [  0,   0,   9],
          [  0,  61,   0],
          [  0,   0,   0]],

         [[ 53,  16,  57],
          [  0,  80,   0],
          [  0,   0,   0],
          [  0,  69,  96]]]], device='cuda:0', dtype=torch.int8)


In [None]:
from models import vgg

model = vgg.int_vgg16("D")
x = torch.randint(-128,127,(1,224,224,4),dtype=torch.int8).cuda()
model.eval()
model.cuda()
with torch.no_grad():
    y = model(x)
    print(y.shape)
print(y)

torch.Size([1, 100])
tensor([[ -574282,  -967096,   718499,  -854490,  -480142,  1088080,  -752827,
          -116280,  -313422,  -976545,  -148737,  -783753,  -647241,  -154183,
           160387,  -120619,   -25240,   725703,  -217888,  -175336, -1143886,
          -437042,  -478281,   371989,  -784157,  -368922,    77165,  -562955,
          -791538, -1247484,  -639698,  -404810,  -830884,  -352098,  -982993,
         -1149701,   388485,   430506, -1301755,  -722884, -1382657,  -926842,
          -209861,   663056,   616099, -1206340,  -615280,  -344674,   939926,
           749104,  -966203,   174706,    50013,  -642108,  -657352,  -455320,
          -236708,  -146362,   413926,   203769,  -677935, -1635767,  -263175,
         -1041366,   -54477,  -756444,  -234938,  -921265,  -493369, -1095055,
          -715730,    15213,   374301,   473852,  -453373, -1359736,   253718,
          -337226,  -387232,  -113524,     6194,   477962, -1093968,   738111,
          -730462,   -55275,  -

In [None]:
x = torch.ones((3,3), dtype=torch.int64)*(2**18+1)
print(x)
x = x + 2**15
print(x)
x = torch.clamp(x, min=0, max=2**16-1)
print(x)
x = torch.sqrt(x)
print(x)
x = x-128
print(x)
x = x.type(torch.int8)
print(x)

tensor([[262145, 262145, 262145],
        [262145, 262145, 262145],
        [262145, 262145, 262145]])
tensor([[294913, 294913, 294913],
        [294913, 294913, 294913],
        [294913, 294913, 294913]])
tensor([[65535, 65535, 65535],
        [65535, 65535, 65535],
        [65535, 65535, 65535]])
tensor([[255.9980, 255.9980, 255.9980],
        [255.9980, 255.9980, 255.9980],
        [255.9980, 255.9980, 255.9980]])
tensor([[127.9980, 127.9980, 127.9980],
        [127.9980, 127.9980, 127.9980],
        [127.9980, 127.9980, 127.9980]])
tensor([[127, 127, 127],
        [127, 127, 127],
        [127, 127, 127]], dtype=torch.int8)


In [None]:
a = torch.tensor([[1,2,3],[4,5,6],[7,8,9]]).reshape(1,3,3,1)
print(a.shape, a,sep='\n')
a = a.repeat(1,1,1,4)
print(a.shape, a,sep='\n')

torch.Size([1, 3, 3, 1])
tensor([[[[1],
          [2],
          [3]],

         [[4],
          [5],
          [6]],

         [[7],
          [8],
          [9]]]])
torch.Size([1, 3, 3, 4])
tensor([[[[1, 1, 1, 1],
          [2, 2, 2, 2],
          [3, 3, 3, 3]],

         [[4, 4, 4, 4],
          [5, 5, 5, 5],
          [6, 6, 6, 6]],

         [[7, 7, 7, 7],
          [8, 8, 8, 8],
          [9, 9, 9, 9]]]])


In [None]:
class FloatConv2d(Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride =1, padding =1):
        super(FloatConv2d,self).__init__()
        self.weight = torch.ones((out_channels, kernel_size, kernel_size, in_channels),dtype=torch.float32)
        self.stride = stride
        self.padding = padding

    def forward(self,x):
        # trans_weight = torch.flip(self.weight,[1,2]).transpose(0,3).contiguous()
        # trans_weight = self.weight.permute(0,2,3,1).contiguous()
        trans_weight = self.weight
        y = int8conv_cuda.float_conv(x,trans_weight,self.stride, self.padding,1)
        return y
    
    def cuda(self):
        self.weight = self.weight.cuda()


input_channel= 4
conv = FloatConv2d(input_channel,1,3,1,1)
conv.cuda()
print(f"conv: {conv.weight.shape}\n{conv.weight}")
x = torch.tensor([[1,2,3],[4,5,6],[7,8,9]],dtype=torch.float32).reshape(1,3,3,1).repeat(1,1,1,input_channel).cuda()
print(f"x : {x.shape} \n{x}")
with torch.no_grad():
    y= conv(x)
    print(f"y : {y.shape}\n{y}")

conv: torch.Size([1, 3, 3, 4])
tensor([[[[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]],

         [[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]],

         [[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]]]], device='cuda:0')
x : torch.Size([1, 3, 3, 4]) 
tensor([[[[1., 1., 1., 1.],
          [2., 2., 2., 2.],
          [3., 3., 3., 3.]],

         [[4., 4., 4., 4.],
          [5., 5., 5., 5.],
          [6., 6., 6., 6.]],

         [[7., 7., 7., 7.],
          [8., 8., 8., 8.],
          [9., 9., 9., 9.]]]], device='cuda:0')
y : torch.Size([1, 3, 3, 1])
tensor([[[[ 48.],
          [ 84.],
          [ 64.]],

         [[108.],
          [180.],
          [132.]],

         [[ 96.],
          [156.],
          [112.]]]], device='cuda:0')
