In [2]:
import time
import torch
import compressors
import torch.nn.functional as F

In [16]:
# First positional argument handed to every compressor below (presumably the
# torch device the compressor works on -- confirm against the compressors
# module). Kept in one place so a different target needs a single edit.
DEVICE = 'cpu'

# One instance per (algorithm, quantization level) combination under test.
# NOTE(review): grank_4, qsgdwec_4, qsgdmax_4, nuq_4 and nuq_max_4 are built
# with quantization_level=2 although their names say 4, whereas qsgdwec_4_mod
# really uses 4. The levels are left exactly as they were; confirm whether the
# names or the levels reflect the intended experiment.
grank_8 = compressors.GlobalRandKMaxNormCompressor(DEVICE, quantization_level=8)
grank_4 = compressors.GlobalRandKMaxNormCompressor(DEVICE, quantization_level=2)
qsgd = compressors.QSGDCompressor(DEVICE)
qsgdwec_8 = compressors.QSGDWECCompressor(DEVICE, quantization_level=8)
qsgdwec_4 = compressors.QSGDWECCompressor(DEVICE, quantization_level=2)
qsgdwec_8_mod = compressors.QSGDWECModCompressor(DEVICE, quantization_level=8)
qsgdwec_4_mod = compressors.QSGDWECModCompressor(DEVICE, quantization_level=4)
qsgdwec_2_mod = compressors.QSGDWECModCompressor(DEVICE, quantization_level=2)
terngrad = compressors.TernGradCompressor(DEVICE)
terngradmod = compressors.TernGradModCompressor(DEVICE)
qsgdmax_8 = compressors.QSGDMaxNormCompressor(DEVICE, quantization_level=8)
qsgdmax_4 = compressors.QSGDMaxNormCompressor(DEVICE, quantization_level=2)
nuq_4 = compressors.NUQSGDModCompressor(DEVICE, quantization_level=2)
nuq_8 = compressors.NUQSGDModCompressor(DEVICE, quantization_level=8)
nuq_max_4 = compressors.NUQSGDMaxNormCompressor(DEVICE, quantization_level=2)
nuq_max_8 = compressors.NUQSGDMaxNormCompressor(DEVICE, quantization_level=8)

# Seed torch's global generator so the benchmark tensor is identical on every
# Restart & Run All; without this each run measures a different input.
SEED = 0
torch.manual_seed(SEED)

size = 10000               # number of elements in the benchmark tensor
tensor = torch.rand(size)  # 1-D tensor of uniform samples in [0, 1)

def bsize(t):
    """Return the number of bytes taken by the elements of tensor ``t``.

    Computed as the element count multiplied by the per-element size in bytes.
    """
    element_count = t.numel()
    bytes_per_element = t.element_size()
    return element_count * bytes_per_element

def _report(label, started, csize, restored):
    """Print one result line for a compress/decompress round trip.

    Args:
        label: Text printed first on the line (e.g. ``"grank_8: "``).
        started: ``time.perf_counter()`` reading taken just before compression.
        csize: Byte count attributed to the compressed representation.
        restored: Decompressed tensor; compared against the global ``tensor``.
    """
    # Read the clock before anything else so the error metrics computed below
    # are not billed to the compressor.
    elapsed = time.perf_counter() - started
    distance = torch.dist(tensor, restored, p=2).item()
    similarity = F.cosine_similarity(tensor.float(), restored.float(), dim=0).item()
    print(label, elapsed, 'size', csize, 'dist', distance, 'similarity', similarity)


print('original size', bsize(tensor))

# Timing uses time.perf_counter(): the intervals measured here are well below
# a millisecond, which is what that clock is intended for.
# The result names (comp, norm, sign_array, ...) stay at cell level on purpose
# so they remain available for inspection in later cells.

# --- GlobalRandK (max norm): norm supplied by the caller, single payload tensor ---
start = time.perf_counter()
comp = grank_8.compress(tensor=tensor, norm=tensor.norm())
decomp = grank_8.decompress(sign_xi_array=comp, norm=tensor.norm())
_report("grank_8: ", start, bsize(comp), decomp)

start = time.perf_counter()
comp = grank_4.compress(tensor=tensor, norm=tensor.norm())
decomp = grank_4.decompress(sign_xi_array=comp, norm=tensor.norm())
_report("grank_4: ", start, bsize(comp), decomp)

# --- QSGD-WEC: separate sign and xi arrays ---
# NOTE(review): the sizes reported for the qsgdwec* variants leave out `norm`,
# while the terngrad*/nuq_* figures further down do include their scalar.
# Kept as-is so the numbers stay comparable with earlier runs.
start = time.perf_counter()
norm, sign_array, xi_array = qsgdwec_8.compress(tensor=tensor)
decomp = qsgdwec_8.decompress(norm, sign_array, xi_array)
_report("qsgdwec_8: ", start, bsize(sign_array) + bsize(xi_array), decomp)

start = time.perf_counter()
norm, sign_array, xi_array = qsgdwec_4.compress(tensor=tensor)
decomp = qsgdwec_4.decompress(norm, sign_array, xi_array)
_report("qsgdwec_4: ", start, bsize(sign_array) + bsize(xi_array), decomp)

# --- QSGD-WEC (mod): sign and xi packed into one array ---
start = time.perf_counter()
norm, sign_xi_array = qsgdwec_8_mod.compress(tensor=tensor)
decomp = qsgdwec_8_mod.decompress(norm, sign_xi_array)
_report("qsgdwec_8_mod: ", start, bsize(sign_xi_array), decomp)

start = time.perf_counter()
norm, sign_xi_array = qsgdwec_4_mod.compress(tensor=tensor)
decomp = qsgdwec_4_mod.decompress(norm, sign_xi_array)
_report("qsgdwec_4_mod: ", start, bsize(sign_xi_array), decomp)

start = time.perf_counter()
norm, sign_xi_array = qsgdwec_2_mod.compress(tensor=tensor)
decomp = qsgdwec_2_mod.decompress(norm, sign_xi_array)
_report("qsgdwec_2_mod: ", start, bsize(sign_xi_array), decomp)

# --- TernGrad and its packed variant ---
start = time.perf_counter()
scaler, sign_array, b_array = terngrad.compress(tensor=tensor)
decomp = terngrad.decompress(scaler, sign_array, b_array)
csize = bsize(scaler) + bsize(sign_array) + bsize(b_array)
_report("terngrad: ", start, csize, decomp)

start = time.perf_counter()
scaler, sign_b_array = terngradmod.compress(tensor=tensor)
decomp = terngradmod.decompress(scaler, sign_b_array)
csize = bsize(scaler) + bsize(sign_b_array)
_report("terngradmod: ", start, csize, decomp)

# --- QSGD (max norm): norm supplied by the caller ---
start = time.perf_counter()
sign_xi_array = qsgdmax_8.compress(tensor=tensor, norm=tensor.norm())
decomp = qsgdmax_8.decompress(norm=tensor.norm(), sign_xi_array=sign_xi_array)
csize = bsize(sign_xi_array)
_report("qsgdmax_8: ", start, csize, decomp)

start = time.perf_counter()
sign_xi_array = qsgdmax_4.compress(tensor=tensor, norm=tensor.norm())
decomp = qsgdmax_4.decompress(norm=tensor.norm(), sign_xi_array=sign_xi_array)
csize = bsize(sign_xi_array)
_report("qsgdmax_4: ", start, csize, decomp)

# --- NUQSGD (mod): compressor returns its own norm ---
start = time.perf_counter()
norm, sign_h_array = nuq_8.compress(tensor=tensor)
decomp = nuq_8.decompress(norm, sign_h_array)
csize = bsize(norm) + bsize(sign_h_array)
_report("nuq_8: ", start, csize, decomp)

start = time.perf_counter()
norm, sign_h_array = nuq_4.compress(tensor=tensor)
decomp = nuq_4.decompress(norm, sign_h_array)
csize = bsize(norm) + bsize(sign_h_array)
_report("nuq_4: ", start, csize, decomp)

# --- NUQSGD (max norm): norm supplied by the caller ---
start = time.perf_counter()
sign_h_array = nuq_max_4.compress(tensor=tensor, norm=tensor.norm())
decomp = nuq_max_4.decompress(norm=tensor.norm(), sign_h_array=sign_h_array)
csize = bsize(sign_h_array)
_report("nuq_max_4: ", start, csize, decomp)

start = time.perf_counter()
sign_h_array = nuq_max_8.compress(tensor=tensor, norm=tensor.norm())
decomp = nuq_max_8.decompress(norm=tensor.norm(), sign_h_array=sign_h_array)
csize = bsize(sign_h_array)
_report("nuq_max_8: ", start, csize, decomp)

# --- Plain QSGD ---
# The second return value is bound to `comp_size` rather than `size`, so the
# integer element count `size` is not replaced by a compressor result.
start = time.perf_counter()
comp, comp_size = qsgd.compress(tensor=tensor)
decomp = qsgd.decompress(compressed_tensor=comp, compressed_tensor_size=comp_size)
_report("qsgd: ", start, bsize(comp) + bsize(comp_size), decomp)

original size 40000
grank_8:  0.001116037368774414 size 40000 dist 9.226079940795898 similarity 0.9876014590263367
grank_4:  0.00018405914306640625 size 10000 dist 323.9773254394531 similarity 0.19805899262428284
qsgdwec_8:  0.00022101402282714844 size 50000 dist 0.16051147878170013 similarity 0.9999961853027344
qsgdwec_4:  0.00016117095947265625 size 20000 dist 13.487634658813477 similarity 0.9739315509796143
qsgdwec_8_mod:  0.00021791458129882812 size 40000 dist 0.1611844152212143 similarity 0.9999961256980896
qsgdwec_4_mod:  0.00015306472778320312 size 10000 dist 2.700674533843994 similarity 0.998921811580658
qsgdwec_2_mod:  0.0002880096435546875 size 10000 dist 13.613450050354004 similarity 0.973444402217865
terngrad:  0.00023508071899414062 size 20004 dist 40.81898498535156 similarity 0.8172917366027832
terngradmod:  0.0001289844512939453 size 10004 dist 40.70539093017578 similarity 0.8204123377799988
qsgdmax_8:  0.00018310546875 size 40000 dist 9.253512382507324 similarity 0.9875

In [15]:

# Scratch probe: a single QSGDWECMod round trip at one quantization level.
PROBE_LEVEL = 8

# Seed torch's global generator so the probe tensor is the same on every run.
torch.manual_seed(0)
size = 10000
tensor = torch.rand(size)


def bsize(t):
    """Return the number of bytes taken by the elements of tensor ``t``."""
    return t.numel() * t.element_size()


print('original size', bsize(tensor))

# Bound to its own name so the level-2 `qsgdwec_2_mod` instance used by the
# comparison cell is not silently replaced by a compressor at another level.
qsgdwec_probe = compressors.QSGDWECModCompressor('cpu', quantization_level=PROBE_LEVEL)

# perf_counter() is the clock meant for short intervals like this one.
start = time.perf_counter()
norm, sign_xi_array = qsgdwec_probe.compress(tensor=tensor)
decomp = qsgdwec_probe.decompress(norm, sign_xi_array)
elapsed = time.perf_counter() - start

# The label is derived from the level actually used, so the printed line can
# no longer claim a different quantization level than the one measured.
print(
    f"qsgdwec_{PROBE_LEVEL}_mod: ",
    elapsed,
    'size', bsize(sign_xi_array),
    'dist', torch.dist(tensor, decomp, p=2).item(),
    'similarity', F.cosine_similarity(tensor.float(), decomp.float(), dim=0).item(),
)


original size 40000
qsgdwec_2_mod:  0.0005919933319091797 size 40000 dist 0.1591997593641281 similarity 0.9999962449073792


In [49]:
# Show the element dtype of whatever `sign_array` currently refers to.
# NOTE(review): `sign_array` is not assigned in this cell; it is left over from
# a compress() call in an earlier cell -- confirm which one before relying on
# this result, and make sure that cell has been run first.
sign_array.dtype

torch.int8