In [None]:
from glob import glob
import pandas as pd
from io import StringIO
import numpy as np
from collections import namedtuple
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Scratch experiments: extrapolate one step below `a` and two steps above `b`
# for a pair of 8-bit patterns, first on the raw values ...
a = 0b11010010
b = 0b11010101
diff = b - a

bin(a - diff), bin(b + 2 * diff)

# ... then with the same constant subtracted from both values.
a = 0b11010010 - 0b1101
b = 0b11010101 - 0b1101
diff = b - a

bin(a - diff), bin(b + 2 * diff)

bin(0b11010101 - 0b10101010)

# Same experiment with a different constant subtracted.
a = 0b11010010 - 0b101011
b = 0b11010101 - 0b101011
diff = b - a

bin(a - diff), bin(b + 2 * diff)

In [None]:
# Example inputs for trying out shift_calculation by hand.
num = 0b11010010
pred = 0b11010101
center = 4
# shift_calculation(num, 32 - center, pred, domain=4)

### Shift and LZC calculation

In [None]:
def shift_calculation(num, center, pred, domain=4, bits=32):
    """Return the offset from ``pred`` to an alternating bit pattern.

    Parameters
    ----------
    num : unused; kept so existing call sites keep working.
    center : bit index (counted from the most significant bit) around which
        the window ``[center - domain, center + domain]`` is placed.
    pred : non-negative integer prediction.
    domain : half-width of the window around ``center``.
    bits : width of the binary representation of ``pred``.

    Returns
    -------
    ``int(pattern, 2) - pred`` where ``pattern`` is ``"10"`` or ``"01"``
    repeated ``domain + (bits - right)`` times.
    """
    string_repr = np.binary_repr(pred, bits)
    right, left = min(center + domain, bits), max(center - domain, 0)
    # The pattern starts with the inverse of the bit just left of the window.
    # NOTE: for left == 0 the index is -1, i.e. the least significant bit.
    last_bit = string_repr[left - 1]
    val = "10" if last_bit == "0" else "01"
    # Use `bits` (not a literal 32) so the pattern length follows the
    # requested representation width.
    goal = val * (domain + (bits - right))
    # NOTE(review): only the pattern itself is converted here; the leading
    # `left` bits of `pred` are not prepended -- confirm this is intended.
    shift = int(goal, 2) - pred
    return shift

In [None]:
def lzc(val, bits=32):
    """Return the number of leading zero bits of ``val`` within ``bits`` bits.

    Bits are probed from position ``bits - 1`` downwards; the scan stops at
    the first set bit. A value with none of the low ``bits`` bits set yields
    ``bits``.
    """
    leading = 0
    while leading < bits and (val & (1 << (bits - 1 - leading))) == 0:
        leading += 1
    return leading
# Element-wise version taking (values, bits) and returning one result.
lzcu = np.frompyfunc(lzc, 2, 1)

# Test
## Read data and prepare dataframe

In [None]:
# Read "test.npf" as a flat array of raw binary int64 values.
truth = np.fromfile("test.npf", "int64")

In [None]:
# One row per sample; every derived column starts out as integer zero and
# is filled in by the prediction loop.
df = pd.DataFrame({"truth": truth}).assign(**{
    column: 0
    for column in (
        "prediction",
        "shifted_prediction",
        "shifted_truth",
        "shift",
        "shifted_residual",
        "normal_residual",
        "shifted_lzc",
        "normal_lzc",
    )
})

df.head()

## Calculate predictions (shifted and normal)

In [None]:
# Fill the derived columns for every row that has two predecessors.
# `df.at[row, col]` is used instead of `df[col][row] = ...`: the chained form
# triggers SettingWithCopyWarning and does not write through under pandas
# copy-on-write.
for i in range(2, df.truth.size):
    # Last-value predictor: the previous sample is the prediction.
    df.at[i, "prediction"] = df.at[i - 1, "truth"]
    # The shift window is centred on the first bit in which the two
    # previous samples differ.
    center = 32 - lzc(df.at[i - 2, "truth"] ^ df.at[i - 1, "truth"])
    df.at[i, "shift"] = shift_calculation(None, center, df.at[i, "prediction"], domain=4)
    df.at[i, "shifted_prediction"] = df.at[i, "prediction"] + df.at[i, "shift"]
    df.at[i, "shifted_truth"] = df.at[i, "truth"] + df.at[i, "shift"]
    df.at[i, "shifted_residual"] = df.at[i, "shifted_prediction"] ^ df.at[i, "shifted_truth"]
    df.at[i, "normal_residual"] = df.at[i, "prediction"] ^ df.at[i, "truth"]
    df.at[i, "shifted_lzc"] = lzc(df.at[i, "shifted_residual"])
    df.at[i, "normal_lzc"] = lzc(df.at[i, "normal_residual"])
df.head()

## Analysis

In [None]:
# Summary statistics comparing leading-zero counts with and without shift.
analysis = [
   (df["normal_lzc"] <= df["shifted_lzc"]).sum(),              # {0}: rows where shifting did not hurt
   (df["shifted_lzc"]-df["normal_lzc"]).sum(),                 # {1}: total gain (shifted - normal)
   df["normal_lzc"].sum(),                                     # {2}: sum of normal LZC
   df["shifted_lzc"].sum(),                                    # {3}: sum of shifted LZC
   df["normal_lzc"].sum()/(df["normal_lzc"].size*32) * 100,    # {4}: normal LZC as % of all bits
   df["shifted_lzc"].sum()/(df["shifted_lzc"].size*32) * 100,  # {5}: shifted LZC as % of all bits
   df["truth"].size,                                           # {6}: number of rows
]
# Placeholders are ordered so that each value matches its
# "Shifted ... vs. Former ..." label (shifted first, normal second).
print(
    """
    Shifted LZC >= Former LZC (of {6}): {0}
    Sum Shifted LZC vs. Sum Former LZC : {3} vs. {2} (diff: {1})
    % Shifted LZC vs. % Former LZC : {5}% vs. {4}%
    """.format(*analysis))

## Calculate reconstructions (shifted and normal)

In [None]:
# Reconstruction of original true value

In [None]:
# Zero-initialised columns that the reconstruction loop fills in.
df["reconstruct"] = 0
df["shifted_reconstruct"] = 0

In [None]:
# Rebuild the true values from prediction + residual, once through the
# shifted domain and once directly. `df.at` avoids chained assignment.
for i in range(2, df.truth.size):
    # Recompute the shift exactly as the prediction loop did.
    center = 32 - lzc(df.at[i - 2, "truth"] ^ df.at[i - 1, "truth"])
    shift = shift_calculation(None, center, df.at[i, "prediction"], domain=4)
    # shifted_prediction XOR shifted_residual gives the shifted truth;
    # removing the shift afterwards yields the original value.
    shifted_truth = (df.at[i, "prediction"] + shift) ^ df.at[i, "shifted_residual"]
    df.at[i, "shifted_reconstruct"] = shifted_truth - shift
    df.at[i, "reconstruct"] = df.at[i, "prediction"] ^ df.at[i, "normal_residual"]

In [None]:
# Compare both reconstructions with the truth, skipping the first two rows
# (the loops above start at index 2). Displays a (normal, shifted) pair of
# booleans.
(np.array_equal(df["truth"][2:],df["reconstruct"][2:]),
np.array_equal(df["truth"][2:],df["shifted_reconstruct"][2:]))
# reconstruct successful?

In [None]:
# Show the last rows of the frame.
df.tail()

## Compare with previous error addition

In [None]:
# Keep only the columns needed for the previous-error experiment. The
# explicit copy makes later column assignments operate on an independent
# frame instead of a slice of the old one.
df = df[["truth","prediction"]].copy()
df.head()

In [None]:
# Zero-initialised column for the error-corrected prediction.
df["prev_error_prediction"] = 0

In [None]:
# Correct each prediction by the previous prediction error, scaled by
# beta / parts, and clamp at zero when the correction would go negative.
delta = 0
beta, parts = 1,1
for i in range(1, df["prev_error_prediction"].size):
    delta = df.at[i - 1, "truth"] - df.at[i - 1, "prediction"]
    overshot = delta < 0
    offset = np.abs(delta)
    # Integer division keeps the column integral; it is XOR-ed with the
    # truth column afterwards, which requires integer values.
    correction = (beta * offset) // parts
    if not overshot:
        df.at[i, "prev_error_prediction"] = df.at[i, "prediction"] + correction
    elif correction <= df.at[i, "prediction"]:
        df.at[i, "prev_error_prediction"] = df.at[i, "prediction"] - correction
    else:
        df.at[i, "prev_error_prediction"] = 0

In [None]:
# Leading-zero count (32-bit) of the XOR residual for the plain prediction
# and for the previous-error-corrected prediction.
df["lzc_normal"] = lzcu(df['prediction'] ^ df['truth'], 32)
df["lzc_prev_error"] = lzcu(df['prev_error_prediction'] ^ df['truth'], 32)

In [None]:
# Show the first rows including the new LZC columns.
df.head()

In [None]:
# Mean leading-zero count per method (plain, previous-error).
df["lzc_normal"].mean(),df["lzc_prev_error"].mean()

In [None]:
# Mean LZC if the better of the two methods were picked for every row.
df[["lzc_normal","lzc_prev_error"]].max(axis=1).mean()

# Improve upon the shifted reconstruction

In [None]:
def shift_calculation(num, center, pred, domain=4, bits=32):
    """Shifted residue calculation.

    Builds a target value that keeps the leading ``left`` bits of ``pred``
    (``left = max(center - domain, 0)``) and fills the remaining bits with an
    alternating pattern that starts with the inverse of the last kept bit.

    Parameters
    ----------
    num : unused; kept so existing call sites keep working.
    center : bit index (from the most significant bit) of the window centre.
    pred : non-negative integer prediction.
    domain : number of bits the window extends to the left of ``center``.
    bits : width of the binary representation.

    Returns
    -------
    ``target - pred``, i.e. the amount to add to ``pred`` to reach the target.
    """
    string_repr = np.binary_repr(pred, bits)
    left = max(center - domain, 0)
    # NOTE: for left == 0 the index is -1, i.e. the least significant bit.
    last_bit = string_repr[left - 1]
    kept = string_repr[:left]
    val = "10" if last_bit == "0" else "01"
    target = kept + val * ((bits - left) // 2)
    # An odd number of remaining bits leaves the target one bit short;
    # repeat the second-to-last character to complete it.
    if len(target) == bits - 1:
        target = target + target[-2]
    shift = int(target, 2) - pred
    return shift

In [None]:
truth = np.fromfile("test.npf", "int64")
def shifted_residual_calculation(truth, do, centeroffset):
    """Run the shifted-residual experiment for one parameter combination.

    Parameters
    ----------
    truth : sequence of integer samples.
    do : ``domain`` argument forwarded to ``shift_calculation``.
    centeroffset : value added to the window centre derived from the two
        previous samples.

    Prints one summary line with the total LZC gain (shifted - normal) and
    returns the populated DataFrame.
    """
    df = pd.DataFrame({"truth": truth})
    for column in ("prediction", "shifted_prediction", "shifted_truth", "shift",
                   "shifted_residual", "normal_residual", "shifted_lzc", "normal_lzc"):
        df[column] = 0

    # Stays None when there are fewer than three samples, so the summary
    # line below can still be printed.
    center = None
    for i in range(2, df.truth.size):
        # `df.at` instead of chained `df[col][i] = ...`, which does not write
        # through under pandas copy-on-write.
        df.at[i, "prediction"] = df.at[i - 1, "truth"]
        center = 32 - lzc(df.at[i - 2, "truth"] ^ df.at[i - 1, "truth"]) + centeroffset
        df.at[i, "shift"] = shift_calculation(None, center, df.at[i, "prediction"], domain=do)
        df.at[i, "shifted_prediction"] = df.at[i, "prediction"] + df.at[i, "shift"]
        df.at[i, "shifted_truth"] = df.at[i, "truth"] + df.at[i, "shift"]
        df.at[i, "shifted_residual"] = df.at[i, "shifted_prediction"] ^ df.at[i, "shifted_truth"]
        df.at[i, "normal_residual"] = df.at[i, "prediction"] ^ df.at[i, "truth"]
        df.at[i, "shifted_lzc"] = lzc(df.at[i, "shifted_residual"])
        df.at[i, "normal_lzc"] = lzc(df.at[i, "normal_residual"])

    diff = (df["shifted_lzc"] - df["normal_lzc"]).sum()
    # The bracketed value is the left window edge of the last processed row.
    last_left = center - do if center is not None else None
    print("domain: {} centeroffset: {} [{}] diff:{}".format(do, centeroffset, last_left, diff))
    return df

In [None]:
# Sweep domain 1..10 against centre offsets -4..4; each call prints one
# summary line.
for do in range(1,11):
    for centeroffset in range(-4,5):
        shifted_residual_calculation(truth, do=do, centeroffset=centeroffset)

Until now, the shift value depended on the previous LZC. To analyse the effects more precisely, the shift is now computed at a fixed position instead.

# Find out where the optimal shift position is
## One shift position for all

In [None]:
# Re-read the samples from "test.npf" as raw int64 values.
truth = np.fromfile("test.npf", "int64")
def shift_calculation_by_pos(prev_truth, prev_prev_truth, prediction, pos, bits=32):
    """Shifted residue calculation at a fixed bit position.

    Keeps the leading ``pos`` bits of ``prediction`` and fills the remaining
    bits with an alternating pattern that starts with the inverse of the last
    kept bit.

    Parameters
    ----------
    prev_truth, prev_prev_truth : unused; part of the common signature that
        ``shifted_residual_calculation_pos`` calls shift methods with.
    prediction : non-negative integer prediction.
    pos : number of leading bits to keep (0..bits).
    bits : width of the binary representation.

    Returns
    -------
    ``target - prediction``.
    """
    string_repr = np.binary_repr(prediction, bits)
    # NOTE: for pos == 0 the index is -1, i.e. the least significant bit.
    last_bit = string_repr[pos - 1]
    kept = string_repr[:pos]
    val = "10" if last_bit == "0" else "01"
    target = kept + val * ((bits - pos) // 2)
    # An odd number of remaining bits leaves the target one bit short;
    # repeat the second-to-last character to complete it.
    if len(target) == bits - 1:
        target = target + target[-2]
    shift = int(target, 2) - prediction
    return shift

In [None]:
def shifted_residual_calculation_pos(truth, shift_calculation_method, **kwargs):
    """Run the shifted-residual experiment with a pluggable shift method.

    Parameters
    ----------
    truth : sequence of integer samples.
    shift_calculation_method : callable invoked as
        ``method(prev_truth, prev_prev_truth, prediction, **kwargs)`` that
        returns the shift for one row.
    **kwargs : forwarded unchanged to ``shift_calculation_method``.

    Prints the total LZC gain (shifted - normal) and returns the populated
    DataFrame.
    """
    df = pd.DataFrame({"truth": truth})
    for column in ("prediction", "shifted_prediction", "shifted_truth", "shift",
                   "shifted_residual", "normal_residual", "shifted_lzc", "normal_lzc"):
        df[column] = 0

    for i in range(2, df.truth.size):
        # `df.at` instead of chained `df[col][i] = ...`, which does not write
        # through under pandas copy-on-write.
        df.at[i, "prediction"] = df.at[i - 1, "truth"]
        df.at[i, "shift"] = shift_calculation_method(
            df.at[i - 1, "truth"], df.at[i - 2, "truth"], df.at[i, "prediction"], **kwargs)
        df.at[i, "shifted_prediction"] = df.at[i, "prediction"] + df.at[i, "shift"]
        df.at[i, "shifted_truth"] = df.at[i, "truth"] + df.at[i, "shift"]
        df.at[i, "shifted_residual"] = df.at[i, "shifted_prediction"] ^ df.at[i, "shifted_truth"]
        df.at[i, "normal_residual"] = df.at[i, "prediction"] ^ df.at[i, "truth"]
        df.at[i, "shifted_lzc"] = lzc(df.at[i, "shifted_residual"])
        df.at[i, "normal_lzc"] = lzc(df.at[i, "normal_residual"])
    diff = (df["shifted_lzc"] - df["normal_lzc"]).sum()
    # Report the position this call was given, not whatever a notebook-global
    # `pos` happens to hold.
    print("pos {}, diff {}".format(kwargs.get("pos"), diff))
    return df

In [None]:
# Try every fixed shift position from 1 to 29; each call prints its LZC gain.
for pos in range(1,30):
    shifted_residual_calculation_pos(truth, shift_calculation_by_pos, pos=pos)

In [None]:
# Inspect one sample pair in detail: sample `pos` is the prediction and the
# following sample is the truth.
maximums = []
before = []
pos = 977
pred = truth[pos]; spred = np.binary_repr(pred,32)
tru = truth[pos+1]; stru = np.binary_repr(tru,32)
# Leading zeros of the plain XOR residual, i.e. the baseline to beat.
before_lcount = lzc(pred^tru)
# Print both bit strings split where prediction and truth first differ.
print(spred[:before_lcount], spred[before_lcount:], lzc(truth[pos-1] ^ pred))
print(stru[:before_lcount], stru[before_lcount:])
# LZC of the shifted residual for every keep-length k in 0..32.
lzcounts = [lzc((pred + shift_calculation_by_pos(None,None,pred, k))^( tru + shift_calculation_by_pos(None,None,pred, k))) for k in range(33)]
# Per-position table: k, shift, shifted truth, shifted LZC, baseline LZC.
for k in range(1,33):
    shift = shift_calculation_by_pos(None,None,pred, k)
    lcount = lzc((pred + shift)^( tru + shift))
    print("{:>3} {:>32b} {:>32b} {} {}".format(k, shift, shift+tru, lcount, before_lcount))
maximums.append(max(lzcounts))
before.append(before_lcount)

In [None]:
# For every consecutive sample pair, record the baseline LZC and the best
# LZC achievable over all shift positions 0..32.
maximums = []
before = []
# Iterate over all (prediction, truth) pairs the data provides instead of a
# hard-coded count. The loop variable stays `pos` because later cells read
# its final value.
for pos in range(truth.size - 1):
    pred = truth[pos]; spred = np.binary_repr(pred,32)
    tru = truth[pos+1]; stru = np.binary_repr(tru,32)
    before_lcount = lzc(pred^tru)
    # Compute each shift once and reuse it for both operands.
    lzcounts = [
        lzc((pred + shift) ^ (tru + shift))
        for shift in (shift_calculation_by_pos(None, None, pred, k) for k in range(33))
    ]
    maximums.append(max(lzcounts))
    before.append(before_lcount)

In [None]:
# Number of pairs for which some shift position beats the baseline LZC.
(np.array(maximums)>np.array(before)).sum()

In [None]:
# Fraction of all bits that are leading zeros (best shift, baseline).
# The denominator is 32 bits times the number of evaluated pairs.
np.array(maximums).sum()/(32*len(maximums)),np.array(before).sum()/(32*len(before))

In [None]:
# Bit string of the sample at the current `pos`.
np.binary_repr(truth[pos])

In [None]:
# Bit string of the sample following `pos`.
np.binary_repr(truth[pos+1])

In [None]:
# Boolean mask of the pairs where the best shift beats the baseline.
(np.array(maximums)>np.array(before))

In [None]:
# 32-bit alternating patterns: 0101...01 and 1010...10.
zero_one = 0x55555555
one_zero = 0xAAAAAAAA
zero_one, one_zero

In [None]:
# Clear the lowest `cut` bits of the sample after `pos` and show the result.
cut = 20
np.binary_repr((truth[pos+1] >> cut) << cut,32)

In [None]:
# Value of bit number `cut`, the lowest bit that survives the cut.
last_value = (truth[pos+1] >> cut) & 1
last_value

In [None]:
def get_shift_with_overflow(num, cut, bits=32):
    """Return the signed shift that moves ``num`` onto its alternating target.

    The target keeps all bits of ``num`` from bit ``cut`` upwards and replaces
    the lower ``cut`` bits with an alternating pattern whose top bit is the
    inverse of bit ``cut`` of ``num``.

    Parameters
    ----------
    num : non-negative integer.
    cut : number of low bits to replace (0..bits).
    bits : word width, 32 (default) or 64.

    Returns
    -------
    ``target - num``; may be negative.
    """
    assert bits in (32, 64), "Unknown bits {}".format(bits)
    zero_one = 1431655765 if bits == 32 else 6148914691236517205
    one_zero = 2863311530 if bits == 32 else 12297829382473034410
    base = (num >> cut) << cut
    last_value = (num >> cut) & 1
    # Start the low part with the inverse of the last kept bit.
    pattern = zero_one if last_value == 1 else one_zero
    delta = pattern >> (bits - cut)
    shift = base + delta - num
    return shift

In [None]:
def get_shift_without_overflow(num, cut, bits = 32):
    """
    Get shift value within the u32/u64 value range.

    The target keeps all bits of ``num`` from bit ``cut`` upwards and replaces
    the lower ``cut`` bits with an alternating pattern whose top bit is the
    inverse of bit ``cut`` of ``num``.

    Returns
    -------
    (add, shift) : ``shift`` is the non-negative distance between ``num`` and
        the target; ``add`` is True when the shift has to be added to reach
        the target and False when it has to be subtracted.
    """
    assert bits in (32,64), "Unknown bits {}".format(bits)
    zero_one = 1431655765 if bits==32 else 6148914691236517205
    one_zero = 2863311530 if bits==32 else 12297829382473034410
    base = (num >> cut) << cut
    last_value = (num >> cut) & 1
    if last_value == 1:
        goal = base + (zero_one >> (bits - cut))
        add = False
        shift = num - goal
    else:
        goal = base + (one_zero >> (bits - cut))
        add = True
        shift = goal - num
    # The low bits of `num` can lie on either side of the pattern, so the
    # difference may come out negative; flip the direction instead of
    # returning a negative magnitude.
    if shift < 0:
        add, shift = not add, -shift
    return add, shift

In [None]:
# Check that both shift helpers lead to the same shifted value for every
# sample (cut fixed at 20); `output` collects one boolean per sample.
output = []
for i in range(truth.size):
    o  = get_shift_with_overflow(truth[i],20) + truth[i]
    a,b = get_shift_without_overflow(truth[i],20) 
    # `a` is the direction flag (True -> add), `b` the shift amount.
    wo = truth[i]+b if a else truth[i]-b
    output.append(o == wo)

In [None]:
def shift_value(num, sign, delta):
    """Apply a (sign, delta) shift to ``num``: add when ``sign`` is truthy, else subtract."""
    if sign:
        return num + delta
    return num - delta

In [None]:
# Apply the (direction, amount) shift derived from the prediction to both
# the prediction and the true value of one sample pair.
pos = 48
prediction = truth[pos]
tru = truth[pos+1]

shift = get_shift_without_overflow(prediction, 20)
shifted_prediction = shift_value(prediction, *shift)
shifted_truth = shift_value(tru, *shift)

In [None]:
# Leading zeros of the residual after shifting.
lzc(shifted_prediction ^ shifted_truth)

In [None]:
# Leading zeros of the plain residual, for comparison.
lzc(truth[pos] ^ truth[pos+1])

# New start

In [None]:
# Re-read the samples from "test.npf" as raw int64 values.
truth = np.fromfile("test.npf", "int64")
def shift_calculation_by_pos(prev_truth, prev_prev_truth, prediction, pos, bits=32):
    """Shifted residue calculation at a fixed bit position.

    Keeps the leading ``pos`` bits of ``prediction`` and fills the remaining
    bits with an alternating pattern that starts with the inverse of the last
    kept bit.

    Parameters
    ----------
    prev_truth, prev_prev_truth : unused; part of the common signature that
        ``shifted_residual_calculation_pos`` calls shift methods with.
    prediction : non-negative integer prediction.
    pos : number of leading bits to keep (0..bits).
    bits : width of the binary representation.

    Returns
    -------
    ``target - prediction``.
    """
    string_repr = np.binary_repr(prediction, bits)
    # NOTE: for pos == 0 the index is -1, i.e. the least significant bit.
    last_bit = string_repr[pos - 1]
    kept = string_repr[:pos]
    val = "10" if last_bit == "0" else "01"
    target = kept + val * ((bits - pos) // 2)
    # An odd number of remaining bits leaves the target one bit short;
    # repeat the second-to-last character to complete it.
    if len(target) == bits - 1:
        target = target + target[-2]
    shift = int(target, 2) - prediction
    return shift

In [None]:
# Re-read the samples from "test.npf" as raw int64 values.
truth = np.fromfile("test.npf", "int64")
def shift_calculation_to_0(prev_truth, prev_prev_truth, prediction, pos, bits=32):
    """
    Shifted residue calculation that moves the prediction onto zero.

    Returns ``-prediction``, so ``prediction + shift == 0``. All other
    parameters are unused; presumably they exist so the function can be
    passed wherever ``shift_calculation_by_pos`` is accepted.
    """
    return -prediction

In [None]:
# Re-read the samples from "test.npf" as raw int64 values.
truth = np.fromfile("test.npf", "int64")
def shift_calculation_to_1(prev_truth, prev_prev_truth, prediction, pos, bits=32):
    """
    Shifted residue calculation that moves the prediction onto all ones.

    Returns the shift for which ``prediction + shift`` equals the largest
    ``bits``-bit value (``2**bits - 1``). ``prev_truth``, ``prev_prev_truth``
    and ``pos`` are unused; they are part of the common shift-method signature.
    """
    all_ones = (1 << bits) - 1
    return all_ones - prediction

In [None]:
def shift_calculation_compress(prediction, truth, bits=32):
    """Pick the value to emit for one (prediction, truth) pair.

    Two candidates are formed from the XOR residual: the residual itself and
    its 32-bit complement. XOR-ing each with the prediction gives two decoded
    values:

    * both decoded values above the prediction  -> return the complement,
    * both decoded values below the prediction  -> return the residual,
    * otherwise -> fall back to ``shift_calculation_by_pos`` at position 20.

    ``bits`` is accepted for signature compatibility but unused; the
    residuals are always handled as 32-bit values.
    """
    mask = 0xFFFFFFFF
    real_residual = int(prediction ^ truth) & mask
    inverted_residual = real_residual ^ mask  # 32-bit complement
    zero_residual = np.uint32(real_residual)
    once_residual = np.uint32(inverted_residual)
    decoded_plain = real_residual ^ int(prediction)
    decoded_inverted = inverted_residual ^ int(prediction)
    # Explicit `and` with separate comparisons: `&` binds tighter than `^`
    # and the comparison operators, so an unparenthesised
    # `a ^ p > p & b ^ p > p` is a chained comparison, not a conjunction.
    if decoded_plain > prediction and decoded_inverted > prediction:
        return once_residual
    elif decoded_plain < prediction and decoded_inverted < prediction:
        return zero_residual
    return shift_calculation_by_pos(None, None, prediction, 20)

In [None]:
def shift_calculation_decompress(prediction, residual, bit=32):
    """Counterpart of ``shift_calculation_compress`` (unfinished).

    Returns 4294967281 when both the residual and its 32-bit complement
    decode (XOR with the prediction) to a value above the prediction, and
    ``None`` in every other case, which is not implemented yet.

    ``bit`` is accepted but unused; the complement is always taken over
    32 bits.
    """
    inverse_residual = ~int(residual) & 0xFFFFFFFF
    # Explicit `and` with parenthesised XORs: `&` binds tighter than `^` and
    # the comparisons, so an unparenthesised form is a chained comparison.
    if (prediction ^ residual) > prediction and (prediction ^ inverse_residual) > prediction:
        # NOTE(review): 4294967281 == 0xFFFFFFF1; its meaning is not
        # documented in this notebook -- confirm.
        return 4294967281
    ## This is where we left off
    return None

In [None]:
# 32-character bit string of the value 30.
np.binary_repr(30, 32)