In [None]:
import numpy as np
import matplotlib.pyplot as plt
from collections import namedtuple
import pandas as pd
%matplotlib inline

In [None]:
def lzc(val, bits=32):
    """Return the number of leading zero bits of ``val`` in a ``bits``-wide word.

    Bits are inspected from the most significant position (``bits - 1``)
    downwards and counting stops at the first set bit. If none of the lowest
    ``bits`` bits is set, the result is ``bits``.
    """
    zeros = 0
    # Keep advancing while the bit currently under inspection is clear.
    while zeros < bits and not val & (1 << (bits - 1 - zeros)):
        zeros += 1
    return zeros


# Element-wise wrapper around lzc: two inputs (value, bit width), one output.
lzcu = np.frompyfunc(lzc, 2, 1)

In [None]:
def abs_diff(truth, pred):
    """Element-wise absolute difference that is safe for unsigned dtypes.

    Parameters
    ----------
    truth, pred : numpy.ndarray or pandas.Series of equal size
        Values are paired by position; a pandas index is ignored, so the
        function also works for Series whose labels are not 0..n-1.

    Returns
    -------
    sign : numpy.ndarray of bool
        True where ``truth > pred`` (the prediction was too low).
    result : numpy.ndarray with the dtype of ``truth``
        ``|truth - pred|``, computed as larger minus smaller so unsigned
        inputs never wrap around.

    Raises
    ------
    AssertionError
        If the two inputs differ in size.
    """
    truth_values = np.asarray(truth)
    pred_values = np.asarray(pred)
    assert truth_values.size == pred_values.size, "Different sizes"

    sign = truth_values > pred_values
    # Subtract the smaller from the larger value instead of branching per element.
    larger = np.maximum(truth_values, pred_values)
    smaller = np.minimum(truth_values, pred_values)
    result = (larger - smaller).astype(truth_values.dtype, copy=False)
    return sign, result

In [None]:
# Read the raw series from disk as a flat array of int64 values.
# NOTE(review): the path is relative to the notebook's working directory.
truth = np.fromfile("test.npf", "int64")

In [None]:
# Wrap the raw series in a DataFrame with a single 'truth' column.
df = pd.DataFrame({'truth':truth})

In [None]:
# Last-value predictor: every sample is predicted by its predecessor and the
# first sample, which has none, is predicted as 0. Passing fill_value keeps
# the column integer-typed; a plain shift() inserts NaN and sends every value
# through float64 before the uint32 cast below.
df['prediction'] = df.truth.shift(fill_value=0)
# From here on all columns are 32-bit unsigned words.
df = df.astype('uint32')

In [None]:
# Two residuals of the prediction: bitwise XOR and sign/magnitude difference.
df['xor'] = df['truth'] ^ df['prediction']
df['low_shot'], df['diff'] = abs_diff(df['truth'], df['prediction'])
# Leading-zero count of each residual within a 32-bit word.
df['lzc_xor'] = lzcu(df['xor'], 32)
df['lzc_diff'] = lzcu(df['diff'], 32)

In [None]:
# Peek at the first rows, including the derived residual columns.
df.head()

In [None]:
# Tally over all rows how DIFF compares with XOR.
all_cases = df.index.size
# Rows where the DIFF residual is numerically no larger than the XOR residual.
diff_better = df['xor'] >= df['diff']
# Rows where DIFF has strictly more leading zeros than XOR, and rows where it
# has at least two more. The per-row counts are already stored in lzc_xor and
# lzc_diff, so they are reused instead of re-running the Python-level lzc
# loop over both columns again.
diff_bits_better = df['lzc_xor'] < df['lzc_diff']
diff_bits_better_2bits = df['lzc_xor'] + 2 <= df['lzc_diff']
all_cases, diff_better.sum(), diff_bits_better.sum(), diff_bits_better_2bits.sum()

In [None]:
# Total number of leading zero bits over all rows, for DIFF and for XOR.
df.lzc_diff.sum(), df.lzc_xor.sum()
# diff_lzc - sign_bits, xor_lzc + first_bit
# NOTE(review): the line above presumably lists bookkeeping bits still to be
# accounted for (a sign bit for DIFF, a leading marker bit for XOR) — confirm.

In [None]:
# Number of rows where XOR leaves more leading zeros than DIFF.
# DataFrame.size is rows * columns, so count the True entries of the mask.
(df.lzc_xor > df.lzc_diff).sum()

The DIFF residual is often smaller than the XOR residual, either in absolute value or in the number of bits needed to represent it.

In [None]:
def shift_calculation_by_pos(prediction, pos, bits=32):
    """Shifted residue calculation.

    Builds a target word from ``prediction``: the ``bits - pos`` most
    significant bits are kept, and the remaining ``pos`` bits are replaced by
    an alternating pattern that starts with the complement of the last kept
    bit. The returned shift is ``target - prediction``.

    Parameters
    ----------
    prediction : int
        Value the shift is derived from; rendered with ``np.binary_repr`` at
        width ``bits``.
    pos : int
        Number of low-order bits overwritten by the alternating pattern.
    bits : int, optional
        Word width. The pattern length and the odd-length fix-up are derived
        from this width rather than from a fixed 32/31, so other widths yield
        a ``bits``-wide target.

    Returns
    -------
    int
        ``target - prediction``.
    """
    keep = bits - pos  # number of leading bits copied from the prediction
    bit_string = np.binary_repr(prediction, bits)
    prefix = bit_string[:keep]
    # With keep == 0 the index -1 picks the lowest bit of the prediction.
    pattern = "10" if bit_string[keep - 1] == "0" else "01"
    target = prefix + pattern * ((bits - keep) // 2)
    if len(target) == bits - 1:
        # An odd number of bits had to be filled: continue the alternation
        # by repeating the second-to-last bit.
        target += target[-2]
    return int(target, 2) - prediction

def shift_pos_lzc(pred, truth, pos):
    """XOR residual of ``pred`` and ``truth`` after adding the shift for ``pos``.

    The shift is derived from ``pred`` alone and added to both values before
    they are XORed. Despite the name, no leading-zero count is taken here.
    """
    offset = shift_calculation_by_pos(pred, pos)
    return (offset + pred) ^ (offset + truth)


# Element-wise wrapper: three inputs (pred, truth, pos), one output.
shift_pos = np.frompyfunc(shift_pos_lzc, 3, 1)

In [None]:
# Sanity check: applying the frompyfunc wrapper to whole columns must give
# the same values as calling it once per row.
vectorized = shift_pos(df.prediction, df.truth, 31)
iterative = np.zeros_like(vectorized)
for i in range(vectorized.size):
    # Row-by-row reference computation.
    iterative[i] = shift_pos(df.prediction[i], df.truth[i], 31)
np.array_equal(vectorized, iterative) ## Vectorisation functions as expected

In [None]:
# Leading-zero count (32-bit) of the XOR residual after shifting with pos=31.
df['lzc_s31'] = lzcu(shift_pos(df.prediction, df.truth, 31), 32)

In [None]:
# Plot the three leading-zero counts for every row except the first.
# Columns are selected by name: a positional slice such as iloc[:, -3:]
# silently picks other columns once more have been appended to df.
df[['lzc_xor', 'lzc_diff', 'lzc_s31']].iloc[1:].plot(figsize=(15, 5));
plt.savefig('diff_s31_xor.svg')

In [None]:
# Three all-rows checks on the leading-zero counts:
# s31 <= diff, s31 <= xor, xor <= s31.
(
    bool((df['lzc_s31'] <= df['lzc_diff']).all()),
    bool((df['lzc_s31'] <= df['lzc_xor']).all()),
    bool((df['lzc_xor'] <= df['lzc_s31']).all()),
)

In [None]:
# Tally of DIFF against the shifted XOR (pos=31) over all rows.
all_cases = len(df.index)
# The value comparison is disabled here; None keeps its slot in the result tuple.
# diff_better = df.loc[:,'xor'] >= df.loc[:,'diff']
diff_bits_better = df['lzc_s31'] < df['lzc_diff']
diff_bits_better_2bits = df['lzc_s31'] + 2 <= df['lzc_diff']
all_cases, None, diff_bits_better.sum(), diff_bits_better_2bits.sum()

In [None]:
# Result record of shift_pos_lzc_both, built once instead of on every call.
ShiftPosLZC = namedtuple(
    "ShiftPosLZC",
    "lzc1,lzc2,lzcX,lzcD,shift1,shift2,pred_s1,pred_s2,"
    "truth_s1,truth_s2,residual_s1,residual_s2,xor,diff",
)


def shift_pos_lzc_both(pred, truth, pos):
    """Evaluate the shift at ``pos`` and the next different shift below it.

    ``shift1`` is the shift for ``pos``. ``shift2`` is the first shift found
    at a lower position whose shifted prediction differs from the one
    obtained with ``shift1``; if no such position exists, it is the shift of
    the lowest position tried.

    Returns
    -------
    ShiftPosLZC
        ``lzc1``/``lzc2``: leading-zero counts of the XOR residuals under
        shift 1 and shift 2; ``lzcX``/``lzcD``: counts for the plain XOR and
        for the absolute difference; followed by both shifts, the shifted
        predictions and truths, both shifted residuals, the plain XOR and the
        absolute difference.
    """
    shift1 = shift_calculation_by_pos(pred, pos)
    pred_s1 = shift1 + pred
    truth_s1 = shift1 + truth

    # Walk towards lower positions until the shifted prediction changes.
    # At most 31 steps are taken and position 0 is never passed: a negative
    # position would index beyond the bit string and raise IndexError.
    shift2 = shift1
    for step in range(1, min(pos, 31) + 1):
        shift2 = shift_calculation_by_pos(pred, pos - step)
        if shift2 + pred != pred_s1:
            break
    pred_s2 = shift2 + pred
    truth_s2 = shift2 + truth

    residual_s1 = truth_s1 ^ pred_s1
    residual_s2 = truth_s2 ^ pred_s2
    xor = truth ^ pred
    diff = max(truth, pred) - min(truth, pred)
    return ShiftPosLZC(
        lzc(residual_s1), lzc(residual_s2), lzc(xor), lzc(diff),
        shift1, shift2, pred_s1, pred_s2, truth_s1, truth_s2,
        residual_s1, residual_s2, xor, diff,
    )

In [None]:
# Inspect one consecutive pair of samples in detail.
ix = 342
both = shift_pos_lzc_both(truth[ix - 1], truth[ix], 31)
print(ix)
for x in dir(both):
    if x[0] in ('_', 'c', 'i'):
        # Skip private attributes and the tuple methods count/index.
        continue
    val = getattr(both, x)
    if x[0] in ('r', 'x', 'd', 'l'):
        # residual_*, xor, diff and lzc* are shown as plain numbers.
        print("{:>13} {}".format(x, val))
    else:
        # Shifts and shifted values are shown as zero-padded 32-bit binary.
        print("{:>13} {:032b}".format(x, val))

In [None]:
# Are there cases in which s1 or s2 is always better? What are the conditions for that?

In [None]:
# lzc2 for every consecutive pair of samples, i.e. the leading-zero count
# under the first shift that differs from the pos=31 one. Row 0 has no
# predecessor and keeps 0.
# NOTE(review): the column is named lzc_s30, but that second shift is not
# necessarily the one at position 30 — confirm the naming.
df['lzc_s30'] = 0
# Fill the column in one assignment and address it by name: writing through
# df.iloc[k, -1] hits whatever column happens to be last and is slow per row.
df.iloc[1:, df.columns.get_loc('lzc_s30')] = [
    shift_pos_lzc_both(truth[k - 1], truth[k], 31).lzc2
    for k in range(1, df.index.size)
]

In [None]:
# Inspect the first rows again now that lzc_s30 has been added.
df.head()

In [None]:
# Column-wise totals over all rows, cast to int.
df.sum(axis=0).astype(int)

In [None]:
# Per row, take the better of the two shifted variants; report the total
# leading zeros and the number of rows where that best value beats DIFF.
# Columns are selected by name, since iloc[:, -2:] depends on column order.
best_shifted = df[['lzc_s31', 'lzc_s30']].max(axis=1)
best_shifted.sum(), (best_shifted > df.lzc_diff).sum()

In [None]:
# Smallest per-row difference between the two shifted leading-zero counts.
(df['lzc_s31'] - df['lzc_s30']).min()

In [None]:
# Minimum of each leading-zero column over all rows except the first.
# Columns are named explicitly; iloc[:, -4:] depends on column order.
df[['lzc_xor', 'lzc_diff', 'lzc_s31', 'lzc_s30']].iloc[1:].min()

In [None]:
# Position of the row with the smallest absolute difference.
df['diff'].argmin()

In [None]:
# Show all columns of the row at position 112.
# NOTE(review): 112 is presumably the argmin of 'diff' reported by the
# previous cell — confirm, and consider deriving it instead of hard-coding.
df.iloc[112,]

In [None]:
# how about making the goal not dependent
def shift_calculation_by_pos_hardcoded(prediction, pos, bits=32):
    """Shifted residue calculation.

    Keeps the ``bits - pos`` most significant bits of ``prediction`` and
    replaces the remaining ``pos`` bits by an alternating pattern that starts
    with the complement of the last kept bit. Returns
    ``target - prediction``.

    NOTE(review): the body mirrors ``shift_calculation_by_pos``; the pattern
    still depends on the prediction, so the "hardcoded" goal hinted at by the
    name is presumably not implemented yet — confirm.

    Parameters
    ----------
    prediction : int
        Value the shift is derived from; rendered with ``np.binary_repr`` at
        width ``bits``.
    pos : int
        Number of low-order bits overwritten by the alternating pattern.
    bits : int, optional
        Word width. The pattern length and the odd-length fix-up are derived
        from this width rather than from a fixed 32/31.

    Returns
    -------
    int
        ``target - prediction``.
    """
    keep = bits - pos  # number of leading bits copied from the prediction
    bit_string = np.binary_repr(prediction, bits)
    prefix = bit_string[:keep]
    # With keep == 0 the index -1 picks the lowest bit of the prediction.
    pattern = "10" if bit_string[keep - 1] == "0" else "01"
    target = prefix + pattern * ((bits - keep) // 2)
    if len(target) == bits - 1:
        # An odd number of bits had to be filled: continue the alternation
        # by repeating the second-to-last bit.
        target += target[-2]
    return int(target, 2) - prediction

def shift_pos_lzc(pred, truth, pos):
    """Return ``(s + pred) ^ (s + truth)`` where ``s`` is the shift for ``pos``.

    NOTE(review): this re-defines the function of the same name from earlier
    in the notebook with the same logic, and it calls
    ``shift_calculation_by_pos`` rather than the ``_hardcoded`` variant
    defined just above — confirm which one is intended.
    """
    shift = shift_calculation_by_pos(pred, pos)
    shifted_pred, shifted_truth = shift + pred, shift + truth
    return shifted_pred ^ shifted_truth


# Rebuild the element-wise wrapper (pred, truth, pos) -> residual.
shift_pos = np.frompyfunc(shift_pos_lzc, 3, 1)

In [None]:
# The two 32-character alternating bit patterns: one starting with 0, one
# starting with 1, shown together with their integer values.
v1 = '01' * 16
v2 = '10' * 16
v1, v2, int(v1, 2), int(v2, 2)

In [None]:
# Number of rows where plain XOR has more than two leading zeros beyond the
# shifted (pos=31) XOR.
(df.lzc_s31 + 2 < df.lzc_xor).sum()