In [None]:
import numpy as np
import pandas as pd
import struct
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Element-wise XOR exposed through np.frompyfunc (2 inputs, 1 output) so it can
# be applied to the arrays returned by the other frompyfunc helpers in this
# notebook (it is called on the output of to_u32v in mean_lzc).
npxor = np.frompyfunc(np.bitwise_xor, 2, 1)

In [None]:
def lzc(t):
    """Count the leading zero bits of ``t`` viewed as a 32-bit word.

    Parameters
    ----------
    t : int
        Integer (Python or NumPy). Only its low 32 bits are considered, so a
        negative value is read as its two's-complement bit pattern.

    Returns
    -------
    int
        Number of leading zeros, from 0 (bit 31 set) up to 32 (``t`` is zero).
    """
    # Masking maps negative inputs onto their unsigned 32-bit pattern; the
    # string form from binary_repr would carry a '-' sign and miscount.
    word = int(t) & 0xFFFFFFFF
    # bit_length() is 0 for zero, so an all-zero word correctly yields 32.
    return 32 - word.bit_length()
# Element-wise version of lzc for arrays.
lzcv = np.frompyfunc(lzc, 1, 1)

In [None]:
def to_u32(f):
    """Return the IEEE-754 single-precision bit pattern of ``f`` as an unsigned int.

    Parameters
    ----------
    f : float
        Value to encode; it is rounded to float32 by ``struct.pack('>f', ...)``.

    Returns
    -------
    int
        The 32 bits of the float32 encoding, in the range ``0 .. 2**32 - 1``.

    Raises
    ------
    OverflowError
        Raised by ``struct.pack`` when ``f`` is too large for a float32.
    """
    raw = struct.pack('>f', f)
    # Unpack as unsigned ('>I'): a signed unpack would turn every negative
    # float (sign bit set) into a negative integer instead of a u32 pattern.
    return struct.unpack('>I', raw)[0]
# Element-wise version of to_u32 for arrays.
to_u32v = np.frompyfunc(to_u32, 1, 1)

In [None]:
def mean_lzc(m, o, size=1000, rng=None):
    """Mean leading-zero count of ``bits(sample) XOR bits(m)`` for normal samples.

    Draws ``size`` samples from a normal distribution with mean ``m`` and
    standard deviation ``o``, converts each sample and ``m`` itself with
    ``to_u32``, XORs every sample pattern with the pattern of ``m`` and
    averages the leading-zero counts of the XOR results.

    Parameters
    ----------
    m : float
        Mean of the distribution; also the reference value for the XOR.
    o : float
        Standard deviation of the distribution.
    size : int, optional
        Number of samples to draw (default 1000, the previously fixed value).
    rng : object with a ``normal(loc, scale, size=...)`` method, optional
        Random source, e.g. ``np.random.default_rng(seed)``, for reproducible
        runs. When omitted the global ``np.random`` state is used, as before.

    Returns
    -------
    float
        Average leading-zero count over the drawn samples.
    """
    sampler = np.random if rng is None else rng
    dist = sampler.normal(m, o, size=size)
    truth = to_u32(m)
    vals = to_u32v(dist)
    xors = npxor(vals, truth)
    lzcs = lzcv(xors)
    return lzcs.mean()

## TODO

- [x] Plot LZC($\mu$)
- [x] Plot LZC($\sigma$)
- [ ] Find out which $\sigma$ values we are actually dealing with for the different predictors on climate data
- [ ] Plot the compression ratio as a function of $\mu$
- [ ] Plot the compression ratio as a function of $\sigma$
- [ ] Mark the 0101010101 shift values in the plot
- [ ] Mark the 1010101010 shift values in the plot

# LZC($\mu$)

In [None]:
# Sweep the mean over 0..999; every sigma series is a fixed fraction of mu
# (20 %, 10 %, 5 % and 1 %, in that order).
mu = np.arange(1000)
sigma, sigma2, sigma3, sigma4 = (mu * fraction for fraction in (.20, .10, .05, .01))

In [None]:
# Mean leading-zero count along the mu sweep, one list per relative sigma
# (20 %, 10 %, 5 %, 1 %), evaluated in that order.
result, result2, result3, result4 = (
    [mean_lzc(mean, spread) for mean, spread in zip(mu, spreads)]
    for spreads in (sigma, sigma2, sigma3, sigma4)
)

In [None]:
# Each marker factor paired with the colour of its dotted vertical line in the
# plots below; the two parallel lists are derived from these pairs.
_factor_styles = [
    (1.75, 'crimson'),
    (1.625, 'magenta'),
    (1.5, 'skyblue'),
    (1.25, 'limegreen'),
    (1.125, 'olivedrab'),
    (1, 'goldenrod'),
]
factors = [factor for factor, colour in _factor_styles]
fcolors = [colour for factor, colour in _factor_styles]

In [None]:
# LZC(mu): mean leading-zero count against the mean, one line per relative sigma.
fig, ax = plt.subplots(figsize=(15, 5))
lzc_by_mu = pd.DataFrame({'20%': result, '10%': result2, '5%': result3, '1%': result4})
# Row 0 is skipped: there mu and sigma are both zero (presumably left out
# because the distribution is degenerate at that point).
sns.lineplot(data=lzc_by_mu[1:], ax=ax)
# Dotted markers at factor * 2**x for x = 4..9, coloured per factor.
for f, color in zip(factors, fcolors):
    for x in range(4, 10):
        ax.axvline(2**x * f, alpha=.5, color=color, ls=":")
# The row index 0..999 coincides with the swept mu values.
ax.set(xlabel=r"$\mu$", ylabel="mean leading-zero count")
ax.legend();

In [None]:
# Key for the dotted vertical lines in the plot above: factor, 8-bit binary
# form of int(base * factor), and the line colour.
base = 128
for f, color in zip(factors, fcolors):
    bits = np.binary_repr(int(base * f), 8)
    print("{:.4f} {:>9} {:>10}".format(f, bits, color))

# LZC($\sigma$)

In [None]:
# Sweep sigma over 0..999 while the mean stays fixed; one constant-mean list
# per multiple of 128 (x2, x1.5, x1.125, x1), each as long as the sigma sweep.
sigma = np.arange(1000)
mu, mu2, mu3, mu4 = ([128 * scale] * sigma.size for scale in (2, 1.5, 1.125, 1))

In [None]:
# Mean leading-zero count along the sigma sweep, one list per fixed mean
# (128x2, 128x1.5, 128x1.125, 128x1), evaluated in that order.
result, result2, result3, result4 = (
    [mean_lzc(mean, spread) for mean, spread in zip(means, sigma)]
    for means in (mu, mu2, mu3, mu4)
)

In [None]:
# LZC(sigma): mean leading-zero count against sigma, one line per fixed mean.
fig, ax = plt.subplots(figsize=(15, 5))
lzc_by_sigma = pd.DataFrame(
    {'128x2': result, '128x1.5': result2, '128x1.125': result3, '128x1': result4}
)
# Row 0 (sigma == 0) is skipped, as in the original slice [1:].
sns.lineplot(data=lzc_by_sigma[1:], ax=ax)
# Dotted markers at 128 * factor, coloured per factor.
for f, color in zip(factors, fcolors):
    ax.axvline(128 * f, alpha=.5, color=color, ls=":")
# The row index 0..999 coincides with the swept sigma values.
ax.set(xlabel=r"$\sigma$", ylabel="mean leading-zero count")
ax.legend();

- Abstand zu den anderen Linien maximal?

In [None]:
# Key for the dotted vertical lines in the plot above: factor, 8-bit binary
# form of int(base * factor), and the line colour.
base = 128
for f, color in zip(factors, fcolors):
    pattern = np.binary_repr(int(base * f), 8)
    print("{:.4f} {:>9} {:>10}".format(f, pattern, color))