In [39]:
# python imports
import os
import time
import functools

import numpy as np
from scipy import stats
from scipy.stats import uniform, norm, t as tdist, ttest_ind


In [40]:
# setup graphs
%matplotlib notebook
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as tkr
from matplotlib import rc

plt.rcParams['figure.figsize'] = 8, 3.5
plt.rcParams['figure.facecolor'] = 'white'
# plt.rcParams['figure.dpi'] = 100;

# numpy print format
np.set_printoptions(linewidth=120, precision=4, suppress=False, formatter={'float': '{:5.3e}'.format})

In [41]:
# local imports
import util.tests as tests
import util.dwdb_reader as io
import util.func as f


In [42]:
small_value = 1e-300
tvla_thrd = 4.5 * np.sqrt(2)

tracenum = 1000
step = 50
# Not needed now
# sample_start = 120 # 30  100    # 40
# sample_end =   130 # 50  500    # 180
# rlen = sample_end - sample_start

PROJECT_ROOT_DIR=os.getcwd()
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images")
if not os.path.exists(IMAGES_PATH): os.makedirs(IMAGES_PATH)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

## Load traces and classification

In [43]:
# Read first sample point, split by classification (2 traces, c1 and c2)
print("Reading TP traces")
dsrtp = io.dwdb_reader(r'TP_db_small/log.dwdb', r'TP_db_small')
data_batch_tp, meta_batch_tp = dsrtp.read_batch(10)
print("Done")

print("Reading FP traces")
dsrfp = io.dwdb_reader(r'FP_db_small/log.dwdb', r'FP_db_small')
data_batch_fp, meta_batch_fp = dsrfp.read_batch(10)
print("Done")

tp_range, fp_range = slice(0,10), slice(0,10)
data_batch0, meta_batch0 = data_batch_fp[fp_range], meta_batch_fp[fp_range]
data_batch1, meta_batch1 = data_batch_tp[tp_range], meta_batch_tp[tp_range]

Reading TP traces
Done
Reading FP traces
Done


In [44]:
# Convert traces to numpy
traces_fp = np.asarray(data_batch0)
traces_tp = np.asarray(data_batch1)
assert traces_fp.shape == traces_tp.shape
traces_fp.shape, traces_tp.shape

((10, 1000), (10, 1000))

In [45]:
# Extract classifiers, convert to numpy
meta_prep = [m['other'].split() for m in meta_batch0]
classifiers = [s.split('=')[1] for m in meta_prep for s in m if s.startswith('s=')]
classifiers = [str(int(c)-1) for c in classifiers]
classifiers_fp = np.asarray(classifiers).reshape(-1, 2)
meta_prep = [m['other'].split() for m in meta_batch1]
classifiers = [s.split('=')[1] for m in meta_prep for s in m if s.startswith('s=')]
classifiers_tp = np.asarray(classifiers).reshape(-1, 2)

classifiers_fp.shape, classifiers_tp.shape

((5, 2), (5, 2))

In [46]:
# Ensure dbs classified correctly
ct_fp, ct_tp = classifiers_fp.astype(int), classifiers_tp.astype(int)
assert np.all(ct_tp[:,1] == ct_fp[:,1])
assert np.all(ct_tp[:,0] != ct_fp[:,1])

In [47]:
# Postprocessing 1 (Convert to dwdb format)
ds_num, tr_num = traces_fp.shape
# 1. 2 traces should be transposed and merged in 1 sample point
traces_fp = traces_fp.reshape(ds_num//2, -1)
traces_tp = traces_tp.reshape(ds_num//2, -1)
# 2. classifiers should be stretched as the 1st half of the sample point is R1 and 2nd one is R2
classifiers_fp = np.repeat(classifiers_fp[0], tr_num)
classifiers_tp = np.repeat(classifiers_tp[0], tr_num)
# traces_np[:4], classifiers_np[:4]
print(traces_fp.shape, classifiers_fp.shape, traces_tp.shape, classifiers_tp.shape)
print(np.all(classifiers_fp == classifiers_tp))

((5, 2000), (2000,), (5, 2000), (2000,))
True


In [48]:
# Postprocessing 2 (make multipoint (TP, FP, TP, FP, ...))
# Given that first 1000 traces are classified 0s and second 1000 are 1s, 
# merge traces to one dataset, where even sample point are TP and odd ones are FP
# Classifires are the same
pt_num, tr_num = traces_fp.shape
traces_np = np.empty((tr_num, 2*pt_num))
for i in range(pt_num):
    traces_np[:,2*i]   = traces_tp[i]
    traces_np[:,2*i+1] = traces_fp[i]
classifiers_np = classifiers_fp
traces_np.shape

(2000, 10)

In [49]:
# Sanity check, print border
print(traces_np[997:1003])
print(classifiers_np[997:1003])

[[5.000e+00 4.685e+00 5.000e+00 4.218e+00 5.000e+00 3.705e+00 5.000e+00 6.654e+00 5.000e+00 5.968e+00]
 [5.000e+00 7.099e+00 5.000e+00 4.624e+00 5.000e+00 6.558e+00 5.000e+00 4.657e+00 5.000e+00 4.292e+00]
 [5.000e+00 2.780e+00 5.000e+00 5.673e+00 5.000e+00 3.604e+00 5.000e+00 4.471e+00 5.000e+00 4.240e+00]
 [2.169e+00 4.596e+00 5.650e+00 7.707e+00 8.076e+00 4.470e+00 3.982e+00 5.849e+00 7.067e+00 5.628e+00]
 [6.655e+00 6.438e+00 5.545e+00 3.710e+00 7.053e+00 7.960e+00 3.969e+00 1.547e+00 3.317e+00 4.153e+00]
 [1.670e+00 3.416e+00 3.456e+00 1.011e+01 4.063e+00 3.773e+00 2.534e+00 4.420e+00 4.363e+00 4.298e+00]]
['0' '0' '0' '1' '1' '1']


In [50]:
# Postprocessing 3 (randomize traces)
# classifiers go as [000 all zeros, 111 all ones], it breaks evolution
# data should be shuffled, taking classifiers_fp as a common one
tracenum = tr_num # update tracenum to the actual value
perm_ind = np.random.permutation(np.arange(tracenum))
traces_np, classifiers_np = traces_np[perm_ind], classifiers_np[perm_ind]
traces_np.shape, classifiers_np.shape

((2000, 10), (2000,))

In [51]:
# Make sure traces are shuffled right, print the border
print(traces_np[997:1003])
print(classifiers_np[997:1003])

[[4.508e+00 7.644e+00 6.623e+00 5.838e+00 2.617e+00 8.160e-01 7.588e+00 7.795e+00 7.268e+00 7.528e+00]
 [5.000e+00 6.810e+00 5.000e+00 6.222e+00 5.000e+00 7.054e+00 5.000e+00 8.923e+00 5.000e+00 3.972e+00]
 [7.728e+00 6.333e+00 1.792e+00 3.399e+00 6.187e+00 5.623e+00 2.554e+00 4.281e+00 7.729e+00 7.725e+00]
 [4.524e+00 4.746e+00 3.318e+00 7.994e-01 7.308e+00 4.105e+00 2.354e+00 6.691e+00 6.709e+00 9.266e+00]
 [3.750e+00 5.931e+00 8.568e+00 5.060e+00 5.007e+00 4.168e+00 1.600e+00 7.341e+00 3.565e+00 5.237e+00]
 [4.732e+00 6.704e+00 3.455e+00 1.397e+00 9.001e+00 3.512e+00 5.707e+00 3.170e+00 6.079e+00 7.462e+00]]
['1' '0' '1' '1' '1' '1']


## Sanity check. Make sure it leaks

In [52]:
np.mean(traces_np, axis=0), np.var(traces_np, axis=0)

(array([4.817e+00, 4.998e+00, 4.859e+00, 4.968e+00, 5.146e+00, 4.990e+00, 4.854e+00, 5.027e+00, 5.150e+00, 5.007e+00]),
 array([1.880e+00, 3.844e+00, 1.920e+00, 3.909e+00, 1.928e+00, 3.997e+00, 2.056e+00, 3.674e+00, 1.958e+00, 3.851e+00]))

In [53]:
tt = tests.fvr_ttest(traces_np, classifiers_np)
np.abs(tt)

array([6.017e+00, 4.576e+00, 4.579e+00, 5.157e+00, 4.716e+00, 4.864e+00, 4.585e+00, 5.083e+00, 4.824e+00, 5.258e+00])

In [54]:
rangex = np.arange(2)
voltage_trace1 = np.mean(traces_np[:100], axis=0)
voltage_trace = np.repeat(voltage_trace1, len(rangex))
tt_trace = np.repeat(np.abs(tt), len(rangex))

fig, ax1 = plt.subplots()
ax1.set_ylabel('voltage', color='g')
ax1.tick_params(axis='y', labelcolor='g')
ax2 = ax1.twinx()
ax2.set_ylabel('leakage', color='r')
ax2.tick_params(axis='y', labelcolor='r')
ax1.set_ylim(4.5, 5.5)
ax1.plot(voltage_trace, color='g')   # signal
ax2.set_ylim(3, 7)
ax2.plot(tt_trace, color='r')     # leak
plt.grid()
plt.show()

<IPython.core.display.Javascript object>

## Evolution

In [55]:
evo_step = step
evo_tstat = np.empty(((tracenum+1)/evo_step, traces_np.shape[1]))
evo_xrange = np.arange(evo_step, tracenum+1, evo_step)
for i, ex in enumerate(evo_xrange):
    start = time.time()
    tt = tests.fvr_ttest(traces_np[:ex], classifiers_np[:ex])
    evo_tstat[i,:] = tt
#     if i % evo_step == 0:
#         print("Trace count: {}, tt: {}, done in {}".format(ex, tt, time.time() - start))
evo_tstat = np.abs(evo_tstat) # comment out for intermediates

# Max leakage info
tr_num, max_leak_pt = np.unravel_index(np.argmax(evo_tstat), evo_tstat.shape)
max_leak_val, max_leak_at = evo_tstat[tr_num, max_leak_pt], tr_num*evo_step
max_leak_pv = tdist.sf(max_leak_val, max_leak_at)
print("Max leak at point: {}, value: {:5.3e} (pv: {:5.3e}), trace: {}".format(
    max_leak_pt, max_leak_val, max_leak_pv, max_leak_at))

Max leak at point: 0, value: 6.017e+00 (pv: 1.059e-09), trace: 1950


In [56]:
step_num, pt_num = np.shape(evo_tstat)

fig = plt.figure()
plt.ylabel('T score')
plt.xlabel('Trace Number')
# plt.axhline(y=tvla_thrd, color='r', linestyle=':')

x_axis = np.arange(step_num) * evo_step

# leak history for each TP point (even, green)
for j in range(0, pt_num, 2):
    plk = evo_tstat[:, j]
    plt.plot(x_axis, plk, linewidth=0.5, linestyle='-', color = 'g', zorder = j)

# leak history for each FP point (odd, red)
for j in range(1, pt_num, 2):
    plk = evo_tstat[:, j]
    plt.plot(x_axis, plk, linewidth=0.5, linestyle='-', color = 'r', zorder = j)

plt.grid(alpha=.5, linewidth=.25)
plt.show()

<IPython.core.display.Javascript object>

## Bootstrapping

In [21]:
pt_num = traces_np.shape[1]
boots_list = [21, 51, 81, 131, 181, 251]  # iterations in a boot
boot_step = 200 #step
boot_till = tracenum
ld_stat = stats.uniform
ld_cdf = ld_stat().cdf

tt_boots_per_step = []  # cannot be numpy array due to different lengths per boot
tp_boots_per_step = []  # cannot be numpy array due to different lengths per boot
ksp_per_step = []
step_ranges = np.arange(boot_step, boot_till+1, boot_step)
for steprange in step_ranges:
    # Run all boots for a step
    tt_full_per_boot, tp_full_per_boot, ksp_per_boot = [], [], []
    for j, boots in enumerate(boots_list):
        start = time.time()
        tt_full = np.empty((boots, pt_num), dtype=np.float64)
        tp_full = np.empty_like(tt_full)
        ks_full = np.empty_like(tp_full[0])
#         boot_idxs = np.random.randint(steprange, size=(boots, steprange))  # From the beginning
        boot_idxs = np.random.randint(steprange-boot_step, steprange, size=(boots, boot_step)) # Rrange only
        for i, bi in enumerate(boot_idxs):
            tt_full[i] = tests.fvr_ttest(traces_np[bi], classifiers_np[bi]) # calc tt and keep it
            tp_full[i] = tdist.sf(tt_full[i], steprange)                    # convert tt to pv and keep it
        # Run ks-test for p-value set in each sample point
        for i, pt_boots in enumerate(tp_full.T):
            ks_full[i] = f.kstest(pt_boots, ld_cdf)[1]                      # keep pv only
        tt_full_per_boot.append(tt_full)
        tp_full_per_boot.append(tp_full)
        ksp_per_boot.append(ks_full)
    tt_boots_per_step.append(tt_full_per_boot)  # cannot be numpy array due to different lengths per boot
    tp_boots_per_step.append(tp_full_per_boot)  # cannot be numpy array due to different lengths per boot
    ksp_per_step.append(ksp_per_boot)
ksp_per_step = np.asarray(ksp_per_step)
ksp_per_step.shape # step_ranges, boots_list, point

(10L, 6L, 10L)

# Bootstrapping vizualization

In [22]:
# Prepare data
ksp_per_step[np.where(np.isnan(ksp_per_step))]     = small_value
ksp_per_step[np.where(ksp_per_step < small_value)] = small_value

x_axis = step_ranges
step_num, boot_num, pt_num = np.shape(ksp_per_step)

cpalette = ['g', 'r'] # TP green, FP red

boot_idx = 5

In [23]:
# ks results visualization
fig, axs = plt.subplots(2, sharey=True, figsize=(8, 5))
axs[0].set(xlabel='Trace Number', ylabel='ks: -log10(pv)')
axs[1].set(xlabel='Sorted ks values', ylabel='ks: -log10(pv)')

# Boot evolution for TP (even, green)
plot_evo = ksp_per_step[:, boot_idx].T
for j in range(0, pt_num, 2):
    kspl = -np.log10(plot_evo[j]) # Unsorted
    axs[0].plot(x_axis, kspl, linewidth=0.75, color = 'g', zorder = j)
    kspls = np.sort(kspl)         # Sorted
    axs[1].plot(x_axis, kspls, linewidth=0.75, color = 'g', zorder = j)

# Boot evolution for FP (odd, red)
for j in range(1, pt_num, 2):
    kspl = -np.log10(plot_evo[j]) # Unsorted
    axs[0].plot(x_axis, kspl, linewidth=0.75, color = 'r', zorder = j)
    kspls = np.sort(kspl)         # Sorted
    axs[1].plot(x_axis, kspls, linewidth=0.75, color = 'r', zorder = j)

for a in axs: a.minorticks_on()
for a in axs: a.set_xticks(x_axis)
for a in axs: a.grid(alpha=.5, linewidth=.25)
plt.show()
# plot_evo.shape, x_axis.shape

<IPython.core.display.Javascript object>

# p-value distribution visualization

In [24]:
step_idx, boot_idx = 2, 3

# bvs = tt_boots_per_step[step_idx][boot_idx].T  # tt value distribution per step per boot
bvs = tp_boots_per_step[step_idx][boot_idx].T   # tt-pv value distribution per step per boot

pt_num, boot_cnt = bvs.shape
kspv = ksp_per_step[step_idx, boot_idx]

fig, axs = plt.subplots(2, sharey=True, figsize=(8, 10))
for a in axs: a.set(xlabel='Boot Number', ylabel='pvalue')

# pv distribution for TP (even, green) and FP (odd, red)
for j in range(0, pt_num, 2):
    axs[0].set_title('TP pvalue distribution')
    l = 'pt: {}, ks: {:9.3}, tt: {:5.3}'.format(j, kspv[j], tt[j])
    axs[0].plot(np.arange(boot_cnt), bvs[j], linewidth=0.75, zorder = j, label=l)

for j in range(1, pt_num, 2):
    axs[1].set_title('FP pvalue distribution')
    l = 'pt: {}, ks: {:9.3}, tt: {:5.3}'.format(j, kspv[j], tt[j])
    axs[1].plot(np.arange(boot_cnt), bvs[j], linewidth=0.75, zorder = j, label=l)

for a in axs: a.legend(loc=6, framealpha=0.3, fontsize=8)
fig.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [25]:
step_idx, boot_idx = 2, 3

# bvs = tt_boots_per_step[step_idx][boot_idx].T  # tt value distribution per step per boot
bvs = tp_boots_per_step[step_idx][boot_idx].T   # tt-pv value distribution per step per boot

pt_num, boot_cnt = bvs.shape
kspv = ksp_per_step[step_idx, boot_idx]

fig, axs = plt.subplots(2, sharey=True, figsize=(8, 8))
for a in axs: a.set(xlabel='Sorted idx', ylabel='pvalue')

# pv distribution for TP (even, green) and FP (odd, red)
for j in range(0, pt_num, 2):
    axs[0].set_title('Sorted TP pvalue')
    l = 'pt: {}, ks: {:9.3}, tt: {:5.3}'.format(j, kspv[j], tt[j])
    bvss = np.sort(bvs[j])
    axs[0].plot(np.arange(boot_cnt), bvss, linewidth=0.75, zorder = j, label=l)

for j in range(1, pt_num, 2):
    axs[1].set_title('Sorted FP pvalue')
    l = 'pt: {}, ks: {:9.3}, tt: {:5.3}'.format(j, kspv[j], tt[j])
    bvss = np.sort(bvs[j])
    axs[1].plot(np.arange(boot_cnt), bvss, linewidth=0.75, zorder = j, label=l)

for a in axs: a.legend(loc=6, framealpha=0.3, fontsize=8)
fig.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

# Below are cells to debug code snippets

## Debugging ks-test 

In [None]:
# Find out whether it follows the uniform distribution
ksp_per_step2 = np.zeros_like(tp_full_per_step[:,1])

for s, bps in enumerate(tp_full_per_step):          # step
    bpsr = bps.T                                    # -> (pt, boot)
    start = time.time()
    print('{}: Step {} ...'.format(bpsr.shape, s))
    for i, ptbt in enumerate(bpsr):
        d, kpv = f.kstest(ptbt, ld_cdf)
        ksp_per_step2[s, i] = kpv
    print("Done in {}".format(time.time() - start))


## Debugging distributions for kstest

In [None]:
step_idx = 2

# bpvs = tt_boots_per_step[step_idx].T  # t
bpvs = tp_boots_per_step[step_idx].T   # t-pv
bvs = tt_boots_per_step[step_idx][boot_idx].T  # tt value distribution per step per boot

kpvs = []
ld_stat = stats.uniform
# ld_stat = stats.norm
ld_cdf = ld_stat().cdf

for j in range(0, len(bpvs)):
    bp = bpvs[j]
    d, pv = f.kstest(bp, ld_cdf)
    l, s = leak_dist.fit(bp)      # loc and scale of the dist at the point
    leaking = pv < 0.05           # 3 sigma assurance the point is leaking (non uniform)
    kpvs.append([d, pv, l, s, leaking])
d, pv = f.kstest(bpvs[mpi], ld_cdf)
l, s = leak_dist.fit(bpvs[mpi])  # loc and scale of the dist at the max leak point
leaking = pv < 0.05
kpvs.append([d, pv, l, s, leaking])

# print("d, pv, l, s, leaking")
# print(np.array(kpvs))
    
# norm_cdf = norm(loc=0, scale=1).cdf
# dt, pv  = stats.kstest(bpvs, norm_cdf)

fig = plt.figure()
plt.xlabel('Boot Number')
for j in range(0, len(bpvs), 20):
    plt.plot(bpvs[j], color = 'grey', linewidth=0.5, zorder = j)

# mpi = 0
# plt.plot(bpvs[mpi], color = 'r', linewidth=2, zorder=255)
plt.show()