In [25]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline

In [26]:
# Directory that holds every result file read by this notebook.
# NOTE(review): absolute, machine-specific path -- adjust before re-running elsewhere.
full_path = '/home/jnear/co/collab/csl/experiments/immediate_sensitivity/results/'

# epsilon value -> file-name suffix shared by the 'nobkd' and 'bkd' result files.
filenames = {
    10000: '_results_8_10000.0.txt',
    1000: '_results_8_1000.0.txt',
    100: '_results_8_100.0.txt',
    10: '_results_8_10.0.txt',
    1: '_results_8_1.0.txt',
    0.1: '_results_8_0.1.txt',
    0.01: '_results_8_0.01.txt'
}

# epsilon value -> (results without backdoor, results with backdoor)
all_backdoor_results = {}

for epsilon, suffix in filenames.items():
    nobkd_filename = full_path + 'nobkd' + suffix
    bkd_filename = full_path + 'bkd' + suffix

    # Every line of a result file is parsed as one integer.
    with open(nobkd_filename) as f:
        nobkd_results = list(map(int, f))
    with open(bkd_filename) as f:
        bkd_results = list(map(int, f))

    all_backdoor_results[epsilon] = (nobkd_results, bkd_results)

# Size of the last 'nobkd' file read; nobkd_results / bkd_results stay bound
# to that last pair after the loop.
len(nobkd_results)

1000

In [27]:
# The baseline files cover only these epsilon values; this rebinds `filenames`.
filenames = {
    10000: '_results_8_10000.0.txt',
    1000: '_results_8_1000.0.txt',
    100: '_results_8_100.0.txt',
    10: '_results_8_10.0.txt',
    1: '_results_8_1.0.txt',
}

# epsilon value -> (baseline results without backdoor, baseline results with backdoor)
all_backdoor_results_baseline = {}

for epsilon, suffix in filenames.items():
    nobkd_filename = full_path + 'baseline_nobkd' + suffix
    bkd_filename = full_path + 'baseline_bkd' + suffix

    # Every line of a result file is parsed as one integer.
    with open(nobkd_filename) as f:
        nobkd_results = list(map(int, f))
    with open(bkd_filename) as f:
        bkd_results = list(map(int, f))

    all_backdoor_results_baseline[epsilon] = (nobkd_results, bkd_results)

# Size of the last baseline 'nobkd' file read; nobkd_results / bkd_results
# stay bound to that last pair and are reused by the cells below.
len(nobkd_results)

400

In [28]:
from scipy import stats
def clopper_pearson(count, trials, conf):
    """Two-sided Clopper-Pearson interval for a binomial proportion.

    Parameters
    ----------
    count : number of successes (scalar or array-like).
    trials : number of trials (scalar or array-like).
    conf : total tail mass of the interval. Despite the name this acts as a
        significance level: each bound is evaluated at ``conf / 2``, so pass
        ``.05`` to get a 95% interval.

    Returns
    -------
    (ci_low, ci_upp) : lower and upper bounds. The lower bound is forced to 0
        when ``count / trials == 0`` and the upper bound to 1 when
        ``count / trials == 1``.
    """
    count = np.array(count)
    trials = np.array(trials)
    conf = np.array(conf)

    tail = conf / 2.
    frac = count / trials
    lower = stats.beta.ppf(tail, count, trials - count + 1)
    upper = stats.beta.isf(tail, count + 1, trials - count)

    if np.ndim(lower) == 0:
        # Scalar inputs: pick the boundary value directly.
        return (lower if (frac != 0) else 0), (upper if (frac != 1) else 1)

    # Array inputs: overwrite the boundary entries in place.
    lower[frac == 0] = 0
    upper[frac == 1] = 1
    return lower, upper


In [29]:
def bkd_find_thresh(nobkd_li, bkd_li, use_dkw=False):
    """Pick the threshold that maximises the corrected log-ratio.

    Every distinct value found in the two samples is tried as a threshold.
    For each one, the fraction of entries ``>= thresh`` is bounded from above
    for the no-backdoor sample and from below for the backdoor sample, and the
    log of the resulting ratio is scored. The first threshold with the
    strictly largest score wins.

    Parameters
    ----------
    nobkd_li, bkd_li : 1-D sequences or arrays of results without / with the
        backdoor. Plain lists are accepted.
    use_dkw : if True, bound each fraction with ``p +/- sqrt(log(2/.05)/n)``;
        otherwise use ``clopper_pearson(..., .05)``.
        NOTE(review): the usual DKW band has ``2*n`` in the denominator, so the
        band used here is wider than that -- confirm this is intended.

    Returns
    -------
    The selected threshold (an element of the pooled samples). If no threshold
    yields a positive ratio, the smallest pooled value is returned.

    Raises
    ------
    IndexError if both samples are empty.
    """
    # Accept lists as well as arrays; `.shape` below needs an ndarray.
    nobkd_arr = np.asarray(nobkd_li)
    bkd_arr = np.asarray(bkd_li)
    n_nobkd = nobkd_arr.shape[0]
    n_bkd = bkd_arr.shape[0]

    all_threshs = np.unique(np.concatenate((nobkd_arr, bkd_arr)).ravel())
    best_eps, best_thresh = -np.inf, all_threshs[0]

    for thresh in all_threshs:
        nobkd_ct = (nobkd_arr >= thresh).sum()
        bkd_ct = (bkd_arr >= thresh).sum()

        if use_dkw:
            nobkd_ub = nobkd_ct/n_nobkd + np.sqrt(np.log(2/.05)/n_nobkd)
            bkd_lb = bkd_ct/n_bkd - np.sqrt(np.log(2/.05)/n_bkd)
        else:
            _, nobkd_ub = clopper_pearson(nobkd_ct, n_nobkd, .05)
            bkd_lb, _ = clopper_pearson(bkd_ct, n_bkd, .05)

        # Use the complementary event when that makes the ratio bigger.
        if nobkd_ub + bkd_lb > 1:
            corr_ratio = (1-nobkd_ub)/(1-bkd_lb)
        else:
            corr_ratio = bkd_lb/nobkd_ub

        # A zero, negative or nan ratio has no usable log and can never win;
        # score it as -inf instead of calling np.log on it.
        corr_eps = np.log(corr_ratio) if corr_ratio > 0 else -np.inf

        if best_eps < corr_eps:
            best_eps, best_thresh = corr_eps, thresh
    return best_thresh

# Try the threshold search (DKW bounds) on whatever pair the loading cells
# left bound to nobkd_results / bkd_results.
bkd_find_thresh(np.array(nobkd_results), np.array(bkd_results), use_dkw=True)



-96

In [30]:
def bkd_get_eps(n_repeat, nobkd_li, bkd_li, thresh, use_dkw=False):
    """Estimate epsilon at a fixed threshold, with and without correction.

    Parameters
    ----------
    n_repeat : divisor applied to both log-ratios.
    nobkd_li, bkd_li : 1-D sequences or arrays of results without / with the
        backdoor. Plain lists are accepted.
    thresh : an entry counts as a hit when it is ``>= thresh``.
    use_dkw : if True, bound each hit fraction with
        ``p +/- sqrt(log(2/.05)/n)``; otherwise use
        ``clopper_pearson(..., .01)``.

    Returns
    -------
    (corr_eps, plain_eps) :
        ``corr_eps`` is ``log(ratio of bounds) / n_repeat`` and is ``-inf`` or
        ``nan`` when that ratio is zero or negative.
        ``plain_eps`` is ``log(ratio of raw fractions) / n_repeat`` and is 0
        when either sample has no hits or only hits.
    """
    # Accept lists as well as arrays; `.shape` below needs an ndarray.
    nobkd_arr = np.asarray(nobkd_li)
    bkd_arr = np.asarray(bkd_li)
    n_nobkd, n_bkd = nobkd_arr.shape[0], bkd_arr.shape[0]

    bkd_ct, nobkd_ct = (bkd_arr >= thresh).sum(), (nobkd_arr >= thresh).sum()
    bkd_p = bkd_ct/n_bkd
    nobkd_p = nobkd_ct/n_nobkd

    if use_dkw:
        nobkd_ub = nobkd_p + np.sqrt(np.log(2/.05)/n_nobkd)
        bkd_lb = bkd_p - np.sqrt(np.log(2/.05)/n_bkd)
    else:
        _, nobkd_ub = clopper_pearson(nobkd_ct, n_nobkd, .01)
        bkd_lb, _ = clopper_pearson(bkd_ct, n_bkd, .01)

    if bkd_ct in [n_bkd, 0] or nobkd_ct in [n_nobkd, 0]:
        # Degenerate split: report a ratio of 1 (plain_eps == 0).
        plain_ratio = 1
    elif bkd_p + nobkd_p > 1:  # this makes ratio bigger
        plain_ratio = (1-nobkd_p)/(1-bkd_p)
    else:
        plain_ratio = bkd_p/nobkd_p

    # Same complementary-event choice, applied to the bounds.
    if nobkd_ub + bkd_lb > 1:
        corr_ratio = (1-nobkd_ub)/(1-bkd_lb)
    else:
        corr_ratio = bkd_lb/nobkd_ub

    plain_eps = np.log(plain_ratio)/n_repeat
    corr_eps = np.log(corr_ratio)/n_repeat

    return (corr_eps, plain_eps)

# Sweep ten evenly spaced thresholds in [2950, 3000]; each entry is
# (threshold, (corr_eps, plain_eps)) computed with n_repeat=8.
[(t, bkd_get_eps(8, np.array(nobkd_results), np.array(bkd_results), t)) for t in np.linspace(2950, 3000, 10)]

[(2950.0, (-0.09995534963101727, 0.0)),
 (2955.5555555555557, (-0.09995534963101727, 0.0)),
 (2961.1111111111113, (-0.09995534963101727, 0.0)),
 (2966.6666666666665, (-0.09995534963101727, 0.0)),
 (2972.222222222222, (-0.09995534963101727, 0.0)),
 (2977.777777777778, (-0.09995534963101727, 0.0)),
 (2983.3333333333335, (-0.09995534963101727, 0.0)),
 (2988.8888888888887, (-0.09995534963101727, 0.0)),
 (2994.4444444444443, (-0.09995534963101727, 0.0)),
 (3000.0, (-0.09995534963101727, 0.0))]

In [31]:
def get_eps_thresh(nobkd_arr, bkd_arr, thresh, poisoning_size=8):
    """Corrected epsilon estimate at one threshold (Clopper-Pearson bounds).

    Parameters
    ----------
    nobkd_arr, bkd_arr : 1-D sequences or arrays of results without / with the
        backdoor. Plain lists are accepted.
    thresh : an entry counts as a hit when it is ``>= thresh``.
    poisoning_size : divisor applied to the log-ratio. Defaults to 8, the
        value that used to be hard-coded here.

    Returns
    -------
    ``log(corr_ratio) / poisoning_size``, where ``corr_ratio`` is built from
    the upper bound on the no-backdoor hit fraction and the lower bound on the
    backdoor hit fraction, both from ``clopper_pearson(..., .01)``. The result
    is ``-inf`` when the ratio is zero.
    """
    # Accept lists as well as arrays; `.shape` below needs an ndarray.
    nobkd_arr = np.asarray(nobkd_arr)
    bkd_arr = np.asarray(bkd_arr)

    bkd_ct = (bkd_arr >= thresh).sum()
    nobkd_ct = (nobkd_arr >= thresh).sum()

    _, nobkd_ub = clopper_pearson(nobkd_ct, nobkd_arr.shape[0], .01)
    bkd_lb, _ = clopper_pearson(bkd_ct, bkd_arr.shape[0], .01)

    # Use the complementary event when that makes the ratio bigger.
    if nobkd_ub + bkd_lb > 1:
        corr_ratio = (1-nobkd_ub)/(1-bkd_lb)
    else:
        corr_ratio = bkd_lb/nobkd_ub

    return np.log(corr_ratio)/poisoning_size

def get_eps(nobkd_arr, bkd_arr):
    """Return the (threshold, epsilon) pair with the largest epsilon.

    Candidate thresholds are the distinct, strictly positive values found in
    the two samples. Each one is scored with element ``[1]`` of
    ``bkd_get_eps(8, ...)``, i.e. the plain (uncorrected) estimate, and the
    first candidate with the highest score is returned.

    NOTE(review): the original local was called ``corr_eps`` although it held
    the plain estimate -- confirm which of the two was meant.

    Raises ValueError (from ``max``) when there is no positive candidate.
    """
    candidates = np.unique(np.concatenate((nobkd_arr, bkd_arr)).ravel())
    candidates = candidates[candidates > 0]

    scored = []
    for thresh in candidates:
        _, plain_eps = bkd_get_eps(8, nobkd_arr, bkd_arr, thresh)
        scored.append((thresh, plain_eps))

    best_thresh, best_eps = max(scored, key=lambda pair: pair[1])
    return best_thresh, best_eps
#     for thresh in all_threshs:
#         corr_eps = get_eps_thresh(nobkd_arr, bkd_arr, thresh)
#         print(thresh, corr_eps)

# for thresh in np.linspace(0, 4000):
#     print(thresh, get_eps(np.array(nobkd_results), np.array(bkd_results), thresh))
# Best (threshold, epsilon) for the pair currently bound to
# nobkd_results / bkd_results.
get_eps(np.array(nobkd_results), np.array(bkd_results))
#get_eps_thresh(np.array(nobkd_results), np.array(bkd_results), 2953)

(3032, 0.0)

In [32]:
# Log of the ratio of sample means (no-backdoor over backdoor) for the pair
# currently bound to nobkd_results / bkd_results.
np.log(np.mean(nobkd_results)/np.mean(bkd_results))

0.6145145009058661

# Our Results

In [45]:
# Measured epsilon for each configured epsilon, in ascending order of epsilon.
ours_epsilons = []

for epsilon in sorted(all_backdoor_results):
    nobkd_results, bkd_results = (
        np.array(results) for results in all_backdoor_results[epsilon]
    )

    # Only results above 1000 are kept.
    # NOTE(review): the reason for the 1000 cut-off is not recorded here.
    nobkd_results = nobkd_results[nobkd_results > 1000]
    bkd_results = bkd_results[bkd_results > 1000]

    thresh, calculated_eps = get_eps(nobkd_results, bkd_results)
    print('for epsilon', epsilon, 'calculated epsilon was', calculated_eps)
    ours_epsilons.append(calculated_eps)

print('ours_measured_eps = ', ours_epsilons)



for epsilon 0.01 calculated epsilon was 0.017055395993964495
for epsilon 0.1 calculated epsilon was 0.03811740869244331
for epsilon 1 calculated epsilon was 0.04342367998149139
for epsilon 10 calculated epsilon was 0.17380541639867042
for epsilon 100 calculated epsilon was 0.12355785973334968
for epsilon 1000 calculated epsilon was 0.40533546519751856
for epsilon 10000 calculated epsilon was 0.41445914987243243
ours_measured_eps =  [0.017055395993964495, 0.03811740869244331, 0.04342367998149139, 0.17380541639867042, 0.12355785973334968, 0.40533546519751856, 0.41445914987243243]


# Baseline Results

In [46]:
# Measured epsilon for each baseline epsilon, in ascending order of epsilon.
baseline_epsilons = []

for epsilon in sorted(all_backdoor_results_baseline):
    nobkd_results, bkd_results = (
        np.array(results) for results in all_backdoor_results_baseline[epsilon]
    )
    # Number of no-backdoor results before filtering.
    print(len(nobkd_results))

    # Only results above 1000 are kept.
    # NOTE(review): the reason for the 1000 cut-off is not recorded here.
    nobkd_results = nobkd_results[nobkd_results > 1000]
    bkd_results = bkd_results[bkd_results > 1000]

    thresh, calculated_eps = get_eps(nobkd_results, bkd_results)
    print('for epsilon', epsilon, 'calculated epsilon was', calculated_eps)
    baseline_epsilons.append(calculated_eps)

print('baseline_measured_eps = ', baseline_epsilons)

400
for epsilon 1 calculated epsilon was 0.0
800
for epsilon 10 calculated epsilon was 0.15659537106192106
800
for epsilon 100 calculated epsilon was 0.08122086061621502
400
for epsilon 1000 calculated epsilon was 0.16466268620411742
400
for epsilon 10000 calculated epsilon was 0.25993019270997947
baseline_measured_eps =  [0.0, 0.15659537106192106, 0.08122086061621502, 0.16466268620411742, 0.25993019270997947]




# Non-private Training