In [1]:
%%bash
# Build the project's standalone uberjar with Leiningen; the next cell adds
# ppdsp-0.1.0-SNAPSHOT-standalone.jar to the classpath.
lein uberjar

Retrieving org/clojure/java.classpath/0.3.0/java.classpath-0.3.0.jar from central
Retrieving org/clojure/tools.reader/1.3.2/tools.reader-1.3.2.jar from central
Retrieving org/clojure/tools.namespace/0.3.1/tools.namespace-0.3.1.jar from central
Retrieving clj-time/clj-time/0.14.0/clj-time-0.14.0.jar from clojars
Compiling ppdsp.classifier.base
Compiling ppdsp.classifier.inspectablearf
Compiling ppdsp.classifier.moa-classifier
Compiling ppdsp.classifier.random
Compiling ppdsp.core
Compiling ppdsp.dataset.base
Compiling ppdsp.dataset.csv-dataset
Compiling ppdsp.dataset.moa
Compiling ppdsp.dataset.save-csv
Compiling ppdsp.masking.attack-data
Compiling ppdsp.masking.base
Compiling ppdsp.masking.data_fitting
Compiling ppdsp.masking.evaluation
Compiling ppdsp.masking.optimize
Compiling ppdsp.masking.projection
Compiling ppdsp.masking.single-stage-cumulative-attack
Compiling ppdsp.masking.two-stage-cumulative-attack
Compiling ppdsp.masking.two-stage-independent-attack
Compiling ppdsp.masking.u

In [2]:
 %classpath add jar ../target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
(require '[clojure.string :as string]
         '[clojure.data.csv :as csv]
         '[clojure.java.io :as io]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [save-data load-data display-table 
                                  ;; display-masking-error-plots  
                                  ;; attack-strategy-comparison-plots 
                                   ;;noise-accuracy-plot accuracy-privacy-tradeoff-comparison
                                  ;; display-html accuracy-privacy-tradeoff-legend
                                   ;accuracy-privacy-tradeoff attack-strategy-comparison
                                   round-known-record-counts
                                   ;;added by Waruni
                                    read-data run-masking-experiments-cycles     accuracy-updating-cycles   
                                     save-data-append-newline  ;;accuracy-for-flat-areas 
                                   attack-per-cycle validate-datafitting-results
                                   ]]
         '[ppdsp.classifier.moa-classifier :refer [hoeffding-tree ]]    ;;changed
         '[ppdsp.dataset.base :refer [dataset-feature-count dataset-record-count dataset-class-count get-schema]]
         '[ppdsp.dataset.csv-dataset :refer [read-csv-dataset]]
         '[ppdsp.masking.evaluation :refer [flatten-masking-experiment-recoveries
                                           unknown-record-relative-position
                                           add-combined-result
                                           get-cumulative-noise-sigma  calculate-avarage-information-loss 
                                            get-attack-count
                                               ]]
         '[ppdsp.masking.data_fitting :refer [kernel-regression-using-fastmath k-AEL-final-from-selected-kernel]] ;retrieve-accuracy-using-fitted-function
         '[ppdsp.utils :refer [map-vals mean]])
;;(import '[java.lang.instrument Instrumentation])

null

## Experiment Configuration

In [3]:
;; Label of the dataset under test; also used to build the workspace/<label>/
;; output paths elsewhere in this notebook.
(def dataset-label "breast-cancer")

;; Load the breast-cancer CSV dataset.
(def dataset
  (let [max-records 99999999 ;; All records
        ;; Numeric features
        numeric-features ["clump_thickness" "uniformity_cell_size" "uniformity_cell_shape" "marginal_adhesion"
                          "single_epithelial_cell_size" "bare_nuclei" "bland_chromatin" "normal_nucleoli" "mitoses"]]
    (read-csv-dataset "datasets/breast-cancer/breast-cancer.csv"
                      max-records
                      numeric-features)))

#'beaker_clojure_shell_b3f5b94b-b512-4d9a-9ee4-9326e64edfa4/dataset

In [4]:
;; A single epsilon value (despite the plural name); passed to the experiment
;; configurations under the :epsilon key.
(def epsilons 0.2)

;; Values derived from the loaded dataset.
(def feature-count (dataset-feature-count dataset))
(def record-count (dataset-record-count dataset))
(def class-count (dataset-class-count dataset))

;; Half of the cycle size.
;; NOTE(review): [500 1000 2000 4000] were presumably the other values tried - confirm.
(def cycle-sizes [1000])

;; attack count = (record-count * 5%) / 4, because each attack uses 4 known I/O pairs.
(def attack-count
  (-> record-count
      (* 0.05)
      (/ 4)
      int))

(def independent-noise-sigmas [0.25])

;; One cumulative sigma per independent sigma, computed for this record count.
(def cumulative-noise-sigmas
  (for [sigma independent-noise-sigmas]
    (get-cumulative-noise-sigma sigma record-count)))


;; Settings shared by the experiment configurations in this notebook; the
;; specific configurations are built by merging extra keys onto this map.
(def base-configuration
  {;; Data and classifier
   :dataset dataset
   :classifier-fns {:hdt hoeffding-tree}

   ;; Projection and translation settings
   :projection-feature-counts [feature-count]
   :projection-sigmas [1.0]
   :translations [0]

   ;; Attack settings. 4 known records per attack, so the total known record
   ;; count = 4 * attack count = 5% of the record count.
   :known-record-counts [4]
   :known-record-ranges [1]
   :attack-count attack-count
   :attempt-count 3

   ;; Execution settings
   :threads-per-configuration 2
   :threads-per-evaluation 1
   :seed 1

   ;; Previously: [:privacy-auc :accuracy :privacy-attacks]
   :evaluations [:privacy :accuracy]})

;; Configuration for the logistic cumulative-noise experiments: the shared
;; base-configuration plus output locations and growth/noise parameters.
(def logistic-cumulative-noise-configuration
  (let [;; Every output file lives under workspace/<dataset-label>/.
        workspace-file (fn [file-name] (str "workspace/" dataset-label "/" file-name))]
    (merge base-configuration
           {;; Output locations
            :output-file (workspace-file "logistic-cumulative-output.edn")
            :output-file-privacy-auc (workspace-file "logistic-cumulative-privacy-auc.csv")
            ;; NOTE(review): key is spelled :attck-results-file; kept as-is because the
            ;; helpers presumably look it up under this exact name - confirm.
            :attck-results-file (workspace-file "attack-results.edn")
            :output-file-privacy-attacks (workspace-file "logistic-cumulative-privacy-attacks.csv")
            :original-model-description (workspace-file "original-model-description.edn")
            :original-probability-file (workspace-file "original-AEL.csv")
            :masked-model-file (workspace-file "masked-model-description.edn")
            :masked-probability-file (workspace-file "masked-AEL.csv")
            :final-bp-file (workspace-file "final-breach-probabilities.csv")
            :temp-bp-file (workspace-file "temp-bp.edn")
            :previous-AUC-file-to-update (workspace-file "previous-AUC.csv")
            :previous-upper-bound-file-to-update (workspace-file "previous-upper-bound.csv")

            ;; Logistic-function parameters
            :maximum-fn-value 1 ;; previously [1]
            ;; Starting growth rate; per the original note, the run covers different k
            ;; values up to 0.1 with tilda = 0.002.
            ;; NOTE(review): the logged run below steps 0.005 -> 0.009 -> 0.013 -> 0.017,
            ;; i.e. by 0.004 - confirm the step size.
            ;; Earlier explicit list: :growth-rate-k [0.0015 0.003 0.006 0.0125 0.025 0.05 0.1]
            :growth-rate-k-initial 0.005

            ;; Noise, cycle and attack parameters
            :cumulative-noise-sigmas cumulative-noise-sigmas
            :cycle-sizes cycle-sizes
            :epsilon epsilons
            :attack-strategies [:a-rp :a-rpcn :a-rpcn-1]

            ;; Disabled options kept for reference:
            ;;   :cycle-size-upper-bound (load-data (str "workspace/" dataset-label "/previous-upper-bound.csv"))
            ;;   :cycle-size-upper-bound previous-ub
            ;;   :previous-AUC (load-data (str "workspace/" dataset-label "/previous-AUC.csv"))
            ;;   :pre-AUC previous-AUC
            ;;   :bandwidth initial-f-value
            ;;   :flat-record-length 200  ; length of the flat period of the cycle according
            ;;                            ; to the logistic function; depends on the cycle size
            })))


#'beaker_clojure_shell_b3f5b94b-b512-4d9a-9ee4-9326e64edfa4/logistic-cumulative-noise-configuration

## Run Experiments and Write Results (AEL & BP) to Files

In [4]:
;; Run the masking experiments described by logistic-cumulative-noise-configuration.
;; The captured log shows one "privacy-evaluation" pass per growth rate, each
;; taking roughly 100-130 seconds.
(run-masking-experiments-cycles logistic-cumulative-noise-configuration)

Starting: privacy-evaluation -pf9 -ps1.0 -cs0.003749719937933765 -tr0 -cz2000 -krc4 -krr1 -ac125 -krrp:middle -gr0.005


Mar 31, 2021 2:24:52 AM com.github.fommil.jni.JniLoader liberalLoad
INFO: successfully loaded /tmp/jniloader1736868227692383832netlib-native_system-linux-x86_64.so


Growth Rate =  0.005
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 127393.0963 msecs"
Starting: privacy-evaluation -pf9 -ps1.0 -cs0.003749719937933765 -tr0 -cz2000 -krc4 -krr1 -ac125 -krrp:middle -gr0.009
Growth Rate =  0.009
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 103904.5428 msecs"
Starting: privacy-evaluation -pf9 -ps1.0 -cs0.003749719937933765 -tr0 -cz2000 -krc4 -krr1 -ac125 -krrp:middle -gr0.013
Growth Rate =  0.013
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 101317.2123 msecs"
Starting: privacy-evaluation -pf9 -ps1.0 -cs0.003749719937933765 -tr0 -cz2000 -krc4 -krr1 -ac125 -krrp:middle -gr0.017
Growth Rate =  0.017
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 101145.0097 msecs"
Starting: privacy-evaluation -pf9 -ps1.0 -cs0.003749719937933765 -tr0 -cz2000 -krc4 -krr1 -ac125 -krrp:midd

null

## Data Fitting Using Privacy and Accuracy Results

In [5]:
;; Files used by the data-fitting step.
(def accuracy-file (str "workspace/" dataset-label "/masked-AEL.csv"))
(def datafitting-results-file (str "workspace/" dataset-label "/results-data-fitting.csv"))

;; User thresholds to test.
;; Other values tried: [0.5 0.55 0.6 0.65 0.7 0.75 0.8 0.85 0.9 0.95] 0.85 0.9 0.92 0.95 0.97
(def user-threshold-testing [0.6 0.7 0.75 0.8 0.85 0.9 0.95 0.97])

;; Fit using the breach-probability and privacy-AUC files written by the
;; experiment run, together with the accuracy file defined above.
(let [{:keys [final-bp-file output-file-privacy-auc]} logistic-cumulative-noise-configuration]
  (kernel-regression-using-fastmath final-bp-file
                                    accuracy-file
                                    user-threshold-testing
                                    datafitting-results-file
                                    output-file-privacy-auc))



[null, null, null, null, null, null, null, null]

In [14]:
;; Input accuracy file and output file for the final AEL computation.
(def accuracy-file (str "workspace/" dataset-label "/masked-AEL.csv"))
(def final-AEL-file (str "workspace/" dataset-label "/final-ael.csv"))

;; Should be run for all of these values:
;; 0.75, 0.8, 0.85, 0.9, 0.95, 0.97 0.98 0.985 0.99 0.995
(def privacy-threshold-by-user 0.995)

(let [{:keys [final-bp-file output-file-privacy-auc]} logistic-cumulative-noise-configuration]
  (k-AEL-final-from-selected-kernel final-bp-file
                                    accuracy-file
                                    output-file-privacy-auc
                                    privacy-threshold-by-user
                                    final-AEL-file))

null

## Validate Experiments

In [15]:
;; To validate the data-fitting results, run the experiments again with the
;; k-values received after data fitting. All outputs go to the
;; workspace/<dataset-label>/validation/ directory.
(def logistic-cumulative-noise-validation
  (let [validation-file (fn [file-name] (str "workspace/" dataset-label "/validation/" file-name))]
    (merge base-configuration
           {;; Output locations
            :output-file (validation-file "logistic-cumulative-output.edn")
            :output-file-privacy-auc (validation-file "logistic-cumulative-privacy-auc.csv")
            ;; NOTE(review): key is spelled :attck-results-file; kept as-is because the
            ;; helpers presumably look it up under this exact name - confirm.
            :attck-results-file (validation-file "attack-results.edn")
            :output-file-privacy-attacks (validation-file "logistic-cumulative-privacy-attacks.csv")
            :original-model-description (validation-file "original-model-description.edn")
            :original-probability-file (validation-file "original-AEL.csv")
            :masked-model-file (validation-file "masked-model-description.edn")
            :masked-probability-file (validation-file "masked-AEL.csv")
            :final-bp-file (validation-file "final-breach-probabilities.csv")
            :temp-bp-file (validation-file "temp-bp.edn")
            :previous-AUC-file-to-update (validation-file "previous-AUC.csv")
            :previous-upper-bound-file-to-update (validation-file "previous-upper-bound.csv")

            ;; Logistic-function parameters
            :maximum-fn-value 1 ;; previously [1]
            ;; Here a vector of k values (one experiment pass per value in the log below).
            ;;   k from bp - Wave:     1.1502 1.0493 1.1506 0.9439 0.8429 0.7321 0.8618 0.4133 0.1204 0.0040
            ;;   k from bp - Rational: 0.3248 0.2934 0.2330 0.1501 0.1868 0.3110 0.1724 0.1144 0.0697 0.0401
            :growth-rate-k-initial [1.1502 1.0493 1.1506 0.9439 0.8429 0.7321 0.8618 0.4133 0.1204 0.0040]

            ;; Noise, cycle and attack parameters
            :cumulative-noise-sigmas cumulative-noise-sigmas
            :cycle-sizes cycle-sizes
            :epsilon epsilons
            :attack-strategies [:a-rp :a-rpcn :a-rpcn-1]})))

(validate-datafitting-results logistic-cumulative-noise-validation)

Starting: privacy-evaluation -pf9 -ps1.0 -cs0.003749719937933765 -tr0 -cz2000 -krc4 -krr1 -ac125 -krrp:middle -gr1.1502
Growth Rate =  1.1502
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 110028.8865 msecs"
Starting: privacy-evaluation -pf9 -ps1.0 -cs0.003749719937933765 -tr0 -cz2000 -krc4 -krr1 -ac125 -krrp:middle -gr1.0493
Growth Rate =  1.0493
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 108983.2242 msecs"
Starting: privacy-evaluation -pf9 -ps1.0 -cs0.003749719937933765 -tr0 -cz2000 -krc4 -krr1 -ac125 -krrp:middle -gr1.1506
Growth Rate =  1.1506
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 107974.8834 msecs"
Starting: privacy-evaluation -pf9 -ps1.0 -cs0.003749719937933765 -tr0 -cz2000 -krc4 -krr1 -ac125 -krrp:middle -gr0.9439
Growth Rate =  0.9439
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time:

[null, null, null, null, null, null, null, null, null, null]

In [9]:
;; Load the saved independent-noise experiment output and derive the views
;; used by later cells.
;; NOTE(review): independent-noise-configuration is not defined in any cell
;; shown in this notebook - confirm it exists before running this cell.
(def output-independent
  (load-data (:output-file independent-noise-configuration)))

(def original-accuracy-independent
  (get-in output-independent [:original :accuracy]))

;; Keep only results whose sigma is one of independent-noise-sigmas, then add
;; the combined :score results for the two strategy pairs.
(def results-independent
  (let [wanted-sigmas (set independent-noise-sigmas)
        wanted? (fn [result]
                  (contains? wanted-sigmas (:independent-noise-sigma result)))]
    (-> (filter wanted? (:results output-independent))
        (add-combined-result :score [:a-rp :a-rpin])
        (add-combined-result :score [:a-rp :a-rpin-1]))))

;; Flattened recoveries, each annotated with :unknown-record-relative-position.
(def flat-results-independent
  (map (fn [recovery]
         (assoc recovery
                :unknown-record-relative-position
                (unknown-record-relative-position recovery)))
       (flatten-masking-experiment-recoveries results-independent)))

#'beaker_clojure_shell_3a305d4d-1ed5-4d27-83fd-2bc3d85a921a/flat-results-independent

In [10]:
;; Load the saved random-projection-only experiment output and derive the
;; views used by later cells.
;; NOTE(review): rp-only-configuration is not defined in any cell shown in
;; this notebook - confirm it exists before running this cell.
(def output-rp-only
  (load-data (:output-file rp-only-configuration)))

(def original-accuracy-rp-only
  (get-in output-rp-only [:original :accuracy]))

(def results-rp-only
  (:results output-rp-only))

;; Flattened recoveries, each annotated with :unknown-record-relative-position.
(def flat-results-rp-only
  (map (fn [recovery]
         (assoc recovery
                :unknown-record-relative-position
                (unknown-record-relative-position recovery)))
       (flatten-masking-experiment-recoveries results-rp-only)))

#'beaker_clojure_shell_3a305d4d-1ed5-4d27-83fd-2bc3d85a921a/flat-results-rp-only

## Comparison of Attack Strategies

In [11]:
;; For each epsilon, display the attack-strategy comparison plots for the
;; cumulative-noise results: first over all results, then broken down by
;; :cumulative-noise-sigma.
;;
;; `epsilons` is defined earlier in this notebook as a single number (0.2).
;; doseq cannot iterate over a bare number, so a scalar is wrapped in a vector;
;; an existing collection is used unchanged.
;;
;; NOTE(review): display-html and attack-strategy-comparison-plots are commented
;; out of the jupyter-helpers :refer list, and flat-results-cumulative is not
;; defined in any cell shown - confirm they are available before running.
(doseq [epsilon (if (coll? epsilons) epsilons [epsilons])]
  (display-html (str "<h3>Epsilon = " epsilon "</h3>"))
  (.display (attack-strategy-comparison-plots flat-results-cumulative :all epsilon
                                              :plot-width 400
                                              :plot-height 400
                                              :show-legend? true))
  (display-html "<h4>Breakdown by noise amount</h4>")
  (.display (attack-strategy-comparison-plots flat-results-cumulative :cumulative-noise-sigma epsilon
                                              :plot-width 400
                                              :plot-height 400
                                              :show-legend? false)))

null

In [12]:
;; For each epsilon, display the attack-strategy comparison plots for the
;; independent-noise results: first over all results, then broken down by
;; :independent-noise-sigma.
;;
;; `epsilons` is defined earlier in this notebook as a single number (0.2).
;; doseq cannot iterate over a bare number, so a scalar is wrapped in a vector;
;; an existing collection is used unchanged.
;;
;; NOTE(review): display-html and attack-strategy-comparison-plots are commented
;; out of the jupyter-helpers :refer list - confirm they are available before
;; running.
(doseq [epsilon (if (coll? epsilons) epsilons [epsilons])]
  (display-html (str "<h3>Epsilon = " epsilon "</h3>"))
  (.display (attack-strategy-comparison-plots flat-results-independent :all epsilon
                                              :plot-width 400
                                              :plot-height 400
                                              :show-legend? true))
  (display-html "<h4>Breakdown by noise amount</h4>")
  (.display (attack-strategy-comparison-plots flat-results-independent :independent-noise-sigma epsilon
                                              :plot-width 400
                                              :plot-height 400
                                              :show-legend? false)))

null

In [13]:
;; Build the attack-strategy comparison for the cumulative-noise results at the
;; largest configured known-record count, save it as EDN and show it as a table.
;; NOTE(review): attack-strategy-comparison is commented out of the
;; jupyter-helpers :refer list, flat-results-cumulative is not defined in any
;; cell shown, and `epsilons` is a single number here - confirm before running.
(let [max-known-record-count (apply max (:known-record-counts base-configuration))
      comparison (attack-strategy-comparison flat-results-cumulative epsilons
                                             :known-record-count max-known-record-count)
      target-file (str "workspace/" dataset-label "/cumulative-attack-strategies-comparison.edn")]
  (save-data target-file comparison)
  (display-table comparison))

null

In [14]:
;; Build the attack-strategy comparison for the independent-noise results at the
;; largest configured known-record count, save it as EDN and show it as a table.
;; NOTE(review): attack-strategy-comparison is commented out of the
;; jupyter-helpers :refer list, and `epsilons` is a single number here -
;; confirm before running.
(let [max-known-record-count (apply max (:known-record-counts base-configuration))
      comparison (attack-strategy-comparison flat-results-independent epsilons
                                             :known-record-count max-known-record-count)
      target-file (str "workspace/" dataset-label "/independent-attack-strategies-comparison.edn")]
  (save-data target-file comparison)
  (display-table comparison))

null

In [15]:
;; Attack strategies used by the later comparison cells for cumulative and
;; independent noise respectively.
;; NOTE(review): presumably picked by hand from the comparison tables above - confirm.
(def best-cumulative-attack-strategy :a-rpcn-1)
(def best-independent-attack-strategy :a-rpin-1)

#'beaker_clojure_shell_3a305d4d-1ed5-4d27-83fd-2bc3d85a921a/best-independent-attack-strategy

## Cumulative vs. Independent Noise

In [16]:
;; Render the legend for the accuracy/privacy trade-off plots, built from the
;; independent and cumulative noise sigmas.
;; NOTE(review): display-html and accuracy-privacy-tradeoff-legend are commented
;; out of the jupyter-helpers :refer list - confirm they are available.
(display-html (accuracy-privacy-tradeoff-legend independent-noise-sigmas cumulative-noise-sigmas))
;; Make nil the value of the cell.
nil

null

In [17]:
;; For each epsilon, display the accuracy/privacy trade-off comparison of the
;; cumulative-noise, independent-noise and rp-only results.
;;
;; `epsilons` is defined earlier in this notebook as a single number (0.2).
;; doseq cannot iterate over a bare number, so a scalar is wrapped in a vector;
;; an existing collection is used unchanged.
;;
;; NOTE(review): accuracy-privacy-tradeoff-comparison is commented out of the
;; jupyter-helpers :refer list and results-cumulative is not defined in any cell
;; shown. Also, :arf is passed here while base-configuration only registers the
;; :hdt classifier - confirm which classifier key is intended.
(doseq [epsilon (if (coll? epsilons) epsilons [epsilons])]
  (.display (accuracy-privacy-tradeoff-comparison results-cumulative results-independent results-rp-only
                                                  :arf best-cumulative-attack-strategy best-independent-attack-strategy epsilon
                                                  :plot-width 400
                                                  :plot-height 400)))

null

### Comparison of Square Distance From Origin

Performance is measured as the sum of the squares of (1) the probability of an ε-privacy breach and (2) the classification error, i.e. the squared distance from the origin (lower is better).

In [18]:
;; Compute the accuracy/privacy trade-off as a squared distance, with one row
;; per noise level, at the largest configured known-record count; save it as
;; EDN and show it as a table.
;; NOTE(review): accuracy-privacy-tradeoff is commented out of the
;; jupyter-helpers :refer list, results-cumulative is not defined in any cell
;; shown, `epsilons` is a single number here, and :arf is passed while
;; base-configuration only registers the :hdt classifier - confirm before running.
(let [max-known-record-count (apply max (:known-record-counts base-configuration))
      comparison (accuracy-privacy-tradeoff results-cumulative results-independent results-rp-only
                                            :arf best-cumulative-attack-strategy best-independent-attack-strategy epsilons
                                            :square-distance? true
                                            :row-per-noise-level? true
                                            :known-record-count max-known-record-count)
      target-file (str "workspace/" dataset-label "/mask-comparison.edn")]
  (save-data target-file comparison)
  (display-table comparison))

null

## Effect of Cumulative Noise on Accuracy over Time

In [19]:
;; Plot accuracy for the cumulative, independent and rp-only results
;; with an initial plot size of 800x400.
;; NOTE(review): noise-accuracy-plot is commented out of the jupyter-helpers
;; :refer list, results-cumulative is not defined in any cell shown, and :arf is
;; passed while base-configuration only registers the :hdt classifier - confirm.
(noise-accuracy-plot results-cumulative results-independent results-rp-only :arf
                     :init-width 800
                     :init-height 400)

## Effect of Cumulative Noise on Privacy over Time

In [20]:
;; Show the masking error plots for the flattened cumulative results that match
;; the best cumulative attack strategy, the largest cumulative noise sigma and
;; the largest configured known-record count.
;; NOTE(review): display-masking-error-plots is commented out of the
;; jupyter-helpers :refer list and flat-results-cumulative is not defined in any
;; cell shown - confirm before running.
(let [largest-sigma (apply max cumulative-noise-sigmas)
      largest-known-count (apply max (:known-record-counts base-configuration))
      selected? (fn [result]
                  (and (= best-cumulative-attack-strategy (:strategy result))
                       (= largest-sigma (:cumulative-noise-sigma result))
                       (= largest-known-count (:known-record-count result))))]
  (display-masking-error-plots (filter selected? flat-results-cumulative)
                               :cumulative-noise-sigma :known-record-count :strategy
                               :plot-width 500
                               :plot-height 500))