In [1]:
%%bash
# Build the project uberjar with Leiningen (compiles the ppdsp namespaces).
lein uberjar

Compiling ppdsp.classifier.base
Compiling ppdsp.classifier.inspectablearf
Compiling ppdsp.classifier.moa-classifier
Compiling ppdsp.classifier.random
Compiling ppdsp.core
Compiling ppdsp.dataset.base
Compiling ppdsp.dataset.csv-dataset
Compiling ppdsp.dataset.moa
Compiling ppdsp.dataset.save-csv
Compiling ppdsp.masking.attack-data
Compiling ppdsp.masking.base
Compiling ppdsp.masking.evaluation
Compiling ppdsp.masking.optimize
Compiling ppdsp.masking.projection
Compiling ppdsp.masking.single-stage-cumulative-attack
Compiling ppdsp.masking.two-stage-cumulative-attack
Compiling ppdsp.masking.two-stage-independent-attack
Compiling ppdsp.masking.utils
Compiling ppdsp.training
Compiling ppdsp.utils.complex-numbers
Compiling ppdsp.utils.matrices
Compiling ppdsp.utils.moving-average
Compiling ppdsp.utils.random
Compiling ppdsp.utils.stats
Compiling ppdsp.utils.timing
Compiling ppdsp.utils
Created /home/beakerx/ppdsp/target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT.jar
Created /home/beakerx/ppdsp/target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar

In [2]:
 %classpath add jar ../target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
(require '[clojure.string :as string]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [save-data load-data display-table
                                   display-masking-error-plots run-masking-experiments
                                   attack-strategy-comparison-plots
                                   noise-accuracy-plot accuracy-privacy-tradeoff-comparison
                                   display-html accuracy-privacy-tradeoff-legend
                                   accuracy-privacy-tradeoff attack-strategy-comparison
                                   round-known-record-counts]]
         '[ppdsp.classifier.moa-classifier :refer [adaptive-random-forest]]
         '[ppdsp.dataset.base :refer [dataset-feature-count dataset-record-count]]
         '[ppdsp.dataset.csv-dataset :refer [read-csv-dataset]]
         '[ppdsp.masking.evaluation :refer [flatten-masking-experiment-recoveries
                                           unknown-record-relative-position
                                           add-combined-result
                                           get-cumulative-noise-sigma]]
         '[ppdsp.utils :refer [map-vals mean]])

null

## Experiment Configuration

In [3]:
;; Label for this dataset; used below to build the workspace output paths.
(def dataset-label "pregnancies")

;; The dataset under study, read from CSV.
(def dataset
  (let [csv-path "datasets/pregnancies/pregnancies.csv"
        record-limit 99999999 ;; All records
        numeric-features ["parity" "agecon" "pregordr" "poverty" "educat"]]
    (read-csv-dataset csv-path record-limit numeric-features)))

#'beaker_clojure_shell_9df8b47f-db66-4326-9b32-158fb6681c55/dataset

In [4]:
;; Epsilon values iterated over by the comparison cells further down.
(def epsilons [0.1 0.2])

;; Dataset dimensions, queried once and reused by the configuration below.
(def feature-count (dataset-feature-count dataset))
(def record-count (dataset-record-count dataset))

;; Independent-noise sigmas, and the cumulative-noise sigma derived from each
;; one (and the record count) via get-cumulative-noise-sigma.
(def independent-noise-sigmas [0.05 0.1 0.25])
(def cumulative-noise-sigmas
  (map (fn [independent-sigma]
         (get-cumulative-noise-sigma independent-sigma record-count))
       independent-noise-sigmas))

;; Settings shared by every experiment; the three configurations below only
;; add the output file, the noise sigmas and the attack strategies.
(def base-configuration
  (let [all-but-one (dec feature-count)
        half-rounded-up (int (Math/ceil (/ feature-count 2)))]
    {:dataset dataset
     :projection-feature-counts [feature-count]
     :projection-sigmas [1.0]
     :translations [0]
     ;; distinct drops duplicates when the three counts coincide.
     :known-record-counts (distinct [all-but-one half-rounded-up 1])
     :known-record-ranges [1]
     :classifier-fns {:arf adaptive-random-forest}
     :attack-count 500
     :attempt-count 3
     :threads-per-configuration 4
     :threads-per-evaluation 1
     :seed 1
     :evaluations [:privacy :accuracy]}))

;; Cumulative noise only (independent sigma fixed at 0.0).
(def cumulative-noise-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/cumulative.edn")
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas cumulative-noise-sigmas
         :attack-strategies [:a-rp :a-rpcn :a-rpcn-1]))

;; Independent noise only (cumulative sigma fixed at 0.0).
(def independent-noise-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/independent.edn")
         :independent-noise-sigmas independent-noise-sigmas
         :cumulative-noise-sigmas [0.0]
         :attack-strategies [:a-rp :a-rpin :a-rpin-1]))

;; No additive noise at all (both sigmas fixed at 0.0).
(def rp-only-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/rp-only.edn")
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas [0.0]
         :attack-strategies [:a-rp]))

#'beaker_clojure_shell_9df8b47f-db66-4326-9b32-158fb6681c55/rp-only-configuration

## Run Experiments

In [5]:
;; Run the experiments described by cumulative-noise-configuration.
;; NOTE(review): results are presumably written to its :output-file, which the
;; analysis cells later read back with load-data -- confirm in jupyter_helpers.clj.
(run-masking-experiments cumulative-noise-configuration)

Starting: privacy-evaluation-pf5-ps1.0-is0.0-cs0.005868343106755962-tr0-krc4-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.0-cs0.0023473372427023852-tr0-krc4-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.0-cs0.0011736686213511926-tr0-krc4-krr1-krrp:middle


Aug 10, 2019 1:02:11 AM com.github.fommil.jni.JniLoader liberalLoad
INFO: successfully loaded /tmp/jniloader3396934109359994073netlib-native_system-linux-x86_64.so


Starting: privacy-evaluation-pf5-ps1.0-is0.0-cs0.0011736686213511926-tr0-krc3-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.0-cs0.0023473372427023852-tr0-krc3-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.0-cs0.005868343106755962-tr0-krc3-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.0-cs0.0011736686213511926-tr0-krc1-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.0-cs0.0023473372427023852-tr0-krc1-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.0-cs0.005868343106755962-tr0-krc1-krr1-krrp:middle
"Elapsed time: 592869.2575 msecs"


null

In [6]:
;; Run the experiments described by independent-noise-configuration.
;; NOTE(review): results are presumably written to its :output-file, which the
;; analysis cells later read back with load-data -- confirm in jupyter_helpers.clj.
(run-masking-experiments independent-noise-configuration)

Starting: privacy-evaluation-pf5-ps1.0-is0.25-cs0.0-tr0-krc4-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.05-cs0.0-tr0-krc4-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.1-cs0.0-tr0-krc4-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.05-cs0.0-tr0-krc3-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.1-cs0.0-tr0-krc3-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.25-cs0.0-tr0-krc3-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.05-cs0.0-tr0-krc1-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.1-cs0.0-tr0-krc1-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.25-cs0.0-tr0-krc1-krr1-krrp:middle
"Elapsed time: 716991.2344 msecs"


null

In [7]:
;; Run the experiments described by rp-only-configuration (both noise sigmas 0.0).
;; NOTE(review): results are presumably written to its :output-file, which the
;; analysis cells later read back with load-data -- confirm in jupyter_helpers.clj.
(run-masking-experiments rp-only-configuration)

Starting: privacy-evaluation-pf5-ps1.0-is0.0-cs0.0-tr0-krc4-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.0-cs0.0-tr0-krc3-krr1-krrp:middle
Starting: privacy-evaluation-pf5-ps1.0-is0.0-cs0.0-tr0-krc1-krr1-krrp:middle
"Elapsed time: 29205.96 msecs"


null

## Load and Extend Experiment Results

In [8]:
;; Load the saved output of the cumulative-noise experiments and derive the
;; views used by the analysis cells.
(def output-cumulative (load-data (:output-file cumulative-noise-configuration)))
(def original-accuracy-cumulative (-> output-cumulative :original :accuracy))

;; Keep only the results whose cumulative sigma is one of the configured
;; values, then add the combined :score results for the listed strategy pairs.
(def results-cumulative
  ;; Build the lookup set once rather than once per filtered result.
  (let [configured-sigmas (set cumulative-noise-sigmas)]
    (-> (filter #(contains? configured-sigmas (:cumulative-noise-sigma %))
                (:results output-cumulative))
        (add-combined-result :score [:a-rp :a-rpcn])
        (add-combined-result :score [:a-rp :a-rpcn-1]))))

;; Flattened recoveries, each annotated with its unknown-record relative position.
(def flat-results-cumulative
  (->> results-cumulative
       flatten-masking-experiment-recoveries
       (map #(assoc % :unknown-record-relative-position (unknown-record-relative-position %)))))

#'beaker_clojure_shell_9df8b47f-db66-4326-9b32-158fb6681c55/flat-results-cumulative

In [9]:
;; Load the saved output of the independent-noise experiments and derive the
;; views used by the analysis cells.
(def output-independent (load-data (:output-file independent-noise-configuration)))
(def original-accuracy-independent (-> output-independent :original :accuracy))

;; Keep only the results whose independent sigma is one of the configured
;; values, then add the combined :score results for the listed strategy pairs.
(def results-independent
  ;; Build the lookup set once rather than once per filtered result.
  (let [configured-sigmas (set independent-noise-sigmas)]
    (-> (filter #(contains? configured-sigmas (:independent-noise-sigma %))
                (:results output-independent))
        (add-combined-result :score [:a-rp :a-rpin])
        (add-combined-result :score [:a-rp :a-rpin-1]))))

;; Flattened recoveries, each annotated with its unknown-record relative position.
(def flat-results-independent
  (->> results-independent
       flatten-masking-experiment-recoveries
       (map #(assoc % :unknown-record-relative-position (unknown-record-relative-position %)))))

#'beaker_clojure_shell_9df8b47f-db66-4326-9b32-158fb6681c55/flat-results-independent

In [10]:
;; Load the saved output of the RP-only experiments. No sigma filtering or
;; combined results are applied here, unlike the noise-based result sets.
(def output-rp-only (load-data (:output-file rp-only-configuration)))
(def original-accuracy-rp-only (get-in output-rp-only [:original :accuracy]))
(def results-rp-only (:results output-rp-only))

;; Flattened recoveries, each annotated with its unknown-record relative position.
(def flat-results-rp-only
  (map (fn [recovery]
         (assoc recovery
                :unknown-record-relative-position
                (unknown-record-relative-position recovery)))
       (flatten-masking-experiment-recoveries results-rp-only)))

#'beaker_clojure_shell_9df8b47f-db66-4326-9b32-158fb6681c55/flat-results-rp-only

## Comparison of Attack Strategies

In [11]:
;; For each epsilon: an overall attack-strategy comparison (with legend),
;; followed by the same comparison broken down by cumulative noise sigma.
(let [show-plots (fn [grouping epsilon legend?]
                   (.display (attack-strategy-comparison-plots flat-results-cumulative grouping epsilon
                                                               :plot-width 400
                                                               :plot-height 400
                                                               :show-legend? legend?)))]
    (doseq [epsilon epsilons]
        (display-html (str "<h3>Epsilon = " epsilon "</h3>"))
        (show-plots :all epsilon true)
        (display-html "<h4>Breakdown by noise amount</h4>")
        (show-plots :cumulative-noise-sigma epsilon false)))

null

In [12]:
;; For each epsilon: an overall attack-strategy comparison (with legend),
;; followed by the same comparison broken down by independent noise sigma.
(let [show-plots (fn [grouping epsilon legend?]
                   (.display (attack-strategy-comparison-plots flat-results-independent grouping epsilon
                                                               :plot-width 400
                                                               :plot-height 400
                                                               :show-legend? legend?)))]
    (doseq [epsilon epsilons]
        (display-html (str "<h3>Epsilon = " epsilon "</h3>"))
        (show-plots :all epsilon true)
        (display-html "<h4>Breakdown by noise amount</h4>")
        (show-plots :independent-noise-sigma epsilon false)))

null

In [13]:
;; Compare the cumulative-noise attack strategies at the largest configured
;; known-record count, save the table to the workspace, and display it.
(let [max-known-records (apply max (:known-record-counts base-configuration))
      comparison (attack-strategy-comparison flat-results-cumulative epsilons
                                             :known-record-count max-known-records)
      target-file (str "workspace/" dataset-label "/cumulative-attack-strategies-comparison.edn")]
    (save-data target-file comparison)
    (display-table comparison))

null

In [14]:
;; Compare the independent-noise attack strategies at the largest configured
;; known-record count, save the table to the workspace, and display it.
(let [max-known-records (apply max (:known-record-counts base-configuration))
      comparison (attack-strategy-comparison flat-results-independent epsilons
                                             :known-record-count max-known-records)
      target-file (str "workspace/" dataset-label "/independent-attack-strategies-comparison.edn")]
    (save-data target-file comparison)
    (display-table comparison))

null

In [15]:
;; Attack strategies used for each noise type in the accuracy/privacy
;; trade-off and masking-error analyses.
;; NOTE(review): presumably hand-picked from the attack-strategy comparison
;; tables -- re-check these if the experiments are re-run.
(def best-cumulative-attack-strategy :a-rpcn-1)
(def best-independent-attack-strategy :a-rpin-1)

#'beaker_clojure_shell_9df8b47f-db66-4326-9b32-158fb6681c55/best-independent-attack-strategy

## Cumulative vs. Independent Noise

In [16]:
;; Render the legend for the accuracy/privacy trade-off plots, built from the
;; configured independent and cumulative noise sigmas.
(display-html (accuracy-privacy-tradeoff-legend independent-noise-sigmas cumulative-noise-sigmas))
;; Explicit nil as the cell's final value (presumably so that display-html's
;; return value is not echoed as cell output).
nil

null

In [17]:
;; One accuracy/privacy trade-off comparison per epsilon, for the :arf
;; classifier and the chosen attack strategy of each noise type.
(doseq [epsilon epsilons
        :let [tradeoff-plots (accuracy-privacy-tradeoff-comparison
                              results-cumulative results-independent results-rp-only
                              :arf best-cumulative-attack-strategy best-independent-attack-strategy epsilon
                              :plot-width 400
                              :plot-height 400)]]
    (.display tradeoff-plots))

null

### Comparison of Square Distance From Origin

Performance is the squared distance from the origin: the sum of the squares of (1) the probability of an ε-privacy breach and (2) the classification error.

In [18]:
;; Tabulate the accuracy/privacy trade-off as a square distance, one row per
;; noise level, at the largest configured known-record count; save the table
;; to the workspace and display it.
(let [max-known-records (apply max (:known-record-counts base-configuration))
      comparison (accuracy-privacy-tradeoff results-cumulative results-independent results-rp-only
                                            :arf best-cumulative-attack-strategy best-independent-attack-strategy epsilons
                                            :square-distance? true
                                            :row-per-noise-level? true
                                            :known-record-count max-known-records)
      target-file (str "workspace/" dataset-label "/mask-comparison.edn")]
    (save-data target-file comparison)
    (display-table comparison))

null

## Effect of Cumulative Noise on Accuracy over Time

In [19]:
;; Accuracy plot for the :arf classifier across the cumulative, independent
;; and RP-only result sets.
;; NOTE(review): there is no explicit .display call here; the returned value is
;; presumably rendered by the notebook as the cell's result -- confirm.
(noise-accuracy-plot results-cumulative results-independent results-rp-only :arf
                     :init-width 800
                     :init-height 400)

## Effect of Cumulative Noise on Privacy over Time

In [20]:
;; Masking-error plots for the chosen cumulative attack strategy, restricted
;; to the largest cumulative noise sigma and the largest known-record count.
;; Both maxima are computed once here instead of once per filtered result.
(let [largest-sigma (apply max cumulative-noise-sigmas)
      largest-known-count (apply max (:known-record-counts base-configuration))
      selected? (fn [result]
                  (and (= best-cumulative-attack-strategy (:strategy result))
                       (= largest-sigma (:cumulative-noise-sigma result))
                       (= largest-known-count (:known-record-count result))))]
    (display-masking-error-plots (filter selected? flat-results-cumulative)
                                 :cumulative-noise-sigma :known-record-count :strategy
                                 :plot-width 500
                                 :plot-height 500))