In [1]:
%%bash
# Build the project's uberjar with Leiningen; a jar from target/jvm/uberjar is
# added to the notebook classpath in the next cell.
lein uberjar

Compiling ppdsp.classifier.base
Compiling ppdsp.classifier.inspectablearf
Compiling ppdsp.classifier.moa-classifier
Compiling ppdsp.classifier.random
Compiling ppdsp.core
Compiling ppdsp.dataset.base
Compiling ppdsp.dataset.csv-dataset
Compiling ppdsp.dataset.moa
Compiling ppdsp.dataset.save-csv
Compiling ppdsp.masking.attack-data
Compiling ppdsp.masking.base
Compiling ppdsp.masking.evaluation
Compiling ppdsp.masking.optimize
Compiling ppdsp.masking.projection
Compiling ppdsp.masking.single-stage-cumulative-attack
Compiling ppdsp.masking.two-stage-cumulative-attack
Compiling ppdsp.masking.two-stage-independent-attack
Compiling ppdsp.masking.utils
Compiling ppdsp.training
Compiling ppdsp.utils.complex-numbers
Compiling ppdsp.utils.matrices
Compiling ppdsp.utils.moving-average
Compiling ppdsp.utils.random
Compiling ppdsp.utils.stats
Compiling ppdsp.utils.timing
Compiling ppdsp.utils
Created /home/beakerx/ppdsp/target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT.jar
Created /home/beakerx/ppdsp/target

In [2]:
 %classpath add jar ../target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
(require '[clojure.string :as string]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [save-data load-data display-table
                                   display-masking-error-plots run-masking-experiments-cycles run-masking-experiments
                                   attack-strategy-comparison-plots 
                                   noise-accuracy-plot accuracy-privacy-tradeoff-comparison
                                   display-html accuracy-privacy-tradeoff-legend
                                   accuracy-privacy-tradeoff attack-strategy-comparison
                                   round-known-record-counts]]
         '[ppdsp.classifier.moa-classifier :refer [adaptive-random-forest]]
         '[ppdsp.dataset.base :refer [dataset-feature-count dataset-record-count]]
         '[ppdsp.dataset.csv-dataset :refer [read-csv-dataset]]
         '[ppdsp.masking.evaluation :refer [flatten-masking-experiment-recoveries
                                           unknown-record-relative-position
                                           add-combined-result
                                           get-cumulative-noise-sigma
                                            get-attack-count]]
         '[ppdsp.utils :refer [map-vals mean]])

null

## Experiment Configuration

In [3]:
;; Short label for this dataset; later cells use it to build the
;; "workspace/<label>/..." output paths.
(def dataset-label
  "arem")

;; The dataset itself, read from the interleaved CSV file.
(def dataset
  (let [csv-path "datasets/arem/arem_all_interleaved.csv"
        record-limit 99999999 ; large enough to take all records
        numeric-features ["avg_rss12" "var_rss12" "avg_rss13" "var_rss13" "avg_rss23" "var_rss23"]]
    (read-csv-dataset csv-path record-limit numeric-features)))

#'beaker_clojure_shell_a56dad8c-6698-42f2-9304-28843438d7cf/dataset

In [4]:
;; Epsilon values iterated by the comparison and trade-off cells below.
(def epsilons
  [0.2])

;; Dataset dimensions, read once from the loaded dataset.
(def feature-count
  (dataset-feature-count dataset))
(def record-count
  (dataset-record-count dataset))

;; Cycle sizes passed to the cycle-based configurations.
;; Alternative setting kept for reference: [300 500 700]
(def cycle-sizes
  [500])

;; Independent noise sigmas and the cumulative sigmas derived from them
;; (one cumulative sigma per independent sigma, computed with the record count).
(def independent-noise-sigmas
  [0.25])
(def cumulative-noise-sigmas
  (for [sigma independent-noise-sigmas]
    (get-cumulative-noise-sigma sigma record-count)))


;; Settings shared by every experiment configuration in this notebook.
;; Noise sigmas, cycle sizes, attack strategies and the output file are NOT set
;; here; each derived configuration below supplies its own.
;; The grouping of keys is for readability only.
(def base-configuration
  {;; data and classifier
   :dataset dataset
   :classifier-fns {:arf adaptive-random-forest}
   :evaluations [:privacy :accuracy]

   ;; projection / translation
   :projection-feature-counts [feature-count]
   :projection-sigmas [1.0]
   :translations [0]

   ;; known records and attack repetition
   :known-record-counts [4]
   :known-record-ranges [1]
   :attack-count 200
   :attempt-count 3

   ;; execution
   :threads-per-configuration 4
   :threads-per-evaluation 1
   :seed 1})


;; Base configuration extended for the plain cumulative-noise experiment:
;; zero independent noise, the derived cumulative sigmas, and a single cycle
;; size of 0. Results go to workspace/<dataset-label>/cumulative.edn.
(def cumulative-noise-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/cumulative.edn")
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas cumulative-noise-sigmas
         :cycle-sizes [0]
         :attack-strategies [:a-rp :a-rpcn :a-rpcn-1]))

;; Base configuration extended for the cumulative-noise-with-cycles experiment:
;; uses the notebook's cycle-sizes, with :maximum-fn-value and :growth-rate-k
;; both set to zero. Results go to workspace/<dataset-label>/cumulative-cycles.edn.
(def cumulative-noise-cycles-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/cumulative-cycles.edn")
         :maximum-fn-value 0
         :growth-rate-k 0.0
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas cumulative-noise-sigmas
         :cycle-sizes cycle-sizes
         :attack-strategies [:a-rp :a-rpcn :a-rpcn-1]))

;; Base configuration extended for the logistic cumulative-noise experiment:
;; same cycle sizes as the cycles configuration, but with :maximum-fn-value 1
;; and :growth-rate-k 0.01. Results go to
;; workspace/<dataset-label>/logistic-cumulative.edn.
(def logistic-cumulative-noise-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/logistic-cumulative.edn")
         :maximum-fn-value 1
         :growth-rate-k 0.01
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas cumulative-noise-sigmas
         :cycle-sizes cycle-sizes
         :attack-strategies [:a-rp :a-rpcn :a-rpcn-1]))



#'beaker_clojure_shell_a56dad8c-6698-42f2-9304-28843438d7cf/logistic-cumulative-noise-configuration

## Run Experiments

In [5]:
;; Run the cycle-based experiments for the cumulative-noise-with-cycles configuration.
;; The recorded run below reports an elapsed time of roughly two minutes.
;; NOTE(review): results are presumably written to the configuration's :output-file,
;; which the "Load and Extend" cells read back — confirm in jupyter_helpers.clj.
(run-masking-experiments-cycles cumulative-noise-cycles-configuration)

Starting: privacy-evaluation-pf6-ps1.0-is0.0-cs0.001976409902290168-tr0 -cz1000-krc4-krr1 -ac200-krrp:middle


Oct 13, 2019 10:46:59 PM com.github.fommil.jni.JniLoader liberalLoad
INFO: successfully loaded /tmp/jniloader8480062215240944964netlib-native_system-linux-x86_64.so


"Elapsed time: 126645.1792 msecs"


null

In [6]:
;; Run the cycle-based experiments for the logistic cumulative-noise configuration.
;; The recorded run below reports an elapsed time of roughly two minutes.
;; NOTE(review): results are presumably written to the configuration's :output-file,
;; which the "Load and Extend" cells read back — confirm in jupyter_helpers.clj.
(run-masking-experiments-cycles logistic-cumulative-noise-configuration)

Starting: privacy-evaluation-pf6-ps1.0-is0.0-cs0.001976409902290168-tr0 -cz1000-krc4-krr1 -ac200-krrp:middle
"Elapsed time: 123569.8125 msecs"


null

In [None]:
;; Run the non-cycle experiments for the plain cumulative-noise configuration
;; (note: run-masking-experiments, not the -cycles variant used above).
;; NOTE(review): results are presumably written to the configuration's :output-file,
;; which the "Load and Extend" cells read back — confirm in jupyter_helpers.clj.
(run-masking-experiments cumulative-noise-configuration)

Starting: privacy-evaluation-pf6-ps1.0-is0.0-cs0.001976409902290168-tr0 -cz0-krc4-krr1 -ac200-krrp:middle


## Load and Extend Experiment Results

In [None]:
;; Load the saved output of the plain cumulative-noise experiments and derive
;; the collections used by the comparison cells below.
(def output-cumulative (load-data (:output-file cumulative-noise-configuration)))

;; Value stored under :original -> :accuracy in the loaded output.
(def original-accuracy-cumulative (-> output-cumulative :original :accuracy))

;; Results restricted to the configured cumulative noise sigmas, extended with
;; two combined results on :score ([:a-rp :a-rpcn] and [:a-rp :a-rpcn-1]).
(def results-cumulative
  ;; Build the sigma lookup set once rather than once per result.
  (let [configured-sigmas (set cumulative-noise-sigmas)]
    (-> (filter #(contains? configured-sigmas (:cumulative-noise-sigma %))
                (:results output-cumulative))
        (add-combined-result :score [:a-rp :a-rpcn])
        (add-combined-result :score [:a-rp :a-rpcn-1]))))

;; Flattened recoveries, each tagged with :unknown-record-relative-position.
(def flat-results-cumulative
  (->> results-cumulative
       flatten-masking-experiment-recoveries
       (map #(assoc % :unknown-record-relative-position (unknown-record-relative-position %)))))


In [None]:
;; Load the saved output of the cumulative-noise-with-cycles experiments and
;; derive the collections used by the comparison cells below.
(def output-cumulative-cycles (load-data (:output-file cumulative-noise-cycles-configuration)))

;; Value stored under :original -> :accuracy in the loaded output.
(def original-accuracy-cumulative-cycles (-> output-cumulative-cycles :original :accuracy))

;; Results restricted to the configured cumulative noise sigmas, extended with
;; two combined results on :score ([:a-rp :a-rpcn] and [:a-rp :a-rpcn-1]).
(def results-cumulative-cycles
  ;; Build the sigma lookup set once rather than once per result.
  (let [configured-sigmas (set cumulative-noise-sigmas)]
    (-> (filter #(contains? configured-sigmas (:cumulative-noise-sigma %))
                (:results output-cumulative-cycles))
        (add-combined-result :score [:a-rp :a-rpcn])
        (add-combined-result :score [:a-rp :a-rpcn-1]))))

;; Flattened recoveries, each tagged with :unknown-record-relative-position.
(def flat-results-cumulative-cycles
  (->> results-cumulative-cycles
       flatten-masking-experiment-recoveries
       (map #(assoc % :unknown-record-relative-position (unknown-record-relative-position %)))))

In [None]:
;; Load the saved output of the logistic cumulative-noise experiments and
;; derive the collections used by the comparison cells below.
(def output-cumulative-logistic (load-data (:output-file logistic-cumulative-noise-configuration)))

;; Value stored under :original -> :accuracy in the loaded output.
(def original-accuracy-cumulative-logistic (-> output-cumulative-logistic :original :accuracy))

;; Results restricted to the configured cumulative noise sigmas, extended with
;; two combined results on :score ([:a-rp :a-rpcn] and [:a-rp :a-rpcn-1]).
(def results-cumulative-logistic
  ;; Build the sigma lookup set once rather than once per result.
  (let [configured-sigmas (set cumulative-noise-sigmas)]
    (-> (filter #(contains? configured-sigmas (:cumulative-noise-sigma %))
                (:results output-cumulative-logistic))
        (add-combined-result :score [:a-rp :a-rpcn])
        (add-combined-result :score [:a-rp :a-rpcn-1]))))

;; Flattened recoveries, each tagged with :unknown-record-relative-position.
(def flat-results-cumulative-logistic
  (->> results-cumulative-logistic
       flatten-masking-experiment-recoveries
       (map #(assoc % :unknown-record-relative-position (unknown-record-relative-position %)))))

## Comparison of Attack Strategies

In [None]:
;; Attack-strategy comparison plots for plain cumulative noise, one set per epsilon.
;; Only epsilon is iterated: the body never uses the cycle size, so also looping
;; over cycle-sizes would just render the same plots once per cycle size.
(doseq [epsilon epsilons]
    ;; Both headings are closed so the emitted HTML is well-formed.
    (display-html (str "<h3>Cumulative Noise" "</h3>" "<h3>Epsilon = " epsilon "</h3>"))
    ;; Overall comparison across all results.
    (.display (attack-strategy-comparison-plots flat-results-cumulative :all epsilon
                                                :plot-width 400
                                                :plot-height 400
                                                :show-legend? true))
    (display-html (str "<h4>Breakdown by noise amount</h4>"))
    ;; Same comparison, broken down by :cumulative-noise-sigma.
    (.display (attack-strategy-comparison-plots flat-results-cumulative :cumulative-noise-sigma epsilon
                                                :plot-width 400
                                                :plot-height 400
                                                :show-legend? false)))

In [None]:
;; Attack-strategy comparison plots for cumulative noise with cycles, rendered
;; once per (epsilon, cycle size) pair. The heading reports twice the configured
;; cycle value.
;; NOTE(review): the plotted results are not filtered by cycle size, so with
;; several cycle sizes the same plots would repeat under different headings — confirm.
(doseq [epsilon epsilons
        cycles cycle-sizes]
  (let [heading (str "<h3>Cumulative Noise With Cycles" "</h3>"
                     "<h3>Epsilon = " epsilon "</h3>"
                     "<h3>Cycle Size = " (* cycles 2) "</h3>")
        ;; Builds one comparison plot grouped by `grouping`, with or without a legend.
        comparison-plots (fn [grouping legend?]
                           (attack-strategy-comparison-plots flat-results-cumulative-cycles grouping epsilon
                                                             :plot-width 400
                                                             :plot-height 400
                                                             :show-legend? legend?))]
    (display-html heading)
    (.display (comparison-plots :all true))
    (display-html "<h4>Breakdown by noise amount</h4>")
    (.display (comparison-plots :cumulative-noise-sigma false))))

In [None]:
;; Attack-strategy comparison plots for logistic cumulative noise with cycles,
;; rendered once per (epsilon, cycle size) pair. The heading reports twice the
;; configured cycle value.
;; NOTE(review): the plotted results are not filtered by cycle size, so with
;; several cycle sizes the same plots would repeat under different headings — confirm.
(doseq [epsilon epsilons
        cycles cycle-sizes]
  (let [heading (str "<h3>Logistic Cumulative Noise With Cycles" "</h3>"
                     "<h3>Epsilon = " epsilon "</h3>"
                     "<h3>Cycle Size = " (* cycles 2) "</h3>")
        ;; Builds one comparison plot grouped by `grouping`, with or without a legend.
        comparison-plots (fn [grouping legend?]
                           (attack-strategy-comparison-plots flat-results-cumulative-logistic grouping epsilon
                                                             :plot-width 400
                                                             :plot-height 400
                                                             :show-legend? legend?))]
    (display-html heading)
    (.display (comparison-plots :all true))
    (display-html "<h4>Breakdown by noise amount</h4>")
    (.display (comparison-plots :cumulative-noise-sigma false))))

In [None]:
;; Compare attack strategies on the plain cumulative-noise results at the
;; largest configured known-record count, then save and display the table.
(let [largest-known-count (apply max (:known-record-counts base-configuration))
      strategy-table (attack-strategy-comparison flat-results-cumulative epsilons
                                                 :known-record-count largest-known-count)
      table-path (str "workspace/" dataset-label "/cumulative-attack-strategies-comparison.edn")]
  (save-data table-path strategy-table)
  (display-table strategy-table))

In [None]:
;; Compare attack strategies on the cumulative-noise-with-cycles results at the
;; largest configured known-record count, then save and display the table.
(let [largest-known-count (apply max (:known-record-counts base-configuration))
      strategy-table (attack-strategy-comparison flat-results-cumulative-cycles epsilons
                                                 :known-record-count largest-known-count)
      table-path (str "workspace/" dataset-label "/cumulative-cycles-attack-strategies-comparison.edn")]
  (save-data table-path strategy-table)
  (display-table strategy-table))

In [None]:
;; Compare attack strategies on the logistic cumulative-noise results at the
;; largest configured known-record count, then save and display the table.
(let [largest-known-count (apply max (:known-record-counts base-configuration))
      strategy-table (attack-strategy-comparison flat-results-cumulative-logistic epsilons
                                                 :known-record-count largest-known-count)
      table-path (str "workspace/" dataset-label "/logistic-attack-strategies-comparison.edn")]
  (save-data table-path strategy-table)
  (display-table strategy-table))


In [None]:
;; Attack strategy used for each noise scheme by the trade-off and
;; privacy-over-time cells below.
;; NOTE(review): presumably picked by hand from the comparison tables above —
;; re-check these after re-running the experiments.
(def best-cumulative-attack-strategy :a-rpcn-1)
(def best-cumulative-cycles-attack-strategy :a-rpcn-1)
(def best-logistic-attack-strategy :a-rpcn-1)

## Linear Cumulative vs. Logistic Cumulative Noise

In [None]:
;; Display the HTML legend for the accuracy/privacy trade-off plots, built from
;; the configured cumulative noise sigmas.
(display-html (accuracy-privacy-tradeoff-legend cumulative-noise-sigmas))
;; NOTE(review): the trailing nil presumably keeps the cell from echoing
;; display-html's return value — confirm.
nil

In [None]:
;; For each epsilon, display the accuracy/privacy trade-off comparison of the
;; three noise schemes (plain, cycles, logistic), each evaluated with its
;; selected attack strategy and the :arf classifier.
(doseq [epsilon epsilons]
  (let [tradeoff-plot (accuracy-privacy-tradeoff-comparison
                        results-cumulative
                        results-cumulative-cycles
                        results-cumulative-logistic
                        :arf
                        best-cumulative-attack-strategy
                        best-cumulative-cycles-attack-strategy
                        best-logistic-attack-strategy
                        epsilon
                        :plot-width 400
                        :plot-height 400)]
    (.display tradeoff-plot)))

### Comparison of Square Distance From Origin

Performance is measured as the sum of the squares of (1) the probability of an ε-privacy breach and (2) the classification error.

In [None]:
;; Build the accuracy/privacy trade-off table for the three noise schemes
;; (square-distance form, one row per noise level, at the largest configured
;; known-record count), then save and display it.
(let [largest-known-count (apply max (:known-record-counts base-configuration))
      tradeoff-table (accuracy-privacy-tradeoff
                       results-cumulative
                       results-cumulative-cycles
                       results-cumulative-logistic
                       :arf
                       best-cumulative-attack-strategy
                       best-cumulative-cycles-attack-strategy
                       best-logistic-attack-strategy
                       epsilons
                       :square-distance? true
                       :row-per-noise-level? true
                       :known-record-count largest-known-count)
      table-path (str "workspace/" dataset-label "/mask-comparison.edn")]
  (save-data table-path tradeoff-table)
  (display-table tradeoff-table))

## Effect of Cumulative Noise on Accuracy over Time

In [None]:
;; Plot accuracy for the three noise schemes (plain, cycles, logistic) using the
;; :arf classifier results, with an initial plot size of 800x400.
(noise-accuracy-plot results-cumulative results-cumulative-cycles results-cumulative-logistic :arf
                     :init-width 800
                     :init-height 400)

## Effect of Cumulative Noise on Privacy over Time

In [None]:
;; Masking-error plots for plain cumulative noise, restricted to the selected
;; attack strategy, the largest cumulative noise sigma and the largest
;; known-record count.
;; The two maxima are computed once here rather than once per record inside the
;; filter predicates, and the three filters are merged into a single predicate.
(let [largest-sigma (apply max cumulative-noise-sigmas)
      largest-known-count (apply max (:known-record-counts base-configuration))
      selected? (fn [recovery]
                  (and (= best-cumulative-attack-strategy (:strategy recovery))
                       (= largest-sigma (:cumulative-noise-sigma recovery))
                       (= largest-known-count (:known-record-count recovery))))]
  (display-masking-error-plots (filter selected? flat-results-cumulative)
                               :cumulative-noise-sigma :known-record-count :strategy
                               :plot-width 500
                               :plot-height 500))

In [None]:
;; Masking-error plots for cumulative noise with cycles, restricted to the
;; selected attack strategy, the largest cumulative noise sigma and the largest
;; known-record count.
;; The two maxima are computed once here rather than once per record inside the
;; filter predicates, and the three filters are merged into a single predicate.
(let [largest-sigma (apply max cumulative-noise-sigmas)
      largest-known-count (apply max (:known-record-counts base-configuration))
      selected? (fn [recovery]
                  (and (= best-cumulative-cycles-attack-strategy (:strategy recovery))
                       (= largest-sigma (:cumulative-noise-sigma recovery))
                       (= largest-known-count (:known-record-count recovery))))]
  (display-masking-error-plots (filter selected? flat-results-cumulative-cycles)
                               :cumulative-noise-sigma :known-record-count :strategy
                               :plot-width 500
                               :plot-height 500))

In [None]:
;; Masking-error plots for logistic cumulative noise, restricted to the selected
;; attack strategy, the largest cumulative noise sigma and the largest
;; known-record count.
;; The two maxima are computed once here rather than once per record inside the
;; filter predicates, and the three filters are merged into a single predicate.
(let [largest-sigma (apply max cumulative-noise-sigmas)
      largest-known-count (apply max (:known-record-counts base-configuration))
      selected? (fn [recovery]
                  (and (= best-logistic-attack-strategy (:strategy recovery))
                       (= largest-sigma (:cumulative-noise-sigma recovery))
                       (= largest-known-count (:known-record-count recovery))))]
  (display-masking-error-plots (filter selected? flat-results-cumulative-logistic)
                               :cumulative-noise-sigma :known-record-count :strategy
                               :plot-width 500
                               :plot-height 500))