In [1]:
%%bash
# Build the project with Leiningen. The following cell adds
# ../target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar to the classpath,
# so this build must succeed before the rest of the notebook is run.
lein uberjar

Retrieving org/clojure/clojure/1.8.0/clojure-1.8.0.jar from central
Compiling ppdsp.classifier.base
Compiling ppdsp.classifier.inspectablearf
Compiling ppdsp.classifier.moa-classifier
Compiling ppdsp.classifier.random
Compiling ppdsp.core
Compiling ppdsp.dataset.base
Compiling ppdsp.dataset.csv-dataset
Compiling ppdsp.dataset.moa
Compiling ppdsp.dataset.save-csv
Compiling ppdsp.masking.attack-data
Compiling ppdsp.masking.base
Compiling ppdsp.masking.evaluation
Compiling ppdsp.masking.optimize
Compiling ppdsp.masking.projection
Compiling ppdsp.masking.single-stage-cumulative-attack
Compiling ppdsp.masking.two-stage-cumulative-attack
Compiling ppdsp.masking.two-stage-independent-attack
Compiling ppdsp.masking.utils
Compiling ppdsp.training
Compiling ppdsp.utils.complex-numbers
Compiling ppdsp.utils.matrices
Compiling ppdsp.utils.moving-average
Compiling ppdsp.utils.random
Compiling ppdsp.utils.stats
Compiling ppdsp.utils.timing
Compiling ppdsp.utils
Created /home/beakerx/ppdsp/target/jvm

In [2]:
%classpath add jar ../target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
(require '[clojure.string :as string]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [save-data load-data display-table 
                                   display-masking-error-plots  run-masking-experiments
                                   attack-strategy-comparison-plots 
                                   noise-accuracy-plot accuracy-privacy-tradeoff-comparison
                                   display-html accuracy-privacy-tradeoff-legend
                                   accuracy-privacy-tradeoff attack-strategy-comparison
                                   round-known-record-counts
                                   ;;added by Waruni
                                    read-data run-masking-experiments-cycles  attack-per-cycle accuracy-updating-cycles 
                                   accuracy-for-flat-areas  save-data-append-newline  ;;accuracy-of-attacked-records
                                   ]]
         '[ppdsp.classifier.moa-classifier :refer [adaptive-random-forest]]
         '[ppdsp.dataset.base :refer [dataset-feature-count dataset-record-count]]
         '[ppdsp.dataset.csv-dataset :refer [read-csv-dataset]]
         '[ppdsp.masking.evaluation :refer [flatten-masking-experiment-recoveries
                                           unknown-record-relative-position
                                           add-combined-result
                                           get-cumulative-noise-sigma
                                            get-attack-count]]
         '[ppdsp.utils :refer [map-vals mean]])

null

## Experiment Configuration

In [3]:
;; Label used to build the per-dataset "workspace/<label>/..." output paths.
(def dataset-label "nyc-taxi")

;; Dataset read from CSV through ppdsp's read-csv-dataset.
(def dataset
  (let [csv-path         "datasets/nyc-taxi/nyc-50k.csv"
        record-limit     99999999 ;; All records
        ;; Numeric features
        numeric-features ["pickup_longitude"
                          "pickup_latitude"
                          "dropoff_longitude"
                          "dropoff_latitude"
                          "pickup_day"
                          "pickup_hour"
                          "dist"]]
    (read-csv-dataset csv-path record-limit numeric-features)))

#'beaker_clojure_shell_cec1d768-b36b-439e-bf75-a7d9afc664b8/dataset

In [4]:
;; NOTE(review): presumably the privacy-breach threshold(s); not referenced by
;; any other visible cell -- confirm whether this is still needed.
(def epsilons [0.2])

;; Dataset dimensions, queried once and reused below.
(def feature-count (dataset-feature-count dataset))
(def record-count (dataset-record-count dataset))

;; Each entry is HALF of the full cycle size.
(def cycle-sizes [500])

;; 5% of the record count is used for attacks; each attack uses 4 known
;; I/O pairs, hence the division by 4.
(def attack-count
  (int (/ (* record-count 0.05) 4)))
;;(def cumulative-attack-counts [120 200 280])

(def independent-noise-sigmas [0.25])

;; One cumulative sigma per independent sigma, computed by
;; get-cumulative-noise-sigma for this dataset's record count.
(def cumulative-noise-sigmas
  (for [sigma independent-noise-sigmas]
    (get-cumulative-noise-sigma sigma record-count)))


;; Settings shared by every experiment configuration defined below; each of
;; those adds its own output file, noise and cycle settings on top of this map.
(def base-configuration
  {;; Data and classifier
   :dataset                   dataset
   :classifier-fns            {:arf adaptive-random-forest}

   ;; Masking parameters (projection uses all features of the dataset)
   :projection-feature-counts [feature-count]
   :projection-sigmas         [1.0]
   :translations              [0]

   ;; Attack parameters (4 known I/O pairs per attack, see attack-count)
   :known-record-counts       [4]
   :known-record-ranges       [1]
   :attack-count              attack-count
   :attempt-count             3

   ;; Execution settings
   :threads-per-configuration 4
   :threads-per-evaluation    1
   :seed                      250
   :evaluations               [:privacy :accuracy]})
;; Cumulative-noise experiment without cycles: base settings plus a cycle size
;; of 0 and no flat period. Results go to workspace/<dataset-label>/cumulative.edn.
(def cumulative-noise-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/cumulative.edn")
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas cumulative-noise-sigmas
         :cycle-sizes [0]
         :flat-record-length 0
         :attack-strategies [:a-rp :a-rpcn :a-rpcn-1]))

;; Cumulative-noise experiment with cycles but without the logistic shape:
;; :maximum-fn-value and :growth-rate-k are both zero and there is no flat period.
(def cumulative-noise-cycles-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/cumulative-cycles.edn")
         :maximum-fn-value 0
         :growth-rate-k 0.0
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas cumulative-noise-sigmas
         :cycle-sizes cycle-sizes
         :flat-record-length 0
         :attack-strategies [:a-rp :a-rpcn :a-rpcn-1]))


;; Cumulative-noise experiment with logistic cycles.
(def logistic-cumulative-noise-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/logistic-cumulative.edn")
         :maximum-fn-value 1
         :growth-rate-k 0.01
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas cumulative-noise-sigmas
         :cycle-sizes cycle-sizes
         ;; Length of the flat period of each cycle under the logistic
         ;; function; depends on the cycle size.
         :flat-record-length 200
         :attack-strategies [:a-rp :a-rpcn :a-rpcn-1]))


#'beaker_clojure_shell_cec1d768-b36b-439e-bf75-a7d9afc664b8/logistic-cumulative-noise-configuration

## Run Experiments

In [5]:
;; Run the masking experiments for the logistic cumulative-noise configuration.
;; The "Load and Extend Experiment Results" cell later reads this
;; configuration's :output-file, so this run presumably writes it -- confirm
;; in jupyter_helpers.clj.
(run-masking-experiments-cycles logistic-cumulative-noise-configuration)

Starting: privacy-evaluation-pf7-ps1.0-is0.0-cs0.0016770258744685006-tr0 -cz1000-krc4-krr1 -ac625-krrp:middle


Aug 02, 2021 2:00:51 AM com.github.fommil.jni.JniLoader liberalLoad
INFO: successfully loaded /tmp/jniloader2962904501504803853netlib-native_system-linux-x86_64.so


"Elapsed time: 455467.9253 msecs"


null

In [6]:
;; Run the masking experiments for the cumulative-noise configuration without cycles.
;; NOTE(review): the recorded output of this cell is a bare
;; java.util.concurrent.ExecutionException with no stack trace; the root cause
;; is not visible in the notebook and should be investigated before relying on
;; this configuration's results.
(run-masking-experiments cumulative-noise-configuration)

Starting: privacy-evaluation-pf7-ps1.0-is0.0-cs0.0016770258744685006-tr0 -cz0-krc4-krr1 -ac625-krrp:middle


java.util.concurrent.ExecutionException:  java.util.concurrent.ExecutionException

In [7]:
;; Run the masking experiments for the cumulative-noise configuration with
;; cycles but without the logistic shape.
;; NOTE(review): the recorded output of this cell is a bare
;; java.util.concurrent.ExecutionException with no stack trace; the root cause
;; is not visible in the notebook and should be investigated before relying on
;; this configuration's results.
(run-masking-experiments-cycles cumulative-noise-cycles-configuration)

Starting: privacy-evaluation-pf7-ps1.0-is0.0-cs0.0016770258744685006-tr0 -cz1000-krc4-krr1 -ac625-krrp:middle


java.util.concurrent.ExecutionException:  java.util.concurrent.ExecutionException

## Load and Extend Experiment Results

In [6]:
;; LOGISTIC NOISE

;; Everything stored in the output file of the logistic cumulative-noise run.
(def output-cumulative-logistic
  (load-data (:output-file logistic-cumulative-noise-configuration)))

;; Classification accuracy recorded for the original data.
(def original-accuracy-cumulative-logistic
  (-> output-cumulative-logistic :original :accuracy))

;; Masking results (without the original accuracy), restricted to the
;; configured cumulative noise sigmas and extended with two combined attack
;; scores. The lookup set is built once here rather than once per result
;; inside the filter predicate.
(def results-cumulative-logistic
  (let [wanted-sigmas (set cumulative-noise-sigmas)]
    (-> (filter #(contains? wanted-sigmas (:cumulative-noise-sigma %))
                (:results output-cumulative-logistic))
        (add-combined-result :score [:a-rp :a-rpcn])
        (add-combined-result :score [:a-rp :a-rpcn-1]))))

;; Flattened recoveries (privacy results), each tagged with the relative
;; position of its unknown record.
(def flat-results-cumulative-logistic
  (->> results-cumulative-logistic
       flatten-masking-experiment-recoveries
       (map #(assoc % :unknown-record-relative-position
                    (unknown-record-relative-position %)))))

;;(println flat-results-cumulative-logistic )

#'beaker_clojure_shell_8f12dc80-cf24-4bec-ba06-e725800105a2/flat-results-cumulative-logistic

In [7]:
;; Accuracy per sample for logistic noise.

;; Output files for the accuracy results.
(def accuracy-cycles-outputfile-logistic
  (str "workspace/" dataset-label "/logistic-accuracy-per-cycle.edn"))
(def details-outputfile-logistic
  (str "workspace/" dataset-label "/logistic-accuracy-details.edn"))

;; Length of one full cycle. cycle-sizes holds HALF of the cycle size, so the
;; full length is derived from it instead of being hard-coded (500 -> 1000);
;; this keeps the value in step with the experiment configuration.
(def full-cycle (* 2 (first cycle-sizes)))

;; First entry of the result list (nth throws if the list is empty).
(def access-result-list-logistic (nth results-cumulative-logistic 0))

;; Raw accuracy results of the :arf classifier only.
(def acc-after-masking-logistic
  (-> access-result-list-logistic :accuracy :arf :raw-results))

(accuracy-updating-cycles acc-after-masking-logistic
                          accuracy-cycles-outputfile-logistic
                          record-count
                          full-cycle
                          details-outputfile-logistic)

In [8]:
;; Write the new attack details to a file.
(def attack-cycles-outputfile-logistic
  (str "workspace/" dataset-label "/logistic-attacks-per-cycle.edn"))

;; Third argument to attack-per-cycle.
;; NOTE(review): presumably the breach threshold -- confirm in jupyter_helpers.clj.
(def epsilon 0.2)

(attack-per-cycle flat-results-cumulative-logistic
                  attack-cycles-outputfile-logistic
                  epsilon
                  attack-count)


[null, 0.0, null, null, null, null, 0.0, null, null, null, null, 0.0, null, null, null, null, 0.0, null, null, null, null, 0.0, null, null, null, null, 0.0, null, null, null, null, 0.0, null, null, null, null, 0.0, null, null, null, null, 0.0, null, null, null, null, 0.0, null, null, null, null, 0.0, null, null, null, null, 0.0, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.0016, null, null, null, null, 0.001

In [9]:
;; Read back the final breach probability written to the attacks-per-cycle file.
(def get-breach-probabilty
  (-> (str "workspace/" dataset-label "/logistic-attacks-per-cycle.edn")
      load-data))

;; Append the value to a separate CSV file, used for the operations after
;; 100 runs.
(save-data-append-newline
  (str "workspace/" dataset-label "/logistic-breach-probability.csv")
  get-breach-probabilty)

0.0272

In [10]:
;; Average accuracy for the attacked cycle period (the starting flat period of
;; the logistic cycle).

;; Accuracy details read back from the previously written details file.
(def accuracy-details-file
  (load-data (str "workspace/" dataset-label "/logistic-accuracy-details.edn")))

;; File that receives the relevant accuracy details.
(def accuracy-for-attcked-period-file
  (str "workspace/" dataset-label "/logistic-accuracy-attacked-period.csv"))

(def accuracy-details (:sample-accuracy accuracy-details-file))

;; Length of one full cycle. cycle-sizes holds HALF of the cycle size, so the
;; full length is derived from it instead of being hard-coded (500 -> 1000);
;; this keeps the value in step with the experiment configuration.
(def full-cycle (* 2 (first cycle-sizes)))

;; Flat-period length taken from the configuration that produced the results.
(def flat-record-length
  (:flat-record-length logistic-cumulative-noise-configuration))

(accuracy-for-flat-areas accuracy-details
                         full-cycle
                         flat-record-length
                         record-count
                         accuracy-for-attcked-period-file)

0.47885999999999984