In [1]:
%%bash
# Build the standalone uberjar. The next cell adds
# ../target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar to the classpath,
# so this must finish before the Clojure cells are run.
lein uberjar

Retrieving org/clojure/tools.namespace/0.3.1/tools.namespace-0.3.1.jar from central
Retrieving org/clojure/java.classpath/0.3.0/java.classpath-0.3.0.jar from central
Retrieving org/clojure/tools.reader/1.3.2/tools.reader-1.3.2.jar from central
Retrieving clj-time/clj-time/0.14.0/clj-time-0.14.0.jar from clojars
Compiling ppdsp.classifier.base
Compiling ppdsp.classifier.inspectablearf
Compiling ppdsp.classifier.moa-classifier
Compiling ppdsp.classifier.random
Compiling ppdsp.core
Compiling ppdsp.dataset.base
Compiling ppdsp.dataset.csv-dataset
Compiling ppdsp.dataset.moa
Compiling ppdsp.dataset.save-csv
Compiling ppdsp.masking.attack-data
Compiling ppdsp.masking.base
Compiling ppdsp.masking.data_fitting
Compiling ppdsp.masking.evaluation
Compiling ppdsp.masking.optimize
Compiling ppdsp.masking.projection
Compiling ppdsp.masking.single-stage-cumulative-attack
Compiling ppdsp.masking.two-stage-cumulative-attack
Compiling ppdsp.masking.two-stage-independent-attack
Compiling ppdsp.masking.u

In [1]:
 %classpath add jar ../target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
(require '[clojure.string :as string]
         '[clojure.data.csv :as csv]
         '[clojure.java.io :as io]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [save-data load-data display-table 
                                  ;; display-masking-error-plots  
                                  ;; attack-strategy-comparison-plots 
                                   ;;noise-accuracy-plot accuracy-privacy-tradeoff-comparison
                                  ;; display-html accuracy-privacy-tradeoff-legend
                                   ;accuracy-privacy-tradeoff attack-strategy-comparison
                                   round-known-record-counts
                                   ;;added by Waruni
                                    read-data run-masking-experiments-cycles     accuracy-updating-cycles   
                                     save-data-append-newline  ;;accuracy-for-flat-areas 
                                   attack-per-cycle validate-datafitting-results
                                   ]]
         '[ppdsp.classifier.moa-classifier :refer [hoeffding-tree ]]    ;;changed
         '[ppdsp.dataset.base :refer [dataset-feature-count dataset-record-count dataset-class-count get-schema]]
         '[ppdsp.dataset.csv-dataset :refer [read-csv-dataset]]
         '[ppdsp.masking.evaluation :refer [flatten-masking-experiment-recoveries
                                           unknown-record-relative-position
                                           add-combined-result
                                           get-cumulative-noise-sigma  calculate-avarage-information-loss 
                                            get-attack-count
                                               ]]
         '[ppdsp.masking.data_fitting :refer [kernel-regression-using-fastmath k-AEL-final-from-selected-kernel]] ;retrieve-accuracy-using-fitted-function 
         '[ppdsp.utils :refer [map-vals mean]])
;;(import '[java.lang.instrument Instrumentation])

null

## Experiment Configuration

In [2]:
;; Short dataset name; other cells splice it into "workspace/<label>/..." paths.
(def dataset-label "electricity")

;; The electricity CSV, loaded through read-csv-dataset.
(def dataset
  (let [csv-path         "datasets/electricity/elecNormNew.csv"
        record-limit     99999999 ;; all records
        numeric-features ["date" "day" "period" "nswprice"
                          "nswdemand" "vicprice" "vicdemand" "transfer"]]
    (read-csv-dataset csv-path record-limit numeric-features)))

#'beaker_clojure_shell_cf657b96-0512-481b-af18-60b357753dda/dataset

In [3]:
;; Privacy parameter; stored in the experiment configurations under :epsilon.
;; Note that this is a single number, not a collection.
(def epsilons 0.2)

;; Dataset dimensions.
(def feature-count (dataset-feature-count dataset))
(def record-count (dataset-record-count dataset))

;; Each entry is half of the cycle size.
;; Values noted by the author: [500 1000 2000 4000]
(def cycle-sizes [1000])

;; attack count = (record-count * 5%) / 4, because each attack uses 4 known I/O pairs.
(def attack-count
  (-> record-count
      (* 0.05)
      (/ 4)
      int))

(def independent-noise-sigmas [0.25])

;; One cumulative sigma per independent sigma, derived for this record count.
(def cumulative-noise-sigmas
  (map (fn [independent-sigma]
         (get-cumulative-noise-sigma independent-sigma record-count))
       independent-noise-sigmas))

(def class-count (dataset-class-count dataset))


;; Settings shared by the experiment configurations in this notebook; both
;; logistic-cumulative-noise-configuration and logistic-cumulative-noise-validation
;; are built by merging extra keys onto this map.
(def base-configuration
    {:dataset dataset
     ;; Uses the dataset's own feature count as the only projection size.
     :projection-feature-counts [feature-count]
     :projection-sigmas [1.0]
     :translations [0]
     :known-record-counts [4] ;; per attack -> total known record count = 4 * attack count = 5% of the record count
     :known-record-ranges [1]
     ;; Only the Hoeffding tree (from ppdsp.classifier.moa-classifier) is registered, under :hdt.
     :classifier-fns {:hdt hoeffding-tree} 
     :attempt-count 3
     :attack-count  attack-count
     :threads-per-configuration 2
     :threads-per-evaluation 1
     :seed 1
     ;; Previous evaluation set, kept for reference:
     ;:evaluations [:privacy-auc :accuracy :privacy-attacks]
     :evaluations [:privacy :accuracy ]
    }
)

;; Logistic cumulative-noise experiment settings: base-configuration extended with
;; the result-file locations under workspace/<dataset-label>/ and the noise, cycle
;; and attack parameters for this run.
(def logistic-cumulative-noise-configuration
  (let [;; Prefixes a "/file-name" suffix with this dataset's workspace directory.
        in-workspace (fn [suffix] (str "workspace/" dataset-label suffix))

        result-files
        {:output-file                         (in-workspace "/logistic-cumulative-output.edn")
         :output-file-privacy-auc             (in-workspace "/logistic-cumulative-privacy-auc.csv")
         :attck-results-file                  (in-workspace "/attack-results.edn")
         :output-file-privacy-attacks         (in-workspace "/logistic-cumulative-privacy-attacks.csv")
         :original-model-description          (in-workspace "/original-model-description.edn")
         :original-probability-file           (in-workspace "/original-AEL.csv")
         :masked-model-file                   (in-workspace "/masked-model-description.edn")
         :masked-probability-file             (in-workspace "/masked-AEL.csv")
         :final-bp-file                       (in-workspace "/final-breach-probabilities.csv")
         :temp-bp-file                        (in-workspace "/temp-bp.edn")
         :previous-AUC-file-to-update         (in-workspace "/previous-AUC.csv")
         :previous-upper-bound-file-to-update (in-workspace "/previous-upper-bound.csv")}

        experiment-parameters
        {:maximum-fn-value        1 ; [1]
         ;; Original note: will be run for different k values up to 0.1 with tilda = 0.002.
         ;; NOTE(review): the recorded run output steps 0.005, 0.009, 0.013, 0.017 — confirm the step.
         ;; Alternative fixed list kept for reference:
         ;;   :growth-rate-k [0.0015 0.003 0.006 0.0125 0.025 0.05 0.1]
         :growth-rate-k-initial   0.005
         :cumulative-noise-sigmas cumulative-noise-sigmas
         :cycle-sizes             cycle-sizes
         :epsilon                 epsilons
         :attack-strategies       [:a-rp :a-rpcn :a-rpcn-1]}]
    ;; Options that were disabled in the original cell, kept for reference:
    ;;   :cycle-size-upper-bound (load-data (str "workspace/" dataset-label "/previous-upper-bound.csv"))
    ;;   :cycle-size-upper-bound previous-ub
    ;;   :previous-AUC (load-data (str "workspace/" dataset-label "/previous-AUC.csv"))
    ;;   :pre-AUC previous-AUC
    ;;   :bandwidth initial-f-value
    ;;   :flat-record-length 200  ; length of the flat period of the cycle according to the
    ;;                            ; logistic function; depends on the cycle size
    (merge base-configuration result-files experiment-parameters)))



#'beaker_clojure_shell_cf657b96-0512-481b-af18-60b357753dda/logistic-cumulative-noise-configuration

## Run Experiments and Write Results (AEL & BP) to Files

In [4]:
;; Runs the masking experiments with the logistic cumulative-noise configuration.
;; The recorded output below shows one "privacy-evaluation" pass per growth rate
;; (0.005, 0.009, 0.013, 0.017, ...), each taking roughly 440-500 seconds.
(run-masking-experiments-cycles logistic-cumulative-noise-configuration)

Starting: privacy-evaluation -pf8 -ps1.0 -cs0.0017616412765085574 -tr0 -cz2000 -krc4 -krr1 -ac566 -krrp:middle -gr0.005


Mar 31, 2021 3:32:59 AM com.github.fommil.jni.JniLoader liberalLoad
INFO: successfully loaded /tmp/jniloader4661626531969301095netlib-native_system-linux-x86_64.so


Growth Rate =  0.005
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 444037.1703 msecs"
Starting: privacy-evaluation -pf8 -ps1.0 -cs0.0017616412765085574 -tr0 -cz2000 -krc4 -krr1 -ac566 -krrp:middle -gr0.009
Growth Rate =  0.009
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 462844.8899 msecs"
Starting: privacy-evaluation -pf8 -ps1.0 -cs0.0017616412765085574 -tr0 -cz2000 -krc4 -krr1 -ac566 -krrp:middle -gr0.013
Growth Rate =  0.013
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 467776.2823 msecs"
Starting: privacy-evaluation -pf8 -ps1.0 -cs0.0017616412765085574 -tr0 -cz2000 -krc4 -krr1 -ac566 -krrp:middle -gr0.017
Growth Rate =  0.017
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 493340.5067 msecs"
Starting: privacy-evaluation -pf8 -ps1.0 -cs0.0017616412765085574 -tr0 -cz2000 -krc4 -krr1 -ac566 -krrp:

null

## Data Fitting Using Privacy and Accuracy Results

In [5]:
;; Inputs and output location for the data-fitting step.
(def accuracy-file
  (str "workspace/" dataset-label "/masked-AEL.csv"))

;; Thresholds to fit for. Other values noted by the author:
;;   [0.5 0.55 0.6 0.65 0.7 0.75 0.8 0.85 0.9 0.95] 0.85 0.9 0.92 0.95 0.97
(def user-threshold-testing
  [0.6 0.7 0.75 0.8 0.85 0.9 0.95 0.97])

(def datafitting-results-file
  (str "workspace/" dataset-label "/results-data-fitting.csv"))

;; Fit using the breach-probability and privacy-AUC files named in the experiment
;; configuration, together with the accuracy file defined above.
(let [{:keys [final-bp-file output-file-privacy-auc]} logistic-cumulative-noise-configuration]
  (kernel-regression-using-fastmath final-bp-file
                                    accuracy-file
                                    user-threshold-testing
                                    datafitting-results-file
                                    output-file-privacy-auc))


[null, null, null, null, null, null, null, null]

In [13]:
;; Same accuracy file as in the data-fitting cell; redefined so this cell can run on its own.
(def accuracy-file
  (str "workspace/" dataset-label "/masked-AEL.csv"))

(def final-AEL-file
  (str "workspace/" dataset-label "/final-ael.csv"))

;; Should be run for all of: 0.75, 0.8, 0.85, 0.9, 0.95, 0.97 0.98 0.985 0.99 0.995
(def privacy-threshold-by-user 0.995)

(let [{:keys [final-bp-file output-file-privacy-auc]} logistic-cumulative-noise-configuration]
  (k-AEL-final-from-selected-kernel final-bp-file
                                    accuracy-file
                                    output-file-privacy-auc
                                    privacy-threshold-by-user
                                    final-AEL-file))

null

## Validate Experiments

In [14]:
;; Validation of the data-fitting results: the experiments are run again with the
;; k values obtained from data fitting, writing everything under
;; workspace/<dataset-label>/validation/.
(def logistic-cumulative-noise-validation
  (let [;; Prefixes a "/validation/file-name" suffix with this dataset's workspace directory.
        in-workspace (fn [suffix] (str "workspace/" dataset-label suffix))

        result-files
        {:output-file                         (in-workspace "/validation/logistic-cumulative-output.edn")
         :output-file-privacy-auc             (in-workspace "/validation/logistic-cumulative-privacy-auc.csv")
         :attck-results-file                  (in-workspace "/validation/attack-results.edn")
         :output-file-privacy-attacks         (in-workspace "/validation/logistic-cumulative-privacy-attacks.csv")
         :original-model-description          (in-workspace "/validation/original-model-description.edn")
         :original-probability-file           (in-workspace "/validation/original-AEL.csv")
         :masked-model-file                   (in-workspace "/validation/masked-model-description.edn")
         :masked-probability-file             (in-workspace "/validation/masked-AEL.csv")
         :final-bp-file                       (in-workspace "/validation/final-breach-probabilities.csv")
         :temp-bp-file                        (in-workspace "/validation/temp-bp.edn")
         :previous-AUC-file-to-update         (in-workspace "/validation/previous-AUC.csv")
         :previous-upper-bound-file-to-update (in-workspace "/validation/previous-upper-bound.csv")}

        experiment-parameters
        {:maximum-fn-value        1 ; [1]
         ;; k values received after data fitting:
         ;;   k from bp - Wave:     0.6598 0.5911 0.2228 0.2052 0.2371 0.0628 0.0506 0.0249 0.0986 0.4607
         ;;   k from bp - Rational: 0.0668 0.0192 0.0577 0.1211 0.0710 0.0534 0.0466 0.0410 0.0329 0.0218
         :growth-rate-k-initial   [0.0668 0.0192 0.0577 0.1211 0.0710 0.0534 0.0466 0.0410 0.0329 0.0218]
         :cumulative-noise-sigmas cumulative-noise-sigmas
         :cycle-sizes             cycle-sizes
         :epsilon                 epsilons
         :attack-strategies       [:a-rp :a-rpcn :a-rpcn-1]}]
    (merge base-configuration result-files experiment-parameters)))

(validate-datafitting-results logistic-cumulative-noise-validation)

Starting: privacy-evaluation -pf8 -ps1.0 -cs0.0017616412765085574 -tr0 -cz2000 -krc4 -krr1 -ac566 -krrp:middle -gr0.0668
Growth Rate =  0.0668
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 459260.1269 msecs"
Starting: privacy-evaluation -pf8 -ps1.0 -cs0.0017616412765085574 -tr0 -cz2000 -krc4 -krr1 -ac566 -krrp:middle -gr0.0192
Growth Rate =  0.0192
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 476766.271 msecs"
Starting: privacy-evaluation -pf8 -ps1.0 -cs0.0017616412765085574 -tr0 -cz2000 -krc4 -krr1 -ac566 -krrp:middle -gr0.0577
Growth Rate =  0.0577
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 477471.1921 msecs"
Starting: privacy-evaluation -pf8 -ps1.0 -cs0.0017616412765085574 -tr0 -cz2000 -krc4 -krr1 -ac566 -krrp:middle -gr0.1211
Growth Rate =  0.1211
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed ti

[null, null, null, null, null, null, null, null, null, null]

In [7]:
;;Accuracy per sample for logistic noise
;; NOTE(review): results-cumulative-logistic is not defined in any cell shown in this
;; notebook, which matches the recorded "Unable to resolve symbol" error — confirm
;; where it is supposed to come from before re-running.
(def accuracy-cycles-outputfile-logistic (str "workspace/" dataset-label "/logistic-accuracy-per-cycle.edn"))
(def details-outputfile-logistic (str "workspace/" dataset-label "/logistic-accuracy-details.edn"))
;; NOTE(review): cycle-sizes is [1000] and is described as half of the cycle size, and the
;; run log prints -cz2000 — confirm whether the full cycle here should be 2000.
(def full-cycle 1000)
(def access-result-list-logistic (nth results-cumulative-logistic 0)) ;;nth is used to access elements of lists
 (def acc-after-masking-logistic (-> access-result-list-logistic :accuracy :hdt :raw-results )) ;;getting only accuracy results
 

       ;;(accuracy-updating-cycles acc-after-masking-logistic accuracy-cycles-outputfile-logistic record-count full-cycle details-outputfile-logistic)

java.lang.RuntimeException:  Unable to resolve symbol

In [8]:
;;Average Accuracy for the attacked cycle period (Starting flat period of the logistic cycle)
;; NOTE(review): this cell cannot run as the notebook stands:
;;   - accuracy-for-flat-areas is commented out of the jupyter-helpers :refer list;
;;   - :flat-record-length is commented out of logistic-cumulative-noise-configuration,
;;     so the keyword lookup below yields nil;
;;   - logistic-accuracy-details.edn is the path passed to the accuracy-updating-cycles
;;     call that is commented out in the previous cell, so presumably nothing writes it
;;     (the recorded error is FileNotFoundException) — confirm.
;; Despite its name, accuracy-details-file is bound to the result of load-data, not to a path.
(def accuracy-details-file (load-data (str "workspace/" dataset-label "/logistic-accuracy-details.edn"))) ;;reading accuracy details from the previously written file
(def accuracy-for-attcked-period-file (str "workspace/" dataset-label "/logistic-accuracy-attacked-period.csv")) ;; output file for the relevant accuracy details
(def accuracy-details  (-> accuracy-details-file :sample-accuracy ))  
(def full-cycle 1000)
(def flat-record-length (:flat-record-length logistic-cumulative-noise-configuration))
       (accuracy-for-flat-areas accuracy-details full-cycle flat-record-length record-count accuracy-for-attcked-period-file)


java.io.FileNotFoundException:  workspace/electricity/logistic-accuracy-details.edn (No such file or directory)

In [4]:
;; Loads the final breach probability from logistic-attacks-per-cycle.edn and appends it
;; to logistic-breach-probability.csv.
;; NOTE(review): the recorded output is "Unable to resolve symbol" and the cell counter is
;; In [4]; presumably it was run in a session where the require cell had not been
;; evaluated — confirm. Nothing in the cells shown writes logistic-attacks-per-cycle.edn.
(def get-breach-probabilty (load-data (str "workspace/" dataset-label "/logistic-attacks-per-cycle.edn"))) ;;get the final breach probability written in the file
(save-data-append-newline (str "workspace/" dataset-label "/logistic-breach-probability.csv") get-breach-probabilty) ;;append the value to another file for the operations after 100 runs

java.lang.RuntimeException:  Unable to resolve symbol

## Comparison of Attack Strategies

In [18]:
;; Renders the attack-strategy comparison plots for every epsilon / cycle-size pair:
;; first over all results, then broken down by cumulative noise sigma.
;;
;; `epsilons` is defined as a single number in this notebook, but doseq needs something
;; seqable, so a scalar is wrapped in a vector before iterating. A collection of
;; epsilons is still iterated unchanged.
;;
;; NOTE(review): display-html and attack-strategy-comparison-plots are commented out of
;; the jupyter-helpers :refer list, and flat-results-cumulative-logistic is not defined
;; in the cells shown, so the recorded "Unable to resolve symbol" error will persist
;; until those are restored.
(doseq [epsilon (if (coll? epsilons) epsilons [epsilons])
        cycles  cycle-sizes]
    ;; cycle-sizes holds half-cycle lengths, hence the doubling in the heading.
    (display-html (str "<h3>Logistic Cumulative Noise With Cycles" "</h3>" "<h3>Epsilon = " epsilon "</h3>" "<h3>Cycle Size = " (* cycles 2 )"</h3>" ))
    (.display (attack-strategy-comparison-plots flat-results-cumulative-logistic :all epsilon
                                              :plot-width 400
                                              :plot-height 400
                                              :show-legend? true))
    (display-html (str "<h4>Breakdown by noise amount</h4>"))
    (.display (attack-strategy-comparison-plots flat-results-cumulative-logistic :cumulative-noise-sigma epsilon
                                                :plot-width 400
                                                :plot-height 400
                                                :show-legend? false)))

java.lang.RuntimeException:  Unable to resolve symbol

In [19]:
;; Builds the attack-strategy comparison for the cumulative-cycles results, saves it as
;; EDN under the dataset workspace and shows it as a table.
;; NOTE(review): attack-strategy-comparison is commented out of the jupyter-helpers
;; :refer list and flat-results-cumulative-cycles is not defined in the cells shown
;; (recorded error: "Unable to resolve symbol"). Also, epsilons is a single number in
;; this notebook — confirm whether attack-strategy-comparison expects a collection.
(let [comparison (attack-strategy-comparison flat-results-cumulative-cycles epsilons
                                  :known-record-count (apply max (:known-record-counts base-configuration)))]
    (save-data (str "workspace/" dataset-label "/cumulative-cycles-attack-strategies-comparison.edn") comparison)
    (display-table comparison))

java.lang.RuntimeException:  Unable to resolve symbol

In [20]:
;; Builds the attack-strategy comparison for the logistic results, saves it as EDN
;; under the dataset workspace and shows it as a table.
;; NOTE(review): attack-strategy-comparison is commented out of the jupyter-helpers
;; :refer list and flat-results-cumulative-logistic is not defined in the cells shown
;; (recorded error: "Unable to resolve symbol"). Also, epsilons is a single number in
;; this notebook — confirm whether attack-strategy-comparison expects a collection.
(let [comparison (attack-strategy-comparison flat-results-cumulative-logistic epsilons
                                  :known-record-count (apply max (:known-record-counts base-configuration)))]
    (save-data (str "workspace/" dataset-label "/logistic-attack-strategies-comparison.edn") comparison)
    (display-table comparison))


java.lang.RuntimeException:  Unable to resolve symbol

In [21]:
;; Attack strategy used for each masking method by the comparison cells that follow.
;; :a-rpcn-1 is one of the strategies listed under :attack-strategies in the
;; experiment configurations.
;;(def best-cumulative-attack-strategy :a-rpcn-1)
(def best-cumulative-cycles-attack-strategy :a-rpcn-1)
(def best-logistic-attack-strategy :a-rpcn-1)

#'beaker_clojure_shell_23e4d944-a124-4fb2-92e2-c6028b05c2dd/best-logistic-attack-strategy

## Linear Cumulative vs. Logistic Cumulative Noise

In [22]:
;; Shows the legend for the accuracy/privacy trade-off plots; the trailing nil makes
;; the cell itself evaluate to nil.
;; NOTE(review): display-html and accuracy-privacy-tradeoff-legend are commented out of
;; the jupyter-helpers :refer list (recorded error: "Unable to resolve symbol").
(display-html (accuracy-privacy-tradeoff-legend cumulative-noise-sigmas))
nil

java.lang.RuntimeException:  Unable to resolve symbol

In [23]:
;; Displays the accuracy/privacy trade-off comparison (cumulative cycles vs. logistic)
;; once per epsilon.
;;
;; `epsilons` is defined as a single number in this notebook, but doseq needs something
;; seqable, so a scalar is wrapped in a vector before iterating. A collection of
;; epsilons is still iterated unchanged.
;;
;; NOTE(review): accuracy-privacy-tradeoff-comparison is commented out of the
;; jupyter-helpers :refer list, and results-cumulative-cycles /
;; results-cumulative-logistic are not defined in the cells shown, so the recorded
;; "Unable to resolve symbol" error will persist until those are restored.
;; NOTE(review): :arf is passed as the classifier key, but :classifier-fns in
;; base-configuration only registers :hdt — confirm which key is intended.
(doseq [epsilon (if (coll? epsilons) epsilons [epsilons])]
    (.display (accuracy-privacy-tradeoff-comparison  results-cumulative-cycles results-cumulative-logistic ;;results-cumulative
                                                    :arf  best-cumulative-cycles-attack-strategy best-logistic-attack-strategy epsilon  ;;best-cumulative-attack-strategy
                                                    :plot-width 400
                                                    :plot-height 400)))

java.lang.RuntimeException:  Unable to resolve symbol

### Comparison of Square Distance From Origin

Performance is measured as the sum of the squares of (1) the probability of an ε-privacy breach and (2) the classification error, i.e. the squared distance from the origin.

In [24]:
;; Computes the accuracy/privacy trade-off table (square distance, one row per noise
;; level), saves it to mask-comparison.edn under the dataset workspace and displays it.
;; NOTE(review): accuracy-privacy-tradeoff is commented out of the jupyter-helpers :refer
;; list, and results-cumulative-cycles / results-cumulative-logistic are not defined in
;; the cells shown (recorded error: "Unable to resolve symbol").
;; NOTE(review): :arf is passed as the classifier key, but :classifier-fns in
;; base-configuration only registers :hdt — confirm which key is intended.
(let [comparison (accuracy-privacy-tradeoff  results-cumulative-cycles results-cumulative-logistic ;;results-cumulative
                               :arf  best-cumulative-cycles-attack-strategy best-logistic-attack-strategy epsilons ;;best-cumulative-attack-strategy
                               :square-distance? true
                               :row-per-noise-level? true
                               :known-record-count (apply max (:known-record-counts base-configuration))
                                           )]
    (save-data (str "workspace/" dataset-label "/mask-comparison.edn") comparison)
    (display-table comparison))

java.lang.RuntimeException:  Unable to resolve symbol

## Effect of Cumulative Noise on Accuracy over Time

In [25]:
;; Plots accuracy over time for the cumulative-cycles and logistic results.
;; NOTE(review): noise-accuracy-plot is commented out of the jupyter-helpers :refer list,
;; and results-cumulative-cycles / results-cumulative-logistic are not defined in the
;; cells shown (recorded error: "Unable to resolve symbol").
;; NOTE(review): :arf is passed as the classifier key, but :classifier-fns in
;; base-configuration only registers :hdt — confirm which key is intended.
(noise-accuracy-plot  results-cumulative-cycles results-cumulative-logistic :arf ;;results-cumulative
                     :init-width 800
                     :init-height 400)

java.lang.RuntimeException:  Unable to resolve symbol

## Effect of Cumulative Noise on Privacy over Time

In [26]:
;; Masking-error plots for the cumulative-cycles results, restricted to the chosen
;; attack strategy, the largest cumulative noise sigma and the largest known-record count.
(let [chosen-strategy? (fn [result]
                         (= best-cumulative-cycles-attack-strategy (:strategy result)))
      largest-sigma?   (fn [result]
                         (= (apply max cumulative-noise-sigmas) (:cumulative-noise-sigma result)))
      largest-known?   (fn [result]
                         (= (apply max (:known-record-counts base-configuration)) (:known-record-count result)))
      ;; A result is kept only when all three predicates hold, checked in this order.
      selected-results (filter (every-pred chosen-strategy? largest-sigma? largest-known?)
                               flat-results-cumulative-cycles)]
  (display-masking-error-plots selected-results
                               :cumulative-noise-sigma :known-record-count :strategy
                               :plot-width 500
                               :plot-height 500))

java.lang.RuntimeException:  Unable to resolve symbol

In [27]:
;; Masking-error plots for the logistic results, restricted to the chosen attack
;; strategy, the largest cumulative noise sigma and the largest known-record count.
(let [chosen-strategy? (fn [result]
                         (= best-logistic-attack-strategy (:strategy result)))
      largest-sigma?   (fn [result]
                         (= (apply max cumulative-noise-sigmas) (:cumulative-noise-sigma result)))
      largest-known?   (fn [result]
                         (= (apply max (:known-record-counts base-configuration)) (:known-record-count result)))
      ;; A result is kept only when all three predicates hold, checked in this order.
      selected-results (filter (every-pred chosen-strategy? largest-sigma? largest-known?)
                               flat-results-cumulative-logistic)]
  (display-masking-error-plots selected-results
                               :cumulative-noise-sigma :known-record-count :strategy
                               :plot-width 500
                               :plot-height 500))

java.lang.RuntimeException:  Unable to resolve symbol