In [1]:
%%bash
# Build the standalone ppdsp uberjar with Leiningen; the following cell adds
# ../target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar to the classpath.
lein uberjar

Retrieving org/clojure/tools.namespace/0.3.1/tools.namespace-0.3.1.jar from central
Retrieving org/clojure/java.classpath/0.3.0/java.classpath-0.3.0.jar from central
Retrieving org/clojure/tools.reader/1.3.2/tools.reader-1.3.2.jar from central
Retrieving clj-time/clj-time/0.14.0/clj-time-0.14.0.jar from clojars
Compiling ppdsp.classifier.base
Compiling ppdsp.classifier.inspectablearf
Compiling ppdsp.classifier.moa-classifier
Compiling ppdsp.classifier.random
Compiling ppdsp.core
Compiling ppdsp.dataset.base
Compiling ppdsp.dataset.csv-dataset
Compiling ppdsp.dataset.moa
Compiling ppdsp.dataset.save-csv
Compiling ppdsp.masking.attack-data
Compiling ppdsp.masking.base
Compiling ppdsp.masking.data_fitting
Compiling ppdsp.masking.evaluation
Compiling ppdsp.masking.optimize
Compiling ppdsp.masking.projection
Compiling ppdsp.masking.single-stage-cumulative-attack
Compiling ppdsp.masking.two-stage-cumulative-attack
Compiling ppdsp.masking.two-stage-independent-attack
Compiling ppdsp.masking.u

In [3]:
%classpath add jar ../target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
(require '[clojure.string :as string]
         '[clojure.data.csv :as csv]
         '[clojure.math.numeric-tower :refer [ceil]]
         '[clojure.java.io :as io]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [save-data load-data display-table 
                                   round-known-record-counts
                                   ;;added by Waruni
                                    read-data run-masking-experiments-cycles  
                                     save-data-append-newline 
                                   validate-datafitting-results
                                   ]]
         '[ppdsp.classifier.moa-classifier :refer [hoeffding-tree ]]    ;;changed
         '[ppdsp.dataset.base :refer [dataset-feature-count dataset-record-count dataset-class-count get-schema]]
         '[ppdsp.dataset.csv-dataset :refer [read-csv-dataset]]
         '[ppdsp.masking.evaluation :refer [flatten-masking-experiment-recoveries
                                           unknown-record-relative-position
                                           add-combined-result
                                           get-cumulative-noise-sigma  calculate-avarage-information-loss-error 
                                            get-attack-count
                                               ]]
         '[ppdsp.masking.data_fitting :refer [kernel-regression-using-fastmath k-AEL-final-from-selected-kernel]] ;retrieve-accuracy-using-fitted-function 
         '[ppdsp.utils :refer [map-vals mean]]
         '[incanter.core ]
         )
         


null

## Experiment Configuration

In [4]:
;; Short dataset name, used to build the "workspace/<dataset-label>/..." paths.
(def dataset-label "nyc-taxi")

;; Load the NYC taxi CSV through the ppdsp CSV dataset reader.
(def dataset
  (let [csv-path         "datasets/nyc-taxi/nyc-50k.csv"
        record-limit     99999999 ;; large enough to mean "all records"
        numeric-features ["pickup_longitude" "pickup_latitude"
                          "dropoff_longitude" "dropoff_latitude"
                          "pickup_day" "pickup_hour" "dist"]]
    (read-csv-dataset csv-path record-limit numeric-features)))


#'beaker_clojure_shell_25e9fb05-cbc0-4178-882b-c9e0988f78fb/dataset

In [5]:
;; NOTE: despite the plural name this is a single number; it is passed on as
;; :epsilon in the experiment configurations.
(def epsilons 0.2)

;; Dataset dimensions, read once from the loaded dataset.
(def feature-count (dataset-feature-count dataset))
(def record-count  (dataset-record-count dataset))
(def class-count   (dataset-class-count dataset))

;; The stream is processed in windows of this many records.
(def window-size 10000)
(def no-of-windows
  (-> (/ record-count window-size) ceil int))

;; Cycle size(s). Per the original author's note, the initial cycle size is
;; what defines the cycles.
(def cycle-sizes [500])

;; Total attack count over all windows. Each window contributes known I/O pairs
;; equal to 5% of the window size, and one attack consumes 4 known I/O pairs,
;; hence the division by 4.
(def attack-count
  (let [known-records-per-window (* window-size 0.05)
        attacks-per-window       (int (/ known-records-per-window 4))]
    (* attacks-per-window no-of-windows)))

;; Independent noise levels and their cumulative equivalents over the stream.
(def independent-noise-sigmas [0.25])
(def cumulative-noise-sigmas
  (map (fn [sigma] (get-cumulative-noise-sigma sigma record-count))
       independent-noise-sigmas))

;; Settings shared by every experiment configuration in this notebook; the
;; per-experiment maps are merged on top of this one.
(def base-configuration
  {;; --- data ---
   :dataset      dataset
   :record-count record-count
   :window-size  window-size

   ;; --- masking ---
   :projection-feature-counts [feature-count]
   :projection-sigmas         [1.0]
   :translations              [0]

   ;; --- attack setup ---
   ;; 4 known records per attack, so the total known record count
   ;; (4 * attack count) amounts to 5% of each window.
   :known-record-counts [4]
   :known-record-ranges [1]
   :attack-count        attack-count
   :attempt-count       3

   ;; --- evaluation ---
   :classifier-fns {:hdt hoeffding-tree}
   :evaluations    [:privacy :accuracy]

   ;; --- execution ---
   :threads-per-configuration 2
   :threads-per-evaluation    1
   :seed                      1})

;; Configuration for the logistic cumulative-noise experiment: the shared
;; base-configuration plus output locations and noise/attack settings.
;; Every file lives under "workspace/<dataset-label>/".
(def logistic-cumulative-noise-configuration
  (let [workspace-file (fn [file-name]
                         (str "workspace/" dataset-label "/" file-name))]
    (merge base-configuration
           {;; --- experiment outputs ---
            :output-file                 (workspace-file "logistic-cumulative-output.edn")
            :output-file-privacy-auc     (workspace-file "logistic-cumulative-privacy-auc.csv")
            ;; Key spelling ("attck") is kept as-is: it is what the helper code
            ;; presumably looks up.
            :attck-results-file          (workspace-file "attack-results.edn")
            :output-file-privacy-attacks (workspace-file "logistic-cumulative-privacy-attacks.csv")
            :original-model-description  (workspace-file "original-model-description.edn")
            :original-probability-file   (workspace-file "original-AEL.csv")
            :masked-model-file           (workspace-file "masked-model-description.edn")
            :masked-probability-file     (workspace-file "masked-AEL.csv")
            :anytime-ael-file            (workspace-file "anytime-AEL.csv")
            :bp-window-file              (workspace-file "bp-per-window.csv")
            ;; The data-fitting cells read (:final-bp-file ...) from this map;
            ;; without this entry they would pass nil as the breach-probability
            ;; file. The file name mirrors the validation configuration.
            ;; NOTE(review): confirm the experiment runner writes to this path.
            :final-bp-file               (workspace-file "final-breach-probabilities.csv")
            :previous-AUC-file-to-update (workspace-file "previous-AUC.csv")
            :previous-upper-bound-file-to-update (workspace-file "previous-upper-bound.csv")
            :win-acc-details-file        (workspace-file "win-acc-details.edn")
            :win-acc-excel-file          (workspace-file "window-based-acc.csv")
            :average-error-win-file      (workspace-file "avg-error-per-win.csv")

            ;; --- logistic growth function ---
            :maximum-fn-value 1
            ;; Per the original note: run for different k values up to 0.1
            ;; with tilde = 0.004.
            :growth-rate-k-initial 0.097

            ;; --- noise / attack settings ---
            :cumulative-noise-sigmas cumulative-noise-sigmas
            :cycle-sizes             cycle-sizes
            :epsilon                 epsilons
            :attack-strategies       [:a-rp :a-rpcn :a-rpcn-1]})))



#'beaker_clojure_shell_25e9fb05-cbc0-4178-882b-c9e0988f78fb/logistic-cumulative-noise-configuration

##  Run Experiments and Write Results (AEL & BP) to Files

In [6]:
;; Run the masking experiments for the logistic cumulative-noise configuration.
;; The helper is referred from jupyter-helpers; the captured log below shows it
;; reporting the attacks, the breach-probability/accuracy calculation and a
;; file write.
(run-masking-experiments-cycles logistic-cumulative-noise-configuration)

Starting: privacy-evaluation -pf7 -ps1.0 -cs0.0016770258744685006 -tr0 -krc4 -krr1 -ac625 -krrp:middle -gr0.097 -ws10000


Dec 08, 2021 1:31:34 AM com.github.fommil.jni.JniLoader liberalLoad
INFO: successfully loaded /tmp/jniloader7184289337770498950netlib-native_system-linux-x86_64.so


Growth Rate =  0.097
Attacks were performed
Breach probability and accuracy was calculated
Written into the file
"Elapsed time: 559878.3514 msecs"


null

## Data Fitting Using Privacy and Accuracy Results

In [7]:
;; Inputs and outputs of the data-fitting step.
(def accuracy-file (str "workspace/" dataset-label "/masked-AEL.csv"))
;; Thresholds to test. Other values tried earlier, per the original note:
;; [0.5 0.55 0.6 0.65 0.7 0.75 0.8 0.85 0.9 0.95] 0.85 0.9 0.92 0.95 0.97
(def user-threshold-testing [0.6 0.7 0.75 0.8 0.85 0.9 0.95 0.97])
(def datafitting-results-file (str "workspace/" dataset-label "/results-data-fitting.csv"))

;; Fit privacy (breach probability) against accuracy using kernel regression.
(let [{:keys [final-bp-file output-file-privacy-auc]} logistic-cumulative-noise-configuration]
  ;; Fail fast with a clear message rather than handing nil to the fitting
  ;; code when the configuration does not name the breach-probability file.
  (when (nil? final-bp-file)
    (throw (ex-info (str "logistic-cumulative-noise-configuration has no :final-bp-file; "
                         "add the breach-probability CSV path to the configuration "
                         "before running data fitting")
                    {:missing-key :final-bp-file})))
  (kernel-regression-using-fastmath final-bp-file
                                    accuracy-file
                                    user-threshold-testing
                                    datafitting-results-file
                                    output-file-privacy-auc))



java.io.FileNotFoundException:  workspace/nyc-taxi/masked-AEL.csv (No such file or directory)

In [15]:
;; Inputs and outputs for picking the final k / AEL from the selected kernel.
(def accuracy-file (str "workspace/" dataset-label "/masked-AEL.csv"))
(def final-AEL-file (str "workspace/" dataset-label "/final-ael.csv"))
;; Per the original note this should be run for all of:
;; 0.75, 0.8, 0.85, 0.9, 0.95, 0.97 0.98 0.985 0.99 0.995
(def privacy-threshold-by-user 0.995)

(let [{:keys [final-bp-file output-file-privacy-auc]} logistic-cumulative-noise-configuration]
  ;; Fail fast with a clear message rather than handing nil to the fitting
  ;; code when the configuration does not name the breach-probability file.
  (when (nil? final-bp-file)
    (throw (ex-info (str "logistic-cumulative-noise-configuration has no :final-bp-file; "
                         "add the breach-probability CSV path to the configuration "
                         "before selecting the final k")
                    {:missing-key :final-bp-file})))
  (k-AEL-final-from-selected-kernel final-bp-file
                                    accuracy-file
                                    output-file-privacy-auc
                                    privacy-threshold-by-user
                                    final-AEL-file))

null

## Validate Experiments

In [16]:
;; To validate the data-fitting results, run the experiments again with the
;; k-values received after data fitting. All files go under
;; "workspace/<dataset-label>/validation/".
(def logistic-cumulative-noise-validation
  (let [validation-file (fn [file-name]
                          (str "workspace/" dataset-label "/validation/" file-name))]
    (merge base-configuration
           {;; --- outputs ---
            :output-file                 (validation-file "logistic-cumulative-output.edn")
            :output-file-privacy-auc     (validation-file "logistic-cumulative-privacy-auc.csv")
            :attck-results-file          (validation-file "attack-results.edn")
            :output-file-privacy-attacks (validation-file "logistic-cumulative-privacy-attacks.csv")
            :original-model-description  (validation-file "original-model-description.edn")
            :original-probability-file   (validation-file "original-AEL.csv")
            :masked-model-file           (validation-file "masked-model-description.edn")
            :masked-probability-file     (validation-file "masked-AEL.csv")
            :final-bp-file               (validation-file "final-breach-probabilities.csv")
            :temp-bp-file                (validation-file "temp-bp.edn")
            :previous-AUC-file-to-update (validation-file "previous-AUC.csv")
            :previous-upper-bound-file-to-update (validation-file "previous-upper-bound.csv")

            ;; --- logistic growth function ---
            :maximum-fn-value 1
            ;; k from bp - Wave:     0.1766 0.1848 0.0040 0.0064 0.1107 0.0607 0.0336 0.0071 0.0991 0.0662
            ;; k from bp - Rational: 0.0172 0.1255 0.4556 0.4015 0.0773 0.0554 0.1207 0.1639 0.2046 0.2369
            :growth-rate-k-initial [0.1766 0.1848 0.0040 0.0064 0.1107 0.0607 0.0336 0.0071 0.0991 0.0662]

            ;; --- noise / attack settings ---
            :cumulative-noise-sigmas cumulative-noise-sigmas
            :cycle-sizes             cycle-sizes
            :epsilon                 epsilons
            :attack-strategies       [:a-rp :a-rpcn :a-rpcn-1]})))

(validate-datafitting-results logistic-cumulative-noise-validation)

Starting: privacy-evaluation -pf7 -ps1.0 -cs0.0016770258744685006 -tr0 -cz2000 -krc4 -krr1 -ac625 -krrp:middle -gr0.1766
Growth Rate =  0.1766
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 437799.1025 msecs"
Starting: privacy-evaluation -pf7 -ps1.0 -cs0.0016770258744685006 -tr0 -cz2000 -krc4 -krr1 -ac625 -krrp:middle -gr0.1848
Growth Rate =  0.1848
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 447291.4816 msecs"
Starting: privacy-evaluation -pf7 -ps1.0 -cs0.0016770258744685006 -tr0 -cz2000 -krc4 -krr1 -ac625 -krrp:middle -gr0.004
Growth Rate =  0.004
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed time: 445737.7297 msecs"
Starting: privacy-evaluation -pf7 -ps1.0 -cs0.0016770258744685006 -tr0 -cz2000 -krc4 -krr1 -ac625 -krrp:middle -gr0.0064
Growth Rate =  0.0064
Attacks were performed
Breach probability was calculated
Written into the file
"Elapsed tim

[null, null, null, null, null, null, null, null, null, null]

In [33]:
;; Accuracy per sample for logistic noise.
(def accuracy-cycles-outputfile-logistic
  (str "workspace/" dataset-label "/logistic-accuracy-per-cycle.edn"))
(def details-outputfile-logistic
  (str "workspace/" dataset-label "/logistic-accuracy-details.edn"))
(def full-cycle 1000)

;; Take the first entry of the experiment results (nth works on lists too).
;; NOTE(review): results-cumulative-logistic is not defined in the visible part
;; of this notebook; it must come from an earlier session or cell.
(def access-result-list-logistic
  (nth results-cumulative-logistic 0))

;; Keep only the raw accuracy results of the :hdt classifier.
(def acc-after-masking-logistic
  (get-in access-result-list-logistic [:accuracy :hdt :raw-results]))

;; Disabled call, kept for reference:
;; (accuracy-updating-cycles acc-after-masking-logistic accuracy-cycles-outputfile-logistic record-count full-cycle details-outputfile-logistic)

#'beaker_clojure_shell_1504541b-a9b3-4a91-b637-747f99bc8753/acc-after-masking-logistic

In [16]:
;; Average accuracy for the attacked cycle period (the starting flat period of
;; the logistic cycle).

;; Despite the name, this holds the loaded data of the previously written
;; accuracy-details file, not a path.
(def accuracy-details-file
  (load-data (str "workspace/" dataset-label "/logistic-accuracy-details.edn")))

;; Output file for the relevant accuracy details.
(def accuracy-for-attcked-period-file
  (str "workspace/" dataset-label "/logistic-accuracy-attacked-period.csv"))

(def accuracy-details (:sample-accuracy accuracy-details-file))
(def full-cycle 1000)

;; NOTE(review): :flat-record-length is not set in the visible
;; logistic-cumulative-noise-configuration, so this evaluates to nil there.
(def flat-record-length
  (get logistic-cumulative-noise-configuration :flat-record-length))

;; NOTE(review): accuracy-for-flat-areas is not in the :refer list of the
;; require cell; confirm where it is supposed to come from.
(accuracy-for-flat-areas accuracy-details
                         full-cycle
                         flat-record-length
                         record-count
                         accuracy-for-attcked-period-file)


java.io.FileNotFoundException:  workspace/arem/logistic-accuracy-details.edn (No such file or directory)

In [17]:
;; Final breach probability, as previously written to the per-cycle attacks file.
(def get-breach-probabilty
  (load-data (str "workspace/" dataset-label "/logistic-attacks-per-cycle.edn")))

;; Append the value to a separate CSV so results can be aggregated after 100 runs.
(save-data-append-newline
  (str "workspace/" dataset-label "/logistic-breach-probability.csv")
  get-breach-probabilty)

java.io.FileNotFoundException:  workspace/arem/logistic-attacks-per-cycle.edn (No such file or directory)

## Comparison of Attack Strategies

In [18]:
;; Show the attack-strategy comparison plots for every epsilon / cycle-size
;; combination: first over all results, then broken down by noise amount.
;;
;; `epsilons` is defined in this notebook as a single number, which doseq
;; cannot iterate, so a scalar is wrapped in a vector here. A collection of
;; epsilons is iterated unchanged.
;; NOTE(review): display-html, attack-strategy-comparison-plots and
;; flat-results-cumulative-logistic are not defined or referred in the visible
;; notebook; the recorded run failed with "Unable to resolve symbol".
(doseq [epsilon (if (coll? epsilons) epsilons [epsilons])
        cycles  cycle-sizes]
    (display-html (str "<h3>Logistic Cumulative Noise With Cycles" "</h3>" "<h3>Epsilon = " epsilon "</h3>" "<h3>Cycle Size = " (* cycles 2 )"</h3>" ))
    (.display (attack-strategy-comparison-plots flat-results-cumulative-logistic :all epsilon
                                              :plot-width 400
                                              :plot-height 400
                                              :show-legend? true))
    (display-html (str "<h4>Breakdown by noise amount</h4>"))
    (.display (attack-strategy-comparison-plots flat-results-cumulative-logistic :cumulative-noise-sigma epsilon
                                                :plot-width 400
                                                :plot-height 400
                                                :show-legend? false)))

java.lang.RuntimeException:  Unable to resolve symbol

In [19]:
;; Compare attack strategies on the cumulative-cycles results at the largest
;; configured known-record count, save the table and display it.
(let [max-known-records (apply max (:known-record-counts base-configuration))
      output-path       (str "workspace/" dataset-label "/cumulative-cycles-attack-strategies-comparison.edn")
      comparison        (attack-strategy-comparison flat-results-cumulative-cycles
                                                    epsilons
                                                    :known-record-count max-known-records)]
  (save-data output-path comparison)
  (display-table comparison))

java.lang.RuntimeException:  Unable to resolve symbol

In [20]:
;; Compare attack strategies on the logistic results at the largest configured
;; known-record count, save the table and display it.
(let [max-known-records (apply max (:known-record-counts base-configuration))
      output-path       (str "workspace/" dataset-label "/logistic-attack-strategies-comparison.edn")
      comparison        (attack-strategy-comparison flat-results-cumulative-logistic
                                                    epsilons
                                                    :known-record-count max-known-records)]
  (save-data output-path comparison)
  (display-table comparison))


java.lang.RuntimeException:  Unable to resolve symbol

In [21]:
;; Attack strategy used for each noise scheme in the comparisons below.
;; Both values are one of the :attack-strategies listed in the configurations.
;; Disabled definition for the plain cumulative scheme, kept for reference:
;;(def best-cumulative-attack-strategy :a-rpcn-1)
(def best-cumulative-cycles-attack-strategy :a-rpcn-1)
(def best-logistic-attack-strategy :a-rpcn-1)

#'beaker_clojure_shell_23e4d944-a124-4fb2-92e2-c6028b05c2dd/best-logistic-attack-strategy

## Linear Cumulative vs. Logistic Cumulative Noise

In [22]:
;; Render the legend for the accuracy/privacy trade-off plots, one entry per
;; cumulative noise level.
(-> cumulative-noise-sigmas
    accuracy-privacy-tradeoff-legend
    display-html)
;; Make nil the cell's result.
nil

java.lang.RuntimeException:  Unable to resolve symbol

In [23]:
;; Display the accuracy/privacy trade-off comparison between the
;; cumulative-cycles and logistic results for every epsilon.
;;
;; `epsilons` is defined in this notebook as a single number, which doseq
;; cannot iterate, so a scalar is wrapped in a vector here. A collection of
;; epsilons is iterated unchanged.
;; NOTE(review): the classifier key here is :arf, while base-configuration only
;; registers :hdt; confirm which classifier these results were produced with.
(doseq [epsilon (if (coll? epsilons) epsilons [epsilons])]
    (.display (accuracy-privacy-tradeoff-comparison  results-cumulative-cycles results-cumulative-logistic ;;results-cumulative
                                                    :arf  best-cumulative-cycles-attack-strategy best-logistic-attack-strategy epsilon  ;;best-cumulative-attack-strategy
                                                    :plot-width 400
                                                    :plot-height 400)))

java.lang.RuntimeException:  Unable to resolve symbol

### Comparison of Square Distance From Origin

Performance is measured as the sum of the squares of (1) the probability of an ε-privacy breach and (2) the classification error.

In [24]:
;; Tabulate the accuracy/privacy trade-off (as square distance, one row per
;; noise level) for the cumulative-cycles and logistic results, save it and
;; display it.
;; NOTE(review): the classifier key here is :arf, while base-configuration only
;; registers :hdt; confirm which classifier these results were produced with.
(let [max-known-records (apply max (:known-record-counts base-configuration))
      output-path       (str "workspace/" dataset-label "/mask-comparison.edn")
      comparison        (accuracy-privacy-tradeoff results-cumulative-cycles
                                                   results-cumulative-logistic ;;results-cumulative
                                                   :arf
                                                   best-cumulative-cycles-attack-strategy
                                                   best-logistic-attack-strategy ;;best-cumulative-attack-strategy
                                                   epsilons
                                                   :square-distance? true
                                                   :row-per-noise-level? true
                                                   :known-record-count max-known-records)]
  (save-data output-path comparison)
  (display-table comparison))

java.lang.RuntimeException:  Unable to resolve symbol

## Effect of Cumulative Noise on Accuracy over Time

In [25]:
;; Plot accuracy over time for the cumulative-cycles and logistic results.
;; NOTE(review): noise-accuracy-plot and both result vars are not defined or
;; referred in the visible notebook (the recorded run failed with "Unable to
;; resolve symbol"); the :arf classifier key also differs from the :hdt key in
;; base-configuration.
(noise-accuracy-plot  results-cumulative-cycles results-cumulative-logistic :arf ;;results-cumulative
                     :init-width 800
                     :init-height 400)

java.lang.RuntimeException:  Unable to resolve symbol

## Effect of Cumulative Noise on Privacy over Time

In [26]:
;; Masking-error plots for the cumulative-cycles results, restricted to the
;; chosen attack strategy, the largest cumulative noise level and the largest
;; known-record count.
(display-masking-error-plots
  (filter (fn [result]
            (and (= best-cumulative-cycles-attack-strategy
                    (:strategy result))
                 (= (apply max cumulative-noise-sigmas)
                    (:cumulative-noise-sigma result))
                 (= (apply max (:known-record-counts base-configuration))
                    (:known-record-count result))))
          flat-results-cumulative-cycles)
  :cumulative-noise-sigma :known-record-count :strategy
  :plot-width 500
  :plot-height 500)

java.lang.RuntimeException:  Unable to resolve symbol

In [27]:
;; Masking-error plots for the logistic results, restricted to the chosen
;; attack strategy, the largest cumulative noise level and the largest
;; known-record count.
(display-masking-error-plots
  (filter (fn [result]
            (and (= best-logistic-attack-strategy
                    (:strategy result))
                 (= (apply max cumulative-noise-sigmas)
                    (:cumulative-noise-sigma result))
                 (= (apply max (:known-record-counts base-configuration))
                    (:known-record-count result))))
          flat-results-cumulative-logistic)
  :cumulative-noise-sigma :known-record-count :strategy
  :plot-width 500
  :plot-height 500)

java.lang.RuntimeException:  Unable to resolve symbol