Private regression using block coordinate descent
# Install privreg from the author's r-universe repository (CRAN is listed as
# the second repository); skip the download when the package is already there.
if (!requireNamespace("privreg", quietly = TRUE)) {
  install.packages(
    "privreg",
    repos = c(
      "https://vankesteren.r-universe.dev",
      "https://cloud.r-project.org"
    )
  )
}
library(privreg)

# create test data: 10 correlated normal predictors plus 1 binary predictor
set.seed(45)
S <- cov2cor(rWishart(1, 10, diag(10))[, , 1])
# NOTE(review): rbinom() draws 100 values while mvrnorm() returns 1000 rows,
# so cbind() silently recycles the binary column 10 times. Kept as is because
# the recorded output below depends on this exact random stream -- confirm
# whether 1000 draws were intended.
X <- cbind(MASS::mvrnorm(1000, rep(0, 10), S), rbinom(100, 1, 0.1))
b <- runif(11, -1, 1)
# NOTE(review): rnorm() likewise draws 100 noise values that are recycled over
# the 1000 rows of X %*% b, so the noise repeats every 100 rows -- confirm.
y <- X %*% b + rnorm(100, sd = sd(X %*% b))

# vertically partition test data: both parties hold the outcome y, alice holds
# predictor columns 1-5 and bob holds predictor columns 6-11
alice_data <- data.frame(y, X[, 1:5])
bob_data <- data.frame(y, X[, 6:11])

# create alice and bob locations
# Only alice's model includes the intercept; both sides must be given the same
# pre-shared crypt_key.
alice <- PrivReg$new(
  formula = y ~ .,
  data = alice_data,
  family = gaussian(),
  intercept = TRUE,
  verbose = TRUE,
  name = "alice",
  crypt_key = "pre-shared-key-123"
)
bob <- PrivReg$new(
  formula = y ~ .,
  data = bob_data,
  family = gaussian(),
  intercept = FALSE,
  verbose = TRUE,
  name = "bob",
  crypt_key = "pre-shared-key-123"
)

# connect the session: alice listens, bob connects to her on the local machine
alice$listen()
bob$connect(url = "127.0.0.1")
# ...

# estimate the model (started from alice's side)
alice$estimate()
# ...

# disconnect
alice$disconnect()

# compare results to glm() fitted on the full, unpartitioned data
summary(glm(y ~ X))
alice$summary()
bob$summary()
> summary(glm(y ~ X))
Call:
glm(formula = y ~ X)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.17875 -0.73828 -0.05085 0.72709 2.45231
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.10280 0.03419 -3.007 0.002708 **
X1 0.86003 0.19318 4.452 9.48e-06 ***
X2 -0.26996 0.12100 -2.231 0.025903 *
X3 0.85155 0.10760 7.914 6.65e-15 ***
X4 -1.07249 0.13248 -8.096 1.67e-15 ***
X5 0.36944 0.11597 3.186 0.001490 **
X6 1.05666 0.16451 6.423 2.07e-10 ***
X7 0.37315 0.11277 3.309 0.000971 ***
X8 -0.61272 0.19582 -3.129 0.001806 **
X9 0.44891 0.10730 4.184 3.12e-05 ***
X10 -0.69175 0.18520 -3.735 0.000198 ***
X11 -0.65930 0.12914 -5.105 3.96e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 1.080014)
Null deviance: 2142.7 on 999 degrees of freedom
Residual deviance: 1067.1 on 988 degrees of freedom
AIC: 2928.8
Number of Fisher Scoring iterations: 2
> alice$summary()
Privacy-preserving GLM
----------------------
family: gaussian
formula: y ~ .
iterations: 399
Coefficients:
Estimate Std. Error 2.5% 97.5% t value Pr(>|t|)
(Intercept) -0.102797 0.034189 -0.16989 -0.035705 -3.0067 0.002708 **
X1 0.860031 0.193180 0.48094 1.239122 4.4520 9.478e-06 ***
X2 -0.269965 0.121005 -0.50742 -0.032509 -2.2310 0.025903 *
X3 0.851555 0.107599 0.64041 1.062703 7.9142 6.653e-15 ***
X4 -1.072488 0.132478 -1.33246 -0.812517 -8.0956 1.666e-15 ***
X5 0.369441 0.115970 0.14186 0.597018 3.1857 0.001490 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> bob$summary()
Privacy-preserving GLM
----------------------
family: gaussian
formula: y ~ .
iterations: 400
Coefficients:
Estimate Std. Error 2.5% 97.5% t value Pr(>|t|)
X1 1.05666 0.16451 0.73382 1.37949 6.4229 2.074e-10 ***
X2 0.37315 0.11277 0.15185 0.59444 3.3089 0.0009705 ***
X3 -0.61272 0.19582 -0.99700 -0.22844 -3.1289 0.0018059 **
X4 0.44891 0.10730 0.23835 0.65947 4.1837 3.123e-05 ***
X5 -0.69175 0.18520 -1.05519 -0.32832 -3.7351 0.0001984 ***
X6 -0.65930 0.12914 -0.91272 -0.40588 -5.1053 3.961e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1