Skip to content

Commit 6225cd6

Browse files
committed
Lefcheck's code
1 parent 0740127 commit 6225cd6

File tree

3 files changed

+254
-5
lines changed

3 files changed

+254
-5
lines changed

Lecture-0-Setup-of-Python-Environment.ipynb

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,26 @@
5555
"$ source activate gwas-lecture\n",
5656
"$ jupyter notebook\n",
5757
"```\n",
58-
"Open link that is displayed in terminal in browser \n"
58+
"Open link that is displayed in terminal in browser \n",
59+
"\n",
60+
"***"
5961
]
6062
},
6163
{
62-
"cell_type": "code",
63-
"execution_count": null,
64+
"cell_type": "markdown",
6465
"metadata": {},
65-
"outputs": [],
66-
"source": []
66+
"source": [
67+
"## To run R in jupyter, install IRkernel\n",
68+
"\n",
69+
"The installation instructions for IRkernel are described in detail [here](https://github.com/IRkernel/IRkernel) or [here](https://irkernel.github.io/installation/#source-panel). Start by opening R in the terminal.\n",
70+
"\n",
71+
"In the terminal, type:\n",
72+
"* install.packages('IRkernel')\n",
73+
"* IRkernel::installspec() # to register the kernel in the current R installation\n",
74+
"\n",
75+
"Launch jupyter with the specified kernel:\n",
76+
"* jupyter console --kernel=ir"
77+
]
6778
}
6879
],
6980
"metadata": {

data/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
studies['smith08']['url'] = url_base + 'smith_2008/smith08.hdf5'
2828
studies['smith08']['path'] = 'smith_2008/smith08.hdf5'
2929

30+
studies['glucs'] = {}
31+
studies['smith08']['url'] = url_base + 'cmeyer_glucs2015/bmeyer_etal.txt'
32+
studies['smith08']['path'] = 'cmeyer_glucs2015/bmeyer_etal.txt'
33+
3034
def get_file(study_name):
3135
"""return hdf5 file and download from the web if needed"""
3236
study = studies[study_name]
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
## The code in this R-script is maintained at:
2+
## https://github.com/jslefche/rsquared.glmm/blob/master/rsquaredglmm.R
3+
##
4+
## it is an implementation of the algorithm described here:
5+
## https://besjournals.onlinelibrary.wiley.com/doi/10.1111/j.2041-210x.2012.00261.x
6+
##
7+
## This code is also available in the R-package "piecewiseSEM" but was copied verbatim here, as said package is not currently available in R version 3.4.3
8+
##
9+
## Author: Jon Lefcheck
10+
###############################################################################
11+
12+
#' R-squared and pseudo-rsquared for a list of (generalized) linear (mixed) models
13+
#'
14+
#' This function calls the generic \code{\link{r.squared}} function for each of the
15+
#' models in the list and rbinds the outputs into one data frame
16+
#'
17+
#' @param a list of fitted (generalized) linear (mixed) model objects
18+
#' @return a dataframe with one row per model, and "Class",
19+
#' "Family", "Marginal", "Conditional" and "AIC" columns
20+
rsquared.glmm <- function(modlist) {
21+
# Iterate over each model in the list
22+
do.call(rbind, lapply(modlist, r.squared))
23+
}
24+
25+
26+
27+
28+
29+
30+
#' R-squared and pseudo-rsquared for (generalized) linear (mixed) models
31+
#'
32+
#' This generic function calculates the r squared and pseudo r-squared for
33+
#' a variety of(generalized) linear (mixed) model fits.
34+
#' Currently implemented for \code{\link{lm}}, \code{\link{lmerTest::merMod}},
35+
#' and \code{\link{nlme::lme}} objects.
36+
#' Implementing methods usually call \code{\link{.rsquared.glmm}}
37+
#'
38+
#' @param mdl a fitted (generalized) linear (mixed) model object
39+
#' @return Implementing methods usually return a dataframe with "Class",
40+
#' "Family", "Marginal", "Conditional", and "AIC" columns
41+
r.squared <- function(mdl){
42+
UseMethod("r.squared")
43+
}
44+
45+
46+
47+
48+
49+
50+
#' Marginal r-squared for lm objects
51+
#'
52+
#' This method uses r.squared from \code{\link{summary}} as the marginal.
53+
#' Contrary to other \code{\link{r.squared}} methods,
54+
#' this one doesn't call \code{\link{.rsquared.glmm}}
55+
#'
56+
#' @param mdl an lm object (usually fit using \code{\link{lm}},
57+
#' @return a dataframe with with "Class" = "lm", "Family" = "gaussian",
58+
#' "Marginal" = unadjusted r-squared, "Conditional" = NA, and "AIC" columns
59+
r.squared.lm <- function(mdl){
60+
data.frame(Class=class(mdl), Family="gaussian", Link="identity",
61+
Marginal=summary(mdl)$r.squared,
62+
Conditional=NA, AIC=AIC(mdl))
63+
}
64+
65+
66+
67+
68+
69+
70+
#' Marginal and conditional r-squared for merMod objects
71+
#'
72+
#' This method extracts the variance for fixed and random effects, residuals,
73+
#' and the fixed effects for the null model (in the case of Poisson family),
74+
#' and calls \code{\link{.rsquared.glmm}}
75+
#'
76+
#' @param mdl an merMod model (usually fit using \code{\link{lme4::lmer}},
77+
#' \code{\link{lme4::glmer}}, \code{\link{lmerTest::lmer}},
78+
#' \code{\link{blme::blmer}}, \code{\link{blme::bglmer}}, etc)
79+
r.squared.merMod <- function(mdl){
80+
# Get variance of fixed effects by multiplying coefficients by design matrix
81+
VarF <- var(as.vector(lme4::fixef(mdl) %*% t(mdl@pp$X)))
82+
# Get variance of random effects by extracting variance components
83+
# Omit random effects at the observation level, variance is factored in later
84+
VarRand <- sum(
85+
sapply(
86+
VarCorr(mdl)[!sapply(unique(unlist(strsplit(names(ranef(mdl)),":|/"))), function(l) length(unique(mdl@frame[,l])) == nrow(mdl@frame))],
87+
function(Sigma) {
88+
X <- model.matrix(mdl)
89+
Z <- X[,rownames(Sigma)]
90+
sum(diag(Z %*% Sigma %*% t(Z)))/nrow(X) } ) )
91+
# Get the dispersion variance
92+
VarDisp <- unlist(VarCorr(mdl)[sapply(unique(unlist(strsplit(names(ranef(mdl)),":|/"))), function(l) length(unique(mdl@frame[,l])) == nrow(mdl@frame))])
93+
if(is.null(VarDisp)) VarDisp = 0 else VarDisp = VarDisp
94+
if(inherits(mdl, "lmerMod")){
95+
# Get residual variance
96+
VarResid <- attr(lme4::VarCorr(mdl), "sc")^2
97+
# Get ML model AIC
98+
mdl.aic <- AIC(update(mdl, REML=F))
99+
# Model family for lmer is gaussian
100+
family <- "gaussian"
101+
# Model link for lmer is identity
102+
link <- "identity"
103+
104+
} else if(inherits(mdl, "glmerMod")){
105+
# Get the model summary
106+
mdl.summ <- summary(mdl)
107+
# Get the model's family, link and AIC
108+
family <- mdl.summ$family
109+
link <- mdl.summ$link
110+
mdl.aic <- AIC(mdl)
111+
# Pseudo-r-squared for poisson also requires the fixed effects of the null model
112+
if(family=="poisson") {
113+
# Get random effects names to generate null model
114+
rand.formula <- reformulate(sapply(findbars(formula(mdl)),
115+
function(x) paste0("(", deparse(x), ")")),
116+
response=".")
117+
# Generate null model (intercept and random effects only, no fixed effects)
118+
null.mdl <- update(mdl, rand.formula)
119+
# Get the fixed effects of the null model
120+
null.fixef <- as.numeric(lme4::fixef(null.mdl))
121+
}
122+
}
123+
# Call the internal function to do the pseudo r-squared calculations
124+
.rsquared.glmm(VarF, VarRand, VarResid, VarDisp, family = family, link = link,
125+
mdl.aic = mdl.aic,
126+
mdl.class = class(mdl),
127+
null.fixef = null.fixef)
128+
}
129+
130+
131+
132+
133+
134+
135+
#' Marginal and conditional r-squared for lme objects
136+
#'
137+
#' This method extracts the variance for fixed and random effects,
138+
#' as well as residuals, and calls \code{\link{.rsquared.glmm}}
139+
#'
140+
#' @param mdl an lme model (usually fit using \code{\link{nlme::lme}})
141+
r.squared.lme <- function(mdl){
142+
# Get design matrix of fixed effects from model
143+
Fmat <- model.matrix(eval(mdl$call$fixed)[-2], mdl$data)
144+
# Get variance of fixed effects by multiplying coefficients by design matrix
145+
VarF <- var(as.vector(nlme::fixef(mdl) %*% t(Fmat)))
146+
# Get variance of random effects by extracting variance components
147+
VarRand <- sum(suppressWarnings(as.numeric(nlme::VarCorr(mdl)
148+
[rownames(nlme::VarCorr(mdl)) != "Residual",
149+
1])), na.rm=T)
150+
151+
# Get residual variance
152+
VarResid <- as.numeric(nlme::VarCorr(mdl)[rownames(nlme::VarCorr(mdl))=="Residual", 1])
153+
# Call the internal function to do the pseudo r-squared calculations
154+
.rsquared.glmm(VarF, VarRand, VarResid, family = "gaussian", link = "identity",
155+
mdl.aic = AIC(update(mdl, method="ML")),
156+
mdl.class = class(mdl))
157+
}
158+
159+
160+
161+
162+
163+
164+
#' Marginal and conditional r-squared for glmm given fixed and random variances
165+
#'
166+
#' This function is based on Nakagawa and Schielzeth (2013). It returns the marginal
167+
#' and conditional r-squared, as well as the AIC for each glmm.
168+
#' Users should call the higher-level generic "r.squared", or implement a method for the
169+
#' corresponding class to get varF, varRand and the family from the specific object
170+
#'
171+
#' @param varF Variance of fixed effects
172+
#' @param varRand Variance of random effects
173+
#' @param varResid Residual variance. Only necessary for "gaussian" family
174+
#' @param family family of the glmm (currently works with gaussian, binomial and poisson)
175+
#' @param link model link function. Working links are: gaussian: "identity" (default);
176+
#' binomial: "logit" (default), "probit"; poisson: "log" (default), "sqrt"
177+
#' @param mdl.aic The model's AIC
178+
#' @param mdl.class The name of the model's class
179+
#' @param null.fixef Numeric vector containing the fixed effects of the null model.
180+
#' Only necessary for "poisson" family
181+
#' @return A data frame with "Class", "Family", "Marginal", "Conditional", and "AIC" columns
182+
.rsquared.glmm <- function(varF, varRand, varResid = NULL, varDisp = NULL, family, link,
183+
mdl.aic, mdl.class, null.fixef = NULL){
184+
185+
if( family == "gaussian" ){
186+
# Only works with identity link
187+
if(link != "identity")
188+
family_link.stop(family, link)
189+
# Calculate marginal R-squared (fixed effects/total variance)
190+
Rm <- varF/(varF+varRand+varResid)
191+
# Calculate conditional R-squared (fixed effects+random effects/total variance)
192+
Rc <- (varF+varRand)/(varF+varRand+varResid)
193+
194+
} else if( family == "binomial" ){
195+
# Get the distribution-specific variance
196+
if(link == "logit")
197+
varDist <- (pi^2)/3
198+
else if(link == "probit")
199+
varDist <- 1
200+
else
201+
family_link.stop(family, link)
202+
# Calculate marginal R-squared
203+
Rm <- varF/(varF+varRand+varDist+varDisp)
204+
# Calculate conditional R-squared (fixed effects+random effects/total variance)
205+
Rc <- (varF+varRand)/(varF+varRand+varDist+varDisp)
206+
207+
} else if( family == "poisson" ){
208+
# Get the distribution-specific variance
209+
if(link == "log")
210+
varDist <- log(1+1/exp(null.fixef))
211+
else if(link == "sqrt")
212+
varDist <- 0.25
213+
else
214+
family_link.stop(family, link)
215+
# Calculate marginal R-squared
216+
Rm <- varF/(varF+varRand+varDist+varDisp)
217+
# Calculate conditional R-squared (fixed effects+random effects/total variance)
218+
Rc <- (varF+varRand)/(varF+varRand+varDist+varDisp)
219+
220+
} else {
221+
family_link.stop(family, link)
222+
}
223+
224+
# Bind R^2s into a matrix and return with AIC values
225+
data.frame(Class=mdl.class, Family = family, Link = link,
226+
Marginal=Rm, Conditional=Rc, AIC=mdl.aic)
227+
}
228+
229+
#' stop execution if unable to calculate variance for a given family and link
230+
family_link.stop <- function(family, link){
231+
stop(paste("Don't know how to calculate variance for",
232+
family, "family and", link, "link."))
233+
}
234+

0 commit comments

Comments
 (0)