-
Notifications
You must be signed in to change notification settings - Fork 72
/
datasummary_balance.Rd
223 lines (181 loc) · 9.32 KB
/
datasummary_balance.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasummary_balance.R
\name{datasummary_balance}
\alias{datasummary_balance}
\title{Balance table: Summary statistics for different subsets of the data (e.g.,
control and treatment groups)}
\usage{
datasummary_balance(
formula,
data,
output = "default",
fmt = fmt_decimal(digits = 1, pdigits = 3),
title = NULL,
notes = NULL,
align = NULL,
stars = FALSE,
add_columns = NULL,
add_rows = NULL,
dinm = TRUE,
dinm_statistic = "std.error",
escape = TRUE,
...
)
}
\arguments{
\item{formula}{a one-sided formula with the "condition" or "column" variable
on the right-hand side. ~1 can be used to show summary statistics for the
full data set}
\item{data}{A data.frame (or tibble). If this data includes columns called
"blocks", "clusters", and/or "weights", the "estimatr" package will consider
them when calculating the difference in means. If there is a \code{weights}
column, the reported mean and standard errors will also be weighted.}
\item{output}{filename or object type (character string)
\itemize{
\item Supported filename extensions: .docx, .html, .tex, .md, .txt, .csv, .xlsx, .png, .jpg
\item Supported object types: "default", "html", "markdown", "latex", "latex_tabular", "typst", "data.frame", "tinytable", "gt", "kableExtra", "huxtable", "flextable", "DT", "jupyter". The "modelsummary_list" value produces a lightweight object which can be saved and fed back to the \code{modelsummary} function.
\item The "default" output format can be set to "tinytable", "kableExtra", "gt", "flextable", "huxtable", "DT", or "markdown"
\itemize{
\item If the user does not choose a default value, the packages listed above are tried in sequence.
\item Session-specific configuration: \code{options("modelsummary_factory_default" = "gt")}
\item Persistent configuration: \code{config_modelsummary(output = "markdown")}
}
\item Warning: Users should not supply a file name to the \code{output} argument if they intend to customize the table with external packages. See the 'Details' section.
\item LaTeX compilation requires the \code{booktabs} and \code{siunitx} packages, but \code{siunitx} can be disabled or replaced with global options. See the 'Details' section.
}}
\item{fmt}{how to format numeric values: integer, user-supplied function, or \code{modelsummary} function.
\itemize{
\item Integer: Number of decimal digits
\item User-supplied functions:
\itemize{
\item Any function which accepts a numeric vector and returns a character vector of the same length.
}
\item \code{modelsummary} functions:
\itemize{
\item \code{fmt = fmt_significant(2)}: Two significant digits (at the term-level)
\item \code{fmt = fmt_sprintf("\%.3f")}: See \code{?sprintf}
\item \code{fmt = fmt_identity()}: unformatted raw values
}
}}
\item{title}{string}
\item{notes}{list or vector of notes to append to the bottom of the table.}
\item{align}{A string with a number of characters equal to the number of columns in
the table (e.g., \code{align = "lcc"}). Valid characters: l, c, r, d.
\itemize{
\item "l": left-aligned column
\item "c": centered column
\item "r": right-aligned column
\item "d": dot-aligned column. For LaTeX/PDF output, this option requires at least version 3.0.25 of the siunitx LaTeX package. See the LaTeX preamble help section below for commands to insert in your LaTeX preamble.
}}
\item{stars}{to indicate statistical significance
\itemize{
\item FALSE (default): no significance stars.
\item TRUE: +=.1, *=.05, **=.01, ***=0.001
\item Named numeric vector for custom stars such as \code{c('*' = .1, '+' = .05)}
\item Note: a legend will not be inserted at the bottom of the table when the \code{estimate} or \code{statistic} arguments use "glue strings" with \code{{stars}}.
}}
\item{add_columns}{a data.frame (or tibble) with the same number of rows as
your main table.}
\item{add_rows}{a data.frame (or tibble) with the same number of columns as
your main table. By default, rows are appended to the bottom of the table.
You can define a "position" attribute of integers to set the row positions.
See Examples section below.}
\item{dinm}{TRUE calculates a difference in means with uncertainty
estimates. This option is only available if the \code{estimatr} package is
installed. If \code{data} includes columns named "blocks", "clusters", or
"weights", this information will be taken into account automatically by
\code{estimatr::difference_in_means}.}
\item{dinm_statistic}{string: "std.error" or "p.value"}
\item{escape}{boolean TRUE escapes or substitutes LaTeX/HTML characters which could
prevent the file from compiling/displaying. \code{TRUE} escapes all cells, captions, and notes. Users can have more fine-grained control by setting \code{escape=FALSE} and using an external command such as: \code{modelsummary(model, "latex") |> tinytable::format_tt(j=1:5, escape=TRUE)}}
\item{...}{all other arguments are passed through to the table-making
functions \link[tinytable:tt]{tinytable::tt}, \link[kableExtra:kbl]{kableExtra::kbl}, \link[gt:gt]{gt::gt}, \link[DT:datatable]{DT::datatable}, etc. depending on the \code{output} argument.
This allows users to pass arguments directly to \code{datasummary} in order to
affect the behavior of other functions behind the scenes.}
}
\description{
Creates balance tables with summary statistics for different subsets of the
data (e.g., control and treatment groups). It can also be used to create
summary tables for full data sets. See the Details and Examples sections
below, and the vignettes on the \code{modelsummary} website:
\itemize{
\item https://modelsummary.com/
\item https://modelsummary.com/articles/datasummary.html
}
}
\section{Global Options}{
The behavior of \code{modelsummary} can be modified by setting global options. For example:
\itemize{
\item \code{options(modelsummary_model_labels = "roman")}
}
The rest of this section describes each of the options above.
\subsection{Model labels: default column names}{
These global options change the style of the default column headers:
\itemize{
\item \code{options(modelsummary_model_labels = "roman")}
\item \code{options(modelsummary_panel_labels = "roman")}
}
The supported styles are: "model", "panel", "arabic", "letters", "roman", "(arabic)", "(letters)", "(roman)".
The panel-specific option is only used when \code{shape="rbind"}.
}
\subsection{Table-making packages}{
\code{modelsummary} supports 6 table-making packages: \code{tinytable}, \code{kableExtra}, \code{gt},
\code{flextable}, \code{huxtable}, and \code{DT}. Some of these packages have overlapping
functionalities. To change the default backend used for a specific file
format, you can use the \code{options} function:
\code{options(modelsummary_factory_html = 'kableExtra')}
\code{options(modelsummary_factory_latex = 'gt')}
\code{options(modelsummary_factory_word = 'huxtable')}
\code{options(modelsummary_factory_png = 'gt')}
}
\subsection{Table themes}{
Change the look of tables in an automated and replicable way, using the \code{modelsummary} theming functionality. See the vignette: https://modelsummary.com/articles/appearance.html
\itemize{
\item \code{modelsummary_theme_gt}
\item \code{modelsummary_theme_kableExtra}
\item \code{modelsummary_theme_huxtable}
\item \code{modelsummary_theme_flextable}
\item \code{modelsummary_theme_dataframe}
}
}
\subsection{Model extraction functions}{
\code{modelsummary} can use two sets of packages to extract information from
statistical models: the \code{easystats} family (\code{performance} and \code{parameters})
and \code{broom}. By default, it uses \code{easystats} first and then falls back on
\code{broom} in case of failure. You can change the order of priorities or include
goodness-of-fit extracted by \emph{both} packages by setting:
\code{options(modelsummary_get = "easystats")}
\code{options(modelsummary_get = "broom")}
\code{options(modelsummary_get = "all")}
}
\subsection{Formatting numeric entries}{
By default, LaTeX tables enclose all numeric entries in the \verb{\\num\{\}} command
from the siunitx package. To prevent this behavior, or to enclose numbers
in dollar signs (for LaTeX math mode), users can call:
\code{options(modelsummary_format_numeric_latex = "plain")}
\code{options(modelsummary_format_numeric_latex = "mathmode")}
A similar option can be used to display numerical entries using MathJax in
HTML tables:
\code{options(modelsummary_format_numeric_html = "mathjax")}
}
\subsection{LaTeX preamble}{
When creating LaTeX via the \code{tinytable} backend (default in version 2.0.0 and later), it is useful to include the following commands in the LaTeX preamble of your documents. Note that they are added automatically when compiling Rmarkdown or Quarto documents.
\\usepackage\{tabularray\}
\\usepackage\{float\}
\\usepackage\{graphicx\}
\\usepackage\{codehigh\}
\\usepackage[normalem]\{ulem\}
\\UseTblrLibrary\{booktabs\}
\\newcommand\{\\tinytableTabularrayUnderline\}[1]\{\\underline\{#1\}\}
\\newcommand\{\\tinytableTabularrayStrikeout\}[1]\{\\sout\{#1\}\}
\\NewTableCommand\{\\tinytableDefineColor\}[3]\{\\definecolor\{#1\}\{#2\}\{#3\}\}
}
}
\section{Examples}{
\if{html}{\out{<div class="sourceCode r">}}\preformatted{library(modelsummary)
datasummary_balance(~am, mtcars)
}\if{html}{\out{</div>}}
}
\references{
Arel-Bundock V (2022). “modelsummary: Data and Model Summaries in R.” \emph{Journal of Statistical Software}, \emph{103}(1), 1-23. \doi{10.18637/jss.v103.i01}.
}