/
knitr-manual.lyx
2962 lines (2337 loc) · 52.6 KB
/
knitr-manual.lyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#LyX 2.0 created this file. For more info see http://www.lyx.org/
\lyxformat 413
\begin_document
\begin_header
\textclass article
\begin_preamble
\renewcommand{\textfraction}{0.05}
\renewcommand{\topfraction}{0.8}
\renewcommand{\bottomfraction}{0.8}
\renewcommand{\floatpagefraction}{0.75}
\usepackage[buttonsize=1em]{animate}
\end_preamble
\use_default_options true
\begin_modules
knitr
\end_modules
\maintain_unincluded_children false
\language english
\language_package none
\inputencoding default
\fontencoding global
\font_roman palatino
\font_sans lmss
\font_typewriter lmtt
\font_default_family default
\use_non_tex_fonts false
\font_sc true
\font_osf false
\font_sf_scale 100
\font_tt_scale 100
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command default
\index_command default
\paperfontsize default
\spacing single
\use_hyperref true
\pdf_bookmarks true
\pdf_bookmarksnumbered true
\pdf_bookmarksopen true
\pdf_bookmarksopenlevel 2
\pdf_breaklinks false
\pdf_pdfborder false
\pdf_colorlinks false
\pdf_backref false
\pdf_pdfusetitle true
\pdf_quoted_options "pdfstartview={XYZ null null 1}"
\papersize default
\use_geometry true
\use_amsmath 1
\use_esint 1
\use_mhchem 1
\use_mathdots 1
\cite_engine natbib_authoryear
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\use_refstyle 1
\index Index
\shortcut idx
\color #008000
\end_index
\leftmargin 2.5cm
\topmargin 2.5cm
\rightmargin 2.5cm
\bottommargin 2.5cm
\secnumdepth 2
\tocdepth 2
\paragraph_separation indent
\paragraph_indentation default
\quotes_language english
\papercolumns 1
\papersides 1
\paperpagestyle default
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header
\begin_body
\begin_layout Standard
\begin_inset ERT
status open
\begin_layout Plain Layout
<<setup, include=FALSE, cache=FALSE>>=
\end_layout
\begin_layout Plain Layout
## this is equivalent to
\backslash
SweaveOpts{}
\end_layout
\begin_layout Plain Layout
opts_chunk$set(fig.path='figure/manual-', cache.path='cache/manual-', fig.align='ce
nter', fig.show='hold', par=TRUE)
\end_layout
\begin_layout Plain Layout
## I use = but I can replace it with <-; set code/output width to be 68
\end_layout
\begin_layout Plain Layout
options(replace.assign=TRUE, width=68)
\end_layout
\begin_layout Plain Layout
## tune details of base graphics (http://yihui.name/knitr/hooks)
\end_layout
\begin_layout Plain Layout
knit_hooks$set(par=function(before, options, envir){
\end_layout
\begin_layout Plain Layout
if (before && options$fig.show!='none') par(mar=c(4,4,.1,.1),cex.lab=.95,cex.axis=.9,mg
p=c(2,.7,0),tcl=-.3)
\end_layout
\begin_layout Plain Layout
})
\end_layout
\begin_layout Plain Layout
@
\end_layout
\end_inset
\end_layout
\begin_layout Title
knitr: A General-Purpose Tool for Dynamic Report Generation in R
\end_layout
\begin_layout Author
Yihui Xie
\end_layout
\begin_layout Standard
The original paradigm of literate programming was brought forward mainly
for software development, or specifically, to mix source code (for computers)
and documentation (for humans) together.
Early systems include
\begin_inset CommandInset href
LatexCommand href
name "WEB"
target "http://www.literateprogramming.com/web.pdf"
\end_inset
and
\begin_inset CommandInset href
LatexCommand href
name "Noweb"
target "http://www.cs.tufts.edu/~nr/noweb/"
\end_inset
; Sweave
\begin_inset CommandInset citation
LatexCommand citep
key "leisch2002"
\end_inset
was derived from the latter, but it is less focused on documenting software;
instead, it is mainly used for reproducible data analysis and generating
statistical reports.
The
\series bold
knitr
\series default
package
\begin_inset CommandInset citation
LatexCommand citep
key "R-knitr"
\end_inset
follows in the footsteps of Sweave.
For this manual, I assume readers have some background knowledge of Sweave
to understand the technical details; for a reference of available options,
hooks and demos, see the package homepage
\begin_inset Flex URL
status collapsed
\begin_layout Plain Layout
http://yihui.name/knitr/
\end_layout
\end_inset
.
\end_layout
\begin_layout Section
Hello World
\end_layout
\begin_layout Standard
A natural question is why we should reinvent the wheel.
The short answer is that extending Sweave by hacking
\family sans
SweaveDrivers.R
\family default
in the
\series bold
utils
\series default
package is a difficult job for me.
Many features in
\series bold
knitr
\series default
come naturally as users would have expected.
Figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:cars-demo"
\end_inset
is a simple demo of some features of
\series bold
knitr
\series default
.
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\begin_inset ERT
status open
\begin_layout Plain Layout
<<cars-demo,dev='tikz',fig.width=4,fig.height=2.8,out.width='.45
\backslash
\backslash
textwidth',message=FALSE,cache=TRUE>>=
\end_layout
\begin_layout Plain Layout
fit=lm(dist~speed,data=cars) # linear regression
\end_layout
\begin_layout Plain Layout
par(mar=c(4, 4, 1, .1), mgp=c(2,1,0))
\end_layout
\begin_layout Plain Layout
with(cars,plot(speed,dist,panel.last=abline(fit)))
\end_layout
\begin_layout Plain Layout
text(10,100,'$Y =
\backslash
\backslash
beta_0 +
\backslash
\backslash
beta_1x +
\backslash
\backslash
epsilon$')
\end_layout
\begin_layout Plain Layout
library(ggplot2)
\end_layout
\begin_layout Plain Layout
qplot(speed, dist, data=cars)+geom_smooth()
\end_layout
\begin_layout Plain Layout
@
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption
\begin_layout Plain Layout
\begin_inset CommandInset label
LatexCommand label
name "fig:cars-demo"
\end_inset
A simple demo of possible output in
\series bold
knitr
\series default
: (1) multiple plots per chunk; (2) no need to
\emph on
print()
\emph default
objects in
\series bold
ggplot2
\series default
; (3) device size is
\begin_inset Formula $4\times2.8$
\end_inset
(inches) but output size is adjusted to
\family typewriter
.45
\backslash
textwidth
\family default
in chunk options; (4) base graphics and
\series bold
ggplot2
\series default
can sit side by side; (5) use the
\emph on
tikz()
\emph default
device in
\series bold
tikzDevice
\series default
by setting chunk option
\family typewriter
dev='tikz'
\family default
(hence we can write native LaTeX expressions in R plots); (6) code highlighting.
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Standard
I would have chosen to hide the R code if this were a real report, but here
I show the code just for the sake of demonstration.
If we type
\emph on
qplot()
\emph default
in R, we get a plot, and the same thing happens in
\series bold
knitr
\series default
.
If we draw two plots in the code,
\series bold
knitr
\series default
will show two plots and we do not need to tell it in advance how many plots
there are in the code.
If we set
\family typewriter
out.width='.45
\backslash
\backslash
textwidth'
\family default
in chunk options, we get it in the final output document.
If we say
\family typewriter
fig.align='center'
\family default
, the plots are centered.
That's it.
Many enhancements and new features will be introduced later.
If you come from the Sweave land, you can take a look at the page of transition
first:
\begin_inset Flex URL
status collapsed
\begin_layout Plain Layout
http://yihui.name/knitr/demo/sweave/
\end_layout
\end_inset
.
\end_layout
\begin_layout Section
Design
\end_layout
\begin_layout Standard
The flow of processing an input file is similar to Sweave, and two major
differences are that
\series bold
knitr
\series default
provides more flexibility to the users to customize the processing, and
has many built-in options such as support for a wide range of graphics
devices and cache.
Below is a brief description of the process:
\end_layout
\begin_layout Enumerate
\series bold
knitr
\series default
takes an input file and automatically determines an appropriate set of
\begin_inset CommandInset href
LatexCommand href
name "patterns"
target "http://yihui.name/knitr/patterns"
\end_inset
to use if they are not provided in advance (e.g.
\family sans
file.Rnw
\family default
will use
\family typewriter
knit_patterns$get('rnw')
\family default
);
\end_layout
\begin_layout Enumerate
a set of output
\begin_inset CommandInset href
LatexCommand href
name "hooks"
target "http://yihui.name/knitr/hooks"
\end_inset
will also be set up automatically according to the filename extension (e.g.
use LaTeX environments or HTML elements to wrap up R results);
\end_layout
\begin_layout Enumerate
the input file is read in and split into pieces consisting of R code chunks
and normal texts; the former will be executed one after the other, and
the latter may contain global chunk options or inline R code;
\end_layout
\begin_layout Enumerate
for each chunk, the code is evaluated using the
\series bold
evaluate
\series default
package
\begin_inset CommandInset citation
LatexCommand citep
key "R-evaluate"
\end_inset
, and the results may be filtered according to chunk options (e.g.
\family typewriter
echo=FALSE
\family default
will remove the R source code)
\end_layout
\begin_deeper
\begin_layout Enumerate
if
\family typewriter
cache=TRUE
\family default
for this chunk,
\series bold
knitr
\series default
will first check if there are previously cached results under the cache
directory before really evaluating the chunk; if cached results exist and
this code chunk has not been changed since the last run (verified with an MD5 sum),
the cached results will be (lazy-) loaded, otherwise a new cache will be
built; if a cached chunk depends on other chunks (see the
\family typewriter
dependson
\family default
\begin_inset CommandInset href
LatexCommand href
name "option"
target "http://yihui.name/knitr/options"
\end_inset
) and any one of these chunks has changed, this chunk must be forcibly updated
(old cache will be purged)
\end_layout
\begin_layout Enumerate
there are six types of possible output from
\series bold
evaluate
\series default
, and their classes are
\family typewriter
character
\family default
(normal text output),
\family typewriter
source
\family default
(source code),
\family typewriter
warning
\family default
,
\family typewriter
message
\family default
,
\family typewriter
error
\family default
and
\family typewriter
recordedplot
\family default
; an internal S3 generic function
\emph on
wrap()
\emph default
is used to deal with different types of output, using output hooks defined
in the object
\family typewriter
knit_hooks
\end_layout
\begin_layout Enumerate
note that plots are recorded as R objects before they are really saved to files,
so graphics devices will not be opened unless plots have really been produced
in a chunk
\end_layout
\begin_layout Enumerate
a code chunk is evaluated in a separate empty environment with the global
environment as its parent, and all the objects in this environment after
the evaluation will be saved if
\family typewriter
cache=TRUE
\end_layout
\begin_layout Enumerate
chunk hooks can be run before and/or after a chunk
\end_layout
\end_deeper
\begin_layout Enumerate
for normal texts,
\series bold
knitr
\series default
will find out if there are chunk options set among them (e.g.
\family typewriter
\backslash
SweaveOpts{}
\family default
), and evaluate inline R code (e.g.
\family typewriter
\backslash
Sexpr{}
\family default
); the latter involves the
\family typewriter
inline
\family default
hook.
\end_layout
\begin_layout Standard
The hooks play important roles in
\series bold
knitr
\series default
: this package makes almost everything accessible to the users.
Consider the following extremely simple example which may demonstrate this
freedom:
\end_layout
\begin_layout Standard
\begin_inset ERT
status open
\begin_layout Plain Layout
<<simple-example>>=
\end_layout
\begin_layout Plain Layout
1+1
\end_layout
\begin_layout Plain Layout
@
\end_layout
\end_inset
\end_layout
\begin_layout Standard
There are two parts in the final output: the source code
\family typewriter
1 + 1
\family default
and the output
\family typewriter
[1] 2
\family default
; the comment characters
\family typewriter
##
\family default
are from the default chunk option
\family typewriter
comment
\family default
.
Users may define a hook function for the source code like this to use the
\family typewriter
lstlisting
\family default
environment:
\end_layout
\begin_layout Standard
\begin_inset ERT
status open
\begin_layout Plain Layout
<<hook-source, eval=FALSE>>=
\end_layout
\begin_layout Plain Layout
knit_hooks$set(source = function(x, options) {
\end_layout
\begin_layout Plain Layout
paste('
\backslash
\backslash
begin{lstlisting}
\backslash
n', x, '
\backslash
\backslash
end{lstlisting}
\backslash
n', sep = '')
\end_layout
\begin_layout Plain Layout
})
\end_layout
\begin_layout Plain Layout
@
\end_layout
\end_inset
\end_layout
\begin_layout Standard
Similarly we can put other types of output into other environments.
There is no need to hack at
\family sans
Sweave.sty
\family default
for
\series bold
knitr
\series default
and you can put the output in any environment.
What is more, the output hooks make
\series bold
knitr
\series default
ready for other types of output, and a typical one is HTML (there are built-in
hooks).
The website has provided many examples demonstrating the flexibility of
the output.
\end_layout
\begin_layout Section
Features
\end_layout
\begin_layout Standard
The
\series bold
knitr
\series default
package borrowed features such as tikz graphics and cache from
\series bold
pgfSweave
\series default
\begin_inset CommandInset citation
LatexCommand citep
key "R-pgfSweave"
\end_inset
and
\series bold
cacheSweave
\series default
\begin_inset CommandInset citation
LatexCommand citep
key "R-cacheSweave"
\end_inset
respectively, but the implementations are different.
New features like code reference from an external R script as well as output
customization are also introduced.
The feature of hook functions in Sweave is re-implemented and hooks have
new usage now.
There are several other small features which are motivated by my everyday
use of Sweave.
For example, a progress bar is provided when knitting a file so we roughly
know how long we still need to wait; output from inline R code (e.g.
\family typewriter
\backslash
Sexpr{x[1]}
\family default
) is automatically formatted in TeX math notation (like
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
Sexpr{123456789}
\end_layout
\end_inset
) if the result is numeric, and we will not get too many digits by default.
You may check out a number of manuals dedicated to specific features
such as graphics on the website:
\begin_inset Flex URL
status collapsed
\begin_layout Plain Layout
http://yihui.name/knitr/demos
\end_layout
\end_inset
.
\end_layout
\begin_layout Subsection
Code Decoration
\end_layout
\begin_layout Standard
The
\series bold
highlight
\series default
package
\begin_inset CommandInset citation
LatexCommand citep
key "R-highlight"
\end_inset
is used by default to highlight R code, and
\series bold
formatR
\series default
\begin_inset CommandInset citation
LatexCommand citep
key "R-formatR"
\end_inset
is used to reformat R code (like
\family typewriter
keep.source=FALSE
\family default
in Sweave but will also try to retain comments).
For LaTeX output, the
\series bold
framed
\series default
package is used to decorate code chunks with a light gray background.
If this LaTeX package is not found in the system, a version will be copied
directly from
\series bold
knitr
\series default
.
The prompt characters are removed by default because they mangle the R
source code in the output and make it difficult to copy R code.
The R output is masked in comments by default based on the same rationale.
It is easy to revert to the output with prompts (set option
\family typewriter
prompt=TRUE
\family default
), and you will quickly realize the inconvenience to the readers if they
want to copy and run the code in the output document:
\end_layout
\begin_layout Standard
\begin_inset ERT
status open
\begin_layout Plain Layout
<<stupid-prompts, prompt=TRUE, comment=NA, highlight=FALSE>>=
\end_layout
\begin_layout Plain Layout
x=rnorm(5)
\end_layout
\begin_layout Plain Layout
x
\end_layout
\begin_layout Plain Layout
var(x)
\end_layout
\begin_layout Plain Layout
@
\end_layout
\end_inset
\end_layout
\begin_layout Standard
The example below shows the effect of
\family typewriter
tidy=TRUE/FALSE
\family default
:
\end_layout
\begin_layout Standard
\begin_inset ERT
status open
\begin_layout Plain Layout
<<tidy-no, eval=FALSE, tidy=FALSE>>=
\end_layout
\begin_layout Plain Layout
## option tidy=FALSE
\end_layout
\begin_layout Plain Layout
for(k in 1:10){j=cos(sin(k)*k^2)+3;print(j-5)}
\end_layout
\begin_layout Plain Layout
@
\end_layout
\begin_layout Plain Layout
<<tidy-yes, eval=FALSE, tidy=TRUE>>=
\end_layout
\begin_layout Plain Layout
## option tidy=TRUE
\end_layout
\begin_layout Plain Layout
for(k in 1:10){j=cos(sin(k)*k^2)+3;print(j-5)}
\end_layout
\begin_layout Plain Layout
@
\end_layout
\end_inset
\end_layout
\begin_layout Standard
Note that
\family typewriter
=
\family default
is replaced by
\family typewriter
<-
\family default
because
\family typewriter
options('replace.assign')
\family default
was set to be
\family typewriter
TRUE
\family default
in this document; see the documentation of
\emph on
tidy.source()
\emph default
in
\series bold
formatR
\series default
for details.
\end_layout