This repository has been archived by the owner on Oct 24, 2023. It is now read-only.
forked from Edouard-Legoupil/koboloadeR
/
kobo_crunching_report.R
2365 lines (1912 loc) · 187 KB
/
kobo_crunching_report.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#' @name kobo_crunching_report
#' @rdname kobo_crunching_report
#' @title Generate Data Crunching Report
#'
#' @description Generate crunching Report that contains all descriptive statistics, correlation analysis, tabulation and data visualization for variables and indicators.
#' For disaggregation of variable indicate "facet","stak", "fill" or "dodge". for test of correlation on select_one variable use correlation = TRUE
#'
#' @param form The full filename of the form to be accessed (xls or xlsx file).
#'
#' It is assumed that the form is stored in the data-raw folder.
#' @param output The output format html or aspx if you need to upload on sharepoint), docx (to quickly cut non interesting vz and take note during data interpretation session), pptx (to quickly cut non interesting vz and persent during data interpretation session), Default is html
#' @param app The place where the function has been executed, the default is the console and the second option is the shiny app
#' @param render TRUE or FALSE - Tells whether to only produce Rmd or to also knit it in the required output format. Default is TRUE. Useful for testing as rending takes time.
#' @param lang eng, fre or esp - Change the language of the intro to the report - default is "eng" for english
#' @param unhcRstyle TRUE or FALSE tells whether to use UNHCR style fo rendering
#' @param use_pct TRUE or FALSE - Tells whether to add percent or absolute value in the chart - default is TRUE
#' @param add_error_bar TRUE or FALSE - Tells whether to whiskers for error in the chart - default is TRUE
#'
#' @return No return, All results will be saved on RMD files and Word files
#'
#' @author Edouard Legoupil, Maher Daoud
#'
#'
#' @examples
#' \dontrun{
#' kobo_crunching_report("myform.xlsx")
#' }
#'
#' @export kobo_crunching_report
#'
kobo_crunching_report <- function(form = "form.xlsx",
app = "console",
output ="html",
render = "TRUE",
lang = "eng",
unhcRstyle = "TRUE",
use_pct = "TRUE",
add_error_bar = "TRUE") {
tryCatch({
if (app == "shiny") {
progress <- shiny::Progress$new()
progress$set(message = "Generating crunching report in progress...", value = 0)
on.exit(progress$close())
updateProgress <- function(value = NULL, detail = NULL) {
if (is.null(value)) {
value <- progress$getValue()
value <- value + (progress$getMax() - value) / 100
}
progress$set(value = value, detail = detail)
}
updateProgress()
}
## Load all required packages
koboloadeR::kobo_load_packages()
configInfo <- koboloadeR::kobo_get_config(form)
configInfo <- configInfo[!is.na(configInfo$name),]
mainDir <- koboloadeR::kobo_getMainDirectory()
#form_tmp <- paste(mainDir, "data", form, sep = "/", collapse = "/")
#library(koboloadeR)
## hack on cat functions then merge back small Rmd portions with future::multisession...
cat <- function(x, file = "", ...) {
if (file == "")
base::cat(x, ...)
else
readr::write_lines(x, file = file, ...)
}
### Load the data
cat("\n\n Loading data. It is assumed that the cleaning, weighting & re-encoding has been done previously \n")
MainDataFrame <- readr::read_csv(paste(mainDir,"/data/MainDataFrame_encoded.csv",sep = ""))
#load(paste(mainDir,"/data/MainDataFrame_encoded.rda",sep = ""))
# Form ##########################################
## Load form
cat("\n\n Building dictionnary from the xlsform \n")
#form <- "form.xls"
## Generate dico to test here - in normal process - it has been done just before in kobo_load_data()
#kobo_dico(form)
## Load dictionary
dico <- readr::read_csv(paste0(mainDir,"/data/dico_",form,".csv"))
#load(paste0(mainDir,"/data/dico_",form,".rda"))
#rm(form)
## label Variables
cat("\n\n Labelling variables \n")
if (app == "shiny") {
progress$set(message = "Labelling variables in the Main Data File in progress...")
updateProgress()
}
MainDataFrame <- koboloadeR::kobo_label(MainDataFrame , dico)
cat("\n\nload all required data files..\n")
dataBeginRepeat <- koboloadeR::kobo_get_begin_repeat(form)
dataBeginRepeat <- dataBeginRepeat$names
## Check if there's a repeat - aka hierarchical structure in the dataset
if (length(dataBeginRepeat) > 0) {
for (dbr in dataBeginRepeat) {
dataFrame <- readr::read_csv(paste(mainDir,"/data/",dbr,"_encoded.csv",sep = ""))
#load(paste(mainDir,"/data/",dbr,"_encoded.rda",sep = ""))
assign(dbr, koboloadeR::kobo_label(dataFrame, dico))
if (app == "shiny") {
progress$set(message = paste("Labelling variables in",dbr,"File in progress..."))
updateProgress()
}
}
}
## Get a list of variables to be used for disaggregation #######
disaggregation <- dico[ which(dico$disaggregation %in% c("facet", "stak", "fill", "dodge") & dico$formpart == "questions"),
c("chapter", "name", "label","labelReport", "type", "qrepeatlabel", "fullname", "disaggregation", "correlate", "listname", "variable") ]
## Get a list of variables to be used for analyisis of association - chisquarred #######
correlation <- dico[which(dico$type %in% c("select_multiple_d","select_one") & !(is.na(dico$correlate)) & dico$formpart == "questions"),
c("chapter", "name", "label","labelReport", "type", "qrepeatlabel", "fullname", "disaggregation", "correlate", "listname", "variable") ]
## Get a list of variables to be used for analyisis of association - chisquarred #######
ordinal <- dico[which(dico$type %in% c("select_multiple_d","select_one") & dico$variable == "ordinal"),
c( "qrepeatlabel", "fullname", "listname", "variable") ]
## To do: insert reference to formpart when there's an indicator reference
# & dico$formpart=="questions"
### Get the dico with list of chapter
cat("\n\n Building now the chapters of the reports in Rmd format \n")
if (app == "shiny") {
progress$set(message = "Building now the configures reports in Rmd format in progress...")
updateProgress()
}
## Start Report ##########################################
reports <- as.data.frame(unique(dico$report))
names(reports)[1] <- "Report"
## Default behavior if no report was defined in xlsform
if ( nrow(reports) == 1 & checkmate::anyMissing(reports$Report)) {
cat("Defaulting questions allocation to chapter")
dico$report[ dico$type %in% c("select_one","select_multiple_d")] <- "report"
reports <- as.data.frame(unique(dico$chapter))
names(reports)[1] <- "Report"
} else {
}
## Remove when report frame contains NA!!
reports <- as.data.frame(reports[ !(is.na(reports$Report)),])
names(reports)[1] <- "Report"
readr::write_csv(reports, paste(mainDir,"/data/reports.csv",sep = ""))
#save(reports, file = paste(mainDir,"/data/reports.rda",sep = ""))
future::plan(future::multisession)
## For each Report, create a Rmd file & Loop through defined reports ------------
for (i in 1:nrow(reports) )
{
# i <-1
reportsname <- as.character(reports[ i , 1])
if (app == "shiny") {
progress$set(message = paste(i, " - Write chapter for ",as.character(reports[ i , 1])))
updateProgress()
}
cat(paste(i, " - Write chapter for ",as.character(reports[ i , 1]),"\n" ))
report.name <- paste(mainDir, "/vignettes/",i,"-", reportsname, "-report.Rmd", sep = "")
## TO DO : CHECK IF FILE EXIST - AND REQUEST USER TO DELETE BEFORE REGENERATING - SUGGESTING TO SAVE PREVIOUS UNDER NEW NAME
if (file.exists(report.name)) file.remove(report.name)
## TO DO : put in configuration file name of report, author, organisation & location
## TO DO : put in configuration wethere report should be portrait or landscape
cat("---", file = report.name , sep = "\n", append = TRUE)
cat(paste("title: \"Data Crunching Report: ",reportsname , "- Draft not for distribution. \"", sep = ""), file = report.name , sep = "\n", append = TRUE)
cat("author: \"Generated with [Koboloader](https://unhcr.github.io/koboloadeR/docs) \"", file = report.name , sep = "\n", append = TRUE)
cat("date: \" `r format(Sys.Date(), '%d %B %Y')`\"", file = report.name , sep = "\n", append = TRUE)
cat("always_allow_html: yes", file = report.name , sep = "\n", append = TRUE)
if (output == "docx") {
if(unhcRstyle == "TRUE") {
cat("output:",file = report.name , sep = "\n", append = TRUE)
cat(" unhcRstyle::unhcr_templ_doc:", file = report.name , sep = "\n", append = TRUE)
cat(" toc: true", file = report.name , sep = "\n", append = TRUE)
cat("---", file = report.name , sep = "\n", append = TRUE)
cat("\n\n", file = report.name , sep = "\n", append = TRUE)
} else {
cat("output:",file = report.name , sep = "\n", append = TRUE)
cat(" word_document:", file = report.name , sep = "\n", append = TRUE)
cat(" fig_caption: yes", file = report.name , sep = "\n", append = TRUE)
cat(" fig_height: 5", file = report.name , sep = "\n", append = TRUE)
cat(" fig_width: 8", file = report.name , sep = "\n", append = TRUE)
cat(" toc: yes", file = report.name , sep = "\n", append = TRUE)
cat(" toc_depth: 2", file = report.name , sep = "\n", append = TRUE)
# cat(" reference_docx: style-unhcr-portrait.docx", file = report.name , sep = "\n", append = TRUE)
cat("---", file = report.name , sep = "\n", append = TRUE)
cat("\n\n", file = report.name , sep = "\n", append = TRUE)
}
} else if (output == "html") {
if(unhcRstyle == "TRUE") {
cat("output:",file = report.name , sep = "\n", append = TRUE)
cat(" unhcRstyle::unhcr_templ_html:", file = report.name , sep = "\n", append = TRUE)
cat(" toc: true", file = report.name , sep = "\n", append = TRUE)
cat("---", file = report.name , sep = "\n", append = TRUE)
cat("\n\n", file = report.name , sep = "\n", append = TRUE)
} else {
cat("output:",file = report.name , sep = "\n", append = TRUE)
cat(" html_document:", file = report.name , sep = "\n", append = TRUE)
cat(" fig_caption: yes", file = report.name , sep = "\n", append = TRUE)
cat(" fig_height: 5", file = report.name , sep = "\n", append = TRUE)
cat(" fig_width: 8", file = report.name , sep = "\n", append = TRUE)
cat(" toc: yes", file = report.name , sep = "\n", append = TRUE)
cat(" toc_depth: 2", file = report.name , sep = "\n", append = TRUE)
cat(" toc_float: yes", file = report.name , sep = "\n", append = TRUE)
cat(" includes:", file = report.name , sep = "\n", append = TRUE)
# cat(" in_header: css/header.html", file = report.name , sep = "\n", append = TRUE)
cat("---", file = report.name , sep = "\n", append = TRUE)
cat("\n\n", file = report.name , sep = "\n", append = TRUE)
# cat("<link rel=\"stylesheet\" href=\"css/unhcr-bootstrap.css\">", file = report.name , sep = "\n", append = TRUE)
# cat("<link rel=\"stylesheet\" href=\"css/style.css\">", file = report.name , sep = "\n", append = TRUE)
# cat("<link rel=\"stylesheet\" href=\"css/unhcr-header.css\">", file = report.name , sep = "\n", append = TRUE)
cat("\n\n", file = report.name , sep = "\n", append = TRUE)
}
}else if (output == "aspx") {
if(unhcRstyle == "TRUE") {
cat("output:",file = report.name , sep = "\n", append = TRUE)
cat(" unhcRstyle::unhcr_templ_html:", file = report.name , sep = "\n", append = TRUE)
cat(" toc: true", file = report.name , sep = "\n", append = TRUE)
cat("---", file = report.name , sep = "\n", append = TRUE)
cat("\n\n", file = report.name , sep = "\n", append = TRUE)
} else {
cat("output:",file = report.name , sep = "\n", append = TRUE)
cat(" html_document:", file = report.name , sep = "\n", append = TRUE)
cat(" fig_caption: yes", file = report.name , sep = "\n", append = TRUE)
cat(" fig_height: 5", file = report.name , sep = "\n", append = TRUE)
cat(" fig_width: 8", file = report.name , sep = "\n", append = TRUE)
cat(" toc: yes", file = report.name , sep = "\n", append = TRUE)
cat(" toc_depth: 2", file = report.name , sep = "\n", append = TRUE)
cat(" toc_float: yes", file = report.name , sep = "\n", append = TRUE)
cat(" includes:", file = report.name , sep = "\n", append = TRUE)
# cat(" in_header: css/header.html", file = report.name , sep = "\n", append = TRUE)
cat("---", file = report.name , sep = "\n", append = TRUE)
cat("\n\n", file = report.name , sep = "\n", append = TRUE)
# cat("<link rel=\"stylesheet\" href=\"css/unhcr-bootstrap.css\">", file = report.name , sep = "\n", append = TRUE)
# cat("<link rel=\"stylesheet\" href=\"css/style.css\">", file = report.name , sep = "\n", append = TRUE)
# cat("<link rel=\"stylesheet\" href=\"css/unhcr-header.css\">", file = report.name , sep = "\n", append = TRUE)
cat("\n\n", file = report.name , sep = "\n", append = TRUE)
}
} else if (output == "pptx") {
if(unhcRstyle == "TRUE") {
cat("output:",file = report.name , sep = "\n", append = TRUE)
cat(" unhcRstyle::unhcr_templ_ppt:", file = report.name , sep = "\n", append = TRUE)
cat(" toc: true", file = report.name , sep = "\n", append = TRUE)
cat("---", file = report.name , sep = "\n", append = TRUE)
cat("\n\n", file = report.name , sep = "\n", append = TRUE)
} else {
cat("output:",file = report.name , sep = "\n", append = TRUE)
cat(" powerpoint_presentation:", file = report.name , sep = "\n", append = TRUE)
cat(" fig_caption: yes", file = report.name , sep = "\n", append = TRUE)
cat(" fig_height: 9", file = report.name , sep = "\n", append = TRUE)
cat(" fig_width: 18", file = report.name , sep = "\n", append = TRUE)
# cat(" reference_doc: templateUNHCR.pptx", file = report.name , sep = "\n", append = TRUE)
cat(" slide_level: 2", file = report.name , sep = "\n", append = TRUE)
cat("---", file = report.name , sep = "\n", append = TRUE)
cat("\n\n", file = report.name , sep = "\n", append = TRUE)
}
}
## First chunk to get the data in the report
cat("```{r setup, include = FALSE, echo = FALSE, warning = FALSE, message = FALSE}", file = report.name , sep = "\n", append = TRUE)
if (output == "pptx") {
cat("knitr::opts_chunk$set(echo = FALSE, fig.height = 9, fig.width = 18, dpi = 300, comment = \"\" )", file = report.name , sep = "\n", append = TRUE)
}
cat("mainDir <- getwd()", file = report.name , sep = "\n", append = TRUE)
cat("mainDirroot <- substring(mainDir, 0 , nchar(mainDir) - 10)", file = report.name , sep = "\n", append = TRUE)
cat("using <- function(...) {", file = report.name , sep = "\n", append = TRUE)
cat("libs <- unlist(list(...))", file = report.name , sep = "\n", append = TRUE)
cat("req <- unlist(lapply(libs,require,character.only = TRUE))", file = report.name , sep = "\n", append = TRUE)
cat(" need <- libs[req == FALSE]", file = report.name , sep = "\n", append = TRUE)
cat(" if (length(need) > 0) { ", file = report.name , sep = "\n", append = TRUE)
cat(" install.packages(need, repos = 'http://cran.us.r-project.org')", file = report.name , sep = "\n", append = TRUE)
cat(" lapply(need,require,character.only = TRUE)", file = report.name , sep = "\n", append = TRUE)
cat(" }", file = report.name , sep = "\n", append = TRUE)
cat(" }", file = report.name , sep = "\n", append = TRUE)
cat("\n\n", file = report.name , sep = "\n", append = TRUE)
cat("## Load all required packages", file = report.name , sep = "\n", append = TRUE)
cat("using('tidyverse', 'ggthemes', 'plyr', 'ggrepel', 'viridis', 'RColorBrewer', 'extrafont', 'corrplot', 'reshape2',", file = report.name , sep = "\n", append = TRUE)
cat(" 'scales', 'survey', 'knitr', 'rmarkdown', 'ggpubr', 'grid', 'jtools', 'moments', 'koboloadeR')", file = report.name , sep = "\n", append = TRUE)
cat("options(scipen = 999) # turn-off scientific notation like 1e+48", file = report.name , sep = "\n", append = TRUE)
cat("## Provide below the name of the form in xsl form - format should be xls not xlsx", file = report.name , sep = "\n", append = TRUE)
cat(paste0("form <- \"",form,"\""), file = report.name , sep = "\n", append = TRUE)
cat("dico <- readr::read_csv(paste0(mainDirroot,\"/data/dico_\",form,\".csv\"))", file = report.name , sep = "\n", append = TRUE)
#cat("load(paste0(mainDirroot,\"/data/dico_\",form,\".rda\"))", file = report.name , sep = "\n", append = TRUE)
## TO DO: Use config file to load the different frame
cat("MainDataFrame <- readr::read_csv(paste0(mainDirroot,\"/data/MainDataFrame_encoded.csv\"))", file = report.name , sep = "\n", append = TRUE)
#cat("load(paste0(mainDirroot,\"/data/MainDataFrame_encoded.rda\"))", file = report.name , sep = "\n", append = TRUE)
## Check if there's a repeat - aka hierarchical structure in the dataset
if (length(dataBeginRepeat) > 0) {
for (dbr in dataBeginRepeat) {
cat(paste(dbr, " <- readr::read_csv(paste0(mainDirroot,\"/data/",dbr,"_encoded.csv\"))", sep = ""), file = report.name , sep = "\n", append = TRUE)
#cat(paste("load(paste0(mainDirroot,\"/data/",dbr,"_encoded.rda\"))", sep = ""), file = report.name , sep = "\n", append = TRUE)
}
}
cat("\n", file = report.name , sep = "\n", append = TRUE)
cat("## label Variables", file = report.name , sep = "\n", append = TRUE)
cat("MainDataFrame <- koboloadeR::kobo_label(MainDataFrame , dico)", file = report.name , sep = "\n", append = TRUE)
## Check if there's a repeat - aka hierarchical structure in the dataset
if (length(dataBeginRepeat) > 0) {
for (dbr in dataBeginRepeat) {
cat(paste(dbr, " <- koboloadeR::kobo_label(",dbr ," , dico)", sep = ""), file = report.name , sep = "\n", append = TRUE)
}
}
#### Convert to ordinal variable
cat("\n", file = report.name , sep = "\n", append = TRUE)
cat("## Set up ordinal Variables", file = report.name , sep = "\n", append = TRUE)
if (nrow(ordinal) > 0) {
for (o in 1:nrow(ordinal)) {
# o <- 1
ordinal.listname <- as.character(ordinal[ o, c("listname")])
ordinal.name <- as.character(ordinal[ o, c("fullname")])
ordinal.frame <- as.character(ordinal[ o, c("qrepeatlabel")])
if ( exists(paste0(ordinal.frame)) == TRUE) {
# cat(paste0("list.ordinal <- unique(dico[ dico$listname == \"", ordinal.listname,"\" & dico$type == \"select_one_d\", c(\"labelchoice\") ])$labelchoice"),file = report.name ,sep = "\n", append = TRUE)
cat(paste0("list.ordinal <- unique(dico[ dico$listname == \"", ordinal.listname,"\" & dico$type == \"select_one_d\", c(\"labelchoice\") ])$labelchoice"),file = report.name ,sep = "\n", append = TRUE)
cat(paste0(ordinal.frame,"$",ordinal.name," <- factor(",ordinal.frame,"$",ordinal.name,", levels = list.ordinal)"),file = report.name ,sep = "\n", append = TRUE)
} else {}
}
} else {}
cat("\n", file = report.name , sep = "\n", append = TRUE)
cat("## Create weighted survey object", file = report.name , sep = "\n", append = TRUE)
## If no weight, the weighted object is unweigthted
if (configInfo[configInfo$name == "sample_type","value"] == "No sampling (type 1)") {
## If no weight, the weighted object is unweigthted
cat("MainDataFrame.survey <- survey::svydesign(ids = ~ 1 , data = MainDataFrame )", file = report.name , sep = "\n", append = TRUE)
## Check if there's a repeat - aka hierarchical structure in the dataset
if (length(dataBeginRepeat) > 0) {
for (dbr in dataBeginRepeat) {
cat(paste(dbr,".survey <- survey::svydesign(ids = ~ 1 , data = ",dbr," )", sep = ""), file = report.name , sep = "\n", append = TRUE)
}
}
## with clusters
}else if (configInfo[configInfo$name == "sample_type","value"] == "Cluster sample (type 2)") {
## with clusters
cat(paste("MainDataFrame.survey <- survey::svydesign(ids = ~ ", configInfo[configInfo$name == "variable_name","value"],", data = MainDataFrame, weights = ~ ", configInfo[configInfo$name == "weightsVariable","value"]," , fpc = ~ fpc )", sep = ""), file = report.name , sep = "\n", append = TRUE)
## Check if there's a repeat - aka hierarchical structure in the dataset
if (length(dataBeginRepeat) > 0) {
for (dbr in dataBeginRepeat) {
cat(paste(dbr,".survey <- survey::svydesign(ids = ~ ", configInfo[configInfo$name == "variable_name","value"],", data = ",dbr,", weights = ~ ", configInfo[configInfo$name == "weightsVariable","value"]," , fpc = ~ fpc )", sep = ""), file = report.name , sep = "\n", append = TRUE)
}
}
## with strata
}else if (configInfo[configInfo$name == "sample_type","value"] == "Stratified sample (type 3)") {
## with strata
cat(paste("MainDataFrame.survey <- survey::svydesign(id=~1, strata= ~ ", configInfo[configInfo$name == "variable_name","value"]," ,check.strata = TRUE, data = MainDataFrame, weights = ~ ", configInfo[configInfo$name == "weightsVariable","value"]," )", sep = ""), file = report.name , sep = "\n", append = TRUE)
## Check if there's a repeat - aka hierarchical structure in the dataset
if (length(dataBeginRepeat) > 0) {
for (dbr in dataBeginRepeat) {
cat(paste(dbr,".survey <- survey::svydesign(id=~1, strata= ~ ", configInfo[configInfo$name == "variable_name","value"]," ,check.strata = TRUE, data = ",dbr,", weights = ~ ", configInfo[configInfo$name == "weightsVariable","value"]," )", sep = ""), file = report.name , sep = "\n", append = TRUE)
}
}
}
## with strata
#cat("MainDataFrame_edited.survey <- survey::svydesign(id=~1, strata= ~ RecordCategory ,check.strata = TRUE, data = MainDataFrame_edited, weights = ~ WeightingCoefficient )", file = report.name , sep = "\n", append = TRUE)
## with clusters
#cat("MainDataFrame_edited.survey <- survey::svydesign(ids = ~ Camp.Province , data = MainDataFrame_edited, weights = ~ weight , fpc = ~ fpc )", file = report.name , sep = "\n", append = TRUE)
#cat("br1.survey <- survey::svydesign(ids = ~ Camp.Province , data = br1, weights = ~ weight , fpc = ~ fpc )", file = report.name , sep = "\n", append = TRUE)
#cat("br2.survey <- survey::svydesign(ids = ~ Camp.Province , data = br2, weights = ~ weight , fpc = ~ fpc )", file = report.name , sep = "\n", append = TRUE)
# ## If no weight, the weighted object is unweigthted
# cat("MainDataFrame_edited.survey <- survey::svydesign(ids = ~ 1 , data = MainDataFrame_edited )", file = report.name , sep = "\n", append = TRUE)
# cat("br1.survey <- survey::svydesign(ids = ~ 1 , data = br1 )", file = report.name , sep = "\n", append = TRUE)
# cat("br2.survey <- (ids = ~ 1 , data = br2 )", file = report.name , sep = "\n", append = TRUE)
cat(paste0("\n```\n", sep = '\n'), file = report.name, append = TRUE)
if (output == "pptx") {
if (lang == "eng") {
### To DO : Offer option to insert in the report skeleton interpretation questions
### Intro text####################################################################
cat(paste("# Crunching step\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("This data crunching report allows to quickly explore the results of the survey that can be regenerated as needed.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("The objective of this report is to allow to quickly identify potential patterns in your dataset.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("A quick screening of this initial report should allow to select the most meaningful graphs.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("The crunching process produces a lot of visuals. Therefore, it is key to carefully select the most relevant visual that will be presented for potential interpretation in the next step. A typical data interpretation session shall not last more than 2hours and include more than 60 visuals to look at in order to keep participants with a good focus level.\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste("## Selecting contents "),file = report.name , sep = "\n", append = TRUE)
cat(paste("In order to guide this selection phase, the data crunching expert and report designer, in collaboration with the data analysis group, can use the following elements:\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * For numeric value, check the frequency distributions of each variable to average, deviation, including outliers and oddities\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * For categorical variables, check for unexpected values: any weird results based on common sense expectations\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Use correlation analysis to check for potential contradictions in respondent's answers to different questions for identified associations (chi-square)\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Always, Check for missing data (NA) or \"%of respondent who answered\" that you cannot confidently explain\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Check unanswered questions, that corresponds to unused skip logic in the questionnaire: For instance, did a person who was never displaced answer displacement-related questions? Were employment-related answers provided for a toddler?\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste("## Interpretation sessions "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" When analyzing those representations in a collective setting during data interpretation sessions, you may: \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Reflect__: question data quality and/or make suggestions to adjust questions, identify additional cleaning steps; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Interpret__: develop qualitative interpretations of data patterns; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Recommend__: suggest recommendations in terms of programmatic adjustment; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Classify__: define level of sensitivity for certain topics if required; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("## The report can be regenerated as needed by: "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * adjusting the report configuration in the xlsform to break it into report and chapter; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * configuring disaggregation & correlation for each question; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * revising the data cleansing based on the cleaning log; \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * appending calculated indicators to your data frame to reshape variable - also called feature engineering. \n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("## Dataset description\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Title of the study:__ ",configInfo[configInfo$name == "titl", c("value")]," \n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Abstract:__ ",configInfo[configInfo$name == "abstract", c("value")],"\n"),file = report.name , sep = "\n\n", append = TRUE)
# cat(paste("__Rights & Disclaimer:__ ",configInfo[configInfo$name == "disclaimer", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Country where the study took place:__ ",configInfo[configInfo$name == "Country", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Geographic Coverage for the study within the country:__ ",configInfo[configInfo$name == "geogCover", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Kind of Data:__ ",configInfo[configInfo$name == "dataKind", c("value")],"\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Number of records in the main data frame__: `r nrow(MainDataFrame)`\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Period of data collection__: between `r min(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\"))` and `r max(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\"))`\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Documented cleaning__: ",configInfo[configInfo$name == "cleanOps", c("value")],"\n\n"),file = report.name , sep = "\n\n", append = TRUE)
#cat(paste("__Entity being analyzed in the study:__ ",configInfo[configInfo$name == "AnalysisUnit", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
#cat(paste("__Procedure, technique, or mode of inquiry used to attain the data:__ ",configInfo[configInfo$name == "ModeOfCollection", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
#cat(paste("__Study Universe:__ (i.e. group of persons or other elements that are the object of research and to which any analytic results refer:",configInfo[configInfo$name == "universe", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
} else if (lang == "esp") {
cat(paste("# Etapa de Crunching\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("El data crunching es el paso donde se procesan y estructuran los datos de la encuesta, y se generan visuales de los resultados de una manera automatizada y rápida.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" Este informe de análisis y crunching de datos permite explorar rápidamente los resultados de la encuesta. El informe puede ser regenerado según sea necesario. El objetivo de este informe es poder identificar rápidamente posibles tendencias y patrones en el conjunto de datos.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("Un repaso rápido de este informe inicial deberá permitir la selección de los gráficos más significativos.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("El proceso de crunching produce una gran cantidad de imágenes. Por lo tanto, es importante seleccionar cuidadosamente los visuales y gráficos más relevantes que podrán ser presentados para su interpretación en la siguiente etapa. Usualmente una sesión de interpretación de datos no debe durar más de 2 horas y no debe incluir más de 60 visuales a examinar con el fin de mantener a los participantes con un buen nivel de enfoque. \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste("## Selección "),file = report.name , sep = "\n", append = TRUE)
cat(paste("El experto en análisis de datos (DIMA del Bureau / IMs de la Operación) y el diseñador de reportes (de la operación, e.g. Oficial de reportes, PI), en colaboración con el grupo de análisis de datos, pueden utilizar los siguientes elementos para guiar esta fase de selección:\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Para el valor numérico, compruebe las distribuciones de frecuencia de cada variable a la media, la desviación, incluidos los valores atípicos y las rarezas\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Para variables categóricas, compruebe si hay valores inesperados: cualquier resultado extraño basado en las expectativas de sentido común\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Utilice el análisis de correlación para comprobar posibles contradicciones en las respuestas de los encuestados a diferentes preguntas para asociaciones identificadas (chi-cuadrado)\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Siempre, Compruebe si faltan datos (NA) o \"%del encuestado que respondió\" que no puede explicar con confianza\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Compruebe las preguntas sin respuesta, que corresponde a la lógica de omisión no utilizada en el cuestionario: Por ejemplo, ¿respondió una persona que nunca fue desplazada a las preguntas relacionadas con el desplazamiento? ¿Se proporcionaron respuestas relacionadas con el empleo para un niño pequeño?\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste("## Interpretatcion sessions "),file = report.name , sep = "\n", append = TRUE)
cat(paste("Al analizar los visuales y gráficos en conjunto durante las sesiones de interpretación de datos, se puede: \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Reflejar__: cuestionar la calidad de los datos y / o hacer sugerencias para ajustar las preguntas, identificar pasos de limpieza adicionales; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Interpretar__: desarrollar interpretaciones cualitativas de los patrones en los datos; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Recomendar__: sugerir recomendaciones en términos de ajuste programático; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Clasificar__: se define el nivel de sensibilidad para ciertos temas si es necesario; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("## Este informe se puede regenerar según sea necesario: "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * ajustando la configuración del informe en el xlsform para dividirlo en informe y capítulo; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * configurar la desagregación y correlación para cada pregunta;; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * revisar la limpieza de datos basada en el registro de limpieza; \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * anexar indicadores calculados a su trama de datos para cambiar la forma de la variable - también llamada ingeniería de características. \n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("## Descripción del conjunto de datos\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Título del estudio:__ ",configInfo[configInfo$name == "titl", c("value")]," \n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Resumen:__ ",configInfo[configInfo$name == "abstract", c("value")],"\n"),file = report.name , sep = "\n\n", append = TRUE)
# cat(paste("__Derechos y Descargo de Responsabilidad::__ ",configInfo[configInfo$name == "disclaimer", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__País donde tuvo lugar el estudio:__ ",configInfo[configInfo$name == "Country", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Cobertura geográfica para el estudio dentro del país:__ ",configInfo[configInfo$name == "geogCover", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Tipo de datos:__ ",configInfo[configInfo$name == "dataKind", c("value")],"\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Número de registros en el marco de datos principal__: `r nrow(MainDataFrame)`\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Período de recopilación de datos__: between `r min(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\"))` and `r max(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\"))`\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Documented cleaning__: ",configInfo[configInfo$name == "cleanOps", c("value")],"\n\n"),file = report.name , sep = "\n\n", append = TRUE)
#cat(paste("__Entity being analyzed in the study:__ ",configInfo[configInfo$name == "AnalysisUnit", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
#cat(paste("__Procedure, technique, or mode of inquiry used to attain the data:__ ",configInfo[configInfo$name == "ModeOfCollection", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
#cat(paste("__Study Universe:__ (i.e. group of persons or other elements that are the object of research and to which any analytic results refer:",configInfo[configInfo$name == "universe", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
} else if (lang == "fre") {
cat(paste("# Etape de Crunching\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("Ce rapport d'analyse de données permet d'explorer rapidement les résultats de l'enquête qui peuvent être régénérés au besoin.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("L'objectif de ce rapport est de permettre d'identifier rapidement les potentielles dans votre jeu de données.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("Un examen rapide de ce rapport initial devrait permettre de sélectionner les graphiques les plus significatifs.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("Le processus de crunching produit beaucoup de visuels. Par conséquent, il est essentiel de sélectionner soigneusement les visuels les plus pertinent qui seront par la suite présenté pour une interprétation potentielle à l'étape suivante. Une session typique d'interprétation des données ne doit pas durer plus de 2 heures et comprendre plus de 60 visuels à regarder afin de garder les participants avec un bon niveau de concentration.\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste("## Sélection de contenu "),file = report.name , sep = "\n", append = TRUE)
cat(paste("Afin de guider cette phase de sélection, l'expert en data crunching et le concepteur de rapports, en collaboration avec le groupe d'analyse des données, peuvent utiliser les éléments suivants:\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Pour la valeur numérique, vérifiez les distributions de fréquence de chaque variable en moyenne, écart, y compris les valeurs aberrantes et les bizarreries\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Pour les variables catégorielles, vérifiez les valeurs inattendues: tout résultat étrange basé sur des attentes de bon sens\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Utiliser l'analyse de corrélation pour vérifier les contradictions potentielles dans les réponses des répondants aux différentes questions pour les associations identifiées (chi carré)\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Toujours, vérifiez les données manquantes (NA) ou \"% du répondant qui a répondu\" que vous ne pouvez pas expliquer en toute confiance\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Vérifiez les questions sans réponse, qui correspondent à la logique de saut inutilisée dans le questionnaire: par exemple, une personne qui n'a jamais été déplacée a-t-elle répondu aux questions liées au déplacement? Des réponses liées à l'emploi ont-elles été fournies à un tout-petit?\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste("## Session d'interpretation "),file = report.name , sep = "\n", append = TRUE)
cat(paste("Lors de l'analyse de ces représentations dans un cadre collectif lors de sessions d'interprétation des données, vous pouvez: \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Reflexion__: remettre en question la qualité des données et / ou faire des suggestions pour ajuster les questions, identifier des étapes de nettoyage supplémentaires; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Interpretation__: développer des interprétations qualitatives des modèles de données; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Recommendation__: proposer des recommandations en termes d'ajustement programmatique; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Classification__: définir le niveau de sensibilité pour certains sujets si nécessaire; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("## Le rapport peut être régénéré au besoin en: "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * ajustant la configuration du rapport dans le xlsform pour le diviser en rapport et chapitre; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * configurant la désagrégation et la corrélation pour chaque question; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * révisant le nettoyage des données en fonction du journal de nettoyage; \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * ajoutant des indicateurs calculés à votre bloc de données pour remodeler la variable - également appelée ingénierie des fonctionnalités. \n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("## Description du jeu de données\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Titre de l'étude:__ ",configInfo[configInfo$name == "titl", c("value")]," \n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Résumé:__ ",configInfo[configInfo$name == "abstract", c("value")],"\n"),file = report.name , sep = "\n\n", append = TRUE)
#cat(paste("__Droits et avis de non-responsabilité:__ ",configInfo[configInfo$name == "disclaimer", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Pays où l'étude a eu lieu:__ ",configInfo[configInfo$name == "Country", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Couverture géographique de l'étude dans le pays:__ ",configInfo[configInfo$name == "geogCover", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Type de données:__ ",configInfo[configInfo$name == "dataKind", c("value")],"\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Nombre d'enregistrements dans le bloc de données principal__: `r nrow(MainDataFrame)`\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Période de collecte des données__: entre le `r min(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\"))` et le `r max(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\"))`\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Nettoyage documenté__: ",configInfo[configInfo$name == "cleanOps", c("value")],"\n\n"),file = report.name , sep = "\n\n", append = TRUE)
#cat(paste("__Entité analysée dans l'étude:__ ",configInfo[configInfo$name == "AnalysisUnit", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
# cat(paste("__Procédure, technique ou mode d'enquête utilisé pour obtenir les données:__ ",configInfo[configInfo$name == "ModeOfCollection", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
#cat(paste("__Univers d'étude:__ (c'est-à-dire un groupe de personnes ou d'autres éléments qui font l'objet d'une recherche et auxquels se réfèrent les résultats analytiques:",configInfo[configInfo$name == "universe", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
}
} else {
if (lang == "eng") {
cat(paste("# Crunching step\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("This data crunching report allows to quickly explore the results of the survey that can be regenerated as needed.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("The objective of this report is to allow to quickly identify potential patterns in your dataset.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("A quick screening of this initial report should allow to select the most meaningful graphs.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("The crunching process produces a lot of visuals. Therefore it is key to carefully select the most relevant visual that will be presented for potential interpretation in the next step. A typical data interpretation session shall not last more than 2hours and include more than 60 visuals to look at in order to keep participants with a good focus level.\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste("In order to guide this selection phase, the data crunching expert and report designer, in collaboration with the data analysis group, can use the following elements:\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * For numeric value, check the frequency distributions of each variable to average, deviation, including outliers and oddities\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * For categorical variables, check for unexpected values: any weird results based on common sense expectations\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Use correlation analysis to check for potential contradictions in respondents answers to different questions for identified associations (chi-square)\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Always, Check for missing data (NA) or \"%of respondent who answered\" that you cannot confidently explain\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Check unanswered questions, that corresponds to unused skip logic in the questionnaire: For instance, did a person who was never displaced answer displacement-related questions? Were employment-related answers provided for a toddler?\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste("When analyzing those representations in a collective setting during data interpretation sessions, you may: \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Reflect__: question data quality and/or make suggestions to adjust questions, identify additional cleaning steps; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Interpret__: develop qualitative interpretations of data patterns; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Recommend__: suggest recommendations in terms of programmatic adjustment; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Classify__: define level of sensitivity for certain topics if required; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("The report can be regenerated as needed by: "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * adjusting the report configuration in the xlsform to break it into report and chapter; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * configuring disaggregation & correlation for each question; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * revising the data cleansing based on the cleaning log; \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * appending calculated indicators to your data frame to reshape variable - also called feature engineering. \n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("# Dataset description\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Title of the study:__ ",configInfo[configInfo$name == "titl", c("value")]," \n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Abstract:__ ",configInfo[configInfo$name == "abstract", c("value")],"\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Rights & Disclaimer:__ ",configInfo[configInfo$name == "disclaimer", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Country where the study took place:__ ",configInfo[configInfo$name == "Country", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Geographic Coverage for the study within the country:__ ",configInfo[configInfo$name == "geogCover", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Kind of Data:__ ",configInfo[configInfo$name == "dataKind", c("value")],"\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Number of records in the main data frame__: `r nrow(MainDataFrame)`\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Period of data collection__: between `r min(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\"))` and `r max(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\"))`\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Documented cleaning__: ",configInfo[configInfo$name == "cleanOps", c("value")],"\n\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Entity being analyzed in the study:__ ",configInfo[configInfo$name == "AnalysisUnit", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Procedure, technique, or mode of inquiry used to attain the data:__ ",configInfo[configInfo$name == "ModeOfCollection", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Study Universe:__ (i.e. group of persons or other elements that are the object of research and to which any analytic results refer:",configInfo[configInfo$name == "universe", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
} else if (lang == "esp") {
cat(paste("# Etapa de Crunching\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("El data crunching es el paso donde se procesan y estructuran los datos de la encuesta, y se generan visuales de los resultados de una manera automatizada y rápida.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" Este informe de análisis y crunching de datos permite explorar rápidamente los resultados de la encuesta. El informe puede ser regenerado según sea necesario. El objetivo de este informe es poder identificar rápidamente posibles tendencias y patrones en el conjunto de datos.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("Un repaso rápido de este informe inicial deberá permitir la selección de los gráficos más significativos.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("El proceso de crunching produce una gran cantidad de imágenes. Por lo tanto, es importante seleccionar cuidadosamente los visuales y gráficos más relevantes que podrán ser presentados para su interpretación en la siguiente etapa. Usualmente una sesión de interpretación de datos no debe durar más de 2 horas y no debe incluir más de 60 visuales a examinar con el fin de mantener a los participantes con un buen nivel de enfoque. \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste("El experto en análisis de datos (DIMA del Bureau / IMs de la Operación) y el diseñador de reportes (de la operación, e.g. Oficial de reportes, PI), en colaboración con el grupo de análisis de datos, pueden utilizar los siguientes elementos para guiar esta fase de selección:\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Para el valor numérico, compruebe las distribuciones de frecuencia de cada variable a la media, la desviación, incluidos los valores atípicos y las rarezas\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Para variables categóricas, compruebe si hay valores inesperados: cualquier resultado extraño basado en las expectativas de sentido común\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Utilice el análisis de correlación para comprobar posibles contradicciones en las respuestas de los encuestados a diferentes preguntas para asociaciones identificadas (chi-cuadrado)\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Siempre, Compruebe si faltan datos (NA) o \"%del encuestado que respondió\" que no puede explicar con confianza\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Compruebe las preguntas sin respuesta, que corresponde a la lógica de omisión no utilizada en el cuestionario: Por ejemplo, ¿respondió una persona que nunca fue desplazada a las preguntas relacionadas con el desplazamiento? ¿Se proporcionaron respuestas relacionadas con el empleo para un niño pequeño?\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste("Al analizar los visuales y gráficos en conjunto durante las sesiones de interpretación de datos, se puede: \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Reflejar__: cuestionar la calidad de los datos y / o hacer sugerencias para ajustar las preguntas, identificar pasos de limpieza adicionales; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Interpretar__: desarrollar interpretaciones cualitativas de los patrones en los datos; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Recomendar__: sugerir recomendaciones en términos de ajuste programático; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Clasificar__: se define el nivel de sensibilidad para ciertos temas si es necesario; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("Este informe se puede regenerar según sea necesario: "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * ajustando la configuración del informe en el xlsform para dividirlo en informe y capítulo; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * configurar la desagregación y correlación para cada pregunta;; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * revisar la limpieza de datos basada en el registro de limpieza; \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * anexar indicadores calculados a su trama de datos para cambiar la forma de la variable - también llamada ingeniería de características. \n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("## Descripción del conjunto de datos\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Título del estudio:__ ",configInfo[configInfo$name == "titl", c("value")]," \n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Resumen:__ ",configInfo[configInfo$name == "abstract", c("value")],"\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Derechos y Descargo de Responsabilidad::__ ",configInfo[configInfo$name == "disclaimer", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__País donde tuvo lugar el estudio:__ ",configInfo[configInfo$name == "Country", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Cobertura geográfica para el estudio dentro del país:__ ",configInfo[configInfo$name == "geogCover", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Tipo de datos:__ ",configInfo[configInfo$name == "dataKind", c("value")],"\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Número de registros en el marco de datos principal__: `r nrow(MainDataFrame)`\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Período de recopilación de datos__: entre `r min(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\"))` y `r max(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\"))`\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Limpieza documentada__: ",configInfo[configInfo$name == "cleanOps", c("value")],"\n\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Entidad analizada en el estudio:__ ",configInfo[configInfo$name == "AnalysisUnit", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Procedimiento, técnica o modo de investigación utilizado para lograr los datos:__ ",configInfo[configInfo$name == "ModeOfCollection", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Universo de estudio__ : (es decir, grupo de personas u otros elementos que son objeto de investigación y a los que se refieren los resultados analíticos: ",configInfo[configInfo$name == "universe", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
} else if (lang == "fre") {
cat(paste("# Etape de Crunching\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("Ce rapport d'analyse de données permet d'explorer rapidement les résultats de l'enquête qui peuvent être régénérés au besoin.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("L'objectif de ce rapport est de permettre d'identifier rapidement les potentielles dans votre jeu de données.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("Un examen rapide de ce rapport initial devrait permettre de sélectionner les graphiques les plus significatifs.\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("Le processus de crunching produit beaucoup de visuels. Par conséquent, il est essentiel de sélectionner soigneusement les visuels les plus pertinent qui seront par la suite présenté pour une interprétation potentielle à l'étape suivante. Une session typique d'interprétation des données ne doit pas durer plus de 2 heures et comprendre plus de 60 visuels à regarder afin de garder les participants avec un bon niveau de concentration.\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste("Afin de guider cette phase de sélection, l'expert en data crunching et le concepteur de rapports, en collaboration avec le groupe d'analyse des données, peuvent utiliser les éléments suivants:\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Pour la valeur numérique, vérifiez les distributions de fréquence de chaque variable en moyenne, écart, y compris les valeurs aberrantes et les bizarreries\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Pour les variables catégorielles, vérifiez les valeurs inattendues: tout résultat étrange basé sur des attentes de bon sens\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Utiliser l'analyse de corrélation pour vérifier les contradictions potentielles dans les réponses des répondants aux différentes questions pour les associations identifiées (chi carré)\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Toujours, vérifiez les données manquantes (NA) ou \"% du répondant qui a répondu\" que vous ne pouvez pas expliquer en toute confiance\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * Vérifiez les questions sans réponse, qui correspondent à la logique de saut inutilisée dans le questionnaire: par exemple, une personne qui n'a jamais été déplacée a-t-elle répondu aux questions liées au déplacement? Des réponses liées à l'emploi ont-elles été fournies à un tout-petit?\n "),file = report.name , sep = "\n", append = TRUE)
cat(paste("Lors de l'analyse de ces représentations dans un cadre collectif lors de sessions d'interprétation des données, vous pouvez: \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Reflexion__: remettre en question la qualité des données et / ou faire des suggestions pour ajuster les questions, identifier des étapes de nettoyage supplémentaires; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Interpretation__: développer des interprétations qualitatives des modèles de données; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Recommendation__: proposer des recommandations en termes d'ajustement programmatique; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * __Classification__: définir le niveau de sensibilité pour certains sujets si nécessaire; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("Le rapport peut être régénéré au besoin en: "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * ajustant la configuration du rapport dans le xlsform pour le diviser en rapport et chapitre; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * configurant la désagrégation et la corrélation pour chaque question; \n"),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * révisant le nettoyage des données en fonction du journal de nettoyage; \n "),file = report.name , sep = "\n", append = TRUE)
cat(paste(" * ajoutant des indicateurs calculés à votre bloc de données pour remodeler la variable - également appelée ingénierie des fonctionnalités. \n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("# Description du jeu de données\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Titre de l'étude:__ ",configInfo[configInfo$name == "titl", c("value")]," \n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Résumé:__ ",configInfo[configInfo$name == "abstract", c("value")],"\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Droits et avis de non-responsabilité:__ ",configInfo[configInfo$name == "disclaimer", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Pays où l'étude a eu lieu:__ ",configInfo[configInfo$name == "Country", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Couverture géographique de l'étude dans le pays:__ ",configInfo[configInfo$name == "geogCover", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Type de données:__ ",configInfo[configInfo$name == "dataKind", c("value")],"\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Nombre d'enregistrements dans le bloc de données principal__: `r nrow(MainDataFrame)`\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Période de collecte des données__: entre le `r min(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\"))` et le `r max(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\"))`\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Nettoyage documenté__: ",configInfo[configInfo$name == "cleanOps", c("value")],"\n\n"),file = report.name , sep = "\n\n", append = TRUE)
cat(paste("__Entité analysée dans l'étude:__ ",configInfo[configInfo$name == "AnalysisUnit", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Procédure, technique ou mode d'enquête utilisé pour obtenir les données:__ ",configInfo[configInfo$name == "ModeOfCollection", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
cat(paste("__Univers d'étude:__ (c'est-à-dire un groupe de personnes ou d'autres éléments qui font l'objet d'une recherche et auxquels se réfèrent les résultats analytiques:",configInfo[configInfo$name == "universe", c("value")],"\n\n"),file = report.name , sep = "\n", append = TRUE)
}
}
## get list of chapters
chapters <- as.data.frame(unique(dico[ , c("chapter","report")]))
names(chapters)[2] <- "Report"
names(chapters)[1] <- "Chapter"
## Default behavior if no chapter was defined in xlsform
if (nrow(chapters) == 1 & checkmate::anyMissing(chapters$Chapter)) {
cat("Defaulting questions allocation to chapter")
dico$chapter[ dico$type %in% c("select_one","select_multiple_d")] <- "report"
chapters <- as.data.frame(unique(dico$chapter))
names(chapters)[1] <- "Chapter"
} else {}
chapters <- as.data.frame(chapters[!is.na(chapters$Chapter) & chapters$Report == reportsname, ])
names(chapters)[1] <- "Chapter"
for (v in 1:nrow(chapters) )
{
# v <- 1
chaptersname <- as.character(chapters[ v , 1])
report.name.i.v <-
stringr::str_c(
report.name,
stringr::str_pad(i, 2, pad = "0"),
stringr::str_pad(v, 2, pad = "0"),
"00",
sep = "-")
## Getting chapter questions ####################################################################################################
#chapterquestions <- dico[which(dico$chapter== chaptersname ), c("chapter", "name", "label", "type", "qrepeatlabel", "fullname","listname") ]
chapterquestions <- dico[which(dico$chapter == chaptersname & dico$type %in% c("select_one","integer","select_multiple_d", "text","date", "numeric", "calculate")),
c("chapter", "name", "label", "labelReport","hintReport", "type", "qrepeatlabel", "fullname","listname","variable") ]
# levels(as.factor(as.character(dico[which(!(is.na(dico$chapter)) & dico$formpart=="questions"), c("type") ])))
## Reorder chapter
chapterquestions <- chapterquestions[ order(chapterquestions$chapter), ]
## add better slides separator
if (output == "pptx") {
cat(paste("---"),file = report.name.i.v , sep = "\n", append = TRUE)
cat(paste("# ", chaptersname),file = report.name.i.v , sep = "\n", append = TRUE)
# cat(paste0("Linked questions: ", as.character(chapterquestions$labelReport) ),file = report.name , sep = "\n", append = TRUE)
cat(paste("---"),file = report.name.i.v , sep = "\n", append = TRUE)
} else {
cat(paste("# ", chaptersname),file = report.name.i.v , sep = "\n", append = TRUE)
# cat(paste("Linked questions: ", as.character(chapterquestions$labelReport) ),file = report.name , sep = "\n", append = TRUE)
}
if (app == "shiny") {
progress$set(message = "Compilation of questions results in progress...")
updateProgress()
}
## Loop.questions
if (app == "shiny") {
progress$set(message = "Getting levels for each questions in progress...")
updateProgress()
}
## Parallel function to crunch questions ##########
# j <- 1
crunch_question <- function(j)
{
report.name.i.v.j <-
stringr::str_c(
report.name,
stringr::str_pad(i, 2, pad = "0"),
stringr::str_pad(v, 2, pad = "0"),
stringr::str_pad(j, 2, pad = "0"),
sep = "-")
## Now getting level for each questions
if (app == "shiny") {
progress$set(message = paste("Render question: ",as.character(chapterquestions[ j , c("labelReport")])))
updateProgress()
}
questions.name <- as.character(chapterquestions[ j , c("fullname")])
questions.shortname <- as.character(chapterquestions[ j , c("name")])
questions.type <- as.character(chapterquestions[ j , c("type")])
questions.frame <- as.character(chapterquestions[ j , c("qrepeatlabel")])
questions.label <- as.character(chapterquestions[ j , c("labelReport")])
questions.hint <- as.character(chapterquestions[ j , c("hintReport")])
questions.listname <- as.character(chapterquestions[ j , c("listname")])
questions.ordinal <- as.character(chapterquestions[ j , c("variable")])
if (is.na(questions.ordinal) ) {questions.ordinal <- "not.defined"} else {questions.ordinal <- questions.ordinal }
questions.variable <- paste0(questions.frame,"$",questions.name)
cat(paste("\n", i, "-", j, " - Render question: ", questions.variable, " -",questions.type, "\n" ))
## write question name
cat("\n ",file = report.name.i.v.j , sep = "\n", append = TRUE)
cat(paste("## ", questions.label ,sep = ""),file = report.name.i.v.j , sep = "\n", append = TRUE)
## Now create para based on question type
cat(paste(if (is.na(questions.hint)){paste0("")} else {paste0("__Interpretation Hint__: ", questions.hint)},"\n\n",sep = ""),file = report.name.i.v.j ,sep = "\n", append = TRUE)
#### Question Type = select_one ###################################################################################################
if (questions.type == "select_one" ) {
if (lang == "eng") {
cat(paste("Single choice question ","\n\n",sep = ""),file = report.name.i.v.j ,sep = "\n", append = TRUE)
} else if (lang == "esp") {
cat(paste("Pregunta de selección única ","\n\n",sep = ""),file = report.name.i.v.j ,sep = "\n", append = TRUE)
} else if (lang == "fre") {
cat(paste("Question a choix unique ","\n\n",sep = ""),file = report.name.i.v.j ,sep = "\n", append = TRUE)
}
## select_one.tabulation
## compute frequency to see if it's not empty
frequ <- as.data.frame(table( get(paste0(questions.frame))[[questions.name]]))
figheight <- as.integer(nrow(frequ))
## Getting the figure height for the charts in Rmd
if ( figheight == 0) { figheight <- 3 }
else if ( figheight == 1) {figheight <- 3 }
else if ( figheight == 2) {figheight <- 3 }
else if ( figheight == 3) {figheight <- 3 }
else if ( figheight == 4) {figheight <- 4}
else if ( figheight == 5) {figheight <- 4}
else if ( figheight == 6) {figheight <- 5}
else if ( figheight == 7) {figheight <- 6}
else if ( figheight == 8) {figheight <- 7}
else if ( figheight == 9) {figheight <- 7}
else if ( figheight == 10) {figheight <- 9}
else if ( figheight >= 11) {figheight <- 10}
## Check that there are responses to be displayed
if (nrow(frequ) %in% c("0") ) {
if (lang == "eng") {
cat(paste0("No responses recorded for this question.\n"),file = report.name.i.v.j , sep = "\n", append = TRUE)
} else if (lang == "esp") {
cat(paste0("No se registraron respuestas para esta pregunta.\n"),file = report.name.i.v.j , sep = "\n", append = TRUE)
} else if (lang == "fr") {
cat(paste0("Pas de reponses donne a cette question.\n"),file = report.name.i.v.j , sep = "\n", append = TRUE)
}
cat("No responses recorded for this question...\n")
# names(frequ)[2] <- "ccheck"
# try <- frequ$ccheck
# } else if (sum(try) == 0) {
# cat(paste0("cat(\"No responses recorded for this question...\")"),file = report.name , sep = "\n", append = TRUE)
# cat("No responses recorded for this question...\n")
} else if (nrow(frequ) %in% c("1") ) {
## Case we a unique modality
if (lang == "eng") {
cat(paste0("The same answer was given (only one modality) recorded for this question.\n"),file = report.name.i.v.j , sep = "\n", append = TRUE)
} else if (lang == "esp") {
cat(paste0("La misma respuesta (sólo una modalidad) se registraron para esta pregunta.\n"),file = report.name.i.v.j , sep = "\n", append = TRUE)
} else if (lang == "fr") {
cat(paste0("La meme reponse a toujours ete donne a cette question.\n"),file = report.name.i.v.j , sep = "\n", append = TRUE)
}
if (output == "pptx") {
cat(paste0("```{r ", questions.name, ".tab, echo=FALSE, warning=FALSE, cache=FALSE, tidy = TRUE, message=FALSE }\n"), file = report.name.i.v.j, append = TRUE)
} else {
cat(paste0("```{r ", questions.name, ".tab, echo=FALSE, warning=FALSE, cache=FALSE, tidy = TRUE, message=FALSE, comment = \"\", fig.height=",figheight,", size=\"small\"}\n"), file = report.name.i.v.j, append = TRUE)
}
cat(paste0("table(",questions.variable,")"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("\n```\n", sep = '\n'), file = report.name.i.v.j, append = TRUE)
} else {
#cat(paste("### Tabulation" ,sep = ""),file = report.name ,sep = "\n", append = TRUE)
## Open chunk
if (output == "pptx") {
cat(paste0("```{r ", questions.name, ".tab, echo=FALSE, warning=FALSE, cache=FALSE, tidy = TRUE, message=FALSE }\n"), file = report.name.i.v.j, append = TRUE)
} else {
cat(paste0("```{r ", questions.name, ".tab, echo=FALSE, warning=FALSE, cache=FALSE, tidy = TRUE, message=FALSE, comment = \"\", fig.height=",figheight,", size=\"small\"}\n"), file = report.name.i.v.j, append = TRUE)
}
# cat(paste("### Tabulation" ,sep = ""),file = report.name ,sep = "\n", append = TRUE)
cat(paste0("##Compute contingency table"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("frequ <- as.data.frame(table(",questions.variable,"))"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
#cat(paste0("if (nrow(frequ)==0){ cat(\"No response for this question\") } else{"),file = report.name ,sep = "\n", append = TRUE)
cat(paste0("nresp <- sum(frequ$Freq)"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
# cat(paste0("## display table"),file = report.name ,sep = "\n", append = TRUE)
# cat(paste0("## Reorder factor"),file = report.name ,sep = "\n", append = TRUE)
## Check variable type to order the factor
## - if not ordinal order according to frequency - if ordinal order according to order in the dico
if (questions.ordinal == "ordinal" ) {
### get the list of options in the right order
cat(paste0("list.ordinal <- unique(dico[ dico$listname == \"", questions.listname,"\" & dico$type == \"select_one_d\", c(\"labelchoice\") ])$labelchoice"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("levels(frequ$Var1) <- list.ordinal"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
} else {
cat(paste0("frequ[ ,1] = factor(frequ[ ,1],levels(frequ[ ,1])[order(frequ$Freq, decreasing = FALSE)])"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("frequ <- frequ[ order(frequ[ , 1]) , ]"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
}
cat(paste0("names(frequ)[1] <- \"", questions.shortname,"\""),file = report.name.i.v.j ,sep = "\n", append = TRUE)
# cat(paste0("kable(frequ, caption=\"__Table__:", questions.label,"\")"),file = report.name ,sep = "\n", append = TRUE)
cat(paste0("## Frequency table with NA in order to get non response rate"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("frequ1 <- as.data.frame(prop.table(table(", questions.variable,", useNA = \"ifany\")))"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("frequ1 <- frequ1[!(is.na(frequ1$Var1)), ]"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("frequ1 <- frequ1[!(frequ1$Var1 == \"NA\"), ]"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("percentreponse <- paste0(round(sum(frequ1$Freq)*100,digits = 1),\"%\")"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("## Frequency table without NA"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("frequ2 <- as.data.frame(prop.table(table(", questions.variable,",useNA = \"no\")))"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("## Frequency table with weight"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("frequ.weight <- as.data.frame(svymean(~ ",questions.name,", design = ",questions.frame,".survey, na.rm = TRUE))"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("## Binding the two"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("frequ3 <- cbind(frequ2,frequ.weight)"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("## Reorder factor"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
# cat(paste0("frequ2[ ,1] = factor(frequ2[ ,1],levels(frequ2[ ,1])[order(frequ2$Freq, decreasing = FALSE)])"),file = report.name ,sep = "\n", append = TRUE)
# cat(paste0("frequ2 <- frequ2[ order(frequ2[ , 1]) , ]"),file = report.name ,sep = "\n", append = TRUE)
# cat(paste0("frequ2[ ,3] <- paste0(round(frequ2[ ,2]*100,digits = 1),\"%\")"),file = report.name ,sep = "\n", append = TRUE)
# cat(paste0("names(frequ2)[3] <- \"freqper2\""),file = report.name ,sep = "\n", append = TRUE)
if (questions.ordinal == "ordinal" ) {
cat(paste0("list.ordinal <- unique(dico[ dico$listname == \"", questions.listname,"\" & dico$type == \"select_one_d\", c(\"labelchoice\") ])$labelchoice"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("levels(frequ3$Var1) <- list.ordinal"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
} else {
cat(paste0("frequ3[ ,1] = factor(frequ3[ ,1],levels(frequ3[ ,1])[order(frequ3$mean, decreasing = FALSE)])"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("frequ3 <- frequ3[ order(frequ3[ , 1]) , ]"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
}
cat(paste0("frequ3[ ,5] <- paste0(round(frequ3[ ,3]*100,digits = 1),\"%\")"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("names(frequ3)[5] <- \"freqper2\""),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("frequ3 <- cbind(frequ3,frequ[, 2])"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("names(frequ3)[6] <- \"count\""),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("\n"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("## and now the graph"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
if (use_pct == "TRUE") {
cat(paste0("plot1 <- ggplot(frequ3, aes(x = frequ3$Var1, y = frequ3$mean)) +"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
} else {
cat(paste0("plot1 <- ggplot(frequ3, aes(x = frequ3$Var1, y = frequ3$count)) +"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
}
cat(paste0("geom_bar(fill = \"#2a87c8\", colour = \"#2a87c8\", stat = \"identity\", width = .8) +"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
if(add_error_bar == "TRUE") {
cat(paste0("geom_errorbar(aes(ymin = mean-SE, ymax = mean+SE), size=.4, width=.3, colour = 'grey20') +"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
}
cat(paste0("guides(fill = FALSE) +"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
if (use_pct == "TRUE") {
cat(paste0("geom_label_repel(aes(y = mean, label = freqper2), fill = \"#2a87c8\", color = 'white') +"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("ylab(\"Frequency\") +"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("scale_y_continuous(labels = percent) +"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
} else {
cat(paste0("geom_label_repel(aes(y = count, label = count), fill = \"#2a87c8\", color = 'white') +"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("ylab(\"Count\") +"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
}
cat(paste0("xlab(\"\") +"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("coord_flip() +"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("labs(title = \"",questions.label,"\","),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("subtitle = paste0(\" Question response rate: \",percentreponse,\" - respondents: \", nresp),"),file = report.name.i.v.j ,sep = "\n", append = TRUE)
cat(paste0("caption = paste0(\"", configInfo[configInfo$name == "titl", c("value")], "- \",
nrow(MainDataFrame), \" total records collected between \",
min(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\")), \" and \",
max(as.Date(MainDataFrame$today, format = \"%Y-%m-%d\")), \" in \", \" ",
configInfo[configInfo$name == "Country", c("value")]," \")) +"), file = report.name.i.v.j ,sep = "\n", append = TRUE)