forked from vynguyen92/nhanes_mortality_associations
-
Notifications
You must be signed in to change notification settings - Fork 0
/
f - compile_demographics_dataset.R
70 lines (57 loc) · 3.15 KB
/
f - compile_demographics_dataset.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
############################ FUNCTION TO COMPILE THE NHANES DEMOGRAPHICS DATASET #############################
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Purpose: This function extracts the demographics dataset for each cycle and merged all cycle-specific
# demographics datasets into one merged demographic dataset
#
# Inputs: main_directory - the working directory of the folder that contains the files of cycle-specific
# demographics dataset
# current_directory - the working directory of the folder where the function and main scripts of the
# project is housed.
#
# Outputs: merged_nhanes_dataset - dataframe with the merged demographic dataset
compile_demographics_dataset <- function(main_directory
, current_directory)
{
# Establish the working directory to be the folder that contains the files of the cycle-specific demographics
# datasets
setwd(main_directory)
# Determine the names of the files within this working directory
nhanes_dataset_by_cycle <- list.files()
# Determine the number of files in this working directory
# The number of files should be the number of cycles in the NHANES continuous database
num_cycles <- length(nhanes_dataset_by_cycle)
# Determine the name of the first file in this working directory
first_file_name.xpt <- nhanes_dataset_by_cycle[1]
# Replace the ".XPT" in the file name with ""
first_file_name <- gsub(".XPT"
, ""
, first_file_name.xpt)
# Use the updated file name to extract the appropriate dataset from the nhanesA package
# Store the demographics dataset for the first cycle into merged_nhanes_dataset, so that subsequent
# demographics datasets can be merged with the first one
merged_nhanes_dataset <- nhanes(first_file_name)
# For the subsequent demographics datasets, the file name will be used to extract the ith cycle demographics
# dataset and merge them with the demographics datasets from the previous cycles to form a merged demographics
# dataset
for(i in 2:num_cycles)
{
# Extract the file name for the demographics dataest in the ith cycle
file_i_name.xpt <- nhanes_dataset_by_cycle[i]
# Replace the ".XPT" in the file name with ""
file_i_name <- gsub(".XPT"
, ""
, file_i_name.xpt)
# Extract the demographics dataset for the ith cycle
cycle_i_dataset <- nhanes(file_i_name)
# Merge the demographics dataset for the ith cycle with the previous demographic dataset
merged_nhanes_dataset <- full_join(merged_nhanes_dataset
, cycle_i_dataset
, by = NULL)
# Message to help know which cycle has been merged in
print(paste("Merge in Cycle ", i, sep = ""))
}
setwd(current_directory)
# Return the merged demographics dataset
return(merged_nhanes_dataset)
}