From cc1333e73fadb906c9fc76b3330a24836e5d32bb Mon Sep 17 00:00:00 2001
From: Diana Goldemberg
Date: Thu, 17 Oct 2019 20:02:06 -0400
Subject: [PATCH] [ado_files] migrate ado folder from GLAD-Production repo

---
 05_adofiles/glad_hpro_as_cpi.ado            | 124 ++++++++++++++++++++
 05_adofiles/glad_local_folder_setup.ado     |  72 ++++++++++++
 05_adofiles/glad_split_region_2_country.ado | 108 +++++++++++++++++
 05_adofiles/glad_toolkit.pkg                |   8 ++
 05_adofiles/stata.toc                       |   3 +
 5 files changed, 315 insertions(+)
 create mode 100644 05_adofiles/glad_hpro_as_cpi.ado
 create mode 100644 05_adofiles/glad_local_folder_setup.ado
 create mode 100644 05_adofiles/glad_split_region_2_country.ado
 create mode 100644 05_adofiles/glad_toolkit.pkg
 create mode 100644 05_adofiles/stata.toc

diff --git a/05_adofiles/glad_hpro_as_cpi.ado b/05_adofiles/glad_hpro_as_cpi.ado
new file mode 100644
index 0000000..dd9ebbd
--- /dev/null
+++ b/05_adofiles/glad_hpro_as_cpi.ado
@@ -0,0 +1,124 @@
+*==============================================================================*
+* PROGRAM: HARMONIZATION OF PROFICIENCY ON THE FLY (THRESHOLDS AS CPI IN GMD)
+* Project information at: https://github.com/worldbank/GLAD
+* Author: Diana Goldemberg
+*==============================================================================*
+
+/* The use case of this program is: GLAD.dta files have the valuevars score_*
+   and level_* stored in them, but the harmonization, according to proficiency
+   thresholds, is done 'on the fly', like the CPI adjustments in the GMD
+   collection.
+
+   That is, datalibweb has the GLAD microdata stored without the harmonization,
+   and whenever a file is queried, the harmonization thresholds are merged
+   on the fly and the proficiency dummies are calculated.
+
+   Requirements:
+   - the data in memory has the variables surveyid and idgrade (merge keys)
+   - the global clone points to the folder that contains 01_harmonization
+   - lp_thresholds_as_cpi.dta stores the thresholds as triplets of variables:
+       [prefix]_threshold_var: name of the score or level variable to be used
+       [prefix]_threshold_val: minimum value to be classified as proficient
+       [prefix]_threshold_res: name of the dummy that will store the result
+
+   Results:
+   - one dummy for each distinct value found in the [prefix]_threshold_res
+   - char _dta[onthefly_valuevars] with the names of the dummies created
+   - char _dta[onthefly_traitvars] with the names of the threshold variables
+*/
+
+cap program drop glad_hpro_as_cpi
+program define glad_hpro_as_cpi, rclass
+
+  syntax ,
+
+  * Brings thresholds triplets defined in dta which should sit in DLW (our version of CPI.dta)
+  * Note: keep(master match) preserves observations without thresholds, so the
+  * check below can report them; with keep(match) they would be dropped by the
+  * merge itself and the error message below could never be displayed
+  merge m:1 surveyid idgrade using "${clone}/01_harmonization/011_rawdata/lp_thresholds_as_cpi.dta", keep(master match) gen(merge_thresholds)
+  * This merge should have an assert, but we want a more informative error message, thus we do it manually
+  cap assert inlist(merge_thresholds, 2 , 3) // equivalent to assert(match using)
+  if _rc {
+    noi di as error "{pstd}New surveys have been added to the GLAD collection, but not to the thresholds_as_cpi file in 011_raw_data. Please add the appropriate surveys in for the following cases:{p_end}"
+    noi tab surveyid idgrade if merge_thresholds == 1
+    error 2222
+  }
+  * Drop the merge_thresholds variables after the assert
+  drop merge_thresholds
+
+  * Each prefix_threshold is a triplet: prefix_threshold_var, prefix_threshold_val, prefix_threshold_res
+
+  * Loop through all threshold triplets (specifically, prefix_threshold_res but could be val or var)
+  ds *_threshold_res
+  foreach threshold_res of varlist `r(varlist)' {
+
+    local this_prefix = subinstr("`threshold_res'", "_threshold_res", "", 1)
+
+    * Check if this_prefix was used for this assessment-year, or has all missing obs
+    count if missing(`threshold_res')
+    if `r(N)'<_N {
+      * Not all observations are missing
+
+      * Concatenate list of prefixes used
+      local prefixes = "`prefixes' `this_prefix'"
+
+      * Concatenate list of results to be created, in two steps
+      * 1. loop through all results used in a prefix
+      levelsof `threshold_res', local(resultvars_in_prefix)
+      foreach resultvar of local resultvars_in_prefix {
+
+        * 2. Update the list of results (unique entries only)
+        local resultvars : list resultvars | resultvar
+      }
+    }
+    else {
+      * All observations are missing
+      * Drop the threshold triplet, for it was not used at all
+      drop `this_prefix'_threshold_*
+    }
+
+  }
+
+  * Value labels for dummy variables of Harmonized Proficiency
+  label define lb_hpro 0 "Non-proficient" 1 "Proficient" .a "Missing score/level" .b "Non-harmonized grade", replace
+
+  * Generate all result variables as dummies which start empty
+  * (labeled as if this grade was not being harmonized)
+  foreach resultvar of local resultvars {
+    gen byte `resultvar': lb_hpro = .b
+    label var `resultvar' "Harmonized proficiency (subject-specific)"
+    char `resultvar'[clo_marker] "dummy"
+  }
+
+
+  * Loop through all prefixes
+  foreach prefix of local prefixes {
+
+    * Retrieves list of variables used in the current prefix_threshold_var
+    levelsof `prefix'_threshold_var, local(originalvars_used_in_prefix)
+
+    * Loop through all variables used in the current prefix,
+    * and performs the calculation based on it
+    foreach originalvar of local originalvars_used_in_prefix {
+      foreach resultvar of local resultvars {
+
+        * Calculate the harmonized proficiency dummy, for example:
+        * resultvar is hpro_read and originalvar is level_llece_read
+        replace `resultvar' = (`originalvar'>=`prefix'_threshold_val) if `prefix'_threshold_res == "`resultvar'" & `prefix'_threshold_var=="`originalvar'" & !missing(`originalvar')
+
+        * Case of missing test score or test level
+        replace `resultvar' = .a if `prefix'_threshold_res == "`resultvar'" & `prefix'_threshold_var == "`originalvar'" & missing(`originalvar')
+      }
+    }
+  }
+
+  * When this ado is called, a GLAD.dta is open and it should already
+  * have the metadata as standardized in the collection. This adds more:
+  char _dta[onthefly_valuevars] "`resultvars'"
+  * Unabbreviate wildcards* in the threshold triplets variables
+  cap unab thresholdvars : *_threshold_var *_threshold_val *_threshold_res
+  if _rc == 111 noi disp as err "No harmonized minimum proficiency thresholds defined for this learning assessment."
+  else char _dta[onthefly_traitvars] "`thresholdvars'"
+
+end
diff --git a/05_adofiles/glad_local_folder_setup.ado b/05_adofiles/glad_local_folder_setup.ado
new file mode 100644
index 0000000..59c2e5b
--- /dev/null
+++ b/05_adofiles/glad_local_folder_setup.ado
@@ -0,0 +1,72 @@
+*==============================================================================*
+* PROGRAM: SETUP LOCAL FOLDER STRUCTURE WHEN CREATING NEW GLAD MODULE
+* Project information at: https://github.com/worldbank/GLAD
+* Author: Kristoffer Bjarkefur
+*==============================================================================*
+
+/* Creates, if they do not exist yet, the folders used by one GLAD module:
+     ${output}/[region]/[region]_[year]_[assessment]
+     ${output}/TEMP/TEMP_[region]_[year]_[assessment]
+   The folder in the global output must exist before this program is called.
+
+   Returned values:
+     r(surveyid)   [region]_[year]_[assessment]_[master]
+     r(output_dir) path to the first folder listed above
+     r(temp_dir)   path to the second folder listed above
+
+   The option adaptation() is required by the syntax, but it is not used
+   anywhere in the code of this program.
+*/
+
+cap program drop glad_local_folder_setup
+program define glad_local_folder_setup, rclass
+
+  * Names:
+  syntax , Region(string) Year(string) ASsessment(string) MAster(string) ADaptation(string)
+
+  local topfolder "`region'_`year'_`assessment'"
+  local surveyid "`topfolder'_`master'"
+
+  return local surveyid `surveyid'
+
+  * Paths:
+  * Each call of makefolder returns the path it handled in r(folder),
+  * which is used as the parent folder in the call that follows it
+  makefolder, parent("${output}") newfolder("`region'")
+  makefolder, parent("`r(folder)'") newfolder("`topfolder'")
+
+  return local output_dir "`r(folder)'"
+
+  local tempfolder "TEMP_`topfolder'"
+
+  makefolder, parent("${output}") newfolder("TEMP")
+  makefolder, parent("`r(folder)'") newfolder("`tempfolder'")
+
+  return local temp_dir "`r(folder)'"
+end
+
+
+* Subroutine: creates the folder [newfolder] inside of the folder [parent],
+* unless it already exists, and returns its path in r(folder). Stops with
+* error 601 if the folder [parent] does not exist.
+cap program drop makefolder
+program define makefolder, rclass
+
+  syntax, parent(string) newfolder(string)
+
+  * Test that parent folder exists
+  mata : st_numscalar("r(dirExist)", direxists("`parent'"))
+  if `r(dirExist)' == 0 {
+    noi di as error `"{phang}Internal error glad_local_folder_setup.ado, folder [`parent'] does not exist{p_end}"'
+    error 601
+  }
+
+  * Test if new folder exists, if not create it
+  mata : st_numscalar("r(dirExist)", direxists("`parent'/`newfolder'"))
+  if `r(dirExist)' == 0 {
+    mkdir "`parent'/`newfolder'"
+  }
+
+  return local folder "`parent'/`newfolder'"
+
+end
diff --git a/05_adofiles/glad_split_region_2_country.ado b/05_adofiles/glad_split_region_2_country.ado
new file mode 100644
index 0000000..016286b
--- /dev/null
+++ b/05_adofiles/glad_split_region_2_country.ado
@@ -0,0 +1,108 @@
+*==============================================================================*
+* PROGRAM: SPLIT REGIONS INTO COUNTRIES WHEN COPYING TO DLW ROOT
+* Project information at: https://github.com/worldbank/GLAD
+* Author: Kristoffer Bjarkefur
+*==============================================================================*
+
+/* Splits a regional or global file into one file for each value of the
+   variable countrycode, and saves each of them in the folder of its country.
+
+   Options:
+     fname()            name of the file to be split, expected to start
+                        with [region]_[year]_[assessment]
+     floc()             folder where that file is located
+     overwrite_files()  when 0, country files that already exist are kept;
+                        otherwise they are replaced
+     regionfolder()     folder names of the region; the first occurrence of
+     masterfolder()     [region] in them is replaced with each country code
+
+   The main program has the same name as this file, which Stata requires
+   to find the program automatically in the adopath. The previous name,
+   split_region_2_country, is kept as an alias at the end of this file for
+   code that runs this file explicitly before calling the program.
+*/
+
+cap program drop glad_split_region_2_country
+program define glad_split_region_2_country, rclass
+
+  syntax , fname(string) floc(string) OVERWrite_files(numlist) regionfolder(string) masterfolder(string)
+
+  qui {
+
+    * Get region, year and assessment from file name
+    * (gettoken leaves the separator at the start of rest, substr removes it)
+    gettoken region rest : fname , parse("_")
+    local rest = substr("`rest'", 2, .)
+    gettoken year rest : rest , parse("_")
+    local rest = substr("`rest'", 2, .)
+    gettoken assessment rest : rest , parse("_")
+
+    * Generate the surveyname for this file
+    local surveyname "`region'_`year'_`assessment'"
+
+    * Open up the global/regional file
+    use "`floc'/`fname'", clear
+
+    cap confirm variable countrycode
+    if _rc {
+      noi di as error `"{phang}The file [`floc'/`fname'] does not have a variable called {inp:countrycode} which is required for all files to be split by country.{p_end}"'
+      error 601
+    }
+
+    * List all the country codes in this file
+    levelsof countrycode, clean local(countries)
+
+    noi di "{phang2}Countries included: [`countries']{p_end}"
+
+    local country_counter = 0
+    local country_total : word count `countries'
+
+    * Loop over all countries
+    foreach country of local countries {
+
+      * Increment the counter for screen output
+      local ++country_counter
+
+      * Create the country file and folder name from the region file and folder name
+      local cnt_surveyname = subinstr("`surveyname'","`region'", "`country'", 1)
+      local cnt_filename = subinstr("`fname'" ,"`region'", "`country'", 1)
+      local cnt_folder = subinstr("`regionfolder'" ,"`region'", "`country'", 1)
+      local cnt_master_folder = subinstr("`masterfolder'" ,"`region'", "`country'", 1)
+
+
+      * Creates folder if not already exists
+      * (edukit_rmkdir is not defined in this file; r(folder) is assumed to
+      * be the path to the folder handled in the most recent call)
+      edukit_rmkdir, parent(${output}) newfolders(`country'/`cnt_surveyname'/`cnt_master_folder')
+      edukit_rmkdir, parent(${output}) newfolders(`country'/`cnt_surveyname'/`cnt_folder'/Data/Harmonized)
+      local output_folder "`r(folder)'"
+
+      * Confirm that the file to be split exists
+      cap confirm file "`output_folder'/`cnt_filename'"
+      * If the file does not exist or overwrite_files local is set to one, run the do
+      if (_rc == 601) | (`overwrite_files') {
+
+        preserve
+        * Keep only observations for this country
+        keep if countrycode == "`country'"
+
+        * Save the file with obs only for this country in the country folder name
+        save "`output_folder'/`cnt_filename'", replace
+        noi di "{phang2}File [`cnt_filename'] saved (`surveyname' country `country_counter' of `country_total'){p_end}"
+        restore
+      }
+      else {
+        * Still show output even when file already exist
+        noi di "{phang2}File [`cnt_filename'] already exists (`surveyname' country `country_counter' of `country_total'){p_end}"
+      }
+    }
+    noi di ""
+  }
+end
+
+
+* Alias with the previous name of the program, kept for backward compatibility
+cap program drop split_region_2_country
+program define split_region_2_country, rclass
+  glad_split_region_2_country `0'
+end
diff --git a/05_adofiles/glad_toolkit.pkg b/05_adofiles/glad_toolkit.pkg
new file mode 100644
index 0000000..0eb165e
--- /dev/null
+++ b/05_adofiles/glad_toolkit.pkg
@@ -0,0 +1,8 @@
+v 1.3
+d glad_toolkit
+d EduAnalytics, World Bank Group (eduanalytics@worldbank.org)
+
+f /glad_local_folder_setup.ado
+f /glad_hpro_as_cpi.ado
+f /glad_split_region_2_country.ado
+e
diff --git a/05_adofiles/stata.toc b/05_adofiles/stata.toc
new file mode 100644
index 0000000..aea94ec
--- /dev/null
+++ b/05_adofiles/stata.toc
@@ -0,0 +1,3 @@
+v 1.3
+d EduAnalytics, World Bank Group (eduanalytics@worldbank.org)
+p glad_toolkit EduAnalytics