Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ado_files] migrate ado folder from GLAD-Production repo #2

Merged
merged 1 commit into from
Nov 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions 05_adofiles/glad_hpro_as_cpi.ado
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
*==============================================================================*
* PROGRAM: HARMONIZATION OF PROFICIENCY ON THE FLY (THRESHOLDS AS CPI IN GMD)
* Project information at: https://github.com/worldbank/GLAD
* Author: Diana Goldemberg
*==============================================================================*

/* The use case of this program is: GLAD.dta files have the value variables
score_* and level_* stored in them, but the harmonization according to
proficiency thresholds is done 'on the fly', like the CPI adjustment in the
GMD collection.

That is, datalibweb has the GLAD microdata stored without the harmonization,
and whenever a file is queried, the harmonization thresholds are merged
on the fly and the proficiency dummies are calculated.
*/

cap program drop glad_hpro_as_cpi
program define glad_hpro_as_cpi, rclass

  * Purpose: with a GLAD dataset in memory (must contain surveyid and idgrade),
  * merge in the proficiency-threshold triplets and create the harmonized
  * proficiency dummies named in the *_threshold_res variables.
  *
  * Takes no options. Declared rclass but stores nothing in r().
  *
  * Side effects on the data in memory:
  *   - adds the <prefix>_threshold_var/_val/_res variables from the thresholds
  *     file (triplets whose _res is missing for every observation are dropped)
  *   - (re)defines value label lb_hpro
  *   - creates one byte dummy per distinct value found in *_threshold_res
  *   - sets char _dta[onthefly_valuevars] and, when threshold variables
  *     exist, char _dta[onthefly_traitvars]
  *
  * Errors: exits with code 2222 when any surveyid-idgrade combination in
  * memory has no row in the thresholds file.

  syntax ,

  * Brings thresholds triplets defined in dta which should sit in DLW (our version of CPI.dta)
  * keep(master match) is required here: with keep(match) the unmatched master
  * observations would be silently dropped BEFORE the check below, which would
  * make the check (and its informative error message) unreachable.
  merge m:1 surveyid idgrade using "${clone}/01_harmonization/011_rawdata/lp_thresholds_as_cpi.dta", keep(master match) gen(merge_thresholds)

  * This merge should have an assert, but we want a more informative error message, thus we do it manually
  * Only codes 1 (master only) and 3 (matched) can remain after keep(master match)
  cap assert merge_thresholds == 3
  if _rc {
    noi di as error "{pstd}New surveys have been added to the GLAD collection, but not to the thresholds_as_cpi file in 011_raw_data. Please add the appropriate surveys in for the following cases:{p_end}"
    noi tab surveyid idgrade if merge_thresholds == 1
    error 2222
  }
  * Drop the merge_thresholds variables after the assert
  drop merge_thresholds

  * Each prefix_threshold is a triplet: prefix_threshold_var, prefix_threshold_val, prefix_threshold_res

  * Loop through all threshold triplets (specifically, prefix_threshold_res but could be val or var)
  ds *_threshold_res
  foreach threshold_res of varlist `r(varlist)' {

    * Strip the suffix to recover the triplet prefix
    local this_prefix = subinstr("`threshold_res'", "_threshold_res", "", 1)

    * Check if this_prefix was used for this assessment-year, or has all missing obs
    count if missing(`threshold_res')
    if `r(N)'<_N {
      * Not all observations are missing

      * Concatenate list of prefixes used
      local prefixes = "`prefixes' `this_prefix'"

      * Concatenate list of results to be created, in two steps
      * 1. loop through all results used in a prefix
      levelsof `threshold_res', local(resultvars_in_prefix)
      foreach resultvar of local resultvars_in_prefix {

        * 2. Update the list of results (unique entries only)
        local resultvars : list resultvars | resultvar
      }
    }

    else {
      * All observations are missing
      * Drop the threshold triplet, for it was not used at all
      drop `this_prefix'_threshold_*
    }

  }

  * Value labels for dummy variables of Harmonized Proficiency
  label define lb_hpro 0 "Non-proficient" 1 "Proficient" .a "Missing score/level" .b "Non-harmonized grade", replace

  * Generate all result variables as dummies which start empty
  * (labeled as if this grade was not being harmonized)
  foreach resultvar of local resultvars {
    gen byte `resultvar': lb_hpro = .b
    label var `resultvar' "Harmonized proficiency (subject-specific)"
    char `resultvar'[clo_marker] "dummy"
  }


  * Loop through all prefixes
  foreach prefix of local prefixes {

    * Retrieves list of variables used in the current prefix_threshold_var
    levelsof `prefix'_threshold_var, local(originalvars_used_in_prefix)

    * Loop through all variables used in the current prefix,
    * and performs the calculation based on it
    foreach originalvar of local originalvars_used_in_prefix {
      foreach resultvar of local resultvars {

        * Calculate the harmonized proficiency dummy, for example:
        * resultvar is hpro_read and originalvar is level_llece_read
        * Only rows whose triplet points at this (resultvar, originalvar) pair are touched
        replace `resultvar' = (`originalvar'>=`prefix'_threshold_val) if `prefix'_threshold_res == "`resultvar'" & `prefix'_threshold_var=="`originalvar'" & !missing(`originalvar')

        * Case of missing test score or test level
        replace `resultvar' = .a if `prefix'_threshold_res == "`resultvar'" & `prefix'_threshold_var == "`originalvar'" & missing(`originalvar')
      }
    }
  }

  * When this ado is called, a GLAD.dta is open and it should already
  * have the metadata as standardized in the collection. This adds more:
  char _dta[onthefly_valuevars] "`resultvars'"
  * Unabbreviate wildcards* in the threshold triplets variables
  * (rc 111 = none of the wildcard patterns matched an existing variable)
  cap unab thresholdvars : *_threshold_var *_threshold_val *_threshold_res
  if _rc == 111 noi disp as err "No harmonized minimum proficiency thresholds defined for this learning assessment."
  else char _dta[onthefly_traitvars] "`thresholdvars'"

end
53 changes: 53 additions & 0 deletions 05_adofiles/glad_local_folder_setup.ado
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
*==============================================================================*
* PROGRAM: SETUP LOCAL FOLDER STRUCTURE WHEN CREATING NEW GLAD MODULE
* Project information at: https://github.com/worldbank/GLAD
* Author: Kristoffer Bjarkefur
*==============================================================================*

cap program drop glad_local_folder_setup
program define glad_local_folder_setup, rclass

  * Creates (when missing) the output and TEMP folders of one GLAD module
  * under ${output}, and hands the resulting names back in r():
  *   r(surveyid)   = <region>_<year>_<assessment>_<master>
  *   r(output_dir) = ${output}/<region>/<region>_<year>_<assessment>
  *   r(temp_dir)   = ${output}/TEMP/TEMP_<region>_<year>_<assessment>
  * The adaptation() option is required by the syntax but not used below.
  syntax , Region(string) Year(string) ASsessment(string) MAster(string) ADaptation(string)

  * Shared stem for the folder names and the survey id
  local survey_stub "`region'_`year'_`assessment'"

  * Output branch: ${output}/<region>/<stub>
  makefolder, parent("${output}") newfolder("`region'")
  local region_dir "`r(folder)'"
  makefolder, parent("`region_dir'") newfolder("`survey_stub'")
  local final_dir "`r(folder)'"

  * Temporary branch: ${output}/TEMP/TEMP_<stub>
  makefolder, parent("${output}") newfolder("TEMP")
  local temp_root "`r(folder)'"
  makefolder, parent("`temp_root'") newfolder("TEMP_`survey_stub'")
  local scratch_dir "`r(folder)'"

  * Post all results once every folder is in place
  return local surveyid   "`survey_stub'_`master'"
  return local output_dir "`final_dir'"
  return local temp_dir   "`scratch_dir'"
end


cap program drop makefolder
program define makefolder, rclass

  * Ensures that folder <parent>/<newfolder> exists and returns its path
  * in r(folder). The parent must already exist (error 601 otherwise);
  * only the last level is ever created.
  syntax, parent(string) newfolder(string)

  * Full path of the folder we are asked to guarantee
  local target "`parent'/`newfolder'"

  * Guard clause: refuse to continue without the parent folder
  mata : st_numscalar("r(dirExist)", direxists("`parent'"))
  if !r(dirExist) {
    noi di as error `"{phang}Internal error glad_local_folder_setup.ado, folder [`parent'] does not exist{p_end}"'
    error 601
  }

  * Create the target only when it is not there yet
  mata : st_numscalar("r(dirExist)", direxists("`target'"))
  if !r(dirExist) mkdir "`target'"

  return local folder "`target'"

end
80 changes: 80 additions & 0 deletions 05_adofiles/glad_split_region_2_country.ado
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
*==============================================================================*
* PROGRAM: SPLIT REGIONS INTO COUNTRIES WHEN COPYING TO DLW ROOT
* Project information at: https://github.com/worldbank/GLAD
* Author: Kristoffer Bjarkefur
*==============================================================================*

cap program drop split_region_2_country
program define split_region_2_country, rclass

  * Purpose: open the regional/global file `floc'/`fname' and save one copy
  * per value of countrycode under ${output}, replacing the region token
  * (first "_"-separated piece of fname) by the country code in the file,
  * survey and folder names.
  *
  * Options:
  *   fname()           file name, expected to start with region_year_assessment
  *   floc()            folder where fname sits
  *   overwrite_files() numeric flag; non-zero re-saves files that already exist
  *   regionfolder()    regional folder name, mapped to the country folder name
  *   masterfolder()    regional master folder name, mapped likewise
  *
  * Relies on edukit_rmkdir (not defined in this file) to create the folders.
  * Replaces the data in memory with the regional file. Exits with 601 when
  * the file has no countrycode variable.

  syntax , fname(string) floc(string) OVERWrite_files(numlist) regionfolder(string) masterfolder(string)

  qui {

    * Get region, year and assessment from file name
    * (gettoken leaves the "_" separator at the front of rest, hence the substr)
    gettoken region rest : fname , parse("_")
    local rest = substr("`rest'", 2, .)
    gettoken year rest : rest , parse("_")
    local rest = substr("`rest'", 2, .)
    gettoken assessment rest : rest , parse("_")

    * Generate the surveyname for this file
    local surveyname "`region'_`year'_`assessment'"

    * Open up the global/regional file
    use "`floc'/`fname'", clear

    cap confirm variable countrycode
    if _rc {
      noi di as error `"{phang}The file [`floc'/`fname'] does not have a variable called {inp:countrycode} which is required for all files to be split by country.{p_end}"'
      error 601
    }

    * List all the country codes in this file
    levelsof countrycode, clean local(countries)

    noi di "{phang2}Countries included: [`countries']{p_end}"

    local country_counter = 0
    local country_total : word count `countries'

    * Loop over all countries
    foreach country of local countries {

      * Increment the counter for screen output
      local ++country_counter

      * Create the country file and folder name from the region file and folder name
      * (only the first occurrence of the region token is replaced)
      local cnt_surveyname    = subinstr("`surveyname'","`region'", "`country'", 1)
      local cnt_filename      = subinstr("`fname'" ,"`region'", "`country'", 1)
      local cnt_folder        = subinstr("`regionfolder'" ,"`region'", "`country'", 1)
      local cnt_master_folder = subinstr("`masterfolder'" ,"`region'", "`country'", 1)


      * Creates folder if not already exists
      edukit_rmkdir, parent(${output}) newfolders(`country'/`cnt_surveyname'/`cnt_master_folder')
      edukit_rmkdir, parent(${output}) newfolders(`country'/`cnt_surveyname'/`cnt_folder'/Data/Harmonized)
      * r(folder) from the second call is where the country file is saved
      local output_folder "`r(folder)'"

      * Confirm that the file to be split exists
      cap confirm file "`output_folder'/`cnt_filename'"
      * If the file does not exist or overwrite_files local is set to one, run the do
      if (_rc == 601) | (`overwrite_files') {

        * preserve/restore keeps the full regional data for the next country
        preserve
          * Keep only observations for this country
          keep if countrycode == "`country'"

          * Save the file with obs only for this country in the country folder name
          save "`output_folder'/`cnt_filename'", replace
          noi di "{phang2}File [`cnt_filename'] saved (`surveyname' country `country_counter' of `country_total'){p_end}"
        restore
      }
      else {
        * Still show output even when file already exist
        noi di "{phang2}File [`cnt_filename'] already exists (`surveyname' country `country_counter' of `country_total'){p_end}"
      }
    }
    noi di ""
  }
end

* Entry point named after the ado file (glad_split_region_2_country.ado).
* Stata autoloads an ado file only when it defines a program with the same
* name as the file, so without this wrapper the command cannot be found after
* installing the package. It forwards the full command line unchanged.
cap program drop glad_split_region_2_country
program define glad_split_region_2_country, rclass
  split_region_2_country `0'
  return add
end
8 changes: 8 additions & 0 deletions 05_adofiles/glad_toolkit.pkg
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
v 1.3
d glad_toolkit
d EduAnalytics, World Bank Group (eduanalytics@worldbank.org)

f /glad_local_folder_setup.ado
f /glad_hpro_as_cpi.ado
f /glad_split_region_2_country.ado
e
3 changes: 3 additions & 0 deletions 05_adofiles/stata.toc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
v 1.3
d EduAnalytics, World Bank Group (eduanalytics@worldbank.org)
p glad_toolkit EduAnalytics