In [3]:
import os
import pandas as pd
import subprocess

In [4]:
def get_git_root():
    """Return the absolute path of the enclosing Git repository's top-level directory.

    Runs ``git rev-parse --show-toplevel`` in the current working directory,
    so the result depends on where the kernel was started.

    Returns:
        str: The repository root path, with surrounding whitespace removed.

    Raises:
        Exception: If the current directory is not inside a Git repository,
            or if the ``git`` executable cannot be found. The underlying
            error is attached as ``__cause__`` for debugging.
    """
    try:
        raw_output = subprocess.check_output(["git", "rev-parse", "--show-toplevel"])
    except subprocess.CalledProcessError as err:
        # git ran but exited non-zero, e.g. when invoked outside a work tree.
        raise Exception("This is not a Git repository") from err
    except FileNotFoundError as err:
        # The git binary itself is missing; say so instead of leaking a bare
        # FileNotFoundError about "git" to the notebook user.
        raise Exception(
            "Git executable not found; cannot locate the repository root"
        ) from err
    return raw_output.strip().decode("utf-8")


# Resolve the Git repository's root directory once; it is used below as the
# base for the data paths. git is invoked without an explicit working
# directory, so the notebook must be run from somewhere inside the repository.
repo_root = get_git_root()

In [6]:
# Survey rounds to load. Each key is a country folder name and each value lists
# the year sub-folder names (strings, because they are used as path components);
# the loading loop joins them as
# <repo_root>/Code/WealthIndex/data/DHS/<country>/<year>.
DHS_dict = {
    "Senegal_Data": ["2023", "2019", "2018", "2017", "2016", "2015"],
    "Ghana_Data": ["2022"],
}

In [8]:
def _find_dta_file(directory):
    """Return the name of one Stata file inside ``directory``, or None.

    The ``.dta`` extension is matched case-insensitively, and candidates are
    sorted so the pick does not depend on the arbitrary ordering of
    ``os.listdir``. A missing path (or a path that is not a directory)
    yields None instead of raising, so the caller can report it.
    """
    try:
        entries = os.listdir(directory)
    except (FileNotFoundError, NotADirectoryError):
        return None
    candidates = sorted(f for f in entries if f.lower().endswith(".dta"))
    return candidates[0] if candidates else None


# Load one DataFrame per survey folder, keyed "<country>_<year>".
# Folders that are missing or contain no Stata file are reported and skipped.
dfs = {}
for country, years in DHS_dict.items():
    for year in years:
        file_path = os.path.join(
            repo_root, "Code", "WealthIndex", "data", "DHS", country, year
        )
        file_name = _find_dta_file(file_path)
        if file_name:
            df = pd.read_stata(os.path.join(file_path, file_name))
            dfs[f"{country}_{year}"] = df
        else:
            print(f"Could not find {country} {year}")

In [None]:
# Column names of interest. The list is not consumed in this part of the
# notebook; presumably it is used later to subset the loaded survey frames.
# NOTE(review): confirm every name exists in each survey round before selecting,
# since the rounds may not share an identical set of columns.
columns_important = [
    "hv000",
    "hv001",
    "hv005",
    "hv009",
    "hv014",
    "hv025",
    "hv201",
    "hv202",
    "hv204",
    "hv205",
    "hv206",
    "hv207",
    "hv208",
    "hv209",
    "hv210",
    "hv211",
    "hv212",
    "hv213",
    "hv214",
    "hv215",
    "hv216",
    "hv217",
    "hv219",
    "hv220",
    "hv221",
    "hv225",
    "hv226",
    "hv227",
    "hv228",
    "hv230a",
    "hv230b",
    "hv232",
    "hv232b",
    "hv232y",
    "hv234a",
    "hv235",
    "hv237",
    "hv237a",
    "hv237b",
    "hv237c",
    "hv237d",
    "hv237e",
    "hv237f",
    "hv237x",
    "hv237z",
    "hv238",
    "hv240",
    "hv241",
    "hv242",
    "hv243a",
    "hv243b",
    "hv243c",
    "hv243d",
    "hv244",
    "hv245",
    "hv246",
    "hv246a",
    "hv246b",
    "hv246c",
    "hv246d",
    "hv246e",
    "hv246f",
    "hv246g",
    "hv246h",
    "hv247",
    "hml1",
    "hml2",
]

Unnamed: 0,hhid,hv000,hv001,hv002,hv003,hv004,hv005,hv006,hv007,hv008,...,hml36_57,hml36_58,hml36_59,hml36_60,hml36_61,hml36_62,hml36_63,hml36_64,hml36_65,hml36_66
0,1 1,SN6,1,1,1,1,3371553,4,2015,1384,...,,,,,,,,,,
1,1 2,SN6,1,2,1,1,3371553,4,2015,1384,...,,,,,,,,,,
2,1 3,SN6,1,3,1,1,3371553,4,2015,1384,...,,,,,,,,,,
3,1 4,SN6,1,4,1,1,3371553,4,2015,1384,...,,,,,,,,,,
4,1 5,SN6,1,5,1,1,3371553,4,2015,1384,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4506,214 18,SN6,214,18,6,214,413549,10,2015,1390,...,,,,,,,,,,
4507,214 19,SN6,214,19,1,214,413549,10,2015,1390,...,,,,,,,,,,
4508,214 20,SN6,214,20,1,214,413549,10,2015,1390,...,,,,,,,,,,
4509,214 21,SN6,214,21,4,214,413549,10,2015,1390,...,,,,,,,,,,
