# Data Loader for World Modelers Dec 2020 Datasets

In [1]:
from pathlib import Path
import papermill as pm

# Datamart API endpoints this loader can target.
# NOTE(review): the two remote URLs embed basic-auth credentials in source;
# consider reading them from the environment instead of committing them.
WM_DATAMART_URL = 'https://datamart:datamart-api-789@dsbox02.isi.edu:8888/datamart-api-wm'
# The '@' separating the userinfo from the host was missing here, which made
# the URL malformed (the host would have been parsed as 'datamart').
WM_TEST_URL = 'https://datamart:datamart-api-789@dsbox02.isi.edu:8888/datamart-api-d3m'
LOCAL_URL = 'http://localhost:12543'

# Credentials passed as notebook parameters by the upload cells that accept
# them; left as None here.
username = None
password = None

In [2]:
# Datamart URL that the upload cells pass to DataLoader.ipynb as
# `datamart_api_url`. Switch to WM_DATAMART_URL / WM_TEST_URL to target a
# remote server.
datamart_api_url = LOCAL_URL

# World Modelers shared data directory: root of the annotated spreadsheets,
# the KGTK output files and the executed notebooks.
# NOTE(review): machine-specific absolute path; adjust for your environment.
shared_data_dir = Path('/lfs1/ktyao/Shared/kgtk-private-data/world-modelers')

# Directory where the git repositories are cloned.
# NOTE(review): machine-specific absolute path; adjust for your environment.
datamart_repos_dir = Path('/home/ktyao/dev/dsbox')

In [3]:
# Checkout of the datamart-api repository and its data-refresh folder, which
# holds the DataLoader.ipynb notebook run by the cells below.
datamart_dir = datamart_repos_dir.joinpath('datamart-api')
data_refresh_dir = datamart_dir.joinpath('data-refresh')

# Locations under the shared data directory.
# KGTK edge files (the `save_tsv_path` archives):
kgtk_dir = shared_data_dir.joinpath('kgtk')
# Executed copies of the notebook written by papermill:
executed_notebook_dir = shared_data_dir.joinpath('executed_notebook')
# MITRE annotated spreadsheets:
mitre_dir = shared_data_dir.joinpath('MITRE Annotated Datasets for Dec 2020')

# Upload datasets

## ACLED dataset

In [19]:
# Run DataLoader.ipynb on the ACLED annotated workbook; papermill writes the
# executed copy to executed_notebook_dir / 'acled.ipynb'.
# The trailing ';' keeps Jupyter from echoing the returned notebook object.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'acled.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'annotated_path': str(mitre_dir / 'ACLED' / 'ACLED_1997-01-01-2020-11-12-Ethiopia_ANNOTATED_MODIFIED.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'ACLED.tar.gz'),
    },
);

HBox(children=(FloatProgress(value=0.0, description='Executing', max=21.0, style=ProgressStyle(description_wid…




## ETH-CROP dataset

In [15]:
# Run DataLoader.ipynb on the ETH-CROP annotated workbook; papermill writes
# the executed copy to executed_notebook_dir / 'ETH-CROP.ipynb'.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'ETH-CROP.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'annotated_path': str(shared_data_dir / 'Three Priority Datasets' / 'Jemal-Crop land Area- Production and Yield of Major Crops-2012-2018_Annotated_JS00.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'ETH-CROP.tar.gz'),
    },
);

HBox(children=(FloatProgress(value=0.0, description='Executing', max=21.0, style=ProgressStyle(description_wid…




## KIMETRICA-WFP dataset

In [14]:
# Run DataLoader.ipynb on the KIMETRICA-WFP annotated workbook; papermill
# writes the executed copy to executed_notebook_dir / 'KIMETRICA-WFP.ipynb'.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'KIMETRICA-WFP.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'annotated_path': str(shared_data_dir / 'Three Priority Datasets' / 'Jemal-Kimetrica Ethiopia Price Data with lat long_2014-2017_annotate_test.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'KIMETRICA-WFP.tar.gz'),
    },
);

HBox(children=(FloatProgress(value=0.0, description='Executing', max=21.0, style=ProgressStyle(description_wid…




## ETH-CENSUS dataset

In [25]:
# ETH-CENSUS load. Unlike the cells above, this one also supplies a YAML file
# through `yamlfile_path`.
# NOTE: marked by the original author as "Does not work".
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'ETH-CENSUS.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'annotated_path': str(shared_data_dir / 'Three Priority Datasets' / 'Population 2007.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'ETH-CENSUS.tar.gz'),
        'yamlfile_path': str(shared_data_dir / 'Three Priority Datasets' / 'Population 2007.yaml'),
    },
);

HBox(children=(FloatProgress(value=0.0, description='Executing', max=21.0, style=ProgressStyle(description_wid…




## CHIRPS dataset
`2 files`: Sequential upload required

In [None]:
# CHIRPS, file 1 of 2 (Ethiopia daily precipitation). Must run before the
# Oromia cell that follows.
# NOTE(review): no `dataset_id` is passed for this first file; presumably the
# loader derives it from the annotated workbook - confirm in DataLoader.ipynb.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'CHIRPS.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'CHIRPSEthiopiaDailyPrecip_ANNOTATED-modified.xlsx'),
        'put_data': True,
        'DEBUG': False,
    },
);

In [None]:
# CHIRPS, file 2 of 2 (Oromia daily precipitation), loaded into the existing
# 'CHIRPS' dataset id.
# NOTE(review): this writes the same executed-notebook path as the preceding
# CHIRPS cell, so that run's executed notebook is overwritten.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'CHIRPS.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'CHIRPS',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'CHIRPSOromiaDailyPrecip_1981-01-01_2020-08-31_ANNOTATED-modified.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'CHIRPS.tar.gz'),
        'put_data': False,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

## MERRA2 Re-analysis dataset

In [None]:
# MERRA2 Re-analysis: single annotated workbook (Oromia daily precipitation).
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'MERRA2 Re-analysis.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'MERRA2 Re-analysis',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'MERRA2OromiaDailyPrecip_1981-01-01_2020-09-30_ANNOTATED-modified.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'MERRA2 Re-analysis.tar.gz'),
        'put_data': True,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

## ERA5 dataset

In [None]:
# ERA5: single annotated workbook (Oromia daily mean temperature).
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'ERA5.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'ERA5',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'ERA5OromiaDailyMeanTemp_1981-01-01_2020-07-09_ANNOTATED-modified.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'ERA5.tar.gz'),
        'put_data': True,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

## TerraClimate (11 datasets)
`2 files`: Sequential upload required for `5 variables`

In [None]:
# TerraClimate actual evapotranspiration: two files loaded in sequence into
# the 'TerraClimateET' dataset id (Ethiopia first, then Oromia).
# NOTE(review): both runs write the same executed-notebook path, so only the
# second run's executed notebook is kept.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'TerraClimateET.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'TerraClimateET',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'TerraClimateEthiopiaMonthlyActualEvapotranspiration_ANNOTATED.xlsx'),
        'put_data': True,
        'DEBUG': False,
    },
);

pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'TerraClimateET.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'TerraClimateET',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'TerraClimateOromiaMonthlyActualEvapotranspiraton_ANNOTATED-modified.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'TerraClimateET.tar.gz'),
        'put_data': False,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

In [None]:
# TerraClimate maximum temperature: two files loaded in sequence into the
# 'TerraClimateMaxTemp' dataset id (Ethiopia first, then Oromia).
# NOTE(review): both runs write the same executed-notebook path, so only the
# second run's executed notebook is kept.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'TerraClimateMaxTemp.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'TerraClimateMaxTemp',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'TerraClimateEthiopiaMonthlyMaxTemp_ANNOTATED.xlsx'),
        'put_data': True,
        'DEBUG': False,
    },
);

pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'TerraClimateMaxTemp.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'TerraClimateMaxTemp',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'TerraClimateOromiaMonthlyMaxTemp_ANNOTATED-modified.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'TerraClimateMaxTemp.tar.gz'),
        'put_data': False,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

In [None]:
# TerraClimate minimum temperature: two files loaded in sequence into the
# 'TerraClimateMinTemp' dataset id (Ethiopia first, then Oromia).
# NOTE(review): both runs write the same executed-notebook path, so only the
# second run's executed notebook is kept.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'TerraClimateMinTemp.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'TerraClimateMinTemp',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'TerraClimateEthiopiaMonthlyMinTemp_ANNOTATED.xlsx'),
        'put_data': True,
        'DEBUG': False,
    },
);

pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'TerraClimateMinTemp.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'TerraClimateMinTemp',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'TerraClimateOromiaMonthlyMinTemp_ANNOTATED-modified.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'TerraClimateMinTemp.tar.gz'),
        'put_data': False,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

In [None]:
# TerraClimate precipitation: two files loaded in sequence into the
# 'TerraClimatePrecipitation' dataset id (Ethiopia first, then Oromia).
# NOTE(review): both runs write the same executed-notebook path, so only the
# second run's executed notebook is kept.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'TerraClimatePrecipitation.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'TerraClimatePrecipitation',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'TerraClimateEthiopiaMonthlyPrecip_ANNOTATED.xlsx'),
        'put_data': True,
        'DEBUG': False,
    },
);

pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'TerraClimatePrecipitation.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'TerraClimatePrecipitation',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'TerraClimateOromiaMonthlyPrecip_ANNOTATED-modified.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'TerraClimatePrecipitation.tar.gz'),
        'put_data': False,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

In [None]:
# TerraClimate Palmer Drought Severity Index (PDSI): two files loaded in
# sequence into the 'TerraClimatePDSI' dataset id (Ethiopia first, then Oromia).
# NOTE(review): both runs write the same executed-notebook path, so only the
# second run's executed notebook is kept.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'TerraClimatePDSI.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'TerraClimatePDSI',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'TerraClimateEthiopiaMonthlyPalmerDroughtSeverityIndex_ANNOTATED.xlsx'),
        'put_data': True,
        'DEBUG': False,
    },
);

pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'TerraClimatePDSI.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'TerraClimatePDSI',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'TerraClimateOromiaMonthlyPalmerDroughtSeverityIndex_ANNOTATED-modified.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'TerraClimatePDSI.tar.gz'),
        'put_data': False,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

In [None]:
# TerraClimate soil moisture: single annotated workbook (Ethiopia, monthly).
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'TerraClimateSoilMoisture.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'TerraClimateSoilMoisture',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Climate and Soil Indicators' / 'TerraClimateEthiopiaMonthlySoilMoisture_ANNOTATED.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'TerraClimateSoilMoisture.tar.gz'),
        'put_data': True,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

## DHS_Nutrition dataset
`2 files`: Sequential upload required

In [None]:
# DHS_Nutrition: two files loaded in sequence. The first run passes no
# `dataset_id`; the second targets the 'DHS_Nutrition' dataset id.
# NOTE(review): both runs write the same executed-notebook path, so only the
# second run's executed notebook is kept.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'DHS_Nutrition.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'DHS Nutrition Data' / 'DHS Nutrition STATcompilerExport2020721_161028_ANNOTATED.xlsx'),
        'put_data': True,
        'DEBUG': False,
    },
);

pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'DHS_Nutrition.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'DHS_Nutrition',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'DHS Nutrition Data' / 'DHS Nutrition STATcompilerExport2020721_161028_Totals_ANNOTATED.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'DHS_Nutrition.tar.gz'),
        'put_data': False,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

## IHME_Wasting_Data dataset
`2 files`: Sequential upload required

In [None]:
# IHME_Wasting_Data, file 1 of 2 (ADMIN_1 workbook). Must run before the
# ADMIN_2 cell that follows.
# NOTE(review): no `dataset_id` is passed for this first file; presumably the
# loader derives it from the annotated workbook - confirm in DataLoader.ipynb.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'IHME_Wasting_Data.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'IHME' / 'IHME_LMIC_CGF_2000_2017_WASTING_PREV_ADMIN_1_Y2020M01D08_ANNOTATED.xlsx'),
        'put_data': True,
        'DEBUG': False,
    },
);

In [None]:
# IHME_Wasting_Data, file 2 of 2 (ADMIN_2 workbook), loaded into the
# 'IHME_Wasting_Data' dataset id.
# NOTE(review): this writes the same executed-notebook path as the preceding
# IHME cell, so that run's executed notebook is overwritten.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'IHME_Wasting_Data.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'IHME' / 'IHME_LMIC_CGF_2000_2017_WASTING_PREV_ADMIN_2_Y2020M01D08_ANNOTATED.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'IHME_Wasting_Data.tar.gz'),
        'dataset_id': 'IHME_Wasting_Data',
        'put_data': False,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

## WFP_Food_Prices dataset

In [None]:
# WFP_Food_Prices: single annotated workbook.
# The space before '_updated' in the file name is part of the actual name.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'WFP_Food_Prices.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'WFP_Food_Prices',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Food price data' / 'wfp_food_prices_ethiopia _updated June 2020_ANNOTATED-modified.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'WFP_Food_Prices.tar.gz'),
        'put_data': True,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

## Kimetrica_Price_Data dataset

In [None]:
# Kimetrica_Price_Data: single annotated workbook.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'Kimetrica_Price_Data.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'Kimetrica_Price_Data',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'Food price data' / 'Kimetrica Ethiopia Price Data with lat long_2014-2017_ANNOTATED-modified.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'Kimetrica_Price_Data.tar.gz'),
        'put_data': True,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

## FAO_Locust_Swarms dataset

In [None]:
# FAO_Locust_Swarms: annotated workbook plus a YAML file (`yamlfile_path`),
# run with validate=False.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'FAO_Locust_Swarms.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'FAO_Locust_Swarms',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'FAO Locust Data' / 'Swarms-Simplified2-Ethiopia_ANNOTATED-modified.xlsx'),
        'yamlfile_path': str(mitre_dir / 'FAO Locust Data' / 'swarms.yaml'),
        'save_tsv_path': str(kgtk_dir / 'FAO_Locust_Swarms.tar.gz'),
        'put_data': True,
        'DEBUG': True,
        'validate': False,
        'TEST_ALL': True,
    },
);

## FAO_Locust_Control dataset

In [None]:
# FAO_Locust_Control: annotated workbook plus a YAML file (`yamlfile_path`),
# run with validate=False.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'FAO_Locust_Control.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'FAO_Locust_Control',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'FAO Locust Data' / 'Control_Operations-Simplified2-Ethiopia_ANNOTATED-modified.xlsx'),
        'yamlfile_path': str(mitre_dir / 'FAO Locust Data' / 'control.yaml'),
        'save_tsv_path': str(kgtk_dir / 'FAO_Locust_Control.tar.gz'),
        'put_data': True,
        'validate': False,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);

## FAO_Locust_Adults dataset

In [None]:
# FAO_Locust_Adults: annotated workbook plus a YAML file (`yamlfile_path`),
# run with validate=False.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'FAO_Locust_Adults.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'dataset_id': 'FAO_Locust_Adults',
        'username': username,
        'password': password,
        'annotated_path': str(mitre_dir / 'FAO Locust Data' / 'Adults-Simplified2-Ethiopia_ANNOTATED-modified.xlsx'),
        'yamlfile_path': str(mitre_dir / 'FAO Locust Data' / 'adults.yaml'),
        'save_tsv_path': str(kgtk_dir / 'FAO_Locust_Adults.tar.gz'),
        'put_data': True,
        'validate': False,
        'DEBUG': True,
        'TEST_ALL': True,
    },
);