# Data Loader for World Modelers Dec 2020 Datasets

In [1]:
from pathlib import Path
import papermill as pm

# Base URLs of the datamart API instances; one of them is assigned to
# `datamart_api_url` in the configuration cell below.
# NOTE(review): the two remote URLs embed basic-auth credentials in source.
# Consider supplying them via environment variables instead of committing them.
WM_DATAMART_URL = 'https://datamart:datamart-api-789@dsbox02.isi.edu:8888/datamart-api-wm'
# The '@' between the credentials and the host was missing here, which made the
# URL parse with host 'datamart' rather than 'dsbox02.isi.edu'.
WM_TEST_URL = 'https://datamart:datamart-api-789@dsbox02.isi.edu:8888/datamart-api-d3m'
LOCAL_URL = 'http://localhost:12543'


In [2]:
# Datamart API endpoint to load into; passed as the `datamart_api_url`
# parameter to every DataLoader run below. Pick one of the URL constants above.
datamart_api_url = LOCAL_URL

# World Modelers shared data directory. The input spreadsheets are read from
# sub-directories of it, and the KGTK and executed-notebook outputs are written
# under it.
# NOTE(review): absolute, machine-specific path; adjust for your environment.
shared_data_dir = Path('/lfs1/ktyao/Shared/kgtk-private-data/world-modelers')

# Directory where the git repositories are cloned; the `datamart-api` checkout
# is expected directly inside it.
# NOTE(review): absolute, machine-specific path; adjust for your environment.
datamart_repos_dir = Path('/home/ktyao/dev/dsbox')

In [3]:
# Checkout of the datamart-api repository and its data-refresh folder, which
# holds the DataLoader.ipynb notebook executed for each dataset below.
datamart_dir = datamart_repos_dir.joinpath('datamart-api')
data_refresh_dir = datamart_dir.joinpath('data-refresh')

# Where the KGTK edge-file archives are stored
kgtk_dir = shared_data_dir.joinpath('kgtk')

# Where the executed copies of the notebook are written
executed_notebook_dir = shared_data_dir.joinpath('executed_notebook')

# MITRE-annotated input spreadsheets (source of the ACLED dataset below)
mitre_dir = shared_data_dir.joinpath('MITRE Annotated Datasets for Dec 2020')

# Upload datasets

## ACLED dataset

In [19]:
# Execute the shared DataLoader notebook against the annotated ACLED
# spreadsheet; the executed copy is saved as acled.ipynb.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'acled.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'annotated_path': str(mitre_dir / 'ACLED' / 'ACLED_1997-01-01-2020-11-12-Ethiopia_ANNOTATED_MODIFIED.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'ACLED.tar.gz'),
    },
);

HBox(children=(FloatProgress(value=0.0, description='Executing', max=21.0, style=ProgressStyle(description_wid…




## ETH-CROP dataset

In [15]:
# Execute the shared DataLoader notebook against the annotated ETH-CROP
# spreadsheet; the executed copy is saved as ETH-CROP.ipynb.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'ETH-CROP.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'annotated_path': str(shared_data_dir / 'Three Priority Datasets' / 'Jemal-Crop land Area- Production and Yield of Major Crops-2012-2018_Annotated_JS00.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'ETH-CROP.tar.gz'),
    },
);

HBox(children=(FloatProgress(value=0.0, description='Executing', max=21.0, style=ProgressStyle(description_wid…




## KIMETRICA-WFP dataset

In [14]:
# Execute the shared DataLoader notebook against the annotated KIMETRICA-WFP
# spreadsheet; the executed copy is saved as KIMETRICA-WFP.ipynb.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'KIMETRICA-WFP.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'annotated_path': str(shared_data_dir / 'Three Priority Datasets' / 'Jemal-Kimetrica Ethiopia Price Data with lat long_2014-2017_annotate_test.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'KIMETRICA-WFP.tar.gz'),
    },
);

HBox(children=(FloatProgress(value=0.0, description='Executing', max=21.0, style=ProgressStyle(description_wid…




## ETH-CENSUS dataset

In [25]:
# FIXME: this cell was marked "Does not work"; the cause is not recorded here.
# Unlike the other datasets, this run also passes a `yamlfile_path` parameter.
pm.execute_notebook(
    input_path=data_refresh_dir / 'DataLoader.ipynb',
    output_path=executed_notebook_dir / 'ETH-CENSUS.ipynb',
    parameters={
        'datamart_api_url': datamart_api_url,
        'annotated_path': str(shared_data_dir / 'Three Priority Datasets' / 'Population 2007.xlsx'),
        'save_tsv_path': str(kgtk_dir / 'ETH-CENSUS.tar.gz'),
        'yamlfile_path': str(shared_data_dir / 'Three Priority Datasets' / 'Population 2007.yaml'),
    },
);

HBox(children=(FloatProgress(value=0.0, description='Executing', max=21.0, style=ProgressStyle(description_wid…


