# This is a tutorial/demo on how to use the `datamart` upload api.

## Import python modules

In [1]:
from app import app
from requests import get,post,put
import json
import pandas as pd
from io import StringIO
from IPython.display import display, HTML

#### Before running this Jupyter Notebook, please make sure the web service is up and running by following the instructions here - https://github.com/usc-isi-i2/datamart-api

### Get all datasets 

**GET `/metadata/datasets`**

In [2]:
# Ask the service for the metadata of every dataset and pretty-print the JSON reply.
response = get('http://localhost:5000/metadata/datasets')
datasets_metadata = response.json()
print(json.dumps(datasets_metadata, indent=2))

[
  {
    "name": "OECD dataset",
    "description": "data downloaded from OECD",
    "url": "https://data.oecd.org",
    "dataset_id": "OECD"
  },
  {
    "name": "UAZ Indicators",
    "description": "Collection of indicators, including indicators from FAO, WDI, FEWSNET, CLiMIS, UNICEF, ieconomics.com, UNHCR, DSSAT, WHO, IMF, WHP, ACLDE, World Bank and IOM-DTM",
    "url": "https://github.com/ml4ai/delphi",
    "dataset_id": "UAZ"
  }
]


There are two datasets in the database: `UAZ` and `OECD`. We can also get metadata about a single dataset using its `dataset_id`.

### Get metadata about one dataset

**GET `/metadata/datasets/{dataset_id}`**

In [3]:
# Request the metadata of the OECD dataset only and pretty-print the JSON reply.
response = get('http://localhost:5000/metadata/datasets/OECD')
oecd_metadata = response.json()
print(json.dumps(oecd_metadata, indent=2))

[
  {
    "name": "OECD dataset",
    "description": "data downloaded from OECD",
    "url": "https://data.oecd.org",
    "dataset_id": "OECD"
  }
]


### Get all variables in a dataset 

**GET `/metadata/datasets/{dataset_id}/variables`**

In [4]:
# List the variables that belong to the OECD dataset.
response = get('http://localhost:5000/metadata/datasets/OECD/variables')
oecd_variables = response.json()
# Only the first four entries are printed to keep the output short.
print(json.dumps(oecd_variables[:4], indent=2))

[
  {
    "variable_id": "gdp_per_capita",
    "dataset_id": "OECD"
  },
  {
    "variable_id": "gross_national_income_gni_per_capita",
    "dataset_id": "OECD"
  },
  {
    "variable_id": "household_disposable_income",
    "dataset_id": "OECD"
  },
  {
    "variable_id": "real_gdp_growth",
    "dataset_id": "OECD"
  }
]


In [5]:
# Count the entries in the most recent `response` and report the total.
variable_count = len(response.json())
print('Total number of variables in dataset: {} is {}'.format('OECD', variable_count))

Total number of variables in dataset: OECD is 112


### Get metadata about one variable

**GET `/metadata/datasets/{dataset_id}/variables/{variable_id}`**

In [6]:
# Request the metadata of a single variable and pretty-print the JSON reply.
response = get('http://localhost:5000/metadata/datasets/OECD/variables/real_gdp_growth')
variable_metadata = response.json()
print(json.dumps(variable_metadata, indent=2))

{
  "variable_id": "real_gdp_growth",
  "dataset_id": "OECD",
  "description": "Real GDP growth in OECD",
  "corresponds_to_property": "POECD-005",
  "qualifier": [
    {
      "identifier": "P585",
      "name": null
    },
    {
      "identifier": "P248",
      "name": null
    }
  ]
}


### Find a variable using keyword search

**GET `/metadata/variables?keyword={keyword}`**

In [7]:
# Search the variable metadata for the keyword "road" and pretty-print the JSON reply.
response = get('http://localhost:5000/metadata/variables?keyword=road')
search_results = response.json()
print(json.dumps(search_results, indent=2))

{
  "message": "Internal Server Error"
}
{"message": "Internal Server Error"}

1


### Get time series data for a variable

**GET `/datasets/{dataset_id}/variables/{variable_id}`**

In [8]:
# Fetch the variable's data, parse the response text as CSV, and show it as an HTML table.
response = get('http://localhost:5000/datasets/OECD/variables/real_gdp_growth')
csv_buffer = StringIO(response.text)
df = pd.read_csv(csv_buffer)
table_html = df.to_html()
display(HTML(table_html))

Unnamed: 0,dataset_id,variable_id,variable,main_subject,main_subject_id,value,value_unit,time,time_precision,country,coordinate
0,OECD,real_gdp_growth,Real GDP growth,Russia,Q159,3.1,Annual growth %,2011-01-01T00:00:00Z,year,Russia,"POINT(100.0, 62.0)"
1,OECD,real_gdp_growth,Real GDP growth,Russia,Q159,3.7,Annual growth %,2012-01-01T00:00:00Z,year,Russia,"POINT(100.0, 62.0)"
2,OECD,real_gdp_growth,Real GDP growth,Russia,Q159,1.8,Annual growth %,2013-01-01T00:00:00Z,year,Russia,"POINT(100.0, 62.0)"
3,OECD,real_gdp_growth,Real GDP growth,Russia,Q159,0.7,Annual growth %,2014-01-01T00:00:00Z,year,Russia,"POINT(100.0, 62.0)"
4,OECD,real_gdp_growth,Real GDP growth,Russia,Q159,-2.3,Annual growth %,2015-01-01T00:00:00Z,year,Russia,"POINT(100.0, 62.0)"
5,OECD,real_gdp_growth,Real GDP growth,Russia,Q159,0.3,Annual growth %,2016-01-01T00:00:00Z,year,Russia,"POINT(100.0, 62.0)"
6,OECD,real_gdp_growth,Real GDP growth,Russia,Q159,1.6,Annual growth %,2017-01-01T00:00:00Z,year,Russia,"POINT(100.0, 62.0)"
7,OECD,real_gdp_growth,Real GDP growth,Russia,Q159,2.3,Annual growth %,2018-01-01T00:00:00Z,year,Russia,"POINT(100.0, 62.0)"
8,OECD,real_gdp_growth,Real GDP growth,Canada,Q16,3.1,Annual growth %,2011-01-01T00:00:00Z,year,Canada,"POINT(-109.0, 56.0)"
9,OECD,real_gdp_growth,Real GDP growth,Canada,Q16,1.8,Annual growth %,2012-01-01T00:00:00Z,year,Canada,"POINT(-109.0, 56.0)"


### Get time series data for a variable for a country

**GET `/datasets/{dataset_id}/variables/{variable_id}?country={country}`**

In [9]:
# Same request as before, restricted to one country through the `country` query parameter.
response = get('http://localhost:5000/datasets/OECD/variables/real_gdp_growth?country=Belgium')
csv_buffer = StringIO(response.text)
df = pd.read_csv(csv_buffer)
table_html = df.to_html()
display(HTML(table_html))

Unnamed: 0,dataset_id,variable_id,variable,main_subject,main_subject_id,value,value_unit,time,time_precision,country,coordinate
0,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.8,Annual growth %,2011-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
1,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,0.2,Annual growth %,2012-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
2,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,0.2,Annual growth %,2013-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
3,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.3,Annual growth %,2014-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
4,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.7,Annual growth %,2015-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
5,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.5,Annual growth %,2016-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
6,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.7,Annual growth %,2017-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
7,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.4,Annual growth %,2018-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"


### Upload data to a variable

Let's upload some data to the dataset `OECD` and the variable `real_gdp_growth`. We will upload the contents of the file `test_data/oecd_gdp_sample.csv`.

In [10]:
# Load the sample file that will be uploaded and preview its rows.
df = pd.read_csv(filepath_or_buffer='test_data/oecd_gdp_sample.csv')
df

Unnamed: 0,main_subject,value,value_unit,time,time_precision,country,admin1,place,source
0,Belgium,1.8,Annual growth %,2019-01-01T00:00:00Z,year,Belgium,,,FAO
1,Belgium,1.9,Annual growth %,2020-01-01T00:00:00Z,year,Belgium,,,FAO


**PUT `/datasets/{dataset_id}/variables/{variable_id}`**

In [11]:
import os
def upload_data(file_path, url):
    """Upload a local file with an HTTP PUT request and print the reply.

    The file is sent as a multipart form upload under the field name
    ``file`` with content type ``application/octet-stream``.

    Args:
        file_path: Path of the local file to upload.
        url: Endpoint that receives the upload.

    Returns:
        None. The JSON-decoded response body is printed.
    """
    file_name = os.path.basename(file_path)
    # Use a context manager so the file handle is closed as soon as the
    # request has been sent, even if the request raises.
    with open(file_path, mode='rb') as file_handle:
        files = {
            'file': (file_name, file_handle, 'application/octet-stream')
        }
        response = put(url, files=files)
    print(response.json())

In [12]:
# Send the sample CSV to the `real_gdp_growth` variable of the OECD dataset.
file_path = 'test_data/oecd_gdp_sample.csv'
url = 'http://localhost:5000/datasets/OECD/variables/real_gdp_growth'
upload_data(file_path, url)


2 rows imported!


Get the data for the variable `real_gdp_growth` again to check that the new rows were added.

In [14]:
# Re-query the Belgium series so the table can be compared with the one shown before the upload.
response = get('http://localhost:5000/datasets/OECD/variables/real_gdp_growth?country=Belgium')
csv_buffer = StringIO(response.text)
df = pd.read_csv(csv_buffer)
table_html = df.to_html()
display(HTML(table_html))

Unnamed: 0,dataset_id,variable_id,variable,main_subject,main_subject_id,value,value_unit,time,time_precision,country,coordinate
0,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.8,Annual growth %,2011-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
1,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,0.2,Annual growth %,2012-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
2,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,0.2,Annual growth %,2013-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
3,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.3,Annual growth %,2014-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
4,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.7,Annual growth %,2015-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
5,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.5,Annual growth %,2016-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
6,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.7,Annual growth %,2017-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
7,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.4,Annual growth %,2018-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
8,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.8,Annual growth %,2019-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"
9,OECD,real_gdp_growth,Real GDP growth,Belgium,Q31,1.9,Annual growth %,2020-01-01T00:00:00Z,year,Belgium,"POINT(4.6680555555556, 50.641111111111)"


Success! The 2 rows from 2019 and 2020 were added 