In [1]:
# Base URL of the Datamart REST API; every request in this notebook is built from it.
# Points at a Datamart server running on localhost in development mode.
datamart_api_url = 'http://localhost:12543'

In [2]:
import os
import json
import pandas as pd
from io import StringIO
from IPython.display import display, HTML
from requests import get,post,put,delete

In [9]:
def upload_data_put(file_path, url):
    """Upload a local file to ``url`` with an HTTP PUT and print the JSON reply.

    The file is sent as a multipart form field named ``file`` with content
    type ``application/octet-stream``; the filename of that field is the base
    name of ``file_path``.

    Args:
        file_path: Path of the file to upload.
        url: Endpoint that receives the PUT request.

    Raises:
        OSError: If ``file_path`` cannot be opened for reading.
    """
    file_name = os.path.basename(file_path)
    # Keep the handle inside a context manager so it is closed as soon as the
    # request has completed (or failed) instead of leaking until garbage collection.
    with open(file_path, mode='rb') as file_handle:
        files = {
            'file': (file_name, file_handle, 'application/octet-stream')
        }
        response = put(url, files=files)

    print(json.dumps(response.json(), indent=2))

### Create a Test Dataset to be deleted later

**POST `/metadata/datasets`**

In [5]:
# Metadata describing the throwaway dataset that the rest of the notebook creates and deletes
test_dataset = dict(
    name="delete-test",
    dataset_id="delete-test",
    description="will be deleted",
    url="http://test01.com/test",
)

In [6]:
# POST the dataset definition, then pretty-print the JSON body of the reply
td_response = post(
    f'{datamart_api_url}/metadata/datasets',
    json=test_dataset,
)
td_payload = td_response.json()
print(json.dumps(td_payload, indent=2))

{
  "name": "delete-test",
  "description": "will be deleted",
  "url": "http://test01.com/test",
  "dataset_id": "delete-test"
}


### Upload an annotated file to the dataset

**PUT `/datasets/{dataset_id}/annotated`**

In [10]:
# Annotated spreadsheet to upload, and the `annotated` endpoint of the target dataset
file_path = 'test/test_data/04_worker_incidents_delete_test.xlsx'
url = f'{datamart_api_url}/datasets/delete-test/annotated'
upload_data_put(file_path, url)

[
  {
    "name": "UN workers",
    "variable_id": "un",
    "dataset_id": "delete-test",
    "description": "number of UN workers affected",
    "corresponds_to_property": "PVARIABLE-Qdelete-test-003",
    "qualifier": [
      {
        "name": "located in the administrative territorial entity",
        "identifier": "P131"
      },
      {
        "name": "stated in",
        "identifier": "P248"
      },
      {
        "name": "point in time",
        "identifier": "P585"
      }
    ]
  },
  {
    "name": "International NGO",
    "variable_id": "ingo",
    "dataset_id": "delete-test",
    "description": "number of NGO workers affected",
    "corresponds_to_property": "PVARIABLE-Qdelete-test-004",
    "qualifier": [
      {
        "name": "located in the administrative territorial entity",
        "identifier": "P131"
      },
      {
        "name": "stated in",
        "identifier": "P248"
      },
      {
        "name": "point in time",
        "identifier": "P585"
      }
   

### Variables in the dataset `delete-test`

**GET `/metadata/datasets/{dataset_id}/variables`**

In [11]:
# Request the variable metadata of the dataset and pretty-print the JSON reply
response = get(f'{datamart_api_url}/metadata/datasets/delete-test/variables')
variables_payload = response.json()
print(json.dumps(variables_payload, indent=2))


[
  {
    "name": "International NGO",
    "variable_id": "ingo",
    "description": "number of NGO workers affected",
    "corresponds_to_property": "PVARIABLE-Qdelete-test-004",
    "qualifier": [
      {
        "name": "located in the administrative territorial entity",
        "identifier": "P131"
      },
      {
        "name": "stated in",
        "identifier": "P248"
      },
      {
        "name": "point in time",
        "identifier": "P585"
      }
    ]
  },
  {
    "name": "UN workers",
    "variable_id": "un",
    "description": "number of UN workers affected",
    "corresponds_to_property": "PVARIABLE-Qdelete-test-003",
    "qualifier": [
      {
        "name": "located in the administrative territorial entity",
        "identifier": "P131"
      },
      {
        "name": "stated in",
        "identifier": "P248"
      },
      {
        "name": "point in time",
        "identifier": "P585"
      }
    ]
  }
]


### Get Data in variables `un` and `ingo` in the dataset `delete-test`

**GET `/datasets/{dataset_id}/variables/{variable_id}`**

In [13]:
# Fetch the data of variable `un`, parse the response text as CSV and preview the first rows
response = get(f'{datamart_api_url}/datasets/delete-test/variables/un')
csv_buffer = StringIO(response.text)
df = pd.read_csv(csv_buffer)
preview = df.fillna('').head()
display(HTML(preview.to_html(index=False)))

dataset_id,variable_id,variable,main_subject,main_subject_id,value,value_unit,time,time_precision,country,admin1,admin2,admin3,region_coordinate,stated_in,stated_in_id,located in the administrative territorial entity,stated in
delete-test,un,UN workers,Incident ID 103,QQdelete_test_Incident_ID_103,0.0,count,2000-02-04T00:00:00Z,,,,,,,,,Ethiopia,
delete-test,un,UN workers,Incident ID 1060,QQdelete_test_Incident_ID_1060,1.0,count,2010-06-18T00:00:00Z,,,,,,,,,Ethiopia,
delete-test,un,UN workers,Incident ID 1069,QQdelete_test_Incident_ID_1069,2.0,count,2010-03-23T00:00:00Z,,,,,,,,,Ethiopia,
delete-test,un,UN workers,Incident ID 1102,QQdelete_test_Incident_ID_1102,4.0,count,2011-05-13T00:00:00Z,,,,,,,,,Ethiopia,
delete-test,un,UN workers,Incident ID 1281,QQdelete_test_Incident_ID_1281,1.0,count,2011-08-05T00:00:00Z,,,,,,,,,Ethiopia,


In [14]:
# Fetch the data of variable `ingo`, parse the response text as CSV and preview the first rows
response = get(f'{datamart_api_url}/datasets/delete-test/variables/ingo')
csv_buffer = StringIO(response.text)
df = pd.read_csv(csv_buffer)
preview = df.fillna('').head()
display(HTML(preview.to_html(index=False)))

dataset_id,variable_id,variable,main_subject,main_subject_id,value,value_unit,time,time_precision,country,admin1,admin2,admin3,region_coordinate,stated_in,stated_in_id,located in the administrative territorial entity,stated in
delete-test,ingo,International NGO,Incident ID 103,QQdelete_test_Incident_ID_103,2.0,count,2000-02-04T00:00:00Z,,,,,,,,,Ethiopia,
delete-test,ingo,International NGO,Incident ID 1060,QQdelete_test_Incident_ID_1060,0.0,count,2010-06-18T00:00:00Z,,,,,,,,,Ethiopia,
delete-test,ingo,International NGO,Incident ID 1069,QQdelete_test_Incident_ID_1069,0.0,count,2010-03-23T00:00:00Z,,,,,,,,,Ethiopia,
delete-test,ingo,International NGO,Incident ID 1102,QQdelete_test_Incident_ID_1102,0.0,count,2011-05-13T00:00:00Z,,,,,,,,,Ethiopia,
delete-test,ingo,International NGO,Incident ID 1281,QQdelete_test_Incident_ID_1281,0.0,count,2011-08-05T00:00:00Z,,,,,,,,,Ethiopia,


### Delete data for variable `un` in the dataset `delete-test`

**DELETE `/datasets/{dataset_id}/variables/{variable_id}`**

In [17]:
# Send DELETE to the data endpoint of variable `un` and pretty-print the JSON reply
response = delete(f'{datamart_api_url}/datasets/delete-test/variables/un')
delete_payload = response.json()
print(json.dumps(delete_payload, indent=2))

{
  "Message": "Canonical data for Variable: un in Dataset: delete-test is deleted."
}


#### Is it really deleted, though?

In [18]:
# Re-request the data of `un` after the delete and preview whatever rows come back
response = get(f'{datamart_api_url}/datasets/delete-test/variables/un')
csv_buffer = StringIO(response.text)
df = pd.read_csv(csv_buffer)
preview = df.fillna('').head()
display(HTML(preview.to_html(index=False)))

dataset_id,variable_id,variable,main_subject,main_subject_id,value,value_unit,time,time_precision,country,admin1,admin2,admin3,region_coordinate,stated_in,stated_in_id,located in the administrative territorial entity,stated in


Of course it is!

**This API will only delete the canonical data associated with the variable. The metadata for the variable still exists.**

### Delete variable `un` in the dataset `delete-test`

**DELETE `/metadata/datasets/{dataset_id}/variables/{variable_id}`**

In [20]:
# Send DELETE to the metadata endpoint of variable `un` and pretty-print the JSON reply
response = delete(f'{datamart_api_url}/metadata/datasets/delete-test/variables/un')
delete_payload = response.json()
print(json.dumps(delete_payload, indent=2))

{
  "Message": "Successfully deleted ['un'] in the dataset: delete-test."
}


### Delete variable `ingo` in the dataset `delete-test`

**DELETE `/metadata/datasets/{dataset_id}/variables/{variable_id}`**

In [21]:
# Send DELETE to the metadata endpoint of variable `ingo` (its data has not been deleted yet)
response = delete(f'{datamart_api_url}/metadata/datasets/delete-test/variables/ingo')
delete_payload = response.json()
print(json.dumps(delete_payload, indent=2))

{
  "Error": "Please delete all variable data before deleting metadata"
}


The variable `ingo` cannot be deleted because it still has canonical data associated with it. This is a failsafe against accidentally deleting a variable that still has data.

Delete the data first and then delete the variable.

### Delete dataset `delete-test`

**DELETE `/metadata/datasets/{dataset_id}`**

In [23]:
# Send DELETE to the dataset's metadata endpoint (no force flag) and pretty-print the JSON reply
response = delete(f'{datamart_api_url}/metadata/datasets/delete-test')
delete_payload = response.json()
print(json.dumps(delete_payload, indent=2))

{
  "Error": "Dataset delete-test is not empty"
}


You'll see an error message: `Dataset delete-test is not empty`. This is because there is still a variable `ingo` present and it has data associated with it.

A dataset can only be deleted if it has no variables in it.

To delete the dataset `delete-test`, there are now two options:
1. First delete data and then metadata for variable `ingo` and then delete dataset `delete-test`
or 
2. Force delete the dataset. Invoking this option will:

  a. delete all the data associated with all the variables in the dataset
  
  b. delete metadata for all variables in the dataset
  
  c. delete the dataset

**DELETE `/metadata/datasets/{dataset_id}?force=true`**

In [26]:
# Repeat the dataset DELETE with the `force=true` query parameter and pretty-print the JSON reply
response = delete(f'{datamart_api_url}/metadata/datasets/delete-test?force=true')
delete_payload = response.json()
print(json.dumps(delete_payload, indent=2))

{
  "Message": "Dataset delete-test deleted"
}
