# Refresh FSI data in Datamart

In [1]:
import io
import os.path
import pandas as pd
import json
from requests import get,post,put,delete
from IPython.display import display, HTML

## All parameters are passed from the command line

In [39]:
# Parameters to be injected
# Path of the tab-separated annotation template that is placed ahead of the data in every upload
template_path = 'FSI_template.tsv'
# Either a single data file or a directory whose files are all uploaded
datasets_path = './datasets'
# Base URL that every Datamart API request in this notebook is built from
datamart_api_url = 'http://localhost:12543'

### Utilities

In [48]:
def erase_dataset(datamart_api_url: str, dataset_id: str):
    """Delete a dataset's variables and then the dataset's own metadata.

    The variable list is fetched from the metadata endpoint. For every
    listed ``variable_id`` the variable's data is deleted first, followed by
    the variable's metadata. The dataset metadata is deleted last, whether
    or not any variables were listed. Responses of the DELETE calls are not
    inspected.

    Args:
        datamart_api_url: Base URL of the Datamart API.
        dataset_id: Identifier of the dataset to erase.
    """
    metadata_url = f'{datamart_api_url}/metadata/datasets/{dataset_id}'
    data_url = f'{datamart_api_url}/datasets/{dataset_id}'

    listing = pd.DataFrame(get(f'{metadata_url}/variables').json())
    # No 'variable_id' column means there is nothing to delete per variable
    variable_ids = listing['variable_id'] if 'variable_id' in listing.columns else []

    for variable_id in variable_ids:
        delete(f'{data_url}/variables/{variable_id}')
        delete(f'{metadata_url}/variables/{variable_id}')

    delete(metadata_url)

In [3]:
def upload_data_annotated(file_path, url, put_data=True):
    """Upload an annotated file to ``url`` and print the JSON response.

    Args:
        file_path: Path of the file to send. Its base name is used as the
            file name of the multipart upload.
        url: Endpoint that receives the upload.
        put_data: Send the file with PUT when true, with POST otherwise.
    """
    file_name = os.path.basename(file_path)
    send = put if put_data else post

    # The context manager closes the file handle even if the request raises.
    with open(file_path, mode='rb') as stream:
        files = {
            'file': (file_name, stream, 'application/octet-stream')
        }
        response = send(url, files=files)

    # The response body is printed whatever the status code is.
    print(json.dumps(response.json(), indent=2))

In [32]:
def upload_frame_annotated(buffer, url, put_data=True):
    """Upload an in-memory annotated sheet to ``url``.

    The buffer is rewound and sent as a multipart file named ``buffer.csv``.
    Nothing is printed on success. Any error response (status 400 and above)
    is reported so that failed uploads do not go unnoticed.

    Args:
        buffer: Seekable file-like object holding the annotated CSV content.
        url: Endpoint that receives the upload.
        put_data: Send the buffer with PUT when true, with POST otherwise.
    """
    # Rewind so the upload starts at the first byte written to the buffer.
    buffer.seek(0)

    files = {
        'file': ('buffer.csv', buffer, 'application/octet-stream')
    }

    send = put if put_data else post
    response = send(url, files=files)

    if not response.ok:
        try:
            print(json.dumps(response.json(), indent=2))
        except ValueError:
            # The error body is not JSON; show the raw text instead.
            print(f'{response.status_code}: {response.text}')

### Load template into memory

In [49]:
# NOTE(review): this erases dataset 'WFP', while the template loaded below
# declares dataset 'FSI' — confirm that 'WFP' is the intended id here.
erase_dataset(datamart_api_url, 'WFP')

In [40]:
# Stop with an explicit error before trying to parse a missing template
if not os.path.isfile(template_path):
    raise FileNotFoundError("Template file does not exist!")

In [41]:
# Read the template as tab-separated text with no header row, keeping every
# cell as an object value and replacing missing cells with ''
df_template = pd.read_csv(template_path, sep='\t', dtype=object, header=None).fillna('')
# Bare expression: shows the loaded template as the cell output
df_template

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,dataset,FSI,FSI dataset,data downloaded from FSI,https://fragilestatesindex.org,,,,,,,,,,,,
1,role,main subject,time,,,variable,variable,variable,variable,variable,variable,variable,variable,variable,variable,variable,variable
2,type,country,%Y-%m-%d %H:%M:%S,,,number,number,number,number,number,number,number,number,number,number,number,number
3,description,,,,,,,,,,,,,,,,
4,name,,,,,,,,,,,,,,,,
5,unit,,,,,,,,,,,,,,,,
6,header,Country,Year,Rank,Total,C1: Security Apparatus,C2: Factionalized Elites,C3: Group Grievance,E1: Economy,E2: Economic Inequality,E3: Human Flight and Brain Drain,P1: State Legitimacy,P2: Public Services,P3: Human Rights,S1: Demographic Pressures,S2: Refugees and IDPs,X1: External Intervention


### Build the list of files to be uploaded

In [46]:
def generate_filenames(datasets_path: str) -> list:
    """Return the paths of the files to upload.

    Args:
        datasets_path: Either a single file or a directory.

    Returns:
        ``[datasets_path]`` when it is a file. Otherwise the regular files
        directly inside the directory (sub-directories are skipped), each
        joined with ``datasets_path`` and sorted by entry name.
    """
    if os.path.isfile(datasets_path):
        return [datasets_path]

    # os.listdir yields entries in arbitrary order; sorting keeps the upload
    # order the same from run to run.
    candidates = (
        os.path.join(datasets_path, entry)
        for entry in sorted(os.listdir(datasets_path))
    )
    return [path for path in candidates if os.path.isfile(path)]

# Resolve the injected datasets_path into the list of files the upload loop iterates over
files_to_upload = generate_filenames(datasets_path)

### Upload datasets one at a time

In [64]:
# The dataset id is the second cell of the template's first row
dataset_id = df_template.iloc[0,1]
# Number of data columns: every template column except the leading label column
nCols = len(df_template.iloc[0]) - 1
# Column labels the input must carry, taken from template row 6 (the 'header' row)
expected_labels = list(df_template.iloc[6][1:])
# The target endpoint is the same for every file, so build it once
url = f'{datamart_api_url}/datasets/{dataset_id}/annotated?create_if_not_exist=true'

for data_path in files_to_upload:

    print(data_path)

    # Only extract the first nCols columns specified in the template
    sheet = pd.read_excel(data_path, dtype=object)
    sheet = sheet[sheet.columns[:nCols]]

    # Verify labels match. Compare plain lists: writing `False in cols == expected`
    # is a chained comparison, `(False in cols) and (cols == expected)`, which
    # stops at the first test and therefore never reports a mismatch.
    if list(sheet.columns) != expected_labels:
        raise ValueError(f'Columns do not match between template and input: {data_path}. Abort...')

    # Build inputs: add the leading label column and mark the first data row
    sheet.insert(loc=0, column='', value='')
    sheet.iloc[0,0] = 'data'

    # Build annotated data: template rows first, then the data rows.
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    sheet.columns = df_template.columns
    annotated_sheet = pd.concat([df_template, sheet])

    # post data to datamart
    buffer = io.StringIO()
    annotated_sheet.to_csv(buffer, index=False, header=False)
    upload_frame_annotated(buffer, url, False)

./datasets\fsi-2006.xlsx
./datasets\fsi-2007.xlsx
./datasets\fsi-2008.xlsx
./datasets\fsi-2009.xlsx
./datasets\fsi-2010.xlsx
./datasets\fsi-2011.xlsx
./datasets\fsi-2012.xlsx
./datasets\fsi-2013.xlsx
./datasets\fsi-2014.xlsx
./datasets\fsi-2015.xlsx
./datasets\fsi-2016.xlsx
./datasets\fsi-2017.xlsx
./datasets\fsi-2018.xlsx
./datasets\fsi-2019.xlsx
./datasets\fsi-2020.xlsx


### Check the data has been uploaded

In [45]:
# Spot-check the upload by querying one variable for one country
dataset_id = 'FSI'
q_variable = 'c1_security_apparatus'
q_country = 'Gabon'
response = get(f'{datamart_api_url}/datasets/{dataset_id}/variables/{q_variable}?country={q_country}')
# The response body is parsed as CSV text
df = pd.read_csv(io.StringIO(response.text))
# Render as an HTML table without the index, with missing values shown as blanks
display(HTML(df.fillna('').to_html(index=False)))

dataset_id,variable_id,variable,main_subject,main_subject_id,value,value_unit,time,time_precision,country,admin1,admin2,admin3,region_coordinate,stated_in,stated_in_id,stated in
FSI,c1_security_apparatus,C1: Security Apparatus,Gabon,Q1000,5.1,,2006-01-01T00:00:00Z,,Gabon,,,,POINT(11.5 -0.68333055555556),,,
FSI,c1_security_apparatus,C1: Security Apparatus,Gabon,Q1000,5.1,,2007-01-01T00:00:00Z,,Gabon,,,,POINT(11.5 -0.68333055555556),,,
FSI,c1_security_apparatus,C1: Security Apparatus,Gabon,Q1000,5.1,,2008-01-01T00:00:00Z,,Gabon,,,,POINT(11.5 -0.68333055555556),,,
FSI,c1_security_apparatus,C1: Security Apparatus,Gabon,Q1000,5.1,,2009-01-01T00:00:00Z,,Gabon,,,,POINT(11.5 -0.68333055555556),,,
FSI,c1_security_apparatus,C1: Security Apparatus,Gabon,Q1000,5.1,,2010-01-01T00:00:00Z,,Gabon,,,,POINT(11.5 -0.68333055555556),,,
FSI,c1_security_apparatus,C1: Security Apparatus,Gabon,Q1000,5.1,,2011-01-01T00:00:00Z,,Gabon,,,,POINT(11.5 -0.68333055555556),,,
FSI,c1_security_apparatus,C1: Security Apparatus,Gabon,Q1000,5.1,,2012-01-01T00:00:00Z,,Gabon,,,,POINT(11.5 -0.68333055555556),,,
FSI,c1_security_apparatus,C1: Security Apparatus,Gabon,Q1000,5.1,,2013-01-01T00:00:00Z,,Gabon,,,,POINT(11.5 -0.68333055555556),,,
FSI,c1_security_apparatus,C1: Security Apparatus,Gabon,Q1000,5.1,,2014-01-01T00:00:00Z,,Gabon,,,,POINT(11.5 -0.68333055555556),,,
FSI,c1_security_apparatus,C1: Security Apparatus,Gabon,Q1000,5.1,,2015-01-01T00:00:00Z,,Gabon,,,,POINT(11.5 -0.68333055555556),,,
