# ISTAC playground

**NOTE: You must select the istac kernel to run this notebook!**

This notebook uses a virtualenv that must be set up beforehand. This is a one-time process, recorded here for future reference.
The steps to create a virtualenv for this project and make it available as a Jupyter kernel are:

```bash
# Move to the proper folder
cd /home/jovyan/work/istac
# Install pipenv
pip install pipenv
# Create the venv and install dependencies
pipenv install
# Activate the shell
pipenv shell
# Register the venv as a Jupyter kernel
python -m ipykernel install --user --name=istac
```

Now you can select the "istac" kernel when running this notebook.

In [8]:
# Import istac lib into your app
import istac

In [9]:
# Collect all indicators
import aiohttp

async with aiohttp.ClientSession() as session:
    indicators = [ind async for ind in istac.indicators(session)]

In [10]:
# Show the codes of the first ten indicators as a quick sanity check
from pprint import pprint

first_codes = [ind.code for ind in indicators[:10]]
pprint(first_codes)

['AFILIACIONES',
 'TURISTAS',
 'EMPLEO_REGISTRADO_AGRICULTURA',
 'EMPLEO_REGISTRADO_HOSTELERIA',
 'EMPLEO_REGISTRADO_INDUSTRIA',
 'EMPLEO_REGISTRADO_SERVICIOS',
 'POBLACION_INACTIVA',
 'POBLACION_INACTIVA_HOMBRES',
 'POBLACION_INACTIVA_MUJERES',
 'PARO_REGISTRADO']


In [11]:
# And optionally, turn the list into a dataframe
import pandas as pd

fields = istac.Indicator.fields()
# One row per indicator, one column per field, indexed by the 'id' field
ind_frame = pd.DataFrame(
    ({field: getattr(ind, field) for field in fields} for ind in indicators),
    columns=fields,
).set_index('id')
# Remove the unwanted columns in a single call instead of one drop per column
ind_frame = ind_frame.drop(columns=['selfLink', 'systemSurveyLinks', 'kind'])
ind_frame.head()

Unnamed: 0_level_0,code,version,title,subjectCode,subjectTitle,conceptDescription,notes
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AFILIACIONES,AFILIACIONES,1.13,"{'es': 'Afiliaciones a la Seguridad Social', '...",51,"{'es': '051 Empleo', '__default__': '051 Empleo'}",{'es': 'Puestos de trabajo registrados en la S...,{'en': 'Affiliations registered on data collec...
TURISTAS,TURISTAS,1.19,"{'es': 'Turistas recibidos', 'en': 'Tourists a...",82,"{'es': '082 Hostelería y turismo', '__default_...",{'es': 'Número de turistas recibidos por vía a...,{'en': 'Tourists are visitors who overnight in...
EMPLEO_REGISTRADO_AGRICULTURA,EMPLEO_REGISTRADO_AGRICULTURA,1.23,"{'en': 'Registered employment. Agriculture', '...",51,"{'es': '051 Empleo', '__default__': '051 Empleo'}","{'en': 'Jobs registered in the primary sector,...",{'es': 'En el sector primario se contabiliza c...
EMPLEO_REGISTRADO_HOSTELERIA,EMPLEO_REGISTRADO_HOSTELERIA,1.23,"{'es': 'Empleo registrado. Hostelería', 'en': ...",51,"{'es': '051 Empleo', '__default__': '051 Empleo'}",{'es': 'Puestos de trabajo registrados en la s...,{'es': 'Se entiende por empleo registrado a la...
EMPLEO_REGISTRADO_INDUSTRIA,EMPLEO_REGISTRADO_INDUSTRIA,1.24,"{'es': 'Empleo registrado. Industria', 'en': '...",51,"{'es': '051 Empleo', '__default__': '051 Empleo'}",{'en': 'Jobs registered in the industry and en...,{'es': 'Se entiende por empleo registrado a la...


In [12]:
# Let's save the list
# index=False: the frame's index is not written to the file, only the columns
ind_frame.to_csv('indicadores.csv', index=False, header=True)

In [26]:
import asyncio

# Get data (with dimensions) for some indicator
async def get_data(indicator):
    """Fetch the data of an indicator, joined with its dimensions.

    The indicator data and its dimensions are requested concurrently over
    a single HTTP session. The points of every dimension are then joined
    onto the data, and the dimension names become the index of the result.

    Args:
        indicator: the indicator to fetch, passed through unchanged to
            istac.indicator_data and istac.dimension_data.

    Returns:
        The joined frame, indexed by the dimension names, without the
        columns whose values are all missing.
    """
    query = {
        # Optional filters, left disabled:
        #'granularity': 'TIME[MONTHLY]',
        #'representation': 'MEASURE[ABSOLUTE]',
        'fields': '-observationsMetadata'
    }
    async with aiohttp.ClientSession() as session:
        # Both requests are awaited together
        frame, dimensions = await asyncio.gather(
            istac.indicator_data(session, indicator, query),
            istac.dimension_data(session, indicator),
        )
    # Attach the points of each dimension; clashing column names get a
    # suffix with the name of the dimension they come from
    for name, dimension in dimensions.items():
        frame = frame.join(dimension.points, on=name, rsuffix=f'_{name}')
    # Index by the combination of all the dimensions
    indexed = frame.set_index(list(dimensions.keys()))
    return indexed.dropna(axis=1, how='all')

## Get indicator data

In [42]:
import ipywidgets as widgets

# Alphabetically sorted indicator codes; the first one is preselected
options = sorted(ind.code for ind in indicators)
selection = widgets.Dropdown(
    description='Indicador:',
    options=options,
    value=options[0],
    disabled=False,
)

# Last expression of the cell: renders the dropdown
selection

Dropdown(description='Indicador:', options=('ACCIDENTES_TRABAJO_BAJA', 'ACCIDENTES_TRABAJO_BAJA_JORNADAS', 'AF…

In [45]:
# Export the indicator to CSV
indicador = selection.value
data = await get_data(indicador)
data.to_csv(f'{indicador}.csv', index=True, header=True)
pprint(data.head().to_csv())

('GEOGRAPHICAL,TIME,MEASURE,F,title,granularityCode,latitude,longitude,title_TIME,granularityCode_TIME,title_MEASURE,decimalPlaces,isPercentage,type,unit,unitMultiplier,unitSymbol,unitSymbolPosition\n'
 'ES70,2018,ANNUAL_PUNTUAL_RATE,-1117,Canarias,REGIONS,28.2869925,-15.8335245,2018,YEARLY,Variación '
 'anual,0,,AMOUNT,Cabezas,Unidades,,\n'
 'ES70,2018,INTERPERIOD_PUNTUAL_RATE,-1117,Canarias,REGIONS,28.2869925,-15.8335245,2018,YEARLY,Variación '
 'interperiódica,0,,AMOUNT,Cabezas,Unidades,,\n'
 'ES70,2018,ANNUAL_PERCENTAGE_RATE,-2.29,Canarias,REGIONS,28.2869925,-15.8335245,2018,YEARLY,Tasa '
 'variación anual,2,True,CHANGE_RATE,Porcentaje,,%,END\n'
 'ES70,2018,INTERPERIOD_PERCENTAGE_RATE,-2.29,Canarias,REGIONS,28.2869925,-15.8335245,2018,YEARLY,Tasa '
 'variación interperiódica,2,True,CHANGE_RATE,Porcentaje,,%,END\n'
 'ES70,2018,ABSOLUTE,47689,Canarias,REGIONS,28.2869925,-15.8335245,2018,YEARLY,Dato,0,,AMOUNT,Cabezas,Unidades,,\n')
