# Data retrieval from the World Bank API

https://databank.worldbank.org/source/world-development-indicators

### imports

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from io import BytesIO
from zipfile import ZipFile
import requests
import xml.etree.ElementTree as et

### Download xml

Example indicator codes:
- GDP per capita (current US$): 'NY.GDP.PCAP.CD'
- International tourism, number of arrivals: 'ST.INT.ARVL'
- Unemployment, total (% of total labor force) (modeled ILO estimate): 'SL.UEM.TOTL.ZS'

Function to download data from the API; the result is then saved to a local CSV file. Only data for years after 2017 is kept.

Data is saved in CSV format, where the columns are 'Country or Area', 'Item', 'Year', 'Value', 'Code3'.

In [18]:
def get_WB_indicator(indicator, after_year=2017, timeout=60):
    """Download one World Bank indicator and return it as a long-format DataFrame.

    The indicator is requested from the World Bank API as a zipped XML file.
    The first member of the archive is parsed and every ``record`` element is
    turned into one row.

    Parameters
    ----------
    indicator : str
        Indicator code appended to the API URL, e.g. ``'NY.GDP.PCAP.CD'``.
    after_year : int, default 2017
        Only records whose year is strictly greater than this value are kept.
    timeout : float, default 60
        Seconds passed to ``requests.get`` as the request timeout.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Country or Area'``, ``'Item'``, ``'Year'``, ``'Value'`` and
        ``'Code3'``. Values are the raw XML text (strings); fields that are
        empty or absent in the XML are missing values. ``'Code3'`` is the
        ``key`` attribute of the ``'Country or Area'`` field. The frame is
        empty (but still has these columns) when no record matches.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    url_base = 'https://api.worldbank.org/v2/en/indicator/'
    response = requests.get(url_base + indicator + '?downloadformat=xml', timeout=timeout)
    # Fail early on HTTP errors instead of trying to unzip an error page.
    response.raise_for_status()

    with ZipFile(BytesIO(response.content), 'r') as zf:
        data = zf.read(zf.namelist()[0])
    dataroot = et.fromstring(data)

    columns = ['Country or Area', 'Item', 'Year', 'Value', 'Code3']
    rows = []
    for record in dataroot.iter('record'):
        fields = {}
        for field in record:
            name = field.attrib.get('name')
            if name is None:
                continue
            fields[name] = field.text
            if name == 'Country or Area':
                fields['Code3'] = field.attrib.get('key')
        # Records without a usable year cannot be filtered, so they are skipped.
        try:
            year = int(fields.get('Year'))
        except (TypeError, ValueError):
            continue
        if year > after_year:
            rows.append({column: fields.get(column) for column in columns})

    # Build the frame once; appending row by row is quadratic and
    # DataFrame.append no longer exists in pandas >= 2.0.
    return pd.DataFrame(rows, columns=columns, dtype=object)

# Fetch tourism arrivals, unemployment and GDP per capita, stack them into one
# long table, and save it. pd.concat replaces the chained DataFrame.append
# calls, which were removed in pandas 2.0. Each frame keeps its own index
# labels, so labels repeat across indicators, exactly as before.
df = pd.concat([
    get_WB_indicator('ST.INT.ARVL'),
    get_WB_indicator('SL.UEM.TOTL.ZS'),
    get_WB_indicator('NY.GDP.PCAP.CD'),
])
# Shorten the long unemployment label.
df.loc[df.Item=='Unemployment, total (% of total labor force) (modeled ILO estimate)','Item']='Unemployment (% of total labor force)'
df.to_csv('WB_statistics.csv', index=False)

In [19]:
# Number of (rows, columns) in the combined indicator table.
df.shape

(2394, 5)

In [20]:
# Preview the first rows of the combined table.
df.head()

Unnamed: 0,Country or Area,Item,Year,Value,Code3
0,Aruba,"International tourism, number of arrivals",2018,1897000.0,ABW
1,Aruba,"International tourism, number of arrivals",2019,1951000.0,ABW
2,Aruba,"International tourism, number of arrivals",2020,,ABW
3,Africa Eastern and Southern,"International tourism, number of arrivals",2018,41189145.2687099,AFE
4,Africa Eastern and Southern,"International tourism, number of arrivals",2019,39826701.4025488,AFE


In [21]:
# Spot check: all rows for a single country (Finland) across the loaded indicators.
df[df['Country or Area']=='Finland']

Unnamed: 0,Country or Area,Item,Year,Value,Code3
225,Finland,"International tourism, number of arrivals",2018,3224000.0,FIN
226,Finland,"International tourism, number of arrivals",2019,3290000.0,FIN
227,Finland,"International tourism, number of arrivals",2020,,FIN
225,Finland,Unemployment (% of total labor force),2018,7.36,FIN
226,Finland,Unemployment (% of total labor force),2019,6.7,FIN
227,Finland,Unemployment (% of total labor force),2020,7.83,FIN
225,Finland,GDP per capita (current US$),2018,50013.2942027626,FIN
226,Finland,GDP per capita (current US$),2019,48711.5642079496,FIN
227,Finland,GDP per capita (current US$),2020,49041.3422565659,FIN
