# Lesson 25a: Import data

# Import libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
# We can import data directly from the link:

In [3]:
url = "https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/avia_par_pl.tsv.gz&unzip=true"

# `sep` tells pandas which character separates the columns. We split on tabs
# only, even though the file mixes commas and tabs: the comma-separated
# descriptors therefore stay together in the first column.
routes = pd.read_csv(
    url,
    sep="\t",
)

routes.head()

Unnamed: 0,"unit,tra_meas,airp_pr\time",2022Q2,2022Q1,2022M06,2022M05,2022M04,2022M03,2022M02,2022M01,2021Q4,...,2004M09,2004M08,2004M07,2004M06,2004M05,2004M04,2004M03,2004M02,2004M01,2004
0,"FLIGHT,CAF_PAS,PL_EPBY_BG_LBBG",:,:,:,:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:
1,"FLIGHT,CAF_PAS,PL_EPBY_DE_EDDF",:,:,:,:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:
2,"FLIGHT,CAF_PAS,PL_EPBY_DE_EDLV",:,:,:,:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:
3,"FLIGHT,CAF_PAS,PL_EPBY_ES_LEGE",:,:,:,:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:
4,"FLIGHT,CAF_PAS,PL_EPBY_IE_EIDW",:,:,:,:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:


In [4]:
# The first column packs three descriptors into one comma-separated string,
# so we split it into three separate columns.
# The column name contains a backslash, which has to be written as "\\"
# inside a regular Python string literal.
# `expand=True` makes `str.split` return a DataFrame (one column per piece)
# instead of a single column of lists.
routes_desc = routes["unit,tra_meas,airp_pr\\time"].str.split(
    pat=",",
    expand=True,
)

routes_desc.head()

Unnamed: 0,0,1,2
0,FLIGHT,CAF_PAS,PL_EPBY_BG_LBBG
1,FLIGHT,CAF_PAS,PL_EPBY_DE_EDDF
2,FLIGHT,CAF_PAS,PL_EPBY_DE_EDLV
3,FLIGHT,CAF_PAS,PL_EPBY_ES_LEGE
4,FLIGHT,CAF_PAS,PL_EPBY_IE_EIDW


In [5]:
# Give the three positional columns (0, 1, 2) descriptive names.
# `rename` returns a new frame that we assign back, rather than mutating the
# existing one with `inplace=True`; `columns=` is the explicit spelling of
# `axis=1`.
routes_desc = routes_desc.rename(columns={0: "unit", 1: "tra_meas", 2: "airport"})

routes_desc.head()

Unnamed: 0,unit,tra_meas,airport
0,FLIGHT,CAF_PAS,PL_EPBY_BG_LBBG
1,FLIGHT,CAF_PAS,PL_EPBY_DE_EDDF
2,FLIGHT,CAF_PAS,PL_EPBY_DE_EDLV
3,FLIGHT,CAF_PAS,PL_EPBY_ES_LEGE
4,FLIGHT,CAF_PAS,PL_EPBY_IE_EIDW


In [6]:
# Attach the original table to the three descriptor columns (`join` aligns
# the two frames on their row index).
# Selecting the descriptor columns first keeps this cell re-runnable: joining
# a frame that already contains the columns of `routes` raises
# "ValueError: columns overlap but no suffix specified".
routes_desc = routes_desc[["unit", "tra_meas", "airport"]].join(routes)
routes_desc.head()

Unnamed: 0,unit,tra_meas,airport,"unit,tra_meas,airp_pr\time",2022Q2,2022Q1,2022M06,2022M05,2022M04,2022M03,...,2004M09,2004M08,2004M07,2004M06,2004M05,2004M04,2004M03,2004M02,2004M01,2004
0,FLIGHT,CAF_PAS,PL_EPBY_BG_LBBG,"FLIGHT,CAF_PAS,PL_EPBY_BG_LBBG",:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:
1,FLIGHT,CAF_PAS,PL_EPBY_DE_EDDF,"FLIGHT,CAF_PAS,PL_EPBY_DE_EDDF",:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:
2,FLIGHT,CAF_PAS,PL_EPBY_DE_EDLV,"FLIGHT,CAF_PAS,PL_EPBY_DE_EDLV",:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:
3,FLIGHT,CAF_PAS,PL_EPBY_ES_LEGE,"FLIGHT,CAF_PAS,PL_EPBY_ES_LEGE",:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:
4,FLIGHT,CAF_PAS,PL_EPBY_IE_EIDW,"FLIGHT,CAF_PAS,PL_EPBY_IE_EIDW",:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:


In [7]:
# The combined descriptor column is now redundant, so we remove it.
# Assigning the result back (instead of `inplace=True`) together with
# `errors="ignore"` lets the cell be re-run after the column is already gone.
routes_desc = routes_desc.drop(columns="unit,tra_meas,airp_pr\\time", errors="ignore")
routes_desc.head()

Unnamed: 0,unit,tra_meas,airport,2022Q2,2022Q1,2022M06,2022M05,2022M04,2022M03,2022M02,...,2004M09,2004M08,2004M07,2004M06,2004M05,2004M04,2004M03,2004M02,2004M01,2004
0,FLIGHT,CAF_PAS,PL_EPBY_BG_LBBG,:,:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:
1,FLIGHT,CAF_PAS,PL_EPBY_DE_EDDF,:,:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:
2,FLIGHT,CAF_PAS,PL_EPBY_DE_EDLV,:,:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:
3,FLIGHT,CAF_PAS,PL_EPBY_ES_LEGE,:,:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:
4,FLIGHT,CAF_PAS,PL_EPBY_IE_EIDW,:,:,:,:,:,:,:,...,:,:,:,:,:,:,:,:,:,:


In [9]:
# Note that pandas can read many different file formats, and some of them
# require more advanced knowledge.

# Here we read JSON data from the Float Rates web page:

json_link = "http://www.floatrates.com/daily/chf.json"

# `.T` is shorthand for `.transpose()`: it swaps rows and columns.
pd.read_json(json_link).T

Unnamed: 0,code,alphaCode,numericCode,name,rate,date,inverseRate
usd,USD,USD,840,U.S. Dollar,1.069636,"Tue, 3 Jan 2023 11:55:01 GMT",0.934897
eur,EUR,EUR,978,Euro,1.012034,"Tue, 3 Jan 2023 11:55:01 GMT",0.988109
gbp,GBP,GBP,826,U.K. Pound Sterling,0.895338,"Tue, 3 Jan 2023 11:55:01 GMT",1.116897
cad,CAD,CAD,124,Canadian Dollar,1.452551,"Tue, 3 Jan 2023 11:55:01 GMT",0.688444
jpy,JPY,JPY,392,Japanese Yen,139.57564,"Tue, 3 Jan 2023 11:55:01 GMT",0.007165
...,...,...,...,...,...,...,...
cny,CNY,CNY,156,Chinese Yuan,7.377178,"Tue, 3 Jan 2023 11:55:01 GMT",0.135553
isk,ISK,ISK,352,Icelandic Krona,152.040625,"Tue, 3 Jan 2023 11:55:01 GMT",0.006577
lyd,LYD,LYD,434,Libyan Dinar,5.211192,"Tue, 3 Jan 2023 11:55:01 GMT",0.191895
clp,CLP,CLP,152,Chilean Peso,908.298765,"Tue, 3 Jan 2023 11:55:01 GMT",0.001101
