# Interactive Visualisation

In [137]:
import numpy as np
import pandas as pd
import folium
import json
from urllib.request import urlopen
from urllib import parse
import requests

In [138]:
df = pd.read_csv('P3_GrantExport.csv',sep=';')

# Transformation from string to floats in 'Approved Amount'
float_amounts = list()
for x in df['Approved Amount'].tolist():
    try:
        float_amounts.append(float(x))
    except:
        float_amounts.append(0) #When we are missing a real number we set the amount to 0 (basically to discard it later)
    
df = df[['University']] #we keep only university names associated at projects
df['Funding'] = float_amounts #and we attach float value of the Approved Amount, now renamed to 'Funding'
df.head()

Unnamed: 0,University,Funding
0,Nicht zuteilbar - NA,11619.0
1,Université de Genève - GE,41022.0
2,"NPO (Biblioth., Museen, Verwalt.) - NPO",79732.0
3,Universität Basel - BS,52627.0
4,"NPO (Biblioth., Museen, Verwalt.) - NPO",120042.0


In [139]:
df.count()

University    50988
Funding       63969
dtype: int64

In [140]:
sum(df['Funding'] == 0) #checking for projects without funding data.

10997

In [141]:
df.University.describe()

count                       50988
unique                         77
top       Universität Zürich - ZH
freq                         6774
Name: University, dtype: object

In [142]:
funding_by_uni = df.groupby('University')[['Funding']].sum()
funding_by_uni['Longitude'] = ''; funding_by_uni['Latitude'] = ''; funding_by_uni['Canton'] = ''
funding_by_uni.head()

Unnamed: 0_level_0,Funding,Longitude,Latitude,Canton
University,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AO Research Institute - AORI,3435621.0,,,
Allergie- und Asthmaforschung - SIAF,19169965.0,,,
Berner Fachhochschule - BFH,31028695.0,,,
Biotechnologie Institut Thurgau - BITG,2492535.0,,,
Centre de rech. sur l'environnement alpin - CREALP,1567678.0,,,


In [143]:
funding_by_uni.describe()

Unnamed: 0,Funding
count,77.0
mean,168543800.0
std,439437500.0
min,8000.0
25%,1430686.0
50%,6882730.0
75%,42771910.0
max,1838237000.0


77 different universities with a huge disparity in funding. Maximum is 3 orders of magnitude bigger than the 1st quantile's top. 

In [144]:
#imporving names before search
fbu = funding_by_uni.reset_index()

University_name = list()
University_code = list()
for x in fbu.University.tolist():

    try:
        name, code = x.split(' - ') #split by line
    except:
        name, code = x.split(' ') #or split by space
        
    University_name.append(name)
    University_code.append(code)
fbu['University_name'] = University_name
fbu['University_code'] = University_code
fbu.head()
    

Unnamed: 0,University,Funding,Longitude,Latitude,Canton,University_name,University_code
0,AO Research Institute - AORI,3435621.0,,,,AO Research Institute,AORI
1,Allergie- und Asthmaforschung - SIAF,19169965.0,,,,Allergie- und Asthmaforschung,SIAF
2,Berner Fachhochschule - BFH,31028695.0,,,,Berner Fachhochschule,BFH
3,Biotechnologie Institut Thurgau - BITG,2492535.0,,,,Biotechnologie Institut Thurgau,BITG
4,Centre de rech. sur l'environnement alpin - CR...,1567678.0,,,,Centre de rech. sur l'environnement alpin,CREALP


Using Geonames Full Text Search API to map the universities to their respective columns. 'requests' library is used to get the HTTP response. But since 'University' names have spaces and other special characters, it needs to be UTF-8 encoded before using the requests.get. This is accomplished using 'parse.quote' method in 'urllib' library.

In [149]:
num_projects = len(fbu)
username = 'dunaiada'

for i in range(0,num_projects):
    url = 'http://api.geonames.org/search?q=' + fbu['University_code'][i] + '&country=CH' + '&maxRows=1&username=' + username + '&type=json'
    ## Encode special characters and spaces
    query = parse.quote(url,safe=':/&=?')
    ## Parse JSON data
    # print(query)
        
    d = json.loads(requests.get(query).text)
    
    if bool(d['geonames']): #checks whether we get a match.
        try:
            fbu['Canton'].iloc[i] = d['geonames'][0]['adminCode1']
            fbu['Latitude'].iloc[i] = d['geonames'][0]['lat']
            fbu['Longitude'].iloc[i] = d['geonames'][0]['lng']
        except:
            pass
fbu.head()

http://api.geonames.org/search?q=AORI&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=SIAF&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=BFH&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=BITG&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=CREALP&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=EPFL&country=CH&maxRows=1&username=dunaiada&type=json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


http://api.geonames.org/search?q=ETHZ&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=EAWAG&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=WSL&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=EHB&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=EMPA&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=EOC&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=FHKD&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=FHNW&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=FHO&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=FTL&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=FFHS&country=CH&maxRows=1&username=dunaiada&type=json
http://api.geonames.org/search?q=FP&country=CH&

Unnamed: 0,University,Funding,Longitude,Latitude,Canton,University_name,University_code
0,AO Research Institute - AORI,3435621.0,,,,AO Research Institute,AORI
1,Allergie- und Asthmaforschung - SIAF,19169965.0,,,,Allergie- und Asthmaforschung,SIAF
2,Berner Fachhochschule - BFH,31028695.0,,,,Berner Fachhochschule,BFH
3,Biotechnologie Institut Thurgau - BITG,2492535.0,,,,Biotechnologie Institut Thurgau,BITG
4,Centre de rech. sur l'environnement alpin - CR...,1567678.0,,,,Centre de rech. sur l'environnement alpin,CREALP


Currently considering only those universities which were mapped to their cantons. 

In [154]:
## Removing all univerisites without canton mappings [TODO: To be changes]
fbu = fbu[fbu['Canton'] != '']
fbu

Unnamed: 0,University,Funding,Longitude,Latitude,Canton,University_name,University_code
5,EPF Lausanne - EPFL,1175316000.0,6.56673,46.51939,VD,EPF Lausanne,EPFL
6,ETH Zürich - ETHZ,1635597000.0,8.54805,47.3763,ZH,ETH Zürich,ETHZ
7,Eidg. Anstalt für Wasserversorgung - EAWAG,74619220.0,8.61246,47.40311,ZH,Eidg. Anstalt für Wasserversorgung,EAWAG
8,"Eidg. Forschungsanstalt für Wald,Schnee,Land -...",48360390.0,8.45484,47.36044,ZH,"Eidg. Forschungsanstalt für Wald,Schnee,Land",WSL
10,Eidg. Material und Prüfungsanstalt - EMPA,58574520.0,8.61246,47.40311,ZH,Eidg. Material und Prüfungsanstalt,EMPA
13,Fachhochschule Nordwestschweiz (ohne PH) - FHNW,42771910.0,7.63856,47.53378,BL,Fachhochschule Nordwestschweiz (ohne PH),FHNW
38,Kantonsspital St. Gallen - KSPSG,15034100.0,9.38826,47.42925,SG,Kantonsspital St. Gallen,KSPSG
40,Nicht zuteilbar - NA,142425700.0,7.44744,46.94809,BE,Nicht zuteilbar,
41,Paul Scherrer Institut - PSI,115269000.0,8.23028,47.5385,AG,Paul Scherrer Institut,PSI
43,Pädag. Hochschule Tessin (Teilschule SUPSI) - ASP,159317.0,8.05034,47.44496,AG,Pädag. Hochschule Tessin (Teilschule SUPSI),ASP


In [158]:
m = folium.Map(location=[46.76, 8.26], zoom_start=8, tiles='Mapbox Bright')
for i in range(len(fbu)):
    ## Add markers on all universities
    folium.Marker([fbu['Latitude'].iloc[i], fbu['Longitude'].iloc[i]], popup=fbu['University'].iloc[i],
                   icon = folium.Icon(icon = 'cloud')).add_to(m)
m.save('m.html') # Need to be seen in browser

In [160]:
#Now we do the grouping by canton
fbc = fbu.groupby('Canton')[['Funding']].sum()
fbc

Unnamed: 0_level_0,Funding
Canton,Unnamed: 1_level_1
AG,115428300.0
BE,1661799000.0
BL,42771910.0
BS,1352251000.0
GE,1838237000.0
LU,41925890.0
NE,383204600.0
SG,84229060.0
TI,24040080.0
VD,1175316000.0


Plot the map using JSON file (with coordinates for each canton) and using the dataframe containing grant money for each canton.

In [151]:
geo_canton_data = json.load(open('ch-cantons.topojson.json'))