### OBJECTIVE 
To visualize the distribution of grants issued to different Swiss Cantons by Swiss National Science Foundation.

Import necessay libs

In [1]:
import pandas as pd
from urllib import parse
import json
import requests
import folium

Load the P3 database for Grants given to different Swiss Universities

In [129]:
data = pd.read_csv('P3_GrantExport.csv',sep=';')
data

Unnamed: 0,"﻿""Project Number""",Project Title,Project Title English,Responsible Applicant,Funding Instrument,Funding Instrument Hierarchy,Institution,University,Discipline Number,Discipline Name,Discipline Name Hierarchy,Start Date,End Date,Approved Amount,Keywords
0,1,Schlussband (Bd. VI) der Jacob Burckhardt-Biog...,,Kaegi Werner,Project funding (Div. I-III),Project funding,,Nicht zuteilbar - NA,10302,Swiss history,Human and Social Sciences;Theology & religious...,01.10.1975,30.09.1976,11619.00,
1,4,Batterie de tests à l'usage des enseignants po...,,Massarenti Léonard,Project funding (Div. I-III),Project funding,Faculté de Psychologie et des Sciences de l'Ed...,Université de Genève - GE,10104,Educational science and Pedagogy,"Human and Social Sciences;Psychology, educatio...",01.10.1975,30.09.1976,41022.00,
2,5,"Kritische Erstausgabe der ""Evidentiae contra D...",,Kommission für das Corpus philosophorum medii ...,Project funding (Div. I-III),Project funding,Kommission für das Corpus philosophorum medii ...,"NPO (Biblioth., Museen, Verwalt.) - NPO",10101,Philosophy,Human and Social Sciences;Linguistics and lite...,01.03.1976,28.02.1985,79732.00,
3,6,Katalog der datierten Handschriften in der Sch...,,Burckhardt Max,Project funding (Div. I-III),Project funding,Abt. Handschriften und Alte Drucke Bibliothek ...,Universität Basel - BS,10302,Swiss history,Human and Social Sciences;Theology & religious...,01.10.1975,30.09.1976,52627.00,
4,7,Wissenschaftliche Mitarbeit am Thesaurus Lingu...,,Schweiz. Thesauruskommission,Project funding (Div. I-III),Project funding,Schweiz. Thesauruskommission,"NPO (Biblioth., Museen, Verwalt.) - NPO",10303,Ancient history and Classical studies,Human and Social Sciences;Theology & religious...,01.01.1976,30.04.1978,120042.00,
5,8,Die schweizerische Wirtschaftspolitik seit dem...,,Kleinewefers Henner,Project funding (Div. I-III),Project funding,"Séminaire de politique économique, d'économie ...",Université de Fribourg - FR,10203,Economics,"Human and Social Sciences;Economics, law",01.01.1976,31.12.1978,53009.00,
6,9,Theologische Forschungen zur Oekumene (Studien...,,Stirnimann Heinrich,Project funding (Div. I-III),Project funding,Institut für ökumenische Studien Université de...,Université de Fribourg - FR,10102,"Religious sciences, Theology",Human and Social Sciences;Theology & religious...,01.01.1976,31.12.1976,25403.00,
7,10,Konfuzianische Kulturwerte in der sozialen Ent...,,Deuchler Martina,Project funding (Div. I-III),Project funding,Ostasiatisches Seminar Universität Zürich,Universität Zürich - ZH,10301,History in general,Human and Social Sciences;Theology & religious...,01.10.1975,31.03.1977,47100.00,
8,11,Edizione degli scritti di Aurelio de' Giorgi B...,,Stäuble Antonio,Project funding (Div. I-III),Project funding,,Université de Lausanne - LA,10502,Romance languages and literature,Human and Social Sciences;Linguistics and lite...,01.10.1975,31.03.1977,25814.00,
9,13,La construction de nouveautés au sein des morp...,,Piaget Jean,Project funding (Div. I-III),Project funding,Laboratoire de Didactique et Epistémologie des...,Université de Genève - GE,10105,Psychology,"Human and Social Sciences;Psychology, educatio...",01.10.1975,30.09.1978,360000.00,


For our study we need only University Name and Approved Amount Columns

In [3]:
data = data[['University','Approved Amount']]
data.head(5)

Unnamed: 0,University,Approved Amount
0,Nicht zuteilbar - NA,11619.0
1,Université de Genève - GE,41022.0
2,"NPO (Biblioth., Museen, Verwalt.) - NPO",79732.0
3,Universität Basel - BS,52627.0
4,"NPO (Biblioth., Museen, Verwalt.) - NPO",120042.0


The database description mentions that the University Field is left **empty** if the project is not carried out in Swiss University. Hence we decide to drop these entries as the money is not used by the Swiss University. There were **12981** such enteries. 

In [4]:
null_data = data[data['University'].isnull()]
null_data.shape

(12981, 2)

In [5]:
clean_data = data.drop(data.index[null_data.index])
clean_data = clean_data.drop(clean_data.index[(clean_data['University'] ==  'Nicht zuteilbar - NA')])
clean_data = clean_data.drop(clean_data.index[(clean_data['University'] ==  'NPO (Biblioth., Museen, Verwalt.) - NPO')])

In [6]:
print("No. of Entries: {}".format(clean_data.shape[0]))
clean_data.head(10)

No. of Entries: 46920


Unnamed: 0,University,Approved Amount
1,Université de Genève - GE,41022.0
3,Universität Basel - BS,52627.0
5,Université de Fribourg - FR,53009.0
6,Université de Fribourg - FR,25403.0
7,Universität Zürich - ZH,47100.0
8,Université de Lausanne - LA,25814.0
9,Université de Genève - GE,360000.0
10,Université de Fribourg - FR,153886.0
12,Université de Genève - GE,116991.0
13,Universität Basel - BS,112664.0


Removing the entry which don't have numeric value for amount column

In [77]:
clean_data = clean_data.drop(clean_data.index[clean_data['Approved Amount'] == 'data not included in P3'])
clean_data.shape

(46810, 5)

We identify different universities in our data and query for the geocoding data.

In [78]:
dictionary = clean_data.groupby('University').groups
university = list(dictionary.keys())
print("No. of unique universities:",len(university))


No. of unique universities: 75


In [79]:
clean_data1 = pd.DataFrame(clean_data.groupby('University')
                                    .apply(lambda x: x['Approved Amount'].astype(float).sum()))
#clean_data1.reset_index(level=0, inplace=True)
clean_data1.columns = ['Total Amount']
clean_data1

Unnamed: 0_level_0,Total Amount
University,Unnamed: 1_level_1
AO Research Institute - AORI,3.435621e+06
Allergie- und Asthmaforschung - SIAF,1.916996e+07
Berner Fachhochschule - BFH,3.102870e+07
Biotechnologie Institut Thurgau - BITG,2.492535e+06
Centre de rech. sur l'environnement alpin - CREALP,1.567678e+06
EPF Lausanne - EPFL,1.175316e+09
ETH Zürich - ETHZ,1.635597e+09
Eidg. Anstalt für Wasserversorgung - EAWAG,7.461922e+07
"Eidg. Forschungsanstalt für Wald,Schnee,Land - WSL",4.836039e+07
Eidg. Hochschulinstitut für Berufsbildung - EHB,2.086572e+06


Add column for Geocoding data

In [92]:
clean_data1['Canton'] = ''
clean_data1['Latitude'] = '' 
clean_data1['Longitude'] = ''

clean_data1.head()
clean_data1.index

Index(['AO Research Institute - AORI', 'Allergie- und Asthmaforschung - SIAF',
       'Berner Fachhochschule - BFH', 'Biotechnologie Institut Thurgau - BITG',
       'Centre de rech. sur l'environnement alpin - CREALP',
       'EPF Lausanne - EPFL', 'ETH Zürich - ETHZ',
       'Eidg. Anstalt für Wasserversorgung - EAWAG',
       'Eidg. Forschungsanstalt für Wald,Schnee,Land - WSL',
       'Eidg. Hochschulinstitut für Berufsbildung - EHB',
       'Eidg. Material und Prüfungsanstalt - EMPA',
       'Ente Ospedaliero Cantonale - EOC', 'Fachhochschule Kalaidos - FHKD',
       'Fachhochschule Nordwestschweiz (ohne PH) - FHNW',
       'Fachhochschule Ostschweiz - FHO',
       'Facoltà di Teologia di Lugano - FTL',
       'Fernfachhochschule Schweiz (Mitglied SUPSI) - FFHS',
       'Firmen/Privatwirtschaft - FP', 'Forschungsanstalten Agroscope - AGS',
       'Forschungsinstitut für Opthalmologie - IRO',
       'Forschungsinstitut für biologischen Landbau - FIBL',
       'Forschungskommission 

Using Geonames Full Text Search API to map the universities to their respective columns. 'requests' library is used to get the HTTP response. But since 'University' names have spaces and other special characters, it needs to be UTF-8 encoded before using the requests.get. This is accomplished using 'parse.quote' method in 'urllib' library.

In [100]:
base_url = 'http://api.geonames.org/search?q='
url_end  = '&maxRows=2&username=ada_homework&type=json'
for uni in clean_data1.index:
    uni_tmp = uni
    if uni_tmp.find('-') != -1:
        uni_tmp = uni_tmp.replace(uni_tmp[uni_tmp.find('-'):],'')
    if uni_tmp.find('(') != -1:
        uni_tmp = uni_tmp.replace(uni_tmp[uni_tmp.find('('):],'')
        
    url = base_url + uni_tmp + url_end
    print(uni_tmp)
    ## Encode special characters and spaces
    query = parse.quote(url,safe=':/&=?')
    ## Parse JSON data
    response = json.loads(requests.get(query).text)
    if bool(response['geonames']):
        clean_data1['Canton'].loc[uni]    = response['geonames'][0]['adminCode1']
        clean_data1['Latitude'].loc[uni]  = response['geonames'][0]['lat']
        clean_data1['Longitude'].loc[uni] = response['geonames'][0]['lng']

AO Research Institute 


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


Allergie
Berner Fachhochschule 
Biotechnologie Institut Thurgau 
Centre de rech. sur l'environnement alpin 
EPF Lausanne 
ETH Zürich 
Eidg. Anstalt für Wasserversorgung 
Eidg. Forschungsanstalt für Wald,Schnee,Land 
Eidg. Hochschulinstitut für Berufsbildung 
Eidg. Material und Prüfungsanstalt 
Ente Ospedaliero Cantonale 
Fachhochschule Kalaidos 
Fachhochschule Nordwestschweiz 
Fachhochschule Ostschweiz 
Facoltà di Teologia di Lugano 
Fernfachhochschule Schweiz 
Firmen/Privatwirtschaft 
Forschungsanstalten Agroscope 
Forschungsinstitut für Opthalmologie 
Forschungsinstitut für biologischen Landbau 
Forschungskommission SAGW
Franklin University Switzerland 
Friedrich Miescher Institute 
HES de Suisse occidentale 
Haute école pédagogique BE, JU, NE 
Haute école pédagogique du canton de Vaud 
Haute école pédagogique fribourgeoise 
Hochschule Luzern 
Idiap Research Institute 
Inst. Suisse de Spéléologie et Karstologie 
Inst. de Hautes Etudes Internat. et du Dév 
Inst. universit. romand de S

In [102]:
clean_data1

Unnamed: 0_level_0,Total Amount,Canton,Latitude,Longitude
University,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AO Research Institute - AORI,3.435621e+06,38,-19.2,17.58333
Allergie- und Asthmaforschung - SIAF,1.916996e+07,,,
Berner Fachhochschule - BFH,3.102870e+07,,,
Biotechnologie Institut Thurgau - BITG,2.492535e+06,,,
Centre de rech. sur l'environnement alpin - CREALP,1.567678e+06,,,
EPF Lausanne - EPFL,1.175316e+09,,,
ETH Zürich - ETHZ,1.635597e+09,ZH,47.3763,8.54805
Eidg. Anstalt für Wasserversorgung - EAWAG,7.461922e+07,,,
"Eidg. Forschungsanstalt für Wald,Schnee,Land - WSL",4.836039e+07,,,
Eidg. Hochschulinstitut für Berufsbildung - EHB,2.086572e+06,,,


In [104]:
gp = clean_data1.groupby('University').filter(lambda x: (x['Canton'] == '').all())
gp

KeyError: 'University'

In [119]:
import geocoder

for uni in clean_data1.index:
    uni_tmp = uni
    if uni_tmp.find('-') != -1:
        uni_tmp = uni_tmp.replace(uni_tmp[uni_tmp.find('-'):],'')
    if uni_tmp.find('(') != -1:
        uni_tmp = uni_tmp.replace(uni_tmp[uni_tmp.find('('):],'')
        
    

    response = geocoder.google(uni_tmp,components="country:CH")
    json_out = response.json
    if 'state' in json_out.keys():
        clean_data1['Canton'].loc[uni]    = json_out['state']
        clean_data1['Latitude'].loc[uni]  = json_out['lat']
        clean_data1['Longitude'].loc[uni] = json_out['lng']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [120]:
clean_data1

Unnamed: 0_level_0,Total Amount,Canton,Latitude,Longitude
University,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AO Research Institute - AORI,3.435621e+06,38,-19.2,17.58333
Allergie- und Asthmaforschung - SIAF,1.916996e+07,,,
Berner Fachhochschule - BFH,3.102870e+07,,,
Biotechnologie Institut Thurgau - BITG,2.492535e+06,TG,47.6038,9.05574
Centre de rech. sur l'environnement alpin - CREALP,1.567678e+06,,,
EPF Lausanne - EPFL,1.175316e+09,VD,46.519,6.5676
ETH Zürich - ETHZ,1.635597e+09,ZH,47.3765,8.54809
Eidg. Anstalt für Wasserversorgung - EAWAG,7.461922e+07,,,
"Eidg. Forschungsanstalt für Wald,Schnee,Land - WSL",4.836039e+07,,,
Eidg. Hochschulinstitut für Berufsbildung - EHB,2.086572e+06,,,


In [175]:
print(sum(clean_data1['Canton'] == ''))
response = geocoder.google('PHFHNW',components="country:CH")
json_out = response.json
json_out

36


{'accuracy': 'APPROXIMATE',
 'address': 'Switzerland',
 'bbox': {'northeast': [47.8084545, 10.4923401],
  'southwest': [45.8179199, 5.956080099999999]},
 'confidence': 1,
 'country': 'CH',
 'encoding': 'utf-8',
 'lat': 46.818188,
 'lng': 8.227511999999999,
 'location': 'PHFHNW',
 'ok': True,
 'place': 'ChIJYW1Zb-9kjEcRFXvLDxG1Vlw',
 'provider': 'google',
 'quality': 'country',
 'status': 'OK',
 'status_code': 200}

In [27]:
map_with_uni = folium.Map(location=[46.76, 8.26], tiles='Mapbox Bright',zoom_start=8)
for indx in range(clean_data.shape[0]):
    if clean_data.iloc[indx]['Canton'] != '' and indx <5:
        folium.Marker(location=[clean_data.iloc[indx]['Latitude'],clean_data.iloc[indx]['Longitude']],
                      popup=clean_data.iloc[indx]['University']).add_to(map_with_uni)
map_with_uni

In [41]:
total_amt_per_canton = pd.DataFrame(clean_data.groupby('Canton')
                                    .apply(lambda x: x['Approved Amount'].astype(float).sum()))
total_amt_per_canton.reset_index(level=0, inplace=True)
total_amt_per_canton.columns = ['Canton','Total Amount']
total_amt_per_canton.drop(0, inplace= True)
total_amt_per_canton

Unnamed: 0,Canton,Total Amount
1,BE,1519373000.0
2,BS,1352251000.0
3,FR,457526200.0
4,NE,383204600.0
5,VD,1183291000.0
6,ZH,1826843000.0


In [123]:
state_geo = r'ch-cantons.topojson.json'
map2 = folium.Map(location=[46.76, 8.26], zoom_start=3, tiles='Mapbox Bright')
map2.choropleth(geo_path=state_geo, data=clean_data1,
             columns=['Canton', 'Total Amount'],
             key_on='feature.id',
             fill_color='YlGn', fill_opacity=0.7, line_opacity=0.2,topojson = 'objects.cantons',
             legend_name='Unemployment Rate (%)')
map2



KeyError: 'UR'

<folium.folium.Map at 0x1163ef9b0>