Read the dataset, find fountains that already exist in Wikidata, and write their QIDs to the dataset.

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import re
from scipy import spatial
%matplotlib inline
import sys
# import gastrodon
from gastrodon import RemoteEndpoint,QName,ttl,URIRef,inline
# Widen pandas' text rendering so long labels and URIs are not cut off.
pd.set_option("display.width", 120)
pd.set_option("display.max_colwidth", 100)

# Namespace prefixes handed to the SPARQL endpoint object.
# A `wikibase:` declaration was drafted but left disabled (the draft line is malformed):
#@prefix wikibase: <wikibase: <http://wikiba.se/ontology#> .
_prefix_declarations = inline("""
   @prefix wd: <http://www.wikidata.org/entity/> .
   @prefix wdt: <http://www.wikidata.org/prop/direct/> .
   @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
   @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
""")
prefixes = _prefix_declarations.graph

In [3]:
# Connection to the public Wikidata Query Service SPARQL endpoint.
# HTTPS is used directly: it is the service's published address, so requests
# are neither sent in clear text nor bounced through an HTTP -> HTTPS redirect.
endpoint = RemoteEndpoint(
    "https://query.wikidata.org/sparql",
    prefixes=prefixes,
)



In [4]:
# SPARQL template for a proximity search around one point.
# It has three %s placeholders, filled with the % operator before sending:
#   1st and 2nd -> the two coordinates inside the WKT "Point(...)" literal
#                  (the lookup loop passes the X column, then the Y column),
#   3rd         -> the search radius (the lookup loop passes 0.005 and describes
#                  it as 5 meters, i.e. the value is in kilometres).
# Only items that are an instance of (P31), via any chain of subclass-of (P279),
# Q43483 or Q483453 are kept -- per the inline comment: water well or fountain.
# NOTE(review): the wikibase:, bd: and geo: prefixes are not declared in this
# notebook; presumably the query service predefines them -- confirm.
query = """ SELECT ?place ?placeLabel ?location
WHERE
{
  # Enter coordinates
  SERVICE wikibase:around {
    ?place wdt:P625 ?location .
    bd:serviceParam wikibase:center "Point(%s %s)"^^geo:wktLiteral.
    bd:serviceParam wikibase:radius "%s" .
  } .
  # Is a water well or fountain or subclass of fountain
  FILTER (EXISTS { ?place wdt:P31/wdt:P279* wd:Q43483 } || EXISTS { ?place wdt:P31/wdt:P279* wd:Q483453 }).
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  } 
}
  """

# Load data

In [5]:
# Fountain inventory CSV, resolved relative to the notebook's working directory.
FOUNTAINS_CSV = "20170918_brunnen_zuerich.csv"
data_raw = pd.read_csv(FOUNTAINS_CSV)

In [6]:
# Preview the first five rows to check the columns that were loaded.
data_raw.head(n=5)

Unnamed: 0,X,Y,X_dms,Y-dms,primaryind,nummer,brunnenart,historisch,wasserart_,bezeichnun
0,8.599255,47.369752,"E8°35'57.31""","N47°22'11.10""",1,510.0,öffentlicher Brunnen,1970.0,Verteilnetz,Aussichtsturm
1,8.590811,47.369293,"E8°35'26.92""","N47°22'9.453""",2,349.0,öffentlicher Brunnen,1933.0,Verteilnetz,
2,8.575754,47.36613,"E8°34'32.71""","N47°21'58.06""",3,365.0,öffentlicher Brunnen,1965.0,Quellwasser,Biberlinterrasse
3,8.564845,47.370993,"E8°33'53.44""","N47°22'15.57""",4,338.0,öffentlicher Brunnen,1910.0,Quellwasser,
4,8.56439,47.369327,"E8°33'51.80""","N47°22'9.576""",5,6069.0,Notwasserbrunnen,1988.0,Quellwasser,


In [7]:
# Start every row with an empty 'qid'; the lookup loop fills in matches later.
data_raw.loc[:, 'qid'] = ''

In [8]:
# For each fountain, ask Wikidata for wells/fountains next to its coordinates
# and record the outcome in the 'qid' column:
#   exactly one hit      -> the QID of that item
#   more than one hit    -> 'conflicting'
#   no hit / query fails -> the existing value is left untouched
MAX_ATTEMPTS = 5          # give up on a fountain after this many failed queries
SEARCH_RADIUS_KM = 0.005  # search within 5 meters

# .head() limits the lookup to the first five rows of the dataset.
for index, fountain in data_raw.head().iterrows():
    query_result = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            query_result = endpoint.select(
                query % (fountain.X, fountain.Y, SEARCH_RADIUS_KM)
            )
            break
        except Exception as error:
            # `except Exception` instead of a bare `except:` so that a kernel
            # interrupt (KeyboardInterrupt) still stops the cell, and the real
            # error is shown instead of assuming every failure is a timeout.
            print('query failed (attempt %d of %d). fountain number: ' % (attempt, MAX_ATTEMPTS),
                  fountain.nummer, '-', repr(error))

    if query_result is None:
        # Bounded retries: a persistent failure no longer loops forever.
        print('giving up. fountain number: ', fountain.nummer)
        continue

    match_count = query_result.shape[0]
    if match_count == 1:
        print('fountain found')
        # presumably the value looks like 'wd:Q123'; keep the part after the prefix
        data_raw.loc[index, 'qid'] = query_result.loc[0,'place'].split(':')[1]
    elif match_count > 1:
        data_raw.loc[index, 'qid'] = 'conflicting'

fountain found
fountain found


In [9]:
# Show the frame after the lookup: 'qid' holds a QID or 'conflicting' for
# matched rows among those processed, and is still empty everywhere else.
data_raw

Unnamed: 0,X,Y,X_dms,Y-dms,primaryind,nummer,brunnenart,historisch,wasserart_,bezeichnun,qid
0,8.599255,47.369752,"E8°35'57.31""","N47°22'11.10""",1,510.0,öffentlicher Brunnen,1970.0,Verteilnetz,Aussichtsturm,Q4115189
1,8.590811,47.369293,"E8°35'26.92""","N47°22'9.453""",2,349.0,öffentlicher Brunnen,1933.0,Verteilnetz,,
2,8.575754,47.366130,"E8°34'32.71""","N47°21'58.06""",3,365.0,öffentlicher Brunnen,1965.0,Quellwasser,Biberlinterrasse,
3,8.564845,47.370993,"E8°33'53.44""","N47°22'15.57""",4,338.0,öffentlicher Brunnen,1910.0,Quellwasser,,Q27230192
4,8.564390,47.369327,"E8°33'51.80""","N47°22'9.576""",5,6069.0,Notwasserbrunnen,1988.0,Quellwasser,,
5,8.560110,47.365197,"E8°33'36.39""","N47°21'54.71""",6,317.0,öffentlicher Brunnen,1903.0,Quellwasser,,
6,8.562215,47.365124,"E8°33'43.97""","N47°21'54.44""",7,329.0,öffentlicher Brunnen,1907.0,Quellwasser,,
7,8.560303,47.366987,"E8°33'37.08""","N47°22'1.151""",8,327.0,öffentlicher Brunnen,1906.0,Quellwasser,,
8,8.556960,47.368493,"E8°33'25.05""","N47°22'6.576""",9,315.0,öffentlicher Brunnen,1903.0,Quellwasser,,
9,8.555891,47.369544,"E8°33'21.20""","N47°22'10.35""",10,313.0,öffentlicher Brunnen,1892.0,Quellwasser,,
