In [1]:
""" 
1. References:
http://linkedgeodata.org/OnlineAccess/SparqlEndpoints
http://linkedgeodata.org/OSM
http://linkedgeodata.org/sparql


2. Case insensitive, partial matching (searching time)
  FILTER regex (?o, 'black mountain' , "i")

case-insensitive: FILTER (lcase(str(?o)) = \"%s\")

'BLACK MOUNTAIN', 'black mountain': no result
Only "Black Mountain" has results. """


# case-sensitive exact matching
# get SPARQL query results for use in the labeling function
from SPARQLWrapper import SPARQLWrapper, JSON

def sparql_query(name):
    """Look up LinkedGeoData resources whose ``rdfs:label`` equals ``name``.

    The label is compared as an exact, case-sensitive string literal, and each
    match must also carry ``wgs84_pos:lat`` and ``wgs84_pos:long`` values.

    Parameters
    ----------
    name : str
        Label to search for. It is converted with ``str()`` and escaped before
        being embedded in the query, so names containing quotes or backslashes
        (e.g. "O'Neill") do not produce a malformed query.

    Returns
    -------
    The value of ``sparql.query().convert()`` with the JSON return format
    selected. Callers in this notebook read ``["results"]["bindings"]`` and
    the ``s``, ``lat`` and ``long`` variables from it.
    """
    sparql = SPARQLWrapper("http://linkedgeodata.org/sparql")

    # A short SPARQL string literal ('...') cannot contain a raw backslash,
    # quote or line break, so escape them before interpolating the name.
    literal = (
        str(name)
        .replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    # Every prefix used in the pattern is declared explicitly (rdfs: was
    # previously left to whatever the endpoint predefines); unused prefixes
    # were dropped.
    sparql.setQuery("""
Prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
Prefix wgs84_pos: <http://www.w3.org/2003/01/geo/wgs84_pos#>

Select * {
    ?s rdfs:label '%s' ;
        wgs84_pos:lat ?lat ;
        wgs84_pos:long ?long .
}
    """ % literal)

    sparql.setReturnFormat(JSON)

    return sparql.query().convert()

In [2]:
def sparql_query2(name):
    """Look up LinkedGeoData resources linked to a GeoNames entry named ``name``.

    The pattern selects ``?s`` with an ``owl:sameAs`` link to ``?geo``, where
    ``?geo`` has ``gn:name`` equal to the given string literal (exact,
    case-sensitive) together with ``wgs84_pos:lat`` and ``wgs84_pos:long``.

    Parameters
    ----------
    name : str
        Name to search for. It is converted with ``str()`` and escaped before
        being embedded in the query, so names containing quotes or backslashes
        (e.g. "O'Neill") do not produce a malformed query.

    Returns
    -------
    The value of ``sparql.query().convert()`` with the JSON return format
    selected. Callers in this notebook read ``["results"]["bindings"]`` and
    the ``s``, ``geo``, ``lat`` and ``long`` variables from it.
    """
    sparql = SPARQLWrapper("http://linkedgeodata.org/sparql")

    # A short SPARQL string literal ('...') cannot contain a raw backslash,
    # quote or line break, so escape them before interpolating the name.
    literal = (
        str(name)
        .replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    sparql.setQuery("""
Prefix owl: <http://www.w3.org/2002/07/owl#>
Prefix gn: <http://www.geonames.org/ontology#>
Prefix wgs84_pos: <http://www.w3.org/2003/01/geo/wgs84_pos#>

Select * {
    ?s owl:sameAs ?geo .
    ?geo gn:name '%s' ;
        wgs84_pos:lat ?lat ;
        wgs84_pos:long ?long .

}""" % literal)

    sparql.setReturnFormat(JSON)

    return sparql.query().convert()

In [3]:
import json

# Read the phrase dictionary produced by extractPhrase_USGS(GT).ipynb
# (in the make RDF folder).
with open('map_phrase_USGS(GT).json', 'r') as f:
    phrase_dic = json.loads(f.read())

In [4]:
# Title-case every phrase word by word, e.g. "black mountain" -> "Black Mountain".
# str.capitalize() upper-cases the first character of a word and lower-cases
# the rest; words are re-joined with single spaces.
new_dic = {}
for key, phrases in phrase_dic.items():
    new_dic[key] = [
        " ".join(word.capitalize() for word in phrase.split())
        for phrase in phrases
    ]

In [5]:
# Display the capitalized phrase lists for a visual check.
new_dic

{'USGS-15-CA-brawley-e1957-s1957-p1961.jpg': ['Fairgrounds',
  'Lilac Drain',
  'El Centro',
  'Southern Pacific',
  'Cotton Gin',
  'Sunbeam Recreation Area',
  'Rio Vista Ave',
  '10th St',
  'Sumac Canal',
  'Lateral Drain',
  'Bm 107',
  'Mc Call Drain One',
  'United States Department Of The Interior Geological Survey',
  'Bryant Canal',
  'Sumac Lateral',
  'Rubber Drain',
  'Mound',
  'Tokay Canal',
  'Fig Canal',
  'Westside Main Sandal',
  'Dahlia Canal',
  'Central Drain',
  'Landing Field',
  'Thorn One Canal',
  'Sunbeam Lake',
  'Flax Canal',
  'Reid Sch',
  'Thorn Canal',
  'Wt',
  'Radio Facility',
  'Edgar',
  'Palm Ave',
  'Purification Plant',
  'Pearson Drain',
  'Witter Sch',
  'Imperial',
  'Pioneer Memorial Hospital',
  'Bm 115',
  'Hotville',
  'Best Canal',
  'Rose Drain',
  'Tuberose Canal',
  'Imperial Cem',
  'Bm 111',
  'Labor Camp',
  'Rockwood Canal',
  'Dandelion Canal',
  'Dahlia Lateral',
  'Date Canal',
  'Thistle Lateral',
  'Imperial Valley',
  'Po',

In [6]:
import time

In [7]:
def getURI1(key):
    """Query LinkedGeoData by label for every phrase of one map.

    Parameters
    ----------
    key
        Key into the module-level ``new_dic``; its value is the list of
        phrases that are looked up one by one with ``sparql_query``.

    Returns
    -------
    dict
        Maps each successfully queried phrase to a list of
        ``[uri, lat, long]`` entries taken from the ``s``, ``lat`` and
        ``long`` bindings (an empty list when nothing matched). Phrases whose
        lookup failed are left out of the dict and reported on stdout.
    """
    tmp_dic = dict()
    for phrase in new_dic[key]:
        try:
            results = sparql_query(phrase)
            # Build the whole list before storing it so a malformed binding
            # cannot leave a half-filled entry behind.
            # NOTE: matches are not restricted to the map's bounding box.
            matches = [
                [binding['s']['value'], binding['lat']['value'], binding['long']['value']]
                for binding in results["results"]["bindings"]
            ]
        except Exception as exc:
            # Best effort: one failing phrase must not abort the whole run.
            # Catching Exception (rather than a bare except) lets
            # KeyboardInterrupt / SystemExit stop this long-running loop.
            print("getURI1: lookup failed for %r: %s" % (phrase, exc))
            continue
        tmp_dic[phrase] = matches
    return tmp_dic

In [8]:
# Run the label-based lookup (getURI1) for every map in new_dic and report
# the elapsed wall-clock time in whole seconds.
start_time = time.time()
uri_dic = {key: getURI1(key) for key in new_dic}

print("Duration: %d" % int(time.time() - start_time))

Duration: 955


In [9]:
import pickle

# Save the label-based lookup results (uri_dic) as a pickle file, written in
# binary mode; the path is relative to the current working directory.
with open('sparql_usgs_result1.pkl', 'wb') as handle:
    pickle.dump(uri_dic, handle)

In [10]:
# Display the label-based lookup results for a visual check.
uri_dic 

{'USGS-15-CA-brawley-e1957-s1957-p1961.jpg': {'Fairgrounds': [['http://linkedgeodata.org/triplify/node472459712',
    '40.1582',
    '-105.129'],
   ['http://linkedgeodata.org/triplify/node1621257769', '44.5123', '-92.9172'],
   ['http://linkedgeodata.org/triplify/node1621264958', '44.5155', '-92.914'],
   ['http://linkedgeodata.org/triplify/node570580091', '39.9318', '-104.874'],
   ['http://linkedgeodata.org/triplify/node983913294', '35.5489', '-117.71']],
  'Lilac Drain': [],
  'El Centro': [['http://linkedgeodata.org/triplify/node2623608640',
    '39.1842',
    '-78.1658'],
   ['http://linkedgeodata.org/triplify/node3802571884', '20.8711', '-102.626'],
   ['http://linkedgeodata.org/triplify/node1243747194', '40.3614', '-2.85419'],
   ['http://linkedgeodata.org/triplify/node2464414044', '3.23032', '-76.4193'],
   ['http://linkedgeodata.org/triplify/node2682445513', '1.11737', '-76.3254'],
   ['http://linkedgeodata.org/triplify/node3732231293', '19.0596', '-70.1523'],
   ['http://lin

In [11]:
def getURI2(key):
    """Query LinkedGeoData via GeoNames names for every phrase of one map.

    Parameters
    ----------
    key
        Key into the module-level ``new_dic``; its value is the list of
        phrases that are looked up one by one with ``sparql_query2``.

    Returns
    -------
    dict
        Maps each successfully queried phrase to a list of
        ``[uri, lat, long, geo_uri]`` entries taken from the ``s``, ``lat``,
        ``long`` and ``geo`` bindings (an empty list when nothing matched).
        Phrases whose lookup failed are left out of the dict and reported on
        stdout.
    """
    tmp_dic = dict()
    for phrase in new_dic[key]:
        try:
            results = sparql_query2(phrase)
            # Build the whole list before storing it so a malformed binding
            # cannot leave a half-filled entry behind.
            matches = [
                [
                    binding['s']['value'],
                    binding['lat']['value'],
                    binding['long']['value'],
                    binding['geo']['value'],
                ]
                for binding in results["results"]["bindings"]
            ]
        except Exception as exc:
            # Best effort: one failing phrase must not abort the whole run.
            # Catching Exception (rather than a bare except) lets
            # KeyboardInterrupt / SystemExit stop this long-running loop.
            print("getURI2: lookup failed for %r: %s" % (phrase, exc))
            continue
        tmp_dic[phrase] = matches
    return tmp_dic

In [12]:
# Run the GeoNames-name-based lookup (getURI2) for every map in new_dic and
# report the elapsed wall-clock time in whole seconds.
start_time = time.time()
uri_dic2 = {key: getURI2(key) for key in new_dic}

print("Duration: %d" % int(time.time() - start_time))

Duration: 916


In [13]:
# Display the GeoNames-name-based lookup results for a visual check.
uri_dic2

{'USGS-15-CA-brawley-e1957-s1957-p1961.jpg': {'Fairgrounds': [],
  'Lilac Drain': [],
  'El Centro': [['http://linkedgeodata.org/triplify/node357266204',
    '18.45022',
    '-66.95601',
    'http://sws.geonames.org/4563529/']],
  'Southern Pacific': [],
  'Cotton Gin': [],
  'Sunbeam Recreation Area': [['http://linkedgeodata.org/triplify/node358793093',
    '32.78339',
    '-115.68917',
    'http://sws.geonames.org/5399936/']],
  'Rio Vista Ave': [],
  '10th St': [],
  'Sumac Canal': [],
  'Lateral Drain': [],
  'Bm 107': [],
  'Mc Call Drain One': [],
  'United States Department Of The Interior Geological Survey': [],
  'Bryant Canal': [],
  'Sumac Lateral': [],
  'Rubber Drain': [],
  'Mound': [['http://linkedgeodata.org/triplify/node358789455',
    '34.96525',
    '-117.67839',
    'http://sws.geonames.org/5375100/']],
  'Tokay Canal': [],
  'Fig Canal': [],
  'Westside Main Sandal': [],
  'Dahlia Canal': [],
  'Central Drain': [],
  'Landing Field': [],
  'Thorn One Canal': [],
  

In [14]:
# Save the GeoNames-name-based lookup results (uri_dic2) as a pickle file,
# written in binary mode; the path is relative to the current working directory.
with open('sparql_usgs_result2.pkl', 'wb') as handle:
    pickle.dump(uri_dic2, handle)