In [1]:
import pickle
from pathlib import Path

# Directory this notebook reads its pickled input from and writes its results to.
intermediate_output = Path("intermediate_output")

# NOTE: unpickling can execute arbitrary code, so only load files that were
# written by a trusted step of this pipeline.
uri_phrase_path = intermediate_output / 'URIwithPhrase.pkl'
with uri_phrase_path.open('rb') as handle:
    uri_dic = pickle.load(handle)

In [2]:
# Example: inspect the phrase -> list-of-URI-lists mapping stored for one map image.
uri_dic['USGS-15-CA-brawley-e1957-s1957-p1961.jpg']

{'Fairgrounds': [['http://linkedgeodata.org/triplify/node472459712'],
  ['http://linkedgeodata.org/triplify/node1621257769'],
  ['http://linkedgeodata.org/triplify/node1621264958'],
  ['http://linkedgeodata.org/triplify/node570580091'],
  ['http://linkedgeodata.org/triplify/way136245036'],
  ['http://linkedgeodata.org/triplify/way136247824'],
  ['http://linkedgeodata.org/triplify/way147612875'],
  ['http://linkedgeodata.org/triplify/way147612880'],
  ['http://linkedgeodata.org/triplify/node983913294']],
 'Lilac Drain': [],
 'El Centro': [['http://linkedgeodata.org/triplify/node2623608640'],
  ['http://linkedgeodata.org/triplify/node3802571884'],
  ['http://linkedgeodata.org/triplify/way71745414'],
  ['http://linkedgeodata.org/triplify/node1243747194'],
  ['http://linkedgeodata.org/triplify/node2464414044'],
  ['http://linkedgeodata.org/triplify/node2682445513'],
  ['http://linkedgeodata.org/triplify/node3732231293'],
  ['http://linkedgeodata.org/triplify/node676126734'],
  ['http://lin

In [3]:
"""
Get coordinate information for URIs.
The query combines its three alternative graph patterns with UNION (logical OR).
"""

from SPARQLWrapper import SPARQLWrapper, JSON

def sparql_query_loc(uri, endpoint="http://linkedgeodata.org/sparql", timeout=None):
    """Query a SPARQL endpoint for the location information attached to one URI.

    The query is a UNION of three patterns, so any subset of the result
    variables may be bound in a given result row:

    * ``?lat`` / ``?long``         - ``wgs84_pos`` coordinates of the URI itself
    * ``?lat_geo`` / ``?long_geo`` - ``wgs84_pos`` coordinates of a resource
      linked to the URI through ``owl:sameAs``
    * ``?longlat``                 - WKT literal of the URI's ``geom:geometry``

    Args:
        uri: The subject term exactly as it should appear in the query, i.e.
            already wrapped in angle brackets (``"<http://...>"``). It is
            interpolated into the query verbatim, so it must come from a
            trusted source.
        endpoint: URL of the SPARQL endpoint. Defaults to the LinkedGeoData
            endpoint that was previously hard-coded.
        timeout: Optional request timeout in seconds. ``None`` (the default)
            keeps the library's default behaviour.

    Returns:
        The response converted from the JSON return format, as produced by
        ``SPARQLWrapper.query().convert()``.

    Raises:
        Whatever SPARQLWrapper raises for network, endpoint or parsing errors.
    """
    sparql = SPARQLWrapper(endpoint)
    if timeout is not None:
        # Lets callers bound the time spent on an unresponsive endpoint.
        sparql.setTimeout(timeout)

    sparql.setQuery("""

Prefix lgdo: <http://linkedgeodata.org/ontology/>
Prefix geom: <http://geovocab.org/geometry#>
Prefix ogc: <http://www.opengis.net/ont/geosparql#>
Prefix owl: <http://www.w3.org/2002/07/owl#>
Prefix wgs84_pos: <http://www.w3.org/2003/01/geo/wgs84_pos#>
Prefix gn: <http://www.geonames.org/ontology#>

Select ?lat ?long ?lat_geo ?long_geo ?longlat  {
    { %s 
        wgs84_pos:lat ?lat ;
        wgs84_pos:long ?long } UNION
        
    { %s owl:sameAs ?geo .
    ?geo wgs84_pos:lat ?lat_geo ;
        wgs84_pos:long ?long_geo } UNION
        
    { %s geom:geometry [ogc:asWKT ?longlat] }
   
} 
    """ %(uri, uri, uri))

    sparql.setReturnFormat(JSON)

    return sparql.query().convert()

In [4]:
# Iterating a dict yields its keys, i.e. the map image names, in insertion order.
map_names = list(uri_dic)

In [5]:
# Show the map names in order; the cells below select each map by its index in this list.
map_names

['USGS-15-CA-brawley-e1957-s1957-p1961.jpg',
 'USGS-15-CA-capesanmartin-e1921-s1917.jpg',
 'USGS-15-CA-paloalto-e1899-s1895-rp1911.jpg',
 'USGS-15-CA-sanfrancisco-e1899-s1892-rp1911.jpg',
 'USGS-30-CA-dardanelles-e1898-s1891-rp1912.jpg',
 'USGS-30-CA-holtville-e1907-s1905-rp1946.jpg',
 'USGS-30-CA-indiospecial-e1904-s1901-rp1910.jpg',
 'USGS-30-CA-lompoc-e1943-s1903-ap1941-rv1941.jpg',
 'USGS-30-CA-sanpedro-e1943-rv1944.jpg',
 'USGS-60-CA-alturas-e1892-rp1904.jpg',
 'USGS-60-CA-amboy-e1942.jpg',
 'USGS-60-CA-amboy-e1943-rv1943.jpg',
 'USGS-60-CA-modoclavabed-e1886-s1884.jpg',
 'USGS-60-CA-saltonsea-e1943-ap1940-rv1942.jpg']

In [6]:
import time
# Accumulators shared across cells; getLoc() fills both of them in place.
uri_loc_dic = {}    # map name -> phrase -> URI -> location values
uri_fail_list = []  # URIs whose location lookup raised an error

def getLoc(map_name):
    """Look up location data for every URI linked to the phrases of one map.

    Reads ``uri_dic[map_name]`` (phrase -> list of URI lists), queries each
    URI with ``sparql_query_loc`` and stores the outcome in the module-level
    ``uri_loc_dic`` as ``{map_name: {phrase: {uri: {variable: value}}}}``.
    When several result bindings carry the same variable, the value from the
    last binding wins.

    URIs whose lookup raises an ``Exception`` are appended to the module-level
    ``uri_fail_list`` and left out of the result; processing then continues
    with the next URI.

    Args:
        map_name: Key of ``uri_dic`` identifying the map to process.

    Returns:
        The module-level ``uri_loc_dic`` (the same object, updated in place).

    Raises:
        KeyError: If ``map_name`` is not a key of ``uri_dic``.
    """
    phrase_locations = {}
    for phrase, uri_lists in uri_dic[map_name].items():
        uri_locations = {}
        for uri_list in uri_lists:
            for uri in uri_list:
                try:
                    results = sparql_query_loc("<" + uri + ">")

                    location = {}
                    for binding in results["results"]["bindings"]:
                        for variable, cell in binding.items():
                            location[variable] = cell['value']
                except Exception:
                    # Best effort: remember the failing URI and keep going.
                    # Catching Exception instead of using a bare `except`
                    # lets KeyboardInterrupt / SystemExit stop a long run.
                    uri_fail_list.append(uri)
                else:
                    uri_locations[uri] = location

        phrase_locations[phrase] = uri_locations
    uri_loc_dic[map_name] = phrase_locations
    return uri_loc_dic

In [7]:
# Time the location lookup for the map at index 0.
start_time = time.time()

uri_loc_dic = getLoc(map_names[0])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 291


In [8]:
# Time the location lookup for the map at index 1.
start_time = time.time()

uri_loc_dic = getLoc(map_names[1])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 1410


In [9]:
# Time the location lookup for the map at index 2.
start_time = time.time()

uri_loc_dic = getLoc(map_names[2])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 3567


In [10]:
# Time the location lookup for the map at index 3.
start_time = time.time()

uri_loc_dic = getLoc(map_names[3])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 2247


In [11]:
# Time the location lookup for the map at index 4.
start_time = time.time()

uri_loc_dic = getLoc(map_names[4])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 360


In [12]:
# Time the location lookup for the map at index 5.
start_time = time.time()

uri_loc_dic = getLoc(map_names[5])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 168


In [13]:
# Time the location lookup for the map at index 6.
start_time = time.time()

uri_loc_dic = getLoc(map_names[6])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 715


In [14]:
# Time the location lookup for the map at index 7.
start_time = time.time()

uri_loc_dic = getLoc(map_names[7])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 1813


In [15]:
# Time the location lookup for the map at index 8.
start_time = time.time()

uri_loc_dic = getLoc(map_names[8])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 3490


In [16]:
# Time the location lookup for the map at index 9.
start_time = time.time()

uri_loc_dic = getLoc(map_names[9])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 1714


In [17]:
# Time the location lookup for the map at index 10.
start_time = time.time()

uri_loc_dic = getLoc(map_names[10])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 1241


In [18]:
# Checkpoint: persist the results gathered so far to a file in the working directory.
with Path('tmp_partial_loc_result1.pkl').open('wb') as handle:
    pickle.dump(uri_loc_dic, handle)

In [19]:
# Time the location lookup for the map at index 11.
start_time = time.time()

uri_loc_dic = getLoc(map_names[11])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 1167


In [20]:
# Time the location lookup for the map at index 12.
start_time = time.time()

uri_loc_dic = getLoc(map_names[12])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 577


In [21]:
# Time the location lookup for the map at index 13.
start_time = time.time()

uri_loc_dic = getLoc(map_names[13])

elapsed_seconds = int(time.time() - start_time)
print("Duration: %d" % elapsed_seconds)

Duration: 795


In [22]:
# Final result: for every processed map, each phrase's URIs with their location values.
name_loc_path = intermediate_output / 'name_loc_result.pkl'
with name_loc_path.open('wb') as handle:
    pickle.dump(uri_loc_dic, handle)

In [23]:
# URIs for which the location lookup failed, written one per line.
fail_list_path = intermediate_output / 'uri_failtogetLoc_list.txt'
with fail_list_path.open('w') as filehandle:
    filehandle.writelines('%s\n' % failed_uri for failed_uri in uri_fail_list)