In [33]:
import qqespm_module as qq
import qqsimple_module as qs
import pandas as pd
import numpy as np
import geopandas
from geoobject import GeoObj
from ilquadtree import ILQuadTree
import random
from func_timeout import func_timeout, FunctionTimedOut
from copy import deepcopy
from copy import deepcopy
from time import time

In [3]:
def read_df_csv(data_dir = 'data/pois_paraiba_OSM.csv'):
    """Load the POI table from a CSV file and parse its WKT geometry column.

    Parameters
    ----------
    data_dir : str
        Path of the CSV file to read (despite the name, a file path rather
        than a directory, as the default value shows).

    Returns
    -------
    pandas.DataFrame
        The table as read by ``pd.read_csv``, with the ``geometry`` column
        replaced by the result of ``geopandas.GeoSeries.from_wkt``.
    """
    table = pd.read_csv(data_dir, low_memory=False)
    parsed_geometries = geopandas.GeoSeries.from_wkt(table['geometry'])
    table['geometry'] = parsed_geometries
    return table

def get_df_surrounding_bbox(pois, delta = 0.01):
    """Return the bounding box of all geometry centroids, padded by ``delta``.

    The box is computed without touching ``pois``: no helper columns are
    added or dropped, so existing ``lon``/``lat`` columns in the caller's
    frame are preserved.

    Parameters
    ----------
    pois : pandas.DataFrame
        Frame with a ``geometry`` column whose items expose ``.centroid`` and
        whose centroid exposes ``.coords``.
    delta : float
        Margin subtracted from the minima and added to the maxima.

    Returns
    -------
    tuple
        ``(min_x - delta, min_y - delta, max_x + delta, max_y + delta)`` where
        x and y are the first and second centroid coordinates.

    Raises
    ------
    ValueError
        If ``pois`` contains no geometries.
    """
    centroid_coords = [np.array(geom.centroid.coords[0]) for geom in pois['geometry']]
    if not centroid_coords:
        raise ValueError('cannot compute a bounding box: `pois` has no geometries')
    lons_lats = np.vstack(centroid_coords)
    lons, lats = lons_lats[:, 0], lons_lats[:, 1]
    # nanmin/nanmax ignore NaN coordinates, matching pandas' default skipna behaviour.
    return (np.nanmin(lons) - delta, np.nanmin(lats) - delta, np.nanmax(lons) + delta, np.nanmax(lats) + delta)

# Load the POI table from the default CSV path.
pois = read_df_csv()

# Number of occurrences of each value in the three tag columns.
amenity_totals = pois['amenity'].value_counts()
shop_totals = pois['shop'].value_counts()
tourism_totals = pois['tourism'].value_counts()

# Keep the values seen more than 100 times, in amenity -> shop -> tourism order.
most_frequent_keywords = [
    keyword
    for totals in (amenity_totals, shop_totals, tourism_totals)
    for keyword in totals[totals > 100].index.tolist()
]

print('Total most frequent keywords:', len(most_frequent_keywords))

print(pois.shape)
pois.head()

Total most frequent keywords: 66
(40410, 6)


Unnamed: 0,osm_id,geometry,name,amenity,shop,tourism
0,4843736221,POINT (-35.23422 -5.89278),Super coop,,supermarket,
1,10861346942,POINT (-35.42379 -5.63600),,,clothes,
2,477221805,"POLYGON ((-36.70129 -8.19197, -36.70100 -8.192...",,parking,,
3,9298135216,POINT (-34.87934 -8.06423),Esposende,,shoes,
4,737434871,"POLYGON ((-34.97234 -8.13835, -34.97207 -8.138...",Primeira Igreja Batista,place_of_worship,,


In [4]:
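# Reference bounding boxes in degrees (coordinate order inferred from the value ranges; confirm before reuse):
# PARAIBA (lat max, lat min, lon max, lon min): -5.9275, -8.3610, -34.7415, -38.8559
# AROUND PARAIBA (lon min, lat min, lon max, lat max): -41.418, -9.254, -32.794, -4.544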

In [9]:
# Bounding box of all POI centroids, padded by the function's default delta;
# the returned tuple is ordered (min lon, min lat, max lon, max lat).
total_bbox = get_df_surrounding_bbox(pois)
total_bbox

(-41.427930599999996,
 -9.263716199850455,
 -34.783308600000005,
 -4.534062599689478)

In [10]:
def generate_ilquadtree(pois, total_bbox_ilq, max_depth = 3, keyword_columns = None, insertion_fraction = 1.0):
    """Build an ILQuadTree from the leading fraction of the objects in ``pois``.

    Parameters
    ----------
    pois : DataFrame of points of interest, passed to
        ``GeoObj.get_objects_from_geopandas``.
    total_bbox_ilq : bounding box handed to the ``ILQuadTree`` constructor.
    max_depth : int
        Maximum depth handed to the ``ILQuadTree`` constructor.
    keyword_columns : list of str, optional
        Columns holding the keywords of each object. Defaults to
        ``['amenity', 'shop', 'tourism']``; a fresh list is created on every
        call so the default is never shared between calls.
    insertion_fraction : float
        Fraction of the objects (taken from the start of the list) to insert.

    Returns
    -------
    ILQuadTree
        The tree after insertion.
    """
    if keyword_columns is None:
        keyword_columns = ['amenity', 'shop', 'tourism']
    objs = GeoObj.get_objects_from_geopandas(pois, keyword_columns = keyword_columns)
    ilq = ILQuadTree(total_bbox = total_bbox_ilq, max_depth = max_depth)
    # NOTE(review): the "+ 1" makes the slice one element longer than the exact
    # fraction (clamped at len(objs)); kept because the dataset-building cell
    # in this notebook uses the same cut-off expression.
    n_to_insert = int(insertion_fraction * len(objs)) + 1
    ilq.insert_elements_from_list(objs[0:n_to_insert])
    return ilq

In [19]:
#pois['geometry'] = geopandas.GeoSeries.from_wkt(pois['geometry'])
def get_centroid(geom):
    """Return ``geom`` itself when it is a Point, otherwise ``geom.centroid``."""
    return geom if geom.geom_type == 'Point' else geom.centroid
# Compute every centroid once and derive the coordinate columns from it.
pois['centroid'] = pois['geometry'].apply(get_centroid)
pois['lon'] = pois['centroid'].apply(lambda e: e.x) # longitudes
pois['lat'] = pois['centroid'].apply(lambda e: e.y) # latitudes
# Shuffle the rows with a fixed random_state so that anything sliced from the
# frame afterwards is the same on every run. Sorting beforehand is unnecessary
# because the shuffle discards the row order anyway.
pois = pois.sample(frac=1, random_state=42)
#pois.fillna('', inplace = True)
print('Tamanho do dataset:', pois.shape)
pois.head()

Tamanho do dataset: (40410, 9)


Unnamed: 0,osm_id,geometry,name,amenity,shop,tourism,centroid,lon,lat
36767,6416487776,POINT (-38.12966 -5.24872),Unopar,university,,,POINT (-38.1296614 -5.248719599680504),-38.129661,-5.24872
24058,197031934,"POLYGON ((-35.87057 -7.22764, -35.86892 -7.228...",Hospital Doutor Edgley,hospital,,,POINT (-35.86939118737756 -7.227615786319496),-35.869391,-7.227616
22050,201658167,"POLYGON ((-37.06333 -6.44977, -37.06251 -6.450...",Cê Ky Sabe Motel,,,motel,POINT (-37.062748642753164 -6.449747662403363),-37.062749,-6.449748
21109,6509662600,POINT (-35.27478 -7.40743),Prefeitura Municipal de Camutanga,townhall,,,POINT (-35.2747809 -7.407434099728714),-35.274781,-7.407434
39734,8409699909,POINT (-34.92145 -8.05074),,vending_machine,,,POINT (-34.921453299999996 -8.05074029976375),-34.921453,-8.05074


In [11]:
# Replace every LineString geometry by its centroid; other geometry types are left untouched.
is_linestring = pois['geometry'].apply(lambda e: e.geom_type) == 'LineString'
pois.loc[is_linestring, 'geometry'] = pois.loc[is_linestring, 'geometry'].apply(lambda e: e.centroid)

In [12]:
# Values present in each tag column, concatenated in amenity -> shop -> tourism order.
# A value that occurs in more than one column is listed once per column.
distinct_keywords = [
    keyword
    for column in ('amenity', 'shop', 'tourism')
    for keyword in pois[column].value_counts().index.tolist()
]
len(distinct_keywords)

343

In [13]:
# Total number of non-missing entries across the three tag columns.
total_keywords = sum(pois[column].notna().sum() for column in ('amenity', 'shop', 'tourism'))
total_keywords

40604

In [15]:
# Convert the POI rows into GeoObj instances, taking keywords from the three tag columns.
# The later cells slice this list by position, so its order determines each dataset subset.
objs = GeoObj.get_objects_from_geopandas(pois, keyword_columns = ['amenity', 'shop', 'tourism'])

In [16]:
# Spot check: show the keywords of one object. Which object sits at index 4
# depends on the row order of `pois` at the time `objs` was built (the frame
# is shuffled elsewhere in this notebook).
objs[4].keywords()

{'place_of_worship'}

In [20]:
# Tight bounding box (no margin) of the centroid coordinates:
# (min lon, min lat, max lon, max lat). This rebinds the earlier `total_bbox`.
total_bbox = (
    pois['lon'].min(),
    pois['lat'].min(),
    pois['lon'].max(),
    pois['lat'].max(),
)
total_bbox

(-41.4179306, -9.253716199850455, -34.7933086, -4.544062599689478)

In [21]:
# One empty ILQuadTree per dataset-size label; all share the same bounding box and depth.
dataset = {
    label: ILQuadTree(total_bbox = total_bbox, max_depth = 3)
    for label in ('20%', '40%', '60%', '80%', '100%')
}

In [22]:
# Fill each index with the leading slice of `objs` given by its percentage label.
for percentage, index in dataset.items():
    proportion = float(percentage.rstrip('%')) / 100.0
    cutoff = int(proportion * len(objs)) + 1
    index.insert_elements_from_list(objs[:cutoff])

In [24]:
# Print the slice end used for each percentage label (same expression as the insertion loop).
for percentage in dataset.keys():
    proportion = float(percentage.rstrip('%')) / 100.0
    cutoff = int(proportion * len(objs)) + 1
    print(cutoff)

8083
16165
24247
32329
40411


In [25]:
def generate_pattern_from_structure(structure, candidate_keywords, qq_module, qualitative_prob, seed = None):
    """Build a random spatial pattern graph over a given edge structure.

    Parameters
    ----------
    structure : sequence of pairs
        Each pair ``(a, b)`` is an edge between the vertices with ids ``a``
        and ``b``. Ids need not be contiguous or start at zero.
    candidate_keywords : sequence
        Pool from which one distinct keyword per vertex is sampled.
    qq_module : module-like
        Object exposing ``SpatialVertex``, ``SpatialEdge`` and
        ``SpatialPatternGraph``.
    qualitative_prob : float
        Probability that an edge receives a qualitative relation.
    seed : int, optional
        When given, every random draw comes from generators seeded with this
        value, so identical arguments yield an identical pattern and the
        global random state is left untouched. When ``None`` the global
        ``random`` / ``np.random`` generators are used.

    Returns
    -------
    tuple
        ``(spatial_pattern, seed)``; the pattern also gets a
        ``qualitative_prob`` attribute.

    Raises
    ------
    ValueError
        From ``random.sample`` when there are fewer candidate keywords than
        vertices.
    """
    if seed is None:
        py_rng, np_rng = random, np.random
    else:
        py_rng, np_rng = random.Random(seed), np.random.RandomState(seed)

    # Sorted unique ids give a deterministic vertex order.
    vertices_ids = sorted({vertex_id for edge in structure for vertex_id in edge})
    keywords = py_rng.sample(candidate_keywords, len(vertices_ids))
    # Look vertices up by id instead of by list position, so edges stay
    # correct even when the ids are not exactly 0..n-1.
    vertex_by_id = {
        vertex_id: qq_module.SpatialVertex(vertex_id, keyword)
        for vertex_id, keyword in zip(vertices_ids, keywords)
    }
    vertices = [vertex_by_id[vertex_id] for vertex_id in vertices_ids]

    edges = []
    for i, edge in enumerate(structure):
        lij = py_rng.random()*1000 # lower distance bound in [0, 1000)
        uij = py_rng.random()*10000 + lij + 1 # upper bound in [lij + 1, lij + 10001)
        sign = py_rng.choice(['<','>','<>','-'])
        relation_type = np_rng.choice([None, 'related'], size = 1, p = [1-qualitative_prob, qualitative_prob])[0]
        if relation_type == 'related':
            relation = py_rng.choice(['contains', 'within', 'intersects', 'disjoint'])
        else:
            relation = None
        edges.append(qq_module.SpatialEdge(i, vertex_by_id[edge[0]], vertex_by_id[edge[1]], lij, uij, sign, relation))
    sp = qq_module.SpatialPatternGraph(vertices, edges)
    sp.qualitative_prob = qualitative_prob
    return sp, seed

In [31]:
# Generate the benchmark spatial patterns (original note: "generate optimized patterns").
spatial_patterns = []
seeds = []
seeds_by_pattern = {}
# Each structure is a list of edges between vertex ids.
pattern_structures = [
    [(0,1)], 
    [(0,1),(1,2)],
    [(0,1),(1,2),(2,0)],
    [(0,1),(1,2),(2,3)],
    [(0,1),(1,2),(1,3)],
    [(0,1),(1,2),(2,3),(3,0)],
    [(0,1),(1,2),(2,3),(3,1)],
    [(0,1),(1,2),(2,3),(3,1),(3,4)],
    [(0,1),(1,2),(2,3),(3,4),(4,0)],
    [(0,1),(1,2),(2,3),(3,4),(4,1)],
    [(0,1),(1,2),(2,3),(3,4),(4,1),(1,5)],
    [(0,1),(1,2),(2,3),(3,4),(4,5),(5,0),(2,5)],
]

qualitative_probs = [1/2, 1/3]
patterns_per_configuration = 5  # patterns drawn for each (structure, qualitative_prob) pair
base_seed = 0  # the seed of the k-th generated pattern is base_seed + k

for structure in pattern_structures:
    for qualitative_prob in qualitative_probs:
        for i in range(patterns_per_configuration):
            pattern_seed = base_seed + len(spatial_patterns)
            # Seed the global generators as well, so the draw is reproducible
            # whether or not generate_pattern_from_structure consumes `seed` itself.
            random.seed(pattern_seed)
            np.random.seed(pattern_seed)
            sp, seed = generate_pattern_from_structure(structure, most_frequent_keywords, qq, qualitative_prob, seed = pattern_seed)
            spatial_patterns.append(sp)
            seeds.append(seed)
            seeds_by_pattern[sp] = seed
print('Total spatial patterns:', len(spatial_patterns))

Total spatial patterns: 120


In [34]:
# Smoke run: search the first generated pattern in the 20% index.
# QQESPM returns three values, unpacked here as the solutions, the elapsed time
# and the memory usage (units are not visible from this notebook — TODO confirm).
# debug=True is passed, and the run prints a step-by-step trace.
solutions, elapsed_time, memory_usage = qq.QQESPM(ilq=dataset['20%'], sp=spatial_patterns[0], debug=True)
len(solutions), elapsed_time, memory_usage

Computing qq-n-matches for edges
- Total n-matches for edge 0: 99
Computing qq-e-matches for edges
- Computing qq-e-matches for edge 0
- Total e-matches for edge 0: 43
Number of skip-edges: 0
Joining qq-e-matches


(43, 0.050362348556518555, 343.8984375)