In [1]:
import qqespm_quadtree_CGA as qq2
import random
import numpy as np
import pickle

In [2]:
def generate_pattern_from_structure(structure, candidate_keywords, qq_module, qualitative_prob, pois_df, max_complexity = 100000000):
    vertices_ids = []
    for e in structure:
        vertices_ids.extend(e)
    vertices_ids = list(set(vertices_ids))
    
    max_freq1, max_freq2 = float('inf'), float('inf')
    while max_freq1 * max_freq2 > max_complexity:
        keywords = random.sample(candidate_keywords, len(vertices_ids))
        keywords_frequencies = qq_module.get_keywords_frequencies(keywords, pois_df, column_names = ['amenity','shop','tourism','landuse','leisure','building'])
        max_freq1, max_freq2 = sorted(list(keywords_frequencies.values()), reverse=True)[:2]
    #print('Chosen keywords and their frequencies:', keywords_frequencies, ' / Complexity:', max_freq1*max_freq2)

    vertices = [ qq_module.SpatialVertex(vertices_ids[i], keywords[i]) for i in range(len(vertices_ids)) ]

    edges = []
    for i, edge in enumerate(structure):
        lij = random.random()*1000 #choose a number between 0 and 1km
        min_uij, max_uij = lij + 200, lij + 2000
        uij = random.random()*(max_uij-min_uij) + min_uij#choose a number between lij and 4km
        sign = random.choice(['<','>','<>','-'])
        relation_type = np.random.choice([None, 'related'], size = 1, p = [1-qualitative_prob, qualitative_prob])[0]
        if relation_type == 'related':
            #relation = random.choice(['equals', 'touches', 'covers', 'coveredby', 'overlaps', 'disjoint'])
            relation = random.choice(['contains', 'within', 'intersects', 'disjoint'])
        else:
            relation = None
        edges.append(qq_module.SpatialEdge(i, vertices[edge[0]], vertices[edge[1]], lij, uij, sign, relation))
    sp = qq_module.SpatialPatternGraph(vertices, edges)
    sp.qualitative_prob = qualitative_prob
    return sp

In [3]:
pois = qq2.read_df_csv(data_dir = 'data/london_pois_bbox_100perc.csv')

amenity_totals = pois.amenity.value_counts()
shop_totals = pois.shop.value_counts()
tourism_totals = pois.tourism.value_counts()
landuse_totals = pois.landuse.value_counts()
leisure_totals = pois.leisure.value_counts()
building_totals = pois.building.value_counts()


most_frequent_keywords = amenity_totals[amenity_totals>30].index.tolist() + \
    shop_totals[shop_totals>30].index.tolist() + \
    tourism_totals[tourism_totals>30].index.tolist() + \
    landuse_totals[landuse_totals>30].index.tolist() + \
    leisure_totals[leisure_totals>30].index.tolist() + \
    building_totals[building_totals>30].index.tolist()

most_frequent_keywords = list(set(most_frequent_keywords))

print('Total most frequent keywords:', len(most_frequent_keywords))

Total most frequent keywords: 186


In [4]:
most_frequent_keywords[:5]

['attraction', 'e_cigarette', 'toys', 'supermarket', 'recreation_ground']

In [5]:
#gerar padrões
spatial_patterns = []
seeds = []
seeds_by_pattern = {}
pattern_structures = [
    [(0,1)], 
    [(0,1),(1,2)],
    [(0,1),(1,2),(2,0)],
    [(0,1),(1,2),(2,3)],
    [(0,1),(1,2),(1,3)],
    [(0,1),(1,2),(2,3),(3,0)],
    [(0,1),(1,2),(2,3),(3,1)],
    [(0,1),(1,2),(2,3),(3,1),(3,4)],
    #[(0,1),(1,2),(2,3),(3,4),(4,0)],
    #[(0,1),(1,2),(2,3),(3,4),(4,1)],
    # [(0,1),(1,2),(2,3),(3,4),(4,1),(1,5)],
    # [(0,1),(1,2),(2,3),(3,4),(4,5),(5,0),(2,5)],
]

qualitative_probs = [1, 1/2, 1/3, 1/4]
num_repetitions = 4

for i in range(num_repetitions):
    for structure in pattern_structures:
        for qualitative_prob in qualitative_probs:
            sp = generate_pattern_from_structure(structure, most_frequent_keywords, qq2, qualitative_prob, pois_df=pois)
            spatial_patterns.append(sp)

print('Total generated spatial patterns:', len(spatial_patterns))



Total generated spatial patterns: 128


In [6]:
with open('spatial_patterns_for_experiments_london.pkl', 'wb') as f:
    pickle.dump(spatial_patterns, f)