In [1]:
import pandas as pd
import geopandas
import os
import qqespm_quadtree_CGA as qq2
import pickle

In [2]:
# Path of the source POI CSV (despite the name, this points at a file).
data_dir = 'data/london_pois_5500.csv'
pois = pd.read_csv(data_dir, low_memory=False)

# Both geometry-like columns are stored as WKT text in the CSV; parse each
# of them into geometry objects in place.
for wkt_column in ('geometry', 'centroid'):
    pois[wkt_column] = geopandas.GeoSeries.from_wkt(pois[wkt_column])

In [3]:
# Preview the first rows to confirm the CSV loaded and the WKT columns parsed.
pois.head()

Unnamed: 0,osm_id,name,amenity,shop,tourism,landuse,leisure,building,geometry,centroid
0,995869070,MP Express Telecom Ltd,,mobile_phone,,,,,"POLYGON ((-0.12896 51.51178, -0.12897 51.51178...",POINT (-0.12898 51.51182)
1,1216229012,,,,,,garden,,"POLYGON ((-0.09660 51.51328, -0.09658 51.51325...",POINT (-0.09681 51.51328)
2,256782037,King's Cross Methodist Church,place_of_worship,,,,,church,"POLYGON ((-0.12246 51.52989, -0.12249 51.52992...",POINT (-0.12233 51.53001)
3,9306966309,Tank & Paddle Minster Court,bar,,,,,,"POLYGON ((-0.08123 51.51092, -0.08133 51.51082...",POINT (-0.08133 51.51092)
4,414489471,,,,,residential,,,"POLYGON ((-0.10249 51.49547, -0.10266 51.49543...",POINT (-0.10322 51.49646)


In [4]:
# (rows, columns) of the full POI table.
pois.shape

(38000, 10)

In [5]:
# Collect the geometry type name of every POI footprint, then list the
# distinct values to see which kinds of geometry are present.
geometry_kinds = pois['geometry'].apply(lambda geom: geom.geom_type)
geometry_kinds.unique()

array(['Polygon'], dtype=object)

In [6]:
# Draw random subsets of the POI table at increasing sizes.
# A fixed seed is passed to every draw so that a Restart & Run All yields the
# same subsets (and therefore the same CSVs and indexes built from them);
# without it each run would sample different rows.
SAMPLE_SEED = 42

pois_20perc = pois.sample(frac=0.2, random_state=SAMPLE_SEED)
pois_40perc = pois.sample(frac=0.4, random_state=SAMPLE_SEED)
pois_60perc = pois.sample(frac=0.6, random_state=SAMPLE_SEED)
pois_80perc = pois.sample(frac=0.8, random_state=SAMPLE_SEED)
# frac=1.0 keeps every row but returns them in shuffled order.
pois_100perc = pois.sample(frac=1.0, random_state=SAMPLE_SEED)

In [7]:
# Write every sampled subset to its own CSV (row index omitted).
for subset, out_path in (
    (pois_20perc, 'data/london_pois_5500_20perc.csv'),
    (pois_40perc, 'data/london_pois_5500_40perc.csv'),
    (pois_60perc, 'data/london_pois_5500_60perc.csv'),
    (pois_80perc, 'data/london_pois_5500_80perc.csv'),
    (pois_100perc, 'data/london_pois_5500_100perc.csv'),
):
    subset.to_csv(out_path, index=False)

In [8]:
# Root folder for the generated indexes: one sub-folder per sample size is
# created under it, and the pickled index objects are written into it.
ilq_base_folder = 'ilquadtrees_london_5500'
# Common prefix of the sampled CSVs; '_<perc>.csv' is appended when reading.
base_csv_filename = 'data/london_pois_5500'
# Labels of the sample sizes; used in CSV names, folder names and dict keys.
percs = ['20perc', '40perc', '60perc', '80perc', '100perc']
# Columns handed to the index builder as `keyword_columns`.
keyword_columns=['amenity','shop','tourism','landuse','leisure','building']

In [9]:
# Make sure the base index folder and one sub-folder per sample size exist.
# exist_ok=True makes the cell safe to re-run and avoids the separate
# "check, then create" steps, which can race with another process.
os.makedirs(f"{ilq_base_folder}", exist_ok=True)
for perc in percs:
    os.makedirs(f"{ilq_base_folder}/{perc}", exist_ok=True)

In [10]:
# Empty every per-sample index folder so the indexes are rebuilt from scratch.
# WARNING: destructive - every entry directly inside these folders is deleted.
# (`os` is already imported in the notebook's import cell, so it is not
# re-imported here.)
for perc in percs:
    perc_dir = os.path.join(ilq_base_folder, perc)
    for file in os.listdir(perc_dir):
        # os.remove only handles files; a sub-directory here raises an error.
        os.remove(os.path.join(perc_dir, file))

In [14]:
# Build one index per sample size, keyed by the sample label.
ilq = {}
for perc in percs:
    sample_csv = f'{base_csv_filename}_{perc}.csv'
    sample_ilq_dir = f'{ilq_base_folder}/{perc}'
    ilq[perc] = qq2.generate_remote_ilquadtree(
        sample_csv,
        ilq_folder=sample_ilq_dir,
        max_depth=6,
        max_items=500,
        keyword_columns=keyword_columns,
    )

In [15]:
# Persist each built index object as a pickle inside the base index folder.
for perc in percs:
    pickle_path = f'{ilq_base_folder}/ilq_{perc}.pkl'
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(ilq[perc], pickle_file)