In [1]:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import geojson
import json
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from time import time
import shapely

In [None]:
base_index_name = "london_pois_5500_index"
base_dataset_filename = 'london_pois_5500'

In [3]:
ELASTIC_PASSWORD = open('../config/elasticpassword').read()

In [4]:
es = Elasticsearch(
    ['https://localhost:9200'],
    request_timeout=1000,
    basic_auth=('elastic', ELASTIC_PASSWORD),
    verify_certs=False,
    ssl_show_warn=False
)
es.info()

ObjectApiResponse({'name': 'senna', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'RvD8-FfeQ4ezwvkqJbgipQ', 'version': {'number': '8.13.1', 'build_flavor': 'default', 'build_type': 'deb', 'build_hash': '9287f29bba5e270bd51d557b8daccb7d118ba247', 'build_date': '2024-03-29T10:05:29.787251984Z', 'build_snapshot': False, 'lucene_version': '9.10.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [5]:
def get_and_fix_problematic_indices(features):
    problematic_indices = []
    for i, feature in enumerate(features['features']):
        if feature['geometry']['type'] != 'Point':
            geom = shapely.Polygon(feature['geometry']['coordinates'][0])
            if not geom.is_valid:
                geom = geom.convex_hull
                features['features'][i]['geometry'] = geojson.loads(shapely.to_geojson(geom))
                #print(f'Invalid polygon for index {i}, applied convex_hull', feature['properties'], feature['geometry'])
                problematic_indices.append(i)
    print('Total fixed problematic indices:', len(problematic_indices))
    return problematic_indices

In [6]:
def reset_index(es, index_name):
    try:
        es.indices.delete(index=index_name)
    except Exception as e:
        pass
    
    if not es.indices.exists(index=index_name).body:
       es.indices.create(index=index_name, ignore=400, mappings= {
            'properties':{
                'geometry': {
                    'type': 'geo_shape'
                },
                'centroid': {
                    'type': 'geo_point'
                }
            }
        })

In [7]:
def populate_index(es, features, index_name):
    actions = [
        {
            "_index": index_name,
            "_source": feature
        } for i, feature in enumerate(features['features']) # if i not in problematic_indices
    ]

    bulk(es, actions)

In [None]:
percs = ['20perc', '40perc', '60perc', '80perc', '100perc']

In [9]:
features = {}
for perc in percs:
    with open(f'../data/{base_dataset_filename}_{perc}.geojson') as f:
        features[perc] = geojson.load(f)
    print('Total features read:', len(features[perc]['features']))
    get_and_fix_problematic_indices(features[perc])
    index_name = f'{base_index_name}_{perc}'
    reset_index(es, index_name)
    populate_index(es, features[perc], index_name)
    print('Created/Reset and populated index:', index_name)

Total features read: 25595
Total fixed problematic indices: 0


  es.indices.create(index=index_name, ignore=400, mappings= {


Created/Reset and populated index: london_pois_bbox_index_20perc
Total features read: 51190
Total fixed problematic indices: 0


  es.indices.create(index=index_name, ignore=400, mappings= {


Created/Reset and populated index: london_pois_bbox_index_40perc
Total features read: 76785
Total fixed problematic indices: 0


  es.indices.create(index=index_name, ignore=400, mappings= {


Created/Reset and populated index: london_pois_bbox_index_60perc
Total features read: 102380
Total fixed problematic indices: 0


  es.indices.create(index=index_name, ignore=400, mappings= {


Created/Reset and populated index: london_pois_bbox_index_80perc
Total features read: 127975
Total fixed problematic indices: 0


  es.indices.create(index=index_name, ignore=400, mappings= {


Created/Reset and populated index: london_pois_bbox_index_100perc
