## Download data from OSM

In [2]:
!wget https://download.openstreetmap.fr/extracts/asia/israel.osm.pbf
!wget https://download.openstreetmap.fr/extracts/asia/palestine.osm.pbf

--2021-12-24 08:35:40--  https://download.openstreetmap.fr/extracts/asia/israel.osm.pbf
Resolving download.openstreetmap.fr (download.openstreetmap.fr)... 2a01:e0d:1:c:58bf:fac1:0:14, 213.36.253.212
Connecting to download.openstreetmap.fr (download.openstreetmap.fr)|2a01:e0d:1:c:58bf:fac1:0:14|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 79738628 (76M)
Saving to: ‘israel.osm.pbf’


2021-12-24 08:35:48 (9.89 MB/s) - ‘israel.osm.pbf’ saved [79738628/79738628]

--2021-12-24 08:35:48--  https://download.openstreetmap.fr/extracts/asia/palestine.osm.pbf
Resolving download.openstreetmap.fr (download.openstreetmap.fr)... 2a01:e0d:1:c:58bf:fac1:0:14, 213.36.253.212
Connecting to download.openstreetmap.fr (download.openstreetmap.fr)|2a01:e0d:1:c:58bf:fac1:0:14|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 33443399 (32M)
Saving to: ‘palestine.osm.pbf’


2021-12-24 08:35:52 (8.60 MB/s) - ‘palestine.osm.pbf’ saved [33443399/33443399]



## Convert to gpkg

In [3]:
!ogr2ogr israel.osm.gpkg israel.osm.pbf
!ogr2ogr palestine.osm.gpkg palestine.osm.pbf

0...10...20...30...40...50...60...70...80...90...100 - done.
0...10...20...30...40...50...60...70...80...90...100 - done.


In [4]:
import fiona
# Show which layers the converted GeoPackage contains; the later cells read
# the 'points' and 'multipolygons' layers.
fiona.listlayers('israel.osm.gpkg')

['points', 'lines', 'multilinestrings', 'multipolygons', 'other_relations']

## Hebrew Labels

In [1]:
import dataflows as DF
import fiona
import pghstore
import json
from fuzzywuzzy import process, fuzz

# Country extracts to read; each contributes the features of its 'points' layer.
FILES = ['israel', 'palestine']
sources = [fiona.open(f'{f}.osm.gpkg', layer='points').filter() for f in FILES]

# ranks.json carries Hebrew and Arabic place names, so decode it explicitly as
# UTF-8 instead of relying on the platform's default text encoding.
with open('ranks.json', encoding='utf-8') as ranks:
    RANKS = list(json.load(ranks).values())

# Each rank record is a list. The positions used in this notebook are:
#   [0] layer, [5] symbolrank, [6] filterrank, [7] text anchor (see find_from_rank)
#   [3] and [4] are the two names a record can be looked up by; judging by the
#   printed sample these are the local-script name and the English name.
RANKS_MAP = {}
RANK_NAMES = []
for r in RANKS:
    RANKS_MAP[r[3]] = r
    RANKS_MAP[r[4]] = r
    RANK_NAMES.extend([r[3], r[4]])
# Drop missing/empty names so they are never offered as match candidates.
RANK_NAMES = [n for n in RANK_NAMES if n]

# Quick sanity output: a sample of records, one known lookup, and the sizes.
print(RANKS[:10])
print(RANKS_MAP['שדה בוקר'])
print(len(RANKS))
print(len(RANK_NAMES))

def find_from_rank(min_score=94):
    """Build a flow that keeps only rows whose name fuzzy-matches a ranked place.

    For every row, each non-empty value stored under the `name`, `name:he`,
    `name:en` or `name:ar` property is compared (case-insensitively, with
    `fuzz.UQRatio`) against every entry of the module-level RANK_NAMES. The
    best-scoring entry wins, provided it scores strictly above `min_score`.
    The winning entry's record from RANKS_MAP then fills the row's
    `filterrank`, `symbolrank`, `text_anchor` and `layer` fields. Rows with no
    match above the threshold are dropped.

    Args:
        min_score: A candidate must score strictly higher than this to count
            as a match. The default of 94 is the threshold that used to be
            hard-coded here.

    Returns:
        A `DF.Flow` that declares the four output fields and then applies the
        matching step.
    """
    name_keys = {'name', 'name:he', 'name:en', 'name:ar'}

    # The parameter must stay named `rows`: the matcher consumes and yields a
    # whole stream of rows. The recorded run printed one 'Found' line per source.
    def func(rows):
        hits = 0
        total = 0
        # Lower-case every candidate once per stream rather than once per
        # (row, name, candidate) comparison.
        candidates = [(choice, choice.lower()) for choice in RANK_NAMES]
        for row in rows:
            names = [v for k, v in row['properties'].items() if k in name_keys and v]
            total += 1
            max_name = None
            max_score = min_score
            for name in names:
                lowered_name = name.lower()
                for choice, lowered_choice in candidates:
                    score = fuzz.UQRatio(lowered_name, lowered_choice)
                    if score > max_score:
                        max_score = score
                        max_name = choice
            if max_name is not None:
                rank = RANKS_MAP[max_name]
                row['filterrank'] = rank[6]
                row['symbolrank'] = rank[5]
                row['text_anchor'] = rank[7]
                row['layer'] = rank[0]
                yield row
                hits += 1
        print('Found {}/{}'.format(hits, total))

    return DF.Flow(
        # Defaults declared here are overwritten for every row that is kept.
        DF.add_field('filterrank', 'integer', 2),
        DF.add_field('symbolrank', 'integer', 13),
        DF.add_field('text_anchor', 'string', None),
        DF.add_field('layer', 'string', ''),
        func
    )

# Build the Hebrew place-label dataset from the OSM point features and dump it
# to data/place_labels_he.
# NOTE(review): the 'Locations and Bounds' cell unpacks the same .process()
# call as `dp, _ = ...`, so `dp` here is presumably a (datapackage, stats)
# pair rather than the datapackage itself. Confirm before using it.
dp = DF.Flow(
    # One feature stream per extract.
    *sources,
    # Keep the raw `other_tags` string (or '' when absent) as its own field...
    DF.add_field('tags', 'string', lambda r: r['properties'].get('other_tags') or ''),
    # ...then parse it with pghstore and merge the tags into `properties` in place.
    lambda row: row['properties'].update(pghstore.loads(row['tags'])),
    DF.add_field('place', 'string', lambda r: r['properties'].get('place')),
    DF.add_field('orig_name', 'string', lambda r: r['properties'].get('name')),
    # Keep settlement-type places, plus anything that carries a population tag.
    DF.filter_rows(lambda r: r['place'] in {'city', 'town', 'village', 'hamlet'} or r['properties'].get('population') is not None),
    # Labels need a single coordinate, so only Point geometries pass.
    DF.filter_rows(lambda r: r['geometry'] and r['geometry']['type'] == 'Point'),
    # Cache point named 'heb_labels'.
    # NOTE(review): the recorded run printed 'using checkpoint data', so the
    # stages above were presumably served from the cache. Clear the checkpoint
    # after refreshing the extracts (TODO confirm where it is stored).
    DF.checkpoint('heb_labels'),
    # Attach rank fields; rows that match no ranked name are dropped here.
    find_from_rank(),
    # DF.add_field('population', 'string', lambda r: r['properties'].get('population')),
    # Prefer the Hebrew name and fall back to the plain OSM name.
    DF.add_field('name', 'string', lambda r: r['properties'].get('name:he') or r['orig_name']),
    DF.select_fields(['place', 'name', 'geometry', 'filterrank', 'symbolrank', 'text_anchor']),
    # Merge the per-extract resources into a single 'place_labels_he' resource.
    DF.concatenate(
        dict((k, []) for k in ['place', 'name', 'geometry', 'filterrank', 'symbolrank', 'text_anchor']),
        target=dict(name='place_labels_he', path='./place_labels_he.geojson')
    ),
    # Constant attributes added to every label.
    DF.add_field('class', 'string', 'settlement'),
    DF.add_field('worldview', 'string', 'all'),
    DF.add_field('name_script', 'string', 'Hebrew'),
    # capital is 2 for the row named ירושלים and 0 for every other row.
    DF.add_field('capital', 'integer', lambda r: 2 if r['name'] == 'ירושלים' else 0),
    DF.add_field('disputed', 'boolean', False),
    # Replace the geometry mapping with just its coordinates list, typed as geopoint.
    DF.set_type('geometry', type='geopoint', transform=lambda v: list(v['coordinates']), resources=None),
    DF.dump_to_path('data/place_labels_he', force_format=False),
    DF.printer(tablefmt='html')
).process()



[['settlement-major-label', 'settlement', 'city', 'عمان\u200e', 'Amman', 7, 1, 'bottom-left'], ['settlement-minor-label', 'settlement', 'city', 'العريش', 'Al Arish', 11, 3, 'bottom'], ['settlement-minor-label', 'settlement', 'city', 'الخليل', 'Hebron', 10, 3, 'bottom'], ['settlement-minor-label', 'settlement', 'city', 'العقبة', 'Aqaba', 11, 3, 'bottom'], ['settlement-minor-label', 'settlement', 'city', 'ערד', 'Arad', 9, 2, 'bottom'], ['settlement-minor-label', 'settlement', 'city', 'غزة', 'Gaza', 9, 2, 'bottom'], ['settlement-minor-label', 'settlement', 'city', 'באר שבע', 'Beer Sheva', 9, 1, 'bottom'], ['settlement-minor-label', 'settlement', 'city', 'ירושלים', 'Jerusalem', 8, 1, 'bottom'], ['settlement-minor-label', 'settlement', 'city', 'القريات', 'Qurayyat', 13, 1, 'bottom'], ['settlement-minor-label', 'settlement', 'city', 'חיפה', 'Haifa', 10, 3, 'bottom']]
['settlement-major-label', 'settlement', 'village', 'שדה בוקר', 'Sde Boker', 14, 1, 'bottom']
2562
5074
using checkpoint data 

Found 1319/1453
Found 725/905


#,place (string),name (string),geometry (geopoint),filterrank (integer),symbolrank (integer),text_anchor (string),class (string),worldview (string),name_script (string),capital (integer),disputed (boolean)
1,city,אילת,"[34.9497949, 29.5569348]",1.0,12.0,bottom,settlement,all,Hebrew,0.0,False
2,city,ירושלים,"[35.2257626, 31.778824200000003]",1.0,8.0,bottom,settlement,all,Hebrew,2.0,False
3,village,יבנאל,"[35.5060043, 32.7026978]",3.0,18.0,bottom,settlement,all,Hebrew,0.0,False
4,town,קיסריה,"[34.9057861, 32.5114971]",1.0,13.0,bottom,settlement,all,Hebrew,0.0,False
5,city,נהריה,"[35.094557900000005, 33.006306]",1.0,12.0,bottom,settlement,all,Hebrew,0.0,False
6,village,שדות ים,"[34.8933053, 32.492159300000004]",1.0,14.0,bottom,settlement,all,Hebrew,0.0,False
7,town,אור יהודה,"[34.8523936, 32.0309712]",1.0,12.0,bottom,settlement,all,Hebrew,0.0,False
8,town,נס ציונה,"[34.7990609, 31.929557700000004]",1.0,12.0,bottom,settlement,all,Hebrew,0.0,False
9,town,נוף הגליל,"[35.318321600000004, 32.7023065]",1.0,12.0,bottom,settlement,all,Hebrew,0.0,False
10,village,אבו גוש,"[35.1088651, 31.806353]",1.0,14.0,bottom,settlement,all,Hebrew,0.0,False


## Locations and Bounds

In [None]:
import dataflows as DF
import fiona
import pghstore
from shapely.geometry import shape
from shapely.ops import unary_union

# Country extracts to read; each contributes the features of its
# 'multipolygons' layer.
FILES = ['israel', 'palestine']
sources = [
    fiona.open(f'{extract}.osm.gpkg', layer='multipolygons').filter()
    for extract in FILES
]

# Accepted `place` values. The flow below only tests membership in the keys.
RANKS = {
    'city': 0,
    'town': 1,
    'village': 2,
    'hamlet': 3,
}

def names(r):
    """Return a feature's Hebrew names, falling back to its plain `name`.

    Collects the non-empty values of every property whose key carries the
    `:he` language suffix (for example `name:he`). If there are none, the
    plain `name` property is used instead.

    Args:
        r: A row with a `properties` mapping of tag keys to values.

    Returns:
        A non-empty list of names, or None when the feature has neither a
        Hebrew-suffixed value nor a plain name.
    """
    properties = r['properties']
    # Match the ':he' suffix rather than any key that merely ends in "he", and
    # skip empty/None values so they are never reported as a name.
    hebrew = [v for k, v in properties.items() if k.endswith(':he') and v]
    if hebrew:
        return hebrew
    name = properties.get('name')
    return [name] if name else None
    
def bounds(r):
    """Return the bounding box covering every geometry collected on a row.

    `r['geometry']` is iterated as a sequence of geometry mappings. Each one
    is converted with `shape`, they are merged with `unary_union`, and the
    `.bounds` of the merged geometry is returned.
    """
    merged = unary_union([shape(part) for part in r['geometry']])
    return merged.bounds

# Build one row per named settlement polygon with its bounding box, and dump
# the result to data/place_bounds_he.zip.
dp, _ = DF.Flow(
    # One feature stream per extract.
    *sources,
    # Keep the raw `other_tags` string (or '' when absent) as its own field...
    DF.add_field('tags', 'string', lambda r: r['properties'].get('other_tags') or ''),
    # ...then parse it with pghstore and merge the tags into `properties` in place.
    lambda row: row['properties'].update(pghstore.loads(row['tags'])),
    DF.add_field('place', 'string', lambda r: r['properties'].get('place')),
    DF.add_field('boundary', 'string', lambda r: r['properties'].get('boundary')),
    DF.add_field('population', 'string', lambda r: r['properties'].get('population')),
    # Only settlement types listed in RANKS, and only (Multi)Polygon geometries.
    DF.filter_rows(lambda r: r['place'] in RANKS.keys()),
    DF.filter_rows(lambda r: r['geometry'] and 'Polygon' in r['geometry']['type']),
    # names() returns None when a feature has no usable name; drop those rows.
    DF.add_field('name', 'array', names),
    DF.filter_rows(lambda r: r['name'] is not None),
    # Grouping key: all of the feature's names joined with '_'.
    DF.add_field('key', 'string', lambda r: '_'.join(r['name'])),
    # Merge the per-extract resources into one, keeping only these fields.
    DF.concatenate(
        dict((k, []) for k in ['place', 'name', 'key', 'geometry', 'population']),
    ),
    # Collapse rows sharing a key into one row, collecting their geometries
    # into an array for bounds().
    # NOTE(review): 'concat' is presumably the default resource name produced
    # by the concatenate step above, and a `None` spec presumably keeps a
    # single value per group. Confirm against the dataflows docs.
    DF.join_with_self('concat', ['key'], dict(
        place=None,
        population=None,
        name=None,
        key=None,
        geometry=dict(aggregate='array')
    )),
    DF.add_field('bounds', 'array', bounds),
    DF.select_fields(['key', 'place', 'name', 'population', 'bounds']),
    # Rename the last resource and set its output path.
    DF.update_resource(-1, name='place_bounds_he', path='place_bounds_he.csv'),
    # 'es:*' properties are extra field metadata.
    # NOTE(review): presumably read by an Elasticsearch loader. TODO confirm.
    DF.set_type('bounds', **{'es:index': False, 'es:itemType': 'number'}),
    DF.set_type('place', **{'es:keyword': True}),
    DF.set_type('name', **{'es:itemType': 'string'}),
    DF.set_type('key', **{'es:keyword': True}),
    DF.set_primary_key(['key']),
    DF.dump_to_zip('data/place_bounds_he.zip'),
    DF.printer(tablefmt='html')
).process()
# Display the descriptor of the first resource as the cell output.
dp.resources[0].descriptor

## Cities without names

In [1]:
import dataflows as DF
import fiona
import pghstore
from shapely.geometry import shape
from shapely.ops import unary_union

# Country extracts to read; each contributes the features of its
# 'multipolygons' layer.
FILES = ['israel', 'palestine']
sources = [
    fiona.open(f'{extract}.osm.gpkg', layer='multipolygons').filter()
    for extract in FILES
]

# Accepted `place` values. The flow below only tests membership in the keys.
RANKS = {
    'city': 0,
    'town': 1,
    'village': 2,
    'hamlet': 3,
}

def names(r):
    """Return a feature's Hebrew names, falling back to its plain `name`.

    Collects the non-empty values of every property whose key carries the
    `:he` language suffix (for example `name:he`). If there are none, the
    plain `name` property is used instead.

    Args:
        r: A row with a `properties` mapping of tag keys to values.

    Returns:
        A non-empty list of names, or None when the feature has neither a
        Hebrew-suffixed value nor a plain name.
    """
    properties = r['properties']
    # Match the ':he' suffix rather than any key that merely ends in "he", and
    # skip empty/None values so they are never reported as a name.
    hebrew = [v for k, v in properties.items() if k.endswith(':he') and v]
    if hebrew:
        return hebrew
    name = properties.get('name')
    return [name] if name else None

# List the settlement polygons for which names() finds no name at all, sorted
# by place type, so they can be looked up in OSM via osm_way_id.
# NOTE(review): `.results()[0][0]` presumably selects the rows of the first
# resource. Confirm against the dataflows docs.
r = DF.Flow(
    # One feature stream per extract.
    *sources,
    # Keep the raw `other_tags` string (or '' when absent) as its own field...
    DF.add_field('tags', 'string', lambda r: r['properties'].get('other_tags') or ''),
    # ...then parse it with pghstore and merge the tags into `properties` in place.
    lambda row: row['properties'].update(pghstore.loads(row['tags'])),
    DF.add_field('place', 'string', lambda r: r['properties'].get('place')),
    DF.add_field('osm_way_id', 'string', lambda r: r['properties'].get('osm_way_id')),
    # Only settlement types listed in RANKS, and only (Multi)Polygon geometries.
    DF.filter_rows(lambda r: r['place'] in RANKS.keys()),
    DF.filter_rows(lambda r: r['geometry'] and 'Polygon' in r['geometry']['type']),
    DF.add_field('name', 'array', names),
    # Inverse of the bounds flow: keep exactly the rows names() could not name.
    DF.filter_rows(lambda r: r['name'] is None),
    DF.sort_rows('{place}'),
    # Geometry is dropped before printing.
    DF.delete_fields(['geometry']),
    DF.printer(tablefmt='html')
).results()[0][0]


  for x in self.iterable:


#,type (string),id (string),properties (object),tags (string),place (string),osm_way_id (string),name (array)
1,Feature,16107,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,120745730,
2,Feature,269205,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,432757889,
3,Feature,269207,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,432757891,
4,Feature,269208,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,432757892,
5,Feature,269209,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,432757893,
6,Feature,281483,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,452985072,
7,Feature,316340,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,502103410,
8,Feature,5233,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,86342235,
9,Feature,386038,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,605653857,
10,Feature,386074,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,605927534,


#,type (string),id (string),properties (object),tags (string),place (string),osm_way_id (string),name (array)
1,Feature,18264,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,265587469,
2,Feature,18274,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,265595164,
3,Feature,263525,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,612000771,
4,Feature,294839,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,767112843,
5,Feature,18260,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,town,265587435,
6,Feature,18262,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,town,265587437,
7,Feature,14923,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,242723361,
8,Feature,16023,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,244087537,
9,Feature,18265,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,265587470,
10,Feature,18266,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,265587471,


In [None]:
# NOTE(review): a bare `_` only echoes IPython's last-output variable. This
# cell was never executed (In [None]) and looks like leftover scratch;
# consider deleting it.
_