## Download data from OSM

In [1]:
# !wget https://download.openstreetmap.fr/extracts/asia/israel.osm.pbf
# !wget https://download.openstreetmap.fr/extracts/asia/palestine.osm.pbf
!wget https://download.geofabrik.de/asia/israel-and-palestine-latest.osm.pbf
!wget https://download.geofabrik.de/asia/jordan-latest.osm.pbf
!wget https://download.geofabrik.de/asia/lebanon-latest.osm.pbf
!wget https://download.geofabrik.de/asia/syria-latest.osm.pbf


--2022-01-07 16:49:00--  https://download.geofabrik.de/asia/israel-and-palestine-latest.osm.pbf
Resolving download.geofabrik.de (download.geofabrik.de)... 95.216.115.119, 116.202.112.212
Connecting to download.geofabrik.de (download.geofabrik.de)|95.216.115.119|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 90839156 (87M) [application/octet-stream]
Saving to: ‘israel-and-palestine-latest.osm.pbf’


2022-01-07 16:49:08 (10.8 MB/s) - ‘israel-and-palestine-latest.osm.pbf’ saved [90839156/90839156]

--2022-01-07 16:49:08--  https://download.geofabrik.de/asia/jordan-latest.osm.pbf
Resolving download.geofabrik.de (download.geofabrik.de)... 116.202.112.212, 95.216.115.119
Connecting to download.geofabrik.de (download.geofabrik.de)|116.202.112.212|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 29604790 (28M) [application/octet-stream]
Saving to: ‘jordan-latest.osm.pbf’


2022-01-07 16:49:11 (10.4 MB/s) - ‘jordan-latest.osm.pbf’ saved [29604

## Convert to gpkg

In [2]:
!ogr2ogr israel-and-palestine.osm.gpkg israel-and-palestine-latest.osm.pbf
!ogr2ogr jordan.osm.gpkg jordan-latest.osm.pbf
!ogr2ogr lebanon.osm.gpkg lebanon-latest.osm.pbf
!ogr2ogr syria.osm.gpkg syria-latest.osm.pbf


0...10...20...30...40...50...60...70...80...90...100 - done.
0...10...20...30...40...50...60...70...80...90...100 - done.
0...10...20...30...40...50...60...70...80...90...100 - done.
0...10...20...30...40...50...60...70...80...90...100 - done.


In [4]:
import fiona
fiona.listlayers('israel.osm.gpkg')

['points', 'lines', 'multilinestrings', 'multipolygons', 'other_relations']

## Hebrew Labels

In [1]:
import dataflows as DF
import fiona
import pghstore
import json
from fuzzywuzzy import process, fuzz
import re
from geopy.distance import distance

HEB = re.compile(r'[א-ת]')
ARAB = re.compile(r'[\u0600-\u06FF]')
def is_heb(s):
    return len(HEB.findall(s)) > len(s)/2

def is_arab(s):
    return len(ARAB.findall(s)) > len(s)/2

simple_ranks = dict(city=0, town=1, village=2, hamlet=3)

FILES = ['israel-and-palestine', 'jordan', 'lebanon', 'syria']
sources = [fiona.open(f'{f}.osm.gpkg', layer='points').filter() for f in FILES]
h_translations = dict()
a_translations = dict()
for source in sources:
    for item in source:
        props = item['properties']
        props.update(pghstore.loads(props.get('other_tags') or '')),
        place = props.get('place')
        if place not in simple_ranks.keys():
            continue
        fields = ['name', 'name:en', 'name:ar', 'name:he']
        values = [props.get(f) for f in fields]
        values = [v for v in values if v]
        # Hebrew
        heb_values = [v for v in values if is_heb(v)]
        non_heb_values = [v for v in values if not is_heb(v)]
        for hv in heb_values:
            for nhv in non_heb_values:
                h_translations.setdefault(nhv, []).append((hv, item['geometry']['coordinates']))
        # Arabic
        arab_values = [v for v in values if is_arab(v)]
        non_arab_values = [v for v in values if not is_arab(v)]
        for av in arab_values:
            for nav in non_arab_values:
                a_translations.setdefault(nav, []).append((av, item['geometry']['coordinates']))
h_translation_keys = list(h_translations.keys())
a_translation_keys = list(a_translations.keys())
ARABIC = ('AR', is_arab, a_translations, a_translation_keys)
HEBREW = ('HE', is_heb, h_translations, h_translation_keys)

def translate(s, lang, src_coords):
    lang, test, translations, translation_keys = lang
    if test(s):
        return s
    # if s in translations:
    #     return translations[s]
    candidate_groups = process.extractBests(s, translation_keys, score_cutoff=75, limit=20, scorer=fuzz.UQRatio)
    name = None
    max_score = 0
    src_coords = [src_coords[1], src_coords[0]]
    for candidate_key, score in candidate_groups:
        candidate_group = translations[candidate_key]
        for candidate, tgt_coords  in candidate_group:
            tgt_coords = [tgt_coords[1], tgt_coords[0]]
            dist = distance(src_coords, tgt_coords).km
            if dist < 5:
                score = 10 * score - dist
                if score > max_score:
                    max_score = score
                    name = candidate
    if name is None:
        print('!!!', lang, s, '->', name)
    return name

def get_geodata():
    with open('ranks1.json') as ranks:
        ranks = list(json.load(ranks).values())
        for rank in ranks:
            name = rank[3]
            name_he = translate(name, HEBREW, rank[6]['coordinates'])
            name_ar = translate(name, ARABIC, rank[6]['coordinates'])
            if name is None:
                continue
            yield dict(
                name=name,
                name_he=name_he,
                name_ar=name_ar,
                symbolrank=rank[5],
                text_anchor=rank[7],
                geometry=rank[6],
                klass=rank[1],
                place=rank[2],
                name_script='Hebrew' if is_heb(name) else ('Arabic' if is_arab(name) else 'English'),
                capital=2 if name_he=='ירושלים' else 0,
                disputed=False
            )

dp = DF.Flow(
    get_geodata(),
    DF.checkpoint('labels_data'),
    DF.add_field('rank', 'integer', lambda r: simple_ranks[r['place']]),
    DF.rename_fields(dict(klass='class')),
    DF.set_type('geometry', type='geojson', transform=json.dumps),
    DF.update_resource(-1, name='place_labels_he', path='place_labels_he.geojson'),
    DF.dump_to_path('data/place_labels_he', force_format=False),
    DF.printer(tablefmt='html')
).process()



saving checkpoint to: .checkpoints/labels_data
!!! الحسنة -> None
!!! العريش -> None
!!! الشيخ زويد -> None
!!! مخيّم النصيرات -> None
!!! מבקיעים -> None
!!! יושיביה -> None
!!! כפר עזה -> None
!!! مخيّم جباليا‎ -> None
!!! נאות סמדר -> None
!!! שדה בוקר -> None
!!! שיזף -> None
!!! מחנה רביב -> None
!!! أبو عمرة -> None
!!! בית קמה -> None
!!! אחוזם -> None
!!! לי און -> None
!!! נטע -> None
!!! אמציה -> None
!!! גבעת הבוסתן -> None
!!! נחושה -> None
!!! שפיר -> None
!!! כפר הרי"ף -> None
!!! גן יבנה -> None
!!! נס הרים -> None
!!! טל שחר -> None
!!! מבוא חורון -> None
!!! בית אריה – עופרים -> None
!!! אלפי מנשה -> None


!!! גן שמואל -> None
!!! אביחיל -> None
!!! עין שריד -> None
!!! שדות ים -> None
!!! מגדים -> None
!!! לוטן -> None
!!! Al-Risheh -> None
!!! חצבה -> None
!!! עיר אובות -> None
!!! נאות הכיכר -> None
!!! קבועה -> None
!!! הר עמשא -> None
!!! אבנת -> None
!!! מעלה חבר -> None
!!! מצוקי דרגות -> None
!!! פני קדם -> None
!!! מעון -> None
!!! עין גדי -> None
!!! מצפה שלם -> None
!!! צור הדסה -> None
!!! אלון -> None
!!! ישוב הדעת -> None
!!! מצפה כרמים -> None
!!! עמנואל -> None
!!! קרני שומרון -> None
!!! חוות סקאלי -> None
!!! מלכישוע -> None
!!! עין השופט -> None
!!! תמרת -> None
!!! כפר קיש -> None
!!! גבע -> None
!!! שבי ציון -> None
!!! כפר שמאי -> None
!!! אמירים -> None
!!! بطيشية -> None
!!! سريفا -> None
!!! برعشيت -> None
!!! الطفيلة -> None
!!! موتة -> None
!!! الربة -> None
!!! لب -> None
!!! مرج الحمام -> None
!!! دابوق -> None
!!! سوف -> None
!!! عجلون -> None
!!! ארבל -> None
!!! טבח'ה -> None
!!! נטור -> None
!!! רמת מגשימים -> None
!!! נאות גולן -> None
!!! מבוא חמה -> No

#,name (string),name_he (string),name_ar (string),symbolrank (integer),text_anchor (string),geometry (geojson),class (string),place (string),name_script (string),capital (integer),disputed (boolean),rank (integer)
1,الحسنة,,الحسنة,16.0,bottom,"{'type': 'Point', 'coordinates': [33.78167152404785, 30.46524675075318]}",settlement,town,Arabic,0.0,False,1.0
2,العريش,,العريش,11.0,bottom,"{'type': 'Point', 'coordinates': [33.804588317871094, 31.12724415616698]}",settlement,city,Arabic,0.0,False,0.0
3,נווה,נווה,نفي,17.0,bottom,"{'type': 'Point', 'coordinates': [34.330129623413086, 31.162063919660113]}",settlement,village,Hebrew,0.0,False,2.0
4,בני נצרים,בני נצרים,بني نتسريم,17.0,bottom,"{'type': 'Point', 'coordinates': [34.31528091430664, 31.143994499280225]}",settlement,village,Hebrew,0.0,False,2.0
5,الشيخ زويد,,الشيخ زويد,13.0,bottom,"{'type': 'Point', 'coordinates': [34.1110897064209, 31.216031292138368]}",settlement,town,Arabic,0.0,False,1.0
6,שלומית,שלומית,شلوميت,14.0,bottom,"{'type': 'Point', 'coordinates': [34.30360794067383, 31.16852694072726]}",settlement,village,Hebrew,0.0,False,2.0
7,رفح‎,רפיח,رفح‎,11.0,bottom,"{'type': 'Point', 'coordinates': [34.25580024719238, 31.27517644662366]}",settlement,city,Arabic,0.0,False,0.0
8,خان يونس,ח'אן יונס,خان يونس,10.0,bottom,"{'type': 'Point', 'coordinates': [34.30249214172363, 31.345793833930642]}",settlement,city,Arabic,0.0,False,0.0
9,عبسان الكبيرة‎,עבאסן אל-כבירה,عبسان الكبيرة‎,13.0,bottom,"{'type': 'Point', 'coordinates': [34.34969902038574, 31.323873645847797]}",settlement,town,Arabic,0.0,False,1.0
10,אופקים,אופקים,أوفاكيم,13.0,bottom,"{'type': 'Point', 'coordinates': [34.6208381652832, 31.31258165744771]}",settlement,town,Hebrew,0.0,False,1.0


checkpoint saved: labels_data


## Locations and Bounds

In [None]:
import dataflows as DF
import fiona
import pghstore
from shapely.geometry import shape
from shapely.ops import unary_union

FILES = ['israel', 'palestine']
sources = [fiona.open(f'{f}.osm.gpkg', layer='multipolygons').filter() for f in FILES]
RANKS = dict(
    city=0,
    town=1,
    village=2,
    hamlet=3
)

def names(r):
    ret = [v for k, v in r['properties'].items() if k.endswith('he')]
    if len(ret) == 0:
        name = r['properties'].get('name')
        if name:
            return [name]
    else:
        return ret
    
def bounds(r):
    geometry = r['geometry']
    geometry = unary_union([shape(g) for g in geometry])
    bounds = geometry.bounds
    return bounds

dp, _ = DF.Flow(
    *sources,
    DF.add_field('tags', 'string', lambda r: r['properties'].get('other_tags') or ''),
    lambda row: row['properties'].update(pghstore.loads(row['tags'])),
    DF.add_field('place', 'string', lambda r: r['properties'].get('place')),
    DF.add_field('boundary', 'string', lambda r: r['properties'].get('boundary')),
    DF.add_field('population', 'string', lambda r: r['properties'].get('population')),
    DF.filter_rows(lambda r: r['place'] in RANKS.keys()),
    DF.filter_rows(lambda r: r['geometry'] and 'Polygon' in r['geometry']['type']),
    DF.add_field('name', 'array', names),
    DF.filter_rows(lambda r: r['name'] is not None),
    DF.add_field('key', 'string', lambda r: '_'.join(r['name'])),
    DF.concatenate(
        dict((k, []) for k in ['place', 'name', 'key', 'geometry', 'population']),
    ),
    DF.join_with_self('concat', ['key'], dict(
        place=None,
        population=None,
        name=None,
        key=None,
        geometry=dict(aggregate='array')
    )),
    DF.add_field('bounds', 'array', bounds),
    DF.select_fields(['key', 'place', 'name', 'population', 'bounds']),
    DF.update_resource(-1, name='place_bounds_he', path='place_bounds_he.csv'),
    DF.set_type('bounds', **{'es:index': False, 'es:itemType': 'number'}),
    DF.set_type('place', **{'es:keyword': True}),
    DF.set_type('name', **{'es:itemType': 'string'}),
    DF.set_type('key', **{'es:keyword': True}),
    DF.set_primary_key(['key']),
    DF.dump_to_zip('data/place_bounds_he.zip'),
    DF.printer(tablefmt='html')
).process()
dp.resources[0].descriptor

## Cities without names

In [1]:
import dataflows as DF
import fiona
import pghstore
from shapely.geometry import shape
from shapely.ops import unary_union

FILES = ['israel', 'palestine']
sources = [fiona.open(f'{f}.osm.gpkg', layer='multipolygons').filter() for f in FILES]
RANKS = dict(
    city=0,
    town=1,
    village=2,
    hamlet=3
)

def names(r):
    ret = [v for k, v in r['properties'].items() if k.endswith('he')]
    if len(ret) == 0:
        name = r['properties'].get('name')
        if name:
            return [name]
    else:
        return ret

r = DF.Flow(
    *sources,
    DF.add_field('tags', 'string', lambda r: r['properties'].get('other_tags') or ''),
    lambda row: row['properties'].update(pghstore.loads(row['tags'])),
    DF.add_field('place', 'string', lambda r: r['properties'].get('place')),
    DF.add_field('osm_way_id', 'string', lambda r: r['properties'].get('osm_way_id')),
    DF.filter_rows(lambda r: r['place'] in RANKS.keys()),
    DF.filter_rows(lambda r: r['geometry'] and 'Polygon' in r['geometry']['type']),
    DF.add_field('name', 'array', names),
    DF.filter_rows(lambda r: r['name'] is None),
    DF.sort_rows('{place}'),
    DF.delete_fields(['geometry']),
    DF.printer(tablefmt='html')
).results()[0][0]


  for x in self.iterable:


#,type (string),id (string),properties (object),tags (string),place (string),osm_way_id (string),name (array)
1,Feature,16107,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,120745730,
2,Feature,269205,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,432757889,
3,Feature,269207,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,432757891,
4,Feature,269208,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,432757892,
5,Feature,269209,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,432757893,
6,Feature,281483,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,452985072,
7,Feature,316340,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,502103410,
8,Feature,5233,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,86342235,
9,Feature,386038,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,605653857,
10,Feature,386074,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,605927534,


#,type (string),id (string),properties (object),tags (string),place (string),osm_way_id (string),name (array)
1,Feature,18264,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,265587469,
2,Feature,18274,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,265595164,
3,Feature,263525,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,612000771,
4,Feature,294839,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,767112843,
5,Feature,18260,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,town,265587435,
6,Feature,18262,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,town,265587437,
7,Feature,14923,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,242723361,
8,Feature,16023,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,244087537,
9,Feature,18265,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,265587470,
10,Feature,18266,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,265587471,


In [None]:
_