In [1]:
import re
import osmium
import os
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import pandas as pd
import platform

In [2]:
# On macOS only: if the notebook was not started from a directory named
# "backend", switch into the "backend" sub-directory of the current directory.
# NOTE(review): presumably this makes the relative data/ paths used later
# resolve; other platforms are left untouched — confirm this is intended.
system = platform.system()
if system == "Darwin":
    current_dir = os.path.split(os.getcwd())[-1]

    if not current_dir == "backend":
        os.chdir(os.path.join(os.getcwd(), "backend"))

In [3]:
# Assumed height of a single building level. _get_height and _get_min_height
# multiply level counts by this value when no explicit height tag is usable.
# Presumably metres, since those helpers return it interchangeably with values
# converted to metres by _feet_to_meters.
LEVEL_HEIGHT = 3.4

# https://wiki.openstreetmap.org/wiki/Simple_3D_buildings#Other_roof_tags


def _feet_to_meters(s):
    r = re.compile(r"([0-9]*\.?[0-9]+)'([0-9]*\.?[0-9]+)?\"?")
    m = r.findall(s)[0]
    if len(m[0]) > 0 and len(m[1]) > 0:
        m = float(m[0]) + float(m[1]) / 12.0
    elif len(m[0]) > 0:
        m = float(m[0])
    return m * 0.3048


def _get_height(tags):
    if 'height' in tags:
        # already accounts for roof
        if '\'' in tags['height'] or '\"' in tags['height']:
            return _feet_to_meters(tags['height'])
        r = re.compile(r"[-+]?\d*\.\d+|\d+")
        return float(r.findall(tags['height'])[0])
    if 'levels' in tags:
        roof_height = 0
        if 'roof_height' in tags:
            if '\'' in tags['roof_height'] or '\"' in tags['roof_height']:
                roof_height = _feet_to_meters(tags['roof_height'])
            else:
                r = re.compile(r"[-+]?\d*\.\d+|\d+")
                roof_height = float(r.findall(tags['roof_height'])[0])

        # does not account for roof height
        height = float(tags['levels']) * LEVEL_HEIGHT
        if 'roof_levels' in tags and roof_height == 0:
            height += float(tags['roof_levels']) * LEVEL_HEIGHT
        return height
    return 7.0


def _get_min_height(tags):
    if 'min_height' in tags:
        # already accounts for roof
        if '\'' in tags['min_height'] or '\"' in tags['min_height']:
            return _feet_to_meters(tags['min_height'])
        r = re.compile(r"[-+]?\d*\.\d+|\d+")
        return float(r.findall(tags['min_height'])[0])
    if 'min_level' in tags:
        height = float(tags['min_level']) * LEVEL_HEIGHT
        return height
    return 0.0


class BuildingHandler(osmium.SimpleHandler):
    """osmium handler that collects building areas with their height attributes.

    Each accepted area contributes one entry to every one of the parallel lists
    created in ``__init__``; ``get_gdf`` turns those lists into a GeoDataFrame.
    """

    def __init__(self):
        super().__init__()
        # Parallel lists: position i in each list describes the same building.
        self.osm_id = []         # numeric id of the originating OSM object
        self.osm_type = []       # 'W' (from a way) or 'R' (otherwise)
        self.height = []
        self.min_height = []
        self.geometry = []       # WKB as produced by self.wkbfab
        self.wkbfab = osmium.geom.WKBFactory()

    def get_gdf(self):
        """Return the collected buildings as a GeoDataFrame in EPSG:4326."""
        shapes = gpd.GeoSeries.from_wkb(self.geometry, crs='EPSG:4326')
        columns = {
            'osm_id': self.osm_id,
            'osm_type': self.osm_type,
            'min_height': pd.Series(self.min_height, dtype='float'),
            'height': pd.Series(self.height, dtype='float'),
            'geometry': shapes,
        }
        return gpd.GeoDataFrame(columns, index=shapes.index)

    def area(self, a):
        """Record area ``a`` if it is tagged as a building.

        Areas that are underground or carry a ``bridge`` tag are skipped. If the
        geometry or either height cannot be produced, the error and the area are
        printed and nothing is recorded for that area.
        """
        source_id = int(a.orig_id())
        if a.from_way():
            source_type = 'W'
        else:
            source_type = 'R'

        tags = a.tags

        # Qualifiers
        is_building = (
            'building' in tags
            or 'building:part' in tags
            or tags.get('type', None) == 'building'
        )
        if not is_building:
            return

        # Disqualifiers
        if 'bridge' in tags or tags.get('location', None) == 'underground':
            return

        try:
            shape = self.wkbfab.create_multipolygon(a)
            top = _get_height(tags)
            bottom = _get_min_height(tags)
        except Exception as e:
            print(e)
            print(a)
        else:
            # Append only once every value exists so the lists stay aligned.
            self.geometry.append(shape)
            self.height.append(top)
            self.min_height.append(bottom)
            self.osm_id.append(source_id)
            self.osm_type.append(source_type)

def save_buildings_geojson(handler: "BuildingHandler", out_path: str) -> "gpd.GeoDataFrame":
    """Write the handler's buildings to a GeoJSON file and return them.

    Args:
        handler: Handler whose ``get_gdf()`` yields the buildings
            (osm_id, osm_type, height, min_height, geometry).
        out_path: Destination path of the GeoJSON file.

    Returns:
        The GeoDataFrame that was written.
    """
    gdf = handler.get_gdf()
    gdf.to_file(out_path, driver="GeoJSON")
    # Hand the frame back as the signature promises (previously returned None).
    return gdf

In [4]:
# Short name of the city to process; it selects the input extract here and is
# reused for the output location further down.
city = 'nyc'
# Path of the OSM PBF extract, relative to the working directory.
filename = 'data/%s.osm.pbf' % city

In [5]:
# Run the extract through a fresh handler; BuildingHandler.area() accumulates
# the accepted buildings on `h`. Areas for which geometry or heights cannot be
# produced are printed by area() (see the cell output) and skipped.
# NOTE(review): locations=True is presumably what lets pyosmium resolve node
# coordinates when building geometries — confirm against the pyosmium docs.
h = BuildingHandler()
h.apply_file(filename, locations=True)

invalid area (area_id=7396795)
a7396795: num_rings=(0, 0), tags={addr:housenumber=421,addr:postcode=10016,addr:s...}
invalid area (area_id=2768368382)
a2768368382: num_rings=(0, 0), tags={building=yes}
invalid area (area_id=2777370780)
a2777370780: num_rings=(0, 0), tags={building=apartments,height=7.3152,source:geomet...}
invalid area (area_id=2777371272)
a2777371272: num_rings=(0, 0), tags={building=house,height=4.572,source:geometry:ref...}


In [7]:
# Export the collected buildings as an lz4-compressed Feather file under
# data/<city>/.
gdf = h.get_gdf()
feather_path = 'data/%s/buildings.feather' % city
# Create the per-city output directory first so a fresh checkout does not fail
# here after the parse has already been done.
os.makedirs(os.path.dirname(feather_path), exist_ok=True)
gdf.to_feather(feather_path, compression='lz4')