In [1]:
import os
import pathlib
import re
import warnings

import contextily as cx 
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import osmnx as ox
import seaborn as sns

from geobr import read_municipality
from h3census.assemble import get_hexagons_with_census_data
from matplotlib.patches import Patch
from shapely.geometry import Point

%matplotlib inline
%config InlineBackend.figure_format='retina'

In [2]:
# Both folders come from environment variables so that no machine-specific
# path is hard-coded in the notebook. Indexing os.environ (instead of using
# .get) makes a missing variable fail right here with a KeyError that names
# it, rather than with an opaque TypeError raised by pathlib.Path(None).
out_folder = pathlib.Path(os.environ['OUT_FOLDER']) / 'A'

db_folder = pathlib.Path(os.environ['DB_FOLDER'])

In [3]:
def _get_zipped_path_for_gpd(path):
    """This gets a full path for a zipped shp file and parses it
    into a structure that gpd.read_file() understands.
    """
    prefix = r'zip://'
    
    try:
        path = prefix + path.as_posix()
    except:
        path = pathlib.PureWindowsPath(path)
        path = prefix + path.as_posix()
        
        
    return path


def _get_geodata(path, is_zipped=True):
    """Read a geospatial file into a GeoDataFrame.

    `path` is either a full raw string path or a pathlib path. When
    `is_zipped` is true (the default) it is first rewritten by
    _get_zipped_path_for_gpd() so that a shapefile compressed into a
    .zip file can be opened; otherwise it is handed to gpd.read_file()
    untouched.
    """
    source = _get_zipped_path_for_gpd(path) if is_zipped else path
    return gpd.read_file(source)


def get_land_use_data(year):
    """Load the land use parcels of `year` as a cleaned GeoDataFrame.

    Reads db_folder/beaga/tipologia_uso_ocupacao/uso_ocup_{year}.zip,
    discards the records that have no geometry, splits multi-part
    geometries into single parts and returns the result with a fresh
    0..n-1 index.
    """
    zip_path = db_folder.joinpath('beaga',
                                  'tipologia_uso_ocupacao',
                                  f'uso_ocup_{year}.zip')
    parcels = _get_geodata(zip_path)

    # A few records are not georeferenced and, even though they carry a
    # parcel ID, that ID is also absent from the parcel geodata provided
    # by BH city hall. This happened with the 2018 data and amounted to
    # about 0.4%, which makes it seem reasonable to just drop them.
    has_geometry = parcels.geometry.notnull()
    parcels = parcels.loc[has_geometry]

    # Some later operations do not handle a mix of Polygons and
    # MultiPolygons well.
    parcels = parcels.explode()
    parcels = parcels.reset_index(drop=True)

    return parcels


# Land use parcels for 2017; columns are renamed and trimmed in the next cell.
lu_2017 = get_land_use_data(2017)


In [5]:
# Keep only the attributes used downstream, under year-independent names.
lu_2017 = (
    lu_2017
    .rename(columns={'TIPOLOGIA_': 'type',
                     'TIPOLOGIA0': 'category'})
    .reindex(columns=['type', 'category', 'geometry'])
)


In [45]:
# Total parcel area (km²) per land use type, largest first.
# `area_km2` is not a column of lu_2017 (it only holds 'type', 'category'
# and 'geometry'), so it is derived here from the geometry itself.
# NOTE(review): assumes the layer is in a metric projected CRS (EPSG:31983 is
# what this notebook uses for Belo Horizonte's data) — confirm.
(lu_2017
 .assign(area_km2=lu_2017.geometry.area / 1e6)  # m² -> km²
 .groupby(['type'])
 .area_km2
 .sum()
 .sort_values(ascending=False))

type
RESIDENCIAL                  95.511467
LOTE VAGO                    45.957879
NAO RESIDENCIAL              29.915041
MISTO                        13.861005
NAO RESIDENCIAL+LOTE VAGO     8.362966
MISTO + LOTE VAGO             4.877592
RESIDENCIAL+LOTE VAGO         4.382295
Name: area_km2, dtype: float64

In [47]:
# Total parcel area (km²) per land use type, largest first.
# `area_km2` is not among the columns kept for lu_2020, so it is derived
# here from the geometry itself.
# NOTE(review): assumes a metric projected CRS (EPSG:31983) — confirm.
# NOTE(review): lu_2020 is created by the `get_land_use_data(2020)` cell,
# which sits further down the notebook; on a fresh "Restart & Run All" this
# cell has to run after that one.
(lu_2020
 .assign(area_km2=lu_2020.geometry.area / 1e6)  # m² -> km²
 .groupby(['type'])
 .area_km2
 .sum()
 .sort_values(ascending=False))

type
RESIDENCIAL        100.459043
SEM INFORMACAO      53.431436
NAO RESIDENCIAL     49.075531
LOTE VAGO           33.108260
MISTO               16.536411
PARQUE              10.511451
Name: area_km2, dtype: float64

In [52]:
# Total area (km²) of the 2017 parcels that carry no 'type' at all.
# The area is taken from the geometry because lu_2017 has no `area_km2`
# column.
# NOTE(review): assumes a metric projected CRS (EPSG:31983) — confirm.
lu_2017.loc[lu_2017['type'].isnull()].geometry.area.sum() / 1e6  # m² -> km²

60.58060350473666

In [13]:
# 2020 parcels, trimmed to the attributes of interest and renamed to the
# same year-independent names used for the other years.
lu_2020 = (
    get_land_use_data(2020)
    .rename(columns={'uso_do_s_1': 'type',
                     'agreg_ativ': 'activities',
                     'uso_ativ_1': 'category'})
    .reindex(columns=['type', 'category', 'activities', 'geometry'])
)

# Land use categories and types come wrapped in curly brackets, for
# whatever reason. The second capture group keeps everything after the
# optional '{' up to the last word character, which leaves the closing
# '}' out.
regex = r'(\{?)(.+\w)(\})?'

for column in ['type', 'category']:
    lu_2020[column] = lu_2020[column].str.extract(regex)[1]

In [19]:
# Which categories do the parcels typed 'SEM INFORMACAO' fall into?
lu_2020.loc[lu_2020['type'].eq('SEM INFORMACAO'), 'category'].value_counts()

SEM INFORMACAO    51205
Name: category, dtype: int64

In [22]:
# (rows, columns) of the 2020 land use layer.
lu_2020.shape

(360422, 4)

In [None]:
# Share (%) of cells per land use type and per category.
# Only the last expression of a cell is rendered automatically, so both
# tables are passed to IPython's display() to make sure each one shows up.
# NOTE(review): hex_with_uses is not created in the cells shown before this
# one — confirm it exists at this point on a fresh run.
number_of_parcels = len(hex_with_uses)
display(hex_with_uses['type'].value_counts() / number_of_parcels * 100,
        hex_with_uses.category.value_counts() / number_of_parcels * 100)

- Most instances are straightforward, such as 'casa unifamiliar', for example, which clearly is residential; or such as 'edificio de uso comercial e/ou servicos' which is obviously retail/services.

- There were cases in which discordant classifications were residual. Hence, the residuals have been put together along with the majority class. For example, there are 6 mixed use parcels of 'edificio' amidst a sea of 16009 residential parcels.

- Mixed uses presented a difficulty as well because the classical model deals with discrete classifications only. There are models that allow for some fuzzy classification, which means that a cell can go from fully residential to completely commercial, while being able to assume any percentage balance in between those extremes. Unfortunately, to the best of my knowledge, such models still seem incipient and display some quirks that I don't think are for me to solve (at least not now). Hence, I chose to create a 'mixed' category and leave it at that. After some future experimentation, I might choose to place it either under retail/services or under residential.

- All parcels containing any kind of (a) club or public goods, and of (b) public facilities, amenities or infrastructures have been considered static and are all labeled as either:
    - amenities,
    - infrastructure, or
    - public services.

- 'edificação sem tipologia especificada' was assumed to be residential, because:
    - For any given lot, the highest probability is that it is residential;
    - In the Brazilian context, there is plenty of illegal land occupation due to favelas and whatnot;
    - I assumed it is harder for commercial activity to function without registry in the tax records.
    
**A minor number of instances require a slightly more detailed analysis, which here means that a proper reclassification required a simultaneous analysis of 'type' and 'category' attributes. Specifically:**

- There are two land use categories that are unclear in and of themselves ('ocupação diversificada' and 'vaga residencial ou comercial'); these have been classified with the aid of the regroup_with_scrutiny() function.

- 'galpao' at first seemed obviously related to the industrial sector, but a closer look at its spatial distribution, along with a couple of checks in Google Maps, made it seem otherwise. It did not seem, for the most part, related to heavy industry, especially the parcels of this category that were placed within the city's core and along its major roads. Hence, for the time being, it was placed under either retail/services or mixed, until some later analysis reveals otherwise, if such a thing is revealed at all. The choice between mixed and retail/services is made within regroup_with_scrutiny().

**Finally, parcels with no data at all are to be handled in later steps.**


In [None]:
# Regrouping scheme for the 2017 data, consumed by regroup_land_uses():
#   new 'type' -> umbrella 'category' -> original categories it absorbs.
regrouping_2017 = {
    'active': {
        'residential': [
            'casa unifamiliar',
            'edificio',
            'conjunto multifamiliar vertical',
            'conjunto multifamiliar horizontal',
            'edificação sem tipologia especificada',
        ],
        'retail/services': [
            'loja ou conjunto de lojas',
            'edificio de uso comercial e/ou servicos',
            'loja em edificio/galeria',
            'shopping center',
            'galeria/mini shopping de bairro',
            'apart hotel',
        ],
        # TO DO: settle approach towards mixed uses
        'mixed': [
            'casa/sobrado',
            'edificio residencial e comercio e/ou servicos',
        ],
    },
    'passive': {
        'vacant': [
            'lote vago',
        ],
    },
    'static': {
        'industry': [
            'industria',
        ],
        'public services': [
            'instituicao de ensino',
            'equipamento de saude',
        ],
        'infrastructure': [
            'estacao de transporte coletivo',
            'aeroporto',
            'aterro sanitario',
        ],
        'amenities': [
            'instituicao religiosa',
            'cemiterio',  # odd fit, but no better umbrella was found for it
            'parque',
            'clubes esportivos e sociais',
            'estadio/ginasio',
        ],
    },
}

In [None]:
def regroup_with_scrutiny(gdf):
    """Resolve, in place, the categories that need 'type' to be placed.

    Handles the quirks that seem too specific for generalization: three
    original categories (TIPOLOGIA0) are ambiguous on their own and are
    re-labelled by looking at the parcel 'type' (TIPOLOGIA_) as well:

    - 'ocupação diversificada': 'residential' for the residential types,
      'mixed' for the mixed types, 'retail/services' otherwise;
    - 'vaga residencial ou comercial': 'retail/services' for
      'NAO RESIDENCIAL', 'residential' for 'RESIDENCIAL', 'mixed'
      otherwise;
    - 'galpao': 'mixed' for the mixed types, 'retail/services' otherwise.

    Every row that ends up in one of the umbrella categories then gets
    its 'type' set to 'active'. `gdf` is modified in place and nothing
    is returned.

    NOTE: function's name is an exaggeration
    """
    def relabel(rows, new_category):
        gdf.loc[rows, 'category'] = new_category

    residential_types = gdf['type'].isin(['RESIDENCIAL',
                                          'RESIDENCIAL+LOTE VAGO'])
    mixed_types = gdf['type'].isin(['MISTO', 'MISTO + LOTE VAGO'])

    # 'ocupação diversificada'
    # ----------------------
    diversified = gdf.category == 'ocupação diversificada'
    relabel(diversified & residential_types, 'residential')
    relabel(diversified & mixed_types, 'mixed')
    relabel(diversified & ~residential_types & ~mixed_types,
            'retail/services')

    # 'vaga residencial ou comercial'
    # -----------------------------
    ambiguous_vaga = gdf.category == 'vaga residencial ou comercial'
    non_residential = gdf['type'] == 'NAO RESIDENCIAL'
    plain_residential = gdf['type'] == 'RESIDENCIAL'
    relabel(ambiguous_vaga & non_residential, 'retail/services')
    relabel(ambiguous_vaga & plain_residential, 'residential')
    relabel(ambiguous_vaga & ~non_residential & ~plain_residential,
            'mixed')

    # 'galpao'
    # ------
    # TO DO: assert if this decision is appropriate
    warehouses = gdf.category == 'galpao'
    relabel(warehouses & mixed_types, 'mixed')
    relabel(warehouses & ~mixed_types, 'retail/services')

    # Final adjustment
    # ----------------
    regrouped = gdf.category.isin(['residential', 'retail/services',
                                   'mixed', 'galpao'])
    gdf.loc[regrouped, 'type'] = 'active'
    

def regroup_land_uses(gdf, dictionary):
    """Collapse the original categories into umbrella classes, in place.

    Parameters
    ----------
    gdf : DataFrame or GeoDataFrame
        Must have 'type' and 'category' columns. It is modified in place.
    dictionary : dict
        Nested mapping {new_type: {umbrella_category: [old categories]}}.
        Every row whose 'category' is one of the old categories gets
        'type' = new_type and 'category' = umbrella_category. Rows whose
        category is listed nowhere are left untouched.

    Returns
    -------
    None
    """
    for new_type, groups in dictionary.items():
        for umbrella_category, old_categories in groups.items():
            mask = gdf.category.isin(old_categories)

            # Assign straight through the boolean mask. Every masked row
            # holds one of `old_categories`, so they all map to the same
            # umbrella value; going through replace() + update() added
            # nothing, realigned the whole frame on each pass and failed
            # when the index had duplicated labels.
            gdf.loc[mask, 'type'] = new_type
            gdf.loc[mask, 'category'] = umbrella_category

In [None]:
# Order matters: the ambiguous categories are settled first because that
# step reads the raw 'type' values, which the generic regrouping overwrites.
regroup_with_scrutiny(lu_2017)
regroup_land_uses(lu_2017, regrouping_2017)

### Land Use Inputation and Category Revision

This section concerns some more specific land use (re)classifications that have to be made manually. After messing around with some different possible approaches, it seemed better to first input the current classification into the hexagonal grid, and do the corrections afterwards. The steps below also handle all instances of parcels with little to no land use information. That is, parcels labelled as ***'sem informacao'*** (no information) are assigned a category.
- First, whatever hexagons intersect with the road network will be labeled as infrastructure and are hence fixed as a static land use. The superimposition of the land use data shows that the empty spaces between parcels remain mostly constant throughout the years, which implies that no major changes happened to the street grid layout. Hence, I'll use the current street topology as retrieved from OpenStreetMap by osmnx.
- Second, some major local landmarks are enforced. That is done especially because of the 2011 data, which misclassified some old and established local landmarks or infrastructures. For example, the local airport was marked as a warehouse, and both some parks and the landfill were marked as vacant. Another example is that of Lake Pampulha, which is understandably not represented by polygons of any kind - somewhere that is not land could not be represented by a land use polygon after all. Nevertheless, I chose to retain that region in the hexagonal grid of the municipality, where it is then marked as amenity.
- Third, there are parcels that mostly coincide with the footprints of the subnormal agglomerates in the municipality - see note below. Those are to be labeled subnormal, but probably will be joined together with residential in a later step of the analysis.
- Fourth, parcels are evaluated against the 2018 urban footprint: those that are not completely contained within the footprint are considered vacant land. The year 2018 was chosen because it's the closest year available that was found at the time of writing. ***It should be noted that several vacant plots are preservation areas and cannot be occupied during simulation. Such constraints are to be enforced when zoning restrictions are imposed in the model.***
- Finally, whatever places are still marked as unknown are assumed, for the time being, as reflecting urban functional voids. If even BH city hall could not gather proper information on them, and, at the same time, if the above procedures failed to provide a reasonable assumption of their intended use, there's not much left to do.


***Note:***

A subnormal agglomerate is a form of irregular occupation of land – either public or private - owned by a third party, for housing purposes in urban areas, usually characterized by an irregular urban pattern, with scarce essential public services and located in areas not proper or allowed for housing use.  In Brazil, those irregular settlements are known by the names of favelas, invaded areas, slums in deep valleys, slums in low-lands, communities, villages, slums in backwaters, irregular lots, shacks and stilt houses

#### Auxiliary Datasets

In [None]:
# Urban footprint of 2018, read from the zipped shapefile.
path_to_footprint = db_folder.joinpath('beaga',
                                       'footprints',
                                       '2018_footprint.zip')
footprint_2018 = _get_geodata(path_to_footprint)

In [None]:
# BH city hall uses the term "favelas" for the subnormal agglomerates.
path_to_favelas = db_folder.joinpath('beaga',
                                     'footprints',
                                     'vila_favela.zip')
favelas = _get_geodata(path_to_favelas)

footprint and favela data:
   - BH Maps: http://bhmap.pbh.gov.br
   
It should be noted that the temporality of the favela geodata is not clear. The metadata (https://bit.ly/BH_favela_metadata) suggests that the file is updated whenever the public administration deems necessary, which implies that the location of the subnormal communities should be up to date with the time with which this script was written (2021). Thus, on first thought, it follows that the data should reflect, with reasonable accuracy, the location of such communities for the 2017, 2018 and 2020 maps, as there's no evidence to suppose that a major favela growth happened in the last couple of years.

A later analysis reinforced this reasoning because the footprint of the favelas remarkably fits some data voids in the land use data for 2011 as well. Nonetheless, the data will be joined with IBGE's data as it fills some minor gaps present in BH's data.

In [None]:
path_to_subnormal = db_folder.joinpath('census',
                                       '2010_subnormal_agglomerates',
                                       'SetoresXAreaDivAGSN_shp.zip')

# IBGE data provides all census tracts of Brazil and specifies which ones
# are subnormal, so the dataset has to be sliced in two steps:
#
#   1. keep only the census tracts of BH city, taking advantage of the
#      fact that a tract ID starts with the ID of the city it belongs to;
#   2. keep only the tracts flagged as subnormal.
#
# IBGE data also comes in a geographic coordinate system, so it is finally
# projected to the same CRS as Belo Horizonte's data.
subnormal_agg = (
    _get_geodata(path_to_subnormal)
    .loc[lambda tracts: tracts.CD_GEOCODI.str.match('3106200')]  # ibgeID for BH
    .loc[lambda tracts: tracts.Subnormal == 'Sim']
    .to_crs(epsg=31983)
)

data on subnormal agglomerates:

- IBGE: ftp://geoftp.ibge.gov.br/recortes_para_fins_estatisticos/malha_de_aglomerados_subnormais/censo_2010/areas_de_divulgacao_da_amostra/

In [None]:
def _get_slums_footprint(favelas, subnormal_agg):
    """Merge both slum layers into one layer of single-part geometries.

    The two inputs are combined with a 'union' overlay. Only the
    geometry column is kept, multi-part geometries are exploded and the
    index is reset.
    """
    merged = gpd.overlay(favelas, subnormal_agg, how='union')
    footprint = merged[['geometry']].explode()

    return footprint.reset_index(drop=True)


# Single slum footprint built from the city hall (favelas) and IBGE
# (subnormal_agg) layers.
slums = _get_slums_footprint(favelas, subnormal_agg)

#### Reclassifying

In [None]:
def _get_roads():
    """Download the street network of Belo Horizonte as a GeoDataFrame.

    The graph is fetched from OpenStreetMap through osmnx and projected
    to EPSG:31983 before being converted.
    """
    graph = ox.graph_from_place('Belo Horizonte, MG, Brazil')
    projected_graph = ox.project_graph(graph, to_crs='EPSG:31983')

    # graph_to_gdfs() hands back a pair of GeoDataFrames; the second one
    # holds the streets, which is what is wanted here instead of a
    # networkx graph object.
    streets = ox.graph_to_gdfs(projected_graph)[1]

    return streets


def _find_hexes_that_are_streets(hexes):
    """Return the index labels of the hexagons crossed by a street.

    The result is the index of an inner spatial join between `hexes`
    and the street layer, so a label shows up once per street that the
    hexagon intersects.
    """
    # NOTE(review): `op` is the keyword of the geopandas release this
    # notebook was written for; newer releases name it `predicate`.
    on_streets = gpd.sjoin(hexes,
                           _get_roads(),
                           how='inner',
                           op='intersects')

    return on_streets.index
    


def _reclassify_hexes_on_roads(hexes):
    """Mark, in place, every hexagon crossed by a street as
    static / infrastructure.
    """
    street_labels = _find_hexes_that_are_streets(hexes)

    for column, value in (('type', 'static'),
                          ('category', 'infrastructure')):
        hexes.loc[street_labels, column] = value
    

In [None]:
def _get_osm_polygons(osm_tags,
                      place='Belo Horizonte, MG, Brazil',
                      epsg=31983):
    """Download the OSM features matching `osm_tags` and keep the areas.

    Shared implementation of _get_greenery(), _get_amenities() and
    _get_infrastructures(), which only differ in the tags they ask for.

    Parameters
    ----------
    osm_tags : dict
        Tags handed to ox.geometries_from_place().
    place : str
        Place name to query.
    epsg : int
        EPSG code of the CRS the result is projected to.

    Returns
    -------
    GeoDataFrame
        Only the Polygon / MultiPolygon features, projected to `epsg`.
    """
    # The osmnx call raises a DeprecationWarning, presumably the one
    # explained at
    # https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
    # It should not impact the results in any way; it is filtered merely
    # because there are too many annoying warnings otherwise.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore',
                                category=DeprecationWarning)
        features = ox.geometries_from_place(place,
                                            tags=osm_tags,)

    # Points and lines cannot be treated as land use areas.
    is_area = features.geom_type.isin(['Polygon', 'MultiPolygon'])

    return features.loc[is_area].to_crs(epsg=epsg)


def _get_greenery():
    """Return the OSM polygons of parks, reserves, protected areas and
    natural land cover in Belo Horizonte, projected to EPSG:31983.
    """
    osm_tags = {'name': 'Parque das Mangabeiras',

                'leisure': [
                           'park',
                           'nature_reserve',
                           ],

                'boundary': [
                            'national_park',
                            'protected_area',
                            ],

                'natural': [
                           'wood', 'tree_row', 'tree','scrub',
                           'heath', 'moor','grassland', 'fell',
                           'bare_rock', 'scree', 'shingle', 'sand', 'mud',
                           ]
               }

    return _get_osm_polygons(osm_tags)


def _get_amenities():
    """Return the OSM polygons of the zoo, Lagoa da Pampulha, stadiums
    and sports centres in Belo Horizonte, projected to EPSG:31983.
    """
    osm_tags = {'tourism': 'zoo',

                'name': 'Lagoa da Pampulha',

                'leisure': [
                           'stadium',
                           'sports_centre',
                           ],
               }

    return _get_osm_polygons(osm_tags)


def _get_infrastructures():
    """Return the OSM polygons named 'Aeroporto' or 'ETE Onça', plus the
    landfills, in Belo Horizonte, projected to EPSG:31983.
    """
    osm_tags = {'name': [
                        'Aeroporto',
                        'ETE Onça',
                        ],

                'landuse': 'landfill'}

    return _get_osm_polygons(osm_tags)
    

def _classify_landmarks(hexes):
    """Enforce, in place, the land use of major local landmarks.

    Hexagons intersecting each OSM layer are relabelled, in this order:
    greenery -> passive / vacant, amenities -> static / amenities,
    infrastructures -> static / infrastructure. A hexagon hit by more
    than one layer keeps the label of the last one.
    """
    landmark_layers = (
        (_get_greenery(), 'passive', 'vacant'),
        (_get_amenities(), 'static', 'amenities'),
        (_get_infrastructures(), 'static', 'infrastructure'),
    )

    for layer, land_type, land_category in landmark_layers:
        hits = gpd.sjoin(hexes,
                         layer,
                         how='inner',
                         op='intersects').index

        hexes.loc[hits, 'type'] = land_type
        hexes.loc[hits, 'category'] = land_category
            

In [None]:
def _find_subnormal(hexes, slums):
    """Return the index labels of the potentially subnormal cells.

    Those are the cells of `hexes` that intersect at least one polygon
    of `slums`. The result is the index of an inner spatial join, so a
    label shows up once per slum polygon that the cell intersects.
    """
    in_slums = gpd.sjoin(hexes, slums, how='inner', op='intersects')

    return in_slums.index


def _reclassify_subnormal(hexes, slums):
    """Label, in place, the cells inside the slum footprint as
    active / subnormal.

    Parameters
    ----------
    hexes : GeoDataFrame
        Hexagonal grid with 'type' and 'category' columns. Modified in
        place.
    slums : GeoDataFrame
        Footprint of the subnormal agglomerates.

    Returns
    -------
    None
    """
    subnormal_land = _find_subnormal(hexes, slums)

    # The main goal is to find where subnormal residences are, by
    # recategorizing (a) residential uses, (b) vacant land or, mostly,
    # (c) cells with otherwise no data on them. The categories below are
    # left as they are; otherwise the (possible) richness in activity
    # diversity within the slums would be lost.
    consolidated_categories = ['retail/services', 'mixed',
                               'industry', 'infrastructure',
                               'amenities', 'public services',]
    is_consolidated = hexes.category.isin(consolidated_categories)
    in_slum = hexes.index.isin(subnormal_land)

    # Assign straight on `hexes`. Editing a .loc slice and pushing it
    # back with update() triggers SettingWithCopyWarning and rewrites
    # every column of the frame just to change two of them.
    target = ~is_consolidated & in_slum
    hexes.loc[target, 'type'] = 'active'
    hexes.loc[target, 'category'] = 'subnormal'
    

In [None]:
def _find_vacant(hexes, footprint):
    """Return the index labels of the cells presumed vacant.

    A cell is presumed vacant when its 'type' is not 'static' and it
    does not intersect `footprint` at all.
    """
    # 'static' cells are left out in order to preserve road locations.
    # Whatever else is outside the urban footprint is presumed vacant.
    candidates = hexes.loc[~hexes['type'].isin(['static'])]

    inside_footprint = gpd.sjoin(candidates,
                                 footprint,
                                 how='inner',
                                 op='intersects').index

    outside = ~candidates.index.isin(inside_footprint)

    return candidates.loc[outside].index


def _reclassify_vacant(hexes, footprint):
    """Mark, in place, the cells found by _find_vacant() as
    passive / vacant.
    """
    vacant_labels = _find_vacant(hexes, footprint)

    for column, value in (('type', 'passive'),
                          ('category', 'vacant')):
        hexes.loc[vacant_labels, column] = value

In [None]:
def _reclassify_leftovers(hexes):
    """Recategorizes whatever cell that has not been reclassified
    so far by assuming it is vacant, an urban functional void,
    so to speak.
    
    TO DO: Classify land uses based on the preponderant use
    in the hexagons in the closer neighborhood (maybe 50m radius)
    """
    # Again, Whatever hexagon whose type is not within the
    # list below did not pass by any classification
    # scheme so far
    consolidated = ['active', 'passive', 'static']
    mask = ~hexes['type'].isin(consolidated)
    hexes.loc[mask, 'type'] = 'active'
    hexes.loc[mask, 'category'] = 'residential'

In [None]:
def reclassify_land_uses(hexes, footprint, slums):
    """Run, in place, the whole reclassification pipeline on `hexes`.

    The steps run in a fixed order, and later steps can overwrite the
    labels written by earlier ones:

    1. hexagons on the road network;
    2. major landmarks;
    3. subnormal agglomerates (`slums`);
    4. vacant land outside the urban `footprint`;
    5. whatever is still unclassified.
    """
    _reclassify_hexes_on_roads(hexes)
    _classify_landmarks(hexes)
    _reclassify_subnormal(hexes, slums)
    _reclassify_vacant(hexes, footprint)
    _reclassify_leftovers(hexes)
    

Explanation of the DeprecationWarning that is silenced in the OSM download helpers above: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

In [None]:
# Reclassify the 2017 grid in place, against the 2018 urban footprint.
reclassify_land_uses(hex_with_uses, footprint_2018, slums)

In [None]:
# Share (%) of cells per land use type after the reclassification.
number_of_parcels = len(hex_with_uses)
hex_with_uses['type'].value_counts() / number_of_parcels * 100

In [None]:
# Share (%) of cells per land use category; relies on number_of_parcels
# computed in the previous cell.
hex_with_uses.category.value_counts() / number_of_parcels * 100

In [None]:
def plot_land_uses(parcels, attribute, palette, ax):
    """Draw `parcels` on `ax`, coloured by the values of `attribute`.

    `palette` maps each value of `attribute` to a colour; a value that
    is missing from it raises a KeyError. A legend with one patch per
    value and a CartoDB Positron (no labels) basemap are added, and the
    axis frame is hidden.
    """
    handles = []

    for label, subset in parcels.groupby(attribute):
        colour = palette[label]
        subset.plot(ax=ax, color=colour)
        handles.append(Patch(facecolor=colour,
                             edgecolor=colour,
                             label=label))

    ax.legend(handles=handles,
              bbox_to_anchor=(1, 0.01),
              loc='lower right',
              prop={'size': 3})
    ax.axis('off')

    # Web map tiles are typically provided in Web Mercator (EPSG 3857),
    # which is contextily's default. Hence the CRS of the geodata has to
    # be passed so that the tiles are warped to match it.
    cx.add_basemap(ax,
                   crs=parcels.crs.to_string(),
                   source=cx.providers.CartoDB.PositronNoLabels,
                   attribution_size=2)
    

In [None]:
# Two side-by-side maps of the 2017 grid: land use type (left) and land
# use category (right). Both palettes are reused by later plotting cells.
fig,axes = plt.subplots(ncols=2, dpi=300, figsize=(5, 8))
ax1,ax2 = axes


type_palette = {'active': '#C97064',
                'static': '#39487F',
                'passive': '#527048',}
plot_land_uses(hex_with_uses,
               'type',
               type_palette,
               ax1,)


category_palette = {'residential': '#FECEF1',
                    'subnormal': '#B05F66',
                    'retail/services': '#3B727C',
                    'mixed': '#60E1E0',
                    'vacant': '#82A775',
                    'industry': '#291A10',
                    'infrastructure': '#FFFBDB',
                    'public services': '#A85118',
                    'amenities': '#7D573C',}

# A leftover `mask = hex` used to sit here; it only bound the builtin
# hex() to an unused name, so it was removed.
plot_land_uses(hex_with_uses,
               'category',
               category_palette,
               ax2,)

plt.savefig(out_folder / 'land_use_map_2017.png',
            dpi=300,)
plt.show()

In [None]:
# One GeoPackage holds every year, each one in its own layer.
# NOTE(review): hexagon_size is not defined in the cells shown here —
# confirm it is set earlier in the notebook.
path_to_hexes = out_folder / f'BH_hex_{hexagon_size}_with_land_uses.gpkg'

hex_with_uses.to_file(path_to_hexes, layer='2017', driver='GPKG')

# Land Use Maps for 2011

Data source: provided by Prodabel directly

These have been made with a methodology similar to that of 2017 data

In [54]:
# 2011 parcels, trimmed to the attributes of interest and renamed to the
# same year-independent names used for the other years.
lu_2011 = (
    get_land_use_data(2011)
    .rename(columns={'sigla_uso': 'type',
                     'descr_ocup': 'category'})
    .reindex(columns=['type', 'category', 'geometry'])
)


In [55]:
# Share (%) of 2011 parcels per raw land use type.
number_of_parcels = len(lu_2011)

lu_2011['type'].value_counts() / number_of_parcels * 100

RES     61.322360
NUL     12.052835
NRES     6.875683
IND      6.547383
LV       6.496853
MIS      4.821371
IDT      1.883514
Name: type, dtype: float64

In [56]:
# Share (%) of 2011 parcels per raw land use category; relies on
# number_of_parcels computed in the previous cell.
lu_2011.category.value_counts() / number_of_parcels * 100

casa unifamiliar                                 55.985343
nulo                                             12.052835
lote vago                                         6.496853
edificações em LV                                 5.546526
edifício                                          4.723856
casa / sobrado                                    4.241896
loja ou conjunto de lojas                         3.000502
galpão                                            2.841228
indeterminado                                     1.883514
ZEIS-1                                            1.000857
edifício residencial e comércio e/ou serviços     0.579475
conjunto multifamiliar vertical                   0.481073
edifício de uso comercial e/ou de serviços        0.450932
instituição de ensino                             0.291363
conjunto multifamiliar horizontal                 0.132088
loja em edifício / galeria                        0.062646
instituição religiosa                             0.0537

Again, it is self-evident where to put some of the land use categories. In contrast, the following require a somewhat in-depth analysis:

- ***'nulo'*** seems to be a category akin to 2017's ***'sem informacao'***: they are close in number of parcels and are not given any specific category because there's no detailed information about them in the treasury's registry. Hence, 'nulo' is going to be classified in the same way as 'sem informacao' was previously. The difference being that I'll now use the 2010 urban footprint. (TO DO: is it necessary to elaborate on the different methodologies for footprint creation?)

- ***edificações em LV*** are land parcels that do not yet have a consolidated land use class because that only happens when the real estate receives a certificate of occupancy. These have been assumed as residential. The rationale here is the same as the one used for ***'edificacao sem tipologia especificada'***, which is present in the 2017 data.

- Also obviously uncertain is the category ***indeterminado***. These have been assumed as vacant. Firstly, because a separate category was created that sets undetermined apart from ***edificações em LV***, it is arguable whether the undetermined parcels should represent residential as well. Secondly, some trial and error seemed to provide better results if these are considered vacant.

- Similar to the approach with 2017 data, ***galpao*** is placed under retail/services.

- ***ZEIS-1*** are those parcels that explicitly belong to subnormal agglomerates

This time the function *regroup_with_scrutiny()* is useless because the type attribute provides no useful information that enables a more precise category disaggregation.

In [None]:
# Regrouping scheme for the 2011 data, consumed by regroup_land_uses():
#   new 'type' -> umbrella 'category' -> original categories it absorbs.
regrouping_2011 = {
    'active': {
        'residential': [
            'casa unifamiliar',
            'edificações em LV',
            'edifício',
            'conjunto multifamiliar vertical',
            'conjunto multifamiliar horizontal',
        ],
        'subnormal': [
            'ZEIS-1',
        ],
        'retail/services': [
            'loja ou conjunto de lojas',
            'galpão',
            'edifício de uso comercial e/ou de serviços',
            'loja em edifício / galeria',
            'galeria / mini-shopping de bairro',
            'shopping',
            'apart hotel',
        ],
        # TO DO: settle approach towards mixed uses
        'mixed': [
            'casa / sobrado',
            'edifício residencial e comércio e/ou serviços',
        ],
    },
    'passive': {
        'vacant': [
            'lote vago',
            'indeterminado',
        ],
    },
    'static': {
        'industry': [
            'indústria',
        ],
        'public services': [
            'instituição de ensino',
            'equipamento de saúde',
        ],
        'infrastructure': [
            'estação de transporte coletivo',
        ],
        'amenities': [
            'instituição religiosa',
            'clubes esportivos e sociais',
            'parques',
            'cemitério',
        ],
    },
}

In [None]:
# Collapse the original 2011 categories into the umbrella classes, in place.
regroup_land_uses(lu_2011, regrouping_2011)

In [None]:
# The 2010 footprint that I found covers the whole metropolitan
# region, so it is clipped to the municipality outline below.
path_to_footprint = db_folder.joinpath('beaga',
                                       'footprints',
                                       '2010_footprint.zip')
footprint_2010 = _get_geodata(path_to_footprint)

# Municipality outline for ibgeID (set in an earlier cell),
# reprojected to EPSG:31983 before it is used as the clip mask.
bh_contours = read_municipality(int(ibgeID)).to_crs(epsg=31983)

# Keep only the footprint geometry that falls inside the outline.
footprint_2010 = gpd.clip(footprint_2010, bh_contours)

footprint source: http://www.rmbh.org.br/central-cartog.php

In [None]:
# Assign the regrouped 2011 parcels to the hexagonal grid, as the
# 2018 and 2020 sections do. Without this step hex_with_uses would
# still hold whatever a previous section left in the kernel, and
# that stale table would be reclassified, plotted and finally
# saved as the '2011' layer.
hex_with_uses = input_uses_into_hex(lu_2011,
                                    hexagons,)

# It seems that land use data for 2011 has some duplicated
# or overlapping polygons. This causes the same hexagon to
# match different geometries, which ultimately results in
# duplicated instances in hex_with_uses. This issue should be
# handled eventually, but given the small number of cases,
# duplicates are simply dropped for now (the first occurrence
# of each index label is kept).
hex_with_uses = hex_with_uses.loc[~hex_with_uses
                                  .index
                                  .duplicated()]

# The return value is not captured; reclassify_land_uses() is
# expected to update hex_with_uses in place.
reclassify_land_uses(hex_with_uses,
                     footprint_2010,
                     slums,)

In [None]:
# Share (%) of rows in hex_with_uses per broad type. The divisor
# is the full row count, so rows with a missing type still weigh in.
number_of_parcels = len(hex_with_uses)
hex_with_uses['type'].value_counts().div(number_of_parcels).mul(100)

In [None]:
# Share (%) of rows per category; number_of_parcels comes from the previous cell
hex_with_uses['category'].value_counts().div(number_of_parcels).mul(100)

In [None]:
# Side-by-side maps: broad types on the left, categories on the right
fig, axes = plt.subplots(ncols=2, figsize=(5, 8), dpi=300)
ax1, ax2 = axes

plot_land_uses(hex_with_uses, 'type', type_palette, ax1)
plot_land_uses(hex_with_uses, 'category', category_palette, ax2)

# Save before plt.show() is called
plt.savefig(out_folder.joinpath('land_use_map_2011.png'), dpi=300)
plt.show()

In [None]:
# Store the 2011 hexagons as layer '2011' of the GeoPackage at path_to_hexes
hex_with_uses.to_file(path_to_hexes, driver='GPKG', layer='2011')

# Land Use Maps for 2018

Data source: provided directly by Prodabel

In [53]:
# Load the 2018 parcels, keep only the columns used below and give
# them the names the rest of the notebook works with.
land_uses = (
    get_land_use_data(2018)
    .reindex(columns=[
        'D_USO_REV',   # Revised land use (broad) categories
        'USO_AT_REV',  # Revised spatial transcription of land use
        'ATIV',        # Activity list per parcel
        'geometry',
    ])
    .rename(columns={
        'D_USO_REV': 'type',
        'USO_AT_REV': 'category',
        'ATIV': 'activities',
    })
)

In [59]:
# Row count per raw 'type' label (the renamed D_USO_REV column),
# inspected before any regrouping is applied.
land_uses['type'].value_counts()

RESIDENCIAL        217909
SEM INFORMACAO      55820
MISTO               28251
LOTE VAGO           27412
NAO RESIDENCIAL     27387
PARQUE                397
Name: type, dtype: int64

In [None]:
# Regrouping scheme for the 2018 data, nested as
# broad type -> category -> raw labels found in the source.
# It is handed to regroup_land_uses() together with land_uses.
regrouping_2018 = {
    'active': {
        'residential': [
            'CASA/SOBRADO RESIDENCIAL',
            'EDIFICIO RESIDENCIAL',
        ],
        'retail/services': [
            'EDIFICIO NAO RESIDENCIAL',
            'CASA DE SHOW',
            'INSTITUICAO FINANCEIRA',
            'HOTEIS, MOTEIS E SIMILARES',
            'ESTACIONAMENTO',
            'CASA/SOBRADO NAO RESIDENCIAL',
            'SUPERMERCADO/HIPERMERCADO',
            'SHOPPING CENTER',
            'GALPAO COMERCIAL',
        ],
        # TO DO: settle approach towards mixed uses
        'mixed': [
            'CASA/SOBRADO DE USO MISTO',
            'EDIFICIO DE USO MISTO',
        ],
    },
    'passive': {
        'vacant': [
            'LOTE VAGO',
        ],
    },
    'static': {
        'industry': [
            'GALPAO INDUSTRIAL',
        ],
        'public services': [
            'INSTITUICAO DE ENSINO',
            'HOSPITAL/SERVICO DE SAUDE',
            'SERVICOS PUBLICOS',
        ],
        # NOTE(review): the 2011 scheme in this notebook files
        # 'cemitério' under 'amenities', whereas 'CEMITERIO' sits
        # under 'infrastructure' here -- confirm this is intended.
        'infrastructure': [
            'TERMINAL RODOVIARIO/FERROVIARIO',
            'AEROPORTO',
            'CEMITERIO',
            'ATERRO SANITARIO',
        ],
        'amenities': [
            'CLUBE ESPORTIVO/SOCIAL',
            'CENTRO DE CONVENCOES/EXPOSICOES',
            'INSTITUICAO CULTURAL',
            'ESTADIO/GINASIO',
            'INSTITUICAO RELIGIOSA',
            'PARQUE',
        ],
    },
}

In [None]:
def regroup_with_scrutiny(gdf):
    """Relabel, in place, the rows whose category is 'SEM INFORMACAO'.

    The raw category says nothing about these rows, so the broader
    ``type`` column is used to place them:

    - type 'RESIDENCIAL'     -> category 'residential'
    - type 'NAO RESIDENCIAL' -> category 'retail/services'

    Afterwards, every row whose category is 'residential' or
    'retail/services' gets its type set to 'active'. Rows with any
    other type/category combination are left untouched.

    Parameters
    ----------
    gdf : (Geo)DataFrame
        Must contain the columns 'category' and 'type'.

    Returns
    -------
    None
        ``gdf`` is modified in place.

    NOTE: function's name is an exaggeration
    """
    lacks_info = gdf['category'] == 'SEM INFORMACAO'

    # Both selections are computed before anything is written, so
    # the first assignment cannot influence the second one.
    targets = {
        'residential': lacks_info & (gdf['type'] == 'RESIDENCIAL'),
        'retail/services': lacks_info & (gdf['type'] == 'NAO RESIDENCIAL'),
    }
    for new_category, rows in targets.items():
        gdf.loc[rows, 'category'] = new_category

    # Final adjustment: the categories assigned above belong to the
    # 'active' broad type.
    gdf.loc[gdf['category'].isin(list(targets)), 'type'] = 'active'

In [None]:
# Settle the 'SEM INFORMACAO' rows of the 2018 data (in place)
# before the generic regrouping in the next cell.
regroup_with_scrutiny(land_uses)

In [None]:
# Apply the 2018 regrouping scheme. The return value is not captured,
# so regroup_land_uses() presumably relabels land_uses in place --
# confirm against its definition.
regroup_land_uses(land_uses,
                  regrouping_2018,)

In [None]:
# Assign the regrouped 2018 parcels to the hexagonal grid
hex_with_uses = input_uses_into_hex(land_uses, hexagons)

# Return value not captured: hex_with_uses is expected to be
# updated in place, using the 2018 footprint and the slums layer.
reclassify_land_uses(hex_with_uses, footprint_2018, slums)

In [None]:
# Side-by-side maps: broad types on the left, categories on the right
fig, axes = plt.subplots(ncols=2, figsize=(5, 8), dpi=300)
ax1, ax2 = axes

plot_land_uses(hex_with_uses, 'type', type_palette, ax1)
plot_land_uses(hex_with_uses, 'category', category_palette, ax2)

# Save before plt.show() is called
plt.savefig(out_folder.joinpath('land_use_map_2018.png'), dpi=300)
plt.show()

In [None]:
# Store the 2018 hexagons as layer '2018' of the GeoPackage at path_to_hexes
hex_with_uses.to_file(path_to_hexes, driver='GPKG', layer='2018')

# Land Use Maps for 2020

Data source: provided directly by Prodabel

In [None]:
# Settle the 'SEM INFORMACAO' rows (in place) before the generic
# regrouping in the next cell.
# NOTE(review): no cell between the "Land Use Maps for 2020" header
# and this call reloads land_uses, so on a fresh top-to-bottom run it
# would still be the (already regrouped) 2018 frame. Confirm that a
# loading cell for the 2020 data exists, or restore it here.
regroup_with_scrutiny(land_uses)

In [None]:
# Apply the 2020 regrouping scheme; the return value is not captured.
# NOTE(review): regrouping_2020 is not defined in any cell of this
# section -- confirm it is created earlier in the notebook.
regroup_land_uses(land_uses,
                  regrouping_2020,)

In [None]:
# Assign the regrouped parcels to the hexagonal grid
hex_with_uses = input_uses_into_hex(land_uses, hexagons)

# It seems that we have here the same issue that 2011 data
# had: some hexagons show up more than once. This must be looked
# into with more care, but for now, as before, only the first
# occurrence of each index label is kept.
hex_with_uses = hex_with_uses[~hex_with_uses.index.duplicated()]

# Return value not captured: hex_with_uses is expected to be
# updated in place.
reclassify_land_uses(
    hex_with_uses,
    footprint_2018,  # Couldn't find a closer one
    slums,
)

In [None]:
# Share (%) of rows in hex_with_uses per broad type. The divisor
# is the full row count, so rows with a missing type still weigh in.
number_of_parcels = len(hex_with_uses)
hex_with_uses['type'].value_counts().div(number_of_parcels).mul(100)

In [None]:
# Share (%) of rows per category; number_of_parcels comes from the previous cell
hex_with_uses['category'].value_counts().div(number_of_parcels).mul(100)

In [None]:
# Side-by-side maps: broad types on the left, categories on the right
fig, axes = plt.subplots(ncols=2, figsize=(5, 8), dpi=300)
ax1, ax2 = axes

plot_land_uses(hex_with_uses, 'type', type_palette, ax1)
plot_land_uses(hex_with_uses, 'category', category_palette, ax2)

# Save before plt.show() is called
plt.savefig(out_folder.joinpath('land_use_map_2020.png'), dpi=300)
plt.show()

In [None]:
# Store the 2020 hexagons as layer '2020' of the GeoPackage at path_to_hexes
hex_with_uses.to_file(path_to_hexes, driver='GPKG', layer='2020')