## Denoising owner type

We want to revisit the colorful owner types map by trying to find spatial clusters that share a certain ownership type.
We will approach this as a denoising problem: each parcel is assigned the owner type that is the most represented category in its local neighborhood.
We therefore find the K nearest neighbors of a parcel (itself included), and assign to the parcel the most represented type among those neighbors.

- Find a representative point for each parcel (maybe the center of mass of the parcel)
- For each parcel find its nearest neighbors
- Compute the distribution of ownership types for each neighborhood
- Assign to the parcel the most represented ownership type in its neighborhood

In [None]:
# Local flat-earth approximation used to turn small coordinate differences
# (assumed to be expressed in degrees) into metres:
#   - one degree of northing spans 1/360 of a great circle: 2*pi*R / 360
#   - one degree of easting spans 1/360 of the parallel at the reference
#     latitude, whose radius is R*cos(latitude)
# NOTE(review): 6.3e6 is a rough value (the mean Earth radius is about
# 6.371e6 m); it is kept unchanged so downstream distances stay the same.
EARTH_RADIUS_METERS = 6.3e6
# Latitude at which the easting scale is evaluated. The previous code used
# sin(pi/4), which only equals the correct cos(latitude) factor at 45 degrees.
# NOTE(review): confirm 45 degrees is close enough to the dataset's latitude.
REFERENCE_LATITUDE_DEGREES = 45.0
meters_per_northing_degree = 2*EARTH_RADIUS_METERS*np.pi/360
meters_per_easting_degree = meters_per_northing_degree*np.cos(np.radians(REFERENCE_LATITUDE_DEGREES))

In [None]:
# Display the east-west scale factor computed above as a sanity check.
meters_per_easting_degree

In [None]:
# Display the north-south scale factor computed above as a sanity check.
meters_per_northing_degree

In [None]:
# K-nearest-neighbour majority vote over the parcels' (x, y) points.
# `compute_distance2` is defined in a later cell and must already be in scope.
K = 5  # neighbourhood size; the parcel itself is one of its own neighbours (distance 0)
# Denoised owner type per parcel number, initialised with the raw owner types.
types = polygons_df[['parc_no', 'owner_type']].copy().set_index('parc_no').sort_index()['owner_type']
# Rows are read by column name rather than unpacked by position, so the loop
# does not depend on the number or order of columns in polygons_df.
for _, parcel in polygons_df.iterrows():
    distances2 = compute_distance2(polygons_df['x'] - parcel['x'], polygons_df['y'] - parcel['y'])
    # nsmallest avoids sorting every distance just to keep the K closest.
    neigh = distances2.nsmallest(K)
    neighbor_parcels = polygons_df.loc[neigh.index]
    # value_counts orders labels by decreasing frequency: the first is the majority type.
    types.loc[parcel['parc_no']] = neighbor_parcels['owner_type'].value_counts().index[0]

In [None]:
# Pie chart of the number of parcels per raw (un-denoised) owner type.
polygons_df['owner_type'].value_counts().plot.pie();

In [None]:
# Pie chart of the number of parcels per owner type stored in `types`,
# for comparison with the raw distribution.
types.value_counts().plot.pie();

In [None]:
def compute_distance2(delta_east, delta_north):
    """Return the squared distance, in meters, for the given coordinate offsets.

    Each offset is scaled by the matching module-level factor
    (`meters_per_easting_degree`, `meters_per_northing_degree`) before the
    squares are summed. Scalars and array-likes supporting `*` and `**`
    both work.
    """
    east_meters = delta_east*meters_per_easting_degree
    north_meters = delta_north*meters_per_northing_degree
    return east_meters**2 + north_meters**2

In [None]:
def polygons_intersection(poly):
    """Build a row function that measures how much a row's polygon overlaps `poly`.

    The returned callable takes a row-like object exposing a 'poly' entry and
    returns the `.area` of `row['poly'].intersection(poly)`.
    """
    def inters(serie):
        overlap = serie['poly'].intersection(poly)
        return overlap.area

    return inters
# Area-weighted vote: each parcel gets the owner type covering the largest
# area inside a disc centred on the parcel's centroid.
radius = 100  # neighbourhood radius, in the meters used by compute_distance2
# Denoised owner type per parcel number, initialised with the raw owner types.
types = polygons_df[['parc_no', 'owner_type']].copy().set_index('parc_no').sort_index()['owner_type']
# Rows are read by column name rather than unpacked by position, so the loop
# does not depend on the number or order of columns in polygons_df.
for _, parcel in polygons_df.iterrows():
    distances2 = compute_distance2(polygons_df['x'] - parcel['x'], polygons_df['y'] - parcel['y'])
    # Candidate neighbours: parcels whose (x, y) point lies within `radius`.
    # A boolean mask replaces the per-iteration concat/inner-join.
    neighbor_parcels = polygons_df[distances2 < radius**2].copy()
    # NOTE(review): the buffer radius is converted with the easting factor only,
    # so the disc is round in coordinate units but stretched north-south once
    # converted back to meters (the northing factor is larger) — confirm this
    # is acceptable.
    circle = parcel['poly'].centroid.buffer(radius/meters_per_easting_degree)
    neighbor_parcels['intersection'] = neighbor_parcels.apply(polygons_intersection(circle), axis='columns')
    intersection_per_cat = neighbor_parcels.groupby('owner_type')['intersection'].sum()
    # idxmax picks the category with the largest overlapping area without sorting.
    types.loc[parcel['parc_no']] = intersection_per_cat.idxmax()

In [None]:
# Map object that the parcel layer below is added to (getMap is defined elsewhere).
m = getMap()
# TODO: weight by area
def style_function(feature):
    """Return the style for one parcel feature, coloured by its entry in `types`.

    The parcel number is read from feature['properties']['NO_PARC'] and looked
    up in the module-level `types`; the resulting category selects the fill
    colour. The outline is disabled.
    """
    palette = {
        'coop': 'yellow',
        'société': 'red',
        'public': 'green',
        'private': 'blue',
        'PPE': 'orange',
        'pension': 'purple',
        'fondation/association': 'brown',
    }
    parcel_number = feature['properties']['NO_PARC']
    owner_category = types[parcel_number]
    fill = palette[owner_category]
    return {'stroke': False, 'fillColor': fill}

# Add the parcels to the map, styled per feature by style_function, then
# display the map as the cell output.
# NOTE(review): "show the owner at hover" was noted here, but no tooltip is configured.
folium.GeoJson(geo_parcels, style_function=style_function).add_to(m)
m

In [None]:
def polygons_intersection(poly):
    """Build a row function that measures how much a row's polygon overlaps `poly`.

    The returned callable takes a row-like object exposing a 'poly' entry and
    returns the `.area` of `row['poly'].intersection(poly)`.
    """
    def inters(serie):
        overlap = serie['poly'].intersection(poly)
        return overlap.area

    return inters
# Ownership diversity: for each parcel, the entropy of the distribution of
# owners ('proprio') weighted by the area each one covers inside a disc
# centred on the parcel's centroid.
radius = 100  # neighbourhood radius, in the meters used by compute_distance2
# Entropy per parcel number. Created directly as a float Series (instead of a
# copy of the owner-type strings) so that numeric operations on it are well
# defined.
diverse = pd.Series(
    np.nan,
    index=polygons_df.set_index('parc_no').sort_index().index,
    dtype=float,
)
# Rows are read by column name rather than unpacked by position, so the loop
# does not depend on the number or order of columns in polygons_df.
for _, parcel in polygons_df.iterrows():
    distances2 = compute_distance2(polygons_df['x'] - parcel['x'], polygons_df['y'] - parcel['y'])
    # Candidate neighbours: parcels whose (x, y) point lies within `radius`.
    neighbor_parcels = polygons_df[distances2 < radius**2].copy()
    # NOTE(review): the buffer radius is converted with the easting factor only,
    # so the disc is stretched north-south once converted back to meters.
    circle = parcel['poly'].centroid.buffer(radius/meters_per_easting_degree)
    neighbor_parcels['intersection'] = neighbor_parcels.apply(polygons_intersection(circle), axis='columns')
    intersection_per_owner = neighbor_parcels.groupby('proprio')['intersection'].sum()
    # Normalise by this neighbourhood's own total. The previous code divided by
    # `intersection_per_cat.sum()`, a variable left over from another cell.
    p = intersection_per_owner / intersection_per_owner.sum()
    diverse.loc[parcel['parc_no']] = scipy.stats.entropy(p)

In [None]:
from matplotlib import cm
from matplotlib import colors

In [None]:
# Map object that the entropy layer below is added to (getMap is defined elsewhere).
m = getMap()
# TODO: weight by area
# Entropy values mapped to the two ends of the colour scale: a fixed lower
# bound and the 95th percentile of the computed entropies as upper bound.
min_s = 1
max_s = np.quantile(diverse.values, q=.95)

def style_function(feature):
    """Return the style for one parcel feature, coloured by its entry in `diverse`.

    The parcel number is read from feature['properties']['NO_PARC']; its
    entropy is rescaled with the module-level `min_s`/`max_s`, passed through
    the `cm.RdGy` colormap and converted to a hex colour string. The outline
    is disabled and the fill opacity is 0.8.
    """
    parcel_number = feature['properties']['NO_PARC']
    rescaled = (diverse[parcel_number] - min_s) / (max_s - min_s)
    fill = colors.rgb2hex(cm.RdGy(rescaled))
    return {'stroke': False, 'fillColor': fill, 'fillOpacity': 0.8}

# Add the parcels to the map, styled per feature by style_function, then
# display the map as the cell output.
# NOTE(review): "show the owner at hover" was noted here, but no tooltip is configured.
folium.GeoJson(geo_parcels, style_function=style_function).add_to(m)
m