In [2]:
# Load the `dotenv` IPython extension, then reload it so that re-running this
# cell also works when the extension was already loaded in the kernel.
%load_ext dotenv
%reload_ext dotenv
# NOTE(review): presumably %dotenv reads a .env file into os.environ (later
# cells read the REPO variable from the environment) -- confirm.
%dotenv

In [3]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

import folium
from folium.plugins import HeatMap
from shapely.geometry import MultiPoint

In [4]:
import os
import sys

from os.path import dirname

# REPO must point at the repository root. os.environ.get() returns None when
# the variable is missing, which would make the concatenation below fail with
# an opaque TypeError, so fail early with an explicit message instead.
if os.environ.get('REPO') is None:
    raise RuntimeError(
        "Environment variable REPO is not set; it must point to the repository root"
    )

UTILS_PATH = os.environ.get('REPO') + "/notebooks/commons"

# The *parent* directory of UTILS_PATH goes on sys.path so that `commons`
# can be imported by name. The membership check keeps the cell idempotent:
# re-running it no longer appends the same entry again.
if dirname(UTILS_PATH) not in sys.path:
    sys.path.append(dirname(UTILS_PATH))

from commons import check_args, gen_paths, coordinates_bounds

In [5]:
import math

def safe_pow(e):
    """Return ``2 ** e`` as a float, saturating at infinity.

    Args:
        e: Exponent applied to base 2.

    Returns:
        ``float(2 ** e)``, or ``math.inf`` when computing or converting the
        power raises ``OverflowError``.
    """
    try:
        return float(2**e)
    except OverflowError:
        # Too large to represent as a float: report it as infinity.
        return math.inf
    

In [6]:
def load_posts(input_path):
    """Read the posts stored as parquet at ``input_path``.

    Args:
        input_path: Location handed unchanged to ``pd.read_parquet``.

    Returns:
        Whatever ``pd.read_parquet`` returns for that location.
    """
    return pd.read_parquet(input_path)

In [7]:
if __name__ == "__main__":
    # Dataset selection used by every cell below.
    city = "manizales"
    property_type = "casas"
    post_type = "arriendo"

    # Alternative: take the three values from check_args() instead of the
    # hard-coded ones above.
    #city, property_type, post_type = check_args()

    # Without this check a missing REPO variable yields None and the path
    # concatenations below fail with an opaque TypeError.
    base_path = os.environ.get('REPO')
    if base_path is None:
        raise RuntimeError(
            "Environment variable REPO is not set; it must point to the repository root"
        )

    # NOTE(review): gen_paths presumably returns paths relative to the
    # repository root (they are appended to base_path as-is) -- confirm.
    raw_path, clean_path = gen_paths(city, property_type, post_type)

    raw_path = base_path + raw_path
    clean_path = base_path + clean_path

In [8]:
# Template for the summary line: number of posts, then the post type.
MSG = """
Hay {} publicaciones de {} en total
"""

# Load the cleaned posts and report how many rows were read.
posts = load_posts(clean_path)
print(MSG.format(len(posts), post_type))

# Preview of the first rows.
posts.head()


Hay 99 publicaciones de arriendo en total



Unnamed: 0,surface,rooms,baths,garages,price,latitude,description,longitude,location,url,admon,estrato,antiguedad,fid,city,property_type,post_type,price_m2
0,486.0,4.0,4.0,0.0,4.86,5.06864,"Área de 486 mtrs, 4 baños, cocina sencilla, cu...",-75.517929,Manizales CENTRO,https://www.fincaraiz.com.co/casa-en-arriendo/...,0.0,4.0,16 a 30,3078558,manizales,casas,arriendo,0.01
1,110.0,3.0,3.0,0.0,4.0,5.062479,"Área de 110 mtrs, 3 alcobas, 3 closet, 3 baños...",-75.495445,Manizales EL TRIANGULO,https://www.fincaraiz.com.co/casa-en-arriendo/...,0.0,6.0,16 a 30,3497615,manizales,casas,arriendo,0.036364
2,1.0,5.0,4.0,0.0,3.0,5.058675,"5 alcobas, closet, 4 baños, sala comedor, coci...",-75.488083,Manizales PALOGRANDE,https://www.fincaraiz.com.co/casa-en-arriendo/...,0.0,6.0,16 a 30,5619086,manizales,casas,arriendo,3.0
3,330.0,7.0,4.0,0.0,3.0,5.06899,"Para comercial consta de 7 alcobas, 4 baños, c...",-75.517342,Manizales centro,https://www.fincaraiz.com.co/casa-en-arriendo/...,0.0,4.0,desconocido,5594083,manizales,casas,arriendo,0.009091
4,120.0,4.0,3.0,1.0,1.55,5.035938,Administra Bienes Raíces ofrece amplia e ilumi...,-75.46933,Manizales San Marcel,https://www.fincaraiz.com.co/casa-en-arriendo/...,0.0,6.0,desconocido,4430438,manizales,casas,arriendo,0.012917


## Mapa de calor de precios

In [9]:
# Summary statistics of the price column, shown before it is used as heat-map weight.
posts.loc[:, "price"].describe()

count    99.000000
mean      4.055535
std       4.561656
min       0.400000
25%       1.177500
50%       3.000000
75%       4.350000
max      27.120000
Name: price, dtype: float64

In [17]:
# Centre the map on the midpoint of the bounding box configured for `city`.
lat_bounds = coordinates_bounds[city]["lat"]
lon_bounds = coordinates_bounds[city]["lon"]
map_center = [
    np.mean([lat_bounds["lower"], lat_bounds["upper"]]),
    np.mean([lon_bounds["lower"], lon_bounds["upper"]]),
]
folium_hmap = folium.Map(location=map_center, zoom_start=13, tiles="OpenStreetMap")

# The heat weight is 2 ** price (via safe_pow), stored as a new column on `posts`.
posts.loc[:, "price_exploded"] = posts["price"].apply(safe_pow)
max_amount = posts['price_exploded'].max()

# Heat layer options; the largest weight is passed as the intensity ceiling.
heat_options = dict(
    min_opacity=0.2,
    max_val=max_amount,
    radius=7,
    blur=1,
    max_zoom=15,
    gradient={.1:'yellow', .4: 'orange',  .6: 'red'},
)
hm_wide = HeatMap(posts[["latitude", "longitude", "price_exploded"]], **heat_options)

# Last expression of the cell: the map with the heat layer attached.
folium_hmap.add_child(hm_wide)

## Mapa de calor del número de publicaciones

In [11]:
from sklearn.cluster import DBSCAN

In [12]:
# Minimum number of neighbouring posts for DBSCAN to treat a point as a core point.
min_samples = 5

# The clustering below uses the haversine metric on coordinates in radians,
# so the neighbourhood radius (0.1, in the same unit as kms_per_radian) is
# converted to radians here.
kms_per_radian = 6371.0088
epsilon = 0.1 / kms_per_radian

# One row per post, columns in (latitude, longitude) order.
coords = posts[["latitude", "longitude"]].values

In [13]:
## Geo clustering, following these examples:
# https://stackoverflow.com/questions/24762435/clustering-geo-location-coordinates-lat-long-pairs-using-kmeans-algorithm-with
# https://geoffboeing.com/2014/08/clustering-to-reduce-spatial-data-set-size/

# Configure the estimator first, then fit it on the coordinates converted to
# radians; fit() returns the estimator itself, so `db` is the fitted model.
db = DBSCAN(
    eps=epsilon,
    min_samples=min_samples,
    algorithm='ball_tree',
    metric='haversine',
)
db.fit(np.radians(coords))

In [14]:
# Build one row per clustered post: its coordinates, the id of its cluster and
# the centroid of that cluster.
coordinate_columns = ['latitude', 'longitude']
cluster_columns = coordinate_columns + ["cluster_id", "cluster_latitude", "cluster_longitude"]

# DBSCAN marks noise with the label -1; real clusters have non-negative ids.
# Iterating over the labels that actually occur (instead of one iteration per
# sample) avoids building an empty frame for every id that does not exist.
cluster_ids = sorted(int(label) for label in set(db.labels_) if label >= 0)
clusters_size = len(cluster_ids)

centroids = []
for n in cluster_ids:
    cluster = posts[coordinate_columns][db.labels_ == n].copy()

    # Compute the centroid from the coordinate columns only, before any other
    # column is added, so cluster_id is not passed along as a third coordinate.
    # Column order is (latitude, longitude), hence x is latitude and y is
    # longitude.
    multi_point = MultiPoint(cluster.values)

    cluster.loc[:, "cluster_id"] = n
    cluster.loc[:, "cluster_latitude"] = multi_point.centroid.x
    cluster.loc[:, "cluster_longitude"] = multi_point.centroid.y

    centroids.append(cluster)

if centroids:
    clusters_df = pd.concat(centroids).reset_index(drop=True)
else:
    # No cluster found (every post is noise): keep the expected columns so the
    # following cells still find them.
    clusters_df = pd.DataFrame(columns=cluster_columns)

clusters_df.head()

Unnamed: 0,latitude,longitude,cluster_id,cluster_latitude,cluster_longitude
0,5.06864,-75.517929,0.0,5.068282,-75.518055
1,5.06899,-75.517342,0.0,5.068282,-75.518055
2,5.067968,-75.518257,0.0,5.068282,-75.518055
3,5.067968,-75.51841,0.0,5.068282,-75.518055
4,5.06801,-75.518112,0.0,5.068282,-75.518055


In [15]:
# Count how many clustered posts share exactly the same (latitude, longitude).
# After count() the `cluster_id` column holds that number of posts.
# reset_index() turns the grouping keys back into regular columns; unlike
# unpacking the index by hand, it also works when there are no rows at all.
clusters_coordinates = (
    clusters_df[["cluster_id", "latitude", "longitude"]]
    .groupby(["latitude", "longitude"])
    .count()
    .reset_index()
)

# Heat-map weight: 2 ** (posts at that coordinate), via safe_pow.
clusters_coordinates["count"] = clusters_coordinates["cluster_id"].apply(safe_pow)

# Keep the column order used by the rest of the notebook.
clusters_coordinates = clusters_coordinates[["cluster_id", "latitude", "longitude", "count"]]

clusters_coordinates.head()

Unnamed: 0,cluster_id,latitude,longitude,count
0,1,5.050056,-75.484512,2.0
1,1,5.05029,-75.483643,2.0
2,1,5.050306,-75.484032,2.0
3,1,5.050527,-75.483437,2.0
4,1,5.050879,-75.483902,2.0


In [16]:
# Centre the map on the midpoint of the bounding box configured for `city`.
lat_bounds = coordinates_bounds[city]["lat"]
lon_bounds = coordinates_bounds[city]["lon"]
map_center = [
    np.mean([lat_bounds["lower"], lat_bounds["upper"]]),
    np.mean([lon_bounds["lower"], lon_bounds["upper"]]),
]
folium_hmap = folium.Map(location=map_center, zoom_start=13, tiles="OpenStreetMap")

# The largest per-coordinate weight is passed as the intensity ceiling.
max_amount = clusters_coordinates['count'].max()

heat_options = dict(
    min_opacity=0.2,
    max_val=max_amount,
    radius=7,
    blur=1,
    max_zoom=15,
    gradient={.1:'yellow', .5: 'orange',  1: 'red'},
)
hm_wide = HeatMap(clusters_coordinates[["latitude", "longitude", "count"]], **heat_options)

# Last expression of the cell: the map with the heat layer attached.
folium_hmap.add_child(hm_wide)