# Clustering algorithm research

### Project: Analiza przejazdu hulajnog elektrycznych (Electric Scooter Trip Analysis)

In [1]:
import sys
# Add the parent directory to the module search path; presumably this is what
# lets the `geoanalysis_app` imports in the next cell resolve when the notebook
# is run from its own folder (TODO confirm project layout).
sys.path.append('..')

In [2]:
import folium
import folium.plugins
import numpy as np
from matplotlib import pyplot as plt
from sklearn import cluster

from geoanalysis_app import common
from geoanalysis_app import constants as C

%load_ext autoreload
%autoreload 2

In [3]:
# Load the trip data via the project helper; the source location and schema
# are decided inside `common.load_data`.
data_df = common.load_data()

In [4]:
# Show column names, dtypes and non-null counts of the loaded frame.
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 18 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   trip_id                      1000 non-null   object 
 1   start_time                   1000 non-null   object 
 2   end_time                     1000 non-null   object 
 3   trip_distance                1000 non-null   int64  
 4   trip_duration                1000 non-null   int64  
 5   accuracy                     1000 non-null   int64  
 6   start_census_tract           654 non-null    float64
 7   end_census_tract             654 non-null    float64
 8   start_community_area_number  935 non-null    float64
 9   end_community_area_number    935 non-null    float64
 10  start_community_area_name    935 non-null    object 
 11  end_community_area_name      935 non-null    object 
 12  start_centroid_latitude      935 non-null    float64
 13  start_centroid_long

In [5]:
# Only the trip start/end centroid coordinates are used in this notebook.
keep_cols = [
    "start_centroid_latitude",
    "start_centroid_longitude",
    "end_centroid_latitude",
    "end_centroid_longitude",
]

# Take an explicit copy so that later cleaning steps operate on an independent
# frame rather than on a slice of the loaded data (avoids pandas'
# SettingWithCopyWarning when the subset is modified).
data_df = data_df[keep_cols].copy()

In [6]:
# Preview the first rows of the frame after column selection.
data_df.head()

Unnamed: 0,start_centroid_latitude,start_centroid_longitude,end_centroid_latitude,end_centroid_longitude
0,41.850266,-87.667568,41.850266,-87.667568
1,41.92276,-87.699156,41.92276,-87.699156
2,41.879077,-87.657045,41.879077,-87.657045
3,41.874005,-87.663518,41.874005,-87.663518
4,41.874005,-87.663518,41.874005,-87.663518


In [7]:
# Drop trips that are missing any of the selected columns. Reassigning the
# result (instead of inplace=True) avoids mutating a column slice in place,
# which can raise SettingWithCopyWarning, and keeps the step chainable.
data_df = data_df.dropna()

In [8]:
# Re-inspect the frame after dropping rows with missing values.
data_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 935 entries, 0 to 999
Data columns (total 4 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   start_centroid_latitude   935 non-null    float64
 1   start_centroid_longitude  935 non-null    float64
 2   end_centroid_latitude     935 non-null    float64
 3   end_centroid_longitude    935 non-null    float64
dtypes: float64(4)
memory usage: 36.5 KB


In [9]:
# Sanity check: True if ANY column still contains a missing value.
# The whole frame is checked rather than a single column, so a leftover NaN
# in any coordinate column is caught.
data_df.isna().to_numpy().any()

False

In [10]:
# Split the frame into start-point and end-point coordinate frames,
# each ordered as (latitude, longitude).
start_cols = ["start_centroid_latitude", "start_centroid_longitude"]
end_cols = ["end_centroid_latitude", "end_centroid_longitude"]

start_loc = data_df.loc[:, start_cols]
end_loc = data_df.loc[:, end_cols]

In [11]:
# Convert the start coordinates to a plain array for scikit-learn.
# np.asarray accepts both a DataFrame and an ndarray, so re-running this cell
# is harmless (calling .to_numpy() a second time would raise AttributeError
# because `start_loc` is already an ndarray by then).
start_loc = np.asarray(start_loc)

In [12]:
# Clustering hyper-parameters, named so they are easy to find and tune.
N_CLUSTERS = 5
RANDOM_STATE = 0

# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in 1.4 (and emits a FutureWarning in 1.2-1.3). Fixing it at 10 keeps
# the fitted centroids stable across library versions.
# NOTE(review): K-Means uses Euclidean distance on raw (latitude, longitude)
# pairs, which is only an approximation of ground distance — confirm this is
# acceptable at city scale.
cluster_alg = cluster.KMeans(
    n_clusters=N_CLUSTERS,
    n_init=10,
    random_state=RANDOM_STATE,
)

cluster_alg.fit(start_loc)

# One (latitude, longitude) row per cluster centre.
centroids = cluster_alg.cluster_centers_

In [13]:
# Display the fitted cluster centres (rows of the K-Means `cluster_centers_` array).
centroids

array([[ 41.92430837, -87.70480407],
       [ 41.88035825, -87.65837103],
       [ 41.90528757, -87.67471561],
       [ 41.88937003, -87.75461004],
       [ 41.94537955, -87.7785561 ]])

In [14]:
# 800x600 figure holding an OpenStreetMap view positioned at C.CHICAGO_COORDINATES.
f = folium.Figure(width=800, height=600)
m_1 = folium.Map(
    location=C.CHICAGO_COORDINATES,
    tiles='openstreetmap',
    zoom_start=11,
)

# Place one marker per K-Means cluster centre; each row unpacks to (lat, lon).
for center_lat, center_lon in centroids:
    folium.Marker((center_lat, center_lon)).add_to(m_1)

f.add_child(m_1)

# Last expression of the cell: render the figure.
f

In [15]:
?folium.plugins.MarkerCluster()

Object `folium.plugins.MarkerCluster()` not found.


In [16]:
# Second map: every trip start location, grouped by the MarkerCluster plugin.
f = folium.Figure(width=800, height=600)
m_1 = folium.Map(
    location=C.CHICAGO_COORDINATES,
    tiles='cartodbpositron',
    zoom_start=11,
)

mc = folium.plugins.MarkerCluster()
m_1.add_child(mc)

# Add one marker per start point (not per cluster centre); each row of
# `start_loc` unpacks to (lat, lon).
for start_lat, start_lon in start_loc:
    point_marker = folium.Marker((start_lat, start_lon), popup='some text')
    mc.add_child(point_marker)

f.add_child(m_1)

# Last expression of the cell: render the figure.
f