In [1]:
import numpy  as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure, ColumnDataSource, gmap
from bokeh.tile_providers import get_provider, Vendors
from bokeh.palettes import PRGn, RdYlGn
from bokeh.transform import linear_cmap,factor_cmap
from bokeh.layouts import row, column
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, NumeralTickFormatter, GMapOptions
from sklearn.cluster import KMeans, DBSCAN

from database import Database
from datetime import datetime, timedelta
import random
db = Database()

In [2]:
# Booking time window applied to the 'timestamp' field in the query below.
# from_date is matched with `$gte` (inclusive).
from_date = '2021-08-01T00:00:00.000'
# to_date is matched with `$lt` (exclusive), so bookings stamped on 2021-08-31 are NOT returned.
# NOTE(review): if the whole of August (31 days) is intended, this should probably be
# '2021-09-01T00:00:00.000' — confirm before changing.
to_date = '2021-08-31T00:00:00.000'

def mercator_coord(x, y):
    """Project a latitude/longitude pair given in degrees onto spherical (Web) Mercator.

    Mind the argument order: ``x`` is the latitude and ``y`` is the longitude.

    Args:
        x: Latitude in degrees (scalar or NumPy array). The projection diverges
            at +/-90 degrees.
        y: Longitude in degrees (scalar or NumPy array).

    Returns:
        Tuple ``(easting, northing)`` expressed in the same length unit as the
        sphere radius ``r_major``.
    """
    r_major = 6378137.000
    easting = r_major * np.radians(y)
    # The previous implementation derived the scale factor as ``easting / y``,
    # which is 0/0 (NaN) for any point at longitude 0. That factor is simply
    # r_major * pi / 180, so the northing can be computed directly instead.
    northing = r_major * np.log(np.tan(np.pi / 4.0 + np.radians(x) / 2.0))
    return (easting, northing)

# Fetch every booking whose timestamp falls in [from_date, to_date), without the Mongo '_id'.
data = db.get_all_data('booking', {'timestamp': {'$gte': from_date, '$lt': to_date}}, {'_id': 0})
entries = list(data)
if not entries:
    # Fail early with a clear message instead of a KeyError on a missing column further down.
    raise ValueError(f'No bookings found between {from_date} and {to_date}')

df = pd.DataFrame(entries)
# Passenger ids are used as group keys and in tooltips, so normalise them to strings.
df['passenger_id'] = df['passenger_id'].map(str)

# # Used for GMAP
# x = df['start_loc']
# df['coordinates'] = [[y['coordinates'][0], y['coordinates'][1]] for y in x]
# df[['src_cord_y', 'src_cord_x']] = df['coordinates'].apply(pd.Series)
# df = df.drop(columns=['coordinates'])
# x = df['dest_loc']
# df['coordinates'] = [(y['coordinates'][0], y['coordinates'][1]) for y in x]
# df[['dest_cord_y', 'dest_cord_x']] = df['coordinates'].apply(pd.Series)
# df = df.drop(columns=['coordinates'])
# df['x_line'] = list([x, y] for x, y in zip(df['src_cord_x'], df['dest_cord_x']))
# df['y_line'] = list([x, y] for x, y in zip(df['src_cord_y'], df['dest_cord_y']))


def _project_locations(locations):
    """Return an (n, 2) float array of mercator_coord() results for a column of location dicts.

    Each entry must expose ``entry['coordinates'][0]`` and ``entry['coordinates'][1]``,
    which are passed to ``mercator_coord`` in that order.
    """
    return np.array(
        [mercator_coord(loc['coordinates'][0], loc['coordinates'][1]) for loc in locations],
        dtype=float,
    )


# Used for FIGURE
# Project both end points once and write the columns directly; this avoids the slow
# per-row ``.apply(pd.Series)`` and the temporary 'coordinates' column.
start_xy = _project_locations(df['start_loc'])
dest_xy = _project_locations(df['dest_loc'])
df['mercator_s_x'] = start_xy[:, 0]
df['mercator_s_y'] = start_xy[:, 1]
df['mercator_d_x'] = dest_xy[:, 0]
df['mercator_d_y'] = dest_xy[:, 1]

# One [start, destination] pair per booking, in the shape multi_line expects for xs / ys.
df['mercator_x_line'] = [[s, d] for s, d in zip(df['mercator_s_x'], df['mercator_d_x'])]
df['mercator_y_line'] = [[s, d] for s, d in zip(df['mercator_s_y'], df['mercator_d_y'])]

# The raw location dicts are no longer needed once projected.
df = df.drop(columns=['start_loc', 'dest_loc'])

# One DataFrame per passenger; each becomes its own data source when plotting.
dfs = df.groupby(df.passenger_id)
dfx = [dfs.get_group(passenger) for passenger in dfs.indices]

ServerSelectionTimeoutError: 127.0.0.1:27017: [WinError 10061] No connection could be made because the target machine actively refused it, Timeout: 30s, Topology Description: <TopologyDescription id: 6145d2fef93febf73f293728, topology_type: Single, servers: [<ServerDescription ('127.0.0.1', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('127.0.0.1:27017: [WinError 10061] No connection could be made because the target machine actively refused it')>]>

In [None]:
def calc_cluster(arr_x, arr_y, ep, min_samples=10):
    """Cluster 2-D points with DBSCAN and summarise every cluster as a circle.

    Args:
        arr_x: Iterable of x coordinates.
        arr_y: Iterable of y coordinates, paired element-wise with ``arr_x``.
        ep: DBSCAN ``eps`` (neighbourhood radius), in the same units as the coordinates.
        min_samples: DBSCAN ``min_samples``; defaults to the previously hard-coded 10.

    Returns:
        ``(cluster_center, cluster_radius)``: two parallel lists ordered by ascending
        cluster label. ``cluster_center[i]`` is the ``(x, y)`` centroid of the core
        samples of cluster ``i`` and ``cluster_radius[i]`` is the distance from that
        centroid to its farthest core sample. Both lists are empty when there are no
        input points or when DBSCAN labels every point as noise.
    """
    coords = np.array(list(zip(arr_x, arr_y)), dtype=float)
    if len(coords) == 0:
        # DBSCAN cannot be fitted on an empty array; there is simply nothing to cluster.
        return [], []

    dbscan = DBSCAN(eps=ep, min_samples=min_samples).fit(coords)
    labels = dbscan.labels_
    core_samples_mask = np.zeros_like(labels, dtype=bool)
    core_samples_mask[dbscan.core_sample_indices_] = True

    cluster_center = []
    cluster_radius = []
    # np.unique yields the labels sorted, so the output order is deterministic.
    for label in np.unique(labels):
        if label == -1:
            continue  # -1 is DBSCAN's noise label, not a cluster
        # Only core samples define the circle; border points are ignored.
        points = coords[(labels == label) & core_samples_mask]
        centroid = points.mean(axis=0)
        radius = np.linalg.norm(points - centroid, axis=1).max()
        cluster_center.append((float(centroid[0]), float(centroid[1])))
        cluster_radius.append(float(radius))
    return cluster_center, cluster_radius

# GMAP
# src_cluster_center, src_cluster_radius = calc_cluster(df['src_cord_x'], df['src_cord_y'], 0.01)
# dest_cluster_center, dest_cluster_radius = calc_cluster(df['dest_cord_x'], df['dest_cord_y'], 0.01)

# FIGURE
# DBSCAN neighbourhood radius (eps), in the same units as the mercator_* columns.
# Values used so far: 1200 for a 1-day window, 500 for a 31-day window.
CLUSTER_EPS = 500

# Pick-up (source) and drop-off (destination) hot spots, each as centres plus radii.
src_cluster_center, src_cluster_radius = calc_cluster(df['mercator_s_x'], df['mercator_s_y'], CLUSTER_EPS)
dest_cluster_center, dest_cluster_radius = calc_cluster(df['mercator_d_x'], df['mercator_d_y'], CLUSTER_EPS)

In [None]:
# calc_cluster with its standalone figure plotted
# def calc_cluster(arr_x, arr_y, ep):
#     coords = np.array(list((x,y) for x,y in zip(arr_x, arr_y)))
#     cluster_points = []
#     cluster_center = []
#     cluster_radius = []
#     db = DBSCAN(eps=ep, min_samples=10).fit(coords)
#     core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
#     core_samples_mask[db.core_sample_indices_] = True
#     labels = db.labels_
#     n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
#     unique_labels = set(labels)
#     colors = [plt.cm.Spectral(each)
#             for each in np.linspace(0, 1, len(unique_labels))]
#     for k, col in zip(unique_labels, colors):
#         class_member_mask = (labels == k)
#         if k == -1:
#             # Black used for noise.
#             col = [0, 0, 0, 1]
#             xy = coords[class_member_mask & ~core_samples_mask]
#             plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col), markeredgecolor='k', markersize=1)
#         else:
#             xy = coords[class_member_mask & core_samples_mask]
#             cluster_points.append(xy)
#             plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col), markeredgecolor='k', markersize=1)
#     plt.title('Estimated number of clusters: %d' % n_clusters_)
#     #plt.show()

#     for points in cluster_points:
#         x = [p[0] for p in points]
#         y = [p[1] for p in points]
#         centroid = (sum(x) / len(points), sum(y) / len(points))
#         cluster_center.append(centroid)
#         rad = 0
#         for i in range(len(x)):
#             dist = (abs(x[i]-centroid[0])**2 + abs(y[i]-centroid[1])**2)**0.5
#             if dist > rad:
#                 rad = dist
#         cluster_radius.append(rad)

#     fig = plt.gcf()
#     ax = plt.gca()
#     for i in range(len(cluster_center)):
#         circle = plt.Circle(cluster_center[i], cluster_radius[i], color = 'b')
#         ax.add_patch(circle)
#     plt.show()
#     return cluster_center, cluster_radius

# src_cluster_center, src_cluster_radius = calc_cluster(df['mercator_s_x'], df['mercator_s_y'], 500)
# dest_cluster_center, dest_cluster_radius = calc_cluster(df['mercator_d_x'], df['mercator_d_y'], 500)

In [None]:
# One Bokeh data source per passenger so every passenger can get its own colour.
sources = [ColumnDataSource(data=passenger_df) for passenger_df in dfx]
output_file("gmap.html")

# # GMAP
# map_options = GMapOptions(lat=12.9035, lng= 77.4914, map_type="roadmap", zoom=11)
# API = os.environ['GOOGLE_MAPS_API_KEY']  # read the key from the environment; never commit it
# p = gmap(API, map_options, title= 'Banglore')

# FIGURE
chosentile = get_provider(Vendors.STAMEN_TONER)
tooltips = [("Passenger","@passenger_id"), ("vehicle_type","@vehicle_type")]
p = figure(title = 'Passenger Location', x_axis_type="mercator", y_axis_type="mercator", x_axis_label = 'Longitude', y_axis_label = 'Latitude', tooltips = tooltips)
p.add_tile(chosentile)

# A dedicated, seeded generator keeps the per-passenger colours identical between runs
# without touching the global ``random`` state.
color_rng = random.Random(0)

for source in sources:
    rgb = tuple(color_rng.randint(0, 255) for _ in range(3))

#     # GMAP
#     p.circle(x = 'src_cord_x', y = 'src_cord_y', fill_color = rgb, source=source, size=5, fill_alpha = 0.7)
#     p.triangle(x = 'dest_cord_x', y = 'dest_cord_y', fill_color = rgb, source=source, size=5, fill_alpha = 0.7)
#     p.multi_line(xs = 'x_line', ys = 'y_line', line_color= rgb, source=source, line_width=2, line_alpha= 0.7, legend_label='route')

    # FIGURE
    # Circle = pick-up point, triangle = drop-off point, line = straight segment between them.
    p.circle(x = 'mercator_s_x', y = 'mercator_s_y', fill_color = rgb, source=source, size=5, fill_alpha = 0.7, legend_label='source')
    p.triangle(x = 'mercator_d_x', y = 'mercator_d_y', fill_color = rgb, source=source, size=5, fill_alpha = 0.7, legend_label='destination')
    p.multi_line(xs = 'mercator_x_line', ys = 'mercator_y_line', line_color= rgb, source=source, line_width=2, line_alpha= 0.7, legend_label='route')

# Overlay the DBSCAN clusters as translucent circles; ``radius`` is in data units.
cluster_layers = (
    (dest_cluster_center, dest_cluster_radius, 'red', 'destination_cluster'),
    (src_cluster_center, src_cluster_radius, 'blue', 'source_cluster'),
)
for centers, radii, color, label in cluster_layers:
    if not centers:
        # No cluster was found: unpacking zip(*[]) would raise, so skip this layer.
        continue
    center_x, center_y = zip(*centers)
    p.circle(x = list(center_x), y = list(center_y), radius=radii, fill_color = color, fill_alpha= 0.3, legend_label=label)

p.legend.location = "top_left"
p.legend.click_policy="hide"
show(p)