# Pertinent Nomad

 

In [1162]:
import sys
from typing import List, Tuple, Dict, Any, Hashable
# TODO install module rather than inserting to path
sys.path.insert(0, '..')
import os
import math
import time
import pandas as pd
import numpy as np
import folium
import itertools as it
import great_circle_calculator.great_circle_calculator as gcc

from common.helpers import nomad as nom
import logging

def myLogger():
  """Build (or rebuild) the module logger with a file and a console handler.

  The logger is named after ``__name__``, accepts DEBUG and above, and does
  not propagate to ancestor loggers. Two handlers are attached:

  * a ``FileHandler`` appending to ``nomad_intersections.log`` that records
    ERROR and above only;
  * a ``StreamHandler`` on ``sys.stdout`` that emits DEBUG and above.

  Calling the function again (for example when the cell is re-run) first
  detaches *and closes* the handlers installed by the previous call, so
  messages are not duplicated and the previous log-file handle is released.

  :return: the configured ``logging.Logger``
  """
  logger = logging.getLogger(__name__)
  formatter = logging.Formatter('%(asctime)s: %(lineno)2s: %(levelname)s: %(message)s')
  logger.setLevel(logging.DEBUG)
  logger.propagate = False
  filename = 'nomad_intersections.log'

  # Iterate over a copy because removeHandler() mutates logger.handlers.
  # Closing matters for the FileHandler: merely clearing the list would leave
  # its file open for the rest of the session.
  for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

  fhandler = logging.FileHandler(filename=filename, mode='a')
  fhandler.setFormatter(formatter)
  fhandler.setLevel(logging.ERROR)
  logger.addHandler(fhandler)

  consoleHandler = logging.StreamHandler(sys.stdout)
  consoleHandler.setFormatter(formatter)
  consoleHandler.setLevel(logging.DEBUG)
  logger.addHandler(consoleHandler)

  return logger

# Shared logger for the cells below; re-running this line rebuilds its handlers.
logger = myLogger()

In [1176]:
# Source of the cleaned, merged store locations
stores_csv_url = 'https://raw.githubusercontent.com/voxxtelum/penitent_nomad/main/common/csv/merged_locations_clean.csv'

# Parse store_zip as text instead of letting pandas infer a dtype
stores_dataframe = pd.read_csv(stores_csv_url, dtype={'store_zip': 'str'})

# Log which store chains (short_name values) are present in the data
logger.debug(f'Unique short_name found: {stores_dataframe.short_name.unique()}')

stores_dataframe.head(5)

2022-10-18 00:20:15,958:  3: DEBUG: Unique short_name found: ['containerstore' 'ikea' 'microcenter']


Unnamed: 0,store_uid,uuid,short_name,store_name,store_desc,store_add1,store_add2,store_city,store_state,store_zip,store_address_full,Latitude,Longitude,rand_uuid
0,01960_8e51e0a5_containerstore_peabody,8e51e0a5,containerstore,Container Store,Peabody,210 Andover Street,LL01,Peabody,MA,1960,"210 Andover Street Peabody, MA 01960",42.54142,-70.94459,40eaa5d2
1,02467_1a370ad1_containerstore_chestnut_hill,1a370ad1,containerstore,Container Store,Chestnut Hill,55 Boylston St,,Chestnut Hill,MA,2467,"55 Boylston St Chestnut Hill, MA 02467",42.322783,-71.16871,ebe56afc
2,01760_bd4ac9aa_containerstore_natick,bd4ac9aa,containerstore,Container Store,Natick,1265 Worcester Street,,Natick,MA,1760,"1265 Worcester Street Natick, MA 01760",42.299782,-71.389396,445fe8ff
3,02920_0d186a8a_containerstore_cranston,0d186a8a,containerstore,Container Store,Cranston,150 Hillside Rd,,Cranston,RI,2920,"150 Hillside Rd Cranston, RI 02920",41.76042,-71.457487,8fe60c8b
4,11530_a0fa003f_containerstore_garden_city,a0fa003f,containerstore,Container Store,Garden City,902 Old Country Rd,,Garden City,NY,11530,"902 Old Country Rd Garden City, NY 11530",40.744808,-73.603854,6ecb7029


In [1164]:
# Earth radius, expressed in miles
earth_r = 3958.8

# Radius (miles) assigned to each store chain.
# Keys are looked up against the `short_name` values of the stores table,
# so they must match those names exactly.
stores_radii = dict(
  ikea=50,
  microcenter=30,
  containerstore=20,
)

In [1165]:
# Pair every store's position as a (Longitude, Latitude) tuple
stores_dataframe['coordinates'] = list(zip(stores_dataframe['Longitude'], stores_dataframe['Latitude']))

# One row per chain: collect all of the chain's coordinates into a single list
stores_dataframe_c = (
  stores_dataframe
  .groupby('short_name')['coordinates']
  .agg(list)
  .reset_index()
  .rename(columns={'short_name': 'store'})
)

# Attach each chain's radius, then order chains from largest to smallest radius
stores_dataframe_c['radius'] = stores_dataframe_c['store'].map(stores_radii)
stores_dataframe_c = stores_dataframe_c.sort_values(by='radius', ascending=False, ignore_index=True)

stores_dataframe_c.head()

Unnamed: 0,store,coordinates,radius
0,ikea,"[(-71.0684562, 42.1371268), (-72.9200335, 41.2...",50
1,microcenter,"[(-71.1143075, 42.3572095), (-73.5858212, 40.7...",30
2,containerstore,"[(-70.9445901, 42.5414196), (-71.1687096, 42.3...",20


In [1166]:
# Flatten the per-chain table into a list of {'store', 'coordinates', 'radius'} dicts,
# one dict per row, in the table's (radius-descending) order
stores_dataframe_d = stores_dataframe_c.to_dict(orient='records')

In [1167]:
def int_cluster_points_multi(stores: List[Dict[Hashable, Any]]) -> Tuple[List[List[Tuple[float, float]]], List[List[Tuple[float, float]]]]:
  """Find, per anchor location, nearby target locations and the points between them.

  The first entry of ``stores`` is the anchor chain (the caller is expected to
  sort by largest radius first). For every anchor coordinate, each following
  chain ``stores[i]`` (except the last) is scanned for locations whose distance
  to the anchor is below ``anchor radius + target radius``. For such a target,
  the locations of the next chain ``stores[i + 1]`` lying within
  ``target radius + next radius`` of the target are collected, together with
  the intermediate point on each connecting line (positioned using
  ``nom._intermediate_ratio`` of the two radii).

  Input coordinates are handed to ``gcc`` unchanged; the notebook builds them
  as (Lon, Lat). Every coordinate written to the results has its two
  components swapped, i.e. results are (Lat, Lon).

  NOTE(review): every in-range target restarts both per-anchor lists, so only
  the last in-range target of an anchor survives in the output. This matches
  the previous behaviour and is kept as is; confirm whether accumulating all
  targets was intended.

  :param stores: list of {'store': str name, 'radius': number in miles,
    'coordinates': list of (Lon, Lat) tuples}
  :return: ``(intermediate_points, local_points)``. Each is a list with one
    non-empty list of (Lat, Lon) tuples per anchor that produced any match:
    the intermediate points, and the anchor/target/next-target locations.
    Both are empty when fewer than two stores are given.
  """
  # Without an anchor and at least one target there is nothing to search for.
  if len(stores) < 2:
    logger.warning(f'int_cluster_points_multi needs at least 2 stores, got {len(stores)}')
    return [], []

  anchor = stores[0]
  target = stores[1]
  anchor_radius = anchor['radius']
  targets_count = len(stores) - 1

  logger.debug(f'Using Store: ({len(anchor["coordinates"])}) {anchor["store"].upper()} as Anchor Points with Radius: {anchor["radius"]}')
  logger.debug(f'Additional Stores: {targets_count}')
  logger.debug(f'First Target: ({len(target["coordinates"])}) {target["store"].upper()} with Radius: {target["radius"]} ')

  local_points = []
  intermediate_points = []

  for anchor_point in anchor['coordinates']:

    target_point_clusters: List[Tuple[float, float]] = []
    intermediate_point_clusters: List[Tuple[float, float]] = []

    # Level i is always paired with level i + 1, so the last store is only
    # ever used as the "next" level and never as a target itself.
    for i in range(1, targets_count):
      # Use the radius of the level being scanned (previously always stores[1]).
      target_radius = stores[i]['radius']
      next_store = stores[i + 1]
      next_radius = next_store['radius']

      for target_point in stores[i]['coordinates']:
        anchor_distance = gcc.distance_between_points(anchor_point, target_point, unit='miles')

        # Circles around anchor and target do not overlap: skip this target.
        if not anchor_distance < anchor_radius + target_radius:
          continue

        anchor_target_inter = gcc.intermediate_point(anchor_point, target_point, nom._intermediate_ratio(anchor_radius, target_radius))

        # Restart both lists for this target (see NOTE(review) in the docstring).
        target_point_clusters = [(anchor_point[1], anchor_point[0])]
        intermediate_point_clusters = [(anchor_target_inter[1], anchor_target_inter[0])]

        target_intersections = []
        for next_target in next_store['coordinates']:
          target_next_distance = gcc.distance_between_points(target_point, next_target, unit='miles')

          if target_next_distance < target_radius + next_radius:
            inter_target_next = gcc.intermediate_point(target_point, next_target, nom._intermediate_ratio(target_radius, next_radius))
            intermediate_point_clusters.append((inter_target_next[1], inter_target_next[0]))
            target_intersections.append((next_target[1], next_target[0]))

        # The target itself is only recorded when it reaches a next-level location.
        if target_intersections:
          target_point_clusters.append((target_point[1], target_point[0]))
          target_point_clusters.extend(target_intersections)

    if target_point_clusters:
      local_points.append(target_point_clusters)

    if intermediate_point_clusters:
      intermediate_points.append(intermediate_point_clusters)

  return intermediate_points, local_points

# Time the cluster search with the performance counter
t0 = time.perf_counter()
intermediate_points, local_points = int_cluster_points_multi(stores_dataframe_d)
t1 = time.perf_counter()

# Report the elapsed time in milliseconds and how many clusters came back
logger.debug(f'int_cluster_points_multi took {round((t1-t0) * 1000, 2)}ms and found {len(intermediate_points)} clusters.')

# One row per cluster, one column per point position within the cluster
intermediate_points_dataframe = pd.DataFrame(intermediate_points)
intermediate_points_dataframe.head(5)

2022-10-18 00:15:00,359: 17: DEBUG: Using Store: (51) IKEA as Anchor Points with Radius: 50
2022-10-18 00:15:00,360: 18: DEBUG: Additional Stores: 2
2022-10-18 00:15:00,360: 19: DEBUG: First Target: (25) MICROCENTER with Radius: 30 
2022-10-18 00:15:00,488: 74: DEBUG: int_cluster_points_multi took 128.5ms and found 31 clusters.


Unnamed: 0,0,1,2,3,4,5,6,7,8
0,"(42.274680628854114, -71.09707574838758)","(42.46776561630177, -71.0125968909876)","(42.33655674568646, -71.14695590809627)","(42.3228315907223, -71.27942064884141)","(41.99925843622335, -71.3209880309333)",,,,
1,"(41.05420842419934, -73.67998660801094)","(40.80992237036381, -73.81530729781629)","(40.98221458605286, -73.90805011834583)","(40.939806894830205, -73.9667192849604)","(40.806974917245796, -74.05035254191289)","(40.93034187336925, -74.09594061931874)","(40.710821640541, -74.15339501520525)","(40.84097139670313, -74.25772131314014)","(40.70921183300416, -74.39039238028757)"
2,"(40.85772446263708, -73.90715492677926)","(40.80992237036381, -73.81530729781629)","(40.98221458605286, -73.90805011834583)","(40.939806894830205, -73.9667192849604)","(40.806974917245796, -74.05035254191289)","(40.93034187336925, -74.09594061931874)","(40.710821640541, -74.15339501520525)","(40.84097139670313, -74.25772131314014)","(40.70921183300416, -74.39039238028757)"
3,"(40.841151570920545, -74.03131354270296)","(40.80992237036381, -73.81530729781629)","(40.98221458605286, -73.90805011834583)","(40.939806894830205, -73.9667192849604)","(40.806974917245796, -74.05035254191289)","(40.93034187336925, -74.09594061931874)","(40.710821640541, -74.15339501520525)","(40.84097139670313, -74.25772131314014)","(40.70921183300416, -74.39039238028757)"
4,"(40.81889012301009, -74.08751678714235)","(40.80992237036381, -73.81530729781629)","(40.98221458605286, -73.90805011834583)","(40.939806894830205, -73.9667192849604)","(40.806974917245796, -74.05035254191289)","(40.93034187336925, -74.09594061931874)","(40.710821640541, -74.15339501520525)","(40.84097139670313, -74.25772131314014)","(40.70921183300416, -74.39039238028757)"


In [1168]:
# Tabulate the local (store location) clusters: one row per cluster
local_points_dataframe = pd.DataFrame(local_points)
local_points_dataframe.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,"(42.1371268, -71.0684562)","(42.3572095, -71.1143075)","(42.5414196, -70.9445901)","(42.3227831, -71.1687096)","(42.2997815, -71.3893956)","(41.7604202, -71.4574866)",,,,
1,"(41.2958017, -72.9200335)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
2,"(40.7748383, -73.5310569)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
3,"(40.731424, -73.8618461)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
4,"(40.672219, -74.0115416)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"


In [1169]:
def centroid_points_cluster(cluster: List[List[(Tuple[float, float])]]):
  """Reduce every non-empty region of ``cluster`` to a single point.

  Each truthy region is passed to ``nom._cent_points`` and the results are
  gathered in input order; falsy (empty) regions are skipped.

  :param cluster: list of regions, each a list of coordinate tuples
  :return: list holding one ``nom._cent_points`` result per non-empty region
  """
  return [nom._cent_points(region) for region in cluster if region]

# Collapse each cluster of intermediate points into a single point
centroid_points = centroid_points_cluster(intermediate_points)

# Tabulate the resulting points under Latitude / Longitude column labels
centroid_points_dataframe = pd.DataFrame(centroid_points, columns=['Latitude', 'Longitude'])
# centroid_points_dataframe.to_clipboard()
logger.debug(f'({len(centroid_points_dataframe.index)}) Points Found')
centroid_points_dataframe.head()

2022-10-18 00:15:00,597: 15: DEBUG: (31) Points Found


Unnamed: 0,Latitude,Longitude
0,42.280276,-71.171624
1,40.865128,-74.035572
2,40.843241,-74.06069
3,40.841388,-74.074481
4,40.838914,-74.080726


In [1170]:
# Collapse each cluster of store locations into a single point.
# Note: this rebinds `local_points` from a list of clusters to a list of points.
local_points = centroid_points_cluster(local_points)

# Tabulate the resulting points under Latitude / Longitude column labels
local_points_dataframe = pd.DataFrame(local_points, columns=['Latitude', 'Longitude'])
# local_points_dataframe.to_clipboard()
logger.debug(f'({len(local_points_dataframe.index)}) Points Found')
local_points_dataframe.head()

2022-10-18 00:15:00,622:  5: DEBUG: (31) Points Found


Unnamed: 0,Latitude,Longitude
0,42.236597,-71.190988
1,40.858981,-73.941548
2,40.806494,-74.001869
3,40.802061,-74.034952
4,40.796127,-74.049935


In [1171]:
def smooth_cluster_array(coords: List, radius=10):
  """Group points that lie within ``radius`` miles of a seed point.

  Points are visited in input order. A point that is not yet a member of any
  cluster becomes the seed of a new cluster, which then collects every *later*
  point closer than ``radius`` miles to the seed. A later point may therefore
  end up in more than one cluster if it is close to several seeds.

  Every input point is considered as a seed, including the last one, so a
  point that belongs to no earlier cluster is returned as a single-point
  cluster instead of being dropped.

  Distances come from ``gcc.distance_between_points``; each point is passed
  with its two components swapped, i.e. as ``(point[1], point[0])``.

  :param coords: sequence of two-component points (the notebook passes
    (Lat, Lon) pairs); the input is not modified
  :param radius: distance threshold in miles (strictly less than)
  :return: list of clusters, each a list of ``(point[0], point[1])`` tuples
    whose first element is the seed
  """
  # Normalise once so membership tests compare like with like (tuples).
  points = [(pt[0], pt[1]) for pt in coords]
  clustered = set()
  clean_cluster = []

  for idx, seed in enumerate(points):
    # Already absorbed by an earlier cluster: it does not start its own.
    if seed in clustered:
      continue

    members = [seed]
    for candidate in points[idx + 1:]:
      dist = gcc.distance_between_points((seed[1], seed[0]), (candidate[1], candidate[0]), unit='miles')
      if dist < radius:
        members.append(candidate)

    clustered.update(members)
    clean_cluster.append(members)

  return clean_cluster

# Group centroid points that lie within 20 miles of a seed point
smooth_cluster_ = smooth_cluster_array(centroid_points, 20)
print('smooth_cluster:', len(smooth_cluster_))
# print(smooth_cluster_)
# One row per group, one column per member position
smooth_dataframe = pd.DataFrame(smooth_cluster_)
smooth_dataframe.head(5)

smooth_cluster: 16


Unnamed: 0,0,1,2,3,4
0,"(42.28027557570544, -71.17162363798379)",,,,
1,"(40.86512814408159, -74.03557165810214)","(40.84324124342058, -74.0606895276922)","(40.84138809114104, -74.0744807429092)","(40.838913607198485, -74.08072584573458)","(40.84942592656442, -74.08330659032747)"
2,"(40.03793330362215, -75.2507096136472)","(39.96637160367155, -75.34077070837037)","(39.98297361426833, -75.35626367391329)",,
3,"(38.94462854630566, -77.12212372748188)","(38.92553459628119, -77.1469260122333)","(38.90540357029253, -77.16627394577064)",,
4,"(40.077146795202445, -82.98681634305424)",,,,


In [1172]:
# Collapse each smoothed group into a single point
smooth_cluster = centroid_points_cluster(smooth_cluster_)

# Tabulate the resulting points under Latitude / Longitude column labels
smooth_cluster_dataframe = pd.DataFrame(smooth_cluster, columns=['Latitude', 'Longitude'])
# smooth_cluster_dataframe.to_clipboard()
logger.debug(f'({len(smooth_cluster_dataframe.index)}) Points Found')
smooth_cluster_dataframe.head(5)

2022-10-18 00:15:00,688:  5: DEBUG: (16) Points Found


Unnamed: 0,Latitude,Longitude
0,42.280276,-71.171624
1,40.847621,-74.066957
2,39.995769,-75.315934
3,38.92519,-77.145112
4,40.077147,-82.986816


In [1173]:
# Centre the map on the mean Latitude / Longitude of all centroid points
cent_map_center = [
  centroid_points_dataframe['Latitude'].mean(),
  centroid_points_dataframe['Longitude'].mean(),
]
cent_points_map = folium.Map(location=cent_map_center, zoom_start=4)

# Draw a circle of 25 miles (converted to meters) around every centroid point
for point in centroid_points:
  centroid_circle = folium.Circle([point[0], point[1]], nom._miles_to_meters(25))
  centroid_circle.add_to(cent_points_map)

cent_points_map

In [1174]:
# Centre the map on the mean Latitude / Longitude of the smoothed points
smoothed_map_center = [
  smooth_cluster_dataframe['Latitude'].mean(),
  smooth_cluster_dataframe['Longitude'].mean(),
]
smoothed_cluster_map = folium.Map(location=smoothed_map_center, zoom_start=4)

# Filled orange circles of 25 miles (converted to meters) for the smoothed points
circle_format = dict(color='#f77f00', fill=True, fillOpacity=0.5)
for point in smooth_cluster:
  smoothed_circle = folium.Circle([point[0], point[1]], nom._miles_to_meters(25), **circle_format)
  smoothed_circle.add_to(smoothed_cluster_map)

# Outline every cluster of intermediate points as a polygon
for cluster_outline in intermediate_points:
  folium.Polygon(cluster_outline).add_to(smoothed_cluster_map)

# Write the map to disk, then display it as the cell result
smoothed_cluster_map.save('../output/maps/smoothed_cluster_map.html')
smoothed_cluster_map