# Penitent Nomad

 

In [1]:
import sys
from typing import List, Tuple, Dict, Any, Hashable

# TODO install module rather than inserting to path
sys.path.insert(0, "..")
import os
import math
import time
import pandas as pd
import numpy as np
import folium
import itertools as it
import great_circle_calculator.great_circle_calculator as gcc

from common.helpers import nomad as nom
import logging


def myLogger():
    """Build (or rebuild) this module's logger with a file and a console handler.

    The logger is looked up by ``__name__``, set to DEBUG and detached from
    its parents (``propagate = False``), so records are emitted only by the
    two handlers attached here:

    * a ``FileHandler`` appending to ``nomad_intersections.log`` that records
      ERROR and above;
    * a ``StreamHandler`` on ``sys.stdout`` that shows DEBUG and above.

    Calling the function again (for example when the notebook cell is re-run)
    first detaches *and closes* the handlers installed by the previous call,
    so messages are not duplicated and the previous log-file handle is not
    left open.

    :return: the configured ``logging.Logger``
    """
    logger = logging.getLogger(__name__)
    formatter = logging.Formatter(
        "%(asctime)s: %(lineno)2s: %(levelname)s: %(message)s"
    )
    logger.setLevel(logging.DEBUG)
    logger.propagate = False
    filename = "nomad_intersections.log"

    # Iterate over a copy because removeHandler() mutates logger.handlers.
    # Closing matters for the FileHandler: merely clearing the list would
    # leave its file descriptor open after every re-run.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    fhandler = logging.FileHandler(filename=filename, mode="a")
    fhandler.setFormatter(formatter)
    fhandler.setLevel(logging.ERROR)
    logger.addHandler(fhandler)

    consoleHandler = logging.StreamHandler(sys.stdout)
    consoleHandler.setFormatter(formatter)
    consoleHandler.setLevel(logging.DEBUG)
    logger.addHandler(consoleHandler)

    return logger


# Module-level logger used by the cells below (built by myLogger() above).
logger = myLogger()

In [2]:
# Load the merged store-locations CSV directly from the project's GitHub repo.
# store_zip is read as a string instead of being parsed as a number
# (presumably to keep leading zeros — TODO confirm).
stores_dataframe = pd.read_csv(
    "https://raw.githubusercontent.com/voxxtelum/penitent_nomad/main/common/csv/merged_locations_clean.csv",
    dtype={"store_zip": "str"},
)

# Alternative source: a local CSV instead of the remote file.
# stores_dataframe = pd.read_csv(
#     "../common/csv/merged_locations_bean.csv",
#     dtype={"store_zip": "str"},
# )

# Log the distinct short_name values found, then preview the last five rows.
logger.debug(f"Unique short_name found: {stores_dataframe.short_name.unique()}")
stores_dataframe.tail(5)

2022-10-24 19:47:41,901: 11: DEBUG: Unique short_name found: ['containerstore' 'ikea' 'microcenter']


Unnamed: 0,store_uid,uuid,short_name,store_name,store_desc,store_add1,store_add2,store_city,store_state,store_zip,store_address_full,Latitude,Longitude,rand_uuid
166,66212_938d4019_microcenter_overland_park,938d4019,microcenter,Micro Center,Overland Park,9294 Metcalf Ave,Regency Park Shopping Center,Overland Park,KS,66212,"9294 Metcalf Ave Overland Park, KS 66212",38.960546,-94.670176,f0814bdf
167,77081_ac74d2f5_microcenter_houston,ac74d2f5,microcenter,Micro Center,Houston,5305 S Rice Ave,,Houston,TX,77081,"5305 S Rice Ave Houston, TX 77081",29.724902,-95.46657,88cf93a5
168,75243_1f0eea52_microcenter_dallas,1f0eea52,microcenter,Micro Center,Dallas,13929 N Central Expy,,Dallas,TX,75243,"13929 N Central Expy Dallas, TX 75243",32.937463,-96.750319,6b24766a
169,80237_f9a36abc_microcenter_denver,f9a36abc,microcenter,Micro Center,Denver,8000 E Quincy Ave,,Denver,CO,80237,"8000 E Quincy Ave Denver, CO 80237",39.638296,-104.897322,6b784f8b
170,92780_82753ef0_microcenter_tustin,82753ef0,microcenter,Micro Center,Tustin,1100 E Edinger Ave,,Tustin,CA,92780,"1100 E Edinger Ave Tustin, CA 92780",33.724536,-117.832883,6d175392


In [3]:
# Earth radius in miles
# NOTE(review): earth_r is not referenced anywhere else in the visible notebook.
earth_r = 3958.8

# Names should match csv_list
# Radius (miles) per store short_name. In the visible notebook this mapping is
# only used to build the name of the saved map file.
stores_radii = {"ikea": 50, "microcenter": 30, "containerstore": 20}

# Per-store settings keyed by the CSV's short_name. "radius" (miles) is looked
# up when the per-store table is built; "enabled" is not read anywhere in the
# visible notebook.
# NOTE(review): the radii duplicate stores_radii, which has no "llbean" entry —
# keep the two mappings in sync or derive one from the other.
stores_settings = {
    "ikea": {"radius": 50, "enabled": True},
    "microcenter": {"radius": 30, "enabled": True},
    "containerstore": {"radius": 20, "enabled": True},
    "llbean": {"radius": 20, "enabled": True},
}

In [4]:
# Pair every location's position as a (Longitude, Latitude) tuple. The column
# is added to stores_dataframe itself so later cells can reuse it.
stores_dataframe["coordinates"] = list(
    zip(stores_dataframe.Longitude, stores_dataframe.Latitude)
)

# Collapse to one row per store chain, gathering all of that chain's
# coordinates into a single list. (The previous version first made a full
# .copy() of stores_dataframe that was overwritten immediately; it is gone.)
stores_dataframe_c = (
    stores_dataframe[["short_name", "coordinates"]]
    .groupby("short_name")
    .agg({"coordinates": list})
    .reset_index()
    .rename(columns={"short_name": "store"})
)

# Attach each chain's configured radius. A short_name that is missing from
# stores_settings raises KeyError, which surfaces configuration gaps early.
stores_dataframe_c["radius"] = stores_dataframe_c["store"].map(
    lambda store_name: stores_settings[store_name]["radius"]
)

# Largest radius first: the first row is used as the anchor store downstream.
stores_dataframe_c = stores_dataframe_c.sort_values(
    by="radius", ascending=False, ignore_index=True
)


stores_dataframe_c.head()

Unnamed: 0,store,coordinates,radius
0,ikea,"[(-71.0684562, 42.1371268), (-72.9200335, 41.2...",50
1,microcenter,"[(-71.1143075, 42.3572095), (-73.5858212, 40.7...",30
2,containerstore,"[(-70.9445901, 42.5414196), (-71.1687096, 42.3...",20


In [5]:
# stores_dataframe_d = stores_dataframe_c.copy()
# Convert the per-store table into a list of dicts, one per row (keys are the
# column names); this list is what int_cluster_points_multi is called with.
stores_dataframe_d = stores_dataframe_c.to_dict("records")

In [18]:
def int_cluster_points_multi(
    stores: List[Dict[Hashable, Any]],
) -> Tuple[List[List[Tuple[float, float]]], List[List[Tuple[float, float]]]]:
    """Group nearby locations of several stores around each anchor location.

    ``stores[0]`` is the anchor store (the list should be sorted by largest
    radius first). For every anchor location, each following store
    ``stores[i]`` is scanned for locations whose circle (store radius around
    the location) overlaps the anchor's circle, i.e. whose great-circle
    distance to the anchor is below the sum of the two radii. For such a
    target location, the locations of ``stores[i + 1]`` that overlap the
    target in the same sense are collected, and an intermediate point is
    computed on each connecting line at the fraction returned by
    ``nom._intermediate_ratio`` (presumably a radius-based weighting — see
    that helper).

    Because each step looks at ``stores[i + 1]``, at least three stores are
    needed to produce any result; with fewer, two empty lists are returned.

    :param stores: list of ``{'store': str name, 'radius': int in miles,
        'coordinates': list of tuples}``. Coordinate tuples are passed
        unchanged to ``great_circle_calculator``; the notebook builds them as
        (Longitude, Latitude).
    :return 1: list of clusters, one per anchor location that had a match,
        each a list of intermediate points. The two components of every
        point are swapped relative to the input, i.e. (Lat, Lon) for
        (Lon, Lat) input.
    :return 2: list of clusters of the matched store locations themselves
        (anchor first), with components swapped the same way.
    """
    # Nothing can intersect with fewer than two stores; previously this
    # raised IndexError on stores[1].
    if len(stores) < 2:
        return [], []

    anchor = stores[0]
    target = stores[1]
    anchor_radius = anchor["radius"]
    targets_count = len(stores) - 1

    logger.debug(
        f'Using Store: ({len(anchor["coordinates"])}) {anchor["store"].upper()} as Anchor Points with Radius: {anchor["radius"]}'
    )
    logger.debug(f"Additional Stores: {targets_count}")
    logger.debug(
        f'First Target: ({len(target["coordinates"])}) {target["store"].upper()} with Radius: {target["radius"]} '
    )

    local_points = []
    intermediate_points = []

    for anchor_point in anchor["coordinates"]:

        target_point_clusters: List[Tuple[float, float]] = []
        intermediate_point_clusters: List[Tuple[float, float]] = []

        # i stops one short of the last store because stores[i + 1] is read.
        for i in range(1, targets_count):
            target_store = stores[i]
            next_store = stores[i + 1]
            # Use the radius of the store actually being scanned; the earlier
            # code always used stores[1]'s radius, which is only correct when
            # there are exactly three stores.
            target_radius = target_store["radius"]
            next_radius = next_store["radius"]

            for target_point in target_store["coordinates"]:
                anchor_distance = gcc.distance_between_points(
                    anchor_point, target_point, unit="miles"
                )

                if anchor_distance < anchor_radius + target_radius:
                    anchor_target_inter = gcc.intermediate_point(
                        anchor_point,
                        target_point,
                        nom._intermediate_ratio(anchor_radius, target_radius),
                    )

                    # Slice-assigning from index 0 replaces the whole list,
                    # so both clusters restart at every in-range target.
                    # NOTE(review): as a result only the last in-range target
                    # point (and its neighbours) survives per anchor —
                    # confirm this is intended rather than accumulating.
                    target_point_clusters[0:] = [(anchor_point[1], anchor_point[0])]
                    intermediate_point_clusters[0:] = [
                        (anchor_target_inter[1], anchor_target_inter[0])
                    ]

                    target_intersections = []

                    for next_target in next_store["coordinates"]:

                        target_next_distance = gcc.distance_between_points(
                            target_point, next_target, unit="miles"
                        )

                        if target_next_distance < target_radius + next_radius:
                            inter_target_next = gcc.intermediate_point(
                                target_point,
                                next_target,
                                nom._intermediate_ratio(target_radius, next_radius),
                            )
                            intermediate_point_clusters += [
                                (inter_target_next[1], inter_target_next[0])
                            ]

                            # Replaces everything from index i onward with the
                            # target point, so the target is recorded once no
                            # matter how many neighbours it has.
                            target_point_clusters[i:] = [
                                (target_point[1], target_point[0])
                            ]
                            target_intersections += [
                                (next_target[1], next_target[0])
                            ]

                    if target_intersections:
                        target_point_clusters += target_intersections

        if target_point_clusters:
            local_points.append(target_point_clusters)

        if intermediate_point_clusters:
            intermediate_points.append(intermediate_point_clusters)

    return intermediate_points, local_points


# Time a single run of the clustering over the per-store records.
t0 = time.perf_counter()
intermediate_points, local_points = int_cluster_points_multi(stores_dataframe_d)
t1 = time.perf_counter()

# Elapsed time is reported in milliseconds, rounded to two decimals.
logger.debug(
    f"int_cluster_points_multi took {round((t1-t0) * 1000, 2)}ms and found {len(intermediate_points)} clusters."
)

# intermediate_points_dataframe = pd.DataFrame(intermediate_points)
# intermediate_points_dataframe
# One row per cluster; shorter clusters are padded with missing values.
local_points_dataframe = pd.DataFrame(local_points)
local_points_dataframe

2022-10-24 19:57:25,518: 17: DEBUG: Using Store: (51) IKEA as Anchor Points with Radius: 50
2022-10-24 19:57:25,519: 20: DEBUG: Additional Stores: 2
2022-10-24 19:57:25,520: 21: DEBUG: First Target: (25) MICROCENTER with Radius: 30 
2022-10-24 19:57:25,544: 102: DEBUG: int_cluster_points_multi took 26.0ms and found 31 clusters.


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,"(42.1371268, -71.0684562)","(42.3572095, -71.1143075)","(42.5414196, -70.9445901)","(42.3227831, -71.1687096)","(42.2997815, -71.3893956)","(41.7604202, -71.4574866)",,,,
1,"(41.2958017, -72.9200335)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
2,"(40.7748383, -73.5310569)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
3,"(40.731424, -73.8618461)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
4,"(40.672219, -74.0115416)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
5,"(40.9245349, -74.0732861)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
6,"(40.6751301, -74.1697393)","(40.0401182, -75.3694827)","(39.942495, -75.0255696)","(40.0881682, -75.3832079)","(39.6739544, -75.6496298)",,,,,
7,"(39.9171256, -75.1417686)","(40.0401182, -75.3694827)","(39.942495, -75.0255696)","(40.0881682, -75.3832079)","(39.6739544, -75.6496298)",,,,,
8,"(40.0943896, -75.3067793)","(40.0401182, -75.3694827)","(39.942495, -75.0255696)","(40.0881682, -75.3832079)","(39.6739544, -75.6496298)",,,,,
9,"(39.3753517, -76.4619125)","(38.8689425, -77.2615284)","(38.9910747, -76.5454317)","(38.9484195, -77.0799745)","(38.8875055, -77.092154)","(39.0635333, -77.1264943)","(38.9246276, -77.2399248)","(38.9616959, -77.3549644)",,


In [7]:
# Rebuild the table of local-point clusters and preview the first five rows.
local_points_dataframe = pd.DataFrame(local_points)
local_points_dataframe.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,"(42.1371268, -71.0684562)","(42.3572095, -71.1143075)","(42.5414196, -70.9445901)","(42.3227831, -71.1687096)","(42.2997815, -71.3893956)","(41.7604202, -71.4574866)",,,,
1,"(41.2958017, -72.9200335)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
2,"(40.7748383, -73.5310569)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
3,"(40.731424, -73.8618461)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
4,"(40.672219, -74.0115416)","(40.9068688, -74.1332633)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"


In [8]:
def centroid_points_cluster(cluster: List[List[(Tuple[float, float])]]):
    """Reduce every non-empty group of points to one representative point.

    :param cluster: list of point groups; empty groups are skipped
    :return: one point per non-empty group, in input order. A group holding a
        single point contributes that point unchanged; larger groups
        contribute whatever ``nom._cent_points`` returns for the group.
    """
    return [
        members[0] if len(members) == 1 else nom._cent_points(members)
        for members in cluster
        if members
    ]


# Collapse each cluster of intermediate points into a single point.
centroid_points = centroid_points_cluster(intermediate_points)

# Tabulate the points with the first component labelled Latitude and the
# second Longitude.
centroid_points_dataframe = pd.DataFrame(
    centroid_points, columns=["Latitude", "Longitude"]
)
# centroid_points_dataframe.to_clipboard()
logger.debug(f"({len(centroid_points_dataframe.index)}) Points Found")
centroid_points_dataframe.head()

2022-10-24 19:47:42,240: 21: DEBUG: (31) Points Found


Unnamed: 0,Latitude,Longitude
0,42.280276,-71.171624
1,40.865128,-74.035572
2,40.843241,-74.06069
3,40.841388,-74.074481
4,40.838914,-74.080726


In [9]:
# Collapse each cluster of local points into a single point.
# NOTE(review): this rebinds local_points from a list of clusters to a list of
# points, so the cell gives different input to centroid_points_cluster if it is
# run a second time without re-running the clustering cell first.
local_points = centroid_points_cluster(local_points)

local_points_dataframe = pd.DataFrame(local_points, columns=["Latitude", "Longitude"])
# local_points_dataframe.to_clipboard()
logger.debug(f"({len(local_points_dataframe.index)}) Points Found")
local_points_dataframe.head()

2022-10-24 19:47:42,271:  5: DEBUG: (31) Points Found


Unnamed: 0,Latitude,Longitude
0,42.236597,-71.190988
1,40.858981,-73.941548
2,40.806494,-74.001869
3,40.802061,-74.034952
4,40.796127,-74.049935


In [10]:
def smooth_cluster_array(coords: List, radius=10):
    """Greedily group points that lie within ``radius`` miles of a seed point.

    Points are visited in input order. A point that has not yet been placed
    in any cluster becomes the seed of a new cluster, and every *later* point
    closer than ``radius`` miles to that seed is added to it. Later points
    are not checked against existing clusters when they are added, so a point
    may appear in more than one cluster.

    Every input point ends up in at least one cluster: a point with no
    neighbours (including the last point, or the only point of a
    single-element input) forms a cluster of its own. Previously the last
    point was never used as a seed and could be dropped from the result.

    :param coords: sequence of points whose first two components are
        (Lat, Lon); the components are swapped before being handed to
        ``great_circle_calculator``
    :param radius: grouping distance in miles
    :return: list of clusters, each a list of ``(p[0], p[1])`` tuples with the
        seed first
    """
    points = list(coords)  # accept any sequence without mutating the input
    clean_cluster = []
    assigned = set()  # every point already placed in some cluster

    for i, p in enumerate(points):
        seed = (p[0], p[1])
        if seed in assigned:
            continue

        cluster_ = [seed]
        for target in points[i + 1 :]:
            _dist = gcc.distance_between_points(
                (p[1], p[0]), (target[1], target[0]), unit="miles"
            )

            if _dist < radius:
                cluster_.append((target[0], target[1]))

        assigned.update(cluster_)
        clean_cluster.append(cluster_)

    return clean_cluster


# Group the centroid points using a 20-mile radius and report how many
# groups were produced.
smooth_cluster_ = smooth_cluster_array(centroid_points, 20)
print("smooth_cluster:", len(smooth_cluster_))
# print(smooth_cluster_)
# One row per group; shorter groups are padded with missing values.
smooth_dataframe = pd.DataFrame(smooth_cluster_)
smooth_dataframe.head(5)

smooth_cluster: 16


Unnamed: 0,0,1,2,3,4
0,"(42.28027557570544, -71.17162363798379)",,,,
1,"(40.86512814408159, -74.03557165810214)","(40.84324124342058, -74.0606895276922)","(40.84138809114104, -74.0744807429092)","(40.838913607198485, -74.08072584573458)","(40.84942592656442, -74.08330659032747)"
2,"(40.03793330362215, -75.2507096136472)","(39.96637160367155, -75.34077070837037)","(39.98297361426833, -75.35626367391329)",,
3,"(38.94462854630566, -77.12212372748188)","(38.92553459628119, -77.1469260122333)","(38.90540357029253, -77.16627394577064)",,
4,"(40.077146795202445, -82.98681634305424)",,,,


In [11]:
# Collapse each smoothed group into a single point.
smooth_cluster = centroid_points_cluster(smooth_cluster_)

smooth_cluster_dataframe = pd.DataFrame(
    smooth_cluster, columns=["Latitude", "Longitude"]
)
# smooth_cluster_dataframe.to_clipboard()
logger.debug(f"({len(smooth_cluster_dataframe.index)}) Points Found")
smooth_cluster_dataframe.head(5)

2022-10-24 19:47:42,336:  7: DEBUG: (16) Points Found


Unnamed: 0,Latitude,Longitude
0,42.280276,-71.171624
1,40.847621,-74.066957
2,39.995769,-75.315934
3,38.92519,-77.145112
4,40.077147,-82.986816


In [21]:
# Map centred on the mean latitude/longitude of the unsmoothed centroid points.
cent_points_map = folium.Map(
    location=[
        centroid_points_dataframe.Latitude.mean(),
        centroid_points_dataframe.Longitude.mean(),
    ],
    zoom_start=4,
)

# One circle per centroid point; the radius is nom._miles_to_meters(25)
# (presumably 25 miles expressed in metres — see that helper).
for point in centroid_points:
    folium.Circle([point[0], point[1]], nom._miles_to_meters(25)).add_to(
        cent_points_map
    )

# Bare expression so the notebook renders the map.
cent_points_map

(42.28027557570544, -71.17162363798379)
(40.86512814408159, -74.03557165810214)
(40.84324124342058, -74.0606895276922)
(40.84138809114104, -74.0744807429092)
(40.838913607198485, -74.08072584573458)
(40.84942592656442, -74.08330659032747)
(40.03793330362215, -75.2507096136472)
(39.96637160367155, -75.34077070837037)
(39.98297361426833, -75.35626367391329)
(38.94462854630566, -77.12212372748188)
(38.92553459628119, -77.1469260122333)
(38.90540357029253, -77.16627394577064)
(40.077146795202445, -82.98681634305424)
(42.50475790271159, -83.23293935398495)
(33.92159196683945, -84.39788814702824)
(39.268187526190424, -84.41378506708944)
(41.95765907781731, -87.89190598720184)
(41.90447488176297, -87.89804387219418)
(41.88393908656162, -87.89817183332649)
(38.62747230561721, -90.3262281989469)
(44.909137087671446, -93.32233546848332)
(38.957201607017815, -94.66503382771937)
(29.794029985135115, -95.43253216880692)
(32.930978683227885, -96.86190328495917)
(32.912086468022146, -96.8713522759603

In [13]:
# Map centred on the mean latitude/longitude of the smoothed cluster points.
smoothed_cluster_map = folium.Map(
    location=[
        smooth_cluster_dataframe.Latitude.mean(),
        smooth_cluster_dataframe.Longitude.mean(),
    ],
    zoom_start=4,
)

# One filled circle per smoothed point; the radius is
# nom._miles_to_meters(25) (presumably 25 miles in metres — see that helper).
circle_format = {"color": "#f77f00", "fill": True, "fillOpacity": 0.5}
for point in smooth_cluster:
    folium.Circle(
        [point[0], point[1]], nom._miles_to_meters(25), **circle_format
    ).add_to(smoothed_cluster_map)

# Encode the store/radius configuration in the file name,
# e.g. smoothed_map_ikea50_microcenter30_containerstore20.
file_out = "smoothed_map" + "".join(
    f"_{store_name}{store_radius}" for store_name, store_radius in stores_radii.items()
)

# Outline every cluster of intermediate points as a polygon.
for cluster_points in intermediate_points:
    folium.Polygon(cluster_points).add_to(smoothed_cluster_map)

# save() cannot create missing parent directories, so make sure the target
# directory exists before writing.
os.makedirs("../output/maps", exist_ok=True)
smoothed_cluster_map.save(f"../output/maps/{file_out}.html")
logger.info(f"Map file saved in output/maps/{file_out}.html")

# Bare expression so the notebook renders the map.
smoothed_cluster_map

2022-10-24 19:47:42,439: 22: INFO: Map file saved in output/maps/smoothed_map_ikea50_microcenter30_containerstore20.html
