# Penitent Nomad

 

In [641]:
import sys
from typing import List, Tuple, Dict, Any, Hashable
import os
import math
import time
import pandas as pd
import numpy as np
import folium
import itertools as it
import great_circle_calculator.great_circle_calculator as gcc
import logging

sys.path.insert(0, "..")
from helpers import nomad as nom


def myLogger():
    """Build (or rebuild) this module's logger with a file and a console handler.

    Handlers already attached to the logger are removed *and closed* before the
    new ones are added, so calling this again (for example by re-running the
    notebook cell) neither duplicates log lines nor leaks the previously
    opened log file.

    Returns:
        logging.Logger: the logger named after ``__name__``, set to DEBUG with
        propagation disabled. ERROR and above are appended to
        ``nomad_intersections.log``; DEBUG and above are written to stdout.
    """
    logger = logging.getLogger(__name__)
    formatter = logging.Formatter(
        "%(asctime)s: %(lineno)2s: %(levelname)s: %(message)s"
    )
    logger.setLevel(logging.DEBUG)
    logger.propagate = False
    filename = "nomad_intersections.log"

    # Iterate over a copy because removeHandler() mutates logger.handlers.
    # Closing matters for the FileHandler: clearing the list alone would leave
    # its file object open.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    fhandler = logging.FileHandler(filename=filename, mode="a")
    fhandler.setFormatter(formatter)
    fhandler.setLevel(logging.ERROR)
    logger.addHandler(fhandler)

    consoleHandler = logging.StreamHandler(sys.stdout)
    consoleHandler.setFormatter(formatter)
    consoleHandler.setLevel(logging.DEBUG)
    logger.addHandler(consoleHandler)

    return logger


# Notebook-wide logger used by the cells below.
logger = myLogger()

## Importing CSVs

In [642]:
# Alternative source (remote copy of the cleaned dataset), kept for reference:
# stores_dataframe = pd.read_csv(
#     "https://raw.githubusercontent.com/voxxtelum/penitent_nomad/main/common/csv/merged_locations_clean.csv",
#     dtype={"store_zip": "str"},
# )

# Load the merged store locations from the local checkout; zip codes are read
# as strings.
stores_dataframe = pd.read_csv(
    "../common/csv/merged_locations_bean.csv", dtype={"store_zip": "str"}
)

logger.debug(
    f"Unique short_name found: {stores_dataframe['short_name'].unique()}"
)
stores_dataframe.tail(5)

2022-10-25 13:28:35,186: 11: DEBUG: Unique short_name found: ['containerstore' 'ikea' 'microcenter' 'llbean']


Unnamed: 0,store_uid,uuid,short_name,store_name,store_desc,store_add1,store_add2,store_city,store_state,store_zip,store_address_full,Latitude,Longitude,rand_uuid
222,04032_32866f06_llbean_freeport_outlet_store,32866f06,llbean,LL Bean,Freeport Outlet Store,1 Freeport Village Station,Village Station,Freeport,ME,4032,"1 Freeport Village Station Freeport, ME 04032",43.855819,-70.102834,7eb177c6
223,02649_fb43c61a_llbean_mashpee,fb43c61a,llbean,LL Bean,Mashpee,7 Market Street,Mashpee Commons,Mashpee,MA,2649,"7 Market Street Mashpee, MA 02649",41.61741,-70.490694,fd4a26c5
224,04401_c1b75319_llbean_bangor_outlet_store,c1b75319,llbean,LL Bean,Bangor Outlet Store,534 Stillwater Avenue,Parkade Shopping Center,Bangor,ME,4401,"534 Stillwater Avenue Bangor, ME 04401",44.830215,-68.756095,0e14c1e9
225,04605_6348fda9_llbean_ellsworth_outlet_store,6348fda9,llbean,LL Bean,Ellsworth Outlet Store,150 High Street,,Ellsworth,ME,4605,"150 High Street Ellsworth, ME 04605",44.537095,-68.411994,1255d0df
226,84060_34caa27c_llbean_park_city,34caa27c,llbean,LL Bean,Park City,675 Main Street,The Kimball On Main,Park City,UT,84060,"675 Main Street Park City, UT 84060",40.646456,-111.497615,7a0f0d8f


## Global Variables

In [643]:
# Per-store search settings. Keys are looked up by the CSV's short_name values;
# "radius" is compared against distances measured in miles and "enabled"
# decides whether the store takes part in the search.
stores_settings = dict(
    ikea=dict(radius=50, enabled=True),
    microcenter=dict(radius=30, enabled=True),
    containerstore=dict(radius=20, enabled=True),
    llbean=dict(radius=25, enabled=True),
)

# Radius-only table; note that it has no llbean entry.
stores_radii = dict(ikea=50, microcenter=30, containerstore=20)

# Earth radius in miles
earth_r = 3958.8

## Transforming Data
### Transformed Dataframe

In [644]:
# Pair every store's position as a (Longitude, Latitude) tuple.
stores_dataframe["coordinates"] = list(
    zip(stores_dataframe.Longitude, stores_dataframe.Latitude)
)

# Collapse to one row per store chain, holding the list of all its coordinates.
# (The earlier `stores_dataframe.copy()` was dropped: its result was
# overwritten immediately, so it only cost a full copy of the frame.)
stores_dataframe_c = (
    stores_dataframe.groupby("short_name")["coordinates"]
    .agg(list)
    .reset_index()
    .rename(columns={"short_name": "store"})
)

# Attach the per-store settings. A function is mapped rather than a dict so
# that a store missing from stores_settings still raises KeyError instead of
# silently becoming NaN.
stores_dataframe_c["radius"] = stores_dataframe_c["store"].map(
    lambda name: stores_settings[name]["radius"]
)
stores_dataframe_c["enabled"] = stores_dataframe_c["store"].map(
    lambda name: stores_settings[name]["enabled"]
)

# Largest radius first: the first record later serves as the search anchor.
stores_dataframe_c = stores_dataframe_c.sort_values(
    by="radius", ascending=False, ignore_index=True
)


stores_dataframe_c.head()

Unnamed: 0,store,coordinates,radius,enabled
0,ikea,"[(-71.0684562, 42.1371268), (-72.9200335, 41.2...",50,True
1,microcenter,"[(-71.1143075, 42.3572095), (-73.5858212, 40.7...",30,True
2,llbean,"[(-88.163882, 43.035436), (-89.453067, 43.0718...",25,True
3,containerstore,"[(-70.9445901, 42.5414196), (-71.1687096, 42.3...",20,True


### Converting Dataframe to Records

In [645]:
# Keep only the enabled stores (row order, i.e. descending radius, is
# preserved) and flatten them to one plain dict per store.
stores_dataframe_d = stores_dataframe_c.loc[
    stores_dataframe_c["enabled"].eq(True)
].to_dict(orient="records")


## Starting the Loop
### Recursive Function

```OUT  ``` Local Points Dataframe

Local Points are the coordinates of each store that match all of the criteria. For example: one cluster will include the first location with all subsequent locations of the second store within the radius of the first location, then all locations of the third store within the radius of the second location, and so on.

```OUT  ``` Intermediate Points Dataframe

Intermediate Points are points along a line drawn between the centers of consecutive local points. Rather than using the midpoint, the position along the line is weighted by the radii set for the two stores, and favors the store with the smaller radius. I do this because if I wanted to be within R=100 miles of one location and R=25 miles of another location, then rather than centering around a midway point, I would rather be closer to the store with the smaller radius setting.

In [646]:
def int_cluster_points_multi(stores: List[Dict[Hashable, Any]]):
    """Find clusters of nearby stores and the weighted points between them.

    The first record in ``stores`` is the anchor (the notebook sorts records by
    descending radius, so this is the largest-radius store); the remaining
    records are the targets, visited in list order. For every anchor
    coordinate the search walks the targets one store at a time. Two locations
    count as intersecting when their great-circle distance in miles is less
    than the sum of the two stores' radii.

    Args:
        stores: Records with ``"store"``, ``"coordinates"`` and ``"radius"``
            keys. Coordinates are handed to ``gcc`` exactly as stored and are
            swapped (index 1 first) before being collected; the notebook
            builds them as ``(Longitude, Latitude)``, so collected tuples come
            out as ``(Latitude, Longitude)``. At least two records are
            required because ``stores[1]`` is read unconditionally.

    Returns:
        ``(intermediate_points, local_points)``: two lists of clusters, each
        cluster being a list of coordinate tuples. The review notes in the body
        describe exactly which points end up in a cluster.
    """

    anchor = stores[0]
    anchor_points = anchor["coordinates"]
    # `target` is only used for the log line below; the search uses `targets`.
    target = stores[1]
    targets = stores[1:]
    anchor_radius = anchor["radius"]
    targets_count = len(targets)

    logger.debug(
        f'Using Store: ({len(anchor["coordinates"])}) {anchor["store"].upper()} as Anchor Points with Radius: {anchor["radius"]}'
    )
    logger.debug(f"Additional Stores: {targets_count}")
    logger.debug(
        f'First Target: ({len(target["coordinates"])}) {target["store"].upper()} with Radius: {target["radius"]} '
    )

    local_points = []
    intermediate_points = []

    def compare_next_target(
        stores, current_point, current_radius, local_cluster, intermediate_cluster
    ):
        """Match ``current_point`` against ``stores[0]`` and recurse into the rest.

        ``intermediate_cluster`` is extended in place, so the caller sees every
        intermediate point added at any depth. The return value is a list of
        local points (see the notes below for which list object that is).
        """
        _len = len(stores)
        if _len > 1:
            # More stores remain after this one: follow the FIRST matching
            # location of stores[0] and return straight away, so any later
            # matching locations of this store are never examined.

            next_radius = stores[0]["radius"]

            for next_point in stores[0]["coordinates"]:

                point_distance = gcc.distance_between_points(
                    current_point, next_point, unit="miles"
                )
                if point_distance < current_radius + next_radius:
                    # NOTE(review): this rebinds local_cluster to a new
                    # one-element list instead of extending it (the last-store
                    # branch below uses +=). Whatever was collected at earlier
                    # depths is therefore absent from the returned list -
                    # confirm whether that is intended.
                    local_cluster = [(next_point[1], next_point[0])]

                    # Point on the line between the two locations; the fraction
                    # comes from nom._intermediate_ratio, presumably weighted
                    # by the two radii - confirm in helpers.nomad.
                    intermediate_point = gcc.intermediate_point(
                        current_point,
                        next_point,
                        nom._intermediate_ratio(current_radius, next_radius),
                    )
                    intermediate_cluster += [
                        (intermediate_point[1], intermediate_point[0])
                    ]

                    return compare_next_target(
                        stores[1:],
                        next_point,
                        next_radius,
                        local_cluster,
                        intermediate_cluster,
                    )
            # No location matched: fall through to the final return, which
            # hands back the list object that was passed in.

        if _len == 1:
            # Last store: collect EVERY matching location, not just the first.
            next_radius = stores[0]["radius"]
            for next_point in stores[0]["coordinates"]:
                point_distance = gcc.distance_between_points(
                    current_point, next_point, unit="miles"
                )
                if point_distance < current_radius + next_radius:
                    local_cluster += [(next_point[1], next_point[0])]

                    intermediate_point = gcc.intermediate_point(
                        current_point,
                        next_point,
                        nom._intermediate_ratio(current_radius, next_radius),
                    )
                    intermediate_cluster += [
                        (intermediate_point[1], intermediate_point[0])
                    ]

                    # Redundant: this is already the end of the loop body.
                    continue

        return local_cluster

    for anchor_point in anchor_points:
        # Every cluster starts with the anchor itself, stored index-1-first.
        local_cluster = [(anchor_point[1], anchor_point[0])]
        intermediate_cluster = []

        # NOTE(review): when the helper returns the very list passed in here
        # (no match at the first target store, or only one target store), this
        # `+=` extends the list with its own contents, duplicating every entry.
        local_cluster += compare_next_target(
            targets, anchor_point, anchor_radius, local_cluster, intermediate_cluster
        )
        # Keep clusters holding more than (targets_count - 1) local points and
        # more than (targets_count - 2) intermediate points.
        # NOTE(review): given the notes above, these length checks do not by
        # themselves guarantee one hit per target store - confirm the intent.
        if len(local_cluster) > targets_count - 1:
            local_points += [local_cluster]

        if len(intermediate_cluster) > targets_count - 2:
            intermediate_points += [intermediate_cluster]

    return intermediate_points, local_points


# Time a single run of the search over the enabled stores.
t0 = time.perf_counter()
intermediate_points, local_points = int_cluster_points_multi(stores_dataframe_d)
t1 = time.perf_counter()

logger.debug(
    "int_cluster_points_multi took "
    f"{round((t1 - t0) * 1000, 2)}ms "
    f"and found {len(local_points)} clusters."
)

# One row per cluster, one column per intermediate point.
intermediate_points_dataframe = pd.DataFrame(data=intermediate_points)
intermediate_points_dataframe.head(5)

# The local points are put into their own dataframe in the next cell.

2022-10-25 13:28:35,302: 15: DEBUG: Using Store: (51) IKEA as Anchor Points with Radius: 50
2022-10-25 13:28:35,303: 18: DEBUG: Additional Stores: 3
2022-10-25 13:28:35,303: 19: DEBUG: First Target: (25) MICROCENTER with Radius: 30 
2022-10-25 13:28:35,313: 104: DEBUG: int_cluster_points_multi took 10.97ms and found 21 clusters.


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,"(42.274680628854114, -71.09707574838758)","(42.27149236650211, -71.47740159901544)","(42.26825202742158, -71.44034380791385)","(42.25523655874646, -71.5628052347626)","(41.955539246055636, -71.59993298327322)",,,,,
1,"(40.94859926801059, -73.33746692723902)","(40.85862573384604, -73.84882761424436)","(40.83951851438261, -73.81009354223232)","(40.99905456414811, -73.89597582424898)","(40.959794863244554, -73.95032276179359)","(40.83681533828059, -74.02782628650883)","(40.951052807977305, -74.07001181685166)","(40.7478062045706, -74.12331924209336)","(40.8683436301796, -74.21990307924962)","(40.746389275422416, -74.34288473340034)"
2,"(40.75279277070353, -73.56529139835874)","(40.85862573384604, -73.84882761424436)","(40.83951851438261, -73.81009354223232)","(40.99905456414811, -73.89597582424898)","(40.959794863244554, -73.95032276179359)","(40.83681533828059, -74.02782628650883)","(40.951052807977305, -74.07001181685166)","(40.7478062045706, -74.12331924209336)","(40.8683436301796, -74.21990307924962)","(40.746389275422416, -74.34288473340034)"
3,"(40.736586430851865, -73.68933842313325)","(40.85862573384604, -73.84882761424436)","(40.83951851438261, -73.81009354223232)","(40.99905456414811, -73.89597582424898)","(40.959794863244554, -73.95032276179359)","(40.83681533828059, -74.02782628650883)","(40.951052807977305, -74.07001181685166)","(40.7478062045706, -74.12331924209336)","(40.8683436301796, -74.21990307924962)","(40.746389275422416, -74.34288473340034)"
4,"(40.71449079940363, -73.74556716261844)","(40.85862573384604, -73.84882761424436)","(40.83951851438261, -73.81009354223232)","(40.99905456414811, -73.89597582424898)","(40.959794863244554, -73.95032276179359)","(40.83681533828059, -74.02782628650883)","(40.951052807977305, -74.07001181685166)","(40.7478062045706, -74.12331924209336)","(40.8683436301796, -74.21990307924962)","(40.746389275422416, -74.34288473340034)"


##### Local Points Dataframe

In [647]:
# One row per cluster, one column per local point; clusters shorter than the
# longest one are padded with missing values.
local_points_dataframe = pd.DataFrame(local_points)
local_points_dataframe.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,"(42.1371268, -71.0684562)","(42.19918866, -71.77922195)","(42.3227831, -71.1687096)","(42.2997815, -71.3893956)","(41.7604202, -71.4574866)",,,,,
1,"(41.2958017, -72.9200335)","(40.95738893, -74.06872124)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
2,"(40.7748383, -73.5310569)","(40.95738893, -74.06872124)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
3,"(40.731424, -73.8618461)","(40.95738893, -74.06872124)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"
4,"(40.672219, -74.0115416)","(40.95738893, -74.06872124)","(40.7448076, -73.6038542)","(41.0322012, -73.7576226)","(40.9616324, -73.8555979)","(40.7403461, -73.9952173)","(40.9459839, -74.0710441)","(40.5801216, -74.1667504)","(40.7969656, -74.3405558)","(40.577125, -74.5609641)"


## Cleaning Results

### Finding Centroid of Each Intermediate Point Cluster

#### Centroid Points Cluster Function

##### Intermediate Points Centroid Dataframe


In [648]:
def centroid_points_cluster(
    cluster: List[List[Tuple[float, float]]],
) -> List[Tuple[float, float]]:
    """Reduce every non-empty region of ``cluster`` to a single point.

    Args:
        cluster: Regions, each a list of coordinate pairs.

    Returns:
        One entry per non-empty region, in input order. A single-point region
        contributes that point unchanged; larger regions contribute whatever
        ``nom._cent_points`` returns for them (presumably the centroid pair -
        confirm in helpers.nomad). Empty regions are skipped.
    """
    cent_points: List[Tuple[float, float]] = []

    for region in cluster:
        if not region:
            continue
        if len(region) == 1:
            # Nothing to average: the only point is its own centroid.
            cent_points.append(region[0])
        else:
            cent_points.append(nom._cent_points(region))

    return cent_points

# Collapse each intermediate-point cluster to a single centroid.
intermediate_points_centroids = centroid_points_cluster(intermediate_points)

intermediate_points_centroids_dataframe = pd.DataFrame(
    data=intermediate_points_centroids,
    columns=["Latitude", "Longitude"],
)
# Also copies the table to the system clipboard.
intermediate_points_centroids_dataframe.to_clipboard()
logger.debug(
    f"({len(intermediate_points_centroids_dataframe.index)}) Points Found"
)
intermediate_points_centroids_dataframe

2022-10-25 13:28:35,390: 20: DEBUG: (21) Points Found


Unnamed: 0,Latitude,Longitude
0,42.205179,-71.435686
1,40.875895,-73.962835
2,40.856212,-73.985468
3,40.854552,-73.997884
4,40.852329,-74.003508
5,40.861792,-74.00582
6,40.852442,-74.009456
7,40.815392,-74.0916
8,40.822099,-74.097751
9,40.245894,-75.379161


##### Local Points Centroid Dataframe


In [649]:
# Collapse each local-point cluster to a single centroid.
local_points_centroids = centroid_points_cluster(local_points)

local_points_centroids_dataframe = pd.DataFrame(
    data=local_points_centroids,
    columns=["Latitude", "Longitude"],
)
# local_points_centroids_dataframe.to_clipboard()

logger.debug(
    f"({len(local_points_centroids_dataframe.index)}) Points Found"
)
local_points_centroids_dataframe.head()

2022-10-25 13:28:35,414:  8: DEBUG: (21) Points Found


Unnamed: 0,Latitude,Longitude
0,42.144127,-71.372786
1,40.864024,-73.935088
2,40.811541,-73.995419
3,40.807109,-74.028505
4,40.801176,-74.043489


## Mapping Results

#### Local Points Map

In [650]:
# Centre the map on the mean position of the local-point centroids.
local_points_map = folium.Map(
    location=[
        local_points_centroids_dataframe["Latitude"].mean(),
        local_points_centroids_dataframe["Longitude"].mean(),
    ],
    zoom_start=4,
)

# Draw one 25-mile circle per centroid.
for point in local_points_centroids:
    folium.Circle(
        [point[0], point[1]],
        nom._miles_to_meters(25),
    ).add_to(local_points_map)

local_points_map

### Smoothing Cluster Array Function

#### Smoothing Intermediate Points Centroids
This will group points together that are within a certain radius

In [651]:
def smooth_cluster_array(coords: List, radius=10):
    """Group points that lie within ``radius`` miles of a seed point.

    Points are visited in order. A point that is not yet part of any group
    starts a new group and collects every *later* point closer than ``radius``
    miles to it. A later point is collected even if an earlier group already
    contains it, so one point can appear in more than one group.

    Args:
        coords: Sequence of coordinate pairs. Each pair is indexed ``[0]`` and
            ``[1]`` and passed to ``gcc`` swapped (index 1 first); the notebook
            supplies ``(Latitude, Longitude)`` pairs. The input is not
            modified.
        radius: Grouping distance in miles; the comparison is strict (``<``).

    Returns:
        A list of groups, each a list of ``(coords[i][0], coords[i][1])``
        tuples whose first element is the group's seed point.
    """
    # Normalise once so membership tests compare tuples with tuples; a
    # list-typed input point would otherwise never be recognised as grouped.
    points = [(p[0], p[1]) for p in coords]
    clean_cluster = []
    assigned = set()  # every point already placed in some group

    for i, point in enumerate(points):
        if point in assigned:
            continue

        cluster_ = [point]
        for target in points[i + 1 :]:
            _dist = gcc.distance_between_points(
                (point[1], point[0]), (target[1], target[0]), unit="miles"
            )
            if _dist < radius:
                cluster_.append(target)

        assigned.update(cluster_)
        clean_cluster.append(cluster_)

    return clean_cluster


# Grouping distance, in miles, for merging nearby centroids.
set_radius = 20

intermediate_smooth_cluster_ = smooth_cluster_array(
    intermediate_points_centroids, radius=set_radius
)
logger.debug(
    f"{len(intermediate_points_centroids_dataframe.index)} Intermediate Points Centroids "
    f"reduced to {len(intermediate_smooth_cluster_)} groups of locations "
    f"within {set_radius} miles of each other."
)

# One row per group, one column per member point.
smooth_dataframe = pd.DataFrame(data=intermediate_smooth_cluster_)
smooth_dataframe.head(5)

2022-10-25 13:28:35,482: 30: DEBUG: 21 Intermediate Points Centroids reduced to 11 groups of locations within 20 miles of each other.


Unnamed: 0,0,1,2,3,4,5,6,7
0,"(42.20517885905677, -71.43568639583498)",,,,,,,
1,"(40.87589535934485, -73.9628353172676)","(40.856212089724806, -73.9854676661675)","(40.85455237000172, -73.9978839830525)","(40.852329035110444, -74.00350823067645)","(40.86179169768468, -74.00582028465004)","(40.85244247832742, -74.00945647261175)","(40.815391629576254, -74.09160006797211)","(40.82209931861951, -74.0977510290006)"
2,"(40.245894005898634, -75.37916105650612)",,,,,,,
3,"(39.059918975482795, -77.01289366418833)","(39.042323859053965, -77.02988887165826)",,,,,,
4,"(40.06959567749802, -82.9659076119213)",,,,,,,


#### Finding Centroids of Groups of Adjacent Intermediate Points

```  IN  ```Intermediate point centroids grouped together by proximity

```OUT ```Single list of points consisting of the average (centroid) location of each group

In [652]:
# Collapse each proximity group to a single centroid.
intermediate_smooth_cluster = centroid_points_cluster(intermediate_smooth_cluster_)

intermediate_smooth_cluster_dataframe = pd.DataFrame(
    data=intermediate_smooth_cluster,
    columns=["Latitude", "Longitude"],
)
# intermediate_smooth_cluster_dataframe.to_clipboard()
logger.debug(
    f"({len(intermediate_smooth_cluster_dataframe.index)}) Points Found"
)
intermediate_smooth_cluster_dataframe.head(5)

2022-10-25 13:28:35,540:  7: DEBUG: (11) Points Found


Unnamed: 0,Latitude,Longitude
0,42.205179,-71.435686
1,40.848848,-74.019303
2,40.245894,-75.379161
3,39.051122,-77.021392
4,40.069596,-82.965908


### Mapping the Intermediate Point Centroids

```NOTE  ```The overlapping circles are what get reduced in [Finding Centroids of Groups of Adjacent Intermediate Points](#Finding-Centroids-of-Groups-of-Adjacent-Intermediate-Points).

In [653]:
# Centre the map on the mean position of the intermediate-point centroids.
cent_points_map = folium.Map(
    location=[
        intermediate_points_centroids_dataframe["Latitude"].mean(),
        intermediate_points_centroids_dataframe["Longitude"].mean(),
    ],
    zoom_start=4,
)

# Draw one 25-mile circle per centroid.
for point in intermediate_points_centroids:
    folium.Circle(
        [point[0], point[1]],
        nom._miles_to_meters(25),
    ).add_to(cent_points_map)

cent_points_map

### Mapping Final Results with Intermediate Points Shown as a Polygon

In [654]:
# Centre the map on the mean position of the smoothed centroids.
smoothed_cluster_map = folium.Map(
    location=[
        intermediate_smooth_cluster_dataframe.Latitude.mean(),
        intermediate_smooth_cluster_dataframe.Longitude.mean(),
    ],
    zoom_start=4,
)

# One filled 50-mile circle per smoothed centroid.
circle_format = {"color": "#f77f00", "fill": True, "fillOpacity": 0.5}
for point in intermediate_smooth_cluster:
    folium.Circle(
        [point[0], point[1]], nom._miles_to_meters(50), **circle_format
    ).add_to(smoothed_cluster_map)

# Encode the enabled stores and their radii in the output file name,
# e.g. "smoothed_map_ikea-50_micr-30".
file_out = "smoothed_map"

for store, setting in stores_settings.items():
    if setting["enabled"]:
        file_out += f"_{store[:4]}-{setting['radius']}"

# Outline each raw intermediate-point cluster as a polygon.
for cluster in intermediate_points:
    folium.Polygon(cluster).add_to(smoothed_cluster_map)

# Create the output folder first so save() cannot fail on a fresh checkout.
os.makedirs("../output/maps", exist_ok=True)
smoothed_cluster_map.save(f"../output/maps/{file_out}.html")

logger.info(f"Map file saved in output/maps/{file_out}.html")
smoothed_cluster_map

2022-10-25 13:28:35,605: 26: INFO: Map file saved in output/maps/smoothed_map_ikea-50_micr-30_cont-20_llbe-25.html
