In [19]:
import geopy.distance
import pandas as pd
import regex as re
import requests
import os

# defaults
adjacency_path = os.path.join("test_adjacency.csv")

# global vars
counties = {}
graph = None

### classes   
class County:
    def __init__(self, txt:str):
        self.name = txt.split(",")[0]
        self.state = txt.split(",")[1]
        self.id = txt
        self.neighbors = []
        self.parent = None

        self.g = float('inf') # distance from start
        self.h = float('inf') # heuristic distance from the goal
        self.f = 0
    
    def add_neighbor(self, neighbor):
        if f"{self.name}, {self.state}" != f"{neighbor.name}, {neighbor.state}":
            self.neighbors.append(neighbor)

    def update_f(self):
        self.f = self.g + self. h
### end block classes
""""""
### functions
def read_neighbors_file(file_name: os) -> pd.DataFrame: # reads the csv file and converts it into a dataframe
    return pd.read_csv(file_name)

def get_unique_list(df: pd.DataFrame, col_name: str) -> list: # returns the unique values in a df[col_name]
    return list(set(df[col_name]))

def make_object_list(lst: list) -> list: # gets a list of text and returns a list of County objects
    return [County(c) for c in lst]

def preparing_objects(raw_df: pd.DataFrame) -> dict: # making the dataframe into objects and adding their neighbors

    unique_counties = get_unique_list(raw_df, 'countyname')
    county_objects = make_object_list(unique_counties) 
    counties_dict = {county.name + "," + county.state: county for county in county_objects}
    for _, record in raw_df.iterrows():
        county = record['countyname']
        neighbor = record['neighborname']
        cnty_object = counties_dict[county]
        neighbor_object = counties_dict[neighbor]
        cnty_object.add_neighbor(neighbor_object)
    return counties_dict

### functions 
if __name__ == "__main__":
    # initiallizing
    raw_df = read_neighbors_file(adjacency_path)
    counties = preparing_objects(raw_df) # dict: {county.name, county.state: county object}. this is the same dict as neighbors so it is enough for one of them    


In [27]:
# finds the shortest path from the starting locations to the goal location using a search method
def find_path(starting_locations, goal_locations, search_method, detail_output):
    if search_method == 1:
        pathes = []
        for start in starting_locations:
            pathes.append(a_star(start, goal_locations))
            print(pathes[0])
    else:
        pass

# returns the minimum distance between a start county and one of the goals
def get_county_coordinates(county_name):
    url = f"https://nominatim.openstreetmap.org/search?q={county_name}&format=json"
    headers = {
        'User-Agent': 'YourAppName/1.0 (your.email@example.com)'
    }
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        data = response.json()
        if data:
            return float(data[0]['lat']), float(data[0]['lon'])
        else:
            raise Exception(f"No data found for {county_name}")
    else:
        raise Exception(f"Problem with the API. Status code: {response.status_code}")

def heuristic_calc(start, goal):
    start_cord = get_county_coordinates(start)
    goal_cord = get_county_coordinates(goal)
    distance = geopy.distance.geodesic(start_cord, goal_cord).km
    return distance

# perform A* search from a starting location to one of the ending locations in the goal list
def a_star(start, goals):
    frontier = [] # have been visited but not expanded 
    explored = set() # visited and expanded
    path = []

    # retraces the path
    def retracePath(c):
        if c.parent:
            retracePath(c.parent)
        path.append(c.id)

    # initiallizing the start
    start_county = counties[start]
    start_county.g = 0
    start_county.h = min(heuristic_calc(start_county.id, counties[goal].id) for goal in goals)
    start_county.update_f()
    frontier.append(counties[start])
    while frontier:
        current = min(frontier, key = lambda county:county.f) # takes the county with the minimum f
        print(current.id)
        if current.id in goals:
            return retracePath(current)
        else:
            frontier.remove(current)
            explored.add(current)
            for neighbor in current.neighbors:
                if neighbor in explored: continue
                tentative_g = current.g + heuristic_calc(current.id, neighbor.id)
                if tentative_g < neighbor.g:
                    neighbor.parent = current
                    neighbor.g = tentative_g
                    neighbor.h = min(heuristic_calc(neighbor.id, counties[goal].id) for goal in goals)
                    neighbor.update_f()

                if neighbor not in frontier:
                    frontier.append(neighbor)
    
# returns a list for the same color
def get_list_per_color(lst, pattern):
    return [loc.replace('Red, ', '').replace('Blue, ', '') for loc in lst if re.search(pattern, loc)]

# Example input
start_locations = ['Red, Washington County, UT', 'Blue, Chicot County, AR']
goal_locations = ['Red, San Diego County, CA', 'Blue, Bienville Parish, LA']

# dividing the starting locations and goal locations into different lists according to their colors
red_starts = get_list_per_color(start_locations, r'Red,')
red_goals = get_list_per_color(goal_locations, r'Red,')
blue_starts = get_list_per_color(start_locations, r'Blue,')
blue_goals = get_list_per_color(goal_locations, r'Blue,')

# assuming a 'graph' object exists and represents the nodes and edges
# red parties paths
find_path(red_starts, red_goals, 1, 0)


Washington County, UT
Mohave County, AZ
San Bernardino County, CA
Riverside County, CA
San Diego County, CA
None
