diff --git a/examples/stpaul/project_cards/9a_multi_transit_attribute_change.yml b/examples/stpaul/project_cards/9a_multi_transit_attribute_change.yml index 97d950b..e4c150a 100644 --- a/examples/stpaul/project_cards/9a_multi_transit_attribute_change.yml +++ b/examples/stpaul/project_cards/9a_multi_transit_attribute_change.yml @@ -4,6 +4,6 @@ tags: transit_property_change: service: route_long_name: [Express, 'Ltd Stop'] - property_changs: + property_changes: headway_secs: set: 1800 diff --git a/network_wrangler/projects/__init__.py b/network_wrangler/projects/__init__.py index 3338cc8..08a1691 100644 --- a/network_wrangler/projects/__init__.py +++ b/network_wrangler/projects/__init__.py @@ -1,8 +1,11 @@ -from .add_new_roadway import apply_new_roadway -from .calculated_roadway import apply_calculated_roadway -from .parallel_managed_lanes import apply_parallel_managed_lanes +from .roadway_add_new import apply_new_roadway +from .roadway_calculated import apply_calculated_roadway +from .roadway_parallel_managed_lanes import apply_parallel_managed_lanes from .roadway_deletion import apply_roadway_deletion from .roadway_property_change import apply_roadway_property_change +from .transit_property_change import apply_transit_property_change +from .transit_routing_change import apply_transit_routing_change +from .transit_calculated import apply_calculated_transit __all__ = [ "apply_new_roadway", @@ -10,4 +13,7 @@ "apply_parallel_managed_lanes", "apply_roadway_deletion", "apply_roadway_property_change", + "apply_transit_property_change", + "apply_transit_routing_change", + "apply_calculated_transit", ] diff --git a/network_wrangler/projects/add_new_roadway.py b/network_wrangler/projects/roadway_add_new.py similarity index 100% rename from network_wrangler/projects/add_new_roadway.py rename to network_wrangler/projects/roadway_add_new.py diff --git a/network_wrangler/projects/calculated_roadway.py b/network_wrangler/projects/roadway_calculated.py similarity index 100% rename from network_wrangler/projects/calculated_roadway.py rename to network_wrangler/projects/roadway_calculated.py diff --git a/network_wrangler/projects/parallel_managed_lanes.py b/network_wrangler/projects/roadway_parallel_managed_lanes.py similarity index 100% rename from network_wrangler/projects/parallel_managed_lanes.py rename to network_wrangler/projects/roadway_parallel_managed_lanes.py diff --git a/network_wrangler/projects/transit_calculated.py b/network_wrangler/projects/transit_calculated.py new file mode 100644 index 0000000..439f2e9 --- /dev/null +++ b/network_wrangler/projects/transit_calculated.py @@ -0,0 +1,18 @@ +from ..logger import WranglerLogger + + +def apply_calculated_transit( + net: "TransitNetwork", + pycode: str, +) -> "TransitNetwork": + """ + Changes transit network object by executing pycode. + + Args: + net: transit network to manipulate + pycode: python code which changes values in the transit network object + """ + WranglerLogger.debug("Applying calculated transit project.") + exec(pycode) + + return net diff --git a/network_wrangler/projects/transit_property_change.py b/network_wrangler/projects/transit_property_change.py new file mode 100644 index 0000000..777807a --- /dev/null +++ b/network_wrangler/projects/transit_property_change.py @@ -0,0 +1,73 @@ +from ..logger import WranglerLogger + +import numpy as np +import pandas as pd + +TABLE_TO_APPLY_BY_PROPERTY = { + "headway_secs": "frequencies", +} + +# Tables which can be selected by trip_id +IMPLEMENTED_TABLES = [ + "trips", + "frequencies", + "stop_times" +] + +class TransitPropertyChangeError(Exception): + pass + +def apply_transit_property_change( + net: 'TransitNetwork', selection: 'Selection', property_changes: dict +) -> 'TransitNetwork': + WranglerLogger.debug("Applying transit property change project.") + + for property, property_change in property_changes.items(): + table = TABLE_TO_APPLY_BY_PROPERTY.get(property) + if not table: + table = net.feed.tables_with_property(property) + if not len(table == 1): + raise TransitPropertyChangeError("Found property {property} in multiple tables: {table}") + table = table[0] + if not table: + raise NotImplementedError("No table found to modify: {property}") + + if table not in IMPLEMENTED_TABLES: + raise NotImplementedError(f"{table} table changes not currently implemented.") + + WranglerLogger.debug(f"...modifying {property} in {table}.") + net = _apply_transit_property_change_to_table(net,selection,table,property,property_change) + + return net + + +def _apply_transit_property_change_to_table( + net: 'TransitNetwork', + selection: 'Selection', + table_name: str, + property: str, + property_change: dict +) -> 'TransitNetwork': + + table_df = net.feed.get(table_name) + # Grab only those records matching trip_ids (aka selection) + set_df = table_df[table_df.trip_id.isin(selection.selected_trips)].copy() + + # Check all `existing` properties if given + if "existing" in property_change: + if not all(set_df[property] == property_change["existing"]): + WranglerLogger.error(f"Existing does not match {property_change['existing']} for at least 1 trip.") + raise TransitPropertyChangeError("Existing does not match.") + + # Calculate build value + if "set" in property_change: + set_df["_set_val"] = property_change["set"] + else: + set_df["_set_val"] = set_df[property] + property_change["change"] + set_df[property] = set_df["_set_val"] + set_df= set_df.drop(columns=["_set_val"]) + + # Update in feed + net.feed.set_by_id(table_name, set_df, id_property = "trip_id", properties = [property]) + + return net \ No newline at end of file diff --git a/network_wrangler/projects/transit_routing_change.py b/network_wrangler/projects/transit_routing_change.py new file mode 100644 index 0000000..9f7f596 --- /dev/null +++ b/network_wrangler/projects/transit_routing_change.py @@ -0,0 +1,237 @@ +from ..logger import WranglerLogger + +import numpy as np +import pandas as pd + +def apply_transit_routing_change( + net: 'TransitNetwork', selection: 'Selection', routing_change: dict +) -> 'TransitNetwork': + WranglerLogger.debug("Applying transit routing change project.") + + trip_ids = selection.selected_trips + routing = pd.Series(routing_change["set"]) + + # Copy the tables that need to be edited since they are immutable within partridge + shapes = net.feed.shapes.copy() + stop_times = net.feed.stop_times.copy() + stops = net.feed.stops.copy() + + # A negative sign in "set" indicates a traversed node without a stop + # If any positive numbers, stops have changed + stops_change = False + if any(x > 0 for x in routing_change["set"]): + # Simplify "set" and "existing" to only stops + routing_change["set_stops"] = [ + str(i) for i in routing_change["set"] if i > 0 + ] + if routing_change.get("existing") is not None: + routing_change["existing_stops"] = [ + str(i) for i in routing_change["existing"] if i > 0 + ] + stops_change = True + + # Convert ints to objects + routing_change["set_shapes"] = [str(abs(i)) for i in routing_change["set"]] + if routing_change.get("existing") is not None: + routing_change["existing_shapes"] = [ + str(abs(i)) for i in routing_change["existing"] + ] + + # Replace shapes records + trips = net.feed.trips # create pointer rather than a copy + shape_ids = trips[trips["trip_id"].isin(trip_ids)].shape_id + for shape_id in shape_ids: + # Check if `shape_id` is used by trips that are not in + # parameter `trip_ids` + trips_using_shape_id = trips.loc[trips["shape_id"] == shape_id, ["trip_id"]] + if not all(trips_using_shape_id.isin(trip_ids)["trip_id"]): + # In this case, we need to create a new shape_id so as to leave + # the trips not part of the query alone + WranglerLogger.warning( + "Trips that were not in your query selection use the " + "same `shape_id` as trips that are in your query. Only " + "the trips' shape in your query will be changed." + ) + old_shape_id = shape_id + shape_id = str(int(shape_id) + net.ID_SCALAR) + if shape_id in shapes["shape_id"].tolist(): + WranglerLogger.error("Cannot create a unique new shape_id.") + dup_shape = shapes[shapes.shape_id == old_shape_id].copy() + dup_shape["shape_id"] = shape_id + shapes = pd.concat([shapes, dup_shape], ignore_index=True) + + # Pop the rows that match shape_id + this_shape = shapes[shapes.shape_id == shape_id] + + # Make sure they are ordered by shape_pt_sequence + this_shape = this_shape.sort_values(by=["shape_pt_sequence"]) + + shape_node_fk, rd_field = net.TRANSIT_FOREIGN_KEYS_TO_ROADWAY[ + "shapes" + ]["links"] + # Build a pd.DataFrame of new shape records + new_shape_rows = pd.DataFrame( + { + "shape_id": shape_id, + "shape_pt_lat": None, # FIXME Populate from self.road_net? + "shape_pt_lon": None, # FIXME + "shape_osm_node_id": None, # FIXME + "shape_pt_sequence": None, + shape_node_fk: routing_change["set_shapes"], + } + ) + + # If "existing" is specified, replace only that segment + # Else, replace the whole thing + if routing_change.get("existing") is not None: + # Match list + nodes = this_shape[shape_node_fk].tolist() + index_replacement_starts = [ + i + for i, d in enumerate(nodes) + if d == routing_change["existing_shapes"][0] + ][0] + index_replacement_ends = [ + i + for i, d in enumerate(nodes) + if d == routing_change["existing_shapes"][-1] + ][-1] + this_shape = pd.concat( + [ + this_shape.iloc[:index_replacement_starts], + new_shape_rows, + this_shape.iloc[index_replacement_ends + 1 :], + ], + ignore_index=True, + sort=False, + ) + else: + this_shape = new_shape_rows + + # Renumber shape_pt_sequence + this_shape["shape_pt_sequence"] = np.arange(len(this_shape)) + + # Add rows back into shapes + shapes = pd.concat( + [shapes[shapes.shape_id != shape_id], this_shape], + ignore_index=True, + sort=False, + ) + + # Replace stop_times and stops records (if required) + if stops_change: + # If node IDs in routing_change["set_stops"] are not already + # in stops.txt, create a new stop_id for them in stops + existing_fk_ids = set(stops[net.STOPS_FOREIGN_KEY].tolist()) + nodes_df = net.road_net.nodes_df.loc[ + :, [net.STOPS_FOREIGN_KEY, "X", "Y"] + ] + for fk_i in routing_change["set_stops"]: + if fk_i not in existing_fk_ids: + WranglerLogger.info( + "Creating a new stop in stops.txt for node ID: {}".format(fk_i) + ) + # Add new row to stops + new_stop_id = str(int(fk_i) + net.ID_SCALAR) + if new_stop_id in stops["stop_id"].tolist(): + WranglerLogger.error("Cannot create a unique new stop_id.") + stops.loc[ + len(stops.index) + 1, + [ + "stop_id", + "stop_lat", + "stop_lon", + net.STOPS_FOREIGN_KEY, + ], + ] = [ + new_stop_id, + nodes_df.loc[ + nodes_df[net.STOPS_FOREIGN_KEY] == int(fk_i), "Y" + ], + nodes_df.loc[ + nodes_df[net.STOPS_FOREIGN_KEY] == int(fk_i), "X" + ], + fk_i, + ] + + # Loop through all the trip_ids + for trip_id in trip_ids: + # Pop the rows that match trip_id + this_stoptime = stop_times[stop_times.trip_id == trip_id] + + # Merge on node IDs using stop_id (one node ID per stop_id) + this_stoptime = this_stoptime.merge( + stops[["stop_id", net.STOPS_FOREIGN_KEY]], + how="left", + on="stop_id", + ) + + # Make sure the stop_times are ordered by stop_sequence + this_stoptime = this_stoptime.sort_values(by=["stop_sequence"]) + + # Build a pd.DataFrame of new shape records from properties + new_stoptime_rows = pd.DataFrame( + { + "trip_id": trip_id, + "arrival_time": None, + "departure_time": None, + "pickup_type": None, + "drop_off_type": None, + "stop_distance": None, + "timepoint": None, + "stop_is_skipped": None, + net.STOPS_FOREIGN_KEY: routing_change["set_stops"], + } + ) + + # Merge on stop_id using node IDs (many stop_id per node ID) + new_stoptime_rows = ( + new_stoptime_rows.merge( + stops[["stop_id", net.STOPS_FOREIGN_KEY]], + how="left", + on=net.STOPS_FOREIGN_KEY, + ) + .groupby([net.STOPS_FOREIGN_KEY]) + .head(1) + ) # pick first + + # If "existing" is specified, replace only that segment + # Else, replace the whole thing + if routing_change.get("existing") is not None: + # Match list (remember stops are passed in with node IDs) + nodes = this_stoptime[net.STOPS_FOREIGN_KEY].tolist() + index_replacement_starts = nodes.index( + routing_change["existing_stops"][0] + ) + index_replacement_ends = nodes.index( + routing_change["existing_stops"][-1] + ) + this_stoptime = pd.concat( + [ + this_stoptime.iloc[:index_replacement_starts], + new_stoptime_rows, + this_stoptime.iloc[index_replacement_ends + 1 :], + ], + ignore_index=True, + sort=False, + ) + else: + this_stoptime = new_stoptime_rows + + # Remove node ID + del this_stoptime[net.STOPS_FOREIGN_KEY] + + # Renumber stop_sequence + this_stoptime["stop_sequence"] = np.arange(len(this_stoptime)) + + # Add rows back into stoptime + stop_times = pd.concat( + [stop_times[stop_times.trip_id != trip_id], this_stoptime], + ignore_index=True, + sort=False, + ) + + net.feed.shapes = shapes + net.feed.stops = stops + net.feed.stop_times = stop_times + return net \ No newline at end of file diff --git a/network_wrangler/transit/feed.py b/network_wrangler/transit/feed.py index 823570c..b4287ec 100644 --- a/network_wrangler/transit/feed.py +++ b/network_wrangler/transit/feed.py @@ -17,7 +17,7 @@ TripsSchema, ShapesSchema, ) -from ..utils import fk_in_pk +from ..utils import fk_in_pk, update_df_by_col_value from ..logger import WranglerLogger @@ -147,6 +147,20 @@ def __deepcopy__(self, memo): # Return the newly created deep copy of the Feed object return new_feed + @property + def table_names(self) -> list[str]: + """ + Returns list of tables from config. + """ + return list(self.config.nodes.keys()) + + @property + def tables(self) -> list[pd.DataFrame]: + """ + Returns list of tables from config. + """ + return [self.get(table_name) for table_name in self.config.nodes.keys()] + def _read_from_file(self, node: str) -> pd.DataFrame: """Read node from file + validate to schema if table name in SCHEMAS and return dataframe. @@ -174,6 +188,32 @@ def get(self, table: str, validate: bool = True) -> pd.DataFrame: df = self.validate_df_as_table(table, df) return df + def set_by_id( + self, + table_name: str, + set_df: pd.DataFrame, + id_property: str = "trip_id", + properties: list[str] = None, + ): + """ + Set property values in a specific table for a list of IDs. + + Args: + table_name (str): Name of the table to modify. + set_df (pd.DataFrame): DataFrame with columns 'trip_id' and 'value' containing + trip IDs and values to set for the specified property. + id_property: Property to use as ID to set by. Defaults to "trip_id. + properties: List of properties to set which are in set_df. If not specified, will set + all properties. + + """ + table_df = self.get(table_name) + updated_df = update_df_by_col_value( + table_df, set_df, id_property, properties=properties + ) + self.validate_df_as_table(table_name, updated_df) + self.__dict__[table_name] = updated_df + def validate_df_as_table(self, table: str, df: pd.DataFrame) -> bool: """Validate a dataframe as a table: relevant schemas and foreign keys. @@ -295,3 +335,9 @@ def feed_hash(self) -> str: _hash = hashlib.sha256(_value).hexdigest() return _hash + + def tables_with_property(self, property: str) -> list[str]: + """ + Returns feed tables in the feed which contain the property. + """ + return [t for t in self.table_names if property in self.get(t).columns] diff --git a/network_wrangler/transitnetwork.py b/network_wrangler/transitnetwork.py index aa82439..6651229 100644 --- a/network_wrangler/transitnetwork.py +++ b/network_wrangler/transitnetwork.py @@ -8,7 +8,6 @@ from typing import Union import networkx as nx -import numpy as np import pandas as pd from projectcard import ProjectCard @@ -16,6 +15,11 @@ from .logger import WranglerLogger from .utils import fk_in_pk from .transit import Feed, TransitSelection +from .projects import ( + apply_transit_routing_change, + apply_transit_property_change, + apply_calculated_transit, +) class TransitRoadwayConsistencyError(Exception): @@ -394,23 +398,19 @@ def apply( Defaults to False. Will be set to true with code when necessary. """ - if isinstance(project_card, ProjectCard): - project_card_dictionary = project_card.__dict__ - elif isinstance(project_card, dict): + if isinstance(project_card, dict): project_card_dictionary = project_card + elif isinstance(project_card, ProjectCard): + project_card_dictionary = project_card.__dict__ else: - WranglerLogger.error( - f"project_card is of invalid type: {type(project_card)}" + raise ValueError( + f"Expecting ProjectCard or dict instance but found \ + {type(project_card)}." ) - raise TypeError("project_card must be of type ProjectCard or dict") - - WranglerLogger.info( - f"Applying Project to Transit Network: { project_card_dictionary['project']}" - ) if not _subproject: WranglerLogger.info( - "Applying Project to Roadway Network: {}".format( + "Applying Project to Transit Network: {}".format( project_card_dictionary["project"] ) ) @@ -422,329 +422,28 @@ def apply( project_dictionary = project_card_dictionary if "transit_property_change" in project_dictionary: - return self.apply_transit_feature_change( - self.get_selection( - project_dictionary["transit_property_change"]["service"] - ).selected_trips, - project_dictionary["transit_property_change"]["property_changes"], + _proj = project_dictionary["transit_property_change"] + return apply_transit_property_change( + self, + self.get_selection(_proj["service"]), + _proj["property_changes"], ) - elif project_dictionary.get("pycode"): - return self.apply_python_calculation(project_dictionary["pycode"]) - - else: - msg = "Cannot find transit project in project_dictionary – not implemented yet." - WranglerLogger.error(msg) - raise (msg) - - def apply_python_calculation(self, pycode: str) -> "TransitNetwork": - """ - Changes roadway network object by executing pycode. - - Args: - pycode: python code which changes values in the roadway network object - """ - net = copy.deepcopy(self) - exec(pycode) - return net - - def apply_transit_feature_change( - self, - trip_ids: pd.Series, - property_changes: dict, - ) -> "TransitNetwork": - """ - Changes the transit attributes for the selected features based on the - project card information passed - - Args: - net: transit network to manipulate - trip_ids : pd.Series - all trip_ids to apply change to - properties : list of dictionaries - transit properties to change - - Returns: - None - """ - net = copy.deepcopy(self) - - # Grab the list of nodes in the facility from road_net - # It should be cached because managed lane projects are - # processed by RoadwayNetwork first via - # Scenario.apply_all_projects - # managed_lane_nodes = self.road_net.selections( - # self.road_net.build_selection_key(project_dictionary["facility"]) - # )["route"] - - for property, p_changes in property_changes.items(): - if property in ["headway_secs"]: - net = TransitNetwork._apply_transit_feature_change_frequencies( - net, trip_ids, property, p_changes - ) - - elif property in ["routing"]: - net = TransitNetwork._apply_transit_feature_change_routing( - net, trip_ids, p_changes - ) - return net - - def _apply_transit_feature_change_routing( - self, trip_ids: pd.Series, routing_change: dict - ) -> TransitNetwork: - net = copy.deepcopy(self) - shapes = net.feed.shapes.copy() - stop_times = net.feed.stop_times.copy() - stops = net.feed.stops.copy() - - # A negative sign in "set" indicates a traversed node without a stop - # If any positive numbers, stops have changed - stops_change = False - if any(x > 0 for x in routing_change["set"]): - # Simplify "set" and "existing" to only stops - routing_change["set_stops"] = [ - str(i) for i in routing_change["set"] if i > 0 - ] - if routing_change.get("existing") is not None: - routing_change["existing_stops"] = [ - str(i) for i in routing_change["existing"] if i > 0 - ] - stops_change = True - - # Convert ints to objects - routing_change["set_shapes"] = [str(abs(i)) for i in routing_change["set"]] - if routing_change.get("existing") is not None: - routing_change["existing_shapes"] = [ - str(abs(i)) for i in routing_change["existing"] - ] - - # Replace shapes records - trips = net.feed.trips # create pointer rather than a copy - shape_ids = trips[trips["trip_id"].isin(trip_ids)].shape_id - for shape_id in shape_ids: - # Check if `shape_id` is used by trips that are not in - # parameter `trip_ids` - trips_using_shape_id = trips.loc[trips["shape_id"] == shape_id, ["trip_id"]] - if not all(trips_using_shape_id.isin(trip_ids)["trip_id"]): - # In this case, we need to create a new shape_id so as to leave - # the trips not part of the query alone - WranglerLogger.warning( - "Trips that were not in your query selection use the " - "same `shape_id` as trips that are in your query. Only " - "the trips' shape in your query will be changed." - ) - old_shape_id = shape_id - shape_id = str(int(shape_id) + TransitNetwork.ID_SCALAR) - if shape_id in shapes["shape_id"].tolist(): - WranglerLogger.error("Cannot create a unique new shape_id.") - dup_shape = shapes[shapes.shape_id == old_shape_id].copy() - dup_shape["shape_id"] = shape_id - shapes = pd.concat([shapes, dup_shape], ignore_index=True) - - # Pop the rows that match shape_id - this_shape = shapes[shapes.shape_id == shape_id] - - # Make sure they are ordered by shape_pt_sequence - this_shape = this_shape.sort_values(by=["shape_pt_sequence"]) - - shape_node_fk, rd_field = self.net.TRANSIT_FOREIGN_KEYS_TO_ROADWAY[ - "shapes" - ]["links"] - # Build a pd.DataFrame of new shape records - new_shape_rows = pd.DataFrame( - { - "shape_id": shape_id, - "shape_pt_lat": None, # FIXME Populate from self.road_net? - "shape_pt_lon": None, # FIXME - "shape_osm_node_id": None, # FIXME - "shape_pt_sequence": None, - shape_node_fk: routing_change["set_shapes"], - } + elif "transit_routing_changes" in project_dictionary: + _proj = project_dictionary["transit_routing_change"] + return apply_transit_routing_change( + self, + self.get_selection(_proj["service"]), + _proj["routing_changes"], ) - # If "existing" is specified, replace only that segment - # Else, replace the whole thing - if routing_change.get("existing") is not None: - # Match list - nodes = this_shape[shape_node_fk].tolist() - index_replacement_starts = [ - i - for i, d in enumerate(nodes) - if d == routing_change["existing_shapes"][0] - ][0] - index_replacement_ends = [ - i - for i, d in enumerate(nodes) - if d == routing_change["existing_shapes"][-1] - ][-1] - this_shape = pd.concat( - [ - this_shape.iloc[:index_replacement_starts], - new_shape_rows, - this_shape.iloc[index_replacement_ends + 1 :], - ], - ignore_index=True, - sort=False, - ) - else: - this_shape = new_shape_rows - - # Renumber shape_pt_sequence - this_shape["shape_pt_sequence"] = np.arange(len(this_shape)) - - # Add rows back into shapes - shapes = pd.concat( - [shapes[shapes.shape_id != shape_id], this_shape], - ignore_index=True, - sort=False, - ) - - # Replace stop_times and stops records (if required) - if stops_change: - # If node IDs in routing_change["set_stops"] are not already - # in stops.txt, create a new stop_id for them in stops - existing_fk_ids = set(stops[TransitNetwork.STOPS_FOREIGN_KEY].tolist()) - nodes_df = net.road_net.nodes_df.loc[ - :, [TransitNetwork.STOPS_FOREIGN_KEY, "X", "Y"] - ] - for fk_i in routing_change["set_stops"]: - if fk_i not in existing_fk_ids: - WranglerLogger.info( - "Creating a new stop in stops.txt for node ID: {}".format(fk_i) - ) - # Add new row to stops - new_stop_id = str(int(fk_i) + TransitNetwork.ID_SCALAR) - if new_stop_id in stops["stop_id"].tolist(): - WranglerLogger.error("Cannot create a unique new stop_id.") - stops.loc[ - len(stops.index) + 1, - [ - "stop_id", - "stop_lat", - "stop_lon", - TransitNetwork.STOPS_FOREIGN_KEY, - ], - ] = [ - new_stop_id, - nodes_df.loc[ - nodes_df[TransitNetwork.STOPS_FOREIGN_KEY] == int(fk_i), "Y" - ], - nodes_df.loc[ - nodes_df[TransitNetwork.STOPS_FOREIGN_KEY] == int(fk_i), "X" - ], - fk_i, - ] - - # Loop through all the trip_ids - for trip_id in trip_ids: - # Pop the rows that match trip_id - this_stoptime = stop_times[stop_times.trip_id == trip_id] - - # Merge on node IDs using stop_id (one node ID per stop_id) - this_stoptime = this_stoptime.merge( - stops[["stop_id", TransitNetwork.STOPS_FOREIGN_KEY]], - how="left", - on="stop_id", - ) - - # Make sure the stop_times are ordered by stop_sequence - this_stoptime = this_stoptime.sort_values(by=["stop_sequence"]) - - # Build a pd.DataFrame of new shape records from properties - new_stoptime_rows = pd.DataFrame( - { - "trip_id": trip_id, - "arrival_time": None, - "departure_time": None, - "pickup_type": None, - "drop_off_type": None, - "stop_distance": None, - "timepoint": None, - "stop_is_skipped": None, - TransitNetwork.STOPS_FOREIGN_KEY: routing_change["set_stops"], - } - ) - - # Merge on stop_id using node IDs (many stop_id per node ID) - new_stoptime_rows = ( - new_stoptime_rows.merge( - stops[["stop_id", TransitNetwork.STOPS_FOREIGN_KEY]], - how="left", - on=TransitNetwork.STOPS_FOREIGN_KEY, - ) - .groupby([TransitNetwork.STOPS_FOREIGN_KEY]) - .head(1) - ) # pick first - - # If "existing" is specified, replace only that segment - # Else, replace the whole thing - if routing_change.get("existing") is not None: - # Match list (remember stops are passed in with node IDs) - nodes = this_stoptime[TransitNetwork.STOPS_FOREIGN_KEY].tolist() - index_replacement_starts = nodes.index( - routing_change["existing_stops"][0] - ) - index_replacement_ends = nodes.index( - routing_change["existing_stops"][-1] - ) - this_stoptime = pd.concat( - [ - this_stoptime.iloc[:index_replacement_starts], - new_stoptime_rows, - this_stoptime.iloc[index_replacement_ends + 1 :], - ], - ignore_index=True, - sort=False, - ) - else: - this_stoptime = new_stoptime_rows - - # Remove node ID - del this_stoptime[TransitNetwork.STOPS_FOREIGN_KEY] - - # Renumber stop_sequence - this_stoptime["stop_sequence"] = np.arange(len(this_stoptime)) - - # Add rows back into stoptime - stop_times = pd.concat( - [stop_times[stop_times.trip_id != trip_id], this_stoptime], - ignore_index=True, - sort=False, - ) - - net.feed.shapes = shapes - net.feed.stops = stops - net.feed.stop_times = stop_times - return net - - def _apply_transit_feature_change_frequencies( - self, trip_ids: pd.Series, property: str, property_change: dict - ) -> TransitNetwork: - net = copy.deepcopy(self) - freq = net.feed.frequencies.copy() - - # Grab only those records matching trip_ids (aka selection) - freq = freq[freq.trip_id.isin(trip_ids)] - - # Check all `existing` properties if given - if property_change.get("existing") is not None: - if not all(freq.headway_secs == property_change["existing"]): - WranglerLogger.error( - "Existing does not match for at least " - "1 trip in:\n {}".format(trip_ids.to_string()) - ) - raise ValueError + elif "pycode" in project_dictionary: + return self.apply_calculated_transit(self, project_dictionary["pycode"]) - # Calculate build value - if property_change.get("set") is not None: - build_value = property_change["set"] else: - build_value = [i + property_change["change"] for i in freq.headway_secs] - - q = net.feed.frequencies.trip_id.isin(freq["trip_id"]) - - net.feed.frequencies.loc[q, property] = build_value - return net + msg = f"Not a currently valid transit project: {project_dictionary.keys()}." + WranglerLogger.error(msg) + raise NotImplementedError(msg) def create_empty_transit_network() -> TransitNetwork: diff --git a/network_wrangler/utils/__init__.py b/network_wrangler/utils/__init__.py index ed747c8..aa7ab0b 100644 --- a/network_wrangler/utils/__init__.py +++ b/network_wrangler/utils/__init__.py @@ -16,9 +16,11 @@ from .geo import update_points_in_linestring from .data import DictQueryAccessor from .data import dfHash +from .data import update_df_by_col_value __all__ = [ + "update_df_by_col_value", "DictQueryAccessor", "dfHash", "delete_keys_from_dict", diff --git a/network_wrangler/utils/data.py b/network_wrangler/utils/data.py index 38ce690..5e3d796 100644 --- a/network_wrangler/utils/data.py +++ b/network_wrangler/utils/data.py @@ -111,3 +111,98 @@ def __call__(self): _value = str(self._obj.values).encode() hash = hashlib.sha1(_value).hexdigest() return hash + + +def _check_compatable_df_update( + destination_df: pd.DataFrame, + source_df: pd.DataFrame, + join_col: str, + properties: list[str] = None, +) -> None: + """Evaluates if destination df is comptable for being updated with source_df based on id_col. + + Args: + destination_df (pd.DataFrame): Dataframe to modify. + source_df (pd.DataFrame): Dataframe with updated columns + join_col (str): column to join on + properties (list[str]): List of properties to use. If None, will default to all. + """ + + if join_col not in source_df.columns: + raise ValueError(f"source_df must have {join_col}.") + + if properties is None: + properties = [p for p in source_df.columns if p != join_col] + + _missing_cols = set(properties) - set(source_df.columns) + if _missing_cols: + raise ValueError(f"Properties missing from source_df: {_missing_cols}") + + if join_col not in destination_df: + raise ValueError(f"joim_col {join_col} not in destination_df columns.") + + _missing_t_cols = set(properties) - set(destination_df.columns) + if _missing_t_cols: + raise NotImplementedError( + f"Properties missing from destination_df: {_missing_t_cols}" + ) + + _missing_ids = set(source_df[join_col]) - set(destination_df[join_col]) + if _missing_ids: + raise ValueError( + f"join values specified in set_df missing from destionation_df table:\ + {_missing_ids}" + ) + + +def update_df_by_col_value( + destination_df: pd.DataFrame, + source_df: pd.DataFrame, + join_col: str, + properties: list[str] = None, +) -> pd.DataFrame: + """Creates an updated destination_df based on values in source_df with same join_col. + + ``` + >> destination_df + trip_id property1 property2 + 1 10 100 + 2 20 200 + 3 30 300 + 4 40 400 + + >> source_df + trip_id property1 property2 + 2 25 250 + 3 35 350 + + >> updated_df + trip_id property1 property2 + 0 1 10 100 + 1 2 25 250 + 2 3 35 350 + 3 4 40 400 + ``` + + Args: + destination_df (pd.DataFrame): Dataframe to modify. + source_df (pd.DataFrame): Dataframe with updated columns + join_col (str): column to join on + properties (list[str]): List of properties to use. If None, will default to all + in source_df. + """ + if properties is None: + properties = [p for p in properties if p != join_col] + + _check_compatable_df_update( + destination_df, source_df, join_col, properties=properties + ) + + merged_df = destination_df.merge( + source_df, on=join_col, how="left", suffixes=("", "_updated") + ) + for prop in properties: + merged_df[prop] = merged_df[f"{prop}_updated"].combine_first(merged_df[prop]) + updated_df = merged_df.drop([f"{prop}_updated" for prop in properties], axis=1) + + return updated_df