In [102]:
import ast
import pandas as pd
import os
import json
import numpy as np

from mappymatch import package_root
from mappymatch.constructs.trace import Trace
from mappymatch.utils.plot import plot_trace
from mappymatch.constructs.geofence import Geofence
from mappymatch.utils.plot import plot_geofence
from mappymatch.maps.nx.nx_map import NxMap, NetworkType
from mappymatch.utils.plot import plot_map
from mappymatch.matchers.lcss.lcss import LCSSMatcher
from mappymatch.matchers.valhalla import ValhallaMatcher
from mappymatch.matchers.osrm import OsrmMatcher
from mappymatch.utils.plot import plot_matches
from mappymatch.utils.plot import plot_path

In [2]:
def read_trip_data(csv_file):
    """Load a trip-data CSV and decode its list-valued columns.

    The ``trajectory``, ``velocity_profile`` and ``altitude_profile`` columns
    are stored in the CSV as the text of Python literals; every cell of those
    columns is parsed back into a Python object with ``ast.literal_eval``.

    Args:
        csv_file: Path (or any other source accepted by ``pandas.read_csv``)
            of the trip-data CSV.

    Returns:
        pandas.DataFrame: The file's contents, with the three columns above
        holding parsed Python objects instead of strings.

    Raises:
        KeyError: If one of the three expected columns is missing.
    """
    trips = pd.read_csv(csv_file)

    # Columns are decoded in a fixed order, one after the other.
    for column in ('trajectory', 'velocity_profile', 'altitude_profile'):
        trips[column] = trips[column].apply(ast.literal_eval)

    return trips

In [3]:
# Load a single trip-data CSV from the Murphy folder and preview its first rows.
trip_data_read = read_trip_data('data/trips/Murphy/TL5-218_2020W33_trip_data.csv')
print(trip_data_read.head())

           trip_start_time            trip_end_time  travel_time  \
0  2020-08-10 05:59:15.000  2020-08-10 06:01:59.000        164.0   
1  2020-08-10 06:02:00.000  2020-08-10 06:02:14.000         14.0   
2  2020-08-10 06:28:47.100  2020-08-10 08:01:20.100       5553.0   
3  2020-08-10 08:11:58.900  2020-08-10 08:44:03.900       1925.0   
4  2020-08-10 09:42:13.400  2020-08-10 10:32:56.400       3043.0   

                                    altitude_profile  \
0  [531.9, 531.9, 531.9, 531.9, 531.9, 531.9, 531...   
1  [276.0, 276.0, 276.0, 275.8, 275.7, 275.6, 275...   
2  [294.5, 293.7, 292.7, 291.9, 291.3, 290.6, 290...   
3  [275.5, 274.2, 273.3, 272.8, 272.4, 272.1, 271...   
4  [323.9, 323.5, 323.1, 322.7, 322.2, 321.7, 321...   

                                    velocity_profile   weight  total_fuel  \
0  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   8000.0    0.203792   
1  [3.4102, 3.4648, 3.4258, 3.4805, 3.4922, 3.507...   8000.0    0.016903   
2  [3.8867, 5.1758, 6.2

In [51]:
# NOTE: despite the "first_trip" name, this selects the trip at positional
# index 6 of trip_data_read, not the first row.
first_trip_trajectory = trip_data_read['trajectory'].iloc[6]

# Convert the list of (latitude, longitude) tuples into a two-column DataFrame
trajectory_df = pd.DataFrame(first_trip_trajectory, columns=['latitude', 'longitude'])

print(trajectory_df.head())

    latitude  longitude
0  44.788085 -93.462496
1  44.788090 -93.462495
2  44.788096 -93.462492
3  44.788097 -93.462490
4  44.788100 -93.462487


In [52]:
# Wrap the selected trajectory in a mappymatch Trace, naming the coordinate columns explicitly.
trace = Trace.from_dataframe(trajectory_df, lat_column="latitude", lon_column="longitude")

In [23]:
def collect_lat_long(directory_path):
    """Gather every latitude and longitude recorded in a folder of trip CSVs.

    Only files whose name ends with ``_trip_data.csv`` are read (through
    ``read_trip_data``). Each trajectory is expected to be a sequence of
    ``(latitude, longitude)`` pairs; trajectories with no points are skipped.

    Args:
        directory_path: Directory containing the trip-data CSV files.

    Returns:
        tuple[list, list]: ``(all_latitudes, all_longitudes)``, two flat lists
        of equal length with one entry per trajectory point. Files are
        processed in sorted name order so the result is reproducible.
    """
    all_latitudes = []
    all_longitudes = []

    # os.listdir returns entries in arbitrary order; sorting keeps the output
    # identical across machines and runs.
    for file_name in sorted(os.listdir(directory_path)):
        if not file_name.endswith('_trip_data.csv'):  # Not a trip data CSV
            continue

        trip_data = read_trip_data(os.path.join(directory_path, file_name))

        for trajectory in trip_data['trajectory']:
            # zip(*[]) yields nothing, so unpacking an empty trajectory into
            # two names would raise ValueError and abort the whole collection.
            if len(trajectory) == 0:
                continue
            latitudes, longitudes = zip(*trajectory)
            all_latitudes.extend(latitudes)
            all_longitudes.extend(longitudes)

    return all_latitudes, all_longitudes


In [24]:
def _iqr_fence(values, multiplier):
    """Return the ``(lower, upper)`` fence ``Q1 - k*IQR``, ``Q3 + k*IQR`` of ``values``."""
    q1, q3 = np.percentile(values, [25, 75])
    margin = multiplier * (q3 - q1)
    return q1 - margin, q3 + margin


def calculate_bounding_box(all_latitudes, all_longitudes, iqr_multiplier=1.5):
    """Compute a bounding box around the points, ignoring IQR outliers.

    A point is kept only when its latitude AND its longitude both fall inside
    the respective ``[Q1 - k*IQR, Q3 + k*IQR]`` fence, where ``k`` is
    ``iqr_multiplier``. The box is the min/max of the points that are kept.

    Args:
        all_latitudes: Sequence of latitudes, one per point.
        all_longitudes: Sequence of longitudes, aligned with ``all_latitudes``.
        iqr_multiplier: Width of the outlier fence in IQR units. Defaults to
            1.5, the value that was previously hard-coded.

    Returns:
        dict: Keys ``min_latitude``, ``max_latitude``, ``min_longitude`` and
        ``max_longitude`` mapped to plain Python floats.

    Raises:
        ValueError: If the inputs differ in length, are empty, or no point
            survives the outlier filter (e.g. the inputs contain NaN).
    """
    latitudes = np.asarray(all_latitudes, dtype=float)
    longitudes = np.asarray(all_longitudes, dtype=float)

    # Without this check a length-1 array would silently broadcast against
    # the other one and produce a meaningless mask.
    if latitudes.shape != longitudes.shape:
        raise ValueError(
            "all_latitudes and all_longitudes must have the same length "
            f"(got {latitudes.size} and {longitudes.size})"
        )
    if latitudes.size == 0:
        raise ValueError("cannot compute a bounding box from zero points")

    # Calculate the bounds for non-outliers on each axis independently
    lower_bound_lat, upper_bound_lat = _iqr_fence(latitudes, iqr_multiplier)
    lower_bound_lon, upper_bound_lon = _iqr_fence(longitudes, iqr_multiplier)

    # Keep only points that are inside both fences
    non_outlier_mask = (
        (latitudes >= lower_bound_lat) & (latitudes <= upper_bound_lat)
        & (longitudes >= lower_bound_lon) & (longitudes <= upper_bound_lon)
    )
    if not non_outlier_mask.any():
        raise ValueError(
            "no point lies inside both the latitude and longitude fences "
            "(do the inputs contain NaN?)"
        )

    filtered_latitudes = latitudes[non_outlier_mask]
    filtered_longitudes = longitudes[non_outlier_mask]

    # Plain floats keep the printed dict and any JSON export independent of
    # the installed numpy version's scalar repr/types.
    return {
        'min_latitude': float(filtered_latitudes.min()),
        'max_latitude': float(filtered_latitudes.max()),
        'min_longitude': float(filtered_longitudes.min()),
        'max_longitude': float(filtered_longitudes.max()),
    }


In [25]:
def bounding_box_to_geojson(bounding_box, output_file):
    """Write a bounding box to ``output_file`` as a GeoJSON Polygon feature.

    Args:
        bounding_box: Mapping with the keys ``min_latitude``,
            ``max_latitude``, ``min_longitude`` and ``max_longitude``.
        output_file: Destination path of the GeoJSON file. Missing parent
            directories are created.

    Returns:
        None. The feature is written to disk and a confirmation is printed.

    Raises:
        KeyError: If ``bounding_box`` lacks one of the four expected keys.
    """
    # Coerce to plain floats so numpy scalar types serialise with json.
    west = float(bounding_box['min_longitude'])
    east = float(bounding_box['max_longitude'])
    south = float(bounding_box['min_latitude'])
    north = float(bounding_box['max_latitude'])

    # GeoJSON positions are [longitude, latitude]. RFC 7946 requires the
    # exterior ring to be closed and wound counterclockwise.
    coordinates = [
        [
            [west, south],  # Lower-left corner
            [east, south],  # Lower-right corner
            [east, north],  # Upper-right corner
            [west, north],  # Upper-left corner
            [west, south],  # Closing the loop at lower-left corner
        ]
    ]

    # Define the GeoJSON structure
    geojson_object = {
        "type": "Feature",
        "properties": {},  # Properties can be added if needed
        "geometry": {
            "type": "Polygon",
            "coordinates": coordinates
        }
    }

    # open() does not create directories; make sure the target folder exists.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Write the GeoJSON object to a file
    with open(output_file, 'w') as f:
        json.dump(geojson_object, f, indent=4)

    print(f"GeoJSON file saved to {output_file}")

In [45]:
# Derive the bounding box from the points of every trip file in the Murphy folder.
murphy_folder = 'data/trips/Murphy'
all_latitudes, all_longitudes = collect_lat_long(murphy_folder)
bounding_box = calculate_bounding_box(all_latitudes, all_longitudes)

# Predefined bounding box (alternative: uncomment to override the computed one)
# bounding_box = {
#     'min_latitude': 43.96958146789451,
#     'max_latitude': 45.995818532105496,
#     'min_longitude': -94.65258355209289,
#     'max_longitude': -91.7879164479071
# }
print(bounding_box)

{'min_latitude': 44.403672, 'max_latitude': 45.450154, 'min_longitude': -94.073366, 'max_longitude': -92.696696}


In [46]:
# Persist the bounding box as GeoJSON so it can be loaded back with Geofence.from_geojson.
output_geojson_file = 'results/bounding_box_mn.geojson'
bounding_box_to_geojson(bounding_box, output_geojson_file)

GeoJSON file saved to results/bounding_box_mn.geojson


In [47]:
# Load the bounding-box polygon from the GeoJSON file as a mappymatch Geofence.
geofence = Geofence.from_geojson(output_geojson_file)

In [90]:
# Plot the geofence and hand the result to plot_trace as `m`, with trace points in black.
# NOTE(review): presumably `m` is the map object the trace is drawn onto — confirm against plot_trace.
plot_trace(trace, point_color="black", m=plot_geofence(geofence))

In [49]:
def is_within_bounding_box(trajectory, bounding_box):
    """Tell whether every point of a trajectory lies inside a bounding box.

    Args:
        trajectory: Iterable of ``(latitude, longitude)`` pairs.
        bounding_box: Mapping with the keys ``min_latitude``,
            ``max_latitude``, ``min_longitude`` and ``max_longitude``.
            Bounds are inclusive.

    Returns:
        bool: ``True`` when all points are inside the box (also for a
        trajectory with no points), ``False`` as soon as one point is outside.
    """
    for lat, lon in trajectory:
        # Latitude is tested first; the box is only consulted per point, so an
        # empty trajectory never touches it.
        if not (bounding_box['min_latitude'] <= lat <= bounding_box['max_latitude']):
            return False
        if not (bounding_box['min_longitude'] <= lon <= bounding_box['max_longitude']):
            return False
    return True


# Flag each trip whose whole trajectory stays inside the bounding box
inside_mask = trip_data_read['trajectory'].apply(
    lambda trajectory: is_within_bounding_box(trajectory, bounding_box)
)

# Keep only the flagged trips
filtered_trips = trip_data_read.loc[inside_mask]

print(len(filtered_trips))


21


In [50]:
# Preview the trips whose trajectories lie entirely inside the bounding box.
filtered_trips.head()

Unnamed: 0,trip_start_time,trip_end_time,travel_time,altitude_profile,velocity_profile,weight,total_fuel,ambTemperature,trajectory
0,2020-08-10 05:59:15.000,2020-08-10 06:01:59.000,164.0,"[531.9, 531.9, 531.9, 531.9, 531.9, 531.9, 531...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",8000.0,0.203792,11.281,"[(44.980863, -93.219328), (44.980865, -93.2193..."
1,2020-08-10 06:02:00.000,2020-08-10 06:02:14.000,14.0,"[276.0, 276.0, 276.0, 275.8, 275.7, 275.6, 275...","[3.4102, 3.4648, 3.4258, 3.4805, 3.4922, 3.507...",8000.0,0.016903,11.5,"[(44.982194, -93.219045), (44.982191, -93.2190..."
6,2020-08-10 12:28:47.300,2020-08-10 13:13:17.300,2670.0,"[255.7, 255.2, 254.7, 254.2, 253.8, 252.9, 252...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",30000.0,12.920958,15.188,"[(44.788085, -93.462496), (44.78809, -93.46249..."
7,2020-08-10 13:40:07.100,2020-08-10 13:57:35.100,1048.0,"[271.3, 272.8, 274.6, 276.8, 279.6, 281.9, 284...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",8000.0,2.221917,16.875,"[(45.000419, -93.217756), (45.000417, -93.2177..."
8,2020-08-10 14:03:13.200,2020-08-10 14:04:30.200,77.0,"[301.0, 302.2, 303.1, 303.9, 304.7, 305.4, 306...","[1.9531, 1.8516, 2.1328, 2.9453, 3.8359, 4.269...",8000.0,0.099333,16.188,"[(44.981764, -93.219438), (44.981764, -93.2193..."


In [54]:
# Build the road-network map for the geofenced area, restricted to the DRIVE network type.
# NOTE(review): presumably this fetches road data for the whole polygon and may be slow — confirm.
nx_map = NxMap.from_geofence(geofence, network_type=NetworkType.DRIVE)



In [62]:
# Sanity check: show the class of the object returned by NxMap.from_geofence.
type(nx_map)

mappymatch.maps.nx.nx_map.NxMap

In [157]:
# Configure the LCSS matcher on the road network, then match the selected trace.
# NOTE(review): the units of the two distance settings are not visible here — confirm against the mappymatch docs.
matcher = LCSSMatcher(
    nx_map,
    distance_epsilon=100,
    distance_threshold=500,
)

match_result = matcher.match_trace(trace)

In [158]:
# Plot the matches produced by the matcher for the selected trace.
plot_matches(match_result.matches)

In [176]:
# Convert the matches to a DataFrame for tabular inspection
df = match_result.matches_to_dataframe()

In [177]:
# Preview the first rows of the matches table.
# NOTE(review): the recorded output shows no road_id for these rows — confirm the
# matcher actually assigned roads to the start of the trace.
df.head()

Unnamed: 0,coordinate_id,road_id,distance_to_road,geom,origin_junction_id,origin_destination_id,road_key,kilometers,travel_time
0,0,,,,,,,,
1,1,,,,,,,,
2,2,,,,,,,,
3,3,,,,,,,,
4,4,,,,,,,,


In [179]:
# Show the first five roads of the matched path.
match_result.path[:5]

[Road(road_id=RoadId(start=8146751356, end=188063558, key=0), geom=<LINESTRING (-10404539.537 5589125.344, -10404634.615 5589150.144, -10404686...>, metadata={'kilometers': 0.108204, 'travel_time': 10.3}),
 Road(road_id=RoadId(start=188063558, end=188055017, key=0), geom=<LINESTRING (-10404686.957 5589164.921, -10404847.68 5589210.601, -10404912....>, metadata={'kilometers': 0.244521, 'travel_time': 23.2}),
 Road(road_id=RoadId(start=188055017, end=10805239549, key=0), geom=<LINESTRING (-10405018.667 5589259.497, -10405018.589 5589250.947)>, metadata={'kilometers': 0.006059999999999999, 'travel_time': 0.4}),
 Road(road_id=RoadId(start=10805239549, end=10805239548, key=0), geom=<LINESTRING (-10405018.589 5589250.947, -10405018.4 5589236.531)>, metadata={'kilometers': 0.010220000000000002, 'travel_time': 0.7}),
 Road(road_id=RoadId(start=10805239548, end=539723216, key=0), geom=<LINESTRING (-10405018.4 5589236.531, -10405015.249 5588966.8, -10405014.849...>, metadata={'kilometers': 0.511