In [11]:
import pandas as pd

# Load the Peachtree vehicle-trajectory CSV into a DataFrame.
# NOTE(review): the absolute path is specific to one machine; consider a
# configurable data directory so the notebook runs elsewhere.
peachtree_df = pd.read_csv('/home/zach/Downloads/NGSIM_Peachtree_Vehicle_Trajectories.csv')


In [9]:
# Load the I-80 CSV and derive 'DateTime' by reading 'Global_Time' as
# milliseconds since the Unix epoch.
I_80_df = pd.read_csv('/home/zach/Downloads/I-80.csv').assign(
    DateTime=lambda frame: pd.to_datetime(frame['Global_Time'], unit='ms')
)

In [8]:
# Load the US-101 CSV and derive 'DateTime' by reading 'Global_Time' as
# milliseconds since the Unix epoch.
US_101_df = pd.read_csv('/home/zach/Downloads/US-101.csv').assign(
    DateTime=lambda frame: pd.to_datetime(frame['Global_Time'], unit='ms')
)

In [23]:
def categorize_vehicle_class(v_class):
    """Translate a numeric vehicle-class code into a readable label.

    Codes 1, 2 and 3 map to 'Motorcycle', 'Auto' and 'Truck' respectively;
    any other value yields 'Unknown'.
    """
    class_labels = {1: 'Motorcycle', 2: 'Auto', 3: 'Truck'}
    try:
        return class_labels[v_class]
    except KeyError:
        # Code is not one of the three known classes.
        return 'Unknown'

def categorize_movement(v_vel, v_acc, movements, df,
                        aggressive_quantile=0.90, defensive_quantile=0.40,
                        aggressive_movements=(2, 3)):
    """Label a vehicle's driving behaviour relative to a reference population.

    Args:
        v_vel: Speed value to classify.
        v_acc: Acceleration value to classify.
        movements: Iterable of movement codes recorded for the vehicle.
        df: DataFrame with 'v_Vel' and 'v_Acc' columns; quantiles of these
            columns supply the thresholds.
        aggressive_quantile: Quantile of 'v_Vel' / 'v_Acc' above which a
            value counts as aggressive. Defaults to 0.90.
        defensive_quantile: Quantile of 'v_Vel' / 'v_Acc' below which a
            value counts as defensive. Defaults to 0.40.
        aggressive_movements: Collection of movement codes that mark the
            vehicle as aggressive on their own. Defaults to (2, 3).

    Returns:
        'Aggressive' if the speed or the acceleration exceeds its aggressive
        threshold, or if any recorded movement is in ``aggressive_movements``;
        'Defensive' if both speed and acceleration are below their defensive
        thresholds; otherwise 'Regular'.

    Note:
        The thresholds are recomputed from ``df`` on every call, so
        classifying many rows against the same frame repeats that work.
    """
    # Population-relative thresholds.
    aggressive_speed_threshold = df['v_Vel'].quantile(aggressive_quantile)
    defensive_speed_threshold = df['v_Vel'].quantile(defensive_quantile)
    aggressive_acceleration_threshold = df['v_Acc'].quantile(aggressive_quantile)
    defensive_acceleration_threshold = df['v_Acc'].quantile(defensive_quantile)

    # A single flagged movement is enough to classify the vehicle as aggressive.
    has_aggressive_movement = any(m in aggressive_movements for m in movements)

    if (v_vel > aggressive_speed_threshold
            or v_acc > aggressive_acceleration_threshold
            or has_aggressive_movement):
        return 'Aggressive'
    if v_vel < defensive_speed_threshold and v_acc < defensive_acceleration_threshold:
        return 'Defensive'
    return 'Regular'

In [24]:
import pandas as pd

# Per-vehicle summary of the Peachtree trajectories, rolled up into six-hour blocks.
# Requires `peachtree_df`, `categorize_vehicle_class` and `categorize_movement`
# to exist already (defined in earlier cells).

# Read 'Global_Time' as seconds since the Unix epoch.
# NOTE(review): the I-80 and US-101 cells parse this column with unit='ms';
# confirm that 's' is really the right unit for the Peachtree file.
peachtree_df['DateTime'] = pd.to_datetime(peachtree_df['Global_Time'], unit='s')

# Key each vehicle by its ID plus the calendar date of the observation.
peachtree_df['Unique_Vehicle_ID'] = (
    peachtree_df['Vehicle_ID'].astype(str)
    + "_"
    + peachtree_df['DateTime'].dt.date.astype(str)
)

# Mean speed and acceleration per unique vehicle.
vehicle_means = (
    peachtree_df
    .groupby('Unique_Vehicle_ID')
    .agg({'v_Vel': 'mean', 'v_Acc': 'mean'})
    .reset_index()
)

# Mean timestamp, full list of movement codes, and first-seen class per unique vehicle.
vehicle_times_movement = (
    peachtree_df
    .groupby('Unique_Vehicle_ID')
    .agg({
        'DateTime': 'mean',
        'Movement': lambda codes: list(codes),
        'v_Class': 'first',  # the first v_Class is taken as representative
    })
    .reset_index()
)

# One row per unique vehicle with both sets of summaries.
simplified_df = vehicle_means.merge(vehicle_times_movement, on='Unique_Vehicle_ID')

# Attach the vehicle-type label and the driving-behaviour label.
simplified_df['Vehicle_Type'] = simplified_df['v_Class'].map(categorize_vehicle_class)
simplified_df['Driving_Behavior'] = simplified_df.apply(
    lambda vehicle: categorize_movement(
        vehicle['v_Vel'], vehicle['v_Acc'], vehicle['Movement'], simplified_df
    ),
    axis=1,
)

# Bucket each vehicle by the six-hour block containing its mean timestamp,
# labelled by the block's starting hour ("00:00", "06:00", ...).
block_start_hour = simplified_df['DateTime'].dt.hour // 6 * 6
simplified_df['SixHourBlock'] = block_start_hour.map(lambda hour: f"{hour:02d}:00")

# Per block, count vehicles by type and by behaviour (stored as dicts).
aggregated_data = (
    simplified_df
    .groupby('SixHourBlock')
    .agg({
        'Vehicle_Type': lambda labels: labels.value_counts().to_dict(),
        'Driving_Behavior': lambda labels: labels.value_counts().to_dict(),
    })
    .reset_index()
)

# Persist the summary, then show it.
aggregated_data.to_csv('NGSIM_data.csv', index=False)

print(aggregated_data)


  SixHourBlock                                 Vehicle_Type  \
0        00:00                   {'Auto': 597, 'Truck': 10}   
1        06:00  {'Auto': 795, 'Truck': 20, 'Motorcycle': 1}   
2        12:00                   {'Auto': 614, 'Truck': 12}   
3        18:00                   {'Auto': 594, 'Truck': 14}   

                                    Driving_Behavior  
0  {'Aggressive': 370, 'Regular': 231, 'Defensive...  
1  {'Aggressive': 364, 'Regular': 352, 'Defensive...  
2  {'Aggressive': 350, 'Regular': 235, 'Defensive...  
3  {'Aggressive': 408, 'Regular': 165, 'Defensive...  


In [27]:
# The DataFrame printout truncates the dict column, so print the first
# block's behaviour counts in full.
print(aggregated_data.loc[0, 'Driving_Behavior'])


{'Aggressive': 370, 'Regular': 231, 'Defensive': 6}
