In [96]:
import pandas as pd

# Location of the NGSIM Peachtree trajectory CSV -- replace with the path to your copy
PEACHTREE_CSV_PATH = '/home/zach/Downloads/NGSIM_Peachtree_Vehicle_Trajectories.csv'

# Read the raw trajectory records; the later cells all start from peachtree_df
peachtree_df = pd.read_csv(PEACHTREE_CSV_PATH)

In [97]:

# Build a time-indexed copy so that peachtree_df itself is left unmodified.
# Global_Time is parsed with unit='s'.
# NOTE(review): confirm the unit of Global_Time against the dataset documentation.
peachtree_copy = peachtree_df.assign(
    DateTime=pd.to_datetime(peachtree_df['Global_Time'], unit='s')
).set_index('DateTime')

# Number of distinct vehicle IDs observed in each one-hour bin
hourly_vehicle_ids = peachtree_copy['Vehicle_ID'].resample('H')
vehicle_counts_per_hour = hourly_vehicle_ids.nunique()

# Show the hourly counts
print(vehicle_counts_per_hour)

DateTime
2006-11-08 20:00:00     2
2006-11-08 21:00:00     5
2006-11-08 22:00:00    12
2006-11-08 23:00:00    18
2006-11-09 00:00:00    30
                       ..
2006-11-20 18:00:00     6
2006-11-20 19:00:00     6
2006-11-20 20:00:00     6
2006-11-20 21:00:00     5
2006-11-20 22:00:00     2
Freq: H, Name: Vehicle_ID, Length: 291, dtype: int64


In [98]:
# Smallest v_Class code present in the data (quick check of the class encoding)
peachtree_copy['v_Class'].min()

1

In [101]:
import pandas as pd


# Parse Global_Time into timestamps (unit='s').
# NOTE(review): confirm the unit of Global_Time against the dataset documentation.
peachtree_df['DateTime'] = pd.to_datetime(peachtree_df['Global_Time'], unit='s')

# Set the DateTime column as the index (resample and pd.Grouper below bin on it)
peachtree_df.set_index('DateTime', inplace=True)

# Distinct vehicle IDs seen in each 3-hour bin
vehicle_counts_per_3hours = peachtree_df['Vehicle_ID'].resample('3H').nunique()

# Distinct vehicle IDs per 3-hour bin, split by vehicle class and by movement code
vehicle_type_counts = (
    peachtree_df.groupby([pd.Grouper(freq='3H'), 'v_Class'])['Vehicle_ID']
    .nunique()
    .unstack(fill_value=0)
)
movement_counts = (
    peachtree_df.groupby([pd.Grouper(freq='3H'), 'Movement'])['Vehicle_ID']
    .nunique()
    .unstack(fill_value=0)
)

# Combine the counts into a single DataFrame. Each piece is labelled by name
# (rename / add_prefix) rather than by overwriting the column list positionally,
# so the labels cannot drift out of step with the concatenation order.
combined_counts = pd.concat(
    [
        vehicle_counts_per_3hours.rename('Total_Vehicles'),
        vehicle_type_counts.add_prefix('v_Class_'),
        movement_counts.add_prefix('Movement_'),
    ],
    axis=1,
)

# A bin without any rows appears in the resample result but not in the groupby
# results, which leaves NaN after alignment; those are genuine zero counts, and
# filling them keeps every column an integer count.
combined_counts = combined_counts.fillna(0).astype(int).rename_axis(columns=None)

# Display the result
combined_counts

Unnamed: 0_level_0,Total_Vehicles,v_Class_1,v_Class_2,v_Class_3,Movement_1,Movement_2,Movement_3
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006-11-08 18:00:00,2,0,2,0,1,1,0
2006-11-08 21:00:00,21,0,21,0,20,1,6
2006-11-09 00:00:00,56,0,56,0,49,4,11
2006-11-09 03:00:00,55,1,53,1,46,10,10
2006-11-09 06:00:00,53,1,51,1,47,6,8
...,...,...,...,...,...,...,...
2006-11-20 09:00:00,18,0,17,1,17,2,1
2006-11-20 12:00:00,11,0,10,1,11,1,0
2006-11-20 15:00:00,9,0,8,1,9,1,0
2006-11-20 18:00:00,6,0,6,0,6,0,1


In [103]:

# Quantile-based cut-offs used to label driving style
# (example values; they should be adjusted to the specifics of the dataset)
velocity_quartiles = peachtree_df['v_Vel'].quantile([0.25, 0.75])
low_velocity_threshold = velocity_quartiles.loc[0.25]    # 25th percentile of v_Vel
high_velocity_threshold = velocity_quartiles.loc[0.75]   # 75th percentile of v_Vel
high_acceleration_threshold = peachtree_df['v_Acc'].quantile(0.75)  # 75th percentile of v_Acc

# Row-wise driving-style labeller
def categorize_vehicle(row):
    """Label one trajectory row as 'Aggressive', 'Defensive' or 'Regular'.

    Compares row['v_Vel'] and row['v_Acc'] with the module-level
    high_velocity_threshold, low_velocity_threshold and
    high_acceleration_threshold, which must be defined before this is called.

    Returns:
        'Aggressive' when both velocity and acceleration exceed their high
        thresholds and row['Movement'] is 2 or 3; 'Defensive' when the
        velocity is below the low threshold; 'Regular' in every other case.
    """
    is_fast = row['v_Vel'] > high_velocity_threshold
    if is_fast and row['v_Acc'] > high_acceleration_threshold:
        # Movement codes 2 and 3 are assumed to be sudden lane changes or turns
        return 'Aggressive' if row['Movement'] in [2, 3] else 'Regular'
    if row['v_Vel'] < low_velocity_threshold:
        return 'Defensive'
    return 'Regular'

# Label every row with its driving style (categorize_vehicle runs once per row)
driving_style = peachtree_df.apply(categorize_vehicle, axis=1)
peachtree_df['Driving_Style'] = driving_style

# Tally how many rows fall into each driving style and show the tally
style_counts = peachtree_df['Driving_Style'].value_counts()
print(style_counts)


Driving_Style
Regular       661129
Defensive     212430
Aggressive       328
Name: count, dtype: int64


In [110]:
# v_Class code -> readable class name; any other code is reported as 'Unknown'
_VEHICLE_CLASS_NAMES = {1: 'Motorcycle', 2: 'Auto', 3: 'Truck'}


def categorize_vehicle_class(row):
    """Translate row['v_Class'] into a vehicle class name.

    Returns 'Motorcycle' for 1, 'Auto' for 2, 'Truck' for 3 and 'Unknown'
    for any other value.
    """
    return _VEHICLE_CLASS_NAMES.get(row['v_Class'], 'Unknown')
def categorize_movement(
    row,
    aggressive_speed_threshold=40,
    defensive_speed_threshold=20,
    aggressive_acceleration_threshold=5,
    defensive_acceleration_threshold=-5,
):
    """Label one trajectory row as 'Aggressive', 'Defensive' or 'Regular'.

    The thresholds are example values and can be overridden per call, e.g.
    ``df.apply(categorize_movement, axis=1, aggressive_speed_threshold=50)``.
    With the defaults the behaviour is the same as the earlier hard-coded version.

    Args:
        row: Mapping-like record providing 'v_Vel' and 'v_Acc'.
        aggressive_speed_threshold: Speed (feet/second) above which the row
            is labelled aggressive.
        defensive_speed_threshold: Speed (feet/second) below which the row
            is labelled defensive.
        aggressive_acceleration_threshold: Acceleration (feet/second^2) above
            which the row is labelled aggressive.
        defensive_acceleration_threshold: Acceleration (feet/second^2) below
            which the row is labelled defensive.

    Returns:
        'Aggressive', 'Defensive' or 'Regular'. All comparisons are strict,
        so a value exactly on a threshold does not trigger that label.
    """
    speed = row['v_Vel']
    acceleration = row['v_Acc']

    # Aggressive is tested first, so a slow but hard-accelerating row is
    # labelled 'Aggressive' rather than 'Defensive'.
    if speed > aggressive_speed_threshold or acceleration > aggressive_acceleration_threshold:
        return 'Aggressive'

    # Defensive driving: low speed or decelerating
    if speed < defensive_speed_threshold or acceleration < defensive_acceleration_threshold:
        return 'Defensive'

    # Regular driving: neither aggressive nor defensive
    return 'Regular'

In [114]:
# Index the frame by timestamps parsed from 'Global_Time' (unit='s')
event_times = pd.to_datetime(peachtree_df['Global_Time'], unit='s')
peachtree_df['DateTime'] = event_times
peachtree_df.set_index('DateTime', inplace=True)

# Derive the two label columns, one row at a time
for label_column, labeller in (
    ('Vehicle_Class', categorize_vehicle_class),
    ('Movement_Type', categorize_movement),
):
    peachtree_df[label_column] = peachtree_df.apply(labeller, axis=1)


def _value_count_dict(values):
    """Return {value: number of occurrences} for the values of one bin."""
    return values.value_counts().to_dict()


# Per 6-hour bin: number of distinct vehicle IDs, plus row counts per vehicle
# class and per movement type stored as dictionaries
aggregated_data = peachtree_df.resample('6H').agg({
    'Vehicle_ID': pd.Series.nunique,
    'Vehicle_Class': _value_count_dict,
    'Movement_Type': _value_count_dict,
})

# Show the first bins
print(aggregated_data.head())

                     Vehicle_ID  \
DateTime                          
2006-11-08 18:00:00          21   
2006-11-09 00:00:00          73   
2006-11-09 06:00:00          66   
2006-11-09 12:00:00          66   
2006-11-09 18:00:00          95   

                                                        Vehicle_Class  \
DateTime                                                                
2006-11-08 18:00:00                                     {'Auto': 740}   
2006-11-09 00:00:00     {'Auto': 6222, 'Motorcycle': 63, 'Truck': 26}   
2006-11-09 06:00:00    {'Auto': 8452, 'Motorcycle': 216, 'Truck': 22}   
2006-11-09 12:00:00  {'Auto': 10104, 'Truck': 366, 'Motorcycle': 216}   
2006-11-09 18:00:00  {'Auto': 13151, 'Truck': 426, 'Motorcycle': 216}   

                                                         Movement_Type  
DateTime                                                                
2006-11-08 18:00:00  {'Regular': 313, 'Defensive': 250, 'Aggressive...  
2006-11-09 00:00:00  {'

In [115]:

# Print the whole aggregated_data frame rather than only its first rows
print(aggregated_data)

                     Vehicle_ID  \
DateTime                          
2006-11-08 18:00:00          21   
2006-11-09 00:00:00          73   
2006-11-09 06:00:00          66   
2006-11-09 12:00:00          66   
2006-11-09 18:00:00          95   
2006-11-10 00:00:00         122   
2006-11-10 06:00:00         110   
2006-11-10 12:00:00         116   
2006-11-10 18:00:00         125   
2006-11-11 00:00:00         147   
2006-11-11 06:00:00         143   
2006-11-11 12:00:00         134   
2006-11-11 18:00:00         133   
2006-11-12 00:00:00         133   
2006-11-12 06:00:00         168   
2006-11-12 12:00:00         147   
2006-11-12 18:00:00         138   
2006-11-13 00:00:00         124   
2006-11-13 06:00:00         148   
2006-11-13 12:00:00         130   
2006-11-13 18:00:00         123   
2006-11-14 00:00:00         111   
2006-11-14 06:00:00         108   
2006-11-14 12:00:00         138   
2006-11-14 18:00:00         135   
2006-11-15 00:00:00         120   
2006-11-15 06:00:00 

In [None]:
import pandas as pd

# Assumes peachtree_df is already loaded and carries the 'Vehicle_Class' and
# 'Movement_Type' columns that the aggregation below reads.

# Convert 'Global_Time' to datetime and set it as the index.
# unit='s' matches the other cells of this notebook, whose outputs show
# November 2006 timestamps; parsing the same values as milliseconds would
# collapse them into January 1970.
peachtree_df['DateTime'] = pd.to_datetime(peachtree_df['Global_Time'], unit='s')
peachtree_df.set_index('DateTime', inplace=True)

# Group by time of day in 3-hour windows (00:00-03:00, 03:00-06:00, ...).
# Grouping on the raw clock time would produce one group per distinct
# timestamp instead of one group per window.
window_labels = {
    start_hour: f'{start_hour:02d}:00-{start_hour + 3:02d}:00'
    for start_hour in range(0, 24, 3)
}
window_start_hour = peachtree_df.index.hour // 3 * 3
peachtree_df['TimeOfDay'] = window_start_hour.map(window_labels)

# Per window: distinct vehicle IDs, distinct vehicle classes, distinct movement types
aggregated_data = peachtree_df.groupby('TimeOfDay').agg({
    'Vehicle_ID': 'nunique',
    'Vehicle_Class': 'nunique',
    'Movement_Type': 'nunique',
    # ...
}).reset_index()

# Display the aggregated data
print(aggregated_data)