In [1]:
import pandas as pd
import os
import datetime

In [2]:
vru_approach_dict = {
    'NB': 'S',
    'SB': 'N',
    'EB': 'W',
    'WB': 'E'
}

class_dict = {
    'Mobility Aid': 'Pedestrian',
    'Motorcycle': 'Passenger Vehicle',
    'Articulated Truck': 'Semi Truck',
    'Single Unit Truck': 'Box Truck',
    'Person Mobility Device': 'Pedestrian'
}

severity_dict = {
    'High': 'Severe',
    'Low': 'Moderate'
}

default_date = '1900-01-01'

In [3]:
def import_and_concatenate_csvs(folder_path):
    # List all CSV files in the folder
    csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
    
    # Initialize an empty list to store DataFrames
    df_list = []

    # Iterate through each CSV file
    for file in csv_files:
        # Construct the full file path
        file_path = os.path.join(folder_path, file)
        
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file_path)

        df_list.append(df)
        concatenated_df = pd.concat(df_list, ignore_index=True)
        # pull the location from the file name based on everything before the first underscore
        location = file.split('_')[0]
        df['Location'] = location
    
    return concatenated_df

In [10]:
all_traffic_df = import_and_concatenate_csvs('Data')

In [11]:
# get times
all_traffic_df[['time', 'end_time']] = all_traffic_df['timeInterval'].str.split(' - ', expand=True)
all_traffic_df['time'] = pd.to_datetime(default_date + ' ' + all_traffic_df['time'])
# convert time to just time format
all_traffic_df['time'] = all_traffic_df['time'].dt.time

# get dates as datetime
all_traffic_df['date'] = pd.to_datetime(all_traffic_df['date'])
all_traffic_df['date'] = all_traffic_df['date'].dt.date

# drop and rename excess columns
all_traffic_df.drop(columns=['timeInterval', 'movement',  'end_time'], inplace=True)
all_traffic_df.rename(columns={'movementType': 'movement', 'count': 'volume', 'date':'date_count', 'time':'time_count'}, inplace=True)

all_traffic_df['class'] = all_traffic_df['class'].str.replace('_', ' ').str.title()
all_traffic_df['class'] = all_traffic_df['class'].replace(class_dict)

mask = all_traffic_df['movement'] == 'CROSSING'
all_traffic_df.loc[mask, 'approach'] = all_traffic_df.loc[mask, 'approach'].map(vru_approach_dict)

all_traffic_df

  all_traffic_df['time'] = pd.to_datetime(default_date + ' ' + all_traffic_df['time'])


Unnamed: 0,volume,date_count,lane,approach,movement,dayOfTheWeek,class,Location,time_count
0,1,2024-08-14,1,NB,TH,Wednesday,Passenger Vehicle,SR28SandHarbor,00:00:00
1,1,2024-07-17,1,NB,TH,Wednesday,Passenger Vehicle,SR28SandHarbor,00:00:00
2,20,2024-11-20,1,NB,TH,Wednesday,Passenger Vehicle,SR28SandHarbor,00:00:00
3,7,2024-11-13,1,NB,TH,Wednesday,Passenger Vehicle,SR28SandHarbor,00:00:00
4,1,2024-10-23,1,NB,TH,Wednesday,Passenger Vehicle,SR28SandHarbor,00:00:00
...,...,...,...,...,...,...,...,...,...
228754,1,2024-05-07,1,NB,TH,Tuesday,Passenger Vehicle,,16:30:00
228755,2,2024-09-17,1,SB,TH,Tuesday,Passenger Vehicle,,16:30:00
228756,1,2024-05-07,1,NB,TH,Tuesday,Passenger Vehicle,,16:50:00
228757,1,2024-05-07,1,NB,TH,Tuesday,Box Truck,,17:10:00


In [9]:
all_traffic_df.to_csv('all_traffic.csv', index=False)