In [None]:
import pandas as pd
import gzip
import os

In [None]:
# Function to read a .gz file and return a DataFrame
def read_gz_to_dataframe(file_path, columnnames=None):
    """Read one gzip-compressed, header-less CSV file into a DataFrame.

    Parameters
    ----------
    file_path : str or path-like
        Location of the gzip file to read.
    columnnames : list of str, optional
        Labels to assign to the columns. The file is parsed with
        ``header=None``, so its first line is always treated as data.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    # Text mode ('rt') hands pandas decoded lines rather than raw bytes.
    with gzip.open(file_path, mode='rt') as text_stream:
        return pd.read_csv(text_stream, header=None, names=columnnames)

# Function to load and concatenate all .gz files in a folder
def load_and_concatenate_files(folder_path, column_names=None):
    """Load every ``.gz`` file in a folder and stack them into one DataFrame.

    Parameters
    ----------
    folder_path : str or path-like
        Directory to scan. Only entries directly inside it whose name ends
        in ``.gz`` are read; sub-folders are not searched.
    column_names : list of str, optional
        Column labels forwarded to ``read_gz_to_dataframe`` for each file.

    Returns
    -------
    pandas.DataFrame
        Rows of all files, concatenated in file-name order with a fresh
        0..N-1 index.

    Raises
    ------
    ValueError
        If the folder contains no entry whose name ends in ``.gz``.
    """
    # os.listdir yields names in arbitrary order; sorting makes the row order
    # of the result reproducible from run to run and machine to machine.
    gz_names = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.gz')
    )

    # Fail with a message that names the folder instead of letting pd.concat
    # raise its generic "No objects to concatenate" error.
    if not gz_names:
        raise ValueError("No .gz files found in {!r}".format(folder_path))

    frames = [
        read_gz_to_dataframe(os.path.join(folder_path, name), column_names)
        for name in gz_names
    ]
    return pd.concat(frames, ignore_index=True)


# Function to filter data and produce the summary table
def summarize_flow(df, confidence_level, timestamp_col, percent_observed_col, total_flow_col,
                   station_col='Station'):
    """Summarise flow per station, year, month and weekday above a threshold.

    Rows whose ``percent_observed_col`` value is below ``confidence_level``
    are discarded; the remainder is grouped by station, calendar year,
    calendar month and weekday name.

    The input frame is left untouched: the timestamp column is parsed on a
    filtered copy. Only rows that pass the filter are parsed. Callers that
    summarise the same frame repeatedly can convert the timestamp column to
    datetime once beforehand to avoid re-parsing.

    Parameters
    ----------
    df : pandas.DataFrame
        Source observations.
    confidence_level : number
        Minimum (inclusive) value of ``percent_observed_col`` for a row to be
        kept. Also written to the ``confidence_level`` output column.
    timestamp_col : str
        Column holding the observation time; anything ``pd.to_datetime``
        accepts.
    percent_observed_col : str
        Column compared against ``confidence_level``.
    total_flow_col : str
        Column averaged into ``average_flow``.
    station_col : str, default 'Station'
        Column identifying the station.

    Returns
    -------
    pandas.DataFrame
        Columns: ``station_col``, ``year``, ``month``, ``day_of_week``
        (ordered categorical, Monday..Sunday), ``average_flow`` (mean of
        ``total_flow_col``), ``observed_days`` (count of distinct timestamp
        values in the group), ``confidence_level``. Sorted by station, year,
        month and weekday.
    """
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    group_keys = [station_col, 'year', 'month', 'day_of_week']

    # .copy() makes an independent frame, so the column assignments below
    # neither write through to the caller's data nor raise
    # SettingWithCopyWarning.
    kept = df.loc[df[percent_observed_col] >= confidence_level].copy()
    kept[timestamp_col] = pd.to_datetime(kept[timestamp_col])

    stamps = kept[timestamp_col]
    kept['year'] = stamps.dt.year
    kept['month'] = stamps.dt.month
    kept['day_of_week'] = stamps.dt.day_name()

    # NOTE(review): 'nunique' counts distinct timestamps, which equals distinct
    # days only if each timestamp is a whole date — presumably true for daily
    # count files; confirm against the data.
    summary = kept.groupby(group_keys).agg(
        average_flow=(total_flow_col, 'mean'),
        observed_days=(timestamp_col, 'nunique'),
    ).reset_index()

    # An ordered categorical makes the sort follow the calendar week rather
    # than alphabetical order of the day names.
    summary['day_of_week'] = pd.Categorical(summary['day_of_week'], categories=day_order, ordered=True)
    summary = summary.sort_values(group_keys)
    summary['confidence_level'] = confidence_level

    return summary
# Example usage
# Labels applied, in order, to the columns of the header-less daily-count files.
columnnames = [
    'Timestamp',
    'Station',
    'District',
    'Route',
    'Direction of Travel',
    'Lane Type',
    'Station Length',
    'Samples',
    'Percent_Observed',
    'total_flow',
    'Delay (V_t=35)',
    'Delay (V_t=40)',
    'Delay (V_t=45)',
    'Delay (V_t=50)',
    'Delay (V_t=55)',
    'Delay (V_t=60)',
]
folder_path = r'F:\Research and Analysis\Transportation\TrafficCounts\PEMS\Daily_Counts'
df = load_and_concatenate_files(folder_path, columnnames)

# Columns that summarize_flow reads.
timestamp_col = 'Timestamp'
percent_observed_col = 'Percent_Observed'
total_flow_col = 'total_flow'

# Minimum Percent_Observed thresholds to summarise at.
confidence_level_75 = 75
confidence_level_25 = 25

# Build one summary per threshold.
summary_table_75 = summarize_flow(
    df, confidence_level_75, timestamp_col, percent_observed_col, total_flow_col
)
summary_table_25 = summarize_flow(
    df, confidence_level_25, timestamp_col, percent_observed_col, total_flow_col
)

# Stack both summaries (the confidence_level column tells them apart) and show the result.
summary_table = pd.concat([summary_table_75, summary_table_25])
print(summary_table)

In [None]:
# Write the combined summary to CSV, leaving out the DataFrame index.
summary_table.to_csv(
    r'F:\Research and Analysis\Transportation\TrafficCounts\PEMS\summary_table.csv',
    index=False,
)

In [21]:
# Read the tab-separated station metadata text file.
station_metadata = pd.read_csv(
    r'F:\Research and Analysis\Transportation\TrafficCounts\PEMS\d03_text_meta_2024_06_04.txt',
    sep='\t',
)
# Save the same table as a comma-separated file, leaving out the index.
station_metadata.to_csv(
    r'F:\Research and Analysis\Transportation\TrafficCounts\PEMS\station_metadata.csv',
    index=False,
)

In [22]:
pems_geodatabase = r'F:\Research and Analysis\Transportation\TrafficCounts\PEMS\PEMS_Locations.gdb'

# Build a point feature class from the station metadata CSV.
import arcpy

# The bare output name below is given relative to this workspace.
arcpy.env.workspace = pems_geodatabase

# Arguments: input table, output feature class, x (longitude) field, y (latitude) field.
# NOTE(review): no coordinate system is passed, so the tool's default applies — confirm it is the intended one.
arcpy.management.XYTableToPoint(
    r'F:\Research and Analysis\Transportation\TrafficCounts\PEMS\station_metadata.csv',
    'station_locations',
    'Longitude',
    'Latitude',
)