In [None]:
import pandas as pd

def parse_file(file_path, max_lanes=10):
    """Load a header-less comma-separated station file into a DataFrame.

    The first twelve columns are fixed station-level fields; they are
    followed by five columns ("Samples", "Flow", "Avg Occ", "Avg Speed",
    "Observed") for each lane from 1 to ``max_lanes``.  Rows that carry
    fewer fields than there are column names are padded with missing values
    by pandas, and those are then zero-filled in numeric columns.

    Args:
        file_path: Path (or file-like object) handed to ``pandas.read_csv``.
        max_lanes: Number of per-lane column groups to name.  Defaults to
            10, which matches the previous hard-coded value.

    Returns:
        The cleaned DataFrame, or ``None`` if pandas raised a
        ``ParserError``.  Any other exception from ``read_csv`` (for
        example a missing file) propagates to the caller.

    Side effects:
        Prints the parser error message on failure and the first rows of
        the result on success.
    """
    column_names = [
        "Timestamp", "Station", "District", "Freeway", "Direction of Travel", "Lane Type",
        "Station Length", "Samples", "% Observed", "Total Flow", "Avg Occupancy", "Avg Speed",
    ]
    lane_fields = ("Samples", "Flow", "Avg Occ", "Avg Speed", "Observed")
    column_names.extend(
        f"Lane {n} {field}"
        for n in range(1, max_lanes + 1)
        for field in lane_fields
    )

    try:
        # index_col=False stops pandas from promoting the first column to
        # the index when rows are shorter than the list of names.
        df = pd.read_csv(file_path, names=column_names, index_col=False)
    except pd.errors.ParserError as e:
        print(f"Error parsing file: {e}")
        return None

    # Unparseable timestamps become NaT rather than raising.
    df['Timestamp'] = pd.to_datetime(df['Timestamp'], format='%m/%d/%Y %H:%M:%S', errors='coerce')

    # Coerce BEFORE filling: errors='coerce' turns bad values into NaN, so
    # doing this after the fill would reintroduce missing values.
    df["Station Length"] = pd.to_numeric(df["Station Length"], errors='coerce')

    # Zero-fill numeric columns only.  Writing integer 0 into the datetime or
    # text columns would leave them mixed-type, so NaT and missing text stay
    # missing.
    numeric_cols = df.select_dtypes(include="number").columns
    df[numeric_cols] = df[numeric_cols].fillna(0)

    # Display first few rows for verification
    print(df.head())

    return df

# Example usage: parse a single file when this cell/script runs.
# `data` holds the resulting DataFrame, or None if parse_file reported a
# parser error.
file_path = "./d07_text_station_5min_2015_01_01.txt"  # Replace with your file path
data = parse_file(file_path)

# Optionally save the cleaned data to a new CSV (left disabled; the None
# check guards against a failed parse).
# if data is not None:
#     data.to_csv("parsed_output.csv", index=False)


   Timestamp  Station  District  Freeway Direction of Travel Lane Type  \
0 2015-01-01   715898         7        5                   S        ML   
1 2015-01-01   715900         7        5                   S        OR   
2 2015-01-01   715901         7        5                   N        OR   
3 2015-01-01   715903         7        5                   N        OR   
4 2015-01-01   715906         7        5                   S        OR   

   Station Length  Samples  % Observed  Total Flow  ...  Lane 9 Samples  \
0            0.43        0           0        97.0  ...             0.0   
1            0.00        0           0         0.0  ...             0.0   
2            0.00        0           0         0.0  ...             0.0   
3            0.00        0           0         0.0  ...             0.0   
4            0.00        0           0         0.0  ...             0.0   

   Lane 9 Flow  Lane 9 Avg Occ  Lane 9 Avg Speed  Lane 9 Observed  \
0          0.0             0.0     