In [2]:
import pandas as pd
import numpy as np

In [4]:
# Merge all 202401 - 202412 files into a single DataFrame
def merge_dataframes(file_list, date_column='started_at'):
    """
    Merge multiple CSV files into a single DataFrame.

    Files are read in the order given and stacked row-wise with a fresh
    0..n-1 index. Files that are missing or completely empty are reported
    and skipped instead of aborting the whole merge. If a file cannot be
    read with date parsing (e.g. a requested date column is absent), it is
    loaded again without parsing dates.

    Parameters:
    file_list (list): List of CSV file paths
    date_column (str or list of str): Name(s) of the column(s) to parse as
        dates. A single name behaves exactly as before; a list lets several
        columns (e.g. start and end timestamps) be parsed at once.

    Returns:
    pandas.DataFrame: Merged DataFrame

    Raises:
    ValueError: If none of the files could be loaded
    """
    # Normalise to a flat list of column names. Wrapping an existing list in
    # another list would hand pandas a nested list, which it does not treat
    # as "parse each of these columns".
    if isinstance(date_column, str):
        date_columns = [date_column]
    else:
        date_columns = list(date_column)

    dataframes = []
    for file in file_list:
        try:
            df = pd.read_csv(file, parse_dates=date_columns)
        except FileNotFoundError:
            print(f"Warning: File {file} not found and will be skipped")
            continue
        except pd.errors.EmptyDataError:
            # EmptyDataError is a ValueError subclass, so it must be caught
            # before the generic ValueError handler below; otherwise the
            # retry would hit the same error and crash the merge.
            print(f"Warning: File {file} is empty and will be skipped")
            continue
        except ValueError as e:
            # If date column isn't found, load without parsing dates
            print(f"Warning: {e}. Loading {file} without parsing dates")
            df = pd.read_csv(file)
        else:
            print(f"Successfully loaded {file}")
        dataframes.append(df)

    if not dataframes:
        raise ValueError("No valid files were found to merge")
    return pd.concat(dataframes, ignore_index=True)

# Monthly trip files for 2024, from 202401 through 202412, in
# chronological order.
file_lists = [f'2024{month:02d}-divvy-tripdata.csv' for month in range(1, 13)]

# Load every monthly file and stack them into one DataFrame; 'started_at'
# is parsed as a date through the function's default date_column.
merged_df = merge_dataframes(file_lists)


Successfully loaded 202401-divvy-tripdata.csv
Successfully loaded 202402-divvy-tripdata.csv
Successfully loaded 202403-divvy-tripdata.csv
Successfully loaded 202404-divvy-tripdata.csv
Successfully loaded 202405-divvy-tripdata.csv
Successfully loaded 202406-divvy-tripdata.csv
Successfully loaded 202407-divvy-tripdata.csv
Successfully loaded 202408-divvy-tripdata.csv
Successfully loaded 202409-divvy-tripdata.csv
Successfully loaded 202410-divvy-tripdata.csv
Successfully loaded 202411-divvy-tripdata.csv
Successfully loaded 202412-divvy-tripdata.csv


In [5]:
# Inspect the (rows, columns) dimensions of the merged DataFrame
merged_df.shape

(5860568, 13)

In [7]:
# Export the merged DataFrame to a new CSV file (index=False leaves the
# DataFrame's row index out of the output)
merged_df.to_csv('merged_divvy_tripdata_2024.csv', index=False)