In [21]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read in data
df = pd.read_csv('../../data/wmata.csv', encoding='utf-16', delimiter="\t")

# Remove columns Servicetype_This_Year(group), Holiday_Last_Year, Holiday_This_Year, Servicetype_This_Year, Time_Period, and Date_Last_Year
df = df.drop(['Day_of_Date_This_Year', 'Servicetype_This_Year_(group)', 'Holiday_Last_Year', 'Holiday_This_Year', 'Servicetype_This_Year', 'Time_Period', 'Date_Last_Year', 'Entries_Last_Year'], axis=1)

# Rename columns 
df = df.rename(columns={'Date_This_Year': 'Date'})
df = df.rename(columns={'Entries_This_Year': 'Entries'})

# Pivot data
pivot_df = df.pivot_table(index='Date', columns='Station', values='Entries')

# Convert index of pivot_df to datetime
pivot_df.index = pd.to_datetime(pivot_df.index)

# Organize index of pivot_df from earliest to latest date
pivot_df = pivot_df.sort_index()

# Save pivot_df to csv
pivot_df.to_csv('../../data/cleaned_data/wmata_cleaned.csv')

# Build interactive timeseries plot using plotly
# Import plotly libraries
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# List all stations
stations = list(pivot_df.columns)

# Create subplot with one trace per station
fig = make_subplots(rows=1, cols=1)
for station in stations:
    fig.add_trace(
        go.Scatter(x=pivot_df.index, y=pivot_df[station], name=station),
        row=1, col=1
    )

# Create dropdown menu to select station
buttons = []
for station in stations:
    buttons.append(
        dict(method='update', label=station, args=[{'visible': [station == s for s in stations]}])
    )
dropdown = dict(
    active=0, buttons=buttons, direction='down', showactive=True, x=1.1, y=1.1
)

# Update layout
fig.update_layout(
    updatemenus=[dropdown], height=600, width=900,
    title='WMATA Metro Entries by Selected Station and Date in March 2023', xaxis_title='Date', yaxis_title='Entries',
    yaxis=dict(range=[0, 3500])
)

# Show plot
fig.show()


In [22]:
import plotly.graph_objects as go

pivot_df

fig = go.Figure(data=[go.Table(
    header=dict(values=list(pivot_df.columns),
                fill_color='paleturquoise',
                align='left'),
    cells=dict(values=[df[0]],
               fill_color='lavender',
               align='left'))
])

fig.show()

KeyError: 0

# Index.qmd

After analyzing accessibility by foot and bike, we wanted to explore another dimension of walkability--the metro. In Washington, D.C. the metro is established and managed by the Washington Metropolitan Area Transit Authority (WMATA).

The plotly graph above shows the number of entries at the metro stations in the Washington, D.C. area in March 2023. The data is from the Metrorail Ridership Year-over-Year Change data [here](https://www.wmata.com/initiatives/ridership-portal/Metrorail-Ridership-Year-over-Year-Change.cfm). The graph is interactive, so you can hover over each line where a tooltip will reveal the number of entries for that specific day and station.

Overall, we see that there is a weekly pattern in ridership, where generally there are more entries on weekdays than weekends across almost all stations. This is perhaps due to the use of the metro to commute to work and activites on weekdays.

# Methods.qmd

### 4.1 Data Collection


### 4.2 Data Cleaning

In answering this question, we built an interactive timeseries plot of the Washington Metropolitan Area Transit Authority (WMATA) Metro entries by selected station and date in March 2023 using the Plotly library. Then, we cleaned the data by renaming the 'Date_This_Year' and 'Entry_This_Year' columns to 'Date' and 'Entries' respectively. We also converted the 'Date' column to datetime format.

### 4.3 Data Visualization