# Cyclistic Bike-Share Analysis: Visualizing Rider Trends

## Visualization Objectives

Key analyses include:

- Rider segmentation
- Temporal usage patterns (daily, weekly, monthly)
- Ride duration analysis
- Bike type choices
- Station popularity and geographic patterns

## Setup and Configuration

### Environment Setup

In [None]:
# Import required libraries
import os
from IPython.display import Image, display, HTML
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import sys

# Parent of the current working directory; used below as the project root
_project_root = os.path.dirname(os.getcwd())

# Add the scripts directory to sys.path so `config` can be imported
sys.path.append(os.path.join(_project_root, "scripts"))

# Import config
from config import ANALYSIS_OUTPUTS_DIR, PROCESSED_DATA_DIR

# Folder that receives exported figures; created if it does not exist yet
FIGURES_DIR = os.path.join(_project_root, "results", "figures")
os.makedirs(FIGURES_DIR, exist_ok=True)


def set_visualization_style():
    """
    Set consistent style parameters for all visualizations.

    Loads the "seaborn-v0_8" matplotlib style sheet and then overrides font
    sizes and weights, the seaborn colour palette, grid appearance, figure
    DPI, default figure size and background colours. All settings are applied
    to the global matplotlib/seaborn configuration; nothing is returned.
    """
    # Set the style to seaborn for better default aesthetics
    plt.style.use("seaborn-v0_8")

    # Consistent font sizes
    small_size = 12  # For default text and tick labels
    medium_size = 14  # For axes titles, axis labels and legends
    large_size = 16  # For figure titles

    plt.rc("font", size=small_size)  # Default text sizes
    plt.rc("axes", titlesize=medium_size, titleweight="bold")  # Axes title
    # Axis-label weight is controlled by `labelweight`, not `titleweight`
    plt.rc("axes", labelsize=medium_size, labelweight="bold")  # X and Y labels
    plt.rc("xtick", labelsize=small_size)  # X tick labels
    plt.rc("ytick", labelsize=small_size)  # Y tick labels
    plt.rc("legend", fontsize=medium_size)  # Legend text
    plt.rc("figure", titlesize=large_size, titleweight="bold")  # Figure title

    # Set color palette
    sns.set_palette("Paired")

    # Dashed, slightly transparent grid on every axes
    plt.rc("axes", grid=True)
    plt.rc("grid", linestyle="--", alpha=0.7)

    # Figure defaults: high DPI, 12x8 inch canvas
    plt.rc("figure", dpi=500)
    plt.rc("figure", figsize=(12, 8))

    # White backgrounds for both the figure and the axes
    plt.rc("figure", facecolor="white")
    plt.rc("axes", facecolor="white")


# Custom formatting functions for consistent plot styling
def format_plot(ax, title, xlabel, ylabel, legend_title=None):
    """
    Give an axes the shared title, axis-label, legend and grid treatment.

    ax: axes-like object whose set_title/set_xlabel/set_ylabel/legend/grid
        methods are called.
    title, xlabel, ylabel: texts for the title and the two axis labels.
    legend_title: when truthy, a legend with this title is anchored just
        outside the upper-right corner of the axes; otherwise no legend call
        is made.
    """
    title_style = {"pad": 20, "fontweight": "bold"}
    ax.set_title(title, **title_style)

    for apply_label, text in ((ax.set_xlabel, xlabel), (ax.set_ylabel, ylabel)):
        apply_label(text)

    if legend_title:
        legend_options = {
            "title": legend_title,
            "bbox_to_anchor": (1.05, 1),
            "loc": "upper left",
        }
        ax.legend(**legend_options)

    # Dashed, semi-transparent grid on every formatted plot
    ax.grid(True, linestyle="--", alpha=0.7)


def format_pie_chart(ax, title):
    """
    Title a pie chart in the shared bold style.

    The title is placed with y=-0.1 and pad=20, the same padding and weight
    used by the other formatting helpers.
    """
    title_options = {"pad": 20, "fontweight": "bold", "y": -0.1}
    ax.set_title(title, **title_options)


def format_heatmap(ax, title, xlabel, ylabel):
    """
    Give a heatmap axes the shared bold title and its two axis labels.

    Unlike format_plot, no legend or grid call is made.
    """
    ax.set_title(title, pad=20, fontweight="bold")
    for apply_label, text in ((ax.set_xlabel, xlabel), (ax.set_ylabel, ylabel)):
        apply_label(text)


# NOTE: call set_visualization_style() before creating any figures so the shared styling applies

def save_all_figures(figures_dict):
    """
    Write every figure in figures_dict to FIGURES_DIR as "<name>.png".

    figures_dict maps a file stem to an object with a savefig method. Each
    figure is saved with a tight bounding box at 500 DPI and a confirmation
    line is printed per file.
    """
    for name in figures_dict:
        destination = os.path.join(FIGURES_DIR, f"{name}.png")
        figures_dict[name].savefig(destination, bbox_inches="tight", dpi=500)
        print(f"Saved {name} to {destination}")


In [None]:
# Apply the shared matplotlib/seaborn styling before any figure is created
set_visualization_style()
# Registry of figures keyed by output file stem; passed to save_all_figures at the end
figures_dict = {}

### Load Pre-analyzed Results

Analysis is based on Cyclistic trip data from November 2023 through October 2024. 

In [None]:
# Load all pre-processed analysis files
def _read_indexed_csv(directory, filename):
    """Read directory/filename with its first column used as the index."""
    return pd.read_csv(os.path.join(directory, filename), index_col=0)


# .squeeze() collapses a single-column frame to a Series and leaves
# multi-column frames unchanged.
cleaned_bike_data = _read_indexed_csv(
    PROCESSED_DATA_DIR, "cleaned_bike_data.csv"
).squeeze()
trips_by_rider_group = _read_indexed_csv(
    ANALYSIS_OUTPUTS_DIR, "trips_by_rider_group.csv"
).squeeze()
rider_group_distribution = _read_indexed_csv(
    ANALYSIS_OUTPUTS_DIR, "rider_group_distribution.csv"
).squeeze()
monthly_usage_pct = _read_indexed_csv(
    ANALYSIS_OUTPUTS_DIR, "monthly_usage_pct.csv"
).squeeze()
daily_usage_pct = _read_indexed_csv(
    ANALYSIS_OUTPUTS_DIR, "daily_usage_pct.csv"
).squeeze()
hourly_usage_pct = _read_indexed_csv(
    ANALYSIS_OUTPUTS_DIR, "hourly_usage_pct.csv"
).squeeze()
trip_duration_dist_pct = _read_indexed_csv(
    ANALYSIS_OUTPUTS_DIR, "trip_duration_dist_pct.csv"
).squeeze()

# The bike-preference tables are kept as DataFrames (no squeeze)
bike_preference_counts = _read_indexed_csv(
    ANALYSIS_OUTPUTS_DIR, "bike_preference_counts.csv"
)
bike_preference_pct = _read_indexed_csv(
    ANALYSIS_OUTPUTS_DIR, "bike_preference_pct.csv"
)

popular_start_stations = _read_indexed_csv(
    ANALYSIS_OUTPUTS_DIR, "popular_start_stations.csv"
).squeeze()
popular_end_stations = _read_indexed_csv(
    ANALYSIS_OUTPUTS_DIR, "popular_end_stations.csv"
).squeeze()

## Visuals

### Ride Counts by Group

Identifies the proportion of casual riders vs. members to assess potential conversion opportunities.


In [None]:
# Plotting a bar chart of total trips per rider group
set_visualization_style()
fig1, ax = plt.subplots()

trips_by_rider_group.plot(kind="bar", ax=ax)
format_plot(
    ax,
    # Date range matches the November 2023 - October 2024 period stated in
    # the "Load Pre-analyzed Results" section of this notebook.
    title="NUMBER OF TRIPS BY RIDER GROUP (NOV 2023 - OCT 2024)",
    xlabel="RIDER GROUP",
    ylabel="NUMBER OF TRIPS",
)
ax.tick_params(rotation=0)

# Label each bar with its thousands-separated count, 5,000 units above the bar top
for i, count in enumerate(trips_by_rider_group):
    ax.text(i, count + 5000, f"{count:,}", ha="center", va="bottom", fontsize=12)

plt.tight_layout()
figures_dict["rider_group_bar_chart"] = fig1
plt.show()


### Distribution of Rides

In [None]:
# Plotting a pie chart
set_visualization_style()
fig2, ax = plt.subplots()

# Percentage share per rider group; used only to build the wedge labels
share_pct = rider_group_distribution / rider_group_distribution.sum() * 100
wedge_labels = []
for group, pct in zip(rider_group_distribution.index, share_pct):
    wedge_labels.append(f"{group} ({pct:.0f}%)")

# Pull the first wedge out from the centre; the second stays in place
explode = (0.1, 0)
ax.pie(rider_group_distribution, labels=wedge_labels, explode=explode)
format_pie_chart(ax, "DISTRIBUTION OF ANNUAL RIDER ACTIVITY")

plt.tight_layout()
plt.show()
figures_dict["rider_group_pie"] = fig2


### Usage Patterns

Examines when users ride (daily, monthly, time of day) to optimize marketing efforts based on peak usage times.  


#### Monthly Trends

In [None]:
set_visualization_style()
fig3, ax = plt.subplots()

# One marked line per rider group: casual first, then member
month_labels = monthly_usage_pct.index
for column, legend_label in (
    ("casual", "Casual Riders"),
    ("member", "Annual Members"),
):
    ax.plot(month_labels, monthly_usage_pct[column], marker="o", label=legend_label)

format_plot(
    ax,
    "MONTHLY USAGE TREND BY RIDER GROUP",
    "MONTH",
    "TRIP DISTRIBUTION (%)",
    legend_title="RIDER GROUP",
)

# Slant the month labels
plt.xticks(rotation=45)
plt.tight_layout()
figures_dict["monthly_usage_line_plot"] = fig3
plt.show()

#### Weekly Trends

In [None]:
set_visualization_style()
fig4, ax = plt.subplots()
bar_width = 0.35

day_labels = daily_usage_pct.index
x = range(len(day_labels))
# Member bars sit one bar-width to the right of the casual bars;
# ticks are centred between each pair.
member_positions = [pos + bar_width for pos in x]
tick_positions = [pos + bar_width / 2 for pos in x]

ax.bar(x, daily_usage_pct["casual"], width=bar_width, label="Casual Riders")
ax.bar(
    member_positions,
    daily_usage_pct["member"],
    width=bar_width,
    label="Annual Members",
)

format_plot(
    ax,
    "WEEKLY USAGE TREND BY RIDER GROUP",
    "DAY",
    "TRIP DISTRIBUTION (%)",
    legend_title="RIDER GROUP",
)

plt.xticks(tick_positions, day_labels, rotation=45)
plt.tight_layout()
figures_dict["weekly_usage_grouped_bar_1"] = fig4
plt.show()

In [None]:
set_visualization_style()
fig5, ax = plt.subplots()

# Plot the transposed table: x-axis labelled RIDER GROUP, legend titled DAY
usage_by_group = daily_usage_pct.T
usage_by_group.plot(kind="bar", width=0.8, ax=ax)

format_plot(
    ax,
    "WEEKLY USAGE TREND BY RIDER GROUP",
    "RIDER GROUP",
    "TRIP DISTRIBUTION (%)",
    legend_title="DAY",
)
ax.tick_params(rotation=0)

plt.tight_layout()
figures_dict["weekly_usage_grouped_bar_2"] = fig5
plt.show()


#### Daily Period Trends

In [None]:
set_visualization_style()
fig6, ax = plt.subplots()

# Reorder the rows into chronological day periods before plotting
ordered_periods = ["Morning", "Afternoon", "Evening", "Night"]
hourly_usage_pct_ordered = hourly_usage_pct.reindex(index=ordered_periods)

# Plot the transposed table: x-axis labelled RIDER GROUP, one bar series per period
period_usage_by_group = hourly_usage_pct_ordered.T
period_usage_by_group.plot(kind="bar", width=0.8, ax=ax)

format_plot(
    ax,
    "DAILY PERIOD USAGE TREND BY RIDER GROUP",
    "RIDER GROUP",
    "TRIP DISTRIBUTION (%)",
    legend_title="DAILY PERIOD RANGE",
)

ax.tick_params(rotation=0)
plt.tight_layout()
figures_dict["daily_period_bar"] = fig6
plt.show()

#### Combined Monthly and Daily Usage

In [None]:
# Plotting side-by-side month x day heatmaps for casual riders and members
set_visualization_style()
monthly_usage = pd.read_csv(
    os.path.join(ANALYSIS_OUTPUTS_DIR, "monthly_usage_pct.csv"), index_col="month"
)
daily_usage = pd.read_csv(
    os.path.join(ANALYSIS_OUTPUTS_DIR, "daily_usage_pct.csv"), index_col="day_of_week"
)


def _average_share_grid(monthly, daily, group):
    """
    Build a month x day table for one rider group.

    Each cell is (monthly[group][month] + daily[group][day]) / 2, computed
    for all month/day pairs at once by broadcasting a column vector of
    monthly values against a row vector of daily values.

    NOTE(review): this is a plain average of two marginal percentages, not an
    observed month-by-day share - confirm that is the intended metric.
    """
    month_column = monthly[group].to_numpy(dtype=float)[:, None]
    day_row = daily[group].to_numpy(dtype=float)[None, :]
    return pd.DataFrame(
        (month_column + day_row) / 2, index=monthly.index, columns=daily.index
    )


heatmap_data_casual = _average_share_grid(monthly_usage, daily_usage, "casual")
heatmap_data_member = _average_share_grid(monthly_usage, daily_usage, "member")

# Larger fonts for this wide two-panel figure; this changes the global
# rcParams until set_visualization_style() is called again.
plt.rcParams.update(
    {
        "font.size": 16,
        "axes.titlesize": 20,
        "axes.labelsize": 18,
        "xtick.labelsize": 20,
        "ytick.labelsize": 20,
    }
)

fig7, (ax1, ax2) = plt.subplots(1, 2, figsize=(22, 12))

# Identical heatmap settings for both panels; only data, axes and title differ
for panel_ax, panel_data, panel_title in (
    (ax1, heatmap_data_casual, "CASUAL RIDERS"),
    (ax2, heatmap_data_member, "ANNUAL MEMBERS"),
):
    sns.heatmap(
        panel_data,
        cmap="YlOrRd",
        annot=True,
        fmt=".1f",
        annot_kws={"size": 18},
        cbar_kws={"shrink": 0.8},
        ax=panel_ax,
    )
    format_heatmap(panel_ax, panel_title, "DAY", "MONTH")

fig7.suptitle("AVERAGE DAILY RIDE SHARE", fontsize=24, fontweight="bold")
# Subtitle range matches the November 2023 - October 2024 period stated in
# the "Load Pre-analyzed Results" section of this notebook.
fig7.text(0.5, 0.92, "Nov 2023 – Oct 2024", ha="center", fontsize=20, color="gray")

plt.tight_layout(pad=2.0)
plt.show()
figures_dict["combined_usage_heatmap"] = fig7


### Ride Duration Analysis

In [None]:
# Plotting a boxplot of trip duration per rider group
set_visualization_style()
# Label-based column slice: every column from "member_casual" through "trip_duration"
subset_bike_data = cleaned_bike_data.loc[:, "member_casual":"trip_duration"]

fig8, ax = plt.subplots()
category_order = ["casual", "member"]

sns.boxplot(
    data=subset_bike_data,
    x="member_casual",
    y="trip_duration",
    hue="member_casual",
    legend=True,
    order=category_order,
    hue_order=category_order,
    fliersize=2,
    flierprops={"alpha": 0.5},
    ax=ax,  # draw on the axes created above instead of the implicit current axes
)

format_plot(
    ax, "DISTRIBUTION OF TRIP DURATIONS", "RIDER GROUP", "DURATION (minutes)"
)

# Limit the y-axis to the 95th percentile of trip duration
ylim = subset_bike_data["trip_duration"].quantile(0.95)
ax.set_ylim(0, ylim)

# Reindex to category_order so label i lines up with box i regardless of
# the order groupby returns the groups in.
medians = (
    subset_bike_data.groupby("member_casual")["trip_duration"]
    .median()
    .reindex(category_order)
)
for i, median in enumerate(medians):
    ax.text(
        i,
        median,
        f"Median: {median:.1f}",
        horizontalalignment="center",
        verticalalignment="bottom",
        color="black",
        bbox=dict(facecolor="white", edgecolor="black", boxstyle="square,pad=0.3"),
    )

# Replace the automatic legend entries with capitalised group names
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, ["Casual", "Member"], title="RIDER GROUP", loc="upper right")

plt.tight_layout()
figures_dict["ride_duration_boxplot"] = fig8
plt.show()

### Bike Choices by Rider Group

In [None]:
set_visualization_style()
fig9, ax = plt.subplots()

# Grouped bars straight from bike_preference_pct (one bar series per column)
bike_preference_pct.plot.bar(width=0.8, ax=ax)

format_plot(
    ax,
    "BIKE CHOICES BY RIDER GROUP",
    "RIDER GROUP",
    "TRIP DISTRIBUTION (%)",
    legend_title="BIKE TYPE",
)

ax.tick_params(rotation=0)

plt.tight_layout()
figures_dict["bike_choices_bar_chart"] = fig9
plt.show()

### Top 10 Destination Stations by Rider Group

In [None]:
set_visualization_style()
fig10, ax = plt.subplots()

# Ride count on x, station name on y, one hue per rider group
barplot_columns = {"x": "count", "y": "end_station_name", "hue": "member_casual"}
sns.barplot(data=popular_end_stations, ax=ax, **barplot_columns)

format_plot(
    ax,
    "TOP 10 DESTINATION STATIONS BY RIDER GROUP",
    "NUMBER OF RIDES",
    "STATIONS",
    legend_title="RIDER GROUP",
)

plt.tight_layout()
figures_dict["top_destination_stations_bar"] = fig10
plt.show()

### Geographic Distribution of Top Destination Stations

In [None]:
set_visualization_style()
# Move the index back into a regular column before handing the table to Plotly
popular_end_stations_reset = popular_end_stations.reset_index()

# Bubble map: marker size follows ride count, colour follows rider group
fig11 = px.scatter_map(
    popular_end_stations_reset,
    lat='latitude',
    lon='longitude',
    size='count',
    color='member_casual',
    hover_name='end_station_name',
    hover_data=['count'],
    title='TOP DESTINATION STATION PATTERNS BY RIDER GROUP',
    map_style='carto-positron',  # Updated from mapbox_style to map_style
    zoom=11,
    width=1000,
    height=600,
    size_max=30,
    color_discrete_map={
        'casual': '#FF9999',
        'member': '#66B2FF'
    }
)

# Centre the title and rename the legend
fig11.update_layout(
    title_x=0.5,
    legend_title_text='RIDER GROUP'
)

# Save the Plotly figure directly to results/figures/
html_filepath = os.path.join(FIGURES_DIR, 'top_destination_stations.html')
image_filepath = os.path.join(FIGURES_DIR, 'geographic_distribution_scatter.png')

# Save as HTML and PNG
# NOTE(review): static image export presumably needs Plotly's image-export
# backend (e.g. kaleido) installed - confirm in the environment.
fig11.write_html(html_filepath)
fig11.write_image(image_filepath, scale=2)

# Link to the interactive map with a forward-slash path relative to the
# working directory. An absolute filesystem path in href would be resolved
# against the notebook server's URL rather than the local disk.
html_href = os.path.relpath(html_filepath).replace(os.sep, "/")
display(HTML(f'<a href="{html_href}" target="_blank">Click here to view the interactive map</a>'))

# Display the saved image in the notebook (last expression of the cell)
Image(filename=image_filepath)

In [None]:
# Export every figure collected in figures_dict as a PNG under FIGURES_DIR
save_all_figures(figures_dict)