In [None]:
# plotly
import csv
import pathlib
from collections import defaultdict
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode
# Initialise plotly's notebook integration once, before any figure is shown.
# NOTE(review): connected=True presumably makes the notebook load plotly.js
# from a CDN rather than embedding it -- confirm against the plotly.offline docs.
init_notebook_mode(connected=True)

# Input CSV: resolved two directories above the current working directory,
# inside the "30122-project-gitmoney" checkout. The result therefore depends
# on where the notebook kernel was started.
csv_file = pathlib.Path.cwd().parent.parent.joinpath(
    "30122-project-gitmoney", "data", "calls_money.csv"
)

# When this code lives in a script instead of a notebook, the path can be
# anchored on the file itself rather than on the working directory:
#   pathlib.Path(__file__).parent.parent / "data" / "calls_money.csv"
print(f"File path: {csv_file}")


def plot_calls_by_year_and_ward(csv_file: pathlib.Path):
    """
    Plot four interactive stacked bar charts: calls and money spent by year and ward, categorized.

    Every CSV row is summed per (year, category) and per (ward, category).
    The two year charts only include rows whose year lies in 2019-2023;
    the two ward charts include every row in the file.

    Parameters:
        csv_file (Path): Path to CSV file with year, ward, category, calls, and total_cost

    Returns:
        None (displays four plots)

    Raises:
        OSError: if the CSV file cannot be opened.
        KeyError: if one of the required columns is missing from the header.
        ValueError: if a numeric column holds a value that cannot be parsed.
    """
    float_fields = ('calls', 'total_cost')
    int_fields = ('year', 'ward')
    first_year, last_year = 2019, 2023

    def parse_row(row):
        """Return the row with stripped header names and numeric columns converted."""
        parsed = {}
        for raw_key, value in row.items():
            # Strip *before* deciding how to convert, so a padded header such
            # as " calls" is still recognised as a numeric column.
            key = raw_key.strip()
            if key in float_fields:
                parsed[key] = float(value)
            elif key in int_fields:
                # Going through float() also accepts integers written as "2019.0".
                parsed[key] = int(float(value))
            else:
                parsed[key] = value
        return parsed

    # Nested totals: outer key is the year/ward, inner key is the category.
    calls_by_year = defaultdict(lambda: defaultdict(float))
    calls_by_ward = defaultdict(lambda: defaultdict(float))
    money_by_year = defaultdict(lambda: defaultdict(float))
    money_by_ward = defaultdict(lambda: defaultdict(float))

    # Read and aggregate in a single pass; newline='' is what the csv module
    # expects so that quoted fields containing line breaks parse correctly.
    with open(csv_file, 'r', encoding='utf-8', newline='') as f:
        for raw_row in csv.DictReader(f):
            entry = parse_row(raw_row)
            year = entry['year']
            ward = entry['ward']
            category = entry['category']
            calls = entry['calls']
            cost = entry['total_cost']

            if first_year <= year <= last_year:
                calls_by_year[year][category] += calls
                money_by_year[year][category] += cost
            # NOTE(review): ward totals are not restricted to 2019-2023,
            # unlike the year totals -- confirm this is intended.
            calls_by_ward[ward][category] += calls
            money_by_ward[ward][category] += cost

    # Extract sorted keys
    years = sorted(calls_by_year)
    wards = sorted(calls_by_ward)
    # Collect categories from both aggregates: the ward totals cover every
    # row, so a category seen only outside 2019-2023 must not be dropped
    # from the ward charts.
    categories = sorted({
        category
        for aggregate in (calls_by_year, calls_by_ward)
        for per_category in aggregate.values()
        for category in per_category
    })

    # 10-color palette; colors repeat once there are more categories than entries.
    color_palette = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
        '#ffcc00', '#e377c2', '#7f7f7f', '#17becf', '#75701F'
    ]
    if len(categories) > len(color_palette):
        print(f"Warning: {len(categories)} categories exceed palette size ({len(color_palette)}).")
    # One shared mapping so a category keeps the same color in all four charts.
    category_colors = {cat: color_palette[i % len(color_palette)] for i, cat in enumerate(categories)}

    def plot_stacked_bars(x_values, data_dict, title, x_label, y_label, rotate_x=False):
        """Show one stacked bar chart with a bar segment per category for each x value."""
        # Only draw categories that occur in this chart's data, so the legend
        # does not list categories whose bars would all be zero.
        shown = [
            cat for cat in categories
            if any(cat in data_dict[x] for x in x_values)
        ]
        traces = [
            go.Bar(
                x=x_values,
                y=[data_dict[x].get(cat, 0) for x in x_values],
                name=cat,
                # hovertemplate takes precedence over hoverinfo, so the
                # previously passed hoverinfo='y+name' had no effect.
                hovertemplate='%{y}<extra></extra>',
                marker=dict(color=category_colors[cat]),
                width=0.5 if rotate_x else 0.8,
            )
            for cat in shown
        ]

        layout = go.Layout(
            title=dict(text=title, font_size=16),
            barmode='stack',
            xaxis=dict(
                title=x_label,
                tickangle=45 if rotate_x else 0,
                tickfont=dict(size=12 if rotate_x else 14),
                # Explicit ticks: one label per year/ward, shown as an integer.
                tickmode='array',
                tickvals=x_values,
                ticktext=[str(int(x)) for x in x_values],
                automargin=True,
                ticklen=8,
                tickwidth=1
            ),
            yaxis=dict(title=y_label),
            hovermode='x unified',
            template="plotly_white",
            legend=dict(title="Categories", x=1, y=1, bgcolor="rgba(255,255,255,0.5)"),
            width=1000 if rotate_x else 800
        )

        fig = go.Figure(data=traces, layout=layout)
        fig.show()

    # Plot all four charts
    plot_stacked_bars(years, calls_by_year, '311 Calls by Year and Category (2019-2023)', 'Year', 'Number of 311 Calls', rotate_x=False)
    plot_stacked_bars(wards, calls_by_ward, '311 Calls by Ward and Category', 'Ward', 'Number of 311 Calls', rotate_x=True)
    plot_stacked_bars(years, money_by_year, 'Money Spent by Year and Category (2019-2023)', 'Year', 'Total Money Spent ($)', rotate_x=False)
    plot_stacked_bars(wards, money_by_ward, 'Money Spent by Ward and Category', 'Ward', 'Total Money Spent ($)', rotate_x=True)

# Entry point: draw the four charts from the CSV located by csv_file above.
# The __name__ guard keeps the plots from being rendered if this code is
# imported as a module instead of being executed directly.
if __name__ == "__main__":
    plot_calls_by_year_and_ward(csv_file)

File path: c:\Users\getne\30122-project-gitmoney\data\calls_money.csv


In [10]:
import csv
import pathlib
from collections import defaultdict
import altair as alt
import pandas as pd

# Input CSV: resolved two directories above the current working directory,
# inside the "30122-project-gitmoney" checkout, so the result depends on
# where the notebook kernel was started.
csv_file = pathlib.Path.cwd().parent.parent.joinpath(
    "30122-project-gitmoney", "data", "calls_money.csv"
)
print(f"File path: {csv_file}")

def plot_calls_by_year_and_ward(csv_file: pathlib.Path):
    """
    Plot four separate stacked bar charts: calls and money spent by year and ward, categorized.

    The CSV is loaded with pandas, restricted to rows whose year lies in
    2019-2023 (this applies to the ward charts as well), summed per
    (year, category) and per (ward, category), and rendered with Altair.

    Parameters:
        csv_file (Path): Path to CSV file with year, ward, category, calls, and total_cost

    Returns:
        None (displays four plots)
    """
    # Load CSV data directly into a pandas DataFrame
    df = pd.read_csv(csv_file)

    # 'year' and 'ward' become integers, 'calls' and 'total_cost' floats.
    df = df.astype({'year': int, 'ward': int, 'calls': float, 'total_cost': float})

    # Keep only 2019-2023 (between() is inclusive on both ends).
    df = df[df['year'].between(2019, 2023)]

    def totals(key_col, value_col):
        """Sum value_col for every (key_col, category) pair."""
        return df.groupby([key_col, 'category'])[value_col].sum().reset_index()

    calls_by_year = totals('year', 'calls')
    money_by_year = totals('year', 'total_cost')
    calls_by_ward = totals('ward', 'calls')
    money_by_ward = totals('ward', 'total_cost')

    # Define the color palette for the categories
    color_palette = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
        '#ffcc00', '#e377c2', '#7f7f7f', '#17becf', '#75701F'
    ]
    # Sort the categories so the color assignment and legend order do not
    # depend on the order of rows in the CSV. Missing categories are skipped;
    # groupby already leaves those rows out of the aggregates.
    categories = sorted(df['category'].dropna().unique())
    if len(categories) > len(color_palette):
        # Colors wrap around below, so distinct categories would share a color.
        print(f"Warning: {len(categories)} categories exceed palette size ({len(color_palette)}).")
    category_colors = {cat: color_palette[i % len(color_palette)] for i, cat in enumerate(categories)}

    def plot_stacked_bars(data, x_col, y_col, color_col, title, x_label, y_label):
        """Build one stacked bar chart of y_col by x_col, colored by color_col."""
        chart = alt.Chart(data).mark_bar().encode(
            x=alt.X(f'{x_col}:O', title=x_label),  # ordinal x-axis: one band per year/ward
            y=alt.Y(f'sum({y_col}):Q', title=y_label),  # data is pre-aggregated; sum() keeps one value per segment
            # Explicit domain/range pins every category to its palette color.
            color=alt.Color(
                color_col,
                scale=alt.Scale(
                    domain=list(category_colors.keys()),
                    range=list(category_colors.values()),
                ),
            ),
            tooltip=[x_col, alt.Tooltip(f'sum({y_col}):Q', format='.2f'), color_col]
        ).properties(
            title=title,
            width=600,  # fixed size so all four charts match
            height=400
        ).configure_axis(
            labelAngle=0
        )
        return chart

    # Build each of the four separate charts
    charts = [
        plot_stacked_bars(calls_by_year, 'year', 'calls', 'category', '311 Calls by Year and Category (2019-2023)', 'Year', 'Number of 311 Calls'),
        plot_stacked_bars(calls_by_ward, 'ward', 'calls', 'category', '311 Calls by Ward and Category', 'Ward', 'Number of 311 Calls'),
        plot_stacked_bars(money_by_year, 'year', 'total_cost', 'category', 'Money Spent by Year and Category (2019-2023)', 'Year', 'Total Money Spent ($)'),
        plot_stacked_bars(money_by_ward, 'ward', 'total_cost', 'category', 'Money Spent by Ward and Category', 'Ward', 'Total Money Spent ($)')
    ]

    # Display all charts
    for chart in charts:
        chart.show()

# Entry point: draw the four charts from the CSV located by csv_file above.
# The __name__ guard keeps the plots from being rendered if this code is
# imported as a module instead of being executed directly.
if __name__ == "__main__":
    plot_calls_by_year_and_ward(csv_file)


File path: c:\Users\getne\30122-project-gitmoney\data\calls_money.csv
