In [109]:
import os
import pandas as pd
# pandas can use Plotly as its plotting backend, but for the sake of this exercise we will use Plotly (a Python data visualization library) directly
# pd.options.plotting.backend = "plotly"
import plotly.graph_objects as go
import plotly.express as px
from datetime import (
    datetime, 
    timedelta
)
# Make sure ipykernel is installed

In [110]:
# OpenData DSNY Monthly Tonnage Data:
tonnage_filename = "DSNY_Monthly_Tonnage_Data_20240815.csv"
tonnage_data = os.path.join(".", tonnage_filename)

# Wall-clock time of this run (only used as a fallback below).
todays_date = datetime.now()

# Anchor the reporting month to the snapshot date embedded in the filename
# (the trailing YYYYMMDD) instead of the wall clock. The CSV is a fixed
# snapshot, so "last month relative to today" stops existing in the data as
# soon as the notebook is re-run in a later month.
snapshot_stamp = os.path.splitext(tonnage_filename)[0].rsplit("_", 1)[-1]
try:
    snapshot_date = datetime.strptime(snapshot_stamp, "%Y%m%d")
except ValueError:
    # The filename has no parsable date stamp: fall back to today's date.
    snapshot_date = todays_date

# Last day of the month before the snapshot (first of that month minus one day).
yester_month = snapshot_date.replace(day=1) - timedelta(days=1)

In [111]:
# Load the CSV into a dataframe (df) for easy manipulation with Python.
# dtype="object" reads every column as text so nothing is type-guessed on load;
# read_csv already returns a DataFrame, so no extra wrapping is needed.
source_df = pd.read_csv(tonnage_data, encoding="utf-8", dtype="object")

# Make sure all NA values are empty strings
source_df = source_df.fillna("")

# Convert the tonnage columns to numbers with one vectorised call per column.
# errors="coerce" turns anything unparsable (including the "" placeholders
# written above) into NaN.
for tonnage_column in ("REFUSETONSCOLLECTED", "PAPERTONSCOLLECTED"):
    source_df[tonnage_column] = pd.to_numeric(source_df[tonnage_column], errors="coerce")

# Sort by district; reassign rather than mutating the frame in place.
source_df = source_df.sort_values(["COMMUNITYDISTRICT"])

target_columns = ["MONTH", "BOROUGH", "COMMUNITYDISTRICT", "REFUSETONSCOLLECTED", "PAPERTONSCOLLECTED"]
refuse_and_paper_df = source_df[target_columns]

# Month key used to select the reporting month from the MONTH column.
# Note: %m is zero-padded, so this produces e.g. "2024 / 07" (not "2024 / 7").
get_month = datetime.strftime(yester_month, '%Y / %m')


def filter_dataframe(df):
    """Return only the rows of df whose MONTH column equals get_month."""
    return df[df["MONTH"] == get_month]


refuse_and_paper_df = filter_dataframe(refuse_and_paper_df)


In [112]:
# CHART OPTIONS

# Column-to-display-name entries that every label mapping below shares
_shared_labels = {
    "COMMUNITYDISTRICT": "District",
    "BOROUGH": "Borough",
    "MONTH": "Month",
}

# Shared labels plus a display name for the aggregated "count" column
total_district_labels = dict(_shared_labels, count="Total Districts")

# Shared labels plus a display name for the aggregated "mean" column
average_chart_labels = dict(_shared_labels, mean="Average Refuse (Tons)")

# Display names for the raw tonnage columns, followed by the shared labels
general_labels = {
    "REFUSETONSCOLLECTED": "Refuse (Tons)",
    "PAPERTONSCOLLECTED": "Paper Refuse (Tons)",
    **_shared_labels,
}

# Borough colors based off flags
borough_colors = dict([
    ("Bronx", "#FE6635"),
    ("Brooklyn", "#E4A849"),
    ("Manhattan", "#004B8D"),
    ("Queens", "#0084FF"),
    ("Staten Island", "#4E6956"),
])

In [113]:
# Bubble (scatter) chart: number of COMMUNITYDISTRICT rows per borough,
# with the count driving both the y position and the marker size
districts_by_borough = refuse_and_paper_df.groupby("BOROUGH")["COMMUNITYDISTRICT"]
total_districts_df = districts_by_borough.agg(["count"]).reset_index()

fig1 = px.scatter(
    total_districts_df,
    x="BOROUGH",
    y="count",
    color="BOROUGH",
    color_discrete_map=borough_colors,
    size="count",
    size_max=80,
    labels=total_district_labels,
    title="Total Districts in Each Borough (2024)",
)
fig1.show()

In [114]:
# Bar chart: refuse tonnage for each community district, colored by borough
fig2 = px.bar(
    refuse_and_paper_df,
    x="COMMUNITYDISTRICT",
    y="REFUSETONSCOLLECTED",
    color="BOROUGH",
    color_discrete_map=borough_colors,
    barmode="group",
    labels=general_labels,
    title="Total Refuse Collected by District (July 2024)",
)
fig2.show()

In [115]:
# Exercise 1.0
# Create a Plotly bar graph like fig2, but plot PAPERTONSCOLLECTED instead of REFUSETONSCOLLECTED.
# Hint: general_labels already has a display name for PAPERTONSCOLLECTED.
# Replace the None placeholder below with your figure.
fig3 = None

In [116]:
# TOTAL REFUSE COLLECTED BY BOROUGH
# Create a new dataframe with the total refuse collected per borough
sum_df = refuse_and_paper_df.groupby("BOROUGH")["REFUSETONSCOLLECTED"].agg(["sum"]).reset_index()

# Slice sizes (y) and slice labels (x), taken from the same rows so they stay aligned
Sum_Pie_y = sum_df["sum"].tolist()
Sum_Pie_x = sum_df["BOROUGH"].tolist()

# Look up each slice's color by borough name so colors always line up with
# their labels. Taking borough_colors.values() positionally would shift every
# later color as soon as one borough is missing from the data.
# Boroughs without a configured color fall back to a neutral grey.
Sum_Pie_colors = [borough_colors.get(borough, "#888888") for borough in Sum_Pie_x]


def find_largest_item(value):
    """Return the borough label whose total equals value (first match)."""
    return Sum_Pie_x[Sum_Pie_y.index(value)]


# Largest total and the borough it belongs to. Both are None when the
# selection is empty, instead of max() raising ValueError on an empty list.
max_value = max(Sum_Pie_y, default=None)
largest_item = find_largest_item(value=max_value) if Sum_Pie_y else None

# Pull the largest borough's slice out of the pie; every other slice stays in place
dynamic_pie_pull = [0.3 if borough == largest_item else 0.0 for borough in Sum_Pie_x]

fig4 = go.Figure(
    data=[
        go.Pie(
            values=Sum_Pie_y,
            labels=Sum_Pie_x,
            hole=.3,
            pull=dynamic_pie_pull,
            textinfo="percent+label",
            marker={"colors": Sum_Pie_colors}
        )
    ],
    layout={
        "title": "Borough Share of Total Refuse Collected (July 2024)"
    }
)
fig4.show()

In [117]:
# Exercise 1.1
# Create a Plotly pie chart similar to fig4, but showing each borough's share of total PAPERTONSCOLLECTED.
# Replace the None placeholder below with your figure.
fig5 = None

In [118]:
# Average refuse collected per borough: the mean of REFUSETONSCOLLECTED
# over each borough's rows in the filtered dataframe
mean_df = refuse_and_paper_df.groupby("BOROUGH")["REFUSETONSCOLLECTED"].agg(["mean"]).reset_index()
fig6 = px.bar(
    # Title typo fixed: "Brough" -> "Borough"
    title="Average Refuse Collected by Borough (July 2024)",
    data_frame=mean_df,
    x="BOROUGH",
    y="mean",
    labels=average_chart_labels,
    color="BOROUGH",
    color_discrete_map=borough_colors
)
fig6.show()

In [None]:
# Exercise 1.2
# Create a Plotly bar graph like fig6, but showing the average PAPERTONSCOLLECTED for each borough.
# Replace the None placeholder below with your figure.
fig7 = None