In [8]:
from colors import cat_colors
import pandas as pd
import altair as alt
import matplotlib.pyplot as plt
import csv

pd.set_option('display.float_format', '{:.2f}'.format)
#pd.set_option('display.max_rows', None)
#pd.set_option('display.max_columns', None)


In [9]:
cat_colors_df = pd.DataFrame([list(cat_colors.keys()), list(cat_colors.values())])

cat_colors_df = cat_colors_df.transpose().rename(columns={0: "category", 1: "color"})

What category of menu money spending is most prevalent? 
Query: group by (category) summarize (project cost, (count(total projects))

In [10]:
calls_money = pd.read_csv("../data/calls_money.csv")

calls_money["year"] = pd.to_numeric(calls_money["year"])

calls_money_18_23 = calls_money.query('year >= 2018 and year <= 2023')

calls_money_18_23 = calls_money_18_23.merge(cat_colors_df, on="category")

calls_money_18_23.head()

Unnamed: 0,year,category,ward,calls,num_projects,total_cost,color
0,2018,Beautification,1,1,0,0,#c9a1be
1,2018,Beautification,4,0,1,23069,#c9a1be
2,2018,Beautification,11,0,9,300739,#c9a1be
3,2018,Beautification,12,0,11,166436,#c9a1be
4,2018,Beautification,17,0,3,120000,#c9a1be


In [11]:
count_cats = pd.DataFrame(calls_money_18_23.groupby(['category', 'color']).size().rename('count').reset_index())

count_cats

Unnamed: 0,category,color,count
0,Beautification,#c9a1be,268
1,Bike Infrastructure,#e1ae7d,238
2,Lighting,#f3da76,300
3,Parks & Recreation,#9ccdc9,252
4,"Plants, Gardens, & Sustainability",#87be81,300
5,Schools & Libraries,#ffbac1,33
6,Security Cameras,#82a0c2,273
7,Streets & Transportation,#d37171,300


In [12]:
alt.Chart(count_cats).mark_bar().encode(
    y='category',
    x='count',
    color=alt.Color('color').scale(None),
    tooltip=['count'],
).interactive()


In [13]:
count_cats_cost = pd.DataFrame(calls_money_18_23.groupby(['category', 'color'])["total_cost"].sum().rename('sum_money').reset_index())

count_cats_cost

Unnamed: 0,category,color,sum_money
0,Beautification,#c9a1be,3881703
1,Bike Infrastructure,#e1ae7d,2574263
2,Lighting,#f3da76,33535280
3,Parks & Recreation,#9ccdc9,9069393
4,"Plants, Gardens, & Sustainability",#87be81,964927
5,Schools & Libraries,#ffbac1,3578399
6,Security Cameras,#82a0c2,9345600
7,Streets & Transportation,#d37171,347292351


In [14]:
alt.Chart(count_cats_cost).mark_bar().encode(
    y='category',
    x='sum_money',
    color=alt.Color('color').scale(None),
    tooltip=['sum_money'],
).interactive()

What’s the average cost of a project per category? Per ward? 
Query: group by (category, year) MEAN (project cost) 

In [15]:
avg_cost_cat = pd.DataFrame(calls_money_18_23.groupby(['category', 'color'])["total_cost"].mean().rename('avg_money').reset_index())

avg_cost_cat

Unnamed: 0,category,color,avg_money
0,Beautification,#c9a1be,14483.97
1,Bike Infrastructure,#e1ae7d,10816.23
2,Lighting,#f3da76,111784.27
3,Parks & Recreation,#9ccdc9,35989.65
4,"Plants, Gardens, & Sustainability",#87be81,3216.42
5,Schools & Libraries,#ffbac1,108436.33
6,Security Cameras,#82a0c2,34232.97
7,Streets & Transportation,#d37171,1157641.17


In [16]:
alt.Chart(avg_cost_cat).mark_bar().encode(
    y='category',
    x='avg_money',
    color=alt.Color('color').scale(None),
    tooltip=['avg_money'],
).interactive()

In [17]:
med_cost_cat = pd.DataFrame(calls_money_18_23.groupby(['category', 'color'])["total_cost"].median().rename('median_money').reset_index())

med_cost_cat

Unnamed: 0,category,color,median_money
0,Beautification,#c9a1be,0.0
1,Bike Infrastructure,#e1ae7d,0.0
2,Lighting,#f3da76,2000.0
3,Parks & Recreation,#9ccdc9,0.0
4,"Plants, Gardens, & Sustainability",#87be81,0.0
5,Schools & Libraries,#ffbac1,55000.0
6,Security Cameras,#82a0c2,0.0
7,Streets & Transportation,#d37171,1218048.5


In [18]:
alt.Chart(med_cost_cat).mark_bar().encode(
    y='category',
    x='median_money',
    color=alt.Color('color').scale(None),
    tooltip=['median_money'],
).interactive()

In [19]:
avg_cost_ward = pd.DataFrame(calls_money_18_23.groupby(['ward', 'year', 'category', 'color'])["total_cost"].mean().rename('avg_money').reset_index())

avg_cost_ward

Unnamed: 0,ward,year,category,color,avg_money
0,1,2018,Beautification,#c9a1be,0.00
1,1,2018,Lighting,#f3da76,0.00
2,1,2018,Parks & Recreation,#9ccdc9,150000.00
3,1,2018,"Plants, Gardens, & Sustainability",#87be81,0.00
4,1,2018,Schools & Libraries,#ffbac1,470100.00
...,...,...,...,...,...
1959,50,2023,Lighting,#f3da76,327500.00
1960,50,2023,Parks & Recreation,#9ccdc9,0.00
1961,50,2023,"Plants, Gardens, & Sustainability",#87be81,0.00
1962,50,2023,Security Cameras,#82a0c2,0.00


In [28]:
num_projects_ward = pd.DataFrame(calls_money_18_23.groupby(['ward']).size().rename('count').reset_index())

num_projects_ward

Unnamed: 0,ward,count
0,1,43
1,2,39
2,3,39
3,4,39
4,5,38
5,6,39
6,7,37
7,8,37
8,9,37
9,10,39


In [29]:
alt.Chart(num_projects_ward).mark_bar().encode(
    x='ward',
    y='count',
    #color=alt.Color('color').scale(None),
    tooltip=['ward', 'count'],
).interactive()

In [94]:
med_cost_ward = pd.DataFrame(calls_money_18_23.groupby(['ward'])["total_cost"].median().rename('med_money').reset_index())

med_cost_ward

Unnamed: 0,ward,med_money
0,1,500.0
1,2,2700.0
2,3,0.0
3,4,8000.0
4,5,0.0
5,6,0.0
6,7,0.0
7,8,0.0
8,9,0.0
9,10,0.0


How many projects are funded every year PER ward?
Query: group by (category, year, ward), summarize (count(total projects))