In [0]:
from pyspark.sql.types import DoubleType
from pyspark.sql.functions import col, lit, sum

database_name = "data_pipelines_tutorial"

# Population lookup table; joined below on country, region name and year.
pop_df = spark.read.table(f"{database_name}.kenya_subnational_population").alias("pop_df")

# Total `executed` amount per (country_name, year, geo1, func).
# NOTE: `sum` here is pyspark.sql.functions.sum (imported above), not the Python builtin.
gold_df = spark.read.table(f"{database_name}.kenya_gold")
agg_df = (
    gold_df
    .groupBy("country_name", "year", "geo1", "func")
    .agg(sum("executed").alias("expenditure"))
    .alias("agg_df")
)

# Spending rows are matched to population rows on country, region (geo1 vs. adm1_name) and year.
join_condition = (
    (agg_df.country_name == pop_df.country_name)
    & (agg_df.geo1 == pop_df.adm1_name)
    & (agg_df.year == pop_df.year)
)

# Inner join: spending rows with no matching population row are dropped.
# The select keeps the spending-side keys plus population, so the join keys
# are not duplicated; per-capita spending is then expenditure / population.
agg_with_pop_df = (
    agg_df
    .join(pop_df, join_condition, "inner")
    .select(
        "agg_df.country_name",
        "agg_df.year",
        "agg_df.geo1",
        "agg_df.func",
        "agg_df.expenditure",
        "pop_df.population",
    )
    .withColumn("per_capita_spending", col("expenditure") / col("population"))
)

# Persist the result as a Delta table, replacing any previous contents.
(
    agg_with_pop_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(f"{database_name}.kenya_func_geo1_agg")
)



In [0]:
# Render the aggregated expenditure / population / per-capita table for a visual check.
display(agg_with_pop_df)

country_name,year,geo1,func,expenditure,population,per_capita_spending
Kenya,2016,Tharaka Nithi,Education,7485580.0,423626,17.6702563109913
Kenya,2016,Murang’A,Education,201831546.95,1123956,179.5724627565492
Kenya,2016,Kisii,Health,2465505239.15,1355863,1818.4029206121857
Kenya,2017,Turkana,Education,517862834.55,977164,529.9651179842892
Kenya,2018,Kitui,Housing and community amenities,232698247.65000004,1236405,188.2055213704248
Kenya,2018,Siaya,Health,1600953117.8499997,1074637,1489.7617687181807
Kenya,2018,Uasin Gishu,Education,736232645.45,1244465,591.6057466059713
Kenya,2019,West Pokot,Environmental protection,12143635.0,682913,17.78211133775459
Kenya,2019,Laikipia,Education,141331865.0,569446,248.1918654270993
Kenya,2020,Meru,Education,1037800585.0,1721672,602.7864686188775


In [0]:
# Convert the Spark DataFrame to pandas; toPandas() collects every row onto the driver.
# NOTE(review): the recorded output for this cell lists extra columns
# (country_name.1, adm1_name, year.1) that the select() in the first cell drops —
# it looks like output from an earlier version of the join; re-run to refresh.
agg_with_pop_df.toPandas()

Unnamed: 0,country_name,year,geo1,func,expenditure,country_name.1,adm1_name,year.1,population,per_capita_spending
0,Kenya,2016,Tharaka Nithi,Education,7.485580e+06,Kenya,Tharaka Nithi,2016,423626,17.670256
1,Kenya,2016,Murang’A,Education,2.018315e+08,Kenya,Murang’A,2016,1123956,179.572463
2,Kenya,2016,Kisii,Health,2.465505e+09,Kenya,Kisii,2016,1355863,1818.402921
3,Kenya,2017,Turkana,Education,5.178628e+08,Kenya,Turkana,2017,977164,529.965118
4,Kenya,2018,Kitui,Housing and community amenities,2.326982e+08,Kenya,Kitui,2018,1236405,188.205521
...,...,...,...,...,...,...,...,...,...,...
2751,Kenya,2021,Embu,Public order and safety,2.219383e+08,Kenya,Embu,2021,690267,321.525323
2752,Kenya,2021,Kirinyaga,Public order and safety,2.088584e+08,Kenya,Kirinyaga,2021,689786,302.787177
2753,Kenya,2021,Lamu,General public services,8.848894e+08,Kenya,Lamu,2021,168905,5238.976865
2754,Kenya,2021,Laikipia,Economic affairs,1.269650e+09,Kenya,Laikipia,2021,598812,2120.281814
