In [0]:
from pyspark.sql.functions import col

# Name of the database that will hold the tutorial tables
database_name = "data_pipelines_tutorial"

# Input location, built up as <TOP_DIR>/Workspace/microdata_csv/<COUNTRY>
TOP_DIR = "/mnt/DAP/data/BOOSTProcessed"
WORKSPACE_DIR = TOP_DIR + "/Workspace"
COUNTRY = 'Kenya'
DATA_DIR = "/".join([WORKSPACE_DIR, "microdata_csv", COUNTRY])

# Options passed to the Spark CSV reader:
#   header    - the first line of each file holds the column names
#   multiline - a quoted field may span several physical lines
#   quote     - fields are wrapped in double quotes
#   escape    - a double quote inside a quoted field is escaped by another double quote
CSV_READ_OPTIONS = dict(
    header="true",
    multiline="true",
    quote='"',
    escape='"',
)

# Read the CSV data found at DATA_DIR into the raw ("bronze") DataFrame.
# The shared CSV_READ_OPTIONS are extended with inferSchema so Spark derives
# column types from the data.
bronze_df = (
    spark.read
    .options(**{**CSV_READ_OPTIONS, "inferSchema": "true"})
    .format("csv")
    .load(DATA_DIR)
)

# Clean column names so they can be stored as Delta table columns.
# Delta (without column mapping) rejects names containing any of ' ,;{}()\n\t=':
# whitespace becomes "_", the remaining characters are dropped.
_COLUMN_NAME_TRANSLATION = str.maketrans(" \n\t", "___", "(),;{}=")


def clean_column_name(name: str) -> str:
    """Return ``name`` with characters that are invalid in Delta column names removed.

    Spaces, newlines and tabs are replaced by ``_``; ``( ) , ; { } =`` are deleted.
    Every other character (for example ``&``, ``/``, ``.`` or ``-``) is kept
    unchanged, so names that were already produced by the previous space/paren/comma
    cleaning stay exactly the same.
    """
    return name.translate(_COLUMN_NAME_TRANSLATION)


# Rename all columns positionally in a single projection. Calling
# withColumnRenamed once per column would stack one projection per column
# in the query plan.
bronze_df = bronze_df.toDF(*[clean_column_name(c) for c in bronze_df.columns])


# Create the target database on the first run; does nothing when it already exists.
spark.sql(f"CREATE DATABASE IF NOT EXISTS {database_name}")

# Derive the table name from COUNTRY so it always matches the data that was read
# from DATA_DIR (e.g. 'Kenya' -> 'kenya_bronze'). A hard-coded name would let a
# different COUNTRY silently overwrite the Kenya table.
bronze_table_name = COUNTRY.lower().replace(" ", "_") + "_bronze"

# Save to the bronze table in Delta format; "overwrite" replaces the table's
# existing contents on every run, so re-running the cell does not append duplicates.
(bronze_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(f"{database_name}.{bronze_table_name}"))


In [0]:
# Preview the bronze DataFrame in the notebook output to sanity-check the
# loaded rows and the cleaned column names.
display(bronze_df)

Year,Class,Vote_Groups_adm1,National_Government_Votes_&_Counties_adm2,Sub_Votes_adm3,Head/Dept_adm4,SubHead/SubDept_adm5,County_Government_Ministries,National/County_Geo1,Counties_Geo2,District/Constituency_Geo3,Wards_Geo4,Category_econ1,Chapter_econ2,Sub-chapter_econ3,Item_econ4,Sub-Item_econ5,SOF2,Sector_prog1,Programme_pro2,Sub-programme_prog3,Initial_Budget_Printed_Estimate,Final_Budget_Approved_Estimate,Final_Expenditure_Total_Payment_Comm.,environment,irrigation,energy,transport,recreation
2014/15,1 Development Expenditure,3 Counties,301 Mombasa County,3011 County Assembly,30110001 Mombasa County- County Assembly HeadQuarters,3011000101 Headquarters,01 County Assembly,02 Counties,3010 Mombasa County,301000 Mombasa County,30100001 Mombasa County-Headquarters,3 Acquisition of Non Financial Assets,31 Acquisition Of Non- Financial Assets,311 Acquisition Of Fixed Capital Assets,31105 Construction And Civil Works,3110504 Other Infrastructure And Civil Works,00 Domestic Resources,07 Public Administration And International Relations,0702 Cabinet Affairs,070201 Entrepreneurial and Management Training,140000000.0,222974584.0,222672326.25,,,,,0
2014/15,1 Development Expenditure,3 Counties,301 Mombasa County,3011 County Assembly,30110001 Mombasa County- County Assembly HeadQuarters,3011000101 Headquarters,01 County Assembly,02 Counties,3010 Mombasa County,301000 Mombasa County,30100001 Mombasa County-Headquarters,3 Acquisition of Non Financial Assets,31 Acquisition Of Non- Financial Assets,311 Acquisition Of Fixed Capital Assets,31107 Purchase Of Vehicles & Other Transport Equipment,3110701 Purchase Of Motor Vehicles,00 Domestic Resources,07 Public Administration And International Relations,0702 Cabinet Affairs,070201 Entrepreneurial and Management Training,0.0,22000000.0,10052290.0,,,,,0
2014/15,1 Development Expenditure,3 Counties,301 Mombasa County,3011 County Assembly,30110001 Mombasa County- County Assembly HeadQuarters,3011000101 Headquarters,01 County Assembly,02 Counties,3010 Mombasa County,301000 Mombasa County,30100001 Mombasa County-Headquarters,3 Acquisition of Non Financial Assets,31 Acquisition Of Non- Financial Assets,311 Acquisition Of Fixed Capital Assets,31107 Purchase Of Vehicles & Other Transport Equipment,3110705 Purchase Of Trucks And Trailers,00 Domestic Resources,07 Public Administration And International Relations,0702 Cabinet Affairs,070201 Entrepreneurial and Management Training,22000000.0,227344.0,0.0,,,,,0
2014/15,1 Development Expenditure,3 Counties,301 Mombasa County,3011 County Assembly,30110001 Mombasa County- County Assembly HeadQuarters,3011000101 Headquarters,01 County Assembly,02 Counties,3010 Mombasa County,301000 Mombasa County,30100001 Mombasa County-Headquarters,3 Acquisition of Non Financial Assets,31 Acquisition Of Non- Financial Assets,311 Acquisition Of Fixed Capital Assets,31107 Purchase Of Vehicles & Other Transport Equipment,3110705 Purchase Of Trucks And Trailers,00 Domestic Resources,07 Public Administration And International Relations,0702 Cabinet Affairs,070205,3800000.0,1536514.0,0.0,,,,,0
2014/15,1 Development Expenditure,3 Counties,301 Mombasa County,3011 County Assembly,30110001 Mombasa County- County Assembly HeadQuarters,3011000101 Headquarters,01 County Assembly,02 Counties,3010 Mombasa County,301000 Mombasa County,30100001 Mombasa County-Headquarters,3 Acquisition of Non Financial Assets,31 Acquisition Of Non- Financial Assets,311 Acquisition Of Fixed Capital Assets,31107 Purchase Of Vehicles & Other Transport Equipment,3110708 Purchase Of Minibuses And Buses,00 Domestic Resources,07 Public Administration And International Relations,0702 Cabinet Affairs,070201 Entrepreneurial and Management Training,0.0,25000000.0,23557357.0,,,,,0
2014/15,1 Development Expenditure,3 Counties,301 Mombasa County,3011 County Assembly,30110001 Mombasa County- County Assembly HeadQuarters,3011000101 Headquarters,01 County Assembly,02 Counties,3010 Mombasa County,301000 Mombasa County,30100001 Mombasa County-Headquarters,3 Acquisition of Non Financial Assets,31 Acquisition Of Non- Financial Assets,311 Acquisition Of Fixed Capital Assets,31109 Purch. Of Household Furn. & Institutional Eqpt.,3110902 Purchase Of Household And Institutional Appliances,00 Domestic Resources,07 Public Administration And International Relations,0702 Cabinet Affairs,070201 Entrepreneurial and Management Training,1000000.0,0.0,0.0,,,,,0
2014/15,1 Development Expenditure,3 Counties,301 Mombasa County,3011 County Assembly,30110001 Mombasa County- County Assembly HeadQuarters,3011000101 Headquarters,01 County Assembly,02 Counties,3010 Mombasa County,301000 Mombasa County,30100001 Mombasa County-Headquarters,3 Acquisition of Non Financial Assets,31 Acquisition Of Non- Financial Assets,311 Acquisition Of Fixed Capital Assets,31109 Purch. Of Household Furn. & Institutional Eqpt.,3110902 Purchase Of Household And Institutional Appliances,00 Domestic Resources,07 Public Administration And International Relations,0702 Cabinet Affairs,070205,1800000.0,0.0,0.0,,,,,0
2014/15,1 Development Expenditure,3 Counties,301 Mombasa County,3011 County Assembly,30110001 Mombasa County- County Assembly HeadQuarters,3011000101 Headquarters,01 County Assembly,02 Counties,3010 Mombasa County,301000 Mombasa County,30100001 Mombasa County-Headquarters,3 Acquisition of Non Financial Assets,31 Acquisition Of Non- Financial Assets,311 Acquisition Of Fixed Capital Assets,31110 Purch. Of Office Furn. & General Eqpt.,3111001 Purchase Of Office Furniture And Fittings,00 Domestic Resources,07 Public Administration And International Relations,0702 Cabinet Affairs,070201 Entrepreneurial and Management Training,10000000.0,53122319.0,29934159.0,,,,,0
2014/15,1 Development Expenditure,3 Counties,301 Mombasa County,3011 County Assembly,30110001 Mombasa County- County Assembly HeadQuarters,3011000101 Headquarters,01 County Assembly,02 Counties,3010 Mombasa County,301000 Mombasa County,30100001 Mombasa County-Headquarters,3 Acquisition of Non Financial Assets,31 Acquisition Of Non- Financial Assets,311 Acquisition Of Fixed Capital Assets,31110 Purch. Of Office Furn. & General Eqpt.,3111001 Purchase Of Office Furniture And Fittings,00 Domestic Resources,07 Public Administration And International Relations,0702 Cabinet Affairs,070205,2100000.0,648100.0,648100.0,,,,,0
2014/15,1 Development Expenditure,3 Counties,301 Mombasa County,3011 County Assembly,30110001 Mombasa County- County Assembly HeadQuarters,3011000101 Headquarters,01 County Assembly,02 Counties,3010 Mombasa County,301000 Mombasa County,30100001 Mombasa County-Headquarters,3 Acquisition of Non Financial Assets,31 Acquisition Of Non- Financial Assets,311 Acquisition Of Fixed Capital Assets,31110 Purch. Of Office Furn. & General Eqpt.,"3111002 Purchase Of Computers, Printers And Other It Equipment",00 Domestic Resources,07 Public Administration And International Relations,0702 Cabinet Affairs,070201 Entrepreneurial and Management Training,2000000.0,0.0,0.0,,,,,0
