# Config

Change the root to `src/` so that the project modules are visible to this notebook, and install the required libraries.

In [0]:
%run "./includes/config"

In [0]:
# NOTE(review): no version is pinned, so the openpyxl release that gets installed may differ between runs.
# `%pip` is the usual way to target the kernel's own environment — verify how it interacts with the
# preceding %run on this platform before switching.
!pip install openpyxl

Import libraries and functions

In [0]:
from dbc.services.read_data import read_sheet
from dbc.services.save_data import save_sheet
from utils.constants import *
from dbc.utils.helpers import get_year_month_params

Get year and month parameters

In [0]:
# Resolve the reporting period; both values are reused by the read and save steps below
year, month = get_year_month_params()

# Sales sheet processing

Read sales sheet from Data Lake

In [0]:
# Load the sales workbook for the requested year/month
# (presumably from the Data Lake layer named by ADLS_LAYER_BRONZE — see read_sheet)
sales_wb = read_sheet(ADLS_LAYER_BRONZE, year, month)

# Work on the workbook's active worksheet
# NOTE(review): assumes the active sheet is the sales sheet — confirm, or select it by name
sales_ws = sales_wb.active

Normalize the sheet: delete the unneeded leading rows, suffix each column name with its group header so that the names are unique, and then delete the header row.

In [0]:
# Strip the leading rows that get in the way of reading the table.
# Each delete shifts the rows below it up by one, so the row numbers are meant to be
# visited from bottom to top.
# NOTE(review): this relies on SALES_SHEET_ROWS_DELETE_RANGE being in descending order — confirm
for row_idx in SALES_SHEET_ROWS_DELETE_RANGE:
    sales_ws.delete_rows(row_idx)

In [0]:
# Make every column name unique by suffixing it with its group header.
#   - Row 1 holds the group headers. A blank cell in row 1 inherits the closest non-blank
#     header to its left (presumably the layout left behind by merged header cells — TODO confirm).
#   - Row 2 holds the column names; spaces in them are replaced with underscores.
# Columns that appear before the first non-blank header keep the historical '_None' suffix.
# NOTE(review): this cell rewrites row 2 in place, so running it twice appends the suffix twice.

# max_column is the worksheet-wide last column, not the last populated column of row 2,
# so some of the visited columns may have no name at all
max_col = sales_ws.max_column

current_header = None

for col in range(1, max_col + 1):

    column_header_value = sales_ws.cell(1, col).value

    if (column_header_value is not None) and (column_header_value != ''):
        current_header = column_header_value

    name_cell = sales_ws.cell(2, col)

    # Nothing to rename when the column has no name (e.g. a trailing, formatting-only column);
    # calling .replace() on None would raise AttributeError
    if name_cell.value is None:
        continue

    # str() keeps text names unchanged and tolerates non-text names (numbers, dates),
    # which have no .replace() method
    name_cell.value = str(name_cell.value).replace(' ', '_') + f'_{current_header}'

In [0]:
# Drop the group-header row (row 1; worksheet rows are 1-indexed) now that its values
# have been folded into the column names, leaving the column names as the first row
sales_ws.delete_rows(1)

Save sheet in ADLS

In [0]:
# Persist the normalized workbook for the same year/month it was read for
# NOTE(review): the destination is decided inside save_sheet — not visible from this notebook
save_sheet(sales_wb, year, month)