In [1]:
from pathlib import Path
import os

import pandas as pd
from openpyxl.styles import Font, Alignment
from openpyxl.formatting.rule import CellIsRule
from openpyxl.chart import BarChart, Reference
from openpyxl.chart.shapes import GraphicalProperties
from openpyxl.drawing.line import LineProperties
from openpyxl import load_workbook

In [2]:
# Directory of this file
this_dir = os.getcwd()

In [3]:
# Read in all files
parts = []

for file in Path(this_dir).rglob('*.xls*') :
    print(f'Reading {file.name}')
    part = pd.read_excel(file)
    parts.append(part)

Reading sales_report_openpyxl.xlsx
Reading February.xlsx
Reading August.xlsx
Reading April.xlsx
Reading June.xlsx
Reading October.xlsx
Reading September.xlsx
Reading July.xlsx
Reading January.xlsx
Reading May.xlsx
Reading November.xlsx
Reading December.xlsx
Reading March.xlsx
Reading October.xls
Reading September.xls
Reading April.xls
Reading June.xls
Reading January.xls
Reading July.xls
Reading February.xls
Reading December.xls
Reading August.xls
Reading November.xls
Reading March.xls
Reading May.xls


In [4]:
# Combine the DataFrames from each file into a single DataFrame
df = pd.concat(parts)
df.head()

Unnamed: 0.1,Unnamed: 0,Sales Report,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,transaction_id,store,status,transaction_date,plan,contract_type,amount
0,,,,,,,,,,,,,NaT,,,
1,,Month,Washington DC,Boston,Las Vegas,New York,Chicago,San Francisco,Total,,,,NaT,,,
2,,2019-01-31 00:00:00,14057.6,21784.1,23012.75,49872.85,51187.7,58629.85,218544.85,,,,NaT,,,
3,,2019-02-28 00:00:00,15235.4,21454.9,25493.1,46669.85,52330.85,55218.65,216402.75,,,,NaT,,,
4,,2019-03-31 00:00:00,14177.05,20043,23451.1,41572.25,48897.25,52712.95,200853.6,,,,NaT,,,


In [5]:
# Pivot each store into a column and sum up all transactions per date
pivot = pd.pivot_table(df, 
                       index="transaction_date", columns="store",
                       values="amount", aggfunc="sum")

In [6]:
# Resample to end of month and assign an index name
summary = pivot.resample("M").sum()
summary.index.name = "Month"

In [7]:
summary.head()

store,Boston,Chicago,Las Vegas,New York,San Francisco,Washington DC
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-31,21784.1,51187.7,23012.75,49872.85,58629.85,14057.6
2019-02-28,21454.9,52330.85,25493.1,46669.85,55218.65,15235.4
2019-03-31,20043.0,48897.25,23451.1,41572.25,52712.95,14177.05
2019-04-30,18791.05,47396.35,22710.15,41714.3,49324.65,13339.15
2019-05-31,18036.75,45117.05,21526.55,40610.4,47759.6,13147.1


In [8]:
# Sort columns by total revenue
summary = summary.loc[:, summary.sum().sort_values().index]

In [9]:
summary.head()

store,Washington DC,Boston,Las Vegas,New York,Chicago,San Francisco
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-31,14057.6,21784.1,23012.75,49872.85,51187.7,58629.85
2019-02-28,15235.4,21454.9,25493.1,46669.85,52330.85,55218.65
2019-03-31,14177.05,20043.0,23451.1,41572.25,48897.25,52712.95
2019-04-30,13339.15,18791.05,22710.15,41714.3,47396.35,49324.65
2019-05-31,13147.1,18036.75,21526.55,40610.4,45117.05,47759.6


In [10]:
# Add row total
summary.loc[:, "Total"] = summary.sum(axis=1)

In [11]:
# Add column total
summary.loc['Total'] = summary.sum()

# Write summary report to Excel file

In [12]:
# DataFrame position and number of rows/columns
# openpxyl uses 1-based indices
startrow, startcol = 3, 2
nrows, ncols = summary.shape

In [13]:
# with pd.ExcelWriter(Path(this_dir)/"sales_report_openpyxl.xlsx",
#                    engine="openpyxl", engine_kwargs={"write_only": True}) as writer:

with pd.ExcelWriter(Path(this_dir)/"sales_report_openpyxl.xlsx",
                   engine="openpyxl") as writer:    
    # pandas uses 0-based indices
    summary.to_excel(writer, sheet_name="Sheet1",
                    startrow=startrow-1, startcol=startcol-1)
    
    # Get openpyxl book and sheet object
    book = writer.book
    sheet = writer.sheets["Sheet1"]
    
    # Set title
    sheet.cell(row=1, column=startcol, value="Sales Report")
    sheet.cell(row=1, column=startcol).font = Font(size=24, bold=True)
    
    # Sheet formatting
    sheet.sheet_view.showGridLines = False
    
    # Format the DataFrame with
    # - number format
    # - column width
    # - conditional formatting
    for row in range(startrow + 1, startrow + nrows + 1):
        for col in range(startcol + 1, startcol + ncols + 1):
            cell = sheet.cell(row=row, column=col)
            cell.number_format = "#,##0"
            cell.alignment = Alignment(horizontal="center")
            
    for cell in sheet["B"]:
        cell.number_format = "mmm yy"
    
    for col in range(startcol, startcol + ncols + 1):
        cell = sheet.cell(row=startrow, column=col)
        sheet.column_dimensions[cell.column_letter].width = 14
    
    first_cell = sheet.cell(row=startrow + 1, column=startcol + 1)
    last_cell = sheet.cell(row=startrow + nrows, column=startcol + ncols)
    range_address = f"{first_cell.coordinate}:{last_cell.coordinate}"
    sheet.conditional_formatting.add(range_address,
                                     CellIsRule(operator="lessThan",
                                                formula=["20000"],
                                                stopIfTrue=True,
                                                font=Font(color="E93423"))) 
    
    # Chart
    chart = BarChart()
    chart.type = "col"
    chart.title = "Sales per Month and Store"
    chart.height = 11.5
    chart.width = 20.5
    
    # Add each column as a series, ignoring total row and col
    data = Reference(sheet, min_col=startcol + 1, min_row=startrow,
                     max_row=startrow + nrows - 1,
                     max_col=startcol + ncols - 1)
    categories = Reference(sheet, min_col=startcol, min_row=startrow + 1,
                           max_row=startrow + nrows - 1) 
    chart.add_data(data, titles_from_data=True)
    chart.set_categories(categories)
    cell = sheet.cell(row=startrow + nrows + 2, column=startcol)
    sheet.add_chart(chart=chart, anchor=cell.coordinate)
    
    # Chart formatting
    chart.y_axis.title = "Sales"
    chart.x_axis.title = summary.index.name
    # Hide y-axis line: spPR stands for ShapeProperties 
    chart.y_axis.spPr = GraphicalProperties(ln=LineProperties(noFill=True))