In [1]:
from pathlib import Path
import os

import pandas as pd

In [2]:
# Directory of this file
this_dir = os.getcwd()

In [3]:
# Read in all files
parts = []

for file in Path(this_dir).rglob('*.xls*') :
    print(f'Reading {file.name}')
    part = pd.read_excel(file)
    parts.append(part)

Reading sales_report_openpyxl.xlsx
Reading February.xlsx
Reading August.xlsx
Reading April.xlsx
Reading June.xlsx
Reading October.xlsx
Reading September.xlsx
Reading July.xlsx
Reading January.xlsx
Reading May.xlsx
Reading November.xlsx
Reading December.xlsx
Reading March.xlsx
Reading October.xls
Reading September.xls
Reading April.xls
Reading June.xls
Reading January.xls
Reading July.xls
Reading February.xls
Reading December.xls
Reading August.xls
Reading November.xls
Reading March.xls
Reading May.xls


In [4]:
# Combine the DataFrames from each file into a single DataFrame
df = pd.concat(parts)

In [5]:
# Pivot each store into a column and sum up all transactions per date
pivot = pd.pivot_table(df, 
                       index="transaction_date", columns="store",
                       values="amount", aggfunc="sum")

In [6]:
# Resample to end of month and assign an index name
summary = pivot.resample("M").sum()
summary.index.name = "Month"

In [7]:
summary.head()

store,Boston,Chicago,Las Vegas,New York,San Francisco,Washington DC
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-31,21784.1,51187.7,23012.75,49872.85,58629.85,14057.6
2019-02-28,21454.9,52330.85,25493.1,46669.85,55218.65,15235.4
2019-03-31,20043.0,48897.25,23451.1,41572.25,52712.95,14177.05
2019-04-30,18791.05,47396.35,22710.15,41714.3,49324.65,13339.15
2019-05-31,18036.75,45117.05,21526.55,40610.4,47759.6,13147.1


In [8]:
# Sort columns by total revenue
summary = summary.loc[:, summary.sum().sort_values().index]

In [9]:
# Add row total
summary.loc[:, "Total"] = summary.sum(axis=1)

In [10]:
# Add column total
summary.loc['Total'] = summary.sum()

# Write summary report to Excel file

In [11]:
# DataFrame position and number of rows/columns
# xlsxwriter uses 0-based indices
startrow, startcol = 2, 1
nrows, ncols = summary.shape

In [12]:
with pd.ExcelWriter(Path(this_dir)/"sales_report_xlsxwriter.xlsx",
                   engine="xlsxwriter", datetime_format="mmm yy") as writer:    
    summary.to_excel(writer, sheet_name="Sheet1",
                    startrow=startrow, startcol=startcol)
    
    # Get xlsxwriter book and sheet object
    book = writer.book
    sheet = writer.sheets["Sheet1"]
    
    # Set title
    title_format = book.add_format({"bold": True, "size": 24})
    sheet.write(0, startcol, "Sales Report", title_format)
    
    # Sheet formatting
    # 2 = hide on screen and when printing
    sheet.hide_gridlines(2)
    
    # Format the DataFrame with
    # - number format
    # - column width
    # - conditional formatting
    number_format = book.add_format({"num_format": "#,##0",
                                     "align": "center"})
    below_target_format = book.add_format({"font_color": "#E93423"})
    sheet.set_column(first_col=startcol, last_col=startcol + ncols,
                     width=14, cell_format=number_format)
    sheet.conditional_format(first_row=startrow + 1,
                             first_col=startcol + 1,
                             last_row = startrow + nrows,
                             last_col = startcol + ncols,
                             options={"type": "cell", 
                                      "criteria": "<=",
                                      "value": 20000,
                                      "format": below_target_format})
    
    # Chart
    chart = book.add_chart({"type": "column"})
    chart.set_title({"name": "Sales per Month and Store"})
    chart.set_size({"width": 830, "height": 450})
    
    # Add each column as a series, ignoring total row and col
    for col in range(1, ncols):
        chart.add_series({
            # [sheetname, first_row, first_col, last_row, last_col]
            "name": ["Sheet1", startrow, startcol + col],
            "categories": ["Sheet1", startrow + 1, startcol,
                           startrow + nrows - 1, startcol],
            "values": ["Sheet1", startrow + 1, startcol + col,
                       startrow + nrows - 1, startcol + col],
        })

    # Chart formatting
    chart.set_x_axis({"name": summary.index.name,
                      "major_tick_mark": "none"})
    chart.set_y_axis({"name": "Sales",
                      "line": {"none": True},
                      "major_gridlines": {"visible": True},
                      "major_tick_mark": "none"})

    # Add the chart to the sheet
    sheet.insert_chart(startrow + nrows + 2, startcol, chart)