[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/wasim/Data-Science/blob/main/data-analyst-roadmap/07_excel_for_analysts/04_python_excel_integration.ipynb)

# Python-Excel Integration

Automate Excel tasks with Python!

## Why Python + Excel?
- ✅ Automate repetitive tasks
- ✅ Process multiple files
- ✅ Advanced data analysis
- ✅ Create formatted reports
- ✅ Schedule automated reports

## Libraries Overview

### pandas
Read/write Excel, data analysis

### openpyxl
Read/write Excel, formatting, formulas

### xlrd/xlwt
Legacy Excel support (.xls files)

In [None]:
import pandas as pd
import openpyxl
from openpyxl.styles import Font, PatternFill
from openpyxl.chart import BarChart, Reference
import numpy as np

## 1. Reading Excel Files

In [None]:
# Sample product catalogue: one list per column, one position per product.
data = dict(
    Product=['Laptop', 'Mouse', 'Keyboard', 'Monitor', 'Desk'],
    Category=['Electronics'] * 4 + ['Furniture'],
    Price=[1000, 25, 75, 400, 300],
    Quantity=[10, 50, 30, 15, 8],
)

# Build the table and derive each product's line total (Price x Quantity).
df = pd.DataFrame(data).assign(
    Total=lambda frame: frame['Price'] * frame['Quantity']
)
df

In [None]:
# Write the product table to a one-sheet workbook named 'Products';
# index=False leaves the DataFrame's row index out of the file.
df.to_excel('sample_data.xlsx', sheet_name='Products', index=False)
print("âœ… File created: sample_data.xlsx")

In [None]:
# Load the 'Products' sheet back into a DataFrame and preview the first rows
df_read = pd.read_excel(io='sample_data.xlsx', sheet_name='Products')
df_read.head(n=5)

In [None]:
# Load only the listed columns from the workbook's first sheet
wanted_columns = ['Product', 'Price', 'Total']
df_subset = pd.read_excel('sample_data.xlsx', usecols=wanted_columns)
df_subset

## 2. Writing Multiple Sheets

In [None]:
# Split the product table into one DataFrame per category
electronics = df.loc[df['Category'].eq('Electronics')]
furniture = df.loc[df['Category'].eq('Furniture')]

# Each entry becomes one sheet (name -> contents), written in this order
sheets = {'Electronics': electronics, 'Furniture': furniture}
with pd.ExcelWriter('multi_sheet.xlsx') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

print("âœ… Multi-sheet file created")

## 3. Formatting with openpyxl

In [None]:
# Start a new workbook and rename its default sheet
wb = openpyxl.Workbook()
ws = wb.active
ws.title = "Sales Report"

# Header row first, then one worksheet row per product, taking the
# DataFrame columns in the same order as the headers
headers = ['Product', 'Price', 'Quantity', 'Total']
ws.append(headers)
for values in df[headers].itertuples(index=False, name=None):
    ws.append(list(values))

print("âœ… Data added to workbook")

In [None]:
# Header style: bold white (FFFFFF) text on a solid 4472C4 background
header_fill = PatternFill(
    fill_type="solid",
    start_color="4472C4",
    end_color="4472C4",
)
header_font = Font(color="FFFFFF", bold=True)

# ws[1] is the first worksheet row, i.e. the header cells
for header_cell in ws[1]:
    header_cell.fill = header_fill
    header_cell.font = header_font

print("âœ… Headers formatted")

In [None]:
# Width per column letter; column A is given more room than B-D
column_widths = {'A': 15, 'B': 12, 'C': 12, 'D': 12}
for letter, width in column_widths.items():
    ws.column_dimensions[letter].width = width

# Write the styled workbook to disk
wb.save('formatted_report.xlsx')
print("âœ… Formatted file saved")

## 4. Adding Formulas

In [None]:
# New workbook; its sheet is empty, so append() fills rows 1, 2, 3 in turn
wb = openpyxl.Workbook()
ws = wb.active

# Row 1: column headers
ws.append(['Item', 'Price', 'Quantity', 'Total'])

# Rows 2-3: one item each. The Total cell holds a formula string
# ('=B2*C2', '=B3*C3') rather than a precomputed number.
items = [('Laptop', 1000, 5), ('Mouse', 25, 10)]
for row_number, (item, price, quantity) in enumerate(items, start=2):
    ws.append([item, price, quantity, f'=B{row_number}*C{row_number}'])

# Row 4: grand total of the two per-item totals
ws['D4'] = '=SUM(D2:D3)'

wb.save('with_formulas.xlsx')
print("âœ… File with formulas created")

## 5. Creating Charts

In [None]:
# New workbook: column A holds product names, column B their sales totals
wb = openpyxl.Workbook()
ws = wb.active
ws.append(['Product', 'Sales'])
for product, total in zip(df['Product'], df['Total']):
    ws.append([product, total])

# Last populated row: the header row plus one row per product
last_row = len(df) + 1

# Values come from column B starting at the header row (used as the
# series title below); category labels come from column A, data rows only
data = Reference(ws, min_col=2, min_row=1, max_row=last_row)
categories = Reference(ws, min_col=1, min_row=2, max_row=last_row)

# Build the bar chart and label it
chart = BarChart()
chart.title = "Sales by Product"
chart.x_axis.title = "Product"
chart.y_axis.title = "Sales ($)"
chart.add_data(data, titles_from_data=True)
chart.set_categories(categories)

# Anchor the chart at cell D2, then write the workbook
ws.add_chart(chart, "D2")
wb.save('with_chart.xlsx')
print("âœ… File with chart created")

## 6. Processing Multiple Files

In [None]:
import glob
import os

# Write one workbook per month: the product table plus a 'Month' column
for month in ('Jan', 'Feb', 'Mar'):
    df_month = df.assign(Month=month)
    df_month.to_excel(f'sales_{month}.xlsx', index=False)

print("âœ… Sample files created")

In [None]:
# Read and combine the monthly sales workbooks.
#
# The pattern matches only a three-letter month suffix (sales_Jan.xlsx, ...).
# A bare 'sales_*.xlsx' would also match 'sales_report.xlsx' from section 7
# if that file is still on disk, and its rows would end up in the result.
# glob returns paths in arbitrary order, so sort them (alphabetically by
# file name) to make the combined row order reproducible.
all_files = sorted(glob.glob('sales_[A-Z][a-z][a-z].xlsx'))
if not all_files:
    # pd.concat([]) would fail with a much less helpful message
    raise FileNotFoundError(
        "No monthly 'sales_<Mon>.xlsx' files found in the working directory"
    )

# One DataFrame per workbook (first sheet of each)
dfs = [pd.read_excel(file) for file in all_files]

# Combine into a single table with a fresh 0..n-1 index
combined = pd.concat(dfs, ignore_index=True)
combined.head(10)

In [None]:
# Write the combined table to its own workbook, without the row index
combined.to_excel('combined_sales.xlsx', index=False)
print("âœ… Combined file created")

## 7. Real-World Example: Sales Report

In [None]:
# Build 200 random sales records. The seed fixes the draws, and the columns
# are drawn in the order Date, Product, Region, Quantity, Price.
np.random.seed(42)
dates = pd.date_range('2023-01-01', periods=100)
products = ['Laptop', 'Mouse', 'Keyboard', 'Monitor']
regions = ['East', 'West', 'North', 'South']

n_rows = 200
columns = {}
columns['Date'] = np.random.choice(dates, n_rows)
columns['Product'] = np.random.choice(products, n_rows)
columns['Region'] = np.random.choice(regions, n_rows)
columns['Quantity'] = np.random.randint(1, 20, n_rows)    # 1..19 units
columns['Price'] = np.random.randint(20, 1000, n_rows)    # 20..999 per unit
sales_data = pd.DataFrame(columns)

# Revenue per record = units sold x unit price
sales_data['Revenue'] = sales_data['Quantity'].mul(sales_data['Price'])

sales_data.head()

In [None]:
# Summary tables: total revenue and total units, once per product and
# once per region. reset_index() turns the group key back into a column.
metrics = ['Revenue', 'Quantity']
by_product = sales_data.groupby('Product')[metrics].sum().reset_index()
by_region = sales_data.groupby('Region')[metrics].sum().reset_index()

print("âœ… Summaries created")

In [None]:
# One workbook with three sheets: the raw records and the two summaries,
# written in this order
report_sheets = [
    ('Raw Data', sales_data),
    ('By Product', by_product),
    ('By Region', by_region),
]

with pd.ExcelWriter('sales_report.xlsx', engine='openpyxl') as writer:
    for sheet_name, frame in report_sheets:
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

print("âœ… Sales report created")

## 8. Cleanup

In [None]:
# Delete the workbooks listed below, skipping any that are not on disk
import os

files_to_remove = [
    'sample_data.xlsx',
    'multi_sheet.xlsx',
    'formatted_report.xlsx',
    'with_formulas.xlsx',
    'with_chart.xlsx',
    'sales_Jan.xlsx',
    'sales_Feb.xlsx',
    'sales_Mar.xlsx',
    'combined_sales.xlsx',
    'sales_report.xlsx',
]

# filter() is lazy, so each path is checked right before it is removed
for leftover in filter(os.path.exists, files_to_remove):
    os.remove(leftover)

print("âœ… Cleanup complete")

## Key Takeaways

✅ **pandas** - Read/write, data analysis  
✅ **openpyxl** - Formatting, charts, formulas  
✅ **Automation** - Process multiple files  
✅ **Reports** - Create formatted outputs  
✅ **Integration** - Best of both worlds  

## Use Cases

- 📊 Automated monthly reports
- 📈 Data consolidation
- 🎨 Formatted dashboards
- 🔄 Batch processing
- 📧 Email reports

**Next:** [README](README.md) →