# Basic Excel Manipulation with Python

In [20]:
import pandas as pd

In [21]:
# Read only the 'setosa' worksheet of the iris workbook into a DataFrame
df = pd.read_excel(io="./data/iris_data.xlsx", sheet_name='setosa')

# Preview the top five rows
df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [22]:
import openpyxl

In [23]:
# Open the workbook and pick the 'versicolor' worksheet
wb = openpyxl.load_workbook(filename="./data/iris_data.xlsx")
sheet = wb['versicolor']

# Iterator over the sheet's rows of cell values; the header row comes first
sheet_data_raw = sheet.values

# Consume the first row so that only data rows remain in the iterator
headers = next(sheet_data_raw)

# Build the DataFrame from the remaining rows, labelled with the header cells
sheet_data = pd.DataFrame(data=sheet_data_raw, columns=headers)

# Preview the top five rows
sheet_data.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,7.0,3.2,4.7,1.4,versicolor
1,6.4,3.2,4.5,1.5,versicolor
2,6.9,3.1,4.9,1.5,versicolor
3,5.5,2.3,4.0,1.3,versicolor
4,6.5,2.8,4.6,1.5,versicolor


#### Multiple Sheets

In [24]:
from openpyxl import load_workbook

def read_single_sheet(workbook, sheet_name):
    """Load one worksheet into a DataFrame, using its first row as the header.

    Args:
        workbook: Object indexable by sheet name whose items expose a
            ``values`` iterator of row tuples (e.g. an openpyxl workbook).
        sheet_name: Name of the sheet to read.

    Returns:
        pandas.DataFrame built from every row after the first, with the
        first row's cells as column names. An empty DataFrame is returned
        when the sheet yields no rows at all.
    """
    # Select the sheet
    sheet = workbook[sheet_name]

    # Iterator over the sheet's rows of values (header row first)
    rows = sheet.values

    # Pull the header row off the iterator; the default keeps a bare
    # StopIteration from escaping when the sheet has no rows.
    columns = next(rows, None)
    if columns is None:
        return pd.DataFrame()

    # The remaining rows become the data, labelled with the header cells
    return pd.DataFrame(rows, columns=columns)

def read_multiple_sheets(file_path, sheet_names=None):
    """Stack sheets of an Excel workbook into a single DataFrame.

    Args:
        file_path: Path of the workbook, passed to ``load_workbook``.
        sheet_names: Optional sheet name, or iterable of sheet names, to
            read in the given order. When omitted (the default), every
            sheet in the workbook is read.

    Returns:
        pandas.DataFrame holding the rows of the selected sheets,
        concatenated in order and renumbered with a fresh index.

    Raises:
        KeyError: If a requested sheet name is not in the workbook.
    """
    # data_only=True asks openpyxl for stored cell values instead of formulas
    workbook = load_workbook(file_path, data_only=True)

    if sheet_names is None:
        # Default: every sheet, in workbook order
        sheet_names = workbook.sheetnames
    elif isinstance(sheet_names, str):
        # A lone name would otherwise be iterated character by character
        sheet_names = [sheet_names]

    # Load each sheet into its own DataFrame, then concatenate once
    frames = [
        read_single_sheet(workbook=workbook, sheet_name=sheet_name)
        for sheet_name in sheet_names
    ]
    return pd.concat(frames, ignore_index=True)

# Location of the workbook whose sheets will be combined
file_path = "./data/iris_data.xlsx"

# Merge the workbook's sheets into one DataFrame
consolidated_data = read_multiple_sheets(file_path=file_path)

# Preview the top five rows of the combined data
consolidated_data.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


#### Export to Excel

In [25]:
# Sample records, one list per column
data = dict(
    Name=['John', 'Jane', 'Mike'],
    Age=[25, 30, 35],
    City=['New York', 'London', 'Sydney'],
)
df = pd.DataFrame(data=data)

# Write the DataFrame to an Excel file, leaving out the row index
df.to_excel(excel_writer="./data/data.xlsx", index=False)

#### Create a workbook with openpyxl

In [26]:
# Start from a fresh in-memory workbook
workbook = openpyxl.Workbook()

# Append a worksheet titled "Sheet2"
workbook.create_sheet("Sheet2")

# Write the workbook to disk
workbook.save(filename="./data/example.xlsx")

#### Delete a sheet

In [27]:
# Open the workbook from disk
workbook = openpyxl.load_workbook(filename="./data/example.xlsx")

# Look up the worksheet to drop by name, then remove it from the workbook
sheet_name = "Sheet2"
sheet = workbook[sheet_name]
workbook.remove(worksheet=sheet)

# Save the result under a separate file name
workbook.save(filename="./data/example_removed.xlsx")

#### Update a cell

In [28]:
# Open the workbook from disk
workbook = openpyxl.load_workbook(filename="./data/example_removed.xlsx")

# Name of the sheet to add and then edit
sheet_name = "New Sheet"

# Add the sheet, then fetch it back by name
workbook.create_sheet(sheet_name)
sheet = workbook[sheet_name]

# Write a value into cell A1 (row 1, column 1)
sheet.cell(row=1, column=1, value="Hello, World!")

# Save the result under a separate file name
workbook.save(filename="./data/example_updated.xlsx")