In [5]:
import pandas as pd

# ===============================
# 1. Load Dataset
# ===============================
df = pd.read_csv("data.csv", encoding="ISO-8859-1")
df_orig = df.copy()  # Keep original copy for comparison

# ===============================
# 2. Handle Missing Values
# ===============================
# Drop rows with missing CustomerID
df = df.dropna(subset=['CustomerID'])

# Fill missing Description with "Unknown"
df['Description'] = df['Description'].fillna("Unknown")

# Remove duplicates
df = df.drop_duplicates()

# Remove negative Quantity or UnitPrice
df = df[(df['Quantity'] > 0) & (df['UnitPrice'] > 0)]

# ===============================
# 3. Highlight Filled Values in HTML Table
# ===============================
def highlight_filled(val, col, original_col):
    """Highlight cells in red if they were originally missing."""
    if pd.isna(original_col[val.name]):
        return 'background-color: #FAA0A0; font-weight: bold;'
    else:
        return ''

# Select first 5 rows for display
df_display = df.head(5)

# Apply highlighting only to 'Description' column (CustomerID rows were dropped)
styled_table = df_display.style.apply(
    lambda x: [highlight_filled(x, 'Description', df_orig['Description']) for _ in x],
    subset=['Description']
).set_table_styles([
    {'selector': 'th', 'props': [('background-color', '#0b57a4'), ('color', 'white'), ('padding', '6px')]},
    {'selector': 'td', 'props': [('padding', '6px')]}
]).set_properties(**{
    'border': '1px solid #ccc',
    'text-align': 'center'
})

# ===============================
# 4. Display Cleaned Dataset with Highlighted Filled Values
# ===============================
display(styled_table)

# ===============================
# 5. Save Cleaned Dataset
# ===============================
df.to_csv("cleaned_online_retail.csv", index=False)
print("Cleaned dataset saved as 'cleaned_online_retail.csv'")


KeyError: 'Description'

<pandas.io.formats.style.Styler at 0x24c6b8e2e90>

Cleaned dataset saved as 'cleaned_online_retail.csv'
