In [None]:
# Week 1 - Applied Data Science with AI
# Project: E-Commerce Recommendation System
# Dataset: Ecommerce Customer Service Satisfaction

# Import libraries
from pathlib import Path

import pandas as pd
from tabulate import tabulate

# Expand display: let pandas show every column and use a wider console width.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 200)

# Load dataset (make sure the dataset is uploaded in Colab or working directory)
# The path is declared once; the display name below is derived from it so the
# two cannot drift apart when a different file is loaded.
DATA_PATH = Path("Customer_support_data.csv")
df = pd.read_csv(DATA_PATH, encoding="utf-8")

# ---- Dataset Info ----
dataset_name = DATA_PATH.stem  # file name without its ".csv" extension
rows, cols = df.shape

# Table for dataset rows & columns (single-row frame, one column per fact)
dataset_shape = pd.DataFrame({
    "Dataset Name": [dataset_name],
    "Total Rows": [rows],
    "Total Columns": [cols]
})

print("📌 Dataset Information:")
print(tabulate(dataset_shape, headers="keys", tablefmt="grid"))

# ---- Column Descriptions ----
# Known descriptions, keyed by column name. Extend this mapping as the
# dataset's columns are documented; names that do not occur in `df` are
# reported below instead of being printed as if they were real columns.
column_info = {
    "CustomerID": "Unique identifier for each customer",
    "Gender": "Gender of the customer",
    "Age": "Age of the customer",
    "Country": "Country of residence",
    "ProductCategory": "Category of the purchased product",
    "Rating": "Customer rating (1–5)",
    "Review": "Customer review text/feedback"
}

# Drive the table from the columns actually present in `df`, so it never
# describes a column the dataset lacks and never omits one it has.
actual_columns = list(df.columns)
column_desc_table = pd.DataFrame({
    "Column Name": actual_columns,
    "Description": [
        column_info.get(name, "No description available")
        for name in actual_columns
    ],
})

print("\n📌 Column Descriptions:")
print(tabulate(column_desc_table, headers="keys", tablefmt="grid"))

# Surface stale entries so the mapping above can be corrected.
unmatched_descriptions = [name for name in column_info if name not in actual_columns]
if unmatched_descriptions:
    print(
        "\n⚠️ Descriptions defined for columns not found in the dataset: "
        + ", ".join(unmatched_descriptions)
    )

# ---- Sample Data ----
# Take the leading ten records and render them as a grid headed by column names.
sample_rows = df.head(10)
print("\n📌 Dataset Sample (First 10 Rows):")
print(tabulate(sample_rows, headers="keys", tablefmt="grid"))

# ---- Missing Values ----
# Count null cells per column, then turn the resulting Series into a
# two-column frame: the column label and its null count.
missing_values = (
    df.isnull()
    .sum()
    .rename_axis("Column Name")
    .reset_index(name="Missing Values")
)

print("\n📌 Missing Values per Column:")
print(tabulate(missing_values, headers="keys", tablefmt="grid"))

# ---- Dataset Summary ----
# One-row overview of the frame: size, fully duplicated rows, null cells,
# and how many columns are numeric versus everything else.
duplicate_row_count = df.duplicated().sum()
missing_cell_count = df.isnull().sum().sum()
numeric_column_count = df.select_dtypes(include='number').shape[1]
# "Categorical" here means every column whose dtype is not numeric.
non_numeric_column_count = df.select_dtypes(exclude='number').shape[1]

info_data = {
    "Total Rows": [rows],
    "Total Columns": [cols],
    "Duplicate Rows": [duplicate_row_count],
    "Missing Cells": [missing_cell_count],
    "Numeric Columns": [numeric_column_count],
    "Categorical Columns": [non_numeric_column_count],
}
summary_table = pd.DataFrame(info_data)

print("\n📌 Dataset Summary:")
print(tabulate(summary_table, headers="keys", tablefmt="grid"))


📌 Dataset Information:
+----+-----------------------+--------------+-----------------+
|    | Dataset Name          |   Total Rows |   Total Columns |
+====+=======================+==============+=================+
|  0 | Customer_support_data |        85907 |              20 |
+----+-----------------------+--------------+-----------------+

📌 Column Descriptions:
+----+-----------------+-------------------------------------+
|    | Column Name     | Description                         |
+====+=================+=====================================+
|  0 | CustomerID      | Unique identifier for each customer |
+----+-----------------+-------------------------------------+
|  1 | Gender          | Gender of the customer              |
+----+-----------------+-------------------------------------+
|  2 | Age             | Age of the customer                 |
+----+-----------------+-------------------------------------+
|  3 | Country         | Country of residence                |
+----+-----------------+-------------------------------------+
|  4 | ProductCategory | Category of the purchased product   |
+---