<a href="https://colab.research.google.com/github/zuhayerror3i8/AI-ML-Expert-With-Phitron-Batch-01/blob/main/001%20Machine%20Learning/004_Module_03_05_Practice_Day_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Module 3.5 — Practice Day 2

**Topics Covered:**
- Standardization, Min-Max scaling, Robust scaling
- Nominal vs ordinal variables, one-hot vs ordinal encoding
- Vectors, dot product, norms, Euclidean and Manhattan distance

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from scipy.spatial import distance

# Part A. Quick Basics

## A1. Spot the Right Scaler

For each feature, pick one scaler and justify in one line.

In [None]:
# A1: one tuple of ready-to-print lines per feature (question, scaler, two justification lines).
a1_answers = (
    (
        "\na) Apartment_price_BDT with a few luxury penthouses",
        "   Scaler: Robust Scaler",
        "   Justification: Luxury penthouses are outliers; RobustScaler uses median and IQR,",
        "                  making it resistant to extreme values.",
    ),
    (
        "\nb) Skin_temperature_C measured from a wearable between 30 and 36",
        "   Scaler: Min-Max Scaler",
        "   Justification: The range is bounded and narrow with no outliers expected;",
        "                  Min-Max scales to [0,1] preserving the distribution.",
    ),
    (
        "\nc) Daily_app_opens with many zeros and a few power users",
        "   Scaler: Robust Scaler or Log Transform + StandardScaler",
        "   Justification: Power users are outliers; RobustScaler handles them better,",
        "                  or use log transform to reduce skewness before standardizing.",
    ),
)
a1_divider = "=" * 80

print("A1. Spot the Right Scaler\n")
print(a1_divider)

# Each answer is emitted as one newline-joined chunk, which prints exactly like
# one print() call per line.
for a1_lines in a1_answers:
    print("\n".join(a1_lines))

print("\n" + a1_divider)

## A2. Manual Min-Max on a Tiny Set

Given scores = [20, 25, 30, 50], scale to [0, 1] by hand. Show each step.

In [None]:
print("A2. Manual Min-Max Scaling\n")

scores = np.array([20, 25, 30, 50])
print(f"Original scores: {scores}")

# Step 1: the smallest and largest value define the scaling window.
min_val, max_val = scores.min(), scores.max()
print("\nStep 1: Find min and max")
print(f"  min = {min_val}")
print(f"  max = {max_val}")

# Step 2: scale the whole array at once, then walk through the arithmetic value by value.
print("\nStep 2: Apply formula: X_scaled = (X - min) / (max - min)")
print()

scaled_scores = (scores - min_val) / (max_val - min_val)
for score, scaled in zip(scores, scaled_scores):
    print(f"  {score}: ({score} - {min_val}) / ({max_val} - {min_val}) = {score - min_val} / {max_val - min_val} = {scaled:.4f}")

print(f"\nScaled scores: {scaled_scores}")

# Verification with sklearn. MinMaxScaler is already imported in the notebook's import
# cell, so it is not re-imported here. The scaler expects a 2-D (n_samples, n_features)
# array, hence the reshape to a single column before fitting and the flatten afterwards.
scaler = MinMaxScaler()
sklearn_scaled = scaler.fit_transform(scores.reshape(-1, 1)).flatten()
print(f"Verification (sklearn): {sklearn_scaled}")

# Turn the visual comparison into a real check: fail loudly if the hand computation
# and sklearn ever disagree.
np.testing.assert_allclose(sklearn_scaled, scaled_scores)

## A3. Z-scores on a Subset

Given x = [8, 9, 11], compute mean, standard deviation, then standardize each. Use population standard deviation.

In [None]:
x = np.array([8, 9, 11])

print("A3. Z-scores (Standardization)\n")
print(f"Original data: {x}")

# Step 1: Compute mean
mean = x.mean()
sum_terms = " + ".join(str(v) for v in x)
print("\nStep 1: Compute mean")
print(f"  μ = ({sum_terms}) / {x.size} = {x.sum()} / {x.size} = {mean:.4f}")

# Step 2: Compute population standard deviation (ddof=0 divides by N, not N-1)
std = x.std(ddof=0)
variance = x.var(ddof=0)
# The formula strings are generated from x so they cannot drift from the data.
deviation_terms = " + ".join(f"({v}-{mean:.4f})²" for v in x)
squared_terms = " + ".join(f"{(v - mean) ** 2:.4f}" for v in x)
print("\nStep 2: Compute population standard deviation")
print(f"  Variance σ² = [{deviation_terms}] / {x.size}")
print(f"  Variance σ² = [{squared_terms}] / {x.size}")
print(f"  Variance σ² = {variance:.4f}")
print(f"  σ = √{variance:.4f} = {std:.4f}")

# Step 3: Standardize each value
print("\nStep 3: Standardize each value using Z = (X - μ) / σ")
z_scores = (x - mean) / std
for val, z in zip(x, z_scores):
    print(f"  {val}: ({val} - {mean:.4f}) / {std:.4f} = {z:.4f}")

print(f"\nZ-scores: {z_scores}")
# Standardized data must have mean 0 and (population) standard deviation 1.
print(f"Verification - Mean of Z-scores: {z_scores.mean():.6f} (should be ~0)")
print(f"Verification - Std of Z-scores: {z_scores.std(ddof=0):.6f} (should be ~1)")

## A4. Robust Scaling Ingredients

Given y = [5, 6, 6, 7, 50], find median, Q1, Q3, IQR. Do not scale yet.

In [None]:
print("A4. Robust Scaling Ingredients\n")

y = np.array([5, 6, 6, 7, 50])
print(f"Data: {y}")
print(f"Sorted data: {np.sort(y)}")

# All three quartiles come from a single percentile call; Q2 is the median.
Q1, Q2, Q3 = np.percentile(y, [25, 50, 75])
IQR = Q3 - Q1

a4_summary = [
    f"\nMedian (Q2): {Q2}",
    f"Q1 (25th percentile): {Q1}",
    f"Q3 (75th percentile): {Q3}",
    f"IQR (Q3 - Q1): {Q3} - {Q1} = {IQR}",
    f"\nNote: The value 50 is an outlier (much larger than Q3 = {Q3}).",
    "Robust scaling formula: X_scaled = (X - median) / IQR",
    "This makes the scaled values resistant to the outlier at 50.",
]
print("\n".join(a4_summary))

## A5. Nominal or Ordinal

Mark each as nominal or ordinal.

In [None]:
# A5: label each categorical variable as nominal (no order) or ordinal (ordered),
# with a one-line reason for each.
print("A5. Nominal or Ordinal?\n")
print("="*80)

print("\na) T-shirt_size {S, M, L, XL}")
print("   Type: ORDINAL")
print("   Reason: There is a clear ordering: S < M < L < XL (small to extra large)")

print("\nb) City {Dhaka, Chattogram, Rajshahi}")
print("   Type: NOMINAL")
print("   Reason: No inherent ordering or ranking between cities")

print("\nc) Satisfaction {Low, Medium, High}")
print("   Type: ORDINAL")
print("   Reason: Clear ordering: Low < Medium < High")

print("\n" + "="*80)
print("\nKey Difference:")
# Bullets are written as real "•" characters (they were previously mis-encoded).
print("  • Nominal: Categories with NO natural order (use One-Hot Encoding)")
print("  • Ordinal: Categories with MEANINGFUL order (use Ordinal Encoding)")

# Part B. Hands-On Practice

## B1. Three Scalers Side by Side

Heights = [150, 160, 170, 175, 180]  
Weights = [58, 62, 65, 66, 190]

In [None]:
heights = np.array([150, 160, 170, 175, 180])
weights = np.array([58, 62, 65, 66, 190])

print("B1. Three Scalers Side by Side\n")
print(f"Heights: {heights}")
print(f"Weights: {weights} (Note: 190 is an outlier)")
print("="*80)

# Task a) Min-Max scale both to [0, 1]
print("\na) Min-Max Scaling to [0, 1]")
heights_minmax = (heights - heights.min()) / (heights.max() - heights.min())
weights_minmax = (weights - weights.min()) / (weights.max() - weights.min())
print(f"   Heights (Min-Max): {heights_minmax}")
print(f"   Weights (Min-Max): {weights_minmax}")
# The second-largest weight (66) is reported from the computed array, not hard-coded.
print(f"   Note: Outlier 190 dominates weights; 66→{weights_minmax[3]:.4f} instead of being near 1.0")

# Task b) Standardize the first three values of each
# Population standard deviation (ddof=0) is used here, matching exercise A3, the
# standardized panel plotted for this exercise, and sklearn's StandardScaler.
print("\nb) Standardize First Three Values Only")
heights_first3 = heights[:3]
weights_first3 = weights[:3]
heights_std = (heights_first3 - heights_first3.mean()) / heights_first3.std(ddof=0)
weights_std = (weights_first3 - weights_first3.mean()) / weights_first3.std(ddof=0)
print(f"   Heights[0:3]: {heights_first3}")
print(f"   Standardized: {heights_std}")
print(f"   Weights[0:3]: {weights_first3}")
print(f"   Standardized: {weights_std}")

# Task c) Robust scale Weights with median and IQR
print("\nc) Robust Scale Weights")
median_w = np.median(weights)
Q1_w = np.percentile(weights, 25)
Q3_w = np.percentile(weights, 75)
IQR_w = Q3_w - Q1_w
weights_robust = (weights - median_w) / IQR_w
print(f"   Median: {median_w}, Q1: {Q1_w}, Q3: {Q3_w}, IQR: {IQR_w}")
print(f"   Weights (Robust): {weights_robust}")
# The outlier itself stays far out after robust scaling; the benefit is that the
# remaining values are not squeezed together by it.
print(f"   Note: Outlier 190 → {weights_robust[-1]:.2f}; it stays extreme, but the other weights keep a usable spread")

# Task d) Which scaler handles outlier best
print("\nd) Outlier Handling Comparison")
print("   Min-Max:  Outlier pulls all other values to near 0 (bad)")
print("   Standard: Outlier inflates std, distorting all Z-scores")
print("   Robust:   Outlier has minimal effect; uses median/IQR (BEST)")
print("   Winner: Robust Scaler handles the outlier best!")

# Visualization: one scatter panel per scaler, described by a row of
# (x values, y values, extra scatter kwargs, title, x label, y label).
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

b1_panels = [
    (heights_minmax, weights_minmax, {},
     'Min-Max Scaled', 'Heights', 'Weights'),
    (heights, (weights - weights.mean()) / weights.std(), {'color': 'orange'},
     'Standardized (Z-score)', 'Heights (original)', 'Weights (standardized)'),
    (heights, weights_robust, {'color': 'green'},
     'Robust Scaled', 'Heights (original)', 'Weights (robust)'),
]

for panel_ax, (xs, ys, scatter_kwargs, title, xlabel, ylabel) in zip(axes, b1_panels):
    panel_ax.scatter(xs, ys, s=100, **scatter_kwargs)
    panel_ax.set_title(title)
    panel_ax.set_xlabel(xlabel)
    panel_ax.set_ylabel(ylabel)
    panel_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## B2. One-Hot Encoding by Hand

Cities = [Dhaka, Chattogram, Dhaka, Rajshahi, Rajshahi]  
Create three columns: City_Dhaka, City_Chattogram, City_Rajshahi using 0 and 1.

In [None]:
cities = ['Dhaka', 'Chattogram', 'Dhaka', 'Rajshahi', 'Rajshahi']

print("B2. One-Hot Encoding by Hand\n")
print(f"Original: {cities}")
print("\nManual One-Hot Encoding:")

# One indicator column per category: 1 where the row's city matches, else 0.
# The category tuple fixes the column order City_Dhaka, City_Chattogram, City_Rajshahi.
one_hot_data = {
    f'City_{category}': [int(city == category) for city in cities]
    for category in ('Dhaka', 'Chattogram', 'Rajshahi')
}

df_onehot = pd.DataFrame(one_hot_data)
df_onehot.index.name = 'Row'

print(df_onehot)

print("\nExplanation:")
print("  • Each city becomes a separate binary column")
print("  • Only one column has 1 per row (the city for that row)")
print("  • This avoids imposing false ordering on nominal categories")

# Verification with sklearn. OneHotEncoder is already imported in the notebook's import
# cell, so it is not re-imported here.
# The category order is passed explicitly so sklearn's columns line up with the
# hand-built table (City_Dhaka, City_Chattogram, City_Rajshahi); left to itself the
# encoder orders the categories alphabetically.
encoder = OneHotEncoder(categories=[['Dhaka', 'Chattogram', 'Rajshahi']], sparse_output=False)
cities_array = np.array(cities).reshape(-1, 1)  # encoder expects a 2-D (n_samples, 1) array
encoded = encoder.fit_transform(cities_array)
print("\nVerification with sklearn:")
print(pd.DataFrame(encoded, columns=encoder.get_feature_names_out(['City'])))

## B3. Ordinal Mapping

Education = [High School, Bachelor, Master, Bachelor, Master]  
Map with High School=0, Bachelor=1, Master=2.  
Then change the map to High School=1, Bachelor=2, Master=3.

In [None]:
education = ['High School', 'Bachelor', 'Master', 'Bachelor', 'Master']

print("B3. Ordinal Mapping\n")
print(f"Original: {education}\n")

# First mapping: 0, 1, 2
map1 = {'High School': 0, 'Bachelor': 1, 'Master': 2}
encoded1 = [map1[level] for level in education]
print("Mapping 1: High School=0, Bachelor=1, Master=2")
print(f"Encoded: {encoded1}")

# Second mapping: 1, 2, 3 (same order, every code shifted up by one)
map2 = {'High School': 1, 'Bachelor': 2, 'Master': 3}
encoded2 = [map2[level] for level in education]
print("\nMapping 2: High School=1, Bachelor=2, Master=3")
print(f"Encoded: {encoded2}")

print("\n" + "="*80)
print("Effect on Distances:")
print("="*80)

# Absolute differences between encoded samples are what a distance-based method sees.
print("\nDistance between Sample 0 (High School) and Sample 1 (Bachelor):")
print(f"  Mapping 1: |{encoded1[0]} - {encoded1[1]}| = {abs(encoded1[0] - encoded1[1])}")
print(f"  Mapping 2: |{encoded2[0]} - {encoded2[1]}| = {abs(encoded2[0] - encoded2[1])}")
print("  → Same difference (1 unit)")

print("\nDistance between Sample 0 (High School) and Sample 2 (Master):")
print(f"  Mapping 1: |{encoded1[0]} - {encoded1[2]}| = {abs(encoded1[0] - encoded1[2])}")
print(f"  Mapping 2: |{encoded2[0]} - {encoded2[2]}| = {abs(encoded2[0] - encoded2[2])}")
print("  → Same difference (2 units)")

print("\n📝 One-line explanation:")
print("   Shifting the mapping by a constant (0→1, 1→2, 2→3) preserves relative")
print("   distances, so ordinal relationships remain intact; only absolute values shift.")

## B4. Encoding Mixup [Optional]

You mistakenly apply ordinal encoding to City and one-hot to Education. Write one sentence on the risk this creates in a linear model.

In [None]:
print("B4. Encoding Mixup Risk\n")
print("="*80)
print("\nScenario: Ordinal encoding on City (nominal) and One-hot on Education (ordinal)")
print("\nExample:")

# Wrong: Ordinal encoding on City. The codes are derived from the list order and the
# printed example is built from the mapping, so text and data cannot disagree.
cities_wrong = ['Dhaka', 'Chattogram', 'Rajshahi']
city_map = {city_name: code for code, city_name in enumerate(cities_wrong)}
city_codes_text = ", ".join(f"{city_name}={code}" for city_name, code in city_map.items())
implied_order_text = " < ".join(cities_wrong)
print(f"  City (nominal) with ordinal: {city_codes_text}")
print(f"  Problem: Implies {implied_order_text} (FALSE ordering!)")

# Wrong: One-hot on Education
print("\n  Education (ordinal) with one-hot: 3 separate columns")
print("  Problem: Loses the ordering High School < Bachelor < Master")

print("\n" + "="*80)
print("📝 ONE SENTENCE RISK:")
print("="*80)
print("\n  Ordinal encoding City imposes a false ranking (e.g., Dhaka=0 < Rajshahi=2),")
print("  while one-hot encoding Education destroys the meaningful order, causing the")
print("  linear model to misinterpret relationships and produce unreliable predictions.")

## B5. Vectors and Alignment [Optional]

a = [3, −1, 2], b = [4, 0, −2], c = [−6, 2, −4]

In [None]:
a = np.array([3, -1, 2])
b = np.array([4, 0, -2])
c = np.array([-6, 2, -4])

print("B5. Vectors and Alignment\n")
print(f"a = {a}")
print(f"b = {b}")
print(f"c = {c}")
print("="*80)

# Task a) Compute dot products
dot_ab = np.dot(a, b)
dot_ac = np.dot(a, c)

print("\na) Dot Products")
print(f"   a·b = (3)(4) + (-1)(0) + (2)(-2) = 12 + 0 - 4 = {dot_ab}")
print(f"   a·c = (3)(-6) + (-1)(2) + (2)(-4) = -18 - 2 - 8 = {dot_ac}")

# Task b) Compare signs and magnitudes
print("\nb) Alignment Analysis")
print(f"   a·b = {dot_ab} (positive) → vectors point in similar direction (acute angle)")
print(f"   |a·b| = {abs(dot_ab)}")
print(f"\n   a·c = {dot_ac} (negative) → vectors point in opposite directions (obtuse angle)")
print(f"   |a·c| = {abs(dot_ac)}")
print(f"\n   Magnitude comparison: |a·c| = {abs(dot_ac)} > |a·b| = {abs(dot_ab)}")
print("   → 'c' is more strongly anti-aligned with 'a' than 'b' is aligned with 'a'")

# Check if c is a scalar multiple of a. The verdict is computed, not assumed.
is_minus_two_a = np.array_equal(c, -2 * a)
print(f"\n   Note: c = -2 × a? Let's check: -2 × a = {-2 * a}")
if is_minus_two_a:
    print("   Yes! c and a are parallel but opposite (c = -2a)")
else:
    print("   No, c is not equal to -2a")

# Task c) L2 normalize a: dividing by the Euclidean length yields a unit vector.
norm_a = np.linalg.norm(a)  # L2 norm
a_normalized = a / norm_a

print("\nc) L2 Normalization of a")
print(f"   L2 norm of a: ||a|| = √(3² + (-1)² + 2²) = √(9 + 1 + 4) = √14 = {norm_a:.6f}")
print("   Normalized vector: a_norm = a / ||a||")
print(f"   a_norm = [{a[0]}/{norm_a:.3f}, {a[1]}/{norm_a:.3f}, {a[2]}/{norm_a:.3f}]")
print(f"   a_norm = [{a_normalized[0]:.3f}, {a_normalized[1]:.3f}, {a_normalized[2]:.3f}]")
print(f"\n   Verification: ||a_norm|| = {np.linalg.norm(a_normalized):.6f} (should be 1.0)")

## B6. Two Distances, Different Vibes

Points: P1(2, 3), P2(5, 7), P3(2, 10)

In [None]:
def _euclidean(p, q):
    """Return the straight-line (L2) distance between points p and q."""
    return np.sqrt(np.sum((q - p) ** 2))


def _manhattan(p, q):
    """Return the city-block (L1) distance between points p and q."""
    return np.sum(np.abs(q - p))


P1 = np.array([2, 3])
P2 = np.array([5, 7])
P3 = np.array([2, 10])

print("B6. Two Distances, Different Vibes\n")
print(f"P1 = {P1}")
print(f"P2 = {P2}")
print(f"P3 = {P3}")
print("="*80)

# Task a) Compute Euclidean and Manhattan distances for all pairs
print("\na) Distance Calculations")

# Euclidean: sqrt((x2-x1)² + (y2-y1)²)
euclidean_12 = _euclidean(P1, P2)
euclidean_13 = _euclidean(P1, P3)
euclidean_23 = _euclidean(P2, P3)

# Manhattan: |x2-x1| + |y2-y1|
manhattan_12 = _manhattan(P1, P2)
manhattan_13 = _manhattan(P1, P3)
manhattan_23 = _manhattan(P2, P3)

print("\n   P1 to P2:")
print(f"      Euclidean: √((5-2)² + (7-3)²) = √(9 + 16) = √25 = {euclidean_12:.3f}")
print(f"      Manhattan: |5-2| + |7-3| = 3 + 4 = {manhattan_12:.3f}")

print("\n   P1 to P3:")
print(f"      Euclidean: √((2-2)² + (10-3)²) = √(0 + 49) = √49 = {euclidean_13:.3f}")
print(f"      Manhattan: |2-2| + |10-3| = 0 + 7 = {manhattan_13:.3f}")

print("\n   P2 to P3:")
print(f"      Euclidean: √((2-5)² + (10-7)²) = √(9 + 9) = √18 = {euclidean_23:.3f}")
print(f"      Manhattan: |2-5| + |10-7| = 3 + 3 = {manhattan_23:.3f}")

# Task b) Which distance is more sensitive to a single large jump
# P1→P2 (3 + 4) and P1→P3 (0 + 7) cover the same total coordinate change, so they
# are a like-for-like comparison of a spread-out move against a single large jump.
print("\nb) Sensitivity to Single Large Jump")
print("   Manhattan distance is LINEAR in each dimension (sum of absolute differences)")
print("   Euclidean distance SQUARES differences before summing")
print("   → Euclidean is MORE SENSITIVE to large jumps in a single coordinate")
print("   Example: P1→P2 moves (3, 4) while P1→P3 moves (0, 7), a 7-unit jump in y only:")
print(f"      Manhattan: {manhattan_12} vs {manhattan_13} → identical, both total 7 units")
print(f"      Euclidean: {euclidean_12:.3f} vs {euclidean_13:.3f} → the single large jump counts as farther")

# Task c) Scale y by 10 and recompute d(P1, P2)
P1_scaled = np.array([P1[0], P1[1] * 10])
P2_scaled = np.array([P2[0], P2[1] * 10])

euclidean_12_scaled = _euclidean(P1_scaled, P2_scaled)
manhattan_12_scaled = _manhattan(P1_scaled, P2_scaled)

print("\nc) Effect of Scaling Y by 10")
print(f"   Original: P1={P1}, P2={P2}")
print(f"   Scaled:   P1={P1_scaled}, P2={P2_scaled}")
print("\n   Original distances P1→P2:")
print(f"      Euclidean: {euclidean_12:.3f}")
print(f"      Manhattan: {manhattan_12:.3f}")
print("\n   Scaled distances P1→P2:")
print(f"      Euclidean: {euclidean_12_scaled:.3f} (was {euclidean_12:.3f})")
print(f"      Manhattan: {manhattan_12_scaled:.3f} (was {manhattan_12:.3f})")
print("\n   📝 One-line explanation:")
print("      Scaling y by 10 amplifies y-differences, making y dominate both distance")
print("      metrics and distorting similarity; features must be scaled to equal ranges.")

# Visualization: the same P1-P2 segment drawn before and after stretching y by 10.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
ax_original, ax_scaled = axes

# Original scale: all three points, with the P1-P2 Euclidean segment dashed.
for point, color, marker, name in (
    (P1, 'red', 'o', 'P1'),
    (P2, 'blue', 's', 'P2'),
    (P3, 'green', '^', 'P3'),
):
    ax_original.scatter(*point, s=200, c=color, marker=marker, label=name,
                        edgecolors='black', linewidths=2)
ax_original.plot([P1[0], P2[0]], [P1[1], P2[1]], 'k--', alpha=0.5,
                 label=f'Euclidean P1-P2: {euclidean_12:.2f}')
ax_original.set_xlabel('X')
ax_original.set_ylabel('Y')
ax_original.set_title('Original Scale')
ax_original.legend()
ax_original.grid(True, alpha=0.3)
ax_original.axis('equal')  # equal aspect so the drawn length matches the distance

# Scaled (y × 10): only P1 and P2, since only d(P1, P2) was recomputed.
for point, color, marker, name in (
    (P1_scaled, 'red', 'o', 'P1 scaled'),
    (P2_scaled, 'blue', 's', 'P2 scaled'),
):
    ax_scaled.scatter(*point, s=200, c=color, marker=marker, label=name,
                      edgecolors='black', linewidths=2)
ax_scaled.plot([P1_scaled[0], P2_scaled[0]], [P1_scaled[1], P2_scaled[1]], 'k--', alpha=0.5,
               label=f'Euclidean P1-P2: {euclidean_12_scaled:.2f}')
ax_scaled.set_xlabel('X')
ax_scaled.set_ylabel('Y (scaled ×10)')  # multiplication sign was previously mis-encoded
ax_scaled.set_title('Y Scaled by 10')
ax_scaled.legend()
ax_scaled.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Part C. Mini Datasets

## C-Data-1 and C-Data-2

In [None]:
# C-Data-1: student records (study habits, GPA, connectivity, home city).
data1 = dict(
    ID=[1, 2, 3, 4, 5],
    Age=[20, 21, 22, 20, 23],
    Hours_Study=[1.0, 0.5, 2.2, 5.0, 0.2],
    GPA=[3.10, 2.60, 3.40, 3.90, 2.30],
    Internet=['Yes', 'No', 'Yes', 'Yes', 'No'],
    City=['Dhaka', 'Chattogram', 'Rajshahi', 'Dhaka', 'Rajshahi'],
)
df1 = pd.DataFrame(data1)

# C-Data-2: customer records (income, activity, temperature, education, satisfaction).
data2 = dict(
    ID=[1, 2, 3, 4, 5],
    Income_BDT=[30000, 45000, 52000, 300000, 38000],
    Transactions=[0, 1, 2, 12, 0],
    Temp_C=[25.0, 26.0, 24.5, 28.0, 25.5],
    Education=['High School', 'Bachelor', 'Master', 'Bachelor', 'Master'],
    Satisfaction=['Low', 'Medium', 'High', 'Medium', 'Medium'],
)
df2 = pd.DataFrame(data2)

# Heading/table pairs printed one item per line.
print("C-Data-1:", df1, "\nC-Data-2:", df2, sep="\n")

## C1. Scaler Choices with Evidence

Pick a scaler for Income_BDT, Transactions, Temp_C. For each, give a one-line justification and a two-line numeric illustration using C-Data-2 values.

In [None]:
print("C1. Scaler Choices with Evidence\n")
print("="*80)

# Income_BDT: median/IQR are computed once and reused, and the quoted ratio is
# derived from the data rather than typed in.
income = df2['Income_BDT'].values
income_median = np.median(income)
income_iqr = np.percentile(income, 75) - np.percentile(income, 25)
print(f"\n1. Income_BDT: {income.tolist()}")
print("   Scaler: Robust Scaler")
print(f"   Justification: {income.max()} is a severe outlier ({income.max() / income_median:.1f}× larger than median)")
print("   Illustration:")
print(f"      Median: {income_median}, IQR: {income_iqr}")
print(f"      Robust scaled: {(income - income_median) / income_iqr}")

# Transactions
transactions = df2['Transactions'].values
transactions_median = np.median(transactions)
transactions_iqr = np.percentile(transactions, 75) - np.percentile(transactions, 25)
# Only a zero IQR needs a fallback divisor (avoids division by zero); any non-zero
# IQR, including one below 1, is used as-is so the scaling matches the printed IQR.
transactions_scale = transactions_iqr if transactions_iqr != 0 else 1.0
second_highest = np.sort(transactions)[-2]
print(f"\n2. Transactions: {transactions.tolist()}")
print("   Scaler: Robust Scaler or Log Transform + StandardScaler")
print(f"   Justification: {transactions.max()} is an outlier ({transactions.max() / second_highest:.0f}× larger than second highest); many zeros")
print("   Illustration:")
print(f"      Median: {transactions_median}, IQR: {transactions_iqr}")
print(f"      Robust scaled: {(transactions - transactions_median) / transactions_scale}")

# Temp_C
temp = df2['Temp_C'].values
temp_range = temp.max() - temp.min()
print(f"\n3. Temp_C: {temp.tolist()}")
print("   Scaler: Min-Max Scaler")
print(f"   Justification: Bounded, narrow range ({temp.min()}-{temp.max()}°C) with no outliers")
print("   Illustration:")
print(f"      Min: {temp.min()}, Max: {temp.max()}, Range: {temp_range}")
print(f"      Min-Max scaled: {(temp - temp.min()) / temp_range}")

print("\n" + "="*80)

## C2. Mixed Preprocessing Plan

For C-Data-1 and C-Data-2 combined:
- Identify nominal and ordinal columns
- Propose one encoding plan listing exact columns to one-hot vs ordinal
- Propose one scaling plan listing exact columns to Min-Max vs Standardization vs Robust

In [None]:
print("C2. Mixed Preprocessing Plan\n")
print("="*80)

# a) Variable types per dataset.
print("\na) Identify Nominal and Ordinal Columns")
print("\n   C-Data-1:")
print("      Nominal:  Internet (Yes/No), City (Dhaka/Chattogram/Rajshahi)")
print("      Ordinal:  None")
print("      Numeric:  Age, Hours_Study, GPA")

print("\n   C-Data-2:")
print("      Nominal:  None")
print("      Ordinal:  Education (High School < Bachelor < Master)")
print("                Satisfaction (Low < Medium < High)")
print("      Numeric:  Income_BDT, Transactions, Temp_C")

# b) Encoding: one-hot for nominal columns, ordered integer codes for ordinal ones.
# Bullets are written as real "•" characters (they were previously mis-encoded).
print("\n" + "-"*80)
print("\nb) Encoding Plan")
print("\n   One-Hot Encoding:")
print("      • Internet (binary: Yes/No)")
print("      • City (3 categories: Dhaka, Chattogram, Rajshahi)")

print("\n   Ordinal Encoding:")
print("      • Education: {'High School': 0, 'Bachelor': 1, 'Master': 2}")
print("      • Satisfaction: {'Low': 0, 'Medium': 1, 'High': 2}")

# c) Scaling: chosen per numeric column from its range and outliers.
print("\n" + "-"*80)
print("\nc) Scaling Plan")
print("\n   Min-Max Scaling:")
print("      • Age (narrow range 20-23, no outliers)")
print("      • GPA (bounded range 2.3-3.9, no outliers)")
print("      • Temp_C (bounded range 24.5-28, no outliers)")

print("\n   Standardization:")
print("      • Hours_Study (wide range but continuous, bell-shaped expected)")

print("\n   Robust Scaling:")
print("      • Income_BDT (outlier at 300000)")
print("      • Transactions (outlier at 12, many zeros)")

print("\n" + "="*80)

# Create summary table: one row per column with its type and chosen preprocessing.
summary = pd.DataFrame({
    'Column': ['Age', 'Hours_Study', 'GPA', 'Income_BDT', 'Transactions',
               'Temp_C', 'Internet', 'City', 'Education', 'Satisfaction'],
    'Type': ['Numeric', 'Numeric', 'Numeric', 'Numeric', 'Numeric',
             'Numeric', 'Nominal', 'Nominal', 'Ordinal', 'Ordinal'],
    'Preprocessing': ['Min-Max', 'Standard', 'Min-Max', 'Robust', 'Robust',
                     'Min-Max', 'One-Hot', 'One-Hot', 'Ordinal', 'Ordinal']
})

print("\nSummary Table:")
print(summary.to_string(index=False))

## C3. Outlier Stress Test [Optional]

Using Income_BDT in C-Data-2, compute Min-Max scaled values. Then compute Robust scaled values. In one line, compare how each treats the 300000 outlier.

In [None]:
income = df2['Income_BDT'].values

print("C3. Outlier Stress Test\n")
print(f"Income_BDT: {income}")
print("="*80)

# The single largest income is the outlier; every other row counts as "normal".
# All figures quoted below are derived from the arrays so they always match the data.
is_outlier = income == income.max()

# Min-Max Scaling
income_minmax = (income - income.min()) / (income.max() - income.min())
normal_minmax = income_minmax[~is_outlier]
print("\nMin-Max Scaled:")
print(f"   {income_minmax}")
print(f"   Normal values (30k-52k) → {normal_minmax.min():.3f} to {normal_minmax.max():.3f} (compressed near 0)")
print(f"   Outlier (300k) → {income_minmax[is_outlier][0]:.3f} (dominates the scale)")

# Robust Scaling
median_inc = np.median(income)
Q1_inc = np.percentile(income, 25)
Q3_inc = np.percentile(income, 75)
IQR_inc = Q3_inc - Q1_inc
income_robust = (income - median_inc) / IQR_inc
normal_robust = income_robust[~is_outlier]

print("\nRobust Scaled:")
print(f"   Median: {median_inc}, IQR: {IQR_inc}")
print(f"   {income_robust}")
print(f"   Normal values → {normal_robust.min():.3f} to {normal_robust.max():.3f} (well distributed)")
print(f"   Outlier (300k) → {income_robust[is_outlier][0]:.2f} (large but doesn't compress others)")

print("\n" + "="*80)
print("📝 ONE LINE COMPARISON:")
print("="*80)
print(f"   Min-Max squashes all normal values to [{normal_minmax.min():.3f}, {normal_minmax.max():.3f}] because the outlier 300k")
print(f"   sets max, while Robust keeps normal values spread across [{normal_robust.min():.2f}, {normal_robust.max():.2f}] since")
print("   it uses median/IQR unaffected by the extreme value.")

# Visualization: the same bar chart twice, once per scaling; the fourth bar is
# drawn in red, the others in green.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

c3_bar_colors = ['green', 'green', 'green', 'red', 'green']
c3_positions = range(len(income))
c3_panels = (
    (income_minmax, 'Min-Max Scaled Income'),
    (income_robust, 'Robust Scaled Income'),
)

for panel_ax, (scaled_values, panel_title) in zip(axes, c3_panels):
    panel_ax.bar(c3_positions, scaled_values, color=c3_bar_colors)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Sample Index')
    panel_ax.set_ylabel('Scaled Value')
    panel_ax.set_xticks(c3_positions)
    panel_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## C4. Distance on Feature Space [Optional]

From C-Data-1, take feature pair (Hours_Study, GPA).

In [None]:
# Extract features
hours = df1['Hours_Study'].values
gpa = df1['GPA'].values

# Create feature vectors: one row per student, columns (Hours_Study, GPA)
features = np.column_stack([hours, gpa])
ID1 = features[0]  # first row of df1 (ID 1)
ID4 = features[3]  # fourth row of df1 (ID 4)

print("C4. Distance on Feature Space\n")
print("Features: (Hours_Study, GPA)")
print(f"ID 1: {ID1}")
print(f"ID 4: {ID4}")
print("="*80)

# Per-feature gaps between the two students; the worked formulas below are
# generated from these so they always agree with the data.
d_hours = ID4[0] - ID1[0]
d_gpa = ID4[1] - ID1[1]

# Task a) Euclidean distance
euclidean_orig = np.sqrt(d_hours**2 + d_gpa**2)
print("\na) Euclidean Distance (Original)")
print(f"   d = √(({ID4[0]:.1f}-{ID1[0]:.1f})² + ({ID4[1]:.2f}-{ID1[1]:.2f})²)")
print(f"   d = √({d_hours**2:.1f} + {d_gpa**2:.2f})")
print(f"   d = √{d_hours**2 + d_gpa**2:.2f} = {euclidean_orig:.4f}")

# Task b) Manhattan distance
manhattan_orig = abs(d_hours) + abs(d_gpa)
print("\nb) Manhattan Distance (Original)")
print(f"   d = |{ID4[0]:.1f}-{ID1[0]:.1f}| + |{ID4[1]:.2f}-{ID1[1]:.2f}|")
print(f"   d = {abs(d_hours):.1f} + {abs(d_gpa):.1f} = {manhattan_orig:.4f}")

# Task c) Normalize with Min-Max and recompute
hours_minmax = (hours - hours.min()) / (hours.max() - hours.min())
gpa_minmax = (gpa - gpa.min()) / (gpa.max() - gpa.min())
features_scaled = np.column_stack([hours_minmax, gpa_minmax])
ID1_scaled = features_scaled[0]
ID4_scaled = features_scaled[3]

euclidean_scaled = np.sqrt((ID4_scaled[0] - ID1_scaled[0])**2 + (ID4_scaled[1] - ID1_scaled[1])**2)
manhattan_scaled = abs(ID4_scaled[0] - ID1_scaled[0]) + abs(ID4_scaled[1] - ID1_scaled[1])

print("\nc) After Min-Max Normalization")
print(f"   ID 1 scaled: {ID1_scaled}")
print(f"   ID 4 scaled: {ID4_scaled}")
print(f"\n   Euclidean Distance (Scaled): {euclidean_scaled:.4f}")
print(f"   Manhattan Distance (Scaled): {manhattan_scaled:.4f}")

print("\n" + "="*80)
print("📝 ONE LINE COMMENT ON SCALE EFFECTS:")
print("="*80)
print("   Before scaling, Hours_Study (range 0.2-5.0) dominated distances over GPA")
print("   (range 2.3-3.9); Min-Max scaling equalizes feature contributions, reducing")
print("   distances and making both features equally important in similarity measures.")

# Create comparison table
comparison = pd.DataFrame({
    'Metric': ['Euclidean', 'Manhattan'],
    'Original': [euclidean_orig, manhattan_orig],
    'Scaled': [euclidean_scaled, manhattan_scaled],
    'Change': [euclidean_scaled - euclidean_orig, manhattan_scaled - manhattan_orig]
})
print("\nDistance Comparison:")
print(comparison.to_string(index=False))

# Visualization: all students in blue, with ID 1 and ID 4 highlighted and joined by
# the Euclidean segment, before and after Min-Max scaling.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

c4_panels = (
    # (axis, all x, all y, ID 1 point, ID 4 point, distance, x label, y label, title)
    (axes[0], hours, gpa, ID1, ID4, euclidean_orig,
     'Hours_Study', 'GPA', 'Original Scale'),
    (axes[1], hours_minmax, gpa_minmax, ID1_scaled, ID4_scaled, euclidean_scaled,
     'Hours_Study (scaled)', 'GPA (scaled)', 'Min-Max Scaled [0, 1]'),
)

for panel_ax, all_x, all_y, first_pt, second_pt, dist, xlabel, ylabel, title in c4_panels:
    panel_ax.scatter(all_x, all_y, s=200, c='blue', alpha=0.6, edgecolors='black', linewidths=2)
    # Each highlighted point is drawn separately so it gets its own legend entry;
    # a list passed as `label` to one scatter call shows up as a single entry
    # containing the list's text.
    for point, color, name in ((first_pt, 'red', 'ID 1'), (second_pt, 'green', 'ID 4')):
        panel_ax.scatter(point[0], point[1], s=300, c=color,
                         edgecolors='black', linewidths=2, label=name)
    panel_ax.plot([first_pt[0], second_pt[0]], [first_pt[1], second_pt[1]], 'k--',
                  linewidth=2, label=f'Euclidean: {dist:.3f}')
    panel_ax.set_xlabel(xlabel)
    panel_ax.set_ylabel(ylabel)
    panel_ax.set_title(title)
    panel_ax.legend()
    panel_ax.grid(True, alpha=0.3)

# Scaled data lives in [0, 1]; pad the limits slightly so edge markers are not clipped.
axes[1].set_xlim(-0.1, 1.1)
axes[1].set_ylim(-0.1, 1.1)

plt.tight_layout()
plt.show()

# Part D. Mini Project [Optional]

**Goal:** Make one notebook that shows encoding + scaling + distance change. No train–test split, no models.

## Step 1: Create a Small DataFrame

In [None]:
# Mini-project dataset: four numeric columns (two containing an outlier in row 3),
# two nominal columns and two ordinal columns.
project_data = dict(
    Income=[35000, 48000, 52000, 250000, 42000, 38000],
    Hours_Study=[2.5, 4.0, 3.5, 5.0, 1.5, 3.0],
    GPA=[3.2, 3.7, 3.5, 3.9, 2.8, 3.3],
    Transactions_7d=[5, 8, 12, 85, 3, 6],
    City=['Dhaka', 'Chattogram', 'Dhaka', 'Rajshahi', 'Dhaka', 'Chattogram'],
    Internet=['Yes', 'Yes', 'No', 'Yes', 'Yes', 'No'],
    Education_Level=['Bachelor', 'Master', 'Bachelor', 'PhD', 'High School', 'Master'],
    Satisfaction=['Medium', 'High', 'Medium', 'High', 'Low', 'Medium'],
)

df_project = pd.DataFrame(project_data)
print("Mini Project Dataset:", df_project, f"\nShape: {df_project.shape}", sep="\n")

## Step 2: Decide Preprocessing Plan

In [None]:
# Step 2: state which preprocessing each column of the mini-project dataset receives.
# Emoji and bullets are written as real characters (they were previously mis-encoded).
print("Preprocessing Plan\n")
print("="*80)

print("\n📋 One-Hot Encoding (Nominal Variables):")
print("   • City: {Dhaka, Chattogram, Rajshahi} - no natural order")
print("   • Internet: {Yes, No} - binary nominal")

print("\n📋 Ordinal Encoding (Ordinal Variables):")
print("   • Education_Level: High School < Bachelor < Master < PhD")
print("     Mapping: {'High School': 0, 'Bachelor': 1, 'Master': 2, 'PhD': 3}")
print("   • Satisfaction: Low < Medium < High")
print("     Mapping: {'Low': 0, 'Medium': 1, 'High': 2}")

print("\n📋 Standardization (Normal Distribution Expected):")
print("   • Hours_Study: continuous variable, no severe outliers")
print("   • GPA: bounded but continuous")

# Ratios are relative to the next-largest value in the dataset:
# 250000 / 52000 ≈ 4.8 and 85 / 12 ≈ 7.1.
print("\n📋 Robust Scaling (Outliers Present):")
print("   • Income: outlier at 250000 (about 5× the next-largest income)")
print("   • Transactions_7d: outlier at 85 (about 7× the next-largest count)")

print("\n" + "="*80)

## Step 3: Apply ColumnTransformer

In [None]:
from sklearn.preprocessing import StandardScaler, RobustScaler, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer

# Define column groups (one group per preprocessing strategy from Step 2)
onehot_cols = ['City', 'Internet']
ordinal_cols = ['Education_Level', 'Satisfaction']
standard_cols = ['Hours_Study', 'GPA']
robust_cols = ['Income', 'Transactions_7d']

# Define ordinal mappings, listed lowest to highest; OrdinalEncoder assigns each
# category its position in the list (0, 1, 2, ...).
education_mapping = [['High School', 'Bachelor', 'Master', 'PhD']]
satisfaction_mapping = [['Low', 'Medium', 'High']]

# Create ColumnTransformer.
# verbose_feature_names_out=False keeps the output names unprefixed
# ('City_Dhaka', 'GPA', ...) instead of 'onehot__City_Dhaka', 'standard__GPA', ...
preprocessor = ColumnTransformer(
    transformers=[
        # drop='first' removes one indicator per feature to avoid redundant columns
        ('onehot', OneHotEncoder(sparse_output=False, drop='first'), onehot_cols),
        ('ordinal_edu', OrdinalEncoder(categories=education_mapping), ['Education_Level']),
        ('ordinal_sat', OrdinalEncoder(categories=satisfaction_mapping), ['Satisfaction']),
        ('standard', StandardScaler(), standard_cols),
        ('robust', RobustScaler(), robust_cols)
    ],
    verbose_feature_names_out=False
)

# Fit and transform
transformed = preprocessor.fit_transform(df_project)

print("Transformed Data Shape:", transformed.shape)
print("\nFirst 3 rows of transformed data:")
print(transformed[:3])

# Get feature names straight from the fitted transformer so they always follow the
# actual output column order, rather than rebuilding the list by hand in an order
# that has to mirror the transformer list above.
feature_names = preprocessor.get_feature_names_out().tolist()

print(f"\nFeature names ({len(feature_names)} total):")
print(feature_names)

# Create DataFrame for better visualization
df_transformed = pd.DataFrame(transformed, columns=feature_names)
print("\nTransformed DataFrame:")
print(df_transformed)

## Step 4: Distance Before vs After Scaling

Pick two numeric columns: (Income, Transactions_7d). Take 3 rows: P1, P2, P3.

In [None]:
# Select rows 0, 1, 3 (to include the outlier); the row labels are defined once here
# and reused for both the selection and the printed captions.
selected_rows = [0, 1, 3]
distance_cols = ['Income', 'Transactions_7d']

# Slice rows and columns in a single .loc call so the points come back as a numeric
# array; pulling one row at a time out of this mixed-dtype frame can yield
# object-dtype values instead.
P1_orig, P2_orig, P3_orig = df_project.loc[selected_rows, distance_cols].to_numpy()

print("Step 4: Distance Before vs After Scaling\n")
print("="*80)
print("\nSelected Points (Income, Transactions_7d):")
print(f"P1 (row {selected_rows[0]}): {P1_orig.tolist()}")
print(f"P2 (row {selected_rows[1]}): {P2_orig.tolist()}")
print(f"P3 (row {selected_rows[2]}): {P3_orig.tolist()} ← outlier")

# Compute original distances
def euclidean(a, b):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    a, b : array-like of numbers
        Points with matching (or broadcastable) shapes. Lists, tuples and
        object-dtype arrays are accepted; both are converted to float arrays.

    Returns
    -------
    numpy.float64
        Square root of the summed squared coordinate differences.
    """
    # Coerce to float so plain sequences work and integer inputs cannot overflow when squared.
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

def manhattan(a, b):
    """Return the Manhattan (L1) distance between two points.

    Parameters
    ----------
    a, b : array-like of numbers
        Points with matching (or broadcastable) shapes. Lists, tuples and
        object-dtype arrays are accepted; both are converted to float arrays.

    Returns
    -------
    numpy.float64
        Sum of the absolute coordinate differences.
    """
    # Coerce to float so plain sequences work and the result type is always a float.
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return np.sum(np.abs(diff))

# Pair order shared by every distance list and by the summary table.
PAIR_LABELS = ['P1-P2', 'P1-P3', 'P2-P3']


def _pairwise_distances(p1, p2, p3):
    """Return (euclidean_list, manhattan_list) for the pairs P1-P2, P1-P3, P2-P3, in that order."""
    pairs = [(p1, p2), (p1, p3), (p2, p3)]
    euc = [euclidean(a, b) for a, b in pairs]
    man = [manhattan(a, b) for a, b in pairs]
    return euc, man


def _print_distances(euc, man, decimals):
    """Print the Euclidean section, then the Manhattan section, one line per pair."""
    for metric, values in (('Euclidean', euc), ('Manhattan', man)):
        print(f"\n{metric} Distances:")
        for pair, value in zip(PAIR_LABELS, values):
            print(f"  {pair}: {value:.{decimals}f}")


print("\n--- BEFORE SCALING ---")
euc_orig, man_orig = _pairwise_distances(P1_orig, P2_orig, P3_orig)
euc_12_orig, euc_13_orig, euc_23_orig = euc_orig
man_12_orig, man_13_orig, man_23_orig = man_orig
_print_distances(euc_orig, man_orig, decimals=2)

# Apply two different scalers to the same two columns so the distances are comparable.
scaling_input = df_project[['Income', 'Transactions_7d']]

# Scaler 1: StandardScaler
scaler_std = StandardScaler()
data_std = scaler_std.fit_transform(scaling_input)
# Same rows as the original points (selected_rows is defined at the top of this cell).
P1_std, P2_std, P3_std = data_std[selected_rows]

print("\n--- AFTER STANDARDIZATION ---")
euc_std, man_std = _pairwise_distances(P1_std, P2_std, P3_std)
euc_12_std, euc_13_std, euc_23_std = euc_std
man_12_std, man_13_std, man_23_std = man_std
_print_distances(euc_std, man_std, decimals=4)

# Scaler 2: RobustScaler
scaler_rob = RobustScaler()
data_rob = scaler_rob.fit_transform(scaling_input)
P1_rob, P2_rob, P3_rob = data_rob[selected_rows]

print("\n--- AFTER ROBUST SCALING ---")
euc_rob, man_rob = _pairwise_distances(P1_rob, P2_rob, P3_rob)
euc_12_rob, euc_13_rob, euc_23_rob = euc_rob
man_12_rob, man_13_rob, man_23_rob = man_rob
_print_distances(euc_rob, man_rob, decimals=4)

# Summary table: three Euclidean rows followed by three Manhattan rows,
# with one column per scaling variant.
summary_table = pd.DataFrame({
    'Pair': PAIR_LABELS * 2,
    'Metric': ['Euclidean'] * 3 + ['Manhattan'] * 3,
    'Original': euc_orig + man_orig,
    'Standard': euc_std + man_std,
    'Robust': euc_rob + man_rob
})

print("\n" + "="*80)
print("SUMMARY TABLE")
print("="*80)
print(summary_table.to_string(index=False))

## Step 5: Short Reflection

In [None]:
# Print the written reflection for the mini project.
# The numbered keycap emoji and arrows are written as real Unicode characters so the
# output renders as intended instead of as mis-decoded byte sequences.
print("="*80)
print("REFLECTION: Mini Project Insights")
print("="*80)

print("\n1️⃣ Which scaler handled outliers better?")
print("   → Robust Scaler performed better for features with outliers (Income, Transactions_7d).")
print("   → It uses median and IQR instead of mean and std, making it resistant to extreme values.")
print("   → Standard Scaler was influenced by outliers, inflating the standard deviation and")
print("     compressing the normal values closer together than they should be.")

print("\n2️⃣ Did scaling change which points are 'closer' to each other?")
print("   → Yes! Before scaling, Income (range ~200k) dominated over Transactions (range ~80),")
print("     making P3 (outlier in both) seem very far from P1 and P2.")
print("   → After scaling, the relative distances changed; features contribute more equally.")
print("   → Robust scaling preserved relative ordering better than StandardScaler for normal points.")

print("\n3️⃣ Why does this matter for algorithms that use distance?")
print("   → Distance-based algorithms (KNN, K-Means, SVM with RBF) assume all features are")
print("     on comparable scales; unscaled features let high-magnitude features dominate.")
print("   → Scaling ensures each feature contributes fairly to similarity/distance calculations.")
print("   → Choosing the right scaler (Robust for outliers, Standard for normal distributions,")
print("     Min-Max for bounded ranges) prevents distortion and improves model performance.")

print("\n" + "="*80)

# Visualization: one panel per scale so the outlier's effect can be compared side by side.

# Label and colour for each highlighted point. Every point gets its own scatter call:
# a list passed as `label` to a single call is shown as one legend entry containing
# the list's text, not as one entry per point.
point_styles = [('P1', 'red'), ('P2', 'blue'), ('P3 (outlier)', 'green')]

# Per panel: (all rows as an (n, 2) array, highlighted points, x-label, y-label, title)
panels = [
    (df_project[['Income', 'Transactions_7d']].to_numpy(), (P1_orig, P2_orig, P3_orig),
     'Income (BDT)', 'Transactions_7d', 'Original Scale\n(Income dominates)'),
    (data_std, (P1_std, P2_std, P3_std),
     'Income (standardized)', 'Transactions_7d (standardized)',
     'Standard Scaler\n(Outlier inflates std)'),
    (data_rob, (P1_rob, P2_rob, P3_rob),
     'Income (robust scaled)', 'Transactions_7d (robust scaled)',
     'Robust Scaler\n(Best for outliers)'),
]

fig, axes = plt.subplots(1, 3, figsize=(18, 5))

for ax, (all_points, highlighted, xlabel, ylabel, title) in zip(axes, panels):
    # Background layer: every row of the dataset.
    ax.scatter(all_points[:, 0], all_points[:, 1], s=200, alpha=0.6)
    # Foreground layer: the three points whose distances were compared in Step 4.
    for point, (label, color) in zip(highlighted, point_styles):
        ax.scatter(point[0], point[1], s=300, c=color,
                   edgecolors='black', linewidths=2, label=label)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()