In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler, Normalizer

# Numeric feature columns that the normalization steps operate on
cols = ['age', 'salary', 'experience']

# Read the raw dataset from disk and preview its first rows
df = pd.read_csv('data.csv')
print("Original Data (first 5 rows):", df.head(), sep="\n")

# 1. Min-Max Normalization
# The scaler is fitted on the untouched frame; results go into a separate copy
# so `df` itself is never modified.
df_minmax = df.copy()
scaler_minmax = MinMaxScaler()
df_minmax[cols] = scaler_minmax.fit_transform(df[cols])
print("\n1. Min-Max Normalization:", df_minmax.head(), sep="\n")

# 2. Z-score Normalization (Standardization)
# Same pattern as above, with a scaler that standardizes each column.
df_standard = df.copy()
scaler_standard = StandardScaler()
df_standard[cols] = scaler_standard.fit_transform(df[cols])
print("\n2. Z-score Normalization (Standardization):", df_standard.head(), sep="\n")

# 3. Decimal Scaling Normalization (Custom Implementation)
def decimal_scaling(series):
    """Scale a numeric Series into the open interval (-1, 1) by a power of ten.

    Decimal scaling divides every value by ``10**j``, where ``j`` is the
    smallest non-negative integer for which the largest absolute value of the
    result is strictly below 1.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to scale. NaN entries are ignored when the largest
        magnitude is determined and stay NaN in the result.

    Returns
    -------
    pandas.Series
        ``series / 10**j`` as floating-point values. When the largest
        magnitude is 0 or undefined (empty or all-NaN input) the values are
        returned unscaled (divided by 1).
    """
    max_abs = series.abs().max()

    # Zero, or NaN from an empty/all-NaN Series: there is nothing to scale by,
    # and log10 must not be evaluated at 0. `not (x > 0)` is also True for NaN.
    if not max_abs > 0:
        return series / 1

    # floor(log10(m)) + 1 is the exponent of the smallest power of ten that is
    # strictly greater than m. Unlike ceil(log10(m + 1)), this is also correct
    # for non-integer magnitudes (99.5 -> 10**2, not 10**3). Clamping at 0
    # leaves data that already lies inside (-1, 1) untouched.
    power = max(np.floor(np.log10(max_abs)) + 1, 0)
    return series / (10**power)

# Build a copy of the frame in which every numeric column has been passed
# through decimal_scaling; the remaining columns are carried over as-is.
df_decimal = df.assign(**{name: decimal_scaling(df[name]) for name in cols})
print("\n3. Decimal Scaling Normalization:", df_decimal.head(), sep="\n")

# 4. L2 Normalization (row-wise)
# The normalized values are computed from the original frame first, then
# written into a fresh copy.
normalizer_l2 = Normalizer(norm='l2')
df_l2_values = normalizer_l2.fit_transform(df[cols])
df_l2 = df.copy()
df_l2[cols] = df_l2_values
print("\n4. L2 Normalization (row-wise):", df_l2.head(), sep="\n")

# 5. L1 Normalization (row-wise)
# Identical flow, using the 'l1' norm instead.
normalizer_l1 = Normalizer(norm='l1')
df_l1_values = normalizer_l1.fit_transform(df[cols])
df_l1 = df.copy()
df_l1[cols] = df_l1_values
print("\n5. L1 Normalization (row-wise):", df_l1.head(), sep="\n")

# 6. Robust Scaling (using median and IQR)
# Fit on the original columns and store the result in a separate copy.
df_robust = df.copy()
scaler_robust = RobustScaler()
df_robust[cols] = scaler_robust.fit_transform(df[cols])
print("\n6. Robust Scaling:", df_robust.head(), sep="\n")

# 7. Max-Abs Scaling
# Same pattern with the max-abs scaler.
df_maxabs = df.copy()
scaler_maxabs = MaxAbsScaler()
df_maxabs[cols] = scaler_maxabs.fit_transform(df[cols])
print("\n7. Max-Abs Scaling:", df_maxabs.head(), sep="\n")

# 8. Log Transformation
# log1p(x) = log(1 + x), so a zero input maps to 0 instead of -inf.
# The ufunc is applied to the selected columns of the copy in one call.
df_log = df.copy()
df_log[cols] = np.log1p(df_log[cols])
print("\n8. Log Transformation (using log1p):", df_log.head(), sep="\n")


Original Data (first 5 rows):
               name  age  salary  experience
0       Alice Smith   28   75000           5
1       Bob Johnson   34   92000          10
2  Charlie Williams   41  105000          15
3       David Brown   23   32000           2
4         Eva Jones   30   85000           7

1. Min-Max Normalization:
               name       age    salary  experience
0       Alice Smith  0.135135  0.380531    0.090909
1       Bob Johnson  0.297297  0.530973    0.242424
2  Charlie Williams  0.486486  0.646018    0.393939
3       David Brown  0.000000  0.000000    0.000000
4         Eva Jones  0.189189  0.469027    0.151515

2. Z-score Normalization (Standardization):
               name       age    salary  experience
0       Alice Smith -1.114831 -0.461168   -1.072113
1       Bob Johnson -0.416607  0.271529   -0.402042
2  Charlie Williams  0.397988  0.831827    0.268028
3       David Brown -1.696685 -2.314462   -1.474155
4         Eva Jones -0.882090 -0.030170   -0.804084

3. 