Explore and implement diverse data transformation techniques
(Z-score, Min-Max, Mean normalization, Max Absolute, Robust scaling) in Python,
understanding their impact on data distribution for effective preprocessing.

In [9]:
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler


np.random.seed(0)
random_data = np.random.randn(100, 3)

print("Original random dataset:")
print(random_data[:5])  



Original random dataset:
[[ 1.76405235  0.40015721  0.97873798]
 [ 2.2408932   1.86755799 -0.97727788]
 [ 0.95008842 -0.15135721 -0.10321885]
 [ 0.4105985   0.14404357  1.45427351]
 [ 0.76103773  0.12167502  0.44386323]]


In [3]:
# Z-score normalization (Standardization)
scaler = StandardScaler()
zscore_normalized_data = scaler.fit_transform(random_data)
print("\nZ-score normalized data:")
print(zscore_normalized_data[:5]) 


Z-score normalized data:
[[ 1.62132212  0.29928405  1.13006146]
 [ 2.08606382  1.79354171 -0.87408569]
 [ 0.82801135 -0.26232438  0.02148113]
 [ 0.30221021  0.03848293  1.61729839]
 [ 0.64375754  0.01570498  0.58202515]]


In [4]:
# Min-Max normalization
scaler = MinMaxScaler()
minmax_normalized_data = scaler.fit_transform(random_data)

print("\nMin-max normalized data:")
print(minmax_normalized_data[:5]) 


Min-max normalized data:
[[0.86560599 0.62498652 0.7338962 ]
 [0.9691197  0.91404355 0.32743435]
 [0.68890884 0.51634604 0.5090646 ]
 [0.57179513 0.57453578 0.83271291]
 [0.64786928 0.5701295  0.62274875]]


In [5]:
# Mean normalization
mean = np.mean(random_data, axis=0)
std_dev = np.std(random_data, axis=0)
mean_normalized_data = (random_data - mean) / std_dev

print("\nMean normalized data:")
print(mean_normalized_data[:5]) 


Mean normalized data:
[[ 1.62132212  0.29928405  1.13006146]
 [ 2.08606382  1.79354171 -0.87408569]
 [ 0.82801135 -0.26232438  0.02148113]
 [ 0.30221021  0.03848293  1.61729839]
 [ 0.64375754  0.01570498  0.58202515]]


In [6]:
# Max Absolute Scaling
scaler = MaxAbsScaler()
maxabs_scaled_data = scaler.fit_transform(random_data)

print("\nMax absolute scaled data:")
print(maxabs_scaled_data[:5])  


Max absolute scaled data:
[[ 0.74022039  0.144326    0.38336933]
 [ 0.9403093   0.67357818 -0.38279741]
 [ 0.39867004 -0.05459049 -0.04043058]
 [ 0.17229272  0.05195266  0.56963545]
 [ 0.31934179  0.04388492  0.17386017]]


In [7]:
# Robust scaling
scaler = RobustScaler()
robust_scaled_data = scaler.fit_transform(random_data)

print("\nRobust scaled data:")
print(robust_scaled_data[:5])  


Robust scaled data:
[[ 1.42748688  0.18917235  0.81371635]
 [ 1.82637616  1.22768273 -0.60739831]
 [ 0.74658574 -0.20114599  0.02763647]
 [ 0.29528897  0.00791535  1.15920971]
 [ 0.58844012 -0.00791535  0.42511094]]
