In [20]:
import numpy as np  # to deal with array
import matplotlib.pyplot as pyplot  # to deal with graph
import pandas as pd # to deal with data manipulation
from sklearn.impute import SimpleImputer # for handling missing/not defined values
from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler

In [6]:
# Toy 3x3 matrix in which every row contains exactly one missing entry (NaN).
data = np.array(
    [
        [1, 2, np.nan],
        [4, np.nan, 6],
        [np.nan, 8, 9],
    ]
)

# strategy="constant" swaps every NaN for `fill_value` (here: 1) instead of a
# statistic computed from the column.
imputer = SimpleImputer(fill_value=1, strategy="constant")
imputer.fit(data)
imputed_data = imputer.transform(data)
print(imputed_data)

[[1. 2. 1.]
 [4. 1. 6.]
 [1. 8. 9.]]


In [21]:
# Small demo table: one missing value in each numeric column ("Age", "Salary")
# plus a Yes/No label column ("Purchased").
data = {
    "Name": ["John", "Anna", "Jan", "Yashwani"],
    "Age": [28, np.nan, 22, 21],
    "Salary": [30000, 40000, np.nan, 60000],
    "Purchased": ["Yes", "No", "Yes", "No"]
}
df = pd.DataFrame(data)

# Columns that are imputed and scaled below.
numeric_cols = ["Age", "Salary"]

# Handle missing data
# Both numeric columns go through the imputer in a single call, so the 2-D
# result maps straight back onto the same two columns (no (n, 1) array is
# squeezed into a single column and the imputer is fitted only once).
# NOTE(review): the constant fill of 1 ends up as the minimum of each column and
# therefore drives both scalings below -- confirm a constant (rather than e.g.
# strategy="mean") is what this demo intends.
Imputer = SimpleImputer(strategy="constant", fill_value=1)
df[numeric_cols] = Imputer.fit_transform(df[numeric_cols])
print("Handle Missing data:")
print(df)

# Encode Categorical data
# LabelEncoder replaces each distinct label with an integer code.
label_encoder = LabelEncoder()
df["Purchased"] = label_encoder.fit_transform(df["Purchased"])
print("After Encoding categorical data:")
print(df)

# Snapshot of the imputed, still-unscaled values. Both scalers are fitted on
# this snapshot so each one sees the same input and its fitted statistics
# (and inverse_transform) stay in the original Age/Salary units.
imputed_numeric = df[numeric_cols].copy()

# Scale numerical data
# StandardScaler: zero mean, unit variance per column.
scaler = StandardScaler()
df[numeric_cols] = scaler.fit_transform(imputed_numeric)
print("After Scaling data:")
print(df)

# Scale using MinMaxScaling Method
# Fitted on the imputed values, not on the standardized ones written to `df`
# just above; the resulting [0, 1] values are the same, but the scaler is no
# longer stacked on top of StandardScaler.
mm_scaler = MinMaxScaler()
df[numeric_cols] = mm_scaler.fit_transform(imputed_numeric)
print("After Scaling using MinMaxScaling:")
print(df)


Handle Missing data:
       Name   Age   Salary Purchased
0      John  28.0  30000.0       Yes
1      Anna   1.0  40000.0        No
2       Jan  22.0      1.0       Yes
3  Yashwani  21.0  60000.0        No
After Encoding categorical data:
       Name   Age   Salary  Purchased
0      John  28.0  30000.0          1
1      Anna   1.0  40000.0          0
2       Jan  22.0      1.0          1
3  Yashwani  21.0  60000.0          0
After Scaling data:
       Name       Age    Salary  Purchased
0      John  0.982946 -0.115484          1
1      Anna -1.671009  0.346405          0
2       Jan  0.393179 -1.501102          1
3  Yashwani  0.294884  1.270181          0
After Scaling using MinMaxScaling:
       Name       Age    Salary  Purchased
0      John  1.000000  0.499992          1
1      Anna  0.000000  0.666661          0
2       Jan  0.777778  0.000000          1
3  Yashwani  0.740741  1.000000          0
