### UNIFORM BINNING

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Reproducible synthetic sample: 1000 "ages" drawn from a normal distribution
# with mean 50 and standard deviation 10 (legacy global NumPy seed).
np.random.seed(42)
mean, std_dev, size = 50, 10, 1000
data = np.random.normal(loc=mean, scale=std_dev, size=size)
df = pd.DataFrame({'Age': data})

In [None]:
# Preview the first five rows of the synthetic Age column.
df.head()

In [None]:
# Distribution of the raw ages, using matplotlib's default bin count.
fig, ax = plt.subplots()
ax.hist(df["Age"])
plt.show()

In [None]:
# Number of rows (observations); the same value is used as n for the bin-count rule below.
len(df)

In [None]:
# Sturges' rule: for n observations use k = ceil(log2(n) + 1) bins.
n = df.shape[0]
k = int(np.ceil(1 + np.log2(n)))

In [None]:
# Show the number of bins chosen by the rule above.
print(k)

In [None]:
from sklearn.preprocessing import KBinsDiscretizer

# Equal-width binning: the observed Age range is split into k intervals of the
# same width, and each value is replaced by the ordinal index of its interval.
kbins = KBinsDiscretizer(n_bins=k, encode='ordinal', strategy='uniform')

# fit_transform returns a 2-D float array of shape (n_samples, 1). Flatten it
# and cast to int so the column holds the bin indices 0..k-1 as integers
# instead of 0.0, 1.0, ... stored from a one-column matrix.
df['Age_Binned'] = kbins.fit_transform(df[['Age']]).ravel().astype(int)

In [None]:
# Check the new Age_Binned column next to the original Age values.
df.head()

In [None]:
# Age_Binned holds the ordinal codes 0..k-1. A default 10-bin histogram over
# that range merges neighbouring codes whenever k > 10 (the 1000-row sample
# gives k = 11, so codes 9 and 10 would share one bar). Use one unit-width bin
# per code, centred on the integer, so every bar is exactly one bin.
plt.hist(df["Age_Binned"], bins=np.arange(k + 1) - 0.5)
plt.xticks(range(k))
plt.xlabel("Age_Binned")
plt.ylabel("Count")
plt.show()

In [None]:
# Bar chart of the number of rows in each equal-width bin.
sns.countplot(data=df, x="Age_Binned")

### QUANTILE BINNING

In [None]:
# Reproducible sample for the quantile example: 200 integers in a single column.
# randint's upper bound is exclusive, so the values lie in 1..99.
np.random.seed(42)
data = np.random.randint(1, 100, size=(200, 1))
df = pd.DataFrame(data, columns=['Value'])

In [None]:
# Preview the first five rows of the integer Value column.
df.head()

In [None]:
# Frequency of each distinct value. With 200 draws from at most 99 possible
# integers, repeated values are guaranteed.
df["Value"].value_counts()

In [None]:
# One bar per distinct integer in Value, showing how often it occurs.
sns.countplot(data=df, x="Value")

In [None]:
from sklearn.preprocessing import KBinsDiscretizer

# Quantile binning with 4 bins: the bin edges are placed at quantiles of Value,
# so each bin is meant to receive a similar number of rows.
kbins = KBinsDiscretizer(n_bins=4, encode='ordinal', strategy='quantile')

# fit_transform returns a 2-D float array of shape (n_samples, 1). Flatten it
# and cast to int so the column holds the bin indices 0..3 as integers instead
# of 0.0, 1.0, ... stored from a one-column matrix.
df['Quantile_Binned'] = kbins.fit_transform(df[['Value']]).ravel().astype(int)

In [None]:
# Compare each Value with the quantile bin it was assigned to.
df.head()

In [None]:
# Bar chart of the number of rows in each quantile bin.
sns.countplot(data=df, x="Quantile_Binned")

In [None]:
# Rows per quantile bin. Expected to be close to equal (200 rows over 4 bins);
# repeated values sitting on a bin edge can make the counts uneven.
df["Quantile_Binned"].value_counts()

### CUSTOM BINNING

In [2]:
# Small hand-written sample for the custom-binning example: ten names with ages.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Emma', 'Frank', 'Grace', 'Henry', 'Isabella', 'Jack'],
    Age=[25, 32, 45, 18, 60, 37, 28, 22, 49, 35],
)
df = pd.DataFrame.from_dict(data)

In [3]:
# First five rows of the custom-binning sample.
df.head()

Unnamed: 0,Name,Age
0,Alice,25
1,Bob,32
2,Charlie,45
3,David,18
4,Emma,60


In [4]:
# Human-readable names for the age brackets, one per interval between
# consecutive edges (six edges -> five intervals).
labels = ['0-18', '19-30', '31-40', '41-50', '51+']

# Bracket edges; the open-ended last bracket is capped with infinity.
bins = [0, 18, 30, 40, 50, np.inf]

In [9]:
# Map each age to its labelled bracket. pd.cut intervals are right-closed by
# default, e.g. (18, 30] -> '19-30'. include_lowest=True also closes the first
# interval on the left, so an age of exactly 0 falls in '0-18' instead of
# silently becoming NaN.
df['Age_Binned'] = pd.cut(df['Age'], bins=bins, labels=labels, include_lowest=True)

In [10]:
# Inspect the bracket label assigned to each age.
df.head()

Unnamed: 0,Name,Age,Age_Binned
0,Alice,25,19-30
1,Bob,32,31-40
2,Charlie,45,41-50
3,David,18,0-18
4,Emma,60,51+


In [11]:
# Number of people in each age bracket.
df["Age_Binned"].value_counts()

Age_Binned
19-30    3
31-40    3
41-50    2
0-18     1
51+      1
Name: count, dtype: int64

In [12]:
# For contrast: the raw Age column has ten distinct values, each occurring
# once, so counting it directly gives no grouping at all.
df["Age"].value_counts()

Age
25    1
32    1
45    1
18    1
60    1
37    1
28    1
22    1
49    1
35    1
Name: count, dtype: int64