In [3]:
import pandas as pd
import numpy as np

# Sample data: a single column of ages.
data = {'age': [15, 25, 35, 45, 55, 65, 75]}
df = pd.DataFrame(data)

# Bucket edges for the age ranges (adjust to suit the use case).
bins = [0, 18, 30, 40, 50, 60, 100]
# One label per bucket, i.e. len(labels) == len(bins) - 1.
labels = ['0-18', '19-30', '31-40', '41-50', '51-60', '61-100']

# Assign each age to its bucket.
# right=True makes every interval closed on the right: (18, 30], (30, 40], ...
# include_lowest=True additionally closes the first interval on the left, so an
# age of exactly 0 lands in '0-18' instead of silently becoming NaN.
# Ages outside the outermost edges (e.g. above 100) still map to NaN.
df['age_bucket'] = pd.cut(
    df['age'],
    bins=bins,
    labels=labels,
    right=True,
    include_lowest=True,
)

print(df)

   age age_bucket
0   15       0-18
1   25      19-30
2   35      31-40
3   45      41-50
4   55      51-60
5   65     61-100
6   75     61-100


In [4]:
# One-hot encode the bucketed ages: one indicator column per bucket label,
# each column named "age_<label>".
df_one_hot = pd.get_dummies(data=df.loc[:, 'age_bucket'], prefix='age')

print(df_one_hot)


   age_0-18  age_19-30  age_31-40  age_41-50  age_51-60  age_61-100
0      True      False      False      False      False       False
1     False       True      False      False      False       False
2     False      False       True      False      False       False
3     False      False      False       True      False       False
4     False      False      False      False       True       False
5     False      False      False      False      False        True
6     False      False      False      False      False        True


In [5]:
# Append the one-hot columns to the right of the original frame; rows are
# matched on the index the two frames share.
df_final = pd.concat(objs=[df, df_one_hot], axis='columns')

print(df_final)

   age age_bucket  age_0-18  age_19-30  age_31-40  age_41-50  age_51-60  \
0   15       0-18      True      False      False      False      False   
1   25      19-30     False       True      False      False      False   
2   35      31-40     False      False       True      False      False   
3   45      41-50     False      False      False       True      False   
4   55      51-60     False      False      False      False       True   
5   65     61-100     False      False      False      False      False   
6   75     61-100     False      False      False      False      False   

   age_61-100  
0       False  
1       False  
2       False  
3       False  
4       False  
5        True  
6        True  


In [6]:
# Display the merged frame without the raw 'age' column. drop() returns a new
# DataFrame and the result is not assigned, so df_final itself keeps 'age'.
df_final.drop(labels=['age'], axis='columns')

Unnamed: 0,age_bucket,age_0-18,age_19-30,age_31-40,age_41-50,age_51-60,age_61-100
0,0-18,True,False,False,False,False,False
1,19-30,False,True,False,False,False,False
2,31-40,False,False,True,False,False,False
3,41-50,False,False,False,True,False,False
4,51-60,False,False,False,False,True,False
5,61-100,False,False,False,False,False,True
6,61-100,False,False,False,False,False,True
