## 数据预处理

In [1]:
from data_preprocess import data_loader, data_preprocess
import numpy as np
import warnings

warnings.filterwarnings("ignore")  # 忽略警告

file_path = '../datasets/dat/'
datasetname = 'iris0'
X, y = data_loader(file_path + datasetname + '.dat')

X_train, X_test, y_train, y_test = data_preprocess(X, y, random_state=42)

## t-SNE降维

In [2]:
from sklearn.preprocessing import StandardScaler
from visualize import tsne_visualization_binary

scaler = StandardScaler()

# 使用t-SNE进行降维
X_tsne = tsne_visualization_binary(scaler.fit_transform(X_train), y_train, save_path='./tsne_results/' + datasetname,
                                   filename=datasetname, perplexity=30)  # 传入的是标准化后的特征数据

t-SNE可视化图片已保存至: ./tsne_results/iris0\iris0.png


## DSSMOTE_P

In [3]:
from de import DSSMOTE_P
from config import EvolutionaryParameterConfig

evol_parameter = EvolutionaryParameterConfig(300, 0.8, 0.2, 100, False)
dsp = DSSMOTE_P(X=X_train, y=y_train, evol_parameter=evol_parameter)

X_syn, y_syn = dsp.fit_resample_synthesis_only()

print(X_syn)
print(y_syn)

# 可视化
y_syn = [2 for _ in range(len(y_syn))]
X_train_resampled = np.vstack((X_train, X_syn))
y_train_resampled = np.hstack((y_train, y_syn))
# 4. 使用t-SNE进行降维
X_tsne_resampled = tsne_visualization_binary(scaler.fit_transform(X_train_resampled), y_train_resampled,
                                             save_path='./tsne_results/' + datasetname,
                                             filename=datasetname + '_dsp', perplexity=30)

第 1 轮合成
########### 	 Start the evolution! 	 ##########
前沿中个体数： 1 合成实例数： 20
第 2 轮合成
########### 	 Start the evolution! 	 ##########
前沿中个体数： 1 合成实例数： 20
[[ 4.95432576  3.50641317  1.23078898  0.15057378]
 [ 4.95430514  3.50642135  1.23092347  0.15056176]
 [ 4.95429616  3.50636517  1.23078792  0.15055936]
 [ 4.95423947  3.50646794  1.23074897  0.15057627]
 [ 4.95431479  3.50636517  1.23078792  0.15055319]
 [ 4.95424254  3.50649084  1.23074897  0.15054586]
 [ 4.95429879  3.5063519   1.23091326  0.15055936]
 [ 4.95429628  3.50635689  1.23077022  0.15055936]
 [ 4.95421034  3.50644592  1.2306167   0.15055319]
 [ 4.95429619  3.50635793  1.23076511  0.15055936]
 [ 4.95421053  3.506433    1.2306167   0.15055319]
 [ 4.95421053  3.506433    1.2306167   0.15055319]
 [ 4.95421053  3.506433    1.2306167   0.15055319]
 [ 4.95421053  3.506433    1.2306167   0.15055319]
 [ 4.95421053  3.506433    1.2306167   0.15055319]
 [ 4.95421053  3.506433    1.2306167   0.15055319]
 [ 4.95435573  3.50634743  1.230

## DSSMOTE_P_A

In [4]:
from de import DSSMOTE_P_A

evol_parameter = EvolutionaryParameterConfig(300, 0.8, 0.2, 100, False)
dspa = DSSMOTE_P_A(X=X_train, y=y_train, evol_parameter=evol_parameter)

X_syn, y_syn = dspa.fit_resample_synthesis_only()

print(X_syn)
print(y_syn)

# 可视化
y_syn = [2 for _ in range(len(y_syn))]
X_train_resampled = np.vstack((X_train, X_syn))
y_train_resampled = np.hstack((y_train, y_syn))
# 4. 使用t-SNE进行降维
X_tsne_resampled = tsne_visualization_binary(scaler.fit_transform(X_train_resampled), y_train_resampled,
                                             save_path='./tsne_results/' + datasetname,
                                             filename=datasetname + '_dspa', perplexity=30)

第 1 轮合成
########### 	 Start the evolution! 	 ##########
前沿中个体数： 1 合成实例数： 20
第 2 轮合成
########### 	 Start the evolution! 	 ##########
前沿中个体数： 1 合成实例数： 20
[[ 4.78568836  3.57528416  0.79616623 -0.03610108]
 [ 4.785722    3.57526559  0.7962941  -0.03610108]
 [ 4.78335081  3.57648568  0.78916336 -0.0390625 ]
 [ 4.785722    3.57525702  0.79631362 -0.03610108]
 [ 4.785722    3.57535662  0.79610147 -0.03610108]
 [ 4.78335611  3.57643913  0.78923938 -0.0390625 ]
 [ 4.78335354  3.57646131  0.78912549 -0.0390625 ]
 [ 4.7856735   3.57528416  0.79605829 -0.03610108]
 [ 4.78325475  3.57655985  0.78911073 -0.0390625 ]
 [ 4.7833644   3.57647947  0.78923938 -0.0390625 ]
 [ 4.78335764  3.57648407  0.78910886 -0.0390625 ]
 [ 4.7833644   3.5764332   0.78923938 -0.0390625 ]
 [ 4.7833644   3.5764332   0.78923938 -0.0390625 ]
 [ 4.7833644   3.57642981  0.78923938 -0.0390625 ]
 [ 4.7833644   3.57642929  0.78923938 -0.0390625 ]
 [ 4.78335764  3.576462    0.78910886 -0.0390625 ]
 [ 4.78335764  3.576462    0.789

## DGSMOTE

In [5]:
from de import DGSMOTE

evol_parameter = EvolutionaryParameterConfig(30, 0.8, 0.2, 100, False)

dg = DGSMOTE(X=X_train, y=y_train, random_state=42, evol_parameter=evol_parameter)
X_syn, y_syn = dg.fit_resample_synthesis_only()

print(X_syn)
print(y_syn)

# 可视化
y_syn = [2 for _ in range(len(y_syn))]
X_train_resampled = np.vstack((X_train, X_syn))
y_train_resampled = np.hstack((y_train, y_syn))
# 4. 使用t-SNE进行降维
X_tsne_resampled = tsne_visualization_binary(scaler.fit_transform(X_train_resampled), y_train_resampled,
                                             save_path='./tsne_results/' + datasetname,
                                             filename=datasetname + '_dg', perplexity=30)

[[5.08117917 3.27924293 2.48581831 0.52398843]
 [5.33701392 3.30519287 2.30646978 0.57965041]
 [5.48456081 3.33360527 1.82631332 0.53333333]
 [5.39622104 3.38223231 2.3074482  0.5896426 ]
 [4.81660046 3.32775261 1.87656277 0.45901546]
 [5.6890041  3.20687701 2.8        0.6       ]
 [5.26368152 3.5182175  2.15451332 0.6179941 ]
 [5.65321814 3.80133951 2.35216784 0.59934641]
 [5.3830452  3.51805645 2.12708721 0.62215678]
 [5.75918367 2.87265608 3.4        1.0314456 ]
 [5.83006984 3.69211394 2.34760948 0.58123168]
 [4.92390162 3.34191903 1.89603466 0.29509759]
 [4.67068679 3.02703756 1.73357901 0.37668934]
 [5.24928109 3.27006886 1.88738478 0.39123506]
 [5.24388955 3.67785517 2.27671047 0.56605175]
 [5.40837698 3.84967455 2.08330458 0.34957136]
 [5.05658659 3.05684985 1.84226152 0.4       ]
 [5.38790733 3.32477121 2.05666204 0.4303888 ]
 [5.12999698 3.29625109 2.39499676 0.4251637 ]
 [5.34264053 3.09687207 2.83388152 0.82313954]
 [4.93633907 3.08370957 1.6781148  0.2       ]
 [5.70015542 

## DGSMOTE

In [6]:
from de import DGSMOTE_SINGLE

evol_parameter = EvolutionaryParameterConfig(30, 0.8, 0.2, 100, False)

dgs = DGSMOTE_SINGLE(X=X_train, y=y_train, random_state=42, evol_parameter=evol_parameter)
X_syn, y_syn = dg.fit_resample_synthesis_only()

print(X_syn)
print(y_syn)

# 可视化
y_syn = [2 for _ in range(len(y_syn))]
X_train_resampled = np.vstack((X_train, X_syn))
y_train_resampled = np.hstack((y_train, y_syn))
# 4. 使用t-SNE进行降维
X_tsne_resampled = tsne_visualization_binary(scaler.fit_transform(X_train_resampled), y_train_resampled,
                                             save_path='./tsne_results/' + datasetname,
                                             filename=datasetname + '_dgs', perplexity=30)

[[5.07669418 3.27566842 2.48271662 0.52357355]
 [5.28681096 3.32561541 2.13943833 0.50164042]
 [5.65690402 3.1998764  2.47153461 0.8       ]
 [5.29803901 3.43878938 1.90430719 0.39586785]
 [4.78648066 3.33763924 1.80747706 0.43622047]
 [5.55408592 3.29710188 2.28243258 0.42243839]
 [5.30292361 3.47501654 2.3093721  0.66974868]
 [5.67436389 3.74342474 2.49635042 0.66579418]
 [5.28353716 3.58180363 1.90646797 0.54430021]
 [5.31063399 3.15945932 2.26402066 0.67727273]
 [5.74881839 3.75893398 1.94657826 0.40686443]
 [5.0672635  3.28007025 2.23661844 0.40530531]
 [4.86213176 3.04619731 2.03961164 0.50078201]
 [5.19113924 3.27433628 1.75714286 0.34285714]
 [5.17260503 3.7399677  2.0899279  0.48301114]
 [5.38754229 3.87094568 2.0346265  0.3301273 ]
 [5.02210352 3.04440818 1.74404718 0.35544041]
 [5.36654139 3.32823913 2.01542678 0.41299639]
 [5.22387259 3.26984156 2.53154658 0.48571429]
 [5.15577193 3.2539435  1.96914796 0.57905882]
 [5.03878587 3.04039006 2.17578576 0.47864769]
 [5.68274327 