In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score


# Read Data

In [None]:

csv_file_path = '/kaggle/input/dataset/combined_tweets.csv'

# 读取CSV文件
combined_tweets = pd.read_csv(csv_file_path)

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder

# 提取特征和标签
X = combined_tweets['lemma']  # 词形还原后的推文
y = combined_tweets['sentiment']  # 情感标签：积极、消极、中立

X = X.astype(str).fillna("")

# 转换文本数据为TF-IDF特征矩阵
vectorizer = TfidfVectorizer(max_features=5000)  # 选择最多5000个特征
X_tfidf = vectorizer.fit_transform(X)

# 将情感标签编码为数值
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # 例如：positive -> 0, negative -> 1, neutral -> 2


In [5]:
# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y_encoded, test_size=0.2, random_state=42)


# Logic Regression

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# 定义模型
logreg = LogisticRegression(max_iter=1000)

# 训练模型
logreg.fit(X_train, y_train)

# 预测
y_pred_logreg = logreg.predict(X_test)

# 输出评估指标
print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_logreg))
print("Logistic Regression Classification Report:")
print(classification_report(y_test, y_pred_logreg, target_names=label_encoder.classes_))


Logistic Regression Accuracy: 0.8676955702167767
Logistic Regression Classification Report:
              precision    recall  f1-score   support

    Negative       0.89      0.79      0.83      2292
     Neutral       0.80      0.89      0.84      2548
    Positive       0.91      0.91      0.91      3648

    accuracy                           0.87      8488
   macro avg       0.87      0.86      0.86      8488
weighted avg       0.87      0.87      0.87      8488



# Decision Tree

In [7]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# 决策树模型
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)
y_pred_tree = decision_tree.predict(X_test)

# 决策树结果
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_tree))
print("Decision Tree Classification Report:")
print(classification_report(y_test, y_pred_tree, target_names=label_encoder.classes_))

Decision Tree Accuracy: 0.7196041470311028
Decision Tree Classification Report:
              precision    recall  f1-score   support

    Negative       0.66      0.61      0.63      2292
     Neutral       0.68      0.76      0.72      2548
    Positive       0.78      0.76      0.77      3648

    accuracy                           0.72      8488
   macro avg       0.71      0.71      0.71      8488
weighted avg       0.72      0.72      0.72      8488



# Naive Bayes

In [8]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# 定义模型
nb_model = MultinomialNB()

# 训练模型
nb_model.fit(X_train, y_train)

# 预测
y_pred_nb = nb_model.predict(X_test)

# 输出评估指标
print("Naive Bayes Accuracy:", accuracy_score(y_test, y_pred_nb))
print("Naive Bayes Classification Report:")
print(classification_report(y_test, y_pred_nb, target_names=label_encoder.classes_))


Naive Bayes Accuracy: 0.7455230914231856
Naive Bayes Classification Report:
              precision    recall  f1-score   support

    Negative       0.82      0.67      0.74      2292
     Neutral       0.76      0.59      0.67      2548
    Positive       0.71      0.90      0.79      3648

    accuracy                           0.75      8488
   macro avg       0.76      0.72      0.73      8488
weighted avg       0.75      0.75      0.74      8488



# Random Forest 

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# 定义模型
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# 训练模型
rf_model.fit(X_train, y_train)

# 预测
y_pred_rf = rf_model.predict(X_test)

# 输出评估指标
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Random Forest Classification Report:")
print(classification_report(y_test, y_pred_rf, target_names=label_encoder.classes_))


Random Forest Accuracy: 0.8109095193213949
Random Forest Classification Report:
              precision    recall  f1-score   support

    Negative       0.84      0.68      0.75      2292
     Neutral       0.74      0.87      0.80      2548
    Positive       0.86      0.85      0.85      3648

    accuracy                           0.81      8488
   macro avg       0.81      0.80      0.80      8488
weighted avg       0.82      0.81      0.81      8488



# xgboost

In [10]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score

# 定义模型
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)

# 训练模型
xgb_model.fit(X_train, y_train)

# 预测
y_pred_xgb = xgb_model.predict(X_test)

# 输出评估指标
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred_xgb))
print("XGBoost Classification Report:")
print(classification_report(y_test, y_pred_xgb, target_names=label_encoder.classes_))


XGBoost Accuracy: 0.8036050895381716
XGBoost Classification Report:
              precision    recall  f1-score   support

    Negative       0.87      0.67      0.76      2292
     Neutral       0.71      0.88      0.78      2548
    Positive       0.86      0.84      0.85      3648

    accuracy                           0.80      8488
   macro avg       0.81      0.79      0.80      8488
weighted avg       0.82      0.80      0.80      8488



# Ensemble learning stacking

In [11]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

# 定义基分类器
base_estimators = [
    ('logreg', LogisticRegression(max_iter=1000)),
    ('decision_tree', DecisionTreeClassifier(random_state=42)),  # 替换为决策树
    ('naive_bayes', MultinomialNB()),
    ('random_forest', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('xgboost', XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42))
]

# 定义元分类器（逻辑回归）
stacking_model = StackingClassifier(
    estimators=base_estimators,
    final_estimator=LogisticRegression(max_iter=1000),
    cv=5  # 交叉验证
)

# 训练 Stacking 模型
stacking_model.fit(X_train, y_train)

# 预测
y_pred_stacking = stacking_model.predict(X_test)

# 输出评估指标
print("Stacking Classifier Accuracy:", accuracy_score(y_test, y_pred_stacking))
print("Stacking Classifier Classification Report:")
print(classification_report(y_test, y_pred_stacking, target_names=label_encoder.classes_))


Stacking Classifier Accuracy: 0.8779453345900095
Stacking Classifier Classification Report:
              precision    recall  f1-score   support

    Negative       0.87      0.83      0.85      2292
     Neutral       0.83      0.88      0.85      2548
    Positive       0.92      0.91      0.91      3648

    accuracy                           0.88      8488
   macro avg       0.87      0.87      0.87      8488
weighted avg       0.88      0.88      0.88      8488



# Ensemble learning stacking - voting

In [12]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score

# 定义基分类器
voting_estimators = [
    ('logreg', LogisticRegression(max_iter=1000)),
    ('decision_tree', DecisionTreeClassifier(random_state=42)),  # 替换为决策树
    ('naive_bayes', MultinomialNB()),
    ('random_forest', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('xgboost', XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42))
]

# 定义 Voting 模型（软投票）
voting_model = VotingClassifier(
    estimators=voting_estimators,
    voting='soft'  # 使用软投票 ('hard' 表示硬投票)
)

# 训练 Voting 模型
voting_model.fit(X_train, y_train)

# 预测
y_pred_voting = voting_model.predict(X_test)

# 输出评估指标
print("Voting Classifier Accuracy:", accuracy_score(y_test, y_pred_voting))
print("Voting Classifier Classification Report:")
print(classification_report(y_test, y_pred_voting, target_names=label_encoder.classes_))


Voting Classifier Accuracy: 0.8210414703110274
Voting Classifier Classification Report:
              precision    recall  f1-score   support

    Negative       0.83      0.71      0.77      2292
     Neutral       0.77      0.85      0.81      2548
    Positive       0.86      0.87      0.86      3648

    accuracy                           0.82      8488
   macro avg       0.82      0.81      0.81      8488
weighted avg       0.82      0.82      0.82      8488



In [9]:
from tensorflow.keras.utils import to_categorical

# 将情感标签转为 One-Hot 编码
y_train_onehot = to_categorical(y_train)
y_test_onehot = to_categorical(y_test)


# keras

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# 定义最简单的神经网络模型
basic_nn_model = Sequential([
    Dense(3, activation='softmax', input_shape=(X_train.shape[1],))  # 输入层和输出层
])

# 编译模型
basic_nn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 训练模型
basic_nn_model.fit(
    X_train,
    y_train_onehot,
    validation_data=(X_test, y_test_onehot),
    epochs=10,
    batch_size=32,
    verbose=1
)

# 获取预测结果
y_pred_probs = basic_nn_model.predict(X_test)  # 预测概率
y_pred = np.argmax(y_pred_probs, axis=1)  # 将概率转换为类别索引
y_true = np.argmax(y_test_onehot, axis=1)  # 将 one-hot 编码转换为类别索引

# 计算准确率
accuracy = accuracy_score(y_true, y_pred)
print("Basic Neural Network Accuracy:", accuracy)

# 生成分类报告
print("Basic Neural Network Classification Report:")
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))


Epoch 1/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.4388 - loss: 1.0465 - val_accuracy: 0.5622 - val_loss: 0.9429
Epoch 2/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6205 - loss: 0.9051 - val_accuracy: 0.7049 - val_loss: 0.8497
Epoch 3/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7535 - loss: 0.8101 - val_accuracy: 0.7486 - val_loss: 0.7806
Epoch 4/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7932 - loss: 0.7364 - val_accuracy: 0.7779 - val_loss: 0.7271
Epoch 5/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8235 - loss: 0.6830 - val_accuracy: 0.7936 - val_loss: 0.6841
Epoch 6/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8435 - loss: 0.6339 - val_accuracy: 0.8121 - val_loss: 0.6491
Epoch 7/10
[1m1

# Keras + Hidden layer

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# 定义神经网络模型：输入层 + 隐藏层 + 输出层
hidden_nn_model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),  # 隐藏层
    Dense(3, activation='softmax')  # 输出层
])

# 编译模型
hidden_nn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 训练模型
hidden_nn_model.fit(
    X_train,
    y_train_onehot,
    validation_data=(X_test, y_test_onehot),
    epochs=10,
    batch_size=32,
    verbose=1
)

# 获取预测结果
y_pred_probs = hidden_nn_model.predict(X_test)  # 预测概率
y_pred = np.argmax(y_pred_probs, axis=1)  # 将概率转换为类别索引
y_true = np.argmax(y_test_onehot, axis=1)  # 将 one-hot 编码转换为类别索引

# 计算准确率
accuracy = accuracy_score(y_true, y_pred)
print("Hidden Neural Network Accuracy:", accuracy)

# 生成分类报告
print("Hidden Neural Network Classification Report:")
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.6466 - loss: 0.7785 - val_accuracy: 0.8804 - val_loss: 0.3778
Epoch 2/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.9146 - loss: 0.2833 - val_accuracy: 0.8906 - val_loss: 0.3451
Epoch 3/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.9353 - loss: 0.2104 - val_accuracy: 0.8911 - val_loss: 0.3434
Epoch 4/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.9497 - loss: 0.1602 - val_accuracy: 0.8909 - val_loss: 0.3539
Epoch 5/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.9617 - loss: 0.1301 - val_accuracy: 0.8856 - val_loss: 0.3888
Epoch 6/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.9719 - loss: 0.0968 - val_accuracy: 0.8812 - val_loss: 0.4270
Epoch 7/10
[1m1

# Keras + Hidden + early stop

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# 定义神经网络模型
earlystopping_model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),  # 隐藏层
    Dense(3, activation='softmax')  # 输出层
])

# 编译模型
earlystopping_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 设置 EarlyStopping 回调
early_stopping = EarlyStopping(
    monitor='val_loss',  # 监控验证集损失
    patience=3,  # 容忍验证损失不改善的轮数
    restore_best_weights=True  # 恢复验证集表现最好的权重
)

# 训练模型
earlystopping_model.fit(
    X_train,
    y_train_onehot,
    validation_data=(X_test, y_test_onehot),
    epochs=50,  # 设置较大的最大训练轮数
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping]
)

# 获取预测结果和评估
y_pred_probs = earlystopping_model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test_onehot, axis=1)

print("EarlyStopping Model Accuracy:", accuracy_score(y_true, y_pred))
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.6465 - loss: 0.7819 - val_accuracy: 0.8787 - val_loss: 0.3842
Epoch 2/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9121 - loss: 0.2888 - val_accuracy: 0.8911 - val_loss: 0.3458
Epoch 3/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9358 - loss: 0.2123 - val_accuracy: 0.8880 - val_loss: 0.3482
Epoch 4/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.9494 - loss: 0.1650 - val_accuracy: 0.8888 - val_loss: 0.3631
Epoch 5/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9614 - loss: 0.1281 - val_accuracy: 0.8856 - val_loss: 0.3904
[1m266/266[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
EarlyStopping Model Accuracy: 0.8911404335532517
              precision    recall  f1-score   support

 

# Keras + Hidden Layer + Early stop + LearningRateScheduler

In [13]:
from tensorflow.keras.callbacks import LearningRateScheduler

# 定义学习率调度函数
def lr_schedule(epoch, lr):
    if epoch < 5:
        return lr
    else:
        return lr * 0.5  # 每 5 个 epoch 学习率减半

# 定义神经网络模型
lrscheduler_model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),  # 隐藏层
    Dense(3, activation='softmax')  # 输出层
])

# 编译模型
lrscheduler_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 添加 EarlyStopping 和 LearningRateScheduler 回调
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)
lr_scheduler = LearningRateScheduler(lr_schedule)

# 训练模型
lrscheduler_model.fit(
    X_train,
    y_train_onehot,
    validation_data=(X_test, y_test_onehot),
    epochs=50,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping, lr_scheduler]
)

# 评估性能
y_pred_probs = lrscheduler_model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

print("LearningRateScheduler Model Accuracy:", accuracy_score(y_true, y_pred))
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.6472 - loss: 0.7812 - val_accuracy: 0.8738 - val_loss: 0.3906 - learning_rate: 0.0010
Epoch 2/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9140 - loss: 0.2849 - val_accuracy: 0.8884 - val_loss: 0.3498 - learning_rate: 0.0010
Epoch 3/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9361 - loss: 0.2129 - val_accuracy: 0.8882 - val_loss: 0.3469 - learning_rate: 0.0010
Epoch 4/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9498 - loss: 0.1637 - val_accuracy: 0.8869 - val_loss: 0.3603 - learning_rate: 0.0010
Epoch 5/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9601 - loss: 0.1317 - val_accuracy: 0.8834 - val_loss: 0.3902 - learning_rate: 0.0010
Epoch 6/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

# Keras + Hidden Layer + Early stop + LearningRateScheduler + BatchNormalization

In [14]:
from tensorflow.keras.layers import BatchNormalization

# 定义神经网络模型
batchnorm_model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    BatchNormalization(),  # 批量归一化层
    Dense(3, activation='softmax')
])

# 编译模型
batchnorm_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 添加 EarlyStopping 和 LearningRateScheduler 回调
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)
lr_scheduler = LearningRateScheduler(lr_schedule)

# 训练模型
batchnorm_model.fit(
    X_train,
    y_train_onehot,
    validation_data=(X_test, y_test_onehot),
    epochs=50,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping, lr_scheduler]
)

# 评估性能
y_pred_probs = batchnorm_model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

print("BatchNormalization Model Accuracy:", accuracy_score(y_true, y_pred))
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.6940 - loss: 0.6952 - val_accuracy: 0.8612 - val_loss: 0.3984 - learning_rate: 0.0010
Epoch 2/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9222 - loss: 0.2354 - val_accuracy: 0.8679 - val_loss: 0.3794 - learning_rate: 0.0010
Epoch 3/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9589 - loss: 0.1268 - val_accuracy: 0.8682 - val_loss: 0.4508 - learning_rate: 0.0010
Epoch 4/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9791 - loss: 0.0724 - val_accuracy: 0.8612 - val_loss: 0.5277 - learning_rate: 0.0010
Epoch 5/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9850 - loss: 0.0502 - val_accuracy: 0.8605 - val_loss: 0.6028 - learning_rate: 0.0010
[1m266/266[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/s

# Keras + multiple hidden layers

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# 定义神经网络模型：多隐藏层 + 输出层
hidden_nn_model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),  # 第1隐藏层
    Dense(64, activation='relu'),  # 第2隐藏层
    Dense(32, activation='relu'),  # 第3隐藏层
    Dense(3, activation='softmax')  # 输出层
])

# 编译模型
hidden_nn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 训练模型
hidden_nn_model.fit(
    X_train,
    y_train_onehot,
    validation_data=(X_test, y_test_onehot),
    epochs=20,  # 增加训练轮数
    batch_size=32,
    verbose=1
)

# 获取预测结果
y_pred_probs = hidden_nn_model.predict(X_test)  # 预测概率
y_pred = np.argmax(y_pred_probs, axis=1)  # 将概率转换为类别索引
y_true = np.argmax(y_test_onehot, axis=1)  # 将 one-hot 编码转换为类别索引

# 计算准确率
accuracy = accuracy_score(y_true, y_pred)
print("Hidden Neural Network Accuracy:", accuracy)

# 生成分类报告
print("Hidden Neural Network Classification Report:")
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.6765 - loss: 0.7077 - val_accuracy: 0.8799 - val_loss: 0.3490
Epoch 2/20
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.9176 - loss: 0.2376 - val_accuracy: 0.8837 - val_loss: 0.3292
Epoch 3/20
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9531 - loss: 0.1441 - val_accuracy: 0.8855 - val_loss: 0.3534
Epoch 4/20
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9752 - loss: 0.0799 - val_accuracy: 0.8817 - val_loss: 0.4449
Epoch 5/20
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9893 - loss: 0.0347 - val_accuracy: 0.8843 - val_loss: 0.5403
Epoch 6/20
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9957 - loss: 0.0152 - val_accuracy: 0.8752 - val_loss: 0.6477
Epoch 7/20
[1m1

# Keras + Hidden layer + Dropout

In [16]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# 定义神经网络模型：隐藏层 + Dropout + 输出层
dropout_nn_model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),  # 隐藏层
    Dropout(0.5),  # Dropout 层，设置 50% 的节点随机失活
    Dense(3, activation='softmax')  # 输出层
])

# 编译模型
dropout_nn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 训练模型
dropout_nn_model.fit(
    X_train,
    y_train_onehot,
    validation_data=(X_test, y_test_onehot),
    epochs=10,
    batch_size=32,
    verbose=1
)

# 获取预测结果
y_pred_probs = dropout_nn_model.predict(X_test)  # 预测概率
y_pred = np.argmax(y_pred_probs, axis=1)  # 将概率转换为类别索引
y_true = np.argmax(y_test_onehot, axis=1)  # 将 one-hot 编码转换为类别索引

# 计算准确率
accuracy = accuracy_score(y_true, y_pred)
print("Dropout Neural Network Accuracy:", accuracy)

# 生成分类报告
print("Dropout Neural Network Classification Report:")
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.6102 - loss: 0.8451 - val_accuracy: 0.8716 - val_loss: 0.4155
Epoch 2/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.8928 - loss: 0.3507 - val_accuracy: 0.8914 - val_loss: 0.3474
Epoch 3/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.9239 - loss: 0.2499 - val_accuracy: 0.8951 - val_loss: 0.3301
Epoch 4/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.9368 - loss: 0.2078 - val_accuracy: 0.8992 - val_loss: 0.3269
Epoch 5/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.9500 - loss: 0.1633 - val_accuracy: 0.8975 - val_loss: 0.3310
Epoch 6/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.9559 - loss: 0.1430 - val_accuracy: 0.8976 - val_loss: 0.3404
Epoch 7/10
[1m1

# Keras + multiple Hidden layer + Dropout

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# 定义神经网络模型：多层隐藏层 + Dropout
multi_hidden_nn_model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),  # 第一隐藏层
    Dropout(0.3),
    Dense(128, activation='relu'),  # 第二隐藏层
    Dropout(0.3),
    Dense(64, activation='relu'),  # 第三隐藏层
    Dropout(0.3),
    Dense(3, activation='softmax')  # 输出层
])

# 编译模型
multi_hidden_nn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 训练模型
multi_hidden_nn_model.fit(
    X_train,
    y_train_onehot,
    validation_data=(X_test, y_test_onehot),
    epochs=10,
    batch_size=32,
    verbose=1
)

# 获取预测结果
y_pred_probs = multi_hidden_nn_model.predict(X_test)  # 预测概率
y_pred = np.argmax(y_pred_probs, axis=1)  # 将概率转换为类别索引
y_true = np.argmax(y_test_onehot, axis=1)  # 将 one-hot 编码转换为类别索引

# 计算准确率
accuracy = accuracy_score(y_true, y_pred)
print("Multi-Hidden Layer Neural Network Accuracy:", accuracy)

# 生成分类报告
print("Multi-Hidden Layer Neural Network Classification Report:")
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 11ms/step - accuracy: 0.6506 - loss: 0.7531 - val_accuracy: 0.8679 - val_loss: 0.3793
Epoch 2/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - accuracy: 0.9108 - loss: 0.2673 - val_accuracy: 0.8858 - val_loss: 0.3354
Epoch 3/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - accuracy: 0.9466 - loss: 0.1587 - val_accuracy: 0.8845 - val_loss: 0.3596
Epoch 4/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - accuracy: 0.9654 - loss: 0.1060 - val_accuracy: 0.8823 - val_loss: 0.4166
Epoch 5/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - accuracy: 0.9773 - loss: 0.0703 - val_accuracy: 0.8811 - val_loss: 0.4816
Epoch 6/10
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.9837 - loss: 0.0474 - val_accuracy: 0.8809 - val_loss: 0.5412
Epoc

# Keras + Multiple Hidden layers + Dropout + Early stop + BatchNormalization

In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# 定义学习率调度函数
def lr_scheduler(epoch, lr):
    """根据epoch动态调整学习率."""
    if epoch < 5:
        return lr
    else:
        return lr * 0.5

# 定义回调函数
early_stopping = EarlyStopping(
    monitor='val_loss', 
    patience=3, 
    restore_best_weights=True, 
    verbose=1
)
lr_scheduler_callback = LearningRateScheduler(lr_scheduler, verbose=1)

# 定义神经网络模型：添加 BatchNormalization
multi_hidden_nn_model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),  # 第一隐藏层
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu'),  # 第二隐藏层
    BatchNormalization(),
    Dropout(0.3),
    Dense(64, activation='relu'),  # 第三隐藏层
    BatchNormalization(),
    Dropout(0.3),
    Dense(3, activation='softmax')  # 输出层
])

# 编译模型
multi_hidden_nn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 训练模型，添加回调函数
multi_hidden_nn_model.fit(
    X_train,
    y_train_onehot,
    validation_data=(X_test, y_test_onehot),
    epochs=20,  # 增加足够的epochs以适应EarlyStopping
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping, lr_scheduler_callback]  # 添加回调
)

# 获取预测结果
y_pred_probs = multi_hidden_nn_model.predict(X_test)  # 预测概率
y_pred = np.argmax(y_pred_probs, axis=1)  # 将概率转换为类别索引
y_true = np.argmax(y_test_onehot, axis=1)  # 将 one-hot 编码转换为类别索引

# 计算准确率
accuracy = accuracy_score(y_true, y_pred)
print("Multi-Hidden Layer Neural Network Accuracy:", accuracy)

# 生成分类报告
print("Multi-Hidden Layer Neural Network Classification Report:")
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Epoch 1: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 1/20
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 12ms/step - accuracy: 0.5653 - loss: 1.0008 - val_accuracy: 0.8464 - val_loss: 0.4289 - learning_rate: 0.0010

Epoch 2: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 2/20
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 11ms/step - accuracy: 0.8623 - loss: 0.3848 - val_accuracy: 0.8627 - val_loss: 0.3761 - learning_rate: 0.0010

Epoch 3: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 3/20
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 11ms/step - accuracy: 0.9038 - loss: 0.2748 - val_accuracy: 0.8718 - val_loss: 0.3706 - learning_rate: 0.0010

Epoch 4: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 4/20
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 11ms/step - accuracy: 0.9219 -

# Keras + multiple layers + batch normalization + early stop

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# 定义神经网络模型：多隐藏层 + BatchNormalization + 输出层
batchnorm_nn_model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),  # 第1隐藏层
    BatchNormalization(),  # 批量归一化层

    Dense(64, activation='relu'),  # 第2隐藏层
    BatchNormalization(),  # 批量归一化层

    Dense(32, activation='relu'),  # 第3隐藏层
    BatchNormalization(),  # 批量归一化层

    Dense(3, activation='softmax')  # 输出层
])

# 编译模型
batchnorm_nn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 定义 EarlyStopping 回调函数
early_stopping = EarlyStopping(
    monitor='val_loss',        # 监控验证集损失
    patience=5,                # 容忍验证集损失不下降的次数
    restore_best_weights=True  # 恢复验证集表现最好的权重
)

# 训练模型
batchnorm_nn_model.fit(
    X_train,
    y_train_onehot,
    validation_data=(X_test, y_test_onehot),
    epochs=50,                # 设置较大的最大训练轮数
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping]  # 添加 EarlyStopping 回调
)

# 获取预测结果
y_pred_probs = batchnorm_nn_model.predict(X_test)  # 预测概率
y_pred = np.argmax(y_pred_probs, axis=1)  # 将概率转换为类别索引
y_true = np.argmax(y_test_onehot, axis=1)  # 将 one-hot 编码转换为类别索引

# 计算准确率
accuracy = accuracy_score(y_true, y_pred)
print("BatchNormalization Neural Network Accuracy:", accuracy)

# 生成分类报告
print("BatchNormalization Neural Network Classification Report:")
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.6268 - loss: 0.8520 - val_accuracy: 0.8472 - val_loss: 0.4172
Epoch 2/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.8928 - loss: 0.3015 - val_accuracy: 0.8551 - val_loss: 0.4112
Epoch 3/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.9334 - loss: 0.1844 - val_accuracy: 0.8607 - val_loss: 0.4265
Epoch 4/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.9593 - loss: 0.1222 - val_accuracy: 0.8651 - val_loss: 0.4846
Epoch 5/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.9698 - loss: 0.0931 - val_accuracy: 0.8563 - val_loss: 0.5517
Epoch 6/50
[1m1061/1061[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.9758 - loss: 0.0720 - val_accuracy: 0.8603 - val_loss: 0.5685
Epoch 7/50
[1m1