In [None]:
# 导入必要的库
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [None]:
# Load and preprocess the data
data = pd.read_csv('dataset.csv')

# Drop rows with a missing text or label before tokenizing: fit_on_texts
# calls string methods on every entry, so a NaN (float) text would raise,
# and a NaN label would turn the training loss into NaN.
# The raw frame `data` is left untouched.
data_clean = data.dropna(subset=['text', 'label'])
texts = data_clean['text'].astype(str).values  # coerce non-string cells (e.g. numbers) to str
labels = data_clean['label'].values

# Limit the vocabulary to the most frequent words (token ids below 5000).
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad or truncate every sequence to exactly 100 token ids.
x_data = pad_sequences(sequences, maxlen=100)
y_data = np.array(labels)


In [None]:
# Build the TextCNN model
# Take the dimensions from the preprocessing objects so the model cannot
# drift out of sync with the tokenizer / padding settings.
vocab_size = tokenizer.num_words   # vocabulary cap configured on the Tokenizer
sequence_length = x_data.shape[1]  # padded sequence length

model = Sequential([
    # Explicit Input layer instead of Embedding(input_length=...): that
    # argument is deprecated in Keras 3, and without a known input shape the
    # model stays unbuilt, so summary() cannot report shapes or parameters.
    tf.keras.Input(shape=(sequence_length,), dtype='int32'),
    Embedding(input_dim=vocab_size, output_dim=128),
    Conv1D(filters=128, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(units=10, activation='relu'),
    # Single sigmoid unit: binary classification, paired with binary_crossentropy.
    Dense(units=1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


In [None]:
# Train the model
# 20% of the data is held out for validation; the per-epoch metrics are
# kept in `history`.
history = model.fit(
    x_data,
    y_data,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)


In [None]:
# Evaluate the model
# Score only the samples the model never trained on. fit(validation_split=0.2)
# reserves the LAST 20% of x_data / y_data (taken before shuffling) as
# validation data; evaluating on the full arrays would mostly measure
# training-set performance and overstate accuracy.
# NOTE: this is still the validation slice, not an independent test set.
val_start = int(len(x_data) * (1.0 - 0.2))  # must match validation_split in the training cell
loss, accuracy = model.evaluate(x_data[val_start:], y_data[val_start:])
print(f'Loss: {loss}, Accuracy: {accuracy}')
