In [4]:
# 安装必要的库
# !pip install pandas transformers scikit-learn torch numpy


In [14]:
# 安装必要的库
# !pip install pandas transformers scikit-learn torch numpy

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import BertTokenizer, BertForSequenceClassification, RobertaTokenizer, RobertaForSequenceClassification, DistilBertTokenizer, DistilBertForSequenceClassification, AlbertTokenizer, AlbertForSequenceClassification, ElectraTokenizer, ElectraForSequenceClassification, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

# Custom dataset class
class SentimentDataset(Dataset):
    """Map-style dataset that tokenizes one text per item for sequence classification.

    Args:
        texts: Indexable sequence of raw texts.
        labels: Indexable sequence of integer class ids, aligned with ``texts``.
        tokenizer: Tokenizer object; it is called once per item with the text
            and the padding/truncation options shown in ``__getitem__``.
        max_len: Every item is padded or truncated to exactly this many tokens.
    """

    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of samples (one per text)."""
        return len(self.texts)

    def __getitem__(self, item):
        """Tokenize and return the sample at position ``item``.

        Returns:
            dict with the (normalized) ``'text'``, 1-D ``'input_ids'`` and
            ``'attention_mask'`` tensors, and ``'labels'`` as a scalar
            ``torch.long`` tensor.
        """
        text = self.texts[item]
        # Tokenizers only accept strings. A missing value (None or float NaN,
        # which is what an empty CSV cell turns into) becomes an empty string;
        # any other non-string value is converted with str().
        if text is None or (isinstance(text, float) and text != text):
            text = ""
        elif not isinstance(text, str):
            text = str(text)
        label = self.labels[item]

        # Calling the tokenizer directly is the documented entry point and
        # takes the same options as the legacy ``encode_plus`` method.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'text': text,
            # return_tensors='pt' yields a leading batch axis; flatten to 1-D.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Load the train / validation / test CSV splits
train_df, val_df, test_df = (
    pd.read_csv(csv_path, encoding="utf-8")
    for csv_path in (
        "/kaggle/input/stockemotion/train_stockemo.csv",
        "/kaggle/input/stockemotion/val_stockemo.csv",
        "/kaggle/input/stockemotion/test_stockemo.csv",
    )
)

# Encode the labels: the encoder is fitted on the training split only and then
# reused, so a label absent from the training split makes transform() fail.
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_df['emo_label'])
val_labels, test_labels = (
    label_encoder.transform(split_df['emo_label'])
    for split_df in (val_df, test_df)
)

# Token budget per sample and the raw text of each split
max_len = 128
train_texts, val_texts, test_texts = (
    split_df['processed'].tolist()
    for split_df in (train_df, val_df, test_df)
)

def compute_metrics(p):
    """Compute accuracy and weighted F1 for one evaluation pass.

    Args:
        p: Object exposing ``predictions`` (per-class scores, arg-maxed over
            axis 1) and ``label_ids`` (the reference class ids).

    Returns:
        dict with keys ``'accuracy'`` and ``'f1'``.
    """
    y_true = p.label_ids
    y_pred = np.argmax(p.predictions, axis=1)
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1': f1_score(y_true, y_pred, average='weighted'),
    }

def train_and_evaluate(model_name, model_class, tokenizer_class, train_texts, train_labels, val_texts, val_labels, test_texts, test_labels, num_labels):
    """Fine-tune one pretrained classifier and return its test-set predictions.

    The tokenizer and model are loaded from ``model_name``, trained for three
    epochs on the training split, evaluated on the validation split after every
    epoch, and finally used to predict the test split. The sequence length comes
    from the module-level ``max_len``.

    Args:
        model_name: Checkpoint name or path passed to ``from_pretrained``.
        model_class: Sequence-classification model class providing ``from_pretrained``.
        tokenizer_class: Tokenizer class providing ``from_pretrained``.
        train_texts, train_labels: Training texts and their integer class ids.
        val_texts, val_labels: Validation texts and their integer class ids.
        test_texts, test_labels: Test texts and their integer class ids.
        num_labels: Number of output classes for the classification head.

    Returns:
        1-D NumPy array with the predicted class id of every test sample.
    """
    import inspect  # local import: only needed for the keyword-compatibility check below

    # Initialize the tokenizer and the model. ignore_mismatched_sizes lets a
    # checkpoint whose classification head has a different number of outputs
    # load anyway (the head is then newly initialized).
    tokenizer = tokenizer_class.from_pretrained(model_name)
    model = model_class.from_pretrained(model_name, num_labels=num_labels, ignore_mismatched_sizes=True)

    # Build the dataset instances
    train_dataset = SentimentDataset(train_texts, train_labels, tokenizer, max_len)
    val_dataset = SentimentDataset(val_texts, val_labels, tokenizer, max_len)
    test_dataset = SentimentDataset(test_texts, test_labels, tokenizer, max_len)

    # `evaluation_strategy` was renamed to `eval_strategy` in newer Transformers
    # releases; use whichever keyword the installed TrainingArguments accepts so
    # the notebook runs on both.
    accepted_args = inspect.signature(TrainingArguments.__init__).parameters
    eval_strategy_key = 'eval_strategy' if 'eval_strategy' in accepted_args else 'evaluation_strategy'

    # Define the training arguments
    training_args = TrainingArguments(
        output_dir=f'./results/{model_name}',
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir=f'./logs/{model_name}',
        logging_steps=10,
        report_to="none",  # disable wandb logging
        **{eval_strategy_key: "epoch"},
    )

    # Define the Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics
    )

    # Train the model
    trainer.train()

    # Evaluate on the validation set
    eval_result = trainer.evaluate()
    print(f"Validation results for {model_name}: {eval_result}")

    # Predict on the test set; the highest-scoring class is the prediction.
    test_output = trainer.predict(test_dataset)
    return np.argmax(test_output.predictions, axis=1)

# Number of distinct classes seen by the label encoder; shared by every model
# trained in the cells that follow.
num_labels = label_encoder.classes_.shape[0]


In [6]:

# BERT (bert-base-uncased)
print("Training and evaluating BERT...")
bert_predictions = train_and_evaluate(
    model_name='bert-base-uncased',
    model_class=BertForSequenceClassification,
    tokenizer_class=BertTokenizer,
    train_texts=train_texts,
    train_labels=train_labels,
    val_texts=val_texts,
    val_labels=val_labels,
    test_texts=test_texts,
    test_labels=test_labels,
    num_labels=num_labels,
)


Training and evaluating BERT...


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.4939,0.507711,0.742,0.740966
2,0.4545,0.487821,0.745,0.74394
3,0.2444,0.519773,0.778,0.777629




Validation results for bert-base-uncased: {'eval_loss': 0.5197727680206299, 'eval_accuracy': 0.778, 'eval_f1': 0.777629229801036, 'eval_runtime': 5.6025, 'eval_samples_per_second': 178.491, 'eval_steps_per_second': 5.712, 'epoch': 3.0}




In [7]:

# RoBERTa (roberta-base)
print("Training and evaluating RoBERTa...")
roberta_predictions = train_and_evaluate(
    model_name='roberta-base',
    model_class=RobertaForSequenceClassification,
    tokenizer_class=RobertaTokenizer,
    train_texts=train_texts,
    train_labels=train_labels,
    val_texts=val_texts,
    val_labels=val_labels,
    test_texts=test_texts,
    test_labels=test_labels,
    num_labels=num_labels,
)


Training and evaluating RoBERTa...


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.4844,0.514728,0.749,0.747637
2,0.4806,0.51349,0.76,0.758815
3,0.3086,0.468512,0.783,0.783127




Validation results for roberta-base: {'eval_loss': 0.46851226687431335, 'eval_accuracy': 0.783, 'eval_f1': 0.7831274836182872, 'eval_runtime': 5.0992, 'eval_samples_per_second': 196.111, 'eval_steps_per_second': 6.276, 'epoch': 3.0}




In [8]:

# DistilBERT (distilbert-base-uncased)
print("Training and evaluating DistilBERT...")
distilbert_predictions = train_and_evaluate(
    model_name='distilbert-base-uncased',
    model_class=DistilBertForSequenceClassification,
    tokenizer_class=DistilBertTokenizer,
    train_texts=train_texts,
    train_labels=train_labels,
    val_texts=val_texts,
    val_labels=val_labels,
    test_texts=test_texts,
    test_labels=test_labels,
    num_labels=num_labels,
)


Training and evaluating DistilBERT...


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.5104,0.507942,0.731,0.731369
2,0.4878,0.498584,0.747,0.746176
3,0.2513,0.470583,0.774,0.773571




Validation results for distilbert-base-uncased: {'eval_loss': 0.47058308124542236, 'eval_accuracy': 0.774, 'eval_f1': 0.7735713322921334, 'eval_runtime': 3.1587, 'eval_samples_per_second': 316.589, 'eval_steps_per_second': 10.131, 'epoch': 3.0}




In [9]:

# ALBERT
# print("Training and evaluating ALBERT...")
# albert_predictions = train_and_evaluate('albert-base-v2', AlbertForSequenceClassification, AlbertTokenizer, train_texts, train_labels, val_texts, val_labels, test_texts, test_labels, num_labels)


In [10]:

# FinBERT (yiyanghkust/finbert-tone), loaded through the plain BERT classes.
# In the recorded output the checkpoint's 3-output classifier head did not match
# the requested size and was newly initialized before fine-tuning.
print("Training and evaluating FinBERT...")
finbert_predictions = train_and_evaluate(
    model_name='yiyanghkust/finbert-tone',
    model_class=BertForSequenceClassification,
    tokenizer_class=BertTokenizer,
    train_texts=train_texts,
    train_labels=train_labels,
    val_texts=val_texts,
    val_labels=val_labels,
    test_texts=test_texts,
    test_labels=test_labels,
    num_labels=num_labels,
)


Training and evaluating FinBERT...


vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at yiyanghkust/finbert-tone and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.488,0.557009,0.712,0.708641
2,0.491,0.512925,0.743,0.743205
3,0.2527,0.513811,0.769,0.769242




Validation results for yiyanghkust/finbert-tone: {'eval_loss': 0.5138107538223267, 'eval_accuracy': 0.769, 'eval_f1': 0.7692421274354924, 'eval_runtime': 5.6239, 'eval_samples_per_second': 177.813, 'eval_steps_per_second': 5.69, 'epoch': 3.0}




In [15]:

# SpanBERT (SpanBERT/spanbert-base-cased), loaded through the plain BERT classes.
# NOTE(review): this is a cased checkpoint — confirm that BertTokenizer is not
# lower-casing the input for it (do_lower_case) before trusting these scores.
print("Training and evaluating SpanBERT...")
spanbert_predictions = train_and_evaluate(
    model_name='SpanBERT/spanbert-base-cased',
    model_class=BertForSequenceClassification,
    tokenizer_class=BertTokenizer,
    train_texts=train_texts,
    train_labels=train_labels,
    val_texts=val_texts,
    val_labels=val_labels,
    test_texts=test_texts,
    test_labels=test_labels,
    num_labels=num_labels,
)


Training and evaluating SpanBERT...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SpanBERT/spanbert-base-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,2.0826,2.071701,0.297,0.213441
2,1.9112,1.885006,0.341,0.278379
3,1.6433,1.749213,0.399,0.36756




Validation results for SpanBERT/spanbert-base-cased: {'eval_loss': 1.7492130994796753, 'eval_accuracy': 0.399, 'eval_f1': 0.3675595684386063, 'eval_runtime': 5.5599, 'eval_samples_per_second': 179.859, 'eval_steps_per_second': 5.755, 'epoch': 3.0}




In [13]:

# More models can be added here; append their prediction arrays to the tuple below.

# Majority vote across the fine-tuned models.
# NOTE(review): every *_predictions array must already exist from the training
# cells above; the recorded execution counts show this cell was last run before
# the SpanBERT cell, so re-run the notebook top to bottom before quoting numbers.
model_votes = (
    finbert_predictions,
    roberta_predictions,
    bert_predictions,
    distilbert_predictions,
    spanbert_predictions,
)

# For each test sample, count the votes per class id and keep the most frequent.
# argmax returns the first maximum, so a tie goes to the smallest class id.
final_predictions = [
    np.bincount([model_preds[sample_idx] for model_preds in model_votes]).argmax()
    for sample_idx in range(len(test_labels))
]

# Compute accuracy and weighted F1 of the ensemble on the test set
accuracy = accuracy_score(test_labels, final_predictions)
f1 = f1_score(test_labels, final_predictions, average='weighted')

print(f"Test Accuracy: {accuracy}")
print(f"Test F1 Score: {f1}")

Test Accuracy: 0.808
Test F1 Score: 0.8085280494081316
