In [1]:
!pip install transformers
!pip install datasets

Collecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m32.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m52.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m55.5 MB/s[0m eta [36m0:00:00[0m
Col

In [2]:
from datasets import load_dataset
from transformers import pipeline
from tqdm import tqdm
from sklearn.metrics import f1_score

In [3]:
dataset = load_dataset('tweet_eval', 'emotion', split='train')

Downloading builder script:   0%|          | 0.00/9.72k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/30.4k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/21.9k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/134k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/60.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/569 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/16.9k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/183 [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/6 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/3257 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1421 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/374 [00:00<?, ? examples/s]

In [4]:
id2emotion = {0: 'anger', 1: 'joy', 2: 'optimism', 3: 'sadness'}
emotion2id = {v: k for k, v in id2emotion.items()}
emotions = list(id2emotion.values())

In [5]:
pipe = pipeline('fill-mask', model='bert-base-uncased')

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [6]:
classification_prompt = 'The tweet is about [MASK].'

In [7]:
pipe(dataset['text'][1] + classification_prompt, targets=emotions)

[{'score': 0.00016380846500396729,
  'token': 4963,
  'token_str': 'anger',
  'sequence': "my roommate : it's okay that we can't spell because we have autocorrect. # terrible # firstworldprobsthe tweet is about anger."},
 {'score': 0.00010516129987081513,
  'token': 6569,
  'token_str': 'joy',
  'sequence': "my roommate : it's okay that we can't spell because we have autocorrect. # terrible # firstworldprobsthe tweet is about joy."},
 {'score': 7.97944376245141e-05,
  'token': 12039,
  'token_str': 'sadness',
  'sequence': "my roommate : it's okay that we can't spell because we have autocorrect. # terrible # firstworldprobsthe tweet is about sadness."},
 {'score': 1.6803533071652055e-05,
  'token': 27451,
  'token_str': 'optimism',
  'sequence': "my roommate : it's okay that we can't spell because we have autocorrect. # terrible # firstworldprobsthe tweet is about optimism."}]

In [8]:
id2emotion[dataset['label'][1]]

'anger'

In [9]:
mask_predictions = []
for tweet in tqdm(dataset['text']):
  mask_predictions.append(
      pipe(tweet + classification_prompt, targets=emotions)[0]['token_str']
  )

100%|██████████| 3257/3257 [11:02<00:00,  4.92it/s]


In [10]:
preds = [emotion2id[pred] for pred in mask_predictions]
f1_score(dataset['label'], preds, average='macro')

0.32211708781592713

In [11]:
pipe = pipeline('zero-shot-classification', 'facebook/bart-large-mnli')

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [12]:
pipe(dataset['text'][2], emotions)

{'sequence': "No but that's so cute. Atsu was probably shy about photos before but cherry helped her out uwu",
 'labels': ['joy', 'optimism', 'sadness', 'anger'],
 'scores': [0.7844396829605103,
  0.16167372465133667,
  0.03000733256340027,
  0.02387925796210766]}

In [13]:
zero_shot_predictions = []
for premise in tqdm(dataset['text']):
  zero_shot_predictions.append(
      pipe(premise, emotions)['labels'][0]
  )

100%|██████████| 3257/3257 [2:04:55<00:00,  2.30s/it]


In [14]:
zero_shot_preds = [emotion2id[pred] for pred in zero_shot_predictions]
f1_score(dataset['label'], zero_shot_preds, average='macro')

0.6560364797677498