In [46]:
import numpy as np
from datasets import  load_dataset
from evaluate import load
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding

In [47]:
# Download (or read from the local cache) the 'rotten_tomatoes' dataset and
# display the resulting DatasetDict so its splits and features are visible.
DATASET_NAME = 'rotten_tomatoes'
tomatoes = load_dataset(DATASET_NAME)
tomatoes

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 8530
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 1066
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 1066
    })
})

In [48]:
# Pull each split out of the DatasetDict under its own name.
train_data = tomatoes['train']
eval_data = tomatoes['validation']
test_data = tomatoes['test']

# Show the training split as the cell output.
train_data

Dataset({
    features: ['text', 'label'],
    num_rows: 8530
})

In [49]:
# Load the pretrained checkpoint twice: once as a sequence classifier with a
# two-class output head, and once as the matching tokenizer.
model_checkpoint = 'bert-base-cased'
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=2,
)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [64]:
# Collator that pads every example to the longest sequence in its batch,
# using the padding settings of the tokenizer loaded above.
data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
)

def preprocess_function(examples):
    '''Tokenize the 'text' field of a batch of examples.

    Sequences are truncated by the tokenizer; no padding is applied here.
    '''
    texts = examples['text']
    encoded = tokenizer(texts, truncation=True)
    return encoded

# Run the tokenizer over the train, validation and test splits (in that order),
# processing the examples in batches.
tokenized_train, tokenized_eval, tokenized_test = (
    split.map(preprocess_function, batched=True)
    for split in (train_data, eval_data, test_data)
)

def compute_metrics(eval_pred):
    '''Calculate the F1 score for one evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair; it is unpacked directly, so any
            two-item iterable in that order works.

    Returns:
        dict: The metric's own result mapping, ``{'f1': <score>}``, returned
        as-is so the score is logged as a scalar.
    '''
    metric = load('f1')
    logits, labels = eval_pred
    # The predicted class is the index of the largest logit along the last axis.
    predictions = np.argmax(logits, axis=-1)
    # compute() already returns {'f1': value}. Wrapping it in another
    # {'f1': ...} produced a nested dict instead of a scalar metric.
    return metric.compute(predictions=predictions, references=labels)

Map:   0%|          | 0/8530 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

In [51]:
# Hyperparameters for the fine-tuning run. Checkpoints are written to the
# 'model' directory once per epoch and no external logging integration is used.
training_args = TrainingArguments(
    output_dir='model',
    num_train_epochs=1,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    weight_decay=0.01,
    save_strategy='epoch',
    report_to='none',
)

# Wire the model, data, collator and metric function into a single Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    processing_class=tokenizer,
    data_collator=data_collator,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    compute_metrics=compute_metrics,
)

In [65]:
# List every parameter tensor of the model by name, one per line, so that the
# positional indices used for freezing can be read off the output.
for param_name, _ in model.named_parameters():
    print(param_name)

bert.embeddings.word_embeddings.weight
bert.embeddings.position_embeddings.weight
bert.embeddings.token_type_embeddings.weight
bert.embeddings.LayerNorm.weight
bert.embeddings.LayerNorm.bias
bert.encoder.layer.0.attention.self.query.weight
bert.encoder.layer.0.attention.self.query.bias
bert.encoder.layer.0.attention.self.key.weight
bert.encoder.layer.0.attention.self.key.bias
bert.encoder.layer.0.attention.self.value.weight
bert.encoder.layer.0.attention.self.value.bias
bert.encoder.layer.0.attention.output.dense.weight
bert.encoder.layer.0.attention.output.dense.bias
bert.encoder.layer.0.attention.output.LayerNorm.weight
bert.encoder.layer.0.attention.output.LayerNorm.bias
bert.encoder.layer.0.intermediate.dense.weight
bert.encoder.layer.0.intermediate.dense.bias
bert.encoder.layer.0.output.dense.weight
bert.encoder.layer.0.output.dense.bias
bert.encoder.layer.0.output.LayerNorm.weight
bert.encoder.layer.0.output.LayerNorm.bias
bert.encoder.layer.1.attention.self.query.weight
bert.enc

In [66]:
# Fine-tune only the top of the network: the classification head plus every
# parameter from encoder layer 10 onward. Everything before that is frozen.
#
# named_parameters() yields 5 embedding tensors followed by 16 tensors per
# encoder layer (see the printed parameter listing), so index
# 5 + 16 * 10 = 165 is the first tensor of encoder layer 10.
FIRST_TRAINABLE_INDEX = 165

for i, (name, param) in enumerate(model.named_parameters()):
    # The attribute is `requires_grad`. The earlier `require_grad` spelling only
    # created an unused attribute, so no parameter was actually frozen.
    # NOTE(review): the original test was `i <= 165`, which keeps the embeddings
    # and lower layers trainable and freezes the top ones; `>=` is assumed to
    # be the intent given the "classifier head" comment. Confirm.
    param.requires_grad = name.startswith('classifier') or i >= FIRST_TRAINABLE_INDEX


In [None]:
trainer.train()

In [None]:
trainer.evaluate()