In [1]:
# Install required libraries
!pip install transformers datasets peft accelerate evaluate scikit-learn

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-man

In [2]:
#Install and use wandb for performing a Hyper-parameter sweep
!pip install -U wandb --quiet          # if not already installed
import wandb
wandb.login()                          # paste the API key when asked

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.2/23.2 MB[0m [31m45.1 MB/s[0m eta [36m0:00:00[0m
[?25h

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33myaagni-raolji[0m ([33myaagni-raolji-schbangq[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
import torch
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

In [5]:
import pandas as pd
from datasets import Dataset
df = pd.read_json("hr_sentiment_starter.jsonl", lines=True)
dataset = Dataset.from_pandas(df)
label_map = {
    "highly engaged": 0,
    "content": 1,
    "disengaged": 2,
    "at risk of leaving": 3
}

dataset = dataset.map(lambda x: {"label": label_map[x["label"]]})

dataset = dataset.train_test_split(test_size=0.2)
dataset['validation'] = dataset.pop('test')

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

In [7]:
# Initialize model
from transformers import AutoModelForSequenceClassification
model_name="j-hartmann/emotion-english-distilroberta-base"

def model_init():
    return AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=4,
        ignore_mismatched_sizes=True
    )

tokenizer = AutoTokenizer.from_pretrained(model_name)

In [8]:
# Tokenize dataset
def tokenize(example):
    return tokenizer(example['pros_cons'], padding='max_length', truncation=True)

tokenized = dataset.map(tokenize, batched=True)

Map:   0%|          | 0/640 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

In [9]:
sweep_config = {
    "method": "grid",          # or "random", "bayes"
    "metric": {
        "name": "eval_f1",
        "goal": "maximize"
    },
    "parameters": {
        "learning_rate":  {"values": [2e-5, 5e-5]},
        "per_device_train_batch_size": {"values": [8,16]},
        "num_train_epochs": {"values": [3,4,5]}
    }
}
sweep_id = wandb.sweep(sweep_config, project="hr-sentiment-sweep-v2")


Create sweep with ID: ril6u5ce
Sweep URL: https://wandb.ai/yaagni-raolji-schbangq/hr-sentiment-sweep-v2/sweeps/ril6u5ce


In [19]:
from transformers import Trainer, TrainingArguments
import numpy as np
from sklearn.metrics import f1_score, accuracy_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds, average="weighted")
    }

def sweep_train():
    # Fetch current sweep hyper-params from wandb.config
    run   = wandb.init()            # project/name is inherited from the sweep
    config = run.config

    args = TrainingArguments(
        output_dir="./results/sentiment-model",
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="eval_f1",
        greater_is_better=True,
        learning_rate=config.learning_rate,
        per_device_train_batch_size=config.per_device_train_batch_size,
        per_device_eval_batch_size=config.per_device_train_batch_size,
        num_train_epochs=config.num_train_epochs,
        report_to="wandb",
        run_name=run.name
    )

    trainer = Trainer(
        model_init=model_init,
        args=args,
        train_dataset=tokenized["train"],
        eval_dataset=tokenized["validation"],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    # Save model after training
    model_dir = f"./results/model-{run.id}"
    trainer.save_model(model_dir)

    # At the end, log classification report on validation set
    preds_output = trainer.predict(tokenized["validation"])
    preds  = preds_output.predictions.argmax(-1)
    labels = preds_output.label_ids

    from sklearn.metrics import classification_report
    report = classification_report(
        labels, preds,
        target_names=list(label_map.values()),
        output_dict=True
    )
    wandb.log({"classification_report": report})
    # Log the artifact
    artifact = wandb.Artifact(f"model-{wandb.run.name}", type="model")
    artifact.add_dir("./results/sentiment-model")  # This is where Trainer saved the model
    wandb.log_artifact(artifact)
    run.finish()


In [21]:
wandb.agent(sweep_id, function=sweep_train)

[34m[1mwandb[0m: Agent Starting Run: e6uucqtn with config:
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	per_device_train_batch_size: 8


  trainer = Trainer(


pytorch_model.bin:   0%|          | 0.00/329M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.538038,0.825,0.82248
2,No log,0.559849,0.85,0.851751
3,No log,0.517196,0.85625,0.856483


[34m[1mwandb[0m: Adding directory to artifact (./results/sentiment-model)... Done. 15.4s


0,1
eval/accuracy,▁▇█
eval/f1,▁▇█
eval/loss,▄█▁
eval/runtime,▁▄█
eval/samples_per_second,█▅▁
eval/steps_per_second,█▅▁
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁

0,1
eval/accuracy,0.85625
eval/f1,0.85648
eval/loss,0.5172
eval/runtime,2.3019
eval/samples_per_second,69.508
eval/steps_per_second,8.689
test/accuracy,0.85625
test/f1,0.85648
test/loss,0.5172
test/runtime,2.3647


[34m[1mwandb[0m: Agent Starting Run: cqowe3h2 with config:
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint 

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.569428,0.7875,0.788471
2,No log,0.45907,0.8375,0.838801
3,No log,0.459853,0.81875,0.818314


[34m[1mwandb[0m: Adding directory to artifact (./results/sentiment-model)... Done. 91.0s


0,1
eval/accuracy,▁█▅
eval/f1,▁█▅
eval/loss,█▁▁
eval/runtime,▅▁█
eval/samples_per_second,▄█▁
eval/steps_per_second,▄█▁
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁

0,1
eval/accuracy,0.81875
eval/f1,0.81831
eval/loss,0.45985
eval/runtime,2.4022
eval/samples_per_second,66.605
eval/steps_per_second,4.163
test/accuracy,0.8375
test/f1,0.8388
test/loss,0.45907
test/runtime,2.2987


[34m[1mwandb[0m: Agent Starting Run: 11xm9tuv with config:
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	per_device_train_batch_size: 8


  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint 

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.559031,0.825,0.821391
2,No log,0.611783,0.81875,0.822939
3,No log,0.582951,0.84375,0.844036
4,No log,0.589575,0.8625,0.862592


[34m[1mwandb[0m: Adding directory to artifact (./results/sentiment-model)... Done. 82.1s


0,1
eval/accuracy,▂▁▅█
eval/f1,▁▁▅█
eval/loss,▁█▄▅
eval/runtime,▁█▂▄
eval/samples_per_second,█▁▇▅
eval/steps_per_second,█▁▇▅
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁

0,1
eval/accuracy,0.8625
eval/f1,0.86259
eval/loss,0.58957
eval/runtime,2.2794
eval/samples_per_second,70.194
eval/steps_per_second,8.774
test/accuracy,0.8625
test/f1,0.86259
test/loss,0.58957
test/runtime,2.2493


[34m[1mwandb[0m: Agent Starting Run: q9txyybx with config:
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint 

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.568843,0.8,0.801697
2,No log,0.470189,0.825,0.827029
3,No log,0.472059,0.83125,0.832397
4,No log,0.427402,0.8375,0.837564


[34m[1mwandb[0m: Adding directory to artifact (./results/sentiment-model)... Done. 96.8s


0,1
eval/accuracy,▁▆▇█
eval/f1,▁▆▇█
eval/loss,█▃▃▁
eval/runtime,▇▇▁█
eval/samples_per_second,▂▂█▁
eval/steps_per_second,▂▂█▁
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁

0,1
eval/accuracy,0.8375
eval/f1,0.83756
eval/loss,0.4274
eval/runtime,2.3736
eval/samples_per_second,67.408
eval/steps_per_second,4.213
test/accuracy,0.8375
test/f1,0.83756
test/loss,0.4274
test/runtime,2.3317


[34m[1mwandb[0m: Agent Starting Run: njz84gkn with config:
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	per_device_train_batch_size: 8


  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint 

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.714072,0.75,0.74432
2,No log,0.520738,0.8125,0.814848
3,No log,0.515098,0.81875,0.819123


[34m[1mwandb[0m: Adding directory to artifact (./results/sentiment-model)... Done. 91.2s


0,1
eval/accuracy,▁▇█
eval/f1,▁██
eval/loss,█▁▁
eval/runtime,█▁█
eval/samples_per_second,▁█▁
eval/steps_per_second,▁█▁
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁

0,1
eval/accuracy,0.81875
eval/f1,0.81912
eval/loss,0.5151
eval/runtime,2.2985
eval/samples_per_second,69.612
eval/steps_per_second,8.701
test/accuracy,0.81875
test/f1,0.81912
test/loss,0.5151
test/runtime,2.292


[34m[1mwandb[0m: Agent Starting Run: 91vo1bag with config:
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint 

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.823723,0.74375,0.741588
2,No log,0.630706,0.76875,0.765778
3,No log,0.598465,0.76875,0.767771


[34m[1mwandb[0m: Adding directory to artifact (./results/sentiment-model)... Done. 89.5s


0,1
eval/accuracy,▁██
eval/f1,▁▇█
eval/loss,█▂▁
eval/runtime,▁██
eval/samples_per_second,█▁▁
eval/steps_per_second,█▁▁
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁

0,1
eval/accuracy,0.76875
eval/f1,0.76777
eval/loss,0.59846
eval/runtime,2.3927
eval/samples_per_second,66.87
eval/steps_per_second,4.179
test/accuracy,0.76875
test/f1,0.76777
test/loss,0.59846
test/runtime,2.3566


[34m[1mwandb[0m: Agent Starting Run: nqn6a2o1 with config:
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	per_device_train_batch_size: 8


  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint 

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.692702,0.75,0.746637
2,No log,0.51203,0.8125,0.815116
3,No log,0.462166,0.8375,0.838529
4,No log,0.46334,0.84375,0.843238


[34m[1mwandb[0m: Adding directory to artifact (./results/sentiment-model)... Done. 113.7s


0,1
eval/accuracy,▁▆██
eval/f1,▁▆██
eval/loss,█▃▁▁
eval/runtime,▃▄▁█
eval/samples_per_second,▆▅█▁
eval/steps_per_second,▆▅█▁
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁

0,1
eval/accuracy,0.84375
eval/f1,0.84324
eval/loss,0.46334
eval/runtime,2.3288
eval/samples_per_second,68.706
eval/steps_per_second,8.588
test/accuracy,0.84375
test/f1,0.84324
test/loss,0.46334
test/runtime,2.3137


[34m[1mwandb[0m: Agent Starting Run: v1ov461b with config:
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	num_train_epochs: 4
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint 

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.807641,0.725,0.723324
2,No log,0.599995,0.78125,0.781114
3,No log,0.527946,0.80625,0.805669
4,No log,0.50537,0.8125,0.812129


[34m[1mwandb[0m: Adding directory to artifact (./results/sentiment-model)... Done. 99.9s


0,1
eval/accuracy,▁▅▇█
eval/f1,▁▆▇█
eval/loss,█▃▂▁
eval/runtime,▁▄█▂
eval/samples_per_second,█▅▁▇
eval/steps_per_second,█▅▁▇
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁

0,1
eval/accuracy,0.8125
eval/f1,0.81213
eval/loss,0.50537
eval/runtime,2.3492
eval/samples_per_second,68.109
eval/steps_per_second,4.257
test/accuracy,0.8125
test/f1,0.81213
test/loss,0.50537
test/runtime,2.4097


[34m[1mwandb[0m: Agent Starting Run: 5454jsus with config:
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 8


  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint 

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.681701,0.75625,0.753092
2,No log,0.51099,0.8125,0.815286
3,No log,0.458136,0.84375,0.845094
4,No log,0.459802,0.85625,0.856192
5,No log,0.497027,0.86875,0.869073


[34m[1mwandb[0m: Adding directory to artifact (./results/sentiment-model)... Done. 91.5s


0,1
eval/accuracy,▁▅▆▇█
eval/f1,▁▅▇▇█
eval/loss,█▃▁▁▂
eval/runtime,▂▄▄█▁
eval/samples_per_second,▇▅▅▁█
eval/steps_per_second,▇▅▅▁█
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁

0,1
eval/accuracy,0.86875
eval/f1,0.86907
eval/loss,0.49703
eval/runtime,2.2479
eval/samples_per_second,71.177
eval/steps_per_second,8.897
test/accuracy,0.86875
test/f1,0.86907
test/loss,0.49703
test/runtime,2.2758


[34m[1mwandb[0m: Agent Starting Run: 924gw0eb with config:
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint 

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.799267,0.7375,0.734911
2,No log,0.586928,0.79375,0.794183
3,No log,0.500311,0.825,0.82475
4,No log,0.494702,0.8125,0.810799
5,No log,0.486819,0.81875,0.818235


[34m[1mwandb[0m: Adding directory to artifact (./results/sentiment-model)... Done. 114.8s


0,1
eval/accuracy,▁▅█▇█
eval/f1,▁▆█▇▇
eval/loss,█▃▁▁▁
eval/runtime,▁▇█▄█
eval/samples_per_second,█▂▁▅▁
eval/steps_per_second,█▂▁▅▁
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁

0,1
eval/accuracy,0.81875
eval/f1,0.81824
eval/loss,0.48682
eval/runtime,2.3703
eval/samples_per_second,67.502
eval/steps_per_second,4.219
test/accuracy,0.825
test/f1,0.82475
test/loss,0.50031
test/runtime,2.3413


[34m[1mwandb[0m: Agent Starting Run: q8smpdk6 with config:
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 8


  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint 

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.565947,0.81875,0.815768
2,No log,0.440267,0.875,0.875697
3,No log,0.56684,0.85,0.850494
4,No log,0.673947,0.8875,0.887225
5,No log,0.702814,0.8625,0.862511


[34m[1mwandb[0m: Adding directory to artifact (./results/sentiment-model)... Done. 124.9s


0,1
eval/accuracy,▁▇▄█▅
eval/f1,▁▇▄█▆
eval/loss,▄▁▄▇█
eval/runtime,▁▅▃▁█
eval/samples_per_second,█▄▆▇▁
eval/steps_per_second,█▄▆▇▁
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁

0,1
eval/accuracy,0.8625
eval/f1,0.86251
eval/loss,0.70281
eval/runtime,2.42
eval/samples_per_second,66.115
eval/steps_per_second,8.264
test/accuracy,0.8875
test/f1,0.88722
test/loss,0.67395
test/runtime,2.2751


[34m[1mwandb[0m: Agent Starting Run: 6oqfswne with config:
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 16


  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint 

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.569167,0.8,0.801697
2,No log,0.484194,0.8125,0.813727
3,No log,0.47233,0.83125,0.832308
4,No log,0.448282,0.85,0.850005
5,No log,0.449279,0.85,0.849799


[34m[1mwandb[0m: Adding directory to artifact (./results/sentiment-model)... Done. 94.7s


0,1
eval/accuracy,▁▃▅██
eval/f1,▁▃▅██
eval/loss,█▃▂▁▁
eval/runtime,▁▅▇█▃
eval/samples_per_second,█▄▂▁▆
eval/steps_per_second,█▄▂▁▆
test/accuracy,▁
test/f1,▁
test/loss,▁
test/runtime,▁

0,1
eval/accuracy,0.85
eval/f1,0.8498
eval/loss,0.44928
eval/runtime,2.3028
eval/samples_per_second,69.48
eval/steps_per_second,4.342
test/accuracy,0.85
test/f1,0.85
test/loss,0.44828
test/runtime,2.3111


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


In [24]:
# Use this if you want to load the model from artifact
import wandb

# Log in first
wandb.login()

# Restore the best model from W&B
run = wandb.init(project="hr-sentiment-sweep-v2", id="q8smpdk6", resume="allow")

artifact = run.use_artifact("model-worthy-sweep-11:v0")  # or "model:v0" if named that way
model_dir = artifact.download()




KeyboardInterrupt: 

In [27]:
# Use this if you want to load the model from local
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_dir = "./results/model-q8smpdk6"
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)


In [39]:
from transformers import DataCollatorWithPadding
import pandas as pd
from datasets import Dataset

df = pd.read_json("employee_reviews_testset_100_each.jsonl", lines=True)
test_dataset = Dataset.from_pandas(df)

test_dataset = test_dataset.map(lambda x: {"label": label_map[x["label"]]})

test_dataset = test_dataset.map(tokenize, batched=True)

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

In [38]:
from transformers import Trainer

trainer = Trainer(model=model, compute_metrics=compute_metrics)
results = trainer.evaluate(test_dataset)
print(results)




{'eval_loss': 0.3804772198200226, 'eval_model_preparation_time': 0.0015, 'eval_accuracy': 0.9175, 'eval_f1': 0.9169230320720958, 'eval_runtime': 5.283, 'eval_samples_per_second': 75.714, 'eval_steps_per_second': 9.464}
