In [1]:
!pip install trl transformers datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting trl
  Downloading trl-0.4.4-py3-none-any.whl (68 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 68.4/68.4 kB 5.9 MB/s eta 0:00:00
Collecting transformers
  Downloading transformers-4.30.1-py3-none-any.whl (7.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.2/7.2 MB 99.9 MB/s eta 0:00:00
Collecting datasets
  Downloading datasets-2.12.0-py3-none-any.whl (474 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 474.6/474.6 kB 46.2 MB/s eta 0:00:00
Collecting accelerate (from trl)
  Downloading accelerate-0.20.3-py3-none-any.whl (227 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 227.6/227.6 kB 31.9 MB/s eta 0:00:00
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.1

In [4]:
from datasets import load_dataset
from huggingface_hub import create_repo, notebook_login
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    set_seed,
)
from trl import RewardTrainer
# Authenticate with the Hugging Face Hub; the training arguments further down
# enable push_to_hub, so uploads are made under this session.
notebook_login()

# Seed the RNGs *before* the model is built: the base checkpoint has no
# classification head, so the reward head (score.weight) is freshly initialised
# on load and would otherwise differ from run to run.
SEED = 42
set_seed(SEED)

dataset = load_dataset("yankihue/h_positive_tweets_human_feedback", split="train")

model_name = "redrussianarmy/gpt2-turkish-cased"

# num_labels=1 makes the classification head emit a single scalar per sequence,
# which the reward trainer uses as the reward score.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse EOS as the padding token, and keep the tokenizer and the model config
# agreed on which id that is.
tokenizer.eos_token_id = model.config.eos_token_id
tokenizer.pad_token = tokenizer.eos_token

model.config.pad_token_id = model.config.eos_token_id

def formatting_func(examples, max_length=512):
    """Tokenize one preference example into paired inputs for reward training.

    The prompt is encoded together with each completion as a text pair, once
    with the chosen completion and once with the rejected one.

    Args:
        examples: A single dataset row (``dataset.map`` is called without
            batching) providing the fields ``"prompt"``, ``"chosen"`` and
            ``"rejected"`` -- presumably plain strings; confirm against the
            dataset schema.
        max_length: Length every encoded pair is padded or truncated to.
            Defaults to 512, the value that used to be hard-coded here.

    Returns:
        dict with the keys ``"input_ids_chosen"``, ``"attention_mask_chosen"``,
        ``"input_ids_rejected"`` and ``"attention_mask_rejected"``. Each value
        is the first (and only) row of the corresponding ``"pt"`` tensor
        returned by the tokenizer.
    """
    kwargs = {
        "padding": "max_length",
        "truncation": True,
        "max_length": max_length,
        "return_tensors": "pt",
    }

    # "chosen"/"rejected" are taken straight from the dataset columns; no
    # preference is inferred in this function.
    prompt = examples["prompt"]
    chosen_response = examples["chosen"]
    rejected_response = examples["rejected"]

    # Call the tokenizer directly: encode_plus is documented as deprecated in
    # favour of __call__, which handles a (text, text_pair) input the same way.
    tokens_chosen = tokenizer(prompt, chosen_response, **kwargs)
    tokens_rejected = tokenizer(prompt, rejected_response, **kwargs)

    # return_tensors="pt" adds a leading batch dimension of size 1; [0] strips it.
    return {
        "input_ids_chosen": tokens_chosen["input_ids"][0],
        "attention_mask_chosen": tokens_chosen["attention_mask"][0],
        "input_ids_rejected": tokens_rejected["input_ids"][0],
        "attention_mask_rejected": tokens_rejected["attention_mask"][0],
    }

# Tokenize every row of the dataset; map() is called without batching, so
# formatting_func receives one example at a time.
formatted_dataset = dataset.map(formatting_func)

# Training configuration, built separately so it can be inspected before use.
training_args = TrainingArguments(
    # where outputs and logs go
    output_dir="h_reward_model_positive_tweets/",
    logging_dir="./logs",
    # optimisation schedule
    num_train_epochs=5,
    learning_rate=1e-5,
    warmup_steps=100,
    gradient_accumulation_steps=4,
    # numeric precision / backend
    fp16=True,
    bf16=False,
    deepspeed=False,
    # logging, checkpointing and upload
    logging_steps=10,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=1,
    push_to_hub=True,
)

trainer = RewardTrainer(
    model=model,
    args=training_args,
    train_dataset=formatted_dataset,
    tokenizer=tokenizer,
)

# Last expression of the cell, so the returned training summary is displayed.
trainer.train()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Some weights of the model checkpoint at redrussianarmy/gpt2-turkish-cased were not used when initializing GPT2ForSequenceClassification: ['lm_head.weight']
- This IS expected if you are initializing GPT2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2ForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at redrussianarmy/gpt2-turkish-cased and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.

Step,Training Loss
10,0.9983
20,0.9508
30,0.8059
40,0.5973
50,0.386
60,0.2342
70,0.1637
80,0.1205
90,0.0548
100,0.0587


TrainOutput(global_step=240, training_loss=0.18514860815096956, metrics={'train_runtime': 162.7243, 'train_samples_per_second': 47.903, 'train_steps_per_second': 1.475, 'total_flos': 0.0, 'train_loss': 0.18514860815096956, 'epoch': 4.92})

In [5]:
# Upload the trainer's model and training artifacts to the Hugging Face Hub.
# In the recorded run the call's return value, shown as the cell result, is the
# URL of the resulting commit.
trainer.push_to_hub()

Upload file pytorch_model.bin:   0%|          | 1.00/475M [00:00<?, ?B/s]

Upload file training_args.bin:   0%|          | 1.00/3.81k [00:00<?, ?B/s]

To https://huggingface.co/yankihue/h_reward_model_positive_tweets
   e928401..b472165  main -> main

   e928401..b472165  main -> main

To https://huggingface.co/yankihue/h_reward_model_positive_tweets
   b472165..eb1a79b  main -> main

   b472165..eb1a79b  main -> main



'https://huggingface.co/yankihue/h_reward_model_positive_tweets/commit/b472165b1263f0e58aa055d8f1c903339ee6606d'