#### **Install the required packages**

In [1]:
! pip install -q transformers trl
! pip install -q trl

#### **Load the required libraries**

In [2]:
import os
import json
import time
import torch
import random
import numpy as np
import pandas as pd
import transformers

from random import choices
from tqdm import tqdm
from google.colab import files
from datasets import Dataset, load_dataset
from torch.utils.data import Dataset
from transformers import pipeline, set_seed
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from transformers import default_data_collator, DataCollatorForLanguageModeling
from trl import RewardTrainer, SFTTrainer
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model

In [3]:
# tokenizer = AutoTokenizer.from_pretrained("/home/gpuuser/starcoder_ashish/model")
# model = AutoModelForCausalLM.from_pretrained("/home/gpuuser/starcoder_ashish/tiny_starcoder", use_cache=False)
# tokenizer.pad_token = tokenizer.eos_token
# model.resize_token_embeddings(len(tokenizer))
# tokenizer.pad_token_id = tokenizer.eos_token_id
# model.config.end_token_id = tokenizer.eos_token_id
# model.config.pad_token_id = model.config.eos_token_id

# # Set up the metric
# rouge = evaluate.load("rouge")

#### **Initial parameters setup**

In [4]:
def set_seed(seed_val=42):
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

max_input_length = 256
random.seed(42)
learning_rate = 1e-5

#### **Upload the datasets**

In [5]:
# Create an empty directory
folder_name = "data"
if not os.path.exists(folder_name):
    os.makedirs(folder_name)
print(f"An empty directory named '{folder_name}' has been created.")

# Upload files to the folder
print(f"Now please upload your required file(s).")
uploaded_files = files.upload()

# Move uploaded files to the created folder
for file_name in uploaded_files.keys():
    source_path = file_name
    destination_path = os.path.join(folder_name, file_name)
    os.rename(source_path, destination_path)
    print(f"'{file_name}' has been uploaded and moved to '{folder_name}' directory.")

An empty directory named 'data' has been created.
Now please upload your required file(s).


### **Creating the SFT/policy model for human Evaluation**

#### **Load the dataset**

In [6]:
df = pd.read_parquet("data/test.parquet")

In [7]:
df.head()

Unnamed: 0,prompt,label
0,SUBREDDIT: r/relationships\nTITLE: Me [19 F] w...,"I really like this guy, but after having sex w..."
1,SUBREDDIT: r/Parenting\nTITLE: My 11 year old ...,Sons good friend died and his funeral is today...
2,SUBREDDIT: r/relationships\nTITLE: The girl [2...,Girl I'm seeing didn't respond to my texts whi...
3,SUBREDDIT: r/tifu\nTITLE: TIFU by accidently k...,"Tried to stop an old lady falling, kicked her ..."
4,SUBREDDIT: r/relationships\nTITLE: I [32 M] fo...,Wife Cheats on me but I stuck around for kids....


In [8]:
df.iloc[1]

prompt    SUBREDDIT: r/Parenting\nTITLE: My 11 year old ...
label     Sons good friend died and his funeral is today...
Name: 1, dtype: object

#### **Create the data loader**

In [9]:
class TLDRDataset(Dataset):
    def __init__(self, train_path, tokenizer, split, max_length):
        self.post_list = []
        dataset = pd.read_parquet(train_path)
        self.labels = []
        # dataset = dataset[:100]
        for sample in dataset.iterrows():
            self.post_list.append(sample[1]["prompt"])
            self.labels.append(sample[1]["label"])

        self.tokenizer = tokenizer
        self.max_length = max_length
        self.input_ids = []
        self.attn_masks = []

    def __len__(self):
        return len(self.post_list)

    def __getitem__(self, idx):
        txt = self.post_list[idx]
        label = self.labels[idx]

        encodings_dict = self.tokenizer(txt, truncation=True, max_length=self.max_length, padding="max_length")
        encodings_dict_label = self.tokenizer(label,truncation=True, max_length=self.max_length, padding="max_length")
        input_ids = torch.tensor(encodings_dict["input_ids"])
        attn_masks = torch.tensor(encodings_dict["attention_mask"])
        labels_ids = torch.tensor(encodings_dict_label["input_ids"])
        return {
            "input_ids": input_ids,
            "attention_mask": attn_masks,
            "labels": labels_ids,
        }

In [10]:
INITIAL_MODEL_PATH = "bigcode/tiny_starcoder_py"

model = AutoModelForCausalLM.from_pretrained(INITIAL_MODEL_PATH, use_cache=False).to("cuda:0")
tokenizer = AutoTokenizer.from_pretrained(INITIAL_MODEL_PATH)
tokenizer.pad_token = tokenizer.eos_token
# model.resize_token_embeddings(len(tokenizer))
# tokenizer.pad_token_id = tokenizer.eos_token_id
# model.config.end_token_id = tokenizer.eos_token_id
# model.config.pad_token_id = model.config.eos_token_id

In [11]:
# Set up the datasets
data_path = "data/test.parquet"
train_dataset = TLDRDataset(data_path, tokenizer, "train", max_length=max_input_length)

# dev_dataset = TLDRDataset(data_path, tokenizer,"valid", max_length=max_input_length)


In [12]:
for i in train_dataset:
    print(i["input_ids"], i["labels"])
    break

tensor([ 7100,   613,  2918,   780,    44,   540,    33, 40186,   203, 13777,
           44,  3110,   428,    35,    43,   506,    79,   623,  1672, 11970,
          428,    35,    43,   488,   614,   646,  3654,   415,   439,  1631,
         1159, 16661,  1246,  6366,   973,  3425,    32,   203,  3705,    44,
        12000, 17964,  3638,  1548,    32,   439,  9845,   458,  7735,  1330,
         5133, 31695,   432,   312,  7000,   372,  7660,   544,  2442,    30,
         1273,   439,  4763,  2583, 42289,   312,  3493,   963,   432,  1672,
         7713,  1412,   561, 12767,   372,   458, 18734,   308,    59,  4763,
         5054,  1755,  1591, 12112,  2670,    30,   461,   436,  5075, 17510,
           30,   561,  1597,   963,   432,   322, 48385,   547,   203,   203,
         7558,   395,    19,  2770,    30,   312, 17142,   432, 22599, 14818,
           30,   439,  7307, 29220,   372,   458,  3932,   107,   544, 18660,
           30,  3919,   312,  9525,  2350,   688,   996,  4528, 

In [13]:
# def compute_metrics(eval_preds):
#     labels_ids = eval_preds.label_ids
#     pred_ids = eval_preds.predictions
#     pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
#     label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)
#     result = rouge.compute(predictions=pred_str, references=label_str)
#     return result

In [14]:
# # Create a preprocessing function to extract out the proper logits from the model output
# def preprocess_logits_for_metrics(logits, labels):
#     if isinstance(logits, tuple):
#         logits = logits[0]
#     return logits.argmax(dim=-1)

In [15]:
torch.cuda.set_device(0)

In [16]:
# Prepare the trainer and start training
training_args = TrainingArguments(
    output_dir="supervised-finetuned-model-checkpoint/",
    learning_rate=1e-5,
    per_device_train_batch_size=16,
#   per_device_eval_batch_size=1,
    fp16=False,
    gradient_accumulation_steps=1,
    num_train_epochs=1,
    warmup_steps=100,
    logging_steps=10,
)

In [17]:
training_args.device.index

0

In [18]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    # compute_metrics=compute_metrics,
    # data_collator=default_data_collator,
    # preprocess_logits_for_metrics=preprocess_logits_for_metrics
)
trainer.train()

Step,Training Loss
10,10.1884
20,6.5805
30,2.5874
40,1.4809
50,1.1947
60,1.0418
70,1.044
80,0.9938
90,1.0145
100,1.0254


TrainOutput(global_step=410, training_loss=1.3952962270597131, metrics={'train_runtime': 562.6274, 'train_samples_per_second': 11.647, 'train_steps_per_second': 0.729, 'total_flos': 1208895118245888.0, 'train_loss': 1.3952962270597131, 'epoch': 1.0})

In [19]:
trainer.save_model("sft_model/")

In [20]:
## inference the model
# model = AutoModelForCausalLM.from_pretrained("sft_model/")
# text = df.iloc[1]["prompt"] + df.iloc[1]["label"]
# tokenized_text = tokenizer(text)
# with torch.no_grad():
#     output_tensor = model(
#                           input_ids= torch.tensor(tokenized_text["input_ids"]),
#                           attention_mask= torch.tensor(tokenized_text["attention_mask"]))

In [21]:
# tokenizer.decode(torch.argmax(output_tensor.logits, axis=-1))

In [22]:
# print(text)

In [23]:
# df.iloc[1]["label"]

### **Traning the reward function**

In [24]:
from datasets import Dataset, load_dataset

df = pd.read_parquet("data/test_reward.parquet")
df = df[:10]
raw_dataset = Dataset.from_pandas(df)
raw_dataset

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 10
})

In [25]:
##defininig the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(INITIAL_MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained("sft_model/").to("cuda:0")
tokenizer.pad_token = tokenizer.eos_token

# model.resize_token_embeddings(len(tokenizer))
# tokenizer.pad_token_id = tokenizer.eos_token_id
# model.config.end_token_id = tokenizer.eos_token_id
# model.config.pad_token_id = model.config.eos_token_id

In [26]:
# tokenizer.add_special_tokens({'pad_token': '[PAD]'})

def formatting_func(examples):
    kwargs = {"padding": "max_length",
              "truncation": True,
              "max_length": max_input_length,
              "return_tensors": "pt"
              }

    # Prepend the prompt and a line break to the original_response and response-1 fields.
    prompt_plus_chosen_response = examples["prompt"] + "\n" + examples["chosen"]
    prompt_plus_rejected_response = examples["prompt"] + "\n" + examples["rejected"]

    # Then tokenize these modified fields.
    tokens_chosen = tokenizer.encode_plus(prompt_plus_chosen_response, **kwargs)
    tokens_rejected = tokenizer.encode_plus(prompt_plus_rejected_response, **kwargs)

    return {
        "input_ids_chosen": tokens_chosen["input_ids"][0], "attention_mask_chosen": tokens_chosen["attention_mask"][0],
        "input_ids_rejected": tokens_rejected["input_ids"][0], "attention_mask_rejected": tokens_rejected["attention_mask"][0]
    }

In [27]:
formatted_dataset = raw_dataset.map(formatting_func)
formatted_dataset = formatted_dataset.train_test_split()

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [28]:
model.config

GPTBigCodeConfig {
  "_name_or_path": "sft_model/",
  "activation_function": "gelu_pytorch_tanh",
  "architectures": [
    "GPTBigCodeForCausalLM"
  ],
  "attention_softmax_in_fp32": true,
  "attn_pdrop": 0.1,
  "bos_token_id": 0,
  "embd_pdrop": 0.1,
  "eos_token_id": 0,
  "inference_runner": 0,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "max_batch_size": null,
  "max_sequence_length": null,
  "model_type": "gpt_bigcode",
  "multi_query": true,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": 3072,
  "n_layer": 20,
  "n_positions": 8192,
  "pad_key_length": true,
  "pre_allocate_kv_cache": false,
  "resid_pdrop": 0.1,
  "scale_attention_softmax_in_fp32": true,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.32.0",
  "use_cache": false,
  "validate_runner_input": true,
  "vocab_s

In [29]:
### Loading the TRL reward trainer and training the trainer
training_args = TrainingArguments(
        output_dir="rm_checkpoint/",
        num_train_epochs=1,
        logging_steps=10,
        gradient_accumulation_steps=1,
        save_strategy="steps",
        evaluation_strategy="steps",
        per_device_train_batch_size=2,
        per_device_eval_batch_size=1,
        eval_accumulation_steps=1,
        eval_steps=500,
        save_steps=500,
        warmup_steps=100,
        logging_dir="./logs",
        learning_rate=1e-5,
        save_total_limit=1,
        remove_unused_columns=False,
        no_cuda=False
    )

In [30]:
trainer = RewardTrainer(model=model,
                        tokenizer=tokenizer,
                        train_dataset=formatted_dataset['train'],
                        eval_dataset=formatted_dataset['test'],
                        args= training_args
                        )
trainer.train()

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss


TrainOutput(global_step=4, training_loss=0.7495890855789185, metrics={'train_runtime': 1.6134, 'train_samples_per_second': 4.339, 'train_steps_per_second': 2.479, 'total_flos': 0.0, 'train_loss': 0.7495890855789185, 'epoch': 1.0})

In [31]:
trainer.save_model("rm_model/")

In [32]:
# ## inference the model
# rm_model = AutoModelForCausalLM.from_pretrained("rm_model/").to("cuda:0")
# tokenizer = AutoTokenizer.from_pretrained("bigcode/tiny_starcoder_py")
# tokenizer.pad_token = tokenizer.eos_token

In [33]:
# def get_score(model, tokenizer, prompt, response):
#     instructions = tokenizer.encode_plus(
#         prompt,
#         response,
#         padding="max_length",
#         max_length=256,
#         return_tensors="pt",
#         truncation=True
#     )
#     instructions = {key: value.to("cuda:0") for key, value in instructions.items()}

#     with torch.no_grad():
#         outputs = model(**instructions)

#     logits = outputs[0]

#     return logits

In [34]:
# # usage with prompt
# prompt = df.iloc[0]["prompt"]
# example_prefered_response = df.iloc[0]["chosen"]
# example_unprefered_response = df.iloc[0]["rejected"]

In [35]:
# loss1 = get_score(model, tokenizer, prompt, example_prefered_response)
# loss2= get_score(model, tokenizer, prompt, example_unprefered_response)

In [36]:
# from torch import nn
# loss = -nn.functional.logsigmoid(loss1 - loss2).mean()

In [37]:
# tokenizer.decode(torch.max(loss1, axis=-1).indices[0])

### **Policy Model**

In [38]:
##model path
MODEL_PATH = "rm_model/"
DATA_PATH = "data/test_reward.parquet"

In [39]:
df = pd.read_parquet(DATA_PATH)
df = df[:1000]
dataset = Dataset.from_pandas(df)
dataset

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 1000
})

In [40]:
sentiment_pipe_kwargs = {"top_k": None, "function_to_apply": "none"}

config = PPOConfig(
    model_name=MODEL_PATH, steps=51200, learning_rate=1.41e-5, remove_unused_columns=True
)

txt_in_len = 5
txt_out_len = 20
seed = 1

In [41]:
dataset = dataset.rename_columns({"prompt": "review"})
dataset = dataset.filter(lambda x: len(x["review"]) > 500, batched=False)
dataset = dataset.map(lambda x: {"review": x["review"][:1000]}, batched=False)

Filter:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [42]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, padding_side='left')
tokenizer.pad_token = tokenizer.eos_token

In [43]:
txt_in_len = 5
txt_out_len = 32
seed = 1

dataset = dataset.map(
    lambda x: {"input_ids": tokenizer.encode(" " + x["chosen"], return_tensors="pt", truncation=True, padding="max_length", max_length=32)[0]},
    batched=False,
)
dataset = dataset.map(lambda x: {"query": tokenizer.decode(x["input_ids"])}, batched=False)
dataset = dataset[:20480]
from datasets import Dataset

dataset = Dataset.from_dict(dataset)
dataset.set_format("pytorch")

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [44]:
def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

In [45]:
sft_model_path = "sft_model/"
rf_model_path = "rm_model/"
starcoder_model = AutoModelForCausalLMWithValueHead.from_pretrained(sft_model_path)
starcoder_model_ref = AutoModelForCausalLMWithValueHead.from_pretrained(rf_model_path)
starcoder_tokenizer = AutoTokenizer.from_pretrained("bigcode/tiny_starcoder_py")
starcoder_tokenizer.pad_token = tokenizer.eos_token

In [46]:
dataset

Dataset({
    features: ['review', 'chosen', 'rejected', 'input_ids', 'query'],
    num_rows: 1000
})

In [47]:
# starcoder_model

In [48]:
optimizer = torch.optim.SGD(starcoder_model.parameters(), lr=config.learning_rate)
ppo_trainer = PPOTrainer(config, starcoder_model, starcoder_model_ref, starcoder_tokenizer, dataset=dataset, data_collator=collator, optimizer=optimizer)

In [49]:
# for i in ppo_trainer.dataloader:
#   print(i)
#   break

In [50]:
ctrl_str = ["[negative]", "[positive]"]
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # this should be handled by accelerate
ctrl_tokens = dict((s, starcoder_tokenizer.encode(s, return_tensors="pt").squeeze().to(device)) for s in ctrl_str)

In [51]:
def pos_logit_to_reward(logit, task):
    """
    Take the positive sentiment logit and scale it for the task.
        task [negative]: reward = -logit
        task [neutral]: reward = -2*abs(logit)+4
        task [positive]: reward = logit
    """
    for i in range(len(logit)):
        if task[i] == "[negative]":
            logit[i] = -logit[i]
        elif task[i] == "[positive]":
            pass
        else:
            raise ValueError("task has to be in [0, 1, 2]!")
    return logit

In [52]:
pos_logit_to_reward(torch.Tensor([4, 4]), ctrl_str).to("cuda:0")

tensor([-4.,  4.], device='cuda:0')

In [53]:
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": starcoder_tokenizer.eos_token_id,
    "max_new_tokens": 32,
    "eos_token_id": -1,
}

In [54]:
def get_score(model, tokenizer, responses):
    positive_logist = []
    with torch.no_grad():
        for i in responses:
            instructions = tokenizer.encode_plus(
                i,
                padding="max_length",
                max_length=32,
                return_tensors="pt"
            )
            instructions = {key: value.to(device) for key, value in instructions.items()}

            outputs = model(**instructions)
            logits = outputs[0].mean()
            positive_logist.append(logits)

    return positive_logist

In [55]:
# def get_score(model, tokenizer, responses):
#     positive_logist = []
#     for i in responses:
#         instructions = tokenizer.encode_plus(
#                                            i,
#                                            padding="max_length",
#                                            max_length=32,
#                                            return_tensors="pt")
#         with torch.no_grad():
#             outputs = model(**instructions)

#         logits = outputs[0].mean()
#         positive_logist.append(logits.to('cuda:0')) ####

#     return positive_logist


In [56]:
# responses =["ashish is a goo", "heelow how are you", "__IT_\nr/\n: r RelationshipRelationship]]0]\nlsriend\n2//M]\n [ [ a\n the was to the [. a friends to\n\n:\n [lfriend [ me have a aried in his19 minutes.\n\nWhat Modified:** girlfriend was through the Facebook.. I my my friends.**** my  of lf**\n\n** was d1ing for my few personirl** I had for findoolpping my my the future** but I was that in\n\n** have ali  of to she  tolirt my me girl. and she found my about my.. me few of gir.1viously). was\'t find her was).\n\n** was it about my twoirl and the had  Facebook. the  and she gand historyirl) was in April,\n to, find, were flirted. I a messages.. f.ing on her.\n girlM\n; I1 girirllfriend and the19 months. to my Facebook.. my permission. she her messages. my.lirty with my fewirl.\n found her with me. I through more with\n"]
# get_score(starcoder_model, tokenizer, responses)

In [57]:
for epoch in range(1):
    for batch in tqdm(ppo_trainer.dataloader):
        (logs, game_data,) = (
            dict(),
            dict(),
        )

        print(ctrl_str)
        #### prepend a random control token
        task_list = choices(ctrl_str, k=config.batch_size)
        game_data["query"] = [t + q for t, q in zip(task_list, batch["query"])]
        query_tensors = [torch.cat((ctrl_tokens[t].to('cuda:0'), input_ids.to('cuda:0'))) for t, input_ids in zip(task_list, batch["input_ids"])] ####

        #### get response from gpt2
        response_tensors = []
        for query in query_tensors:
            response = ppo_trainer.generate(query, **generation_kwargs)
            response_tensors.append(response.squeeze()[-txt_out_len:])
#         print(response_tensors)
        game_data["response"] = [starcoder_tokenizer.decode(r.squeeze()) for r in response_tensors]

        #### sentiment analysis
        texts = [q + r for q, r in zip(batch["query"], game_data["response"])]
        logits = get_score(starcoder_model,starcoder_tokenizer, texts)
        rewards = pos_logit_to_reward(logits, task_list)

        #### Run PPO training
        t = time.time()
        stats = ppo_trainer.step(query_tensors, response_tensors, rewards)

        for cs in ctrl_str:
            key = "env/reward_" + cs.strip("[]")
            stats[key] = np.mean([r.cpu().numpy() for r, t in zip(rewards, task_list) if t == cs])
        ppo_trainer.log_stats(stats, game_data, rewards)



['[negative]', '[positive]']


You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
 33%|███▎      | 1/3 [03:56<07:52, 236.35s/it]

['[negative]', '[positive]']


 67%|██████▋   | 2/3 [07:53<03:56, 236.91s/it]

['[negative]', '[positive]']


100%|██████████| 3/3 [11:50<00:00, 236.83s/it]


In [58]:
# saving the model
starcoder_model.save_pretrained("rhlf_model/")
starcoder_tokenizer.save_pretrained("rhlf_model/")

('rhlf_model/tokenizer_config.json',
 'rhlf_model/special_tokens_map.json',
 'rhlf_model/vocab.json',
 'rhlf_model/merges.txt',
 'rhlf_model/added_tokens.json',
 'rhlf_model/tokenizer.json')

In [59]:
# inference the model
model_path = "rhlf_model/"
set_seed(42)
pipe = pipeline("text-generation",model=model_path, tokenizer=model_path, max_length=30, num_return_sequences=5)

Some weights of the model checkpoint at rhlf_model/ were not used when initializing GPTBigCodeForCausalLM: ['v_head.summary.weight', 'v_head.summary.bias']
- This IS expected if you are initializing GPTBigCodeForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPTBigCodeForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [60]:
text = dataset["rejected"][5]
text

'TL;DR:  My girlfriend found messages in my Facebook from a girl I liked in the past. They were flirtatious but she was never my girlfriend. I never flirted with her again.'

In [62]:
stats

{'objective/kl': -0.7443042993545532,
 'objective/kl_dist': array([ 2.8485279e+00, -1.0372770e+00,  4.0997952e-01,  3.0971456e-01,
        -4.7007573e-01, -8.9723337e-01, -6.6104561e-02,  8.4472024e-01,
         4.3303597e-01,  1.4909620e+00,  3.4496760e-01,  2.4994562e+00,
         3.9599001e-01,  6.0608816e-01, -2.9500778e+01, -3.7686691e-01,
         1.6866257e+00, -5.1669884e+00,  1.3375962e+00, -4.8271017e+00,
        -3.6307490e-01,  5.6373906e-01,  1.5335146e-01,  3.8837385e-01,
        -1.0659189e+00,  7.8638512e-01, -1.0854870e+00,  1.1046568e+00,
        -2.4346215e-01,  2.4608698e+00,  3.1283231e+00, -4.9195227e-01,
        -6.4106345e-02,  1.6595874e+00, -4.7089365e-01, -1.1195879e+00,
         4.1845942e+00,  1.5594051e+00, -4.4533986e-01, -4.0914154e-01,
         6.0475445e-01, -2.8349817e-01,  2.6740065e+00,  6.6300023e-01,
        -6.3538253e-01,  2.8435850e+00, -1.2948693e+01, -5.2789718e-01,
         2.5434053e+00,  4.0385108e+00,  5.1890564e-01, -6.9660654e+00,
     

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "rhlf_model/"
device = "cuda" # for GPU usage or "cpu" for CPU usage

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)


input_text = "Large language models (LLMs)"
inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
outputs = model.generate(inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))