In [None]:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install --upgrade pip setuptools wheel
#!{sys.executable} -m pip install --disable-pip-version-check torch==1.13.1 torchdata==0.5.1
!{sys.executable} -m pip install --disable-pip-version-check torch torchdata
!{sys.executable} -m pip install transformers datasets \
    evaluate loralib 

In [None]:
from transformers import AutoModelForCausalLM
from transformers import pipeline, AutoTokenizer
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer
import torch
import numpy as np

# Checkpoint identifier shared by the model and its tokenizer.
checkpoint = "facebook/opt-350m"

# Load the "train" split of the "imdb" dataset.
dataset = load_dataset("imdb", split="train")

# Causal language model and matching tokenizer for the checkpoint above.
model_pretrain = AutoModelForCausalLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [None]:
# Keep only the rows whose position is a multiple of 500 (rows 0, 500, 1000, ...).
# NOTE: this rebinds `dataset`, so re-running the cell subsamples again.
def _keep_every_500th(example, index):
    return index % 500 == 0


dataset = dataset.filter(_keep_every_500th, with_indices=True)

In [None]:
# Fine-tuning configuration: read training text from the "text" field, use a
# maximum sequence length of 512, and write outputs to /tmp.
sft_config = SFTConfig(
    dataset_text_field="text",
    max_seq_length=512,
    output_dir="/tmp",
)

In [None]:
# Build the supervised fine-tuning trainer.
#
# The trainer updates the model it receives in place, so it is given a fresh
# instance of the base checkpoint rather than `model_pretrain`. That keeps
# `model_pretrain` at its pretrained weights (and in eval mode) for the
# pretrained-vs-fine-tuned comparisons made further down the notebook.
# The tokenizer loaded earlier is reused instead of being loaded a second time.
trainer = SFTTrainer(
    model=AutoModelForCausalLM.from_pretrained("facebook/opt-350m"),
    train_dataset=dataset,
    args=sft_config,
    tokenizer=tokenizer,
)

In [None]:
# Run the training loop of the trainer built in the previous cell; the value
# returned by train() is displayed as the cell output.
trainer.train()

In [None]:
# Save the model we just trained to the ./opt_post_imdb directory
trainer.save_model('./opt_post_imdb')

In [None]:
# Reload the checkpoint stored under ./opt_post_imdb as a causal language model.
model_fine_tune = AutoModelForCausalLM.from_pretrained("./opt_post_imdb")

In [None]:
# Access the next-token probabilities by calling the model directly
inputs = tokenizer("Today is a", return_tensors="pt")

# Inference only: force eval mode (no dropout) and disable autograd so the
# forward pass is deterministic and the printed tensors carry no gradient graph.
model_pretrain.eval()
with torch.no_grad():
    output = model_pretrain(**inputs)

# Logits at the last prompt position of the first (only) sequence in the batch.
next_token_logits = output.logits[0, -1, :]
print(next_token_logits.shape)
print(next_token_logits)

# Normalise once and reuse the distribution for both the print and the top-k.
next_token_distribution = torch.softmax(next_token_logits, -1)
print(next_token_distribution)

# Five most likely next tokens, printed as (decoded token, probability) pairs.
next_token_probs = torch.topk(next_token_distribution, 5)
print(*[(tokenizer.decode(idx), prob) for idx, prob in zip(next_token_probs.indices, next_token_probs.values)], sep="\n")

In [None]:
# Accéder aux probas des tokens suivants en utilisant model.generate
output_generate=model_pretrain.generate(**inputs,max_new_tokens=5, return_dict_in_generate=True, output_scores=True)
transition_scores = model_pretrain.compute_transition_scores(
    output_generate.sequences, output_generate.scores, normalize_logits=True
)

input_length = 1 if model_pretrain.config.is_encoder_decoder else inputs.input_ids.shape[1]

generated_tokens = output_generate.sequences[:, input_length:]

for tok, score in zip(generated_tokens[0], transition_scores[0]):

    # | token | token string | log probability | probability

    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")


In [None]:
# Accéder aux probas des tokens suivants en utilisant model.generate
output_generate=model_fine_tune.generate(**inputs,max_new_tokens=5, return_dict_in_generate=True, output_scores=True)
transition_scores = model_fine_tune.compute_transition_scores(
    output_generate.sequences, output_generate.scores, normalize_logits=True
)

input_length = 1 if model_fine_tune.config.is_encoder_decoder else inputs.input_ids.shape[1]

generated_tokens = output_generate.sequences[:, input_length:]

for tok, score in zip(generated_tokens[0], transition_scores[0]):

    # | token | token string | log probability | probability

    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")


In [None]:
# Access the models' weights as lists of (parameter name, tensor) pairs.
# Print the module itself to show the architecture; printing the un-called
# `named_parameters` attribute only wrapped that same text in a bound-method repr.
print(model_pretrain)
# detach() returns the weight values without autograd tracking.
parametres_pretrain = [(nom, param.detach()) for nom, param in model_pretrain.named_parameters()]
parametres_fine_tune = [(nom, param.detach()) for nom, param in model_fine_tune.named_parameters()]

In [None]:
# Look at one entry (index 2) of the parameter lists: print its name, then the
# same small slice (rows 1-2, columns 0-1) from the pretrained and the
# fine-tuned weights.
layer_name, layer_weights_pretrain = parametres_pretrain[2]
layer_weights_fine_tune = parametres_fine_tune[2][1]

print(layer_name)
print(layer_weights_pretrain[1:3, 0:2])
print(layer_weights_fine_tune[1:3, 0:2])