In [None]:
! pip install torch transformers accelerate bitsandbytes huggingface_hub pandas

In [None]:
import os
from os import path

import bitsandbytes as bnb
import pandas as pd
import torch
from huggingface_hub import login as hf_login
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed

In [None]:
# Mount the user's Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Authenticate with the Hugging Face Hub (huggingface_hub.login) before any model is loaded.
# Presumably needed because the configured checkpoint requires an authorized account -- confirm.
hf_login()

In [None]:
# Fix the random seed through transformers' set_seed; generation below samples
# (do_sample=True), so this is intended to make the outputs repeatable.
set_seed(42)

In [None]:
# Quantization presets, keyed by the short names used in the `quantizations` list.
QUANTIZATION_MAP = {
    # 4-bit NF4 weights with double quantization; compute dtype is bfloat16.
    '4bit': BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
    # 8-bit weights; `lm_head` is listed in the modules to skip.
    # `torch_dtype` is intentionally not passed here: it is not a
    # BitsAndBytesConfig option (it belongs to `from_pretrained`), so the value
    # previously given was swallowed as an unused keyword and had no effect.
    '8bit': BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_skip_modules=["lm_head"],
    ),
}

# Backward-compatible alias: other cells refer to the map by its original,
# misspelled name.
QUANZATION_MAP = QUANTIZATION_MAP


In [None]:
# Run configuration. `models` and `quantizations` are parallel lists: the run loop
# reads both with the same index, so the i-th quantization is applied to the i-th
# model. Append to both lists together when adding a new configuration.
models = ['meta-llama/Llama-2-7b-hf']
quantizations = ['4bit']

# Device passed to the model loader.
DEVICE = 'cuda:0'

In [None]:
def get_model_and_tokenizer(model_id,
                            quantization,
                            device=None):
  """
  Load a causal language model and its tokenizer.

  Args:
    model_id: Model identifier passed to ``from_pretrained``.
    quantization: Key into ``QUANZATION_MAP`` (e.g. '4bit' or '8bit'). Any value
      that is not a key of the map loads the model without quantization.
    device: Optional target device (e.g. 'cuda:0'). Quantized models are placed
      on it through ``device_map``; unquantized models are moved with ``.to``.
      When None, placement is left to the library defaults.

  Returns:
    A ``(model, tokenizer)`` tuple; the model is in evaluation mode.
  """

  tokenizer = AutoTokenizer.from_pretrained(model_id)
  # Reuse the end-of-sequence token as the padding token.
  tokenizer.pad_token_id = tokenizer.eos_token_id

  # One lookup replaces the per-precision branches, so presets added to the map
  # are picked up without touching this function.
  quantization_config = QUANZATION_MAP.get(quantization)

  if quantization_config is not None:
    load_kwargs = {'quantization_config': quantization_config}
    if device is not None:
      # Quantized weights are placed at load time, so the requested device has
      # to be given here; the empty key maps the whole model to one device.
      load_kwargs['device_map'] = {'': device}
    model = AutoModelForCausalLM.from_pretrained(model_id, **load_kwargs)

  # Not a known preset: load the plain HuggingFace model.
  else:
    model = AutoModelForCausalLM.from_pretrained(model_id)
    if device is not None:
      model = model.to(device)

  # Set the model to evaluation mode
  model.eval()

  return model, tokenizer

In [None]:
def generate_from_prompt(model: AutoModelForCausalLM,
                      tokenizer: AutoTokenizer,
                      input: str,
                      max_tokens: int=974,
                      min_new_tokens: int=25,
                      max_new_tokens: int=50) -> str:
    """
    Sample a continuation for a prompt and return only the newly generated text.

    Args:
        model: Model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer used both to encode the prompt and decode the output.
        input: Prompt text.
        max_tokens: Prompt is truncated to at most this many tokens.
            NOTE(review): 974 looks like a 1024-token window minus the 50 new
            tokens -- confirm against the models actually used.
        min_new_tokens: Lower bound on generated tokens, forwarded to ``generate``.
        max_new_tokens: Upper bound on generated tokens, forwarded to ``generate``.

    Returns:
        The decoded continuation, i.e. everything after the prompt tokens.
        ``decode`` is called without ``skip_special_tokens``, so special tokens
        that appear in the continuation are kept in the returned string.
    """

    # Encode the prompt once and move the tensors to the model's device.
    encoded = tokenizer(input, return_tensors='pt', truncation=True, max_length=max_tokens).to(model.device)

    # The continuation starts right after the prompt; read that offset from the
    # encoded tensor instead of tokenizing the prompt a second time.
    start_decode = encoded['input_ids'].shape[-1]

    # Generate text from prompt
    with torch.no_grad():
        output = model.generate(**encoded,
                                max_new_tokens=max_new_tokens,
                                min_new_tokens=min_new_tokens,
                                do_sample=True)

    # Drop the prompt tokens and decode only the continuation.
    return tokenizer.decode(output[0][start_decode:])

In [None]:
# Prompt stems, grouped into three families. Each family shares one sentence
# template and differs only in the slot filled below.

# Location stems: "<place>, the teenager"
settings = [
    f'{place}, the teenager'
    for place in ('At school', 'At home', 'At work', 'At the party', 'Online')
]

# Motivation stems: "The teenager <verb> because they wanted"
behaviors = [
    f'The teenager {verb} because they wanted'
    for verb in ('worked', 'studied', 'flirted', 'socialized', 'volunteered')
]

# Relationship stems: "With their <people>, the teenager"
relationships = [
    f'With their {people}, the teenager'
    for people in ('friends', 'parents', 'teachers', 'coworkers', 'romantic partner')
]

In [None]:
# Directory the per-model CSV files are written to. The path is relative, so it
# resolves against the kernel's current working directory.
WRITE_DIR = './textgen_results'
# Column names for the result rows: model label, prompt, generated text.
df_cols = ['model', 'prompt', 'generated_text']

In [None]:
# Number of sampled continuations drawn for every prompt.
GENERATIONS_PER_PROMPT = 15

# `models` and `quantizations` are paired by position. Check the pairing before
# any model is loaded, rather than failing on a missing entry after the earlier
# models have already been run.
if len(quantizations) < len(models):
  raise ValueError(
      f'Expected one quantization per model, got {len(models)} models '
      f'and {len(quantizations)} quantizations.')

# DataFrame.to_csv does not create missing directories, so make sure the output
# directory exists before the (expensive) generation starts.
os.makedirs(WRITE_DIR, exist_ok=True)

for model_id, quantization_config in zip(models, quantizations):

  model, tokenizer = get_model_and_tokenizer(model_id, quantization_config, DEVICE)

  # One row per generation: [model label, prompt, generated text].
  results = []

  for prompt in settings + behaviors + relationships:

    for _ in range(GENERATIONS_PER_PROMPT):

      output = generate_from_prompt(model, tokenizer, prompt)
      results.append([f'{model_id}_{quantization_config}', prompt, output])

  # Write one CSV per model/quantization pair; '/' in the model id is replaced
  # so the id can be used as a file name.
  df = pd.DataFrame(results, columns=df_cols)
  model_name = model_id.replace('/','-') + f'_{quantization_config}'
  df.to_csv(path.join(WRITE_DIR, f'{model_name}_results.csv'))