In [None]:
import os
import numpy as np
import pandas as pd

import torch

import datasets
from backend_utils import trainer
from backend_utils import model, tokenizer

In [None]:
# Show the index of the currently selected CUDA device.
# torch.cuda.current_device() raises on a machine without a usable CUDA
# runtime, so guard it the same way the following cell does.
if torch.cuda.is_available():
    print(torch.cuda.current_device())
else:
    print("No CUDA device selected (CUDA is not available).")

In [None]:
# Report the active CUDA device together with its total and currently
# allocated memory (converted from bytes to MB); on CPU-only machines just
# print a notice.
if not torch.cuda.is_available():
    print("CUDA is not available. Using CPU.")
else:
    device = torch.device('cuda', torch.cuda.current_device())
    properties = torch.cuda.get_device_properties(device)
    print("Current CUDA device:", device)
    print("Total memory available:", properties.total_memory / 1024 ** 2, "MB")
    print("Memory allocated:", torch.cuda.memory_allocated(device) / 1024 ** 2, "MB")

In [None]:
# Allocator and device-visibility settings for the training run.
#
# PYTORCH_CUDA_ALLOC_CONF is documented as comma-separated `option:value`
# pairs, so the option needs an explicit value (`expandable_segments:True`);
# a bare `expandable_segments` does not match that format.
#
# NOTE(review): both environment variables are only honoured if they are set
# before CUDA is initialised in this process. Earlier cells already import
# torch and call torch.cuda.*, so these assignments may come too late to take
# effect -- TODO: move this cell above the first `import torch`.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Force the trainer to treat the run as single-GPU. `_n_gpu` is a private
# attribute of `trainer.args` (presumably transformers TrainingArguments --
# confirm in backend_utils), so this may break across library versions.
trainer.args._n_gpu = 1

In [None]:
# Print the trainer object imported from backend_utils as a quick sanity
# check before starting the run.
print(trainer)

In [None]:
# Record the versions of the core libraries used by this run, printed in the
# order torch, transformers, peft.
import transformers
import peft

for library in (torch, transformers, peft):
    print(library.__version__)

In [None]:
# Start the training run on the trainer imported from backend_utils; as the
# last expression of the cell, the call's return value is displayed.
trainer.train()

In [None]:
# Build the versioned model name (base name + version suffix), echo it, and
# hand it to trainer.save_model. The inference section below loads the model
# back using this same name.
saved_model_name = 'Therapy_Gemma_2bi_QLoRA'
version = '_v1'
new_model_name = f"{saved_model_name}{version}"
print(new_model_name)
trainer.save_model(new_model_name)

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Identifiers used to rebuild the fine-tuned model for inference.
# Base checkpoint that is loaded with AutoModelForCausalLM in the next cell.
base_model = 'google/gemma-2b-it'
# Name passed to PeftModel.from_pretrained; it equals the name the training
# section saved under ('Therapy_Gemma_2bi_QLoRA' + '_v1').
saved_model_name = 'Therapy_Gemma_2bi_QLoRA_v1'
# Tokenizer repository loaded with AutoTokenizer.
# NOTE(review): presumably the same tokenizer that backend_utils used during
# training -- confirm, since it differs from the base model's repo id.
tokenizerid = 'philschmid/gemma-tokenizer-chatml'

In [3]:
# Load the tokenizer, then the base causal LM, and finally wrap the base model
# with the saved PEFT adapter. `model` ends up bound to the adapter-wrapped
# model, which is what the following cells use.
tokenizer = AutoTokenizer.from_pretrained(tokenizerid)
base_lm = AutoModelForCausalLM.from_pretrained(base_model)
model = PeftModel.from_pretrained(base_lm, saved_model_name)

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  2.81it/s]


In [4]:
# Choose the device for inference: GPU index 0 when CUDA is available,
# otherwise the CPU. On the GPU path the card's total memory is also reported
# (bytes converted to MB).
if not torch.cuda.is_available():
    device = torch.device('cpu', 0)
    print("Using CPU device:", device)
else:
    device = torch.device('cuda', 0)
    properties = torch.cuda.get_device_properties(device)
    print("Using CUDA device:", device)
    print("Total memory available:", properties.total_memory / 1024 ** 2, "MB")

Using CUDA device: cuda:0
Total memory available: 40396.1875 MB


In [5]:
# Move the adapter-wrapped model onto the selected device, then switch it to
# evaluation mode so the LoRA dropout layers are inactive during generation.
# model.eval() is the last expression, so the cell displays the module repr.
model = model.to(device)
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GemmaForCausalLM(
      (model): GemmaModel(
        (embed_tokens): Embedding(256000, 2048, padding_idx=0)
        (layers): ModuleList(
          (0-17): 18 x GemmaDecoderLayer(
            (self_attn): GemmaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear(in_features=2048, out_features=256, bias=Fal

In [6]:
# Single-turn conversation used as a smoke test for the fine-tuned model.
user_message = {"role": "user", "content": "I want to kill myself!"}
chat = [user_message]

# Render the conversation to plain text (no tokenisation yet) and append the
# assistant header so that generation continues with the assistant's reply.
prompt = tokenizer.apply_chat_template(
    chat,
    tokenize=False,
    add_generation_prompt=True,
)

In [7]:
# Tokenise the rendered prompt into a PyTorch tensor. The chat template has
# already emitted <bos>, so special tokens are not added a second time.
inputs = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False)

# Generate up to 150 new tokens, with the input ids placed on the model's device.
outputs = model.generate(
    input_ids=inputs.to(model.device),
    max_new_tokens=150,
)

In [8]:
# Decode the first returned sequence (prompt plus completion) back to text,
# leaving special tokens such as <bos>/<eos> visible, and print it.
decoded_reply = tokenizer.decode(outputs[0])
print(decoded_reply)

<bos><|im_start|>user
I want to kill myself!<|im_end|>
<|im_start|>assistant
I am not able to provide help or support for suicidal thoughts or behaviors. If you are experiencing suicidal thoughts, please reach out for professional help immediately. There are many resources available to provide support and guidance during difficult times. Please remember that you are not alone and help is always available. If you are in immediate danger, please call 911 or go to the nearest emergency room. If you are feeling overwhelmed or in need of immediate support, please reach out to a trusted friend or family member or call a suicide prevention hotline. Remember that you are worthy of love and support and that there is always hope for a better future. Please reach out for help and know that you are not alone.<eos>
