In [18]:
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForQuestionAnswering, pipeline
from transformers import AutoModel
from peft import PeftModel, PeftConfig
import torch
import os

from langchain.document_loaders import HuggingFaceDatasetLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma

In [19]:
# Specify the dataset name and the column containing the content
dataset_name = "llmModeluser/Therapy_data"
page_content_column = "gpt_response"  # or any other column you're interested in

# Create a loader instance
loader = HuggingFaceDatasetLoader(dataset_name,page_content_column)

# Load the data
data = loader.load()

# Display the first 15 entries
data[:2]

[Document(page_content='"Hey there, I\'m here to listen and support you. It sounds like work has been really challenging lately. Can you tell me more about what\'s been going on?"', metadata={'human_prompt': "I've been feeling so sad and overwhelmed lately. Work has become such a massive source of stress for me."}),
 Document(page_content='"I can understand how it can be overwhelming when we\'re faced with higher expectations. It\'s okay to acknowledge your emotions and allow yourself to feel sad in this situation. It\'s an important part of the healing process. What specific challenges have you been facing at work?"', metadata={'human_prompt': "I recently got a promotion at work, which I thought would be exciting. But the added responsibilities and pressure have just taken a toll on my mental health. It's been a really draining experience for me."})]

In [20]:
# Create an instance of the RecursiveCharacterTextSplitter class with specific parameters.
# It splits text into chunks of 1000 characters each with a 150-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=150)

# 'data' holds the text you want to split, split the text into documents using the text splitter.
docs = text_splitter.split_documents(data)

In [21]:
if torch.cuda.is_available():
    device = torch.device(type='cuda', index=torch.cuda.current_device())
    properties = torch.cuda.get_device_properties(device)
    print("Current CUDA device:", device)
    print("Total memory available:", properties.total_memory / (1024 * 1024), "MB")
    print("Memory allocated:", torch.cuda.memory_allocated(device) / (1024 * 1024), "MB")
else:
    print("CUDA is not available. Using CPU.")

Current CUDA device: cuda:0
Total memory available: 40396.1875 MB
Memory allocated: 0.0 MB


In [22]:
docs[0]

Document(page_content='"Hey there, I\'m here to listen and support you. It sounds like work has been really challenging lately. Can you tell me more about what\'s been going on?"', metadata={'human_prompt': "I've been feeling so sad and overwhelmed lately. Work has become such a massive source of stress for me."})

In [23]:
def _setHFToken():
    with open("../hf_token.txt", "r") as file:
        token = file.read()       
    return token

# def _setHFToken():
#     with open("../wandb_token.txt", "r") as file:
#         token = file.read()       
#     return token


os.environ['HF_TOKEN'] = _setHFToken()
# os.environ['WANDB_TOKEN'] = _setHFToken()

In [10]:
base_model = 'google/gemma-2b-it'
saved_model_name = 'Therapy_Gemma_2bi_QLoRA_v1'
tokenizerid = 'philschmid/gemma-tokenizer-chatml'

In [25]:
model = AutoModelForCausalLM.from_pretrained(base_model, token=os.environ['HF_TOKEN'])
tokenizer = AutoTokenizer.from_pretrained(tokenizerid)
model = AutoModelForCausalLM.from_pretrained(saved_model_name)

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  2.33it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  2.36it/s]


In [26]:
model = model.to(device)

In [32]:
# Define the path to the pre-trained model you want to use
model_name='Therapy_Gemma_2bi_QLoRA_v1'

# Create a dictionary with model configuration options, specifying to use the CPU for computations
model_kwargs = {'device': 'cuda'}

# Create a dictionary with encoding options, specifically setting 'normalize_embeddings' to False
encode_kwargs = {'normalize_embeddings': False}

# Initialize an instance of HuggingFaceEmbeddings with the specified parameters
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,     # Provide the pre-trained model's path
    model_kwargs=model_kwargs, # Pass the model configuration options
    encode_kwargs=encode_kwargs # Pass the encoding options
)

No sentence-transformers model found with name Therapy_Gemma_2bi_QLoRA_v1. Creating a new one with MEAN pooling.


OSError: Therapy_Gemma_2bi_QLoRA_v1 does not appear to have a file named config.json. Checkout 'https://huggingface.co/Therapy_Gemma_2bi_QLoRA_v1/None' for available files.

In [None]:
vectordb = Chroma.from_documents(
            documents=docs,
            embedding=embeddings,
            persist_directory='./chunk_format'
        )
print("VectorDB is created and saved.")
print("Number of vectors in vectordb:",
      vectordb._collection.count(), "\n\n")

In [None]:
# Define a question-answering pipeline using the model and tokenizer
question_answerer = pipeline(
    "question-answering",
    model=model,
    tokenizer=tokenizer,
    return_tensors='pt'
)

# Create an instance of the HuggingFacePipeline, which wraps the question-answering pipeline
# with additional model-specific arguments (temperature and max_length)
llm = HuggingFacePipeline(
    pipeline=question_answerer,
    model_kwargs={"temperature": 0.7, "max_length": 512},
)

In [None]:
# Create a retriever object from the 'db' using the 'as_retriever' method.
# This retriever is likely used for retrieving data or documents from the database.
retriever = vectordb.as_retriever()

In [None]:
docs = retriever.get_relevant_documents("I am tired of my job")
print(docs[0].page_content)

In [None]:
# Create a retriever object from the 'db' with a search configuration where it retrieves up to 4 relevant splits/documents.
retriever = vectordb.as_retriever(search_kwargs={"k": 4})

# Create a question-answering instance (qa) using the RetrievalQA class.
# It's configured with a language model (llm), a chain type "refine," the retriever we created, and an option to not return source documents.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="refine", retriever=retriever, return_source_documents=False)