In [1]:
import torch
# Pick the compute device once, then report which one was selected.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)} is available.")
else:
    print("No GPU available. Training will run on CPU.")

GPU: NVIDIA GeForce RTX 2070 SUPER is available.


In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

# Checkpoint used for generation in this notebook.
# Alternative checkpoint that was also considered:
#   TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights first, then move them to the device selected in the first cell.
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(device)

# Text-generation pipeline that reuses the already-loaded model and tokenizer.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
)


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Single-turn conversation used to smoke-test the chat prompt template.
# NOTE(review): "Monoploy" is presumably a typo for "Monopoly"; it is kept
# unchanged so the recorded output below still matches the prompt.
messages = [{"role": "user", "content": "Do you know what is the Monoploy?"}]


In [4]:
# Run one sampled generation for the test conversation.
generated = pipe(
    messages,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
)

# `outputs` holds the returned conversation (role/content dicts); the
# chat-template cell further down reads it.
outputs = generated[0]["generated_text"]

# Show the first assistant turn, or an empty string when there is none.
assistant_turns = [turn["content"] for turn in outputs if turn["role"] == "assistant"]
assistant_reply = assistant_turns[0] if assistant_turns else ""
print(assistant_reply)

Yes, the Monoploy is a board game that was released in 1959 by Hasbro, Inc. It is a classic game that has been played by people of all ages for decades. The game is a two-player game that involves stacking cubes on a board. Each player takes turns moving their cubes up the board, trying to get them to the top of the tower at the end of the game. The first player to reach the top wins the game. The game can be played with a variety of different themes and variations, making it a versatile and fun game for people of all ages.


In [5]:
# Render the conversation through the tokenizer's chat template as plain text
# (no tokenization, no trailing generation prompt) so the prompt format is
# easy to inspect.
chat_history = pipe.tokenizer.apply_chat_template(
    outputs,
    tokenize=False,
    add_generation_prompt=False,
)
print(chat_history)

<|user|>
Do you know what is the Monoploy?</s>
<|assistant|>
Yes, the Monoploy is a board game that was released in 1959 by Hasbro, Inc. It is a classic game that has been played by people of all ages for decades. The game is a two-player game that involves stacking cubes on a board. Each player takes turns moving their cubes up the board, trying to get them to the top of the tower at the end of the game. The first player to reach the top wins the game. The game can be played with a variety of different themes and variations, making it a versatile and fun game for people of all ages.</s>



RAG

![RAG pipeline diagram](MarkDownImage/RAG.png)

In [6]:
import os
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores  import Chroma
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate

# Run the embedding model on the device selected in the first cell instead of
# hard-coding 'cuda', which fails on machines without a GPU.
model_kwargs = {'device': str(device)}
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embedding = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs
    )

# Folder that holds the PDF knowledge sources.
pdf_folder = "documents/"

# os.listdir() returns entries in arbitrary order, so sort to keep the order of
# `documents` reproducible across runs; match the extension case-insensitively
# so files named "*.PDF" are not silently skipped.
pdf_files = sorted(f for f in os.listdir(pdf_folder) if f.lower().endswith('.pdf'))

# Collect everything each loader returns into one flat list.
documents = []

for pdf_file in pdf_files:
    document = os.path.join(pdf_folder, pdf_file)
    pdf_loader = PyPDFLoader(document)
    documents.extend(pdf_loader.load())


Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 34 0 (offset 0)


In [7]:
persist_directory = "./knowledge-base"

# Open the persisted store first. Chroma.from_documents() appends to whatever
# is already stored under `persist_directory`, so calling it unconditionally
# would add every document again on each re-run of this cell and fill the
# similarity-search results with duplicates.
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding,
)

# Index the documents only when the store is still empty.
# To rebuild after changing the PDFs, delete `persist_directory` and re-run.
if not vectordb.get(limit=1)["ids"]:
    vectordb = Chroma.from_documents(
        documents,
        embedding,
        persist_directory=persist_directory,
    )

In [8]:
def retrieve_documents(query, num_results=1):
    """Look up stored documents for ``query`` and return their text.

    Args:
        query: Text that is embedded with the notebook-level ``embedding`` model.
        num_results: Number of results requested from the vector store (``k``).

    Returns:
        A list with the ``page_content`` of each result, in the order the
        notebook-level ``vectordb`` returned them.
    """
    query_vector = embedding.embed_query(query)
    matches = vectordb.similarity_search_by_vector(query_vector, k=num_results)

    texts = []
    for match in matches:
        texts.append(match.page_content)
    return texts

def generate_answer(query, retrieved_docs):
    """Generate an answer to ``query`` grounded in ``retrieved_docs``.

    The instructions and the retrieved context are sent as the system message
    and the question as a separate user message, so the chat template renders
    a regular system -> user -> assistant exchange rather than a lone system
    turn.

    Args:
        query: The user's question.
        retrieved_docs: Iterable of document text strings used as context.

    Returns:
        A ``(assistant_reply, retrieved_docs_text)`` tuple: the generated reply
        (an empty string if no assistant message is found) and the
        newline-joined context that was placed in the prompt.
    """
    retrieved_docs_text = "\n".join(retrieved_docs)

    system_prompt = f"""
You're a professional assistant
Given the following documents, answer the user's question as accurately as possible:

Documents:
{retrieved_docs_text}
"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    result = pipe(messages, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
    outputs = result[0]['generated_text']

    # The pipeline returns the input conversation with the new turn appended,
    # so search from the end to pick the generated assistant message.
    assistant_reply = next(
        (msg["content"] for msg in reversed(outputs) if msg["role"] == "assistant"),
        "",
    )

    return assistant_reply, retrieved_docs_text


def process_query(query, num_results=1):
    """Run retrieval followed by answer generation for ``query``.

    Args:
        query: The user's question.
        num_results: Number of documents to retrieve as context.

    Returns:
        The ``(answer, retrieved_docs_text)`` pair produced by
        ``generate_answer``.
    """
    reply, context_text = generate_answer(
        query,
        retrieve_documents(query, num_results=num_results),
    )
    return reply, context_text


In [10]:
# Ask one question through the full retrieve-then-generate flow, using a
# single retrieved document as context, and show the generated answer.
query = "Do you know what is the colon cancer?"
answer, retrieved_docs_text = process_query(query, num_results=1)
print(answer)

According to the given documents, a common side effect of oxaliplatin-based chemotherapy for colorectal cancer is feeling tired. This side effect can be more severe during the treatment period and may last for a short time. If this side effect persists or worsens, it should be reported to the medical team and treated as a medical emergency. The patient should not drive or operate machinery while experiencing fatigue, as it can be dangerous. In rare cases, laryngeal spasm, which can cause difficulty swallowing and breathing, can occur during treatment. If this side effect persists or worsens, it should be reported to the medical team and treated as a medical emergency. The patient should take long, deep breaths through their nose to help their breathing return to normal.
