In [1]:
!pip install torch
!pip install transformers
!pip install accelerate
!pip install sentence_transformers
!pip install einops
!pip install faiss-cpu
!pip install langchain
!pip install langchain-community
!pip install unstructured



In [2]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import UnstructuredFileLoader
from langchain.vectorstores.faiss import FAISS
from langchain.vectorstores.utils import DistanceStrategy
from langchain_community.embeddings import HuggingFaceEmbeddings

from langchain.chains import RetrievalQA
from langchain.prompts.prompt import PromptTemplate
#from langchain.prompts.chat import ChatPromptTemplate
#from langchain.prompts import PromptTemplate,ChatPromptTemplate
from langchain.vectorstores.base import VectorStoreRetriever

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

from transformers import TextIteratorStreamer
from threading import Thread

In [3]:
# Prompt template, written in the Phi-3 instruct chat format:
#   <|system|> ... <|end|>  <|user|> ... <|end|>  <|assistant|>
# The prompt has to finish with the <|assistant|> tag so that the model's
# completion is generated as the assistant turn.
# {context} is filled with the retrieved document chunks and {question} with
# the user's query by the RetrievalQA chain.
template = """<|system|>
You are a historical expert, and if the question is not include others context, just show the original answer<|end|>
<|user|>
Context:
{context}

Question: {question}<|end|>
<|assistant|>
"""


QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
# Phi-3 mini (128k context, instruct) checkpoint on the Hugging Face Hub
model_id = "microsoft/Phi-3-mini-128k-instruct"

# trust_remote_code=True allows the Python code shipped in the Hub repository
# to be executed locally while loading the tokenizer and the model.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float32,
)

# Sentence-transformers model used to embed document chunks and queries for
# the vector store.
# Embeddings are L2-normalised: with unit-length vectors, inner-product and
# L2 rankings both coincide with cosine-similarity ranking, which is the
# similarity this checkpoint is documented as being tuned for
# (NOTE(review): confirm against the sentence-transformers MS MARCO model list).
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/msmarco-distilbert-base-v4",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attenton` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [4]:
# Returns a faiss vector store retriever given a txt file
def prepare_vector_store_retriever(filename):
    """Build a FAISS-backed retriever over the contents of ``filename``.

    The file is loaded with ``UnstructuredFileLoader``, split on blank lines
    (``"\\n\\n"``) with a target chunk size of 800 characters (measured with
    ``len``) and no overlap, embedded with the module-level ``embeddings``
    model, and stored in a FAISS vector store.

    Args:
        filename: Path of the document to index.

    Returns:
        A ``VectorStoreRetriever`` over the new store, with ``k=2`` in its
        search kwargs.
    """
    splitter = CharacterTextSplitter(
        separator="\n\n", chunk_size=800, chunk_overlap=0, length_function=len
    )

    # Load the raw document(s), then cut them into chunks
    chunks = splitter.split_documents(UnstructuredFileLoader(filename).load())

    # Embed the chunks and index them
    store = FAISS.from_documents(
        chunks, embeddings, distance_strategy=DistanceStrategy.DOT_PRODUCT
    )

    return VectorStoreRetriever(vectorstore=store, search_kwargs={"k": 2})

In [5]:
#default_text_file = "Oppenheimer-movie-wiki.txt"
#default_retriever = prepare_vector_store_retriever(default_text_file)

# Retrieval QA chain
def get_retrieval_qa_chain(text_file, hf_model):
    """Create a RetrievalQA chain that answers questions about ``text_file``.

    The module-level ``default_retriever`` is reused when ``text_file`` equals
    the module-level ``default_text_file``. Both globals are assigned in a
    later notebook cell; if either is not defined yet, or a different file is
    requested, a fresh retriever is built with
    ``prepare_vector_store_retriever`` instead of failing with ``NameError``.

    Args:
        text_file: Path of the document the chain should retrieve from.
        hf_model: LLM wrapper passed to the chain as ``llm``.

    Returns:
        The chain produced by ``RetrievalQA.from_chain_type`` using
        ``QA_PROMPT`` as its prompt.
    """
    notebook_globals = globals()
    defaults_ready = (
        "default_text_file" in notebook_globals
        and "default_retriever" in notebook_globals
    )

    if defaults_ready and text_file == default_text_file:
        retriever = default_retriever
    else:
        retriever = prepare_vector_store_retriever(text_file)

    return RetrievalQA.from_chain_type(
        llm=hf_model,
        retriever=retriever,
        chain_type_kwargs={"prompt": QA_PROMPT},
    )

In [6]:
# Generates response using the question answering chain defined earlier
#def generate(question, answer, text_file, max_new_tokens):
import time
import json
import re
def generate(question, text_file, max_new_tokens):
    """Answer ``question`` about ``text_file`` with the retrieval QA chain.

    A text-generation pipeline is built around the module-level ``model`` and
    ``tokenizer``, wrapped for LangChain, and run through the chain returned by
    ``get_retrieval_qa_chain`` on a worker thread. The generated text is
    collected from a ``TextIteratorStreamer`` as it is produced.

    Args:
        question: The user's question; it is converted to ``str``.
        text_file: Path of the document to retrieve context from.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated answer with surrounding whitespace stripped.

    Raises:
        Exception: Any exception raised by the chain on the worker thread is
            re-raised in the calling thread.
    """
    streamer = TextIteratorStreamer(
        tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0
    )
    # The model was already placed on its device(s) when it was loaded, so no
    # device arguments are passed here.
    phi3_pipeline = pipeline(
        "text-generation",
        tokenizer=tokenizer,
        model=model,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        streamer=streamer,
        # temperature must be a float and only takes effect when sampling is on
        do_sample=True,
        temperature=0.7,
    )

    hf_model = HuggingFacePipeline(pipeline=phi3_pipeline)
    qa_chain = get_retrieval_qa_chain(text_file, hf_model)

    query = str(question)

    # Over-long questions are swapped for a fixed instruction
    if len(tokenizer.tokenize(query)) >= 512:
        query = "Repeat 'Your question is too long!'"

    worker_errors = []

    def _run_chain():
        # Run the chain; on failure, record the error and close the stream so
        # the consumer below does not block until the streamer timeout.
        try:
            qa_chain.invoke(input={"query": query})
        except Exception as exc:
            worker_errors.append(exc)
            streamer.end()

    thread = Thread(target=_run_chain)
    thread.start()

    # Iterating the streamer yields text pieces until generation finishes
    response = "".join(streamer)

    thread.join()
    if worker_errors:
        raise worker_errors[0]

    return response.strip()


In [7]:
# Index the default document once; get_retrieval_qa_chain reuses this
# retriever whenever the default file is requested.
default_text_file = "Oppenheimer-movie-wiki.txt"
default_retriever = prepare_vector_store_retriever(default_text_file)

# Sample query against the default document
result = generate(
    question="What is the capital of China?",
    text_file=default_text_file,
    max_new_tokens=128,
)
print(result)



The capital of China is Beijing. The provided context does not include information about the capital of China. The answer to the question is: Beijing.
