# Lab 4. Retrieval-augmented generation (RAG)

## 1. Basic Completion and Chat
### Download Qwen2

In [1]:
from pathlib import Path
from modelscope import snapshot_download
# ModelScope repository id of the LLM and the local directory the later cells
# load it from (the directory mirrors the repository id under ./model/).
llm_model_id = "snake7gun/Qwen2-7B-Instruct-int4-ov"
llm_local_path = "./model/snake7gun/Qwen2-7B-Instruct-int4-ov"

# Only hit the network when the local copy is missing, so re-running this
# cell after a successful download is a no-op.
llm_is_cached = Path(llm_local_path).exists()
if not llm_is_cached:
    model_dir = snapshot_download(llm_model_id, cache_dir="./model/")

### Initialize LLM

In [None]:
from llama_index.llms.openvino import OpenVINOLLM

# Runtime properties handed to the LLM through model_kwargs["ov_config"].
# NOTE(review): an empty CACHE_DIR presumably disables the compiled-model
# cache — confirm against the OpenVINO documentation.
ov_config = dict(
    PERFORMANCE_HINT="LATENCY",
    NUM_STREAMS="1",
    CACHE_DIR="",
)

def completion_to_prompt(completion):
    """Wrap a bare completion request in the `<|im_start|>`/`<|im_end|>` chat markup.

    The result is an empty system turn, one user turn holding ``completion``,
    and an opened assistant turn for the model to continue.

    Args:
        completion: Text of the user request; it is interpolated with an
            f-string, so any object with a string form is accepted.

    Returns:
        The full prompt string.
    """
    empty_system_turn = "<|im_start|>system\n<|im_end|>\n"
    user_turn = f"<|im_start|>user\n{completion}<|im_end|>\n"
    assistant_header = "<|im_start|>assistant\n"
    return empty_system_turn + user_turn + assistant_header

def messages_to_prompt(messages):
    """Render a chat history in the `<|im_start|>`/`<|im_end|>` chat markup.

    Each message with role ``system``, ``user`` or ``assistant`` becomes one
    closed turn; messages with any other role are skipped. If the rendered
    history does not begin with a system turn, a closed *empty* system turn
    is inserted in front, matching what ``completion_to_prompt`` emits. The
    prompt always ends with an opened assistant turn for the model to fill.

    Args:
        messages: Iterable of objects exposing ``role`` and ``content``
            attributes.

    Returns:
        The full prompt string.
    """
    prompt = ""
    for message in messages:
        # Roles are compared with ``==`` against plain strings on purpose:
        # this also matches str-based enum roles, which a dict lookup or
        # string formatting of the role would not do reliably.
        if message.role == "system":
            prompt += f"<|im_start|>system\n{message.content}<|im_end|>\n"
        elif message.role == "user":
            prompt += f"<|im_start|>user\n{message.content}<|im_end|>\n"
        elif message.role == "assistant":
            prompt += f"<|im_start|>assistant\n{message.content}<|im_end|>\n"

    # Ensure the prompt starts with a system turn. The inserted turn must be
    # terminated with <|im_end|>; otherwise the following user turn would be
    # swallowed into an unterminated system turn.
    if not prompt.startswith("<|im_start|>system"):
        prompt = "<|im_start|>system\n<|im_end|>\n" + prompt

    # Open the assistant turn that the model is expected to complete.
    prompt = prompt + "<|im_start|>assistant\n"

    return prompt

# Build the LlamaIndex LLM wrapper around the locally downloaded Qwen2 model.
ov_llm = OpenVINOLLM(
    model_id_or_path=llm_local_path,
    context_window=3900,
    max_new_tokens=1024,
    model_kwargs={"ov_config": ov_config},
    generate_kwargs={
        # Qwen2's pad token is <|endoftext|> (id 151643). The previous value
        # 32000 is the pad id used by Llama/Zephyr-style examples and does
        # not denote a padding token in the Qwen2 vocabulary.
        "pad_token_id": 151643,
        # Greedy decoding; the sampling knobs are explicitly unset.
        "do_sample": False,
        "temperature": None,
        "top_p": None,
    },
    # Prompt formatters defined earlier in this cell.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    device_map="gpu",
)


Compiling the model to GPU ...


### Call `stream_complete` with a prompt

In [None]:
response = ov_llm.stream_complete("What is OpenVINO ?")

# Print each incremental piece as it arrives; end="" keeps the pieces on one
# continuous line instead of one line per chunk.
for chunk in response:
    print(chunk.delta, end="")

## 2. Basic RAG (Vector Search, Summarization)
### Export Embedding model

In [6]:
# Source model id for the embedding model and the local directory that will
# hold its exported OpenVINO version.
embedding_model_id = "BAAI/bge-small-zh-v1.5"
embedding_model_path = "./model/bge-small-zh-v1.5-ov"

# Export only once: skip the (slow) conversion when the output directory
# already exists.
if not Path(embedding_model_path).exists():
    # IPython shell escape; {name} interpolates the Python variables above.
    !optimum-cli export openvino --model {embedding_model_id} --task feature-extraction {embedding_model_path}

Library name is not specified. There are multiple possible variants: `sentence_transformers`, `transformers`.`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers
Framework not specified. Using pt to export the model.
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Using framework PyTorch: 2.3.1+cpu
Overriding 1 configuration item(s)
	- use_cache -> False
Detokenizer is not supported, convert tokenizer only.


### Initialize Embedding model

In [7]:
from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding

# Load the exported embedding model from its local directory onto the CPU.
ov_embedding = OpenVINOEmbedding(
    model_id_or_path=embedding_model_path,
    device="CPU",
)

Compiling the model to CPU ...


### Basic RAG (Vector Search)

In [8]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex, Settings

# Make the OpenVINO embedding model and LLM the LlamaIndex-wide defaults, so
# the index and query engine below pick them up without explicit arguments.
Settings.embed_model = ov_embedding
Settings.llm = ov_llm

# Read the example PDF into LlamaIndex documents.
reader = SimpleDirectoryReader(input_files=["./examples/text_example_cn.pdf"])
documents = reader.load_data()

# Build the vector index over the loaded documents.
index = VectorStoreIndex.from_documents(documents)

# Query engine that streams its answer and retrieves the 2 most similar
# entries per query.
query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=2,
)

In [9]:
# Ask (in Chinese) which features Intel vPro Enterprise systems provide, and
# print the answer as it streams in.
streaming_response = query_engine.query("英特尔博锐® Enterprise系统提供哪些功能?")
streaming_response.print_response_stream()

英特尔博锐® Enterprise系统提供了以下功能：

1. 动态信任根
2. 系统管理模式（SMM）保护
3. 具有多密钥支持的内存加密
4. 操作系统内核保护
5. 可实现远程KVM控制的带外管理
6. 唯一设备标识符
7. 设备历史记录
8. 带内可管理性插件

In [10]:
# Ask (in Chinese) how much AI inference performance improved on Intel Core
# Ultra processors versus Intel's previous mobile processors, and print the
# answer as it streams in.
streaming_response = query_engine.query("相比英特尔之前的移动处理器产品，英特尔®酷睿™ Ultra处理器的AI推理性能提升了多少？")
streaming_response.print_response_stream()

相比英特尔之前的移动处理器产品，英特尔®酷睿™ Ultra处理器的每瓦AI推理性能最高提升了2.5倍。