# 下载嵌入模型

In [1]:
import torch
from modelscope import snapshot_download

# Download the bge-base-zh-v1.5 embedding model with ModelScope's
# snapshot_download; its return value is kept in model_dir.
embedding_model_id = 'AI-ModelScope/bge-base-zh-v1.5'  # model identifier
model_dir = snapshot_download(
    embedding_model_id,
    revision='master',  # revision to fetch
    cache_dir='/root/autodl-tmp',  # cache directory
)

2024-07-02 15:30:51,776 - modelscope - INFO - PyTorch version 2.1.2+cu121 Found.
2024-07-02 15:30:51,781 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer
2024-07-02 15:30:51,924 - modelscope - INFO - Updating the files for the changes of local files, first time updating will take longer time! Please wait till updating done!
2024-07-02 15:30:51,926 - modelscope - INFO - AST-Scanning the path "/root/miniconda3/lib/python3.10/site-packages/modelscope" with the following sub folders ['models', 'metrics', 'pipelines', 'preprocessors', 'trainers', 'msdatasets', 'exporters']
2024-07-02 15:30:51,926 - modelscope - INFO - Scanning done! A number of 0 components indexed or updated! Time consumed 0.0006194114685058594s
2024-07-02 15:30:51,961 - modelscope - INFO - Loading done! Current index file version is 1.11.0, with md5 53b373ffec968389caf899c2f4d6ff00 and a total number of 953 components indexed
Downloading: 100%|██████████| 190/190 [00:00<00:00, 1.05MB/s]
Dow

In [None]:
# 导入依赖包

In [None]:
!pip install llama-index-core
!pip install llama-index-llms-openai
!pip install llama-index-llms-replicate
!pip install llama-index-embeddings-huggingface

In [1]:
import torch
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding




# 设置提示词模版

In [2]:
# Set prompt template for generation (optional)
from llama_index.core import PromptTemplate

def completion_to_prompt(completion):
    """Wrap a plain completion request in the ChatML turn layout.

    The result is an empty system turn, a user turn holding ``completion``,
    and an opened assistant turn for the model to continue.
    """
    empty_system_turn = "<|im_start|>system\n<|im_end|>\n"
    user_turn = f"<|im_start|>user\n{completion}<|im_end|>\n"
    assistant_header = "<|im_start|>assistant\n"
    return empty_system_turn + user_turn + assistant_header

def messages_to_prompt(messages):
    """Render a sequence of chat messages as a ChatML prompt.

    Args:
        messages: iterable of objects exposing ``role`` and ``content``.
            Only the roles "system", "user" and "assistant" are rendered;
            messages with any other role are skipped.

    Returns:
        The concatenated turns followed by an opened assistant turn. If the
        rendered turns do not begin with a system turn, an empty *closed*
        system turn is prepended, matching what completion_to_prompt emits.
    """
    prompt = ""
    for message in messages:
        if message.role == "system":
            prompt += f"<|im_start|>system\n{message.content}<|im_end|>\n"
        elif message.role == "user":
            prompt += f"<|im_start|>user\n{message.content}<|im_end|>\n"
        elif message.role == "assistant":
            prompt += f"<|im_start|>assistant\n{message.content}<|im_end|>\n"

    if not prompt.startswith("<|im_start|>system"):
        # Close the injected system turn with <|im_end|>; otherwise the
        # following turn would sit inside an unterminated system block.
        prompt = "<|im_start|>system\n<|im_end|>\n" + prompt

    # Leave the assistant turn open so the model generates the reply.
    return prompt + "<|im_start|>assistant\n"

In [3]:
# 设置语言模型

In [16]:
!pip install accelerate

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
[0m

In [4]:

# Register the locally stored Qwen2-7B-Instruct checkpoint as the global LLM,
# together with its generation settings and the prompt formatters.
# To load by model name instead of a local path, use "Qwen/Qwen2-7B-Instruct"
# for both model_name and tokenizer_name.
qwen2_checkpoint = "/root/autodl-tmp/qwen/Qwen2-7B-Instruct"
sampling_options = {"temperature": 0.7, "top_k": 50, "top_p": 0.95}

Settings.llm = HuggingFaceLLM(
    model_name=qwen2_checkpoint,
    tokenizer_name=qwen2_checkpoint,
    context_window=30000,
    max_new_tokens=2000,
    generate_kwargs=sampling_options,
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


# 加载嵌入模型

In [5]:
# Use the locally stored bge-base-zh-v1.5 model as the global embedding model.
embedding_model_path = "/root/autodl-tmp/AI-ModelScope/bge-base-zh-v1.5"
Settings.embed_model = HuggingFaceEmbedding(model_name=embedding_model_path)

  return self.fget.__get__(instance, owner)()


In [6]:
# Use a sentence splitter with chunk_size=1024 as the only transformation.
sentence_splitter = SentenceSplitter(chunk_size=1024)
Settings.transformations = [sentence_splitter]

In [10]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
# Load the documents under ./document and build a vector index over them,
# using the embedding model and transformations configured on Settings.
source_reader = SimpleDirectoryReader("./document")
documents = source_reader.load_data()
index = VectorStoreIndex.from_documents(
    documents,
    transformations=Settings.transformations,
    embed_model=Settings.embed_model,
)


In [13]:
# Query the index and print only the text of the response.
query_engine = index.as_query_engine()
your_query = "你是谁？"
answer = query_engine.query(your_query)
print(answer.response)

根据给定的上下文信息，无法直接回答“你是谁？”这个问题，因为它要求提供身份或自我介绍的信息，而这在提供的内容中并未提及。所以，基于给定的信息集，这个问题的答案不能被确定。


In [14]:
# Query the index and print only the text of the response.
query_engine = index.as_query_engine()
your_query = "什么是属于你的，但其他人比你使用它更多？ "
answer = query_engine.query(your_query)
print(answer.response)

你的名字。


In [15]:
# Query the index and print only the text of the response.
query_engine = index.as_query_engine()
your_query = "路的左边有一座绿房子，路的右边有一座红房子。 那么，白宫在哪里？ "
answer = query_engine.query(your_query)
print(answer.response)

在美国华盛顿。


In [None]:
# 默认情况下，索引数据存储在内存中。要将其持久化到磁盘（保存在 ./storage 目录下）：

In [16]:
# Write the index's storage context to disk; "./storage" is the library default
# and the directory the reload step reads from.
index.storage_context.persist(persist_dir="./storage")

In [17]:

#要从磁盘重新加载：

from llama_index.core import StorageContext, load_index_from_storage

# Rebuild the storage context from the files persisted under ./storage,
# then restore the index from that context.
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context=storage_context)

In [18]:
# Query the reloaded index and print only the text of the response.
query_engine = index.as_query_engine()
your_query = "没有翅膀我也能飞翔。 没有眼睛我也能哭。 每当我走的时候，黑暗就跟着我。 我是什么？  "
answer = query_engine.query(your_query)
print(answer.response)

一朵云。
