In [1]:
# 向量模型下载
from modelscope import snapshot_download
model_dir = snapshot_download("AI-ModelScope/bge-small-zh-v1.5", cache_dir='.')

# 导入必要库
from typing import List
import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM

Downloading [1_Pooling/config.json]:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading [config.json]:   0%|          | 0.00/776 [00:00<?, ?B/s]

Downloading [config_sentence_transformers.json]:   0%|          | 0.00/124 [00:00<?, ?B/s]

Downloading [configuration.json]:   0%|          | 0.00/47.0 [00:00<?, ?B/s]

Downloading [model.safetensors]:   0%|          | 0.00/91.4M [00:00<?, ?B/s]

Downloading [modules.json]:   0%|          | 0.00/349 [00:00<?, ?B/s]

Downloading [pytorch_model.bin]:   0%|          | 0.00/91.4M [00:00<?, ?B/s]

Downloading [README.md]:   0%|          | 0.00/27.5k [00:00<?, ?B/s]

Downloading [sentence_bert_config.json]:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading [special_tokens_map.json]:   0%|          | 0.00/125 [00:00<?, ?B/s]

Downloading [tokenizer.json]:   0%|          | 0.00/429k [00:00<?, ?B/s]

Downloading [tokenizer_config.json]:   0%|          | 0.00/367 [00:00<?, ?B/s]

Downloading [vocab.txt]:   0%|          | 0.00/107k [00:00<?, ?B/s]

In [2]:
# 向量模型类，用于生成文本嵌入
class EmbeddingModel:
    def __init__(self, path: str) -> None:
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModel.from_pretrained(path).cuda()
        print(f'Loading EmbeddingModel from {path}.')

    def get_embeddings(self, texts: List) -> List[float]:
        encoded_input = self.tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
        encoded_input = {k: v.cuda() for k, v in encoded_input.items()}
        with torch.no_grad():
            model_output = self.model(**encoded_input)
            sentence_embeddings = model_output[0][:, 0]
        sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)
        return sentence_embeddings.tolist()

# 创建向量模型实例
print("> Create embedding model...")
embed_model_path = './AI-ModelScope/bge-small-zh-v1___5'
embed_model = EmbeddingModel(embed_model_path)

> Create embedding model...
Loading EmbeddingModel from ./AI-ModelScope/bge-small-zh-v1___5.


In [4]:
# 向量库索引类，用于检索与问题相关的知识文本
class VectorStoreIndex:
    def __init__(self, document_path: str, embed_model: EmbeddingModel) -> None:
        self.documents = []
        for line in open(document_path, 'r', encoding='utf-8'):
            line = line.strip()
            self.documents.append(line)

        self.embed_model = embed_model
        self.vectors = self.embed_model.get_embeddings(self.documents)
        print(f'Loaded {len(self.documents)} documents from {document_path}.')

    def get_similarity(self, vector1: List[float], vector2: List[float]) -> float:
        dot_product = np.dot(vector1, vector2)
        magnitude = np.linalg.norm(vector1) * np.linalg.norm(vector2)
        return dot_product / magnitude if magnitude else 0

    def query(self, question: str, k: int = 1) -> List[str]:
        question_vector = self.embed_model.get_embeddings([question])[0]
        similarities = [self.get_similarity(question_vector, vector) for vector in self.vectors]
        return [self.documents[i] for i in np.argsort(similarities)[-k:][::-1]]

# 创建向量库索引
print("> Create index...")
document_path = './metro_knowledge.txt'
index = VectorStoreIndex(document_path, embed_model)

> Create index...
Loaded 15 documents from ./metro_knowledge.txt.


In [6]:
# 测试问题
question = '地铁清分系统为什么失效？'
print('> Question:', question)

# 从知识库中检索相关背景信息
context = index.query(question)
print('> Context:', context)

> Question: 地铁清分系统为什么失效？
> Context: ['清分系统可能失效的原因多种多样，包括但不限于账号信息不正确、账户密码失效、网络问题、系统升级失败、账户被盗以及电脑硬件故障等。这些问题可能导致清分系统无法正常工作，影响地铁的票务管理和客流分析。为了确保清分系统的有效运行，需要对这些潜在的问题进行识别和解决，同时采取有效的安全措施来保护系统免受网络攻击和数据泄露的威胁。']


In [7]:
# 大语言模型类，用于生成基于上下文的回答
class LLM:
    def __init__(self, model_path: str) -> None:
        print("Creating tokenizer...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, add_eos_token=False, add_bos_token=False, eos_token='<eod>')
        self.tokenizer.add_tokens(['<sep>', '<pad>', '<mask>', '<predict>', '<FIM_SUFFIX>', '<FIM_PREFIX>', '<FIM_MIDDLE>', '<commit_before>', '<commit_msg>', '<commit_after>', '<jupyter_start>', '<jupyter_text>', '<jupyter_code>', '<jupyter_output>', '<empty_output>'], special_tokens=True)

        print("Creating model...")
        self.model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, trust_remote_code=True).cuda()
        print(f'Loaded model from {model_path}.')

    def generate(self, question: str, context: List):
        if context:
            prompt = f'背景：{context}\n问题：{question}\n请基于背景，回答问题。'
        else:
            prompt = question
        prompt += "<sep>"
        inputs = self.tokenizer(prompt, return_tensors="pt")["input_ids"].cuda()
        outputs = self.model.generate(inputs, do_sample=False, max_length=1024)
        output = self.tokenizer.decode(outputs[0])
        print(output.split("<sep>")[-1])

# 创建大语言模型实例
print("> Create Yuan2.0 LLM...")
model_path = './IEITYuan/Yuan2-2B-Mars-hf'
llm = LLM(model_path)

# 测试生成
print('> Without RAG:')
llm.generate(question, [])

print('> With RAG:')
llm.generate(question, context)


You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message


> Create Yuan2.0 LLM...
Creating tokenizer...


You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.


Creating model...


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Loaded model from ./IEITYuan/Yuan2-2B-Mars-hf.
> Without RAG:
 地铁清分系统失效可能是由于多种原因引起的。以下是一些可能的原因：
1. 机械故障：清分系统中的机械部件可能由于磨损、摩擦或损坏而失效。例如，轨道上的传感器或执行器可能会出现故障，导致清分系统无法正常运作。
2. 电子故障：清分系统中的电子部分可能由于电源故障、电路问题或电子元件损坏而失效。这可能导致系统无法正常启动、数据丢失或无法完成清分任务。
3. 网络连接问题：清分系统通常通过地铁网络与控制中心进行通信。如果地铁网络出现故障，例如电力供应中断或网络连接中断，清分系统可能会受到干扰，导致无法正常完成任务。
4. 安全系统触发：清分系统可能配置有安全触发机制，用于在检测到异常情况时自动停止或采取其他行动。如果安全系统检测到异常情况，清分系统可能被迫停止运行，直到安全事件得到解决。
5. 人为错误：清分系统中的人工干预也可能导致失效。如果系统出现故障或错误，操作员可能未能正确配置或执行必要的操作，从而导致系统无法正常运行。
要解决地铁清分系统的失效问题，需要首先确定具体的故障原因，然后采取相应的修复措施。这可能包括修理、更换损坏的部件、重新连接地铁网络或调整系统设置等。<eod>
> With RAG:
 地铁清分系统失效的原因可能包括但不限于账号信息不正确、账户密码失效、网络问题、系统升级失败、账户被盗以及电脑硬件故障等。这些问题可能导致清分系统无法正常工作，影响地铁的票务管理和客流分析。为了确保清分系统的有效运行，需要对这些潜在的问题进行识别和解决，同时采取有效的安全措施来保护系统免受网络攻击和数据泄露的威胁。<eod>
