# Hugging Face

`Hugging Face` 提供了两种调用 LLM 的方式：
1. 通过 API token 远程调用
2. 在本地加载模型


## 安装环境

In [1]:
! pip install langchain huggingface_hub transformers sentence_transformers accelerate bitsandbytes

Collecting langchain
  Downloading langchain-0.1.17-py3-none-any.whl.metadata (13 kB)
Collecting sentence_transformers
  Downloading sentence_transformers-2.7.0-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl.metadata (2.2 kB)
Collecting langchain-community<0.1,>=0.0.36 (from langchain)
  Downloading langchain_community-0.0.38-py3-none-any.whl.metadata (8.7 kB)
Collecting langchain-core<0.2.0,>=0.1.48 (from langchain)
  Downloading langchain_core-0.1.52-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain-text-splitters<0.1,>=0.0.1 (from langchain)
  Downloading langchain_text_splitters-0.0.1-py3-none-any.whl.metadata (2.0 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.56-py3-none-any.whl.metadata (13 kB)
Collecting packaging>=20.9 (from huggingface_hub)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.

## 使用 API token 调用 LLM

In [23]:
from getpass import getpass

# Read the Hugging Face API token interactively so it is never stored in the
# notebook source or output. The labelled prompt tells the reader what to
# enter, and stripping removes stray whitespace picked up when pasting.
HUGGINGFACEHUB_API_TOKEN = getpass("Hugging Face API token: ").strip()

# Fail here with a clear message instead of later with an opaque auth error.
if not HUGGINGFACEHUB_API_TOKEN:
    raise ValueError("No Hugging Face API token was entered.")

 ·····································


In [24]:
import os

# Publish the token through the process environment under the
# HUGGINGFACEHUB_API_TOKEN key. The HuggingFaceHub client created later is
# given no explicit token, so it presumably picks it up from here.
os.environ.update({"HUGGINGFACEHUB_API_TOKEN": HUGGINGFACEHUB_API_TOKEN})

In [25]:
from langchain_community.llms import HuggingFaceHub
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

In [26]:
# Build the prompt template.
# The question that will be substituted into the template's {question} slot.
question = "Where is the capital of China? "

# Template text: the question followed by a "think step by step" answer cue.
template = (
    "Question: {question}\n"
    "\n"
    "Answer: Let's think step by step."
)

prompt = PromptTemplate(
    template=template,
    input_variables=["question"],
)

In [27]:
repo_id = "google/flan-t5-base"  # Hub model repository; for other candidates see https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads

In [28]:
# LLM client for the Hugging Face Hub model named by `repo_id`.
llm = HuggingFaceHub(
    repo_id=repo_id,
)

# Combine the prompt template with the LLM; `llm_kwargs` carries the
# generation settings used for each call.
llm_chain = LLMChain(
    prompt=prompt,
    llm=llm,
    llm_kwargs={"temperature": 0, "max_length": 512},
)

# `Chain.run` is deprecated in LangChain 0.1; `invoke` returns a dict whose
# "text" entry holds the generated answer.
print(llm_chain.invoke({"question": question})["text"])

China is located in the north of the world. The capital of China is Beijing. The answer: Beijing.


### 构建RAG检索

In [8]:
! pip install pypdf  faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m51.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0


In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Import from langchain_community (as the other cells do) rather than the
# deprecated `langchain.document_loaders` shim.
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF directly from its arXiv URL.
loader = PyPDFLoader("https://arxiv.org/pdf/2309.10305.pdf")
pages = loader.load()

# Split the text into chunks of at most 300 characters with a 50-character
# overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)

# Only the first four loaded pages are split and indexed.
docs = text_splitter.split_documents(pages[:4])

In [10]:
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.vectorstores import FAISS


# Question used for retrieval here and for the prompts built in later cells.
query = "How large is the baichuan2 vocabulary size?"

# Embedding client that authenticates with the API token entered earlier.
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HUGGINGFACEHUB_API_TOKEN,
    model_name="sentence-transformers/all-MiniLM-l6-v2",
)

# Build a FAISS vector store from the chunked documents.
db = FAISS.from_documents(docs, embeddings)

# Fetch the three chunks most similar to the query.
result_simi = db.similarity_search(query, k=3)

In [11]:
# Join the text of every retrieved chunk into one newline-separated context string.
source_knowledge = "\n".join(doc.page_content for doc in result_simi)

In [12]:
# Retrieval-augmented prompt template with two placeholders:
# {source_knowledge} for the retrieved contexts and {query} for the question.
augmented_prompt = (
    "Using the contexts below, answer the query.\n"
    "\n"
    "contexts:\n"
    "{source_knowledge}\n"
    "\n"
    "query: {query}"
)

In [13]:
# Template whose placeholders are the retrieved contexts and the user query.
prompt = PromptTemplate(
    template=augmented_prompt,
    input_variables=["source_knowledge", "query"],
)

# Reuse the LLM client from earlier with a larger max_length setting.
llm_chain = LLMChain(
    prompt=prompt,
    llm=llm,
    llm_kwargs={"temperature": 0, "max_length": 1024},
)

# `Chain.run` is deprecated in LangChain 0.1; `invoke` returns a dict whose
# "text" entry holds the generated answer.
rag_inputs = {"source_knowledge": source_knowledge, "query": query}
print(llm_chain.invoke(rag_inputs)["text"])

125,696


In [14]:
# Render the retrieval-augmented prompt as a plain string by filling the
# contexts and the query into the template text.
augmented_prompt_2 = (
    "Using the contexts below, answer the query.\n"
    "\n"
    "contexts:\n"
    "{source_knowledge}\n"
    "\n"
    "query: {query}"
).format(source_knowledge=source_knowledge, query=query)

In [15]:
# Print the rendered prompt so the retrieved contexts can be inspected.
print(augmented_prompt_2)

Using the contexts below, answer the query.

contexts:
have taken both these aspects into account. We
have expanded the vocabulary size from 64,000
in Baichuan 1 to 125,696, aiming to strike a
balance between computational efficiency and
model performance.
Tokenizer V ocab Size Compression Rate ↓
LLaMA 2 32,000 1.037
Bloom 250,680 0.501
improve after training on more than 2.6 trillion
tokens. By sharing these intermediary results,
we hope to provide the community with greater
insight into the training dynamics of Baichuan 2.
Understanding these dynamics is key to unraveling
the inner working mechanism of large language
Baichuan 2: Open Large-scale Language Models
Aiyuan Yang, Bin Xiao, Bingning Wang, Borong Zhang, Chao Yin, Chenxu Lv, Da Pan
Dian Wang, Dong Yan, Fan Yang, Fei Deng, Feng Wang, Feng Liu, Guangwei Ai
Guosheng Dong, Haizhou Zhao, Hang Xu, Haoze Sun, Hongda Zhang, Hui Liu, Jiaming Ji

query: How large is the baichuan2 vocabulary size?


## 本地加载LLM

- 以 Baichuan 模型为例

In [16]:
! pip install modelscope
#  safetensors xformers


Collecting modelscope
  Downloading modelscope-1.14.0-py3-none-any.whl.metadata (33 kB)
Collecting addict (from modelscope)
  Downloading addict-2.4.0-py3-none-any.whl.metadata (1.0 kB)
Collecting einops (from modelscope)
  Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)
Collecting oss2 (from modelscope)
  Downloading oss2-2.18.5.tar.gz (283 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m283.4/283.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting aliyun-python-sdk-kms>=2.4.1 (from oss2->modelscope)
  Downloading aliyun_python_sdk_kms-2.16.3-py2.py3-none-any.whl.metadata (1.5 kB)
Collecting aliyun-python-sdk-core>=2.13.12 (from oss2->modelscope)
  Downloading aliyun-python-sdk-core-2.15.1.tar.gz (443 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m443.1/443.1 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?2

In [17]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.utils import GenerationConfig

In [18]:
import torch
from modelscope import Model, snapshot_download

# Download the Baichuan2-7B-Chat snapshot from ModelScope and get its local directory.
model_dir = snapshot_download("baichuan-inc/Baichuan2-7B-Chat", revision="master")

# Load the model in float16 with automatic device placement.
# NOTE(review): trust_remote_code=True allows code shipped with the model
# repository to run locally — confirm the source is trusted.
model = Model.from_pretrained(
    model_dir,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)

# Single-turn chat: one user message, then print the model's raw response.
messages = [{"role": "user", "content": "讲解一下“温故而知新”"}]
response = model(messages)
print(response)

2024-05-08 22:53:09,573 - modelscope - INFO - PyTorch version 2.1.2 Found.
2024-05-08 22:53:09,577 - modelscope - INFO - TensorFlow version 2.15.0 Found.
2024-05-08 22:53:09,578 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer
2024-05-08 22:53:09,579 - modelscope - INFO - No valid ast index found from /root/.cache/modelscope/ast_indexer, generating ast index from prebuilt!
2024-05-08 22:53:09,664 - modelscope - INFO - Loading done! Current index file version is 1.14.0, with md5 2b7176df9ec6b1a9ffe207a78dcc5d2e and a total number of 976 components indexed
2024-05-08 22:53:10,980 - modelscope - INFO - Use user-specified model revision: master
Downloading: 100%|██████████| 252k/252k [00:00<00:00, 1.25MB/s]
Downloading: 100%|██████████| 198k/198k [00:00<00:00, 1.22MB/s]
Downloading: 100%|██████████| 758/758 [00:00<00:00, 3.14MB/s]
Downloading: 100%|██████████| 215/215 [00:00<00:00, 881kB/s]
Downloading: 100%|██████████| 2.39k/2.39k [00:00<00:00, 8.81MB/s]
Do

{'response': '"温故而知新"是一句中国古代的成语，出自《论语·为政》。这句话的意思是：通过回顾过去的事物，可以了解到新的知识和道理。这个成语强调了学习和记忆的重要性，以及通过不断复习和思考来提高自己的知识水平。\n\n在现代教育中，这句话仍然具有很大的启示意义。学习是一个持续的过程，我们需要不断地回顾和巩固过去的知识，以便更好地理解和掌握新的知识。同时，通过温故，我们还可以发现过去知识的不足和遗漏，从而激发我们的求知欲和创新精神。\n\n总之，"温故而知新"是一种有效的学习方法和策略，可以帮助我们在不断变化的世界中保持知识和技能的更新和提升。', 'history': [{'role': 'user', 'content': '讲解一下“温故而知新”'}, {'role': 'assistant', 'content': '"温故而知新"是一句中国古代的成语，出自《论语·为政》。这句话的意思是：通过回顾过去的事物，可以了解到新的知识和道理。这个成语强调了学习和记忆的重要性，以及通过不断复习和思考来提高自己的知识水平。\n\n在现代教育中，这句话仍然具有很大的启示意义。学习是一个持续的过程，我们需要不断地回顾和巩固过去的知识，以便更好地理解和掌握新的知识。同时，通过温故，我们还可以发现过去知识的不足和遗漏，从而激发我们的求知欲和创新精神。\n\n总之，"温故而知新"是一种有效的学习方法和策略，可以帮助我们在不断变化的世界中保持知识和技能的更新和提升。'}]}


In [19]:
# Hard-coded prompt for the local model: the same instruction, contexts and
# query as the rendered prompt printed earlier in this notebook, followed by a
# trailing newline.
content = '''Using the contexts below, answer the query.

contexts:
have taken both these aspects into account. We
have expanded the vocabulary size from 64,000
in Baichuan 1 to 125,696, aiming to strike a
balance between computational efficiency and
model performance.
Tokenizer V ocab Size Compression Rate ↓
LLaMA 2 32,000 1.037
Bloom 250,680 0.501
improve after training on more than 2.6 trillion
tokens. By sharing these intermediary results,
we hope to provide the community with greater
insight into the training dynamics of Baichuan 2.
Understanding these dynamics is key to unraveling
the inner working mechanism of large language
Baichuan 2: Open Large-scale Language Models
Aiyuan Yang, Bin Xiao, Bingning Wang, Borong Zhang, Chao Yin, Chenxu Lv, Da Pan
Dian Wang, Dong Yan, Fan Yang, Fei Deng, Feng Wang, Feng Liu, Guangwei Ai
Guosheng Dong, Haizhou Zhao, Hang Xu, Haoze Sun, Hongda Zhang, Hui Liu, Jiaming Ji

query: How large is the baichuan2 vocabulary size?
'''

In [20]:
# Send the retrieval-augmented prompt to the local model as a single user turn
# and print the raw response.
messages = [{"role": "user", "content": content}]
response = model(messages)
print(response)

{'response': 'The vocabulary size for Baichuan 2 is 125,696.', 'history': [{'role': 'user', 'content': 'Using the contexts below, answer the query.\n\ncontexts:\nhave taken both these aspects into account. We\nhave expanded the vocabulary size from 64,000\nin Baichuan 1 to 125,696, aiming to strike a\nbalance between computational efficiency and\nmodel performance.\nTokenizer V ocab Size Compression Rate ↓\nLLaMA 2 32,000 1.037\nBloom 250,680 0.501\nimprove after training on more than 2.6 trillion\ntokens. By sharing these intermediary results,\nwe hope to provide the community with greater\ninsight into the training dynamics of Baichuan 2.\nUnderstanding these dynamics is key to unraveling\nthe inner working mechanism of large language\nBaichuan 2: Open Large-scale Language Models\nAiyuan Yang, Bin Xiao, Bingning Wang, Borong Zhang, Chao Yin, Chenxu Lv, Da Pan\nDian Wang, Dong Yan, Fan Yang, Fei Deng, Feng Wang, Feng Liu, Guangwei Ai\nGuosheng Dong, Haizhou Zhao, Hang Xu, Haoze Sun, H