From 85dad5bf0b2c8a4b05408ecd24f4a493849d6ca8 Mon Sep 17 00:00:00 2001
From: Praveen Venkateswaran
Date: Mon, 6 Nov 2023 10:51:31 -0500
Subject: [PATCH] docs: update hf pipeline docs (#12908)

- **Description:** Noticed that the Hugging Face Pipeline documentation was a
  bit out of date. Updated with information about passing in a pipeline
  directly (consistent with the docstring) and a recent contribution of mine
  adding support for multi-GPU specification with Accelerate in
  21eeba075c05714f185e5541f25228f7b555f606
---
 .../llms/huggingface_pipelines.ipynb | 75 +++++++++++++++++--
 1 file changed, 68 insertions(+), 7 deletions(-)

diff --git a/docs/docs/integrations/llms/huggingface_pipelines.ipynb b/docs/docs/integrations/llms/huggingface_pipelines.ipynb
index 3fd8e0a0a6ca4..95423649e9342 100644
--- a/docs/docs/integrations/llms/huggingface_pipelines.ipynb
+++ b/docs/docs/integrations/llms/huggingface_pipelines.ipynb
@@ -41,7 +41,9 @@
    "id": "91ad075f-71d5-4bc8-ab91-cc0ad5ef16bb",
    "metadata": {},
    "source": [
-    "### Load the model"
+    "### Model Loading\n",
+    "\n",
+    "Models can be loaded by specifying the model parameters using the `from_model_id` method."
    ]
   },
   {
@@ -53,12 +55,12 @@
    },
    "outputs": [],
    "source": [
-    "from langchain.llms import HuggingFacePipeline\n",
+    "from langchain.llms.huggingface_pipeline import HuggingFacePipeline\n",
     "\n",
-    "llm = HuggingFacePipeline.from_model_id(\n",
-    "    model_id=\"bigscience/bloom-1b7\",\n",
+    "hf = HuggingFacePipeline.from_model_id(\n",
+    "    model_id=\"gpt2\",\n",
     "    task=\"text-generation\",\n",
-    "    model_kwargs={\"temperature\": 0, \"max_length\": 64},\n",
+    "    pipeline_kwargs={\"max_new_tokens\": 10},\n",
     ")"
    ]
   },
@@ -66,6 +68,31 @@
    "cell_type": "markdown",
    "id": "00104b27-0c15-4a97-b198-4512337ee211",
    "metadata": {},
+   "source": [
+    "They can also be loaded by passing in an existing `transformers` pipeline directly."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.llms.huggingface_pipeline import HuggingFacePipeline\n",
+    "from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
+    "\n",
+    "model_id = \"gpt2\"\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
+    "model = AutoModelForCausalLM.from_pretrained(model_id)\n",
+    "pipe = pipeline(\n",
+    "    \"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=10\n",
+    ")\n",
+    "hf = HuggingFacePipeline(pipeline=pipe)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "### Create Chain\n",
     "\n",
@@ -87,7 +114,7 @@
     "Answer: Let's think step by step.\"\"\"\n",
     "prompt = PromptTemplate.from_template(template)\n",
     "\n",
-    "chain = prompt | llm\n",
+    "chain = prompt | hf\n",
     "\n",
     "question = \"What is electroencephalography?\"\n",
     "\n",
@@ -98,6 +125,40 @@
    "cell_type": "markdown",
    "id": "dbbc3a37",
    "metadata": {},
+   "source": [
+    "### GPU Inference\n",
+    "\n",
+    "When running on a machine with a GPU, you can specify the `device=n` parameter to put the model on the specified device.\n",
+    "This defaults to `-1` for CPU inference.\n",
+    "\n",
+    "If you have multiple GPUs and/or the model is too large for a single GPU, you can specify `device_map=\"auto\"`, which requires the [Accelerate](https://huggingface.co/docs/accelerate/index) library to automatically determine how to load the model weights.\n",
+    "\n",
+    "*Note*: `device` and `device_map` should not be specified together, as this can lead to unexpected behavior."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gpu_llm = HuggingFacePipeline.from_model_id(\n",
+    "    model_id=\"gpt2\",\n",
+    "    task=\"text-generation\",\n",
+    "    device=0,  # replace with device_map=\"auto\" to use the accelerate library.\n",
+    "    pipeline_kwargs={\"max_new_tokens\": 10},\n",
+    ")\n",
+    "\n",
+    "gpu_chain = prompt | gpu_llm\n",
+    "\n",
+    "question = \"What is electroencephalography?\"\n",
+    "\n",
+    "print(gpu_chain.invoke({\"question\": question}))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "### Batch GPU Inference\n",
     "\n",
@@ -147,7 +208,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.10"
+   "version": "3.10.5"
   }
  },
  "nbformat": 4,
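
The last hunk above only renumbers the existing "Batch GPU Inference" heading; the code cell for that section lies outside the diff context. Purely as an illustrative sketch (not part of the patch), batching with the updated `gpu_llm` chain might look like the following; the second question and the use of the generic LCEL `.batch()` call are assumptions added here for illustration.

```python
# Illustrative sketch only -- not part of the patch above.
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.prompts import PromptTemplate

# Same model/chain setup as the GPU Inference cell added by the patch.
gpu_llm = HuggingFacePipeline.from_model_id(
    model_id="gpt2",
    task="text-generation",
    device=0,  # or replace with device_map="auto" to shard across GPUs via Accelerate
    pipeline_kwargs={"max_new_tokens": 10},
)

template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)
gpu_chain = prompt | gpu_llm

# LCEL runnables expose .batch() for running the chain over several inputs at once.
questions = [
    {"question": "What is electroencephalography?"},
    {"question": "What is photosynthesis?"},  # assumed extra example input
]

for answer in gpu_chain.batch(questions):
    print(answer)
```

This only shows the general batching pattern with the chain defined earlier in the notebook; the notebook's own batch example may configure the pipeline differently.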