In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - Hugging Face Local Inference

<table><tbody><tr>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_huggingface_local_inference.ipynb">
      <img alt="Google Cloud Colab Enterprise logo" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" width="32px"><br> Run in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_huggingface_local_inference.ipynb">
      <img alt="GitHub logo" src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" width="32px"><br> View on GitHub
    </a>
  </td>
</tr></tbody></table>

## Overview

This notebook demonstrates how to install the necessary libraries and run local inference with various Hugging Face models in a [Colab Enterprise Instance](https://cloud.google.com/colab/docs).

### Objective

* Run local inference with various transformer or diffusion models.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Install dependencies

Before you begin, make sure you are connecting to a [Colab Enterprise runtime](https://cloud.google.com/colab/docs/connect-to-runtime) with GPU. If not, we recommend [creating a runtime template](https://cloud.google.com/colab/docs/create-runtime-template) with `g2-standard-16` machine type to use the `NVIDIA_L4` GPU. Then, [create a runtime](https://cloud.google.com/colab/docs/create-runtime) from that template.

In [None]:
! pip3 install --upgrade pip
! pip3 install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1
! pip3 install transformers~=4.44.2
! pip3 install diffusers~=0.30.1
! pip3 install accelerate~=0.33.0
! pip3 install triton~=2.3.1
! pip3 install xformers~=0.0.27
! pip3 install tesseract~=0.1.3
! pip3 install pytesseract~=0.3.13
! apt-get update
! apt-get install -y --no-install-recommends tesseract-ocr

## Sample code

#### [stabilityai/stable-diffusion-3-medium-diffusers](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers) (Text-to-image)
Generate photo-realistic images given any text input.

**This is a gated model. You need to agree to the license displayed on the [model card](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers). Then, create a [Hugging Face read token](https://huggingface.co/docs/hub/en/security-tokens) and paste it below.**

In [None]:
import torch
from diffusers import StableDiffusion3Pipeline

hf_token = ""  # @param {type:"string"}
! huggingface-cli login --token $hf_token

model_id = "stabilityai/stable-diffusion-3-medium-diffusers"
pipe = StableDiffusion3Pipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")

prompt = "a photo of an astronaut riding a horse on mars"
image = pipe(prompt).images[0]

display(image)

#### [stabilityai/stable-diffusion-3-medium-diffusers](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers) (Text guided image-to-image)
Generate an image based on an initial image and a text prompt.

**This is a gated model. You need to agree to the license displayed on the [model card](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers). Then, create a [Hugging Face read token](https://huggingface.co/docs/hub/en/security-tokens) and paste it below.**

In [None]:
from io import BytesIO

import requests
import torch
from diffusers import StableDiffusion3Img2ImgPipeline
from PIL import Image

hf_token = ""  # @param {type:"string"}
! huggingface-cli login --token $hf_token

device = "cuda"
model_id_or_path = "stabilityai/stable-diffusion-3-medium-diffusers"
pipe = StableDiffusion3Img2ImgPipeline.from_pretrained(
    model_id_or_path, torch_dtype=torch.float16
)
pipe = pipe.to(device)

url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"

response = requests.get(url)
init_image = Image.open(BytesIO(response.content)).convert("RGB")
init_image = init_image.resize((768, 512))
display(init_image)

prompt = "A fantasy landscape, trending on artstation"

images = pipe(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images
display(images[0])

#### [stabilityai/stable-diffusion-2-inpainting](https://huggingface.co/stabilityai/stable-diffusion-2-inpainting) (Image-inpainting)
Generate an image based on an original image and prompt, only editing the areas denoted by a mask image.

In [None]:
from io import BytesIO

import requests
import torch
from diffusers import StableDiffusionInpaintPipeline
from PIL import Image

# Download the picture to edit. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() reports an HTTP error directly
# instead of letting Image.open fail later on a non-image error page.
image_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
image_response = requests.get(image_url, timeout=60)
image_response.raise_for_status()
init_image = Image.open(BytesIO(image_response.content)).convert("RGB")
display(init_image)

# Download the mask that marks the region to repaint (same safeguards).
mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
mask_response = requests.get(mask_url, timeout=60)
mask_response.raise_for_status()
mask_image = Image.open(BytesIO(mask_response.content)).convert("RGB")
display(mask_image)

# Load the inpainting pipeline with float16 weights and move it to the GPU.
# NOTE(review): newer diffusers releases favor `variant="fp16"` over
# `revision="fp16"` for half-precision weights -- confirm the repository
# provides variant files before switching.
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-inpainting",
    revision="fp16",
    torch_dtype=torch.float16,
)
pipe.to("cuda")

# Generate from the image, mask and prompt; show the first result.
prompt = "Face of a yellow cat, high resolution, sitting on a park bench"
images = pipe(prompt=prompt, image=init_image, mask_image=mask_image).images
display(images[0])

#### [impira/layoutlm-document-qa](https://huggingface.co/impira/layoutlm-document-qa) (Document question answering)
Answer questions about a given document.

In [None]:
from transformers import pipeline

# Build the document question answering pipeline once and reuse it for every
# query below.
nlp = pipeline(
    task="document-question-answering",
    model="impira/layoutlm-document-qa",
)

# (document image URL, question) pairs, asked in order. The trailing comments
# are the sample outputs recorded with this notebook.
document_queries = [
    (
        "https://templates.invoicehome.com/invoice-template-us-neat-750px.png",
        "What is the invoice number?",
    ),
    # [{'score': 0.4251753091812134, 'answer': 'us-001', 'start': 16, 'end': 16}]
    (
        "https://miro.medium.com/max/787/1*iECQRIiOGTmEFLdWkVIH2g.jpeg",
        "What is the purchase amount?",
    ),
    # [{'score': 0.999853253364563, 'answer': '$1,000,000,000', 'start': 97, 'end': 97}]
    (
        "https://www.accountingcoach.com/wp-content/uploads/2013/10/income-statement-example@2x.png",
        "What are the 2020 net sales?",
    ),
    # [{'score': 0.9726569652557373, 'answer': '$ 3,980', 'start': 11, 'end': 12}]
]

for document_url, question in document_queries:
    print(nlp(document_url, question))

#### [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) (Feature-extraction)
Get text embeddings from a sentence.

In [None]:
from transformers import AutoTokenizer, pipeline

model_path = "Alibaba-NLP/gte-large-en-v1.5"

# Load the tokenizer explicitly and hand it to the pipeline.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# trust_remote_code=True lets transformers run custom model code published in
# the model repository; only enable it for repositories you trust.
feature_extraction = pipeline(
    task="feature-extraction",
    model=model_path,
    tokenizer=tokenizer,
    trust_remote_code=True,
)

# Extract features for a single sentence and print the first element of the
# pipeline output.
sentence = "i am sentence"
features = feature_extraction(sentence)

print(features[0])

#### [google/gemma-2-2b](https://huggingface.co/google/gemma-2-2b) (Text-generation)
Generate text from another text; for example, fill in incomplete text or paraphrase.

**This is a gated model. You need to agree to the license displayed on the [model card](https://huggingface.co/google/gemma-2-2b). Then, create a [Hugging Face read token](https://huggingface.co/docs/hub/en/security-tokens) and paste it below.**

In [None]:
from transformers import pipeline

hf_token = ""  # @param {type:"string"}
! huggingface-cli login --token $hf_token

pipe = pipeline(
    "text-generation",
    model="google/gemma-2-2b",
    device="cuda",
)

text = "Once upon a time,"
outputs = pipe(text, max_new_tokens=256)
response = outputs[0]["generated_text"]
print(response)