##### Copyright 2025 The AI Edge Torch Authors.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Exporting Gemma3 270M with AI Edge Torch

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/google-ai-edge/ai-edge-torch/blob/main/ai_edge_torch/generative/colabs/Gemma3_270M_convertion.ipynb)

In this colab, we will show you how to export a Gemma-3-270M model to LiteRT-LM format with AI Edge Torch.

It works with the base Gemma-3-270M-it model and its fine-tuned models. For the latter, check out the [Full Model Fine-Tune using Hugging Face Transformers](https://ai.google.dev/gemma/docs/core/huggingface_text_full_finetune) tutorial.

# Prerequisite for exporting google/gemma-3-270m-it

- Create a Hugging Face token with permission to access
  - google/gemma-3-270m-it

  This is needed to download the checkpoint and tokenizer.

- Open Colab Secrets: In your Google Colab notebook, locate the Secrets icon in the left-hand sidebar and click on it.
- Add a new secret: Click the "Add Secret" button.
- Name your secret: Enter "HF_TOKEN" for your token in the "Name" field.
- Paste your token: In the "Value" field, paste the actual token you want to store.

# Prerequisite for exporting a fine-tuned model

- Access to the fine-tuned repo in Hugging Face Hub, or

- Access to the fine-tuned checkpoint


## Note: When running notebooks in this repository with Google Colab, some users may see the following warning message:

![Colab warning](https://github.com/google-ai-edge/ai-edge-torch/blob/main/docs/data/colab_warning.jpg?raw=true)

Please click `Restart Session` and run again.


This colab works with a free-tier Colab runtime.


In [None]:
# 1. Uninstall the existing TensorFlow builds first, so they cannot conflict
#    with the versions installed by the following steps.
!pip uninstall -y tensorflow tf-nightly

# 2. Install the dependencies listed in the requirements.txt on the main
#    branch of the ai-edge-torch repository.
!pip install -r https://raw.githubusercontent.com/google-ai-edge/ai-edge-torch/main/requirements.txt

# 3. Install the nightly build of ai-edge-torch itself.
!pip install ai-edge-torch-nightly

Found existing installation: tensorflow 2.19.0
Uninstalling tensorflow-2.19.0:
  Successfully uninstalled tensorflow-2.19.0
Collecting torch==2.9.0 (from -r https://raw.githubusercontent.com/google-ai-edge/ai-edge-torch/main/requirements.txt (line 2))
  Downloading torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting torchvision==0.24.0 (from -r https://raw.githubusercontent.com/google-ai-edge/ai-edge-torch/main/requirements.txt (line 3))
  Downloading torchvision-0.24.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.9 kB)
Collecting torchaudio==2.9.0 (from -r https://raw.githubusercontent.com/google-ai-edge/ai-edge-torch/main/requirements.txt (line 4))
  Downloading torchaudio-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (6.9 kB)
Collecting tf-nightly>=2.21.0.dev20250818 (from -r https://raw.githubusercontent.com/google-ai-edge/ai-edge-torch/main/requirements.txt (line 7))
  Downloading tf_nightly-2.21.0.dev20260122-cp312-cp312-manylinux_2_27_x86_64

Collecting ai-edge-torch-nightly
  Downloading ai_edge_torch_nightly-0.8.0.dev20260122-py3-none-any.whl.metadata (2.5 kB)
Downloading ai_edge_torch_nightly-0.8.0.dev20260122-py3-none-any.whl (523 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m523.6/523.6 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ai-edge-torch-nightly
Successfully installed ai-edge-torch-nightly-0.8.0.dev20260122


In [None]:
# Upgrade torchao to the latest available release, replacing whatever version
# the runtime already has installed.
!pip install -U torchao

Collecting torchao
  Downloading torchao-0.15.0-cp310-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (22 kB)
Downloading torchao-0.15.0-cp310-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (7.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m92.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchao
  Attempting uninstall: torchao
    Found existing installation: torchao 0.10.0
    Uninstalling torchao-0.10.0:
      Successfully uninstalled torchao-0.10.0
Successfully installed torchao-0.15.0


In [None]:
pip install --upgrade "jax[cuda]"

Collecting jax-cuda12-plugin<=0.9.0,>=0.9.0 (from jax-cuda12-plugin[with-cuda]<=0.9.0,>=0.9.0; extra == "cuda"->jax[cuda])
  Downloading jax_cuda12_plugin-0.9.0-cp312-cp312-manylinux_2_27_x86_64.whl.metadata (2.0 kB)
Collecting jax-cuda12-pjrt==0.9.0 (from jax-cuda12-plugin<=0.9.0,>=0.9.0->jax-cuda12-plugin[with-cuda]<=0.9.0,>=0.9.0; extra == "cuda"->jax[cuda])
  Downloading jax_cuda12_pjrt-0.9.0-py3-none-manylinux_2_27_x86_64.whl.metadata (579 bytes)
Collecting nvidia-cuda-nvcc-cu12>=12.6.85 (from jax-cuda12-plugin[with-cuda]<=0.9.0,>=0.9.0; extra == "cuda"->jax[cuda])
  Downloading nvidia_cuda_nvcc_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)
Downloading jax_cuda12_plugin-0.9.0-cp312-cp312-manylinux_2_27_x86_64.whl (5.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m53.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jax_cuda12_pjrt-0.9.0-py3-none-manylinux_2_27_x86_64.whl (154.9 MB)
[2K   [90m━━━━━━━

In [None]:
# Setup Hugging Face Hub credentials
#
# Exporting the token is currently disabled. To download a repo that requires
# authentication, store your token as a Colab secret named "HF_TOKEN" and
# uncomment the last line of this cell.

import os
from google.colab import userdata
# os.environ["HF_TOKEN"] = userdata.get('HF_TOKEN')

In [None]:
import os
# Select the CUDA platform for JAX. The variable is written to the environment
# before `import jax` so it is already present when JAX is loaded.
# NOTE(review): presumably this requires a GPU runtime — confirm that it
# matches the runtime type this notebook is meant to run on.
os.environ['JAX_PLATFORMS'] = 'cuda'

import jax

In [None]:
# @title Import needed packages.
from huggingface_hub import snapshot_download
from ai_edge_torch.generative.examples.gemma3 import gemma3
from ai_edge_torch.generative.utilities import converter
from ai_edge_torch.generative.utilities.export_config import ExportConfig
from ai_edge_torch.generative.layers import kv_cache



# Exporting the checkpoint to LiteRT-LM format.

In this example, we directly use the google/gemma-3-270m-it repo. But you can also replace it with your fine-tuned model directory or repo ID.

If you are following the fine-tune colab and storing your checkpoint in Google Drive as in its default setup, you can point to the checkpoint with the following code instead of downloading the base checkpoint.

```
from google.colab import drive
drive.mount('/content/drive')
checkpoint_dir = '/content/drive/MyDrive/MyGemmaNPC'
```


In [None]:
# @title Download checkpoint

# Hugging Face repo to fetch; replace it with your own fine-tuned repo ID if
# you want to export a different checkpoint.
# NOTE(review): the notebook text refers to google/gemma-3-270m-it — confirm
# that this repo ID is the intended source.
hf_repo_id = 'unsloth/gemma-3-270m-it'

# `checkpoint_dir` is consumed by the conversion cell further down.
checkpoint_dir = snapshot_download(hf_repo_id)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 11 files:   0%|          | 0/11 [00:00<?, ?it/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

generation_config.json:   0%|          | 0.00/233 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/536M [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

.gitattributes: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

# Convert to LiteRT-LM format

After running the following cell, you will be able to download the exported `.litertlm` file under `/content/`, which is accessible from the `Files` panel.

In [None]:
# Directory that receives the exported file.
output_dir = '/content/'

# Build the PyTorch Gemma3 270M model from the checkpoint directory.
pytorch_model = gemma3.build_model_270m(checkpoint_dir)

# Export configuration for text generation models.
export_config = ExportConfig()
export_config.kvcache_layout = kv_cache.KV_LAYOUT_TRANSPOSED
export_config.mask_as_input = True

# Text-generation specific settings, forwarded to the converter as keyword
# arguments: tokenizer location, special token ids, and the prompt
# prefix/suffix strings.
litertlm_config = dict(
    tokenizer_model_path=os.path.join(checkpoint_dir, 'tokenizer.model'),
    start_token_id=2,  # "<bos>"
    stop_token_ids=[1, 106],  # ["<eos>", "<end_of_turn>"]
    prompt_prefix="<start_of_turn>user\n",
    prompt_suffix="<end_of_turn>\n<start_of_turn>model\n",
    model_prompt_prefix="<start_of_turn>model\n",
    model_prompt_suffix="<end_of_turn>\n",
    user_prompt_prefix="<start_of_turn>user\n",
    user_prompt_suffix="<end_of_turn>\n",
    output_format="litertlm",
)

# General conversion parameters: output location and file name prefix,
# sequence lengths, quantization recipe, and the export configuration above.
conversion_options = dict(
    output_path=output_dir,
    output_name_prefix="gemma",
    prefill_seq_len=2048,
    kv_cache_max_len=4096,
    quantize="dynamic_int8",
    export_config=export_config,
)

# Convert to LiteRT or LiteRT-LM Format
converter.convert_to_litert(
    pytorch_model,
    **conversion_options,
    **litertlm_config,
)


In [None]:
# List the installed package versions, so the environment used for this export
# is recorded alongside the notebook.
!pip freeze

absl-py==1.4.0
absolufy-imports==0.3.1
accelerate==1.10.1
ai-edge-litert-nightly==2.2.0.dev20260121
ai-edge-quantizer-nightly==0.5.0.dev20260122
ai-edge-torch-nightly==0.8.0.dev20260122
aiofiles==24.1.0
aiohappyeyeballs==2.6.1
aiohttp==3.13.0
aiosignal==1.4.0
alabaster==1.0.0
albucore==0.0.24
albumentations==2.0.8
ale-py==0.11.2
alembic==1.16.5
altair==5.5.0
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
anyio==4.11.0
anywidget==0.9.18
argon2-cffi==25.1.0
argon2-cffi-bindings==25.1.0
array_record==0.8.1
arrow==1.3.0
arviz==0.22.0
astropy==7.1.0
astropy-iers-data==0.2025.10.6.0.35.25
astunparse==1.6.3
atpublic==5.1
attrs==25.4.0
audioread==3.0.1
Authlib==1.6.5
autograd==1.8.0
babel==2.17.0
backcall==0.2.0
backports.strenum==1.2.8
beartype==0.22.2
beautifulsoup4==4.13.5
betterproto==2.0.0b6
bigframes==2.24.0
bigquery-magics==0.10.3
bleach==6.2.0
blinker==1.9.0
blis==1.3.0
blobfile==3.1.0
blosc2==3.10.0
bokeh==3.7.3
Bottleneck==1.4.2
bqplot==0.12.45
branca==0.8.2
Brotli==1.1.0
build

In [None]:
!