<a href="https://colab.research.google.com/github/xo-toybox/colab/blob/main/colab_HF_lighteval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# @title Environment Setup

import torch; print(f"{torch.__version__=}")
import numpy; print(f"{numpy.__version__=}")

# Downgrade triton: known issue with Unsupported conversion from f16 to f16
# Downgrade vLLM below the breaking change for prompt_token_ids
!uv pip install -qqq "triton<3.3" "vllm<0.10.2" lighteval[vllm]
import vllm; print(f"{vllm.__version__=}")
import triton; print(f"{triton.__version__=}")
assert torch.__version__ == "2.6.0+cu124", "Need to restart session to reload"

# hf auth for upload
import os
from google.colab import userdata
os.environ["HF_TOKEN"] = userdata.get('HF_TOKEN')

torch.__version__='2.6.0+cu124'
numpy.__version__='1.26.4'
INFO 09-17 03:24:30 [__init__.py:239] Automatically detected platform cuda.
vllm.__version__='0.8.5.post1'
triton.__version__='3.2.0'


In [4]:
# @title T4 GPU has compute capability 7.5 < 8 (requirement for bfloat16)
# Runs the lighteval CLI with the vLLM backend on SmolLM3-3B.
# dtype=float16 is set explicitly because, per the title above, the T4 does not
# meet the compute-capability requirement for bfloat16.
# "lighteval|gsm8k|0|0" is the task spec for gsm8k.
# NOTE(review): the two trailing fields look like the few-shot count and a
# truncation flag -- confirm against the lighteval docs.
# --push-to-hub / --results-org: presumably uploads the results to the Hub under
# the given org, relying on the HF_TOKEN set in the setup cell -- confirm.
!uv run lighteval vllm "model_name=HuggingFaceTB/SmolLM3-3B,dtype=float16" "lighteval|gsm8k|0|0" --push-to-hub --results-org xouyang

[2025-09-17 02:06:15,453] [[32m    INFO[0m]: NumExpr defaulting to 8 threads. (utils.py:164)[0m
[2025-09-17 02:06:15,663] [[32m    INFO[0m]: TensorFlow version 2.19.0 available. (config.py:112)[0m
[2025-09-17 02:06:15,664] [[32m    INFO[0m]: JAX version 0.5.3 available. (config.py:125)[0m
2025-09-17 02:06:16.166028: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758074776.185648   16123 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758074776.191579   16123 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1758074776.206239   16123 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid lin

In [2]:
# L4
# NOTE(review): "L4" presumably names the Colab L4 GPU runtime this cell was run on -- confirm.
# Same vLLM-backend gsm8k run as the T4 cell, but with no dtype override in the
# model arguments, so the backend's default dtype is used.
!uv run lighteval vllm "model_name=HuggingFaceTB/SmolLM3-3B" "lighteval|gsm8k|0|0" --push-to-hub --results-org xouyang

[2025-09-17 02:27:46,865] [[32m    INFO[0m]: NumExpr defaulting to 12 threads. (utils.py:164)[0m
[2025-09-17 02:27:47,100] [[32m    INFO[0m]: TensorFlow version 2.19.0 available. (config.py:112)[0m
[2025-09-17 02:27:47,102] [[32m    INFO[0m]: JAX version 0.5.3 available. (config.py:125)[0m
2025-09-17 02:27:47.666239: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758076067.687409    4520 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758076067.693838    4520 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1758076067.709687    4520 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid li

In [2]:
# Same model and gsm8k task spec as the vLLM cells, but launched through the
# `lighteval accelerate` subcommand instead of `lighteval vllm`.
# NOTE(review): presumably this evaluates with the transformers/accelerate
# backend for comparison against the vLLM runs -- confirm.
!uv run lighteval accelerate "model_name=HuggingFaceTB/SmolLM3-3B" "lighteval|gsm8k|0|0" --push-to-hub --results-org xouyang

[2025-09-17 03:01:50,895] [[32m    INFO[0m]: NumExpr defaulting to 12 threads. (utils.py:164)[0m
[2025-09-17 03:01:51,142] [[32m    INFO[0m]: TensorFlow version 2.19.0 available. (config.py:112)[0m
[2025-09-17 03:01:51,144] [[32m    INFO[0m]: JAX version 0.5.3 available. (config.py:125)[0m
2025-09-17 03:01:51.687346: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758078111.708632   13639 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758078111.715293   13639 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1758078111.731712   13639 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid li

In [2]:
# @title Python API
# Runs the same gsm8k task spec as the CLI cells, but through lighteval's
# Python API with a transformers model that is loaded in this kernel.
import transformers; print(f"{transformers.__version__=}")
from accelerate.logging import get_logger
logger = get_logger(__name__, log_level="INFO")

from transformers import AutoModelForCausalLM

from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters


MODEL_NAME = "HuggingFaceTB/SmolLM3-3B"
BENCHMARKS = "lighteval|gsm8k|0|0"  # same task spec string as the CLI cells

# The tracker is pointed at a local output directory; unlike the CLI cells,
# no push-to-hub option is configured here.
evaluation_tracker = EvaluationTracker(output_dir="./results")
pipeline_params = PipelineParameters(
    launcher_type=ParallelismManager.ACCELERATE,
)
config = TransformersModelConfig(
    model_name=MODEL_NAME,
)

# The model is loaded in this kernel first and then handed to lighteval through
# the TransformersModel wrapper. The raw HF model keeps its own name so that
# `model` only ever refers to the lighteval wrapper.
hf_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, device_map="auto"
)
model = TransformersModel.from_model(hf_model, config)
# Alternative when no model is already in memory (per the original note):
# give the Pipeline `config` via its model_config argument instead of `model`.

pipeline = Pipeline(
    model=model,
    pipeline_parameters=pipeline_params,
    evaluation_tracker=evaluation_tracker,
    tasks=BENCHMARKS,
)

# evaluate() is called for its effect on the pipeline; the results are read
# back afterwards with get_results(), so its return value is not kept.
pipeline.evaluate()
pipeline.show_results()
results = pipeline.get_results()

INFO:datasets:TensorFlow version 2.19.0 available.
INFO:datasets:JAX version 0.5.3 available.


transformers.__version__='4.56.1'


INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:lighteval.models.transformers.transformers_model:Tokenizer truncation and padding size set to the left side.
INFO:lighteval.utils.parallelism:Test gather tensor
INFO:lighteval.utils.parallelism:gathered_tensor tensor([0], device='cuda:0'), should be [0]
INFO:lighteval.pipeline:--- LOADING MODEL ---
INFO:lighteval.pipeline:--- INIT SEEDS ---
INFO:lighteval.pipeline:--- LOADING TASKS ---
INFO:lighteval.tasks.lighteval_task:gsm8k main
`trust_remote_code` is not supported anymore.
Please check that the Hugging Face dataset 'gsm8k' isn't based on a loading script and remove `trust_remote_code`.
If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet.
ERROR:datasets.load:`trust_remote_code` is not supported anymore.
Please check that the Hugging Face dataset 'gsm8k' isn't based on a loading script and remove `trust_remote_code`.
If the dataset is based on a loading script, please ask the dataset author to r

KeyboardInterrupt: 