In [None]:
%pip install -r requirements.txt
# NOTE(review): the restart below presumably makes the packages installed above importable in this session — confirm against the Databricks docs.
%restart_python

In [None]:
import yaml

# Parse the shared YAML configuration that drives the rest of the notebook.
with open("../configs/config.yaml", "r") as f:
    config = yaml.safe_load(f)

# Pull every setting out in one pass; `.get` yields None for any key that is
# absent from the file, so missing settings do not raise here.
(
    catalog_name,
    schema_name,
    volume_name,
    volume_folder,
    model_name,
    revision,
    secret_scope,
    secret_key,
) = (
    config.get(setting)
    for setting in (
        "catalog_name",
        "schema_name",
        "volume_name",
        "volume_folder",
        "model_name",
        "revision",
        "secret_scope",
        "secret_key",
    )
)

In [None]:
from transformers import AutoProcessor, AutoModelForCausalLM, BitsAndBytesConfig
from PIL import Image
import requests, torch
import mlflow
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec

In [None]:
import os
# Destination under /Volumes where the exported artifacts are persisted.
# An f-string may not contain an empty `{}` replacement field (it is a
# SyntaxError), so the path is built only from the configured values.
# NOTE(review): layout assumed to be <volume>/<revision>/<folder> — confirm.
cache_remote = f"/Volumes/{catalog_name}/{schema_name}/{volume_name}/{revision}/{volume_folder}"

# Local scratch locations: one for the Hugging Face download cache, one for
# the exported tokenizer/model files.
cache_hf = "/local_disk0/hf_cache"
# Must be an f-string; a plain string would keep the literal text
# "{volume_folder}" as the directory name.
cache_local = f"/local_disk0/{volume_folder}"

# Point the Hugging Face libraries at the local cache and tune the download
# behaviour before any hub call is made.
os.environ["HF_HOME"] = cache_hf
os.environ["HF_HUB_CACHE"] = cache_hf
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "True"
os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "1000"
os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '1'  # Enables optimized download backend


In [None]:
from huggingface_hub import login, snapshot_download

# Authenticate with the Hugging Face Hub. The token is fetched from the
# configured secret scope at call time instead of being written in the notebook.
login(
    add_to_git_credential=True,
    token=dbutils.secrets.get(key=secret_key, scope=secret_scope),
)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Load the tokenizer pinned to the configured revision.
tokenizer = AutoTokenizer.from_pretrained(model_name, revision=revision)

# Reuse the end-of-sequence token for padding when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Quantization is off by default; enable one of the options below to turn it on.
quantization_config = None

# NOTE: Uncomment to use 4bit quantization
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
# )

# NOTE: Uncomment to use 8bit quantization
# quantization_config = BitsAndBytesConfig(
#     load_in_8bit=True,
# )

# Arguments shared by both the quantized and the unquantized load.
model_load_kwargs = {
    "revision": revision,
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}
# The quantization argument is only passed when a config was actually chosen.
if quantization_config is not None:
    model_load_kwargs["quantization_config"] = quantization_config

model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)

In [None]:
# Write the tokenizer first, then the model, into the same local directory.
for artifact in (tokenizer, model):
    artifact.save_pretrained(cache_local)

In [None]:
import shutil
# Step 1: make sure the destination directory exists. A failure is reported
# but does not stop the cell.
try:
    dbutils.fs.mkdirs(cache_remote)
except Exception as mkdir_error:
    print(f"Error creating directory: {mkdir_error}")

# Step 2: copy the locally saved artifacts to the destination, merging into
# any directory that is already there. The outcome is reported either way.
try:
    shutil.copytree(cache_local, cache_remote, dirs_exist_ok=True)
except Exception as copy_error:
    print(f"Error copying directory: {copy_error}")
else:
    print(f"Successfully copied directory from {cache_local} to {cache_remote}")