In [0]:
%sh
# Install the poppler-utils system package on the driver node.
# NOTE(review): presumably required by a PDF-handling dependency listed in
# requirements.txt — confirm which package needs it.
# `apt-get` is used for both steps: the `apt` front-end is meant for interactive
# use and warns that its CLI is not stable when called from scripts.
apt-get update
apt-get install -y poppler-utils

In [0]:
%pip install -r requirements.txt
# Restart the Python process right after the install step; presumably so the
# freshly installed package versions are the ones imported below — TODO confirm.
%restart_python

In [0]:
import yaml
import os

# Load the notebook settings from the shared YAML config file.
with open("../configs/config.yaml", "r") as f:
    # An empty YAML document parses to None; fall back to an empty mapping so the
    # lookups below report a clear "missing setting" error instead of AttributeError.
    config = yaml.safe_load(f) or {}

catalog_name = config.get("catalog_name")
schema_name = config.get("schema_name")
volume_name = config.get("volume_name")
volume_folder = config.get("volume_folder")
model_name = config.get("model_name")
revision = config.get("revision")
secret_scope = config.get("secret_scope")
secret_key = config.get("secret_key")
quantization_type = config.get("quantization_type")

# Fail fast on settings that are interpolated into storage paths or passed to the
# model loader: a missing key would otherwise silently become the string "None"
# inside the paths built below.
required_settings = {
    "catalog_name": catalog_name,
    "schema_name": schema_name,
    "volume_name": volume_name,
    "volume_folder": volume_folder,
    "model_name": model_name,
}
missing_settings = [key for key, value in required_settings.items() if not value]
if missing_settings:
    raise ValueError(
        f"Missing required settings in ../configs/config.yaml: {', '.join(missing_settings)}"
    )

# Final destination of the saved artifacts inside the Unity Catalog volume.
cache_volume =  f"/Volumes/{catalog_name}/{schema_name}/{volume_name}/{model_name}/{revision}/{volume_folder}"
# Local-disk locations: the Hugging Face download cache and the staging folder
# the artifacts are saved to before being copied to the volume.
cache_hf = "/local_disk0/hf_cache"
cache_local = f"/local_disk0/{volume_folder}"

# Point the Hugging Face libraries at the local cache and relax the download timeout.
os.environ["HF_HOME"] = cache_hf
os.environ["HF_HUB_CACHE"] = cache_hf
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "True"
os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "1000"

In [0]:
# Summarise what this run will download and where the artifacts will end up.
print(f"Currently working with model: {model_name} with revision: {revision}")
# Report the real destination: the artifacts land under model_name/revision/volume_folder
# inside the volume, not directly in volume_folder.
print(
    f"Model weights will be saved to {catalog_name}.{schema_name}.{volume_name} "
    f"in the folder: {model_name}/{revision}/{volume_folder} (full path: {cache_volume})"
)

In [0]:
from huggingface_hub import login, snapshot_download

# Best-effort Hugging Face login: the token is read from the Databricks secret
# scope and passed straight to `login`; any failure is reported but does not
# stop the notebook.
try:
    login(
        token=dbutils.secrets.get(scope=secret_scope, key=secret_key),
        add_to_git_credential=True,
    )
except Exception as login_error:
    print(f"Error logging into Hugging Face: {login_error}")

In [0]:
from transformers import AutoProcessor, AutoModelForVision2Seq, BitsAndBytesConfig
import torch 

# Load the processor from the same pinned revision as the model so the two
# artifacts cannot drift apart when the repository's default branch moves.
processor = AutoProcessor.from_pretrained(model_name, revision=revision)

# Translate the configured quantization type into a bitsandbytes config.
# Any other value (including None) leaves the model unquantized.
quantization_config = None

if quantization_type == "4bit":
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
    )
    print("Using 4bit quantization")
elif quantization_type == "8bit":
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
    )
    print("Using 8bit quantization")

# Arguments shared by both loading paths; the quantization config is only added
# when one was built, so the unquantized call is the same as before.
load_kwargs = {
    "revision": revision,
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}

if quantization_config is not None:
    print(f"Loading model with quantization: {quantization_type}")
    load_kwargs["quantization_config"] = quantization_config
else:
    print("Loading model without quantization")

model = AutoModelForVision2Seq.from_pretrained(model_name, **load_kwargs)

In [0]:
# Write the processor first and then the model into the same local staging folder.
for artifact in (processor, model):
    artifact.save_pretrained(cache_local)

In [0]:
import shutil
# Best-effort creation of the destination folder. A failure here is only
# reported, because copytree below creates missing directories itself.
try:
    dbutils.fs.mkdirs(cache_volume)
except Exception as e:
    print(f"Error creating directory: {e}")

# Copy the locally saved artifacts into the volume. This is the step that
# persists the model, so a failure is logged and then re-raised: otherwise the
# notebook would finish "successfully" without any weights in the volume.
try:
    shutil.copytree(cache_local, cache_volume, dirs_exist_ok=True)
except Exception as e:
    print(f"Error copying directory: {e}")
    raise
else:
    print(f"Successfully copied directory from {cache_local} to {cache_volume}")

### Restart Python to free the GPU's VRAM (affects only this notebook session)

In [0]:
# Restart the Python process as the final step; per the note above, this releases
# the GPU memory held by this notebook session.
%restart_python