In [0]:
!pip install transformers==4.44.2 mlflow accelerate 
!pip install -U bitsandbytes
dbutils.library.restartPython()

In [0]:
# Hugging Face repository id of the model; later cells pass it to
# snapshot_download and to the transformers from_pretrained loaders.
model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"

In [0]:
import os

# Root directory for every Hugging Face cache used by this notebook.
LOCAL_DISK_HF = "/local_disk0/hf_cache"
os.makedirs(LOCAL_DISK_HF, exist_ok=True)

# These variables are read when huggingface_hub / transformers are imported,
# so this cell has to run before any cell that imports those libraries.
# HF_HOME is the single supported root: the hub cache resolves to
# "<HF_HOME>/hub" for both huggingface_hub and transformers.
os.environ["HF_HOME"] = LOCAL_DISK_HF
os.environ["HF_DATASETS_CACHE"] = LOCAL_DISK_HF

# TRANSFORMERS_CACHE is intentionally NOT set (and is cleared if inherited).
# It is deprecated, and pointing it at the HF_HOME root made transformers look
# in "/local_disk0/hf_cache" while snapshot_download wrote to
# "/local_disk0/hf_cache/hub" - so the pre-downloaded weights were ignored and
# the checkpoint was fetched a second time by from_pretrained.
os.environ.pop("TRANSFORMERS_CACHE", None)

In [0]:
from huggingface_hub import snapshot_download

# Pre-fetch the files of the `model_id` repository into the Hugging Face cache.
# The call's return value is the cell's last expression, so the notebook
# displays it.
snapshot_download(repo_id=model_id)

In [0]:
import mlflow
import transformers

model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
uc_model_name = "deepseek_r1_distilled_llama8b_v1"

# The model is registered under a three-level (catalog.schema.model) name,
# which only resolves against the Unity Catalog registry. Set it explicitly so
# the cell does not depend on the workspace / MLflow default registry.
mlflow.set_registry_uri("databricks-uc")

# MLflow task type recorded with the logged model (chat-completions signature).
task = "llm/v1/chat"

# torch_dtype="auto" keeps the dtype stored in the checkpoint config instead of
# silently upcasting the 8B weights to float32, which would roughly double both
# driver memory use and the size of the logged artifact.
model = transformers.AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)

# mlflow.transformers accepts the components as a dict instead of a pipeline.
transformers_model = {"model": model, "tokenizer": tokenizer}

# Log the model to a new MLflow run and register it in Unity Catalog in one
# step; `model_info.registered_model_version` is consumed by later cells.
with mlflow.start_run():
    model_info = mlflow.transformers.log_model(
        transformers_model=transformers_model,
        artifact_path="model",
        task=task,
        registered_model_name=f"accenture.rag_chatbot_salah_a_uddin.{uc_model_name}",
        metadata={
            "task": task,
            # Base architecture hint for Databricks optimized serving. The
            # distilled 8B checkpoint is derived from Llama 3.1 8B; the previous
            # value "meta-llama/Llama-3.3-8B-Instruct" is not an existing model.
            "pretrained_model_name": "meta-llama/Llama-3.1-8B-Instruct",
            "databricks_model_family": "LlamaForCausalLM",
            "databricks_model_size_parameters": "8b",
        },
    )

In [0]:
# Show the registry version assigned to the model that log_model registered;
# the endpoint-creation cell uses the same value.
print(model_info.registered_model_version)

In [0]:
from mlflow.deployments import get_deploy_client

client = get_deploy_client("databricks")

# Fully qualified Unity Catalog name of the model registered by the logging cell.
uc_entity_name = f"accenture.rag_chatbot_salah_a_uddin.{uc_model_name}"

# Name the served entity explicitly and reuse that exact name in the traffic
# route. Previously the route relied on the server-side default of
# "<model>-<version>", which breaks silently if that convention ever differs.
served_entity_name = f"{uc_model_name}-{model_info.registered_model_version}"

# Create a provisioned-throughput serving endpoint that sends all traffic to
# the version that was just registered.
# NOTE(review): create_endpoint is not idempotent - re-running this cell while
# the endpoint already exists is expected to fail; confirm the desired re-run
# behavior (update vs. skip).
endpoint = client.create_endpoint(
    name=uc_model_name,
    config={
        "served_entities": [{
            "name": served_entity_name,
            "entity_name": uc_entity_name,
            "entity_version": model_info.registered_model_version,
            # NOTE(review): the 0-9500 throughput band is hard-coded; confirm
            # it matches the throughput chunk size Databricks reports for this
            # model version.
            "min_provisioned_throughput": 0,
            "max_provisioned_throughput": 9500,
            "scale_to_zero_enabled": True
        }],
        "traffic_config": {
            "routes": [{
                "served_model_name": served_entity_name,
                "traffic_percentage": 100
            }]
        }
    }
)