In [1]:
import torch  # <--- Explicitly add this line
import bitsandbytes as bnb

print("Bitsandbytes version:", bnb.__version__)

# Explicitly test bitsandbytes CUDA functionality
try:
    a = torch.randn((1024, 1024), device='cuda')
    b = torch.randn((1024, 1024), device='cuda')

    result = bnb.matmul(a, b)
    print("bitsandbytes CUDA kernel explicitly working correctly!")
except Exception as e:
    print("Error with bitsandbytes CUDA kernel explicitly:", e)



NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90.
If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



Bitsandbytes version: 0.45.5
Error with bitsandbytes CUDA kernel explicitly: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.



In [2]:
import bitsandbytes as bnb

print("Bitsandbytes version:", bnb.__version__)

# Explicitly test bitsandbytes CUDA functionality
try:
    a = torch.randn((1024, 1024), device='cuda')
    b = torch.randn((1024, 1024), device='cuda')
    
    result = bnb.matmul(a, b)
    print("bitsandbytes CUDA kernel explicitly working correctly!")
except Exception as e:
    print("Error with bitsandbytes CUDA kernel explicitly:", e)


Bitsandbytes version: 0.45.5
Error with bitsandbytes CUDA kernel explicitly: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.



In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# Bitsandbytes quantization configuration explicitly
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

try:
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=bnb_config
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token

    print("Model explicitly loaded successfully with GPU and quantization.")
except Exception as e:
    print("Error explicitly during advanced GPU model loading test:", e)





Error explicitly during advanced GPU model loading test: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2.
401 Client Error. (Request ID: Root=1-680566ad-4b9897154bf7ec0832403f51;23b70363-edd5-4100-83dc-a4bb532ad4f3)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.2 is restricted. You must have access to it and be authenticated to access it. Please log in.


In [4]:
def gpu_stress_test(size=10000):
    print(f"Running explicit GPU stress test with tensor size {size}...")

    try:
        a = torch.randn((size, size), device='cuda')
        b = torch.randn((size, size), device='cuda')

        c = torch.matmul(a, b)
        print("GPU explicitly computed matrix multiplication correctly.")
        
        del a, b, c
        torch.cuda.empty_cache()
        print("GPU cache explicitly cleared after test.")
    except Exception as e:
        print("Error explicitly during GPU stress test:", e)

gpu_stress_test(size=8192)  # Adjust size if explicit memory limits encountered


Running explicit GPU stress test with tensor size 8192...
Error explicitly during GPU stress test: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.



In [5]:
import os

# Check environment variables explicitly relevant for CUDA
cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES", "Not explicitly set")
ld_library_path = os.getenv("LD_LIBRARY_PATH", "Not explicitly set")
bnb_cuda_version = os.getenv("BNB_CUDA_VERSION", "Not explicitly set")

print(f"CUDA_VISIBLE_DEVICES: {cuda_visible_devices}")
print(f"LD_LIBRARY_PATH: {ld_library_path}")
print(f"BNB_CUDA_VERSION: {bnb_cuda_version}")


CUDA_VISIBLE_DEVICES: Not explicitly set
LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
BNB_CUDA_VERSION: Not explicitly set
