In [3]:
import os
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import prepare_model_for_kbit_training
from accelerate import Accelerator

# Set your HF token as environment variable
os.environ["HUGGING_FACE_HUB_TOKEN"] = "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"

# Install required packages (run this in terminal first)
"""
pip install transformers torch datasets peft bitsandbytes accelerate sentencepiece
pip install --upgrade huggingface_hub
"""

def setup_model_and_tokenizer():
    try:
        # Model ID
        model_name = "meta-llama/Llama-2-7b-hf"
        
        # Initialize tokenizer with trust_remote_code=True
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            token=os.environ["HUGGING_FACE_HUB_TOKEN"],
            trust_remote_code=True
        )
        
        # Load model with optimizations for training
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            token=os.environ["HUGGING_FACE_HUB_TOKEN"],
            torch_dtype=torch.float16,  # Use float16 for memory efficiency
            device_map="auto",  # Automatically handle device placement
            trust_remote_code=True,
            load_in_8bit=True  # Use 8-bit quantization
        )
        
        # Prepare model for training
        model = prepare_model_for_kbit_training(model)
        
        return model, tokenizer
    
    except Exception as e:
        print(f"Error occurred: {str(e)}")
        raise

def verify_setup():
    # Check CUDA availability
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"Current CUDA device: {torch.cuda.current_device()}")
        print(f"Device name: {torch.cuda.get_device_name()}")
        print(f"Device memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

if __name__ == "__main__":
    # Verify environment
    verify_setup()
    
    try:
        # Setup model and tokenizer
        model, tokenizer = setup_model_and_tokenizer()
        print("Model and tokenizer loaded successfully!")
        
        # Test tokenizer
        test_text = "این یک متن آزمایشی به زبان فارسی است."
        tokens = tokenizer(test_text, return_tensors="pt")
        print(f"\nTest tokenization successful!")
        print(f"Input text: {test_text}")
        print(f"Token count: {len(tokens['input_ids'][0])}")
        
    except Exception as e:
        print(f"Setup failed with error: {str(e)}")



CUDA available: True
Current CUDA device: 0
Device name: NVIDIA A40
Device memory: 47.73 GB


tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Model and tokenizer loaded successfully!

Test tokenization successful!
Input text: این یک متن آزمایشی به زبان فارسی است.
Token count: 39


In [2]:
!pip install -r requirements.txt


Collecting transformers>=4.31.0 (from -r requirements.txt (line 1))
  Downloading transformers-4.47.1-py3-none-any.whl.metadata (44 kB)
Collecting accelerate>=0.21.0 (from -r requirements.txt (line 3))
  Downloading accelerate-1.2.1-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes>=0.41.0 (from -r requirements.txt (line 4))
  Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Collecting peft>=0.4.0 (from -r requirements.txt (line 5))
  Downloading peft-0.14.0-py3-none-any.whl.metadata (13 kB)
Collecting sentencepiece>=0.1.99 (from -r requirements.txt (line 6))
  Downloading sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting python-dotenv>=0.21.0 (from -r requirements.txt (line 7))
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting huggingface_hub>=0.16.4 (from -r requirements.txt (line 8))
  Downloading huggingface_hub-0.27.0-py3-none-any.whl.metadata (13 kB)
