14 changes: 8 additions & 6 deletions nb/Kaggle-Magistral_(24B)-Reasoning-Conversational.ipynb
@@ -1056,17 +1056,19 @@
 ],
 "source": [
 "messages = [\n",
-" {\"role\" : \"user\", \"content\" : \"Solve (x + 2)^2 = 0.\"}\n",
+" {\"role\" : \"user\", \"content\" : [{\"type\": \"text\", \"text\": \"Solve (x + 2)^2 = 0.\"}]}\n",
 "]\n",
-"text = tokenizer.apply_chat_template(\n",
+"inputs = tokenizer.apply_chat_template(\n",
 " messages,\n",
-" tokenize = False,\n",
+" tokenize = True,\n",
 " add_generation_prompt = True, # Must add for generation\n",
-")\n",
+" return_tensors = \"pt\",\n",
+" return_dict = True,\n",
+").to(\"cuda\")\n",
 "\n",
 "from transformers import TextStreamer\n",
 "_ = model.generate(\n",
-" **tokenizer(text, return_tensors = \"pt\").to(\"cuda\"),\n",
+" **inputs,\n",
 " max_new_tokens = 1024, # Increase for longer outputs!\n",
 " temperature = 0.7, top_p = 0.95,\n",
 " streamer = TextStreamer(tokenizer, skip_prompt = True),\n",
@@ -6419,4 +6421,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 0
-}
\ No newline at end of file
+}
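Taken together, the `+` lines above give the following updated inference cell. This is a minimal runnable sketch: it assumes `model` and `tokenizer` have already been loaded earlier in the notebook (e.g. via Unsloth's `FastLanguageModel.from_pretrained`) and that a CUDA GPU is available.

```python
from transformers import TextStreamer

# Content is now a list of typed parts, matching Magistral's chat template.
messages = [
    {"role": "user", "content": [{"type": "text", "text": "Solve (x + 2)^2 = 0."}]},
]

# Tokenize in one pass: return_dict = True yields input_ids plus attention_mask,
# so the result unpacks directly into model.generate.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,  # Must add for generation
    return_tensors = "pt",
    return_dict = True,
).to("cuda")

_ = model.generate(
    **inputs,
    max_new_tokens = 1024,  # Increase for longer outputs!
    temperature = 0.7, top_p = 0.95,
    streamer = TextStreamer(tokenizer, skip_prompt = True),
)
```

The key difference from the old cell: the template is no longer rendered to a string (`tokenize = False`) and then re-tokenized with `tokenizer(text, ...)`, a pattern that can insert special tokens such as BOS a second time; the new form tokenizes once and returns ready-to-use tensors.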
14 changes: 8 additions & 6 deletions nb/Magistral_(24B)-Reasoning-Conversational.ipynb
@@ -1056,17 +1056,19 @@
 ],
 "source": [
 "messages = [\n",
-" {\"role\" : \"user\", \"content\" : \"Solve (x + 2)^2 = 0.\"}\n",
+" {\"role\" : \"user\", \"content\" : [{\"type\": \"text\", \"text\": \"Solve (x + 2)^2 = 0.\"}]}\n",
 "]\n",
-"text = tokenizer.apply_chat_template(\n",
+"inputs = tokenizer.apply_chat_template(\n",
 " messages,\n",
-" tokenize = False,\n",
+" tokenize = True,\n",
 " add_generation_prompt = True, # Must add for generation\n",
-")\n",
+" return_tensors = \"pt\",\n",
+" return_dict = True,\n",
+").to(\"cuda\")\n",
 "\n",
 "from transformers import TextStreamer\n",
 "_ = model.generate(\n",
-" **tokenizer(text, return_tensors = \"pt\").to(\"cuda\"),\n",
+" **inputs,\n",
 " max_new_tokens = 1024, # Increase for longer outputs!\n",
 " temperature = 0.7, top_p = 0.95,\n",
 " streamer = TextStreamer(tokenizer, skip_prompt = True),\n",
@@ -6419,4 +6421,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 0
-}
\ No newline at end of file
+}
12 changes: 7 additions & 5 deletions original_template/Magistral_(24B)-Reasoning-Conversational.ipynb
@@ -1033,17 +1033,19 @@
 ],
 "source": [
 "messages = [\n",
-" {\"role\" : \"user\", \"content\" : \"Solve (x + 2)^2 = 0.\"}\n",
+" {\"role\" : \"user\", \"content\" : [{\"type\": \"text\", \"text\": \"Solve (x + 2)^2 = 0.\"}]}\n",
 "]\n",
-"text = tokenizer.apply_chat_template(\n",
+"inputs = tokenizer.apply_chat_template(\n",
 " messages,\n",
-" tokenize = False,\n",
+" tokenize = True,\n",
 " add_generation_prompt = True, # Must add for generation\n",
-")\n",
+" return_tensors = \"pt\",\n",
+" return_dict = True,\n",
+").to(\"cuda\")\n",
 "\n",
 "from transformers import TextStreamer\n",
 "_ = model.generate(\n",
-" **tokenizer(text, return_tensors = \"pt\").to(\"cuda\"),\n",
+" **inputs,\n",
 " max_new_tokens = 1024, # Increase for longer outputs!\n",
 " temperature = 0.7, top_p = 0.95,\n",
 " streamer = TextStreamer(tokenizer, skip_prompt = True),\n",
60 changes: 31 additions & 29 deletions python_scripts/Kaggle-Magistral_(24B)-Reasoning-Conversational.py
@@ -7,34 +7,34 @@
 # <a href="https://discord.gg/unsloth"><img src="https://github.com/unslothai/unsloth/raw/main/images/Discord button.png" width="145"></a>
 # <a href="https://docs.unsloth.ai/"><img src="https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true" width="125"></a></a> Join Discord if you need help + ⭐ <i>Star us on <a href="https://github.com/unslothai/unsloth">Github</a> </i> ⭐
 # </div>
-# 
+#
 # To install Unsloth on your local device, follow [our guide](https://docs.unsloth.ai/get-started/install-and-update). This notebook is licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).
-# 
+#
 # You will learn how to do [data prep](#Data), how to [train](#Train), how to [run the model](#Inference), & [how to save it](#Save)
-# 
+#
 
 # ### News
 
-# 
+#
 # Unsloth's [Docker image](https://hub.docker.com/r/unsloth/unsloth) is here! Start training with no setup & environment issues. [Read our Guide](https://docs.unsloth.ai/new/how-to-train-llms-with-unsloth-and-docker).
-# 
+#
 # [gpt-oss RL](https://docs.unsloth.ai/new/gpt-oss-reinforcement-learning) is now supported with the fastest inference & lowest VRAM. Try our [new notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/gpt-oss-(20B)-GRPO.ipynb) which creates kernels!
-# 
+#
 # Introducing [Vision](https://docs.unsloth.ai/new/vision-reinforcement-learning-vlm-rl) and [Standby](https://docs.unsloth.ai/basics/memory-efficient-rl) for RL! Train Qwen, Gemma etc. VLMs with GSPO - even faster with less VRAM.
-# 
+#
 # Unsloth now supports Text-to-Speech (TTS) models. Read our [guide here](https://docs.unsloth.ai/basics/text-to-speech-tts-fine-tuning).
-# 
+#
 # Visit our docs for all our [model uploads](https://docs.unsloth.ai/get-started/all-our-models) and [notebooks](https://docs.unsloth.ai/get-started/unsloth-notebooks).
-# 
+#
 
 # # ### Installation
-# 
+#
 # # In[ ]:
-# 
-# 
+#
+#
 # get_ipython().run_cell_magic('capture', '', 'import os\n\n!pip install pip3-autoremove\n!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu128\n!pip install unsloth\n!pip install transformers==4.56.2\n!pip install --no-deps trl==0.22.2\n')
-# 
-# 
+#
+#
 # # ### Unsloth
 
 # In[ ]:
@@ -240,17 +240,19 @@ def generate_conversation(example):
 
 
 messages = [
-    {"role" : "user", "content" : "Solve (x + 2)^2 = 0."}
+    {"role" : "user", "content" : [{"type": "text", "text": "Solve (x + 2)^2 = 0."}]}
 ]
-text = tokenizer.apply_chat_template(
+inputs = tokenizer.apply_chat_template(
     messages,
-    tokenize = False,
+    tokenize = True,
     add_generation_prompt = True, # Must add for generation
-)
+    return_tensors = "pt",
+    return_dict = True,
+).to("cuda")
 
 from transformers import TextStreamer
 _ = model.generate(
-    **tokenizer(text, return_tensors = "pt").to("cuda"),
+    **inputs,
     max_new_tokens = 1024, # Increase for longer outputs!
     temperature = 0.7, top_p = 0.95,
     streamer = TextStreamer(tokenizer, skip_prompt = True),
@@ -260,7 +262,7 @@ def generate_conversation(example):
 # <a name="Save"></a>
 # ### Saving, loading finetuned models
 # To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save.
-# 
+#
 # **[NOTE]** This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!
 
 # In[21]:
@@ -287,7 +289,7 @@ def generate_conversation(example):
 
 
 # ### Saving to float16 for VLLM
-# 
+#
 # We also support saving to `float16` directly. Select `merged_16bit` for float16 or `merged_4bit` for int4. We also allow `lora` adapters as a fallback. Use `push_to_hub_merged` to upload to your Hugging Face account! You can go to https://huggingface.co/settings/tokens for your personal tokens.
 
 # In[ ]:
@@ -316,12 +318,12 @@ def generate_conversation(example):
 
 # ### GGUF / llama.cpp Conversion
 # To save to `GGUF` / `llama.cpp`, we support it natively now! We clone `llama.cpp` and we default save it to `q8_0`. We allow all methods like `q4_k_m`. Use `save_pretrained_gguf` for local saving and `push_to_hub_gguf` for uploading to HF.
-# 
+#
 # Some supported quant methods (full list on our [Wiki page](https://github.com/unslothai/unsloth/wiki#gguf-quantization-options)):
 # * `q8_0` - Fast conversion. High resource use, but generally acceptable.
 # * `q4_k_m` - Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K.
 # * `q5_k_m` - Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K.
-# 
+#
 # [**NEW**] To finetune and auto export to Ollama, try our [Ollama notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3_(8B)-Ollama.ipynb)
 
 # In[24]:
@@ -358,22 +360,22 @@ def generate_conversation(example):
 
 
 # Now, use the `model-unsloth.gguf` file or `model-unsloth-Q4_K_M.gguf` file in llama.cpp.
-# 
+#
 # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord!
-# 
+#
 # Some other links:
 # 1. Train your own reasoning model - Llama GRPO notebook [Free Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.1_(8B)-GRPO.ipynb)
 # 2. Saving finetunes to Ollama. [Free notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3_(8B)-Ollama.ipynb)
 # 3. Llama 3.2 Vision finetuning - Radiography use case. [Free Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(11B)-Vision.ipynb)
 # 4. See notebooks for DPO, ORPO, Continued pretraining, conversational finetuning and more on our [documentation](https://docs.unsloth.ai/get-started/unsloth-notebooks)!
-# 
+#
 # <div class="align-center">
 # <a href="https://unsloth.ai"><img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="115"></a>
 # <a href="https://discord.gg/unsloth"><img src="https://github.com/unslothai/unsloth/raw/main/images/Discord.png" width="145"></a>
 # <a href="https://docs.unsloth.ai/"><img src="https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true" width="125"></a>
-# 
+#
 # Join Discord if you need help + ⭐️ <i>Star us on <a href="https://github.com/unslothai/unsloth">Github</a> </i> ⭐️
 # </div>
-# 
+#
 # This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).
-# 
+#
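The GGUF section above is unchanged context, but since it only names the methods, here is a short sketch of the export it describes, using the `save_pretrained_gguf` / `push_to_hub_gguf` methods from the notebook's own prose; the output directory, Hub repo name, and token are hypothetical placeholders:

```python
# Local GGUF export; the notebook defaults to q8_0, and recommends q4_k_m.
model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")

# Or upload the GGUF straight to the Hugging Face Hub
# (hypothetical repo name and token).
model.push_to_hub_gguf(
    "your-username/Magistral-24B-gguf", tokenizer,
    quantization_method = "q4_k_m", token = "hf_...",
)
```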
60 changes: 31 additions & 29 deletions python_scripts/Magistral_(24B)-Reasoning-Conversational.py
@@ -7,34 +7,34 @@
 # <a href="https://discord.gg/unsloth"><img src="https://github.com/unslothai/unsloth/raw/main/images/Discord button.png" width="145"></a>
 # <a href="https://docs.unsloth.ai/"><img src="https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true" width="125"></a></a> Join Discord if you need help + ⭐ <i>Star us on <a href="https://github.com/unslothai/unsloth">Github</a> </i> ⭐
 # </div>
-# 
+#
 # To install Unsloth on your local device, follow [our guide](https://docs.unsloth.ai/get-started/install-and-update). This notebook is licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).
-# 
+#
 # You will learn how to do [data prep](#Data), how to [train](#Train), how to [run the model](#Inference), & [how to save it](#Save)
-# 
+#
 
 # ### News
 
-# 
+#
 # Unsloth's [Docker image](https://hub.docker.com/r/unsloth/unsloth) is here! Start training with no setup & environment issues. [Read our Guide](https://docs.unsloth.ai/new/how-to-train-llms-with-unsloth-and-docker).
-# 
+#
 # [gpt-oss RL](https://docs.unsloth.ai/new/gpt-oss-reinforcement-learning) is now supported with the fastest inference & lowest VRAM. Try our [new notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/gpt-oss-(20B)-GRPO.ipynb) which creates kernels!
-# 
+#
 # Introducing [Vision](https://docs.unsloth.ai/new/vision-reinforcement-learning-vlm-rl) and [Standby](https://docs.unsloth.ai/basics/memory-efficient-rl) for RL! Train Qwen, Gemma etc. VLMs with GSPO - even faster with less VRAM.
-# 
+#
 # Unsloth now supports Text-to-Speech (TTS) models. Read our [guide here](https://docs.unsloth.ai/basics/text-to-speech-tts-fine-tuning).
-# 
+#
 # Visit our docs for all our [model uploads](https://docs.unsloth.ai/get-started/all-our-models) and [notebooks](https://docs.unsloth.ai/get-started/unsloth-notebooks).
-# 
+#
 
 # # ### Installation
-# 
+#
 # # In[ ]:
-# 
-# 
+#
+#
 # get_ipython().run_cell_magic('capture', '', 'import os, re\nif "COLAB_" not in "".join(os.environ.keys()):\n    !pip install unsloth\nelse:\n    # Do this only in Colab notebooks! Otherwise use pip install unsloth\n    import torch; v = re.match(r"[0-9]{1,}\\.[0-9]{1,}", str(torch.__version__)).group(0)\n    xformers = "xformers==" + ("0.0.33.post1" if v=="2.9" else "0.0.32.post2" if v=="2.8" else "0.0.29.post3")\n    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo\n    !pip install sentencepiece protobuf "datasets==4.3.0" "huggingface_hub>=0.34.0" hf_transfer\n    !pip install --no-deps unsloth\n!pip install transformers==4.56.2\n!pip install --no-deps trl==0.22.2\n')
-# 
-# 
+#
+#
 # # ### Unsloth
 
 # In[ ]:
@@ -240,17 +240,19 @@ def generate_conversation(example):
 
 
 messages = [
-    {"role" : "user", "content" : "Solve (x + 2)^2 = 0."}
+    {"role" : "user", "content" : [{"type": "text", "text": "Solve (x + 2)^2 = 0."}]}
 ]
-text = tokenizer.apply_chat_template(
+inputs = tokenizer.apply_chat_template(
     messages,
-    tokenize = False,
+    tokenize = True,
    add_generation_prompt = True, # Must add for generation
-)
+    return_tensors = "pt",
+    return_dict = True,
+).to("cuda")
 
 from transformers import TextStreamer
 _ = model.generate(
-    **tokenizer(text, return_tensors = "pt").to("cuda"),
+    **inputs,
     max_new_tokens = 1024, # Increase for longer outputs!
     temperature = 0.7, top_p = 0.95,
     streamer = TextStreamer(tokenizer, skip_prompt = True),
@@ -260,7 +262,7 @@ def generate_conversation(example):
 # <a name="Save"></a>
 # ### Saving, loading finetuned models
 # To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save.
-# 
+#
 # **[NOTE]** This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!
 
 # In[21]:
@@ -287,7 +289,7 @@ def generate_conversation(example):
 
 
 # ### Saving to float16 for VLLM
-# 
+#
 # We also support saving to `float16` directly. Select `merged_16bit` for float16 or `merged_4bit` for int4. We also allow `lora` adapters as a fallback. Use `push_to_hub_merged` to upload to your Hugging Face account! You can go to https://huggingface.co/settings/tokens for your personal tokens.
 
 # In[ ]:
@@ -316,12 +318,12 @@ def generate_conversation(example):
 
 # ### GGUF / llama.cpp Conversion
 # To save to `GGUF` / `llama.cpp`, we support it natively now! We clone `llama.cpp` and we default save it to `q8_0`. We allow all methods like `q4_k_m`. Use `save_pretrained_gguf` for local saving and `push_to_hub_gguf` for uploading to HF.
-# 
+#
 # Some supported quant methods (full list on our [Wiki page](https://github.com/unslothai/unsloth/wiki#gguf-quantization-options)):
 # * `q8_0` - Fast conversion. High resource use, but generally acceptable.
 # * `q4_k_m` - Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K.
 # * `q5_k_m` - Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K.
-# 
+#
 # [**NEW**] To finetune and auto export to Ollama, try our [Ollama notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3_(8B)-Ollama.ipynb)
 
 # In[24]:
@@ -358,22 +360,22 @@ def generate_conversation(example):
 
 
 # Now, use the `model-unsloth.gguf` file or `model-unsloth-Q4_K_M.gguf` file in llama.cpp.
-# 
+#
 # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord!
-# 
+#
 # Some other links:
 # 1. Train your own reasoning model - Llama GRPO notebook [Free Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.1_(8B)-GRPO.ipynb)
 # 2. Saving finetunes to Ollama. [Free notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3_(8B)-Ollama.ipynb)
 # 3. Llama 3.2 Vision finetuning - Radiography use case. [Free Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(11B)-Vision.ipynb)
 # 4. See notebooks for DPO, ORPO, Continued pretraining, conversational finetuning and more on our [documentation](https://docs.unsloth.ai/get-started/unsloth-notebooks)!
-# 
+#
 # <div class="align-center">
 # <a href="https://unsloth.ai"><img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="115"></a>
 # <a href="https://discord.gg/unsloth"><img src="https://github.com/unslothai/unsloth/raw/main/images/Discord.png" width="145"></a>
 # <a href="https://docs.unsloth.ai/"><img src="https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true" width="125"></a>
-# 
+#
 # Join Discord if you need help + ⭐️ <i>Star us on <a href="https://github.com/unslothai/unsloth">Github</a> </i> ⭐️
 # </div>
-# 
+#
 # This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).
-# 
+#
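For completeness, the saving flows described in the unchanged context above (LoRA-only, and merged float16 for vLLM) look roughly as follows. A minimal sketch, assuming the finetuned `model` and `tokenizer` from earlier in the script; directory names, the Hub repo name, and the token are hypothetical placeholders, and `save_pretrained_merged` is assumed here as the local counterpart of the `push_to_hub_merged` method the notebook names:

```python
# Local save: writes ONLY the LoRA adapters, not the full model.
model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")

# Online save to the Hugging Face Hub (hypothetical repo name; create a
# token at https://huggingface.co/settings/tokens).
model.push_to_hub("your-username/Magistral-24B-lora", token = "hf_...")
tokenizer.push_to_hub("your-username/Magistral-24B-lora", token = "hf_...")

# Merged float16 export for vLLM, per the "Saving to float16 for VLLM"
# section; select merged_4bit instead for an int4 merge.
model.save_pretrained_merged("merged_model", tokenizer, save_method = "merged_16bit")
```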