In [1]:
from transformers import AutoTokenizer

# Fetch the tokenizer that ships with the embedding model so text is split
# exactly the way the model expects.
tokenizer = AutoTokenizer.from_pretrained(
    "sentence-transformers/all-MiniLM-L6-v2"
)

# Write the tokenizer artifacts (tokenizer.json, vocab.txt, etc.) into a
# local "Tokenizer" directory.
tokenizer.save_pretrained("Tokenizer")
print("Files saved to 'Tokenizer' folder. Drag this folder into Xcode.")

  from .autonotebook import tqdm as notebook_tqdm


Files saved to 'Tokenizer' folder. Drag this folder into Xcode.


In [4]:
!pip install "optimum[coreml]" transformers

Collecting optimum[coreml]
  Downloading optimum-2.1.0-py3-none-any.whl.metadata (14 kB)
Downloading optimum-2.1.0-py3-none-any.whl (161 kB)
Installing collected packages: optimum
Successfully installed optimum-2.1.0


In [6]:
# NOTE(review): the recorded output of this cell is
# "ModuleNotFoundError: No module named 'optimum.coreml'", i.e. the cell
# stopped at its imports and none of the statements below executed in that
# session. The later cell in this notebook that converts through coremltools
# is the one whose recorded run completed.
import os
import shutil
from optimum.coreml import CoreMLConfig  # the module named in the recorded error; CoreMLConfig is not referenced in this cell
from transformers import AutoTokenizer, AutoModel
from optimum.exporters.coreml import export_model

# 1. Configuration
model_id = "sentence-transformers/all-MiniLM-L6-v2"
output_folder = "iOS_Resources"
mlpackage_name = "TextEmbedder" # This will be the class name in Swift

# Start from an empty output folder. CAUTION: an existing folder with this
# name is removed recursively, including anything saved there earlier.
if os.path.exists(output_folder):
    shutil.rmtree(output_folder)
os.makedirs(output_folder)

print(f"Downloading and converting '{model_id}'...")

# 2. Export the Tokenizer files (Needed for Swift)
# Saved under <output_folder>/Tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer_path = os.path.join(output_folder, "Tokenizer")
tokenizer.save_pretrained(tokenizer_path)
print(f"✅ Tokenizer saved to: {tokenizer_path}")

# 3. Export the Core ML Model
# We export it for 'feature-extraction' to get the embeddings
# NOTE(review): export_model comes from the import that could not be
# resolved above, so this call and its keyword arguments are unverified.
export_model(
    model=AutoModel.from_pretrained(model_id),
    task="feature-extraction",
    tool="coreml",
    output=os.path.join(output_folder, f"{mlpackage_name}.mlpackage"),
)

# Summary plus manual follow-up instructions for the Xcode side.
print(f"✅ Core ML model saved to: {output_folder}/{mlpackage_name}.mlpackage")
print("\n--- NEXT STEPS ---")
print(f"1. Open Xcode.")
print(f"2. Drag the '{output_folder}' folder into your project.")
print(f"3. Make sure to check 'Create folder references' (blue folder icon).")

ModuleNotFoundError: No module named 'optimum.coreml'

In [None]:
!pip install coremltools optimum transformers



In [11]:
import torch
import coremltools as ct
from transformers import AutoModel, AutoTokenizer
import os

# 1. Setup: model identifier and the folder that collects the iOS artifacts
model_id = "sentence-transformers/all-MiniLM-L6-v2"
output_folder = "iOS_Resources"
os.makedirs(output_folder, exist_ok=True)  # keep the folder if it already exists

print(f"Loading {model_id}...")
# Load the model with torchscript=True (prepares it for tracing), then
# switch it to evaluation mode.
model = AutoModel.from_pretrained(model_id, torchscript=True)
model.eval()
tokenizer = AutoTokenizer.from_pretrained(model_id)

# 2. Wrapper that narrows the model to a single tensor output
class ModelWrapper(torch.nn.Module):
    """Adapter whose forward pass returns only the first output of ``model``.

    Args:
        model: Module that is invoked as
            ``model(input_ids=..., attention_mask=...)`` and whose result
            can be indexed with ``[0]``.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, input_ids, attention_mask):
        """Run the wrapped model and return element 0 of its result.

        Args:
            input_ids: Forwarded to the wrapped model as ``input_ids``.
            attention_mask: Forwarded to the wrapped model as ``attention_mask``.

        Returns:
            The first element of whatever the wrapped model returns.
        """
        # The wrapped model hands back a tuple
        # (last_hidden_state, pooler_output, ...); pick the first entry by
        # position rather than by attribute name.
        return self.model(input_ids=input_ids, attention_mask=attention_mask)[0]

# Initialize wrapper
# A freshly constructed nn.Module starts in training mode, and the earlier
# .eval() on the inner model does not change the flag on this new wrapper.
# Put the wrapper itself into eval mode so the module that gets traced and
# handed to the converter is in inference mode (the recorded run warned
# "Model is not in eval mode" without this).
wrapped_model = ModelWrapper(model).eval()

# 3. Create Dummy Input
# Tracing records the operations executed for one concrete example, so a
# short sentence is tokenized into PyTorch tensors to drive it.
example_text = "Hello Core ML"
inputs = tokenizer(example_text, return_tensors="pt")
dummy_input_ids = inputs["input_ids"]
dummy_attention_mask = inputs["attention_mask"]

# 4. Trace the Model
# The example tensors are passed positionally in the order of
# ModelWrapper.forward: (input_ids, attention_mask).
print("Tracing model...")
traced_model = torch.jit.trace(wrapped_model, (dummy_input_ids, dummy_attention_mask))

# 5. Convert to Core ML
print("Converting to Core ML...")
mlmodel = ct.convert(
    traced_model,
    # Both inputs are declared with batch size 1 and a flexible sequence
    # length between 1 and 512; each gets its own RangeDim instance.
    # NOTE(review): no dtype is given for the inputs — confirm the
    # converter's default matches what the Swift caller will feed.
    inputs=[
        ct.TensorType(name=input_name, shape=(1, ct.RangeDim(1, 512)))
        for input_name in ("input_ids", "attention_mask")
    ],
    outputs=[ct.TensorType(name="last_hidden_state")],
    # Optional: pin the deployment target explicitly. When it is left unset
    # the converter picks iOS15, as the recorded conversion log states.
    # minimum_deployment_target=ct.target.iOS16
)

# 6. Save Everything
# Resolve both destinations first, then write the model package followed by
# the tokenizer files.
mlpackage_path = f"{output_folder}/TextEmbedder.mlpackage"
tokenizer_dir = f"{output_folder}/Tokenizer"

mlmodel.save(mlpackage_path)
tokenizer.save_pretrained(tokenizer_dir)

# Report where each artifact ended up.
print("\nSUCCESS! -------------------")
print(f"1. Model saved to: {mlpackage_path}")
print(f"2. Tokenizer saved to: {tokenizer_dir}")

Loading sentence-transformers/all-MiniLM-L6-v2...
Tracing model...


When both 'convert_to' and 'minimum_deployment_target' not specified, 'convert_to' is set to "mlprogram" and 'minimum_deployment_target' is set to ct.target.iOS15 (which is same as ct.target.macOS12). Note: the model will not run on systems older than iOS15/macOS12/watchOS8/tvOS15. In order to make your model run on older system, please set the 'minimum_deployment_target' to iOS14/iOS13. Details please see the link: https://apple.github.io/coremltools/docs-guides/source/target-conversion-formats.html
Model is not in eval mode. Consider calling '.eval()' on your model prior to conversion


Converting to Core ML...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Converting PyTorch Frontend ==> MIL Ops:   0%|          | 0/236 [00:00<?, ? ops/s]Core ML embedding (gather) layer does not support any inputs besides the weights and indices. Those given will be ignored.
Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 235/236 [00:00<00:00, 825.80 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 200.91 passes/s]
  return input_var.val.astype(dtype=string_to_nptype(dtype_val))
Running MIL default pipeline: 100%|██████████| 95/95 [00:01<00:00, 60.93 passes/s] 
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 244.41 passes/s]



SUCCESS! -------------------
1. Model saved to: iOS_Resources/TextEmbedder.mlpackage
2. Tokenizer saved to: iOS_Resources/Tokenizer
