In [None]:
import os
os.environ["METAL"] = "1"
# os.environ["CLANG"] = "1"
os.environ["METAL_XCODE"] = "1"
os.environ["DISABLE_COMPILER_CACHE"] = "1"
import numpy as np
import mlx.core as mx
from tinygrad import Tensor, dtypes, TinyJit
from tinygrad.helpers import Timing
import mlx.core as mx
from mlx import nn as mlx_nn
from tinygrad import nn
# from tqdm import tqdm_notebook as tqdm

In [None]:
# @TinyJit
def kp(x:Tensor, y:Tensor):
    """Return the product of ``x`` and ``y`` as computed by ``x.dot(y)``."""
    product = x.dot(y)
    return product

In [None]:
l = Tensor([[1, 2],[3, 4]])
m = Tensor([[1, 2],[3, 4]])
with Timing("Time: "):
    kp(l,m).numpy()

In [None]:
def quantized_matmul_tg(x, w_packed, scales, biases, width=4, groups=64):
    """Multiply activations by a bit-packed, group-quantized weight matrix.

    Each 32-bit word of ``w_packed`` holds ``32 // width`` quantized values,
    lowest bits first. Every run of ``groups`` consecutive unpacked values
    along K shares one scale and one bias, and a value ``q`` is dequantized
    as ``q * scale + bias`` before being multiplied with the activations.

    Args:
        x: activations whose last dimension is K. Leading dimensions (if any)
            are flattened for the computation and restored on the result, so
            both (M, K) and e.g. (1, M, K) inputs are accepted.
        w_packed: packed weights of shape (N, K * width // 32).
        scales: per-group scales of shape (N, K // groups).
        biases: per-group biases of shape (N, K // groups).
        width: number of bits per quantized value.
        groups: how many consecutive values share one scale/bias, i.e. the
            group size (not the number of groups).

    Returns:
        Tensor of shape (*x.shape[:-1], N), accumulated group by group
        starting from a float16 zero tensor.

    Raises:
        AssertionError: if the shapes of the arguments are inconsistent.
    """
    *lead_dims, K = x.shape
    N, K_packed = w_packed.shape

    num_values_per_uint32 = 32 // width  # E.g., for width=4, this is 8
    K_unpacked = K_packed * num_values_per_uint32

    # Validate before deriving anything from K // groups.
    assert K == K_unpacked, f"Mismatch in K dimensions: {K} vs {K_unpacked}"
    assert K % groups == 0, "K must be divisible by the group size"

    num_groups = K // groups
    packs_per_group = groups // num_values_per_uint32  # Number of uint32 packs per group

    assert scales.shape == (N, num_groups), f"Scales must have shape (N, {num_groups}), got {scales.shape}"
    assert biases.shape == (N, num_groups), f"Biases must have shape (N, {num_groups}), got {biases.shape}"

    # Mask that keeps the lowest `width` bits, e.g. 15 for width=4.
    bitmask = (1 << width) - 1

    # Collapse all leading dimensions into one row axis and split K by group.
    x_grouped = x.reshape(-1, num_groups, groups)  # Shape: (M, num_groups, groups)
    M = x_grouped.shape[0]

    output = Tensor.zeros((M, N), dtype=dtypes.float16)

    # One shift amount per value stored in a uint32, lowest field first.
    shift_list = [i * width for i in range(num_values_per_uint32)]

    for g in range(num_groups):
        # Scale and bias of this group, shaped to broadcast across the group.
        scale_g = scales[:, g].reshape(N, 1)  # Shape: (N, 1)
        bias_g = biases[:, g].reshape(N, 1)   # Shape: (N, 1)

        # Packed words that belong to this group.
        pack_start = g * packs_per_group
        pack_end = pack_start + packs_per_group
        w_packed_group = w_packed[:, pack_start:pack_end]  # Shape: (N, packs_per_group)

        # Extract one field per shift amount from every packed word.
        unpacked_values = []
        for shift_amount in shift_list:
            shifted = w_packed_group >> shift_amount
            masked = (shifted & bitmask).cast(dtypes.float16)
            unpacked_values.append(masked.reshape(N, -1))

        # Stacked shape is (num_values_per_uint32, N, packs_per_group); moving the
        # field axis last makes the fields of one word adjacent after the reshape.
        w_unpacked_stack = Tensor.stack(*unpacked_values, dim=0)
        w_unpacked_group = w_unpacked_stack.permute(1, 2, 0).reshape(N, groups)  # Shape: (N, groups)

        # Dequantize, then accumulate this group's contribution to the product.
        w_group = w_unpacked_group * scale_g + bias_g  # Shape: (N, groups)
        x_group = x_grouped[:, g, :]  # Shape: (M, groups)
        output = output.add(x_group.dot(w_group.transpose()))

    # Restore the caller's leading dimensions.
    return output.reshape(*lead_dims, N)


In [None]:
w = Tensor.randint((1024, 512), low=0, high=9, dtype=dtypes.uint32)
s = Tensor.rand(1024, 64, dtype=dtypes.float16)
b = Tensor.rand(1024, 64, dtype=dtypes.float16)
x = Tensor.rand(1, 120, 4096, dtype=dtypes.float16)

In [None]:
quantized_matmul_tg(x, w, s, b).realize().numpy()

In [None]:
def func():
    """Accumulate 64 products of fresh random float16 matrices.

    Each step draws a (120, 64) and a (64, 1024) tensor with ``Tensor.rand``
    and adds their product to a (120, 1024) accumulator that starts at zero.
    """
    acc = Tensor.zeros(120, 1024)
    step = 0
    while step < 64:
        lhs = Tensor.rand(120, 64, dtype=dtypes.float16)
        rhs = Tensor.rand(64, 1024, dtype=dtypes.float16)
        acc = acc.add(lhs.dot(rhs))
        step += 1
    return acc

In [None]:
func().realize().numpy()

In [None]:
with Timing("Time: "):
    f @ g

In [None]:
with Timing("Time: "):
    (w.T @ s).numpy()

In [None]:
def haha():
    """Return the notebook-global ``jp`` added to itself."""
    doubled = jp + jp
    return doubled

with Timing("Time: "):
    haha().realize()

In [None]:
def qmm(x, w, scales, biases, width=4):
    """Dequantize bit-packed weights and multiply ``x`` by their transpose.

    Every 32-bit word of ``w`` is split into ``32 // width`` fields, lowest
    bits first. The fields are grouped so that each entry of ``scales`` /
    ``biases`` covers one contiguous group, and dequantized as
    ``scale * q + bias``.

    Args:
        x: activations; the last dimension must match the unpacked row length.
        w: packed weights, one row per output feature.
        scales: per-group scales, shape (len(w), n_groups).
        biases: per-group biases, same shape as ``scales``.
        width: number of bits per quantized value.

    Returns:
        ``x.dot(w_dequantized.T)``.
    """
    # Floor division only drops the bits below a field; the remainder is needed
    # to drop the bits above it, otherwise every value keeps all higher fields.
    field_modulus = 2**width
    w_full = Tensor.cat(
        *[((w // (2**i)) % field_modulus)[..., None] for i in range(0, 32, width)], dim=-1
    )
    # (rows, n_groups, group_size) so scales/biases broadcast over each group.
    w_full = w_full.reshape(len(w), scales.shape[-1], -1)
    w_full = scales[..., None] * w_full + biases[..., None]
    w_full = w_full.reshape(len(w), -1)

    return x.dot(w_full.T)

In [None]:
def select_bits(w, bits, start):
    """Extract the ``bits``-wide field starting at bit ``start`` of 32-bit words.

    The multiplication pushes everything above the field past bit 31 (this
    relies on ``w`` wrapping at 32 bits); the floor division then discards
    everything below the field.
    """
    drop_above = 32 - (start + bits)
    drop_total = drop_above + start
    widened = w * (2**drop_above)
    return widened // (2**drop_total)


def qmm(x, w, scales, biases, bits=4):
    """Unpack ``w`` with ``select_bits``, dequantize per group and multiply.

    Fields are taken at bit offsets 0, ``bits``, 2*``bits``, ... of every
    word and concatenated along a new last axis. The result is viewed as
    (rows, n_groups, group_size), scaled and biased per group, flattened
    back to one row per output feature and multiplied as ``x.dot(W.T)``.
    """
    n_rows = len(w)
    fields = []
    for start in range(0, 32, bits):
        fields.append(select_bits(w, bits, start)[..., None])
    unpacked = Tensor.cat(*fields, dim=-1)
    grouped = unpacked.reshape(n_rows, scales.shape[-1], -1)
    dequantized = scales[..., None] * grouped + biases[..., None]
    return x.dot(dequantized.reshape(n_rows, -1).T)

In [None]:
w = Tensor.randint((1024, 512), low=0, high=9, dtype=dtypes.uint32)
s = Tensor.rand(1024, 64, dtype=dtypes.float16)
b = Tensor.rand(1024, 64, dtype=dtypes.float16)
x = Tensor.rand(1, 120, 4096, dtype=dtypes.float16)

# with Timing("time:"):
#     qmm(x, w, s, b).numpy()

In [None]:
with Timing("time:"):
    qmm(x, w, s, b).numpy()

In [None]:
ln = qmm(x, w, s, b).realize().numpy()

In [None]:
lm = mx.quantized_matmul(mx.array(x.numpy()), mx.array(w.numpy()), scales=mx.array(s.numpy()), biases=mx.array(b.numpy()), transpose=True)

In [None]:
lm.shape, ln.shape

In [None]:
(lm == ln).all()

In [None]:
o.shape

In [None]:
ll = Tensor([[1, 2], [2, 3]])
jp = Tensor([0, 1])

In [None]:
ll[jp].numpy()

In [None]:
def select_bits(w, bits, start):
    """Return the ``bits``-wide field of ``w`` that starts at bit ``start``."""
    # Floor-divide to drop the bits below the field, then take the remainder
    # to drop the bits above it.
    return (w // (1 << start)) % (1 << bits)

def qmm(x, w, scales, biases, bits=4):
    """Quantized matmul that accumulates one bit-field of the packed words at a time.

    Layout assumed (matching the other ``qmm`` variants in this notebook):
    ``w`` has shape (N, P); field ``i`` of packed column ``p`` is the weight
    for input feature ``p * (32 // bits) + i``; ``scales`` and ``biases``
    have shape (N, G) with one entry per contiguous group of input features.
    The group size must be a multiple of ``32 // bits`` so that a packed
    word never straddles two groups.

    Args:
        x: activations whose last dimension is ``P * (32 // bits)``.
        w: packed weights of shape (N, P).
        scales: per-group scales of shape (N, G).
        biases: per-group biases of shape (N, G).
        bits: number of bits per quantized value.

    Returns:
        Tensor of shape (*x.shape[:-1], N), equal to ``x @ W.T`` where ``W``
        is the dequantized weight matrix.
    """
    num_segments = 32 // bits  # Fields per packed word
    n_out, n_groups = scales.shape[0], scales.shape[-1]
    total = None

    for i in range(num_segments):
        # Field i of every packed word: shape (N, P).
        w_i = select_bits(w, bits, i * bits)

        # Packed column p belongs to group p // (P // G), so viewing the columns
        # as (G, P // G) lines each one up with its own scale and bias.
        w_dequant = scales[..., None] * w_i.reshape(n_out, n_groups, -1) + biases[..., None]
        w_dequant = w_dequant.reshape(n_out, -1)

        # Field i holds the weights of input features i, i + S, i + 2S, ...
        # (S = num_segments), so only those activation columns take part.
        res = x[..., i::num_segments].dot(w_dequant.T)
        total = res if total is None else total + res

    return total

In [None]:
ln = qmm(x, w, s, b).realize().numpy()

In [None]:
jp = (w // (1 << 1))

In [None]:
jp % (1 << 4)

In [None]:
ll = [Tensor([[1,2], [3,4]])[..., None], Tensor([[1,2], [3,4]])[..., None]]
Tensor.cat(*ll, dim=-1).numpy()

In [None]:
ll[0].numpy()

In [None]:
ll[0][..., None].numpy()

In [None]:
ll[0].numpy()

In [None]:
from tinygrad import nn

In [None]:
linear = nn.Linear(4096, 4096)
with Timing("time:"):
    linear(x).realize()

In [None]:
class MLXQuantizedLinear:
  """Linear layer over bit-packed, group-quantized weights.

  ``weight`` stores ``32 // bits`` quantized values per 32-bit word, lowest
  bits first. ``scales`` and ``biases`` hold one entry per run of
  ``group_size`` input features for every output row; a value ``q`` is
  dequantized as ``scale * q + bias``.
  """

  def __init__(self, in_features, out_features, bits=4, group_size=64, bias=False):
    """Allocate packed weights, scales and biases for the given layer size.

    ``bias`` selects between randomly initialized and all-zero quantization
    biases; it does not add a separate additive output bias.
    """
    assert in_features % group_size == 0
    assert 32 % bits == 0
    assert (in_features * bits) % 32 == 0
    # NOTE(review): kaiming_uniform is used with a uint32 dtype here; presumably
    # these are placeholders to be overwritten by loaded weights — confirm.
    self.weight = Tensor.kaiming_uniform(out_features, (in_features * bits) // 32, dtype=dtypes.uint32)
    self.scales = Tensor.kaiming_uniform(out_features, in_features // group_size, dtype=dtypes.half)
    if bias:
      self.biases = Tensor.kaiming_uniform(out_features, in_features // group_size, dtype=dtypes.half)
    else:
      self.biases = Tensor.zeros(out_features, in_features // group_size, dtype=dtypes.half)
    self.bits = bits
    self.group_size = group_size

  def __call__(self, x):
    """Dequantize the weights and return ``x.dot(W.T)``."""
    # Floor division only removes the bits below a field; the remainder is what
    # removes the bits above it, so each value is a single `bits`-wide field.
    field_modulus = 2**self.bits
    w_full = Tensor.cat(
        *[((self.weight // (2**i)) % field_modulus)[..., None] for i in range(0, 32, self.bits)], dim=-1
    )
    # (out_features, n_groups, group_size) so scales/biases broadcast per group.
    w_full = w_full.reshape(len(self.weight), self.scales.shape[-1], -1)
    w_full = self.scales[..., None] * w_full + self.biases[..., None]
    w_full = w_full.reshape(len(self.weight), -1)

    return x.dot(w_full.T)

In [None]:
linear = MLXQuantizedLinear(4096, 4096)
with Timing("time:"):
    linear(x).realize()

In [None]:
# Random MLX stand-ins: packed uint32 weights, float16 scales/biases and activations.
# randint needs the shape passed as `size`; a tuple in the first position is
# treated as per-element upper bounds and yields only a 2-element array.
w = mx.array(np.random.randint(0, 2**32, size=(4096, 512), dtype=np.uint32))
s = mx.array(np.random.rand(4096, 64).astype(np.half))
b = mx.array(np.random.rand(4096, 64).astype(np.float16))
x = mx.array(np.random.rand(1, 120, 4096).astype(np.float16))

In [None]:
linear = mlx_nn.QuantizedLinear(4096, 4096)
%timeit linear(x)

In [None]:
linear = mlx_nn.Linear(4096, 4096)
%timeit linear(x)

In [None]:
import time

In [None]:
linear = mlx_nn.Linear(4096, 4096)
t = time.time()
linear(x)
print(time.time()-t)

In [None]:
linear = mlx_nn.QuantizedLinear(4096, 4096)
t = time.time()
linear(x)
print(time.time()-t)

In [None]:
class MLXQuantizedEmbedding:
  """Embedding table stored as bit-packed, group-quantized rows.

  Each row of ``weight`` packs ``32 // bits`` quantized values per word,
  lowest bits first; ``scales`` holds one scale per group of ``group_size``
  embedding features. ``biases`` is optional: leave it ``None`` for a pure
  ``scale * q`` dequantization, or assign a tensor shaped like ``scales``
  to get ``scale * q + bias``.
  """

  def __init__(self, vocab_size, embed_size, bits = 4, group_size= 64):
    self.vocab_sz, self.embed_sz = vocab_size, embed_size
    self.bits = bits
    self.group_size = group_size
    # NOTE(review): glorot_uniform in the default dtype presumably only provides
    # placeholders until real packed weights are assigned — confirm.
    self.weight = Tensor.glorot_uniform(vocab_size, (embed_size * bits) // 32)
    self.scales = Tensor.glorot_uniform(vocab_size, embed_size // group_size)
    # No bias term unless the caller assigns one after construction.
    self.biases = None

  def __call__(self, x):
    """Look up and dequantize the rows selected by the index tensor ``x``.

    Returns a tensor of shape (*x.shape, unpacked_row_length).
    """
    s = x.shape
    x = x.flatten()
    w = self.weight[x]
    scales = self.scales[x]
    # Floor division drops the bits below a field, the remainder drops the bits
    # above it; without the remainder each value would keep all higher fields.
    field_modulus = 2**self.bits
    w_full = Tensor.cat(
      *[((w // (2**i)) % field_modulus)[..., None] for i in range(0, 32, self.bits)], dim=-1
    )
    # (n_indices, n_groups, group_size) so each scale covers its own group.
    w_full = scales[..., None] * w_full.reshape(len(w), scales.shape[-1], -1)
    if self.biases is not None:
      w_full = w_full + self.biases[x][..., None]
    return w_full.reshape(*s, -1)

In [None]:
emb = nn.Embedding(128256, 4096)
with Timing("Time it:"):
    emb(Tensor.arange(32)).realize()

In [None]:
w = Tensor.randint((1024, 512), low=0, high=9, dtype=dtypes.uint32)
s = Tensor.rand(1024, 64, dtype=dtypes.float16)
b = Tensor.rand(1024, 64, dtype=dtypes.float16)
x = Tensor.rand(1, 120, 4096, dtype=dtypes.float16)

In [None]:
w = mx.array(np.random.randint(low=0, high=9, size=(128256, 512)).astype(np.uint32))
s = mx.array(np.random.rand(128256, 64).astype(np.half))
b = mx.array(np.zeros((128256, 64)).astype(np.float16))
x = mx.arange(32, dtype=mx.uint32)

In [None]:
emb = MLXQuantizedEmbedding(128256, 4096)
with Timing("Time it:"):
    emb(Tensor.arange(32)).realize()

In [None]:
emb = mlx_nn.QuantizedEmbedding(128256, 4096)
emb.weight = w
emb.scales = s
emb.biases = b
emb(x)

In [None]:
emb = MLXQuantizedEmbedding(128256, 4096)
emb.weight = Tensor(np.array(w))
emb.scales = Tensor(np.array(s))
emb(Tensor.arange(32)).numpy()

In [None]:
emb.scales.shape

In [None]:
emb = MLXQuantizedEmbedding(128256, 4096)
emb(Tensor.arange(32)).numpy().shape

In [None]:
import numpy as np

# Generate a random integer array of shape (x, y)
result = np.random.randint(low=0, high=10, size=(3, 4))
print(result)

In [None]:
class MLXQuantizedLinear:
  """Randomly initialized quantized linear layer used for benchmarking.

  ``weight`` packs ``32 // bits`` values per uint32 word; ``scales`` and
  ``biases`` hold one entry per group of ``group_size`` input features for
  every output row. Field extraction is delegated to the notebook-level
  ``select_bits(w, bits, start)`` helper.
  """

  def __init__(self, in_features, out_features, bits=4, group_size=64, bias=False):
    """Create realized random parameters sized from the constructor arguments.

    With the defaults and a 4096x4096 layer this gives a (4096, 512) weight
    and (4096, 64) scales/biases. ``bias`` is accepted for signature
    compatibility but not used: biases are always random.
    """
    assert 32 % bits == 0
    assert in_features % group_size == 0
    assert (in_features * bits) % 32 == 0
    packed_cols = (in_features * bits) // 32
    n_groups = in_features // group_size
    self.weight = Tensor.randint((out_features, packed_cols), low=0, high=9, dtype=dtypes.uint32).realize()
    self.scales = Tensor.rand(out_features, n_groups, dtype=dtypes.half).realize()
    self.biases = Tensor.rand(out_features, n_groups, dtype=dtypes.half).realize()
    self.bits = bits
    self.group_size = group_size

  def __call__(self, x):
    """Dequantize the weights and apply them with ``x.linear``."""
    # One field per bit offset, concatenated on a new last axis.
    w_full = Tensor.cat(
        *[select_bits(self.weight, self.bits, i)[..., None] for i in range(0, 32, self.bits)], dim=-1
    )
    # (out_features, n_groups, group_size) so scales/biases broadcast per group.
    w_full = w_full.reshape(len(self.weight), self.scales.shape[-1], -1)
    w_full = self.scales[..., None] * w_full + self.biases[..., None]
    return x.linear(w_full.reshape(len(self.weight), -1).T)

def select_bits(w, bits, start):
    """Isolate bits ``start`` .. ``start + bits - 1`` of each 32-bit word in ``w``.

    Implemented with a multiply followed by a floor division: the multiply
    moves the field to the top of the word (depending on 32-bit wraparound to
    shed the higher bits) and the division brings it back down to bit 0.
    """
    up = 32 - (start + bits)
    down = up + start
    return (w * (2**up)) // (2**down)

# class MLXQuantizedLinearNew:
#   def __init__(self, in_features, out_features, bits=4, group_size=64, bias=False):
#     self.weight = Tensor.randint((4096, 512), low=0, high=9, dtype=dtypes.uint32)
#     self.scales = Tensor.rand(4096, 64, dtype=dtypes.half)
#     self.biases = Tensor.rand(4096, 64, dtype=dtypes.half)
#     self.bits = bits
#     self.group_size = group_size
    
#   def old_call(self, x):
#     w_full = Tensor.cat(
#         *[select_bits(self.weight, self.bits, i)[..., None] for i in range(0, 32, self.bits)], dim=-1
#     )
#     print(w_full.shape)
#     w_full = w_full.reshape(len(self.weight), self.scales.shape[-1], -1)
#     w_full = self.scales[..., None] * w_full + self.biases[..., None]
#     return x.linear(w_full.reshape(len(self.weight), -1).T)

#   def __call__(self, x):
#     res = []
#     for i in range(0, 32, self.bits):
#       w_full = select_bits(self.weight, self.bits, i).reshape(len(self.weight), self.scales.shape[-1], -1)
#       w_full = self.scales[..., None] * w_full + self.biases[..., None]
#       res.append(x.linear(w_full.reshape(len(self.weight), -1)).realize())
#     # new = x.linear(Tensor.cat(*res, dim=-1).reshape(len(self.weight), -1).T)
#     return res
#     # old = self.old_call(x)
#     # return new.realize(), old.realize()

In [None]:
from tinygrad import TinyJit
import time
import os
os.environ["METAL"] = "1"
# os.environ["CLANG"] = "1"
os.environ["METAL_XCODE"] = "1"
os.environ["DISABLE_COMPILER_CACHE"] = "1"
import numpy as np
import mlx.core as mx
from tinygrad import Tensor, dtypes, TinyJit
from tinygrad.helpers import Timing, Context
import mlx.core as mx
from mlx import nn as mlx_nn
from tinygrad import nn
import math

In [None]:
x = Tensor.rand(1, 1, 4096)
mlx = MLXQuantizedLinearNew(4096, 4096)
with Context(DEBUG=2):
    ll = mlx(x)

In [None]:
ll[0].realize().numpy()

In [None]:
ll[1].realize().numpy()

In [None]:
x = Tensor.rand(1, 1, 4096)
mlx = MLXQuantizedLinear(4096, 4096)
with Context(DEBUG=4):
    ll = mlx(x).realize()

In [None]:
Tensor.linear

In [None]:
class MLXQuantizedLinear:
    """Quantized linear layer that unpacks all bit-fields in one broadcasted step.

    ``weight`` packs ``32 // bits`` values per uint32 word, lowest bits first;
    ``scales`` and ``biases`` hold one entry per group of ``group_size`` input
    features for every output row.
    """

    def __init__(self, in_features, out_features, bits=4, group_size=64, bias=False):
        """Create random parameters sized from the constructor arguments.

        With the defaults and a 4096x4096 layer this gives a (4096, 512)
        weight and (4096, 64) scales/biases. ``bias`` is accepted for
        signature compatibility but not used: biases are always random.
        """
        assert 32 % bits == 0
        assert in_features % group_size == 0
        assert (in_features * bits) % 32 == 0
        packed_cols = (in_features * bits) // 32
        n_groups = in_features // group_size
        self.weight = Tensor.randint((out_features, packed_cols), low=0, high=2**32, dtype=dtypes.uint32)
        self.scales = Tensor.rand(out_features, n_groups, dtype=dtypes.half)
        self.biases = Tensor.rand(out_features, n_groups, dtype=dtypes.half)
        self.bits = bits
        self.group_size = group_size

    def __call__(self, x):
        """Dequantize the weights and apply them with ``x.linear``."""
        bits = self.bits

        # One power-of-two divisor per field (1, 2**bits, 2**(2*bits), ...).
        # Dividing by 2**k is a right shift by k; a constant divisor tensor is
        # used because the rest of the notebook already relies on `//`.
        # NOTE(review): whether `>>` accepts a tensor shift amount depends on the
        # tinygrad release — confirm before switching to it.
        divisors = Tensor([1 << start for start in range(0, 32, bits)], dtype=dtypes.uint32)

        # Trailing axis of size 1 broadcasts every word against all divisors.
        w_expanded = self.weight[..., None]  # Shape: (out, packed_cols, 1)
        w_bits = (w_expanded // divisors) & ((1 << bits) - 1)  # Shape: (out, packed_cols, 32 // bits)

        # (out, n_groups, group_size) so scales/biases broadcast per group.
        w_full = w_bits.reshape(len(self.weight), self.scales.shape[-1], -1)
        w_full = self.scales[..., None] * w_full + self.biases[..., None]

        # Flatten back to one row per output feature and transpose for linear().
        w_full = w_full.reshape(len(self.weight), -1).T

        return x.linear(w_full)

In [None]:
starts = Tensor.arange(0, 32, 4, dtype=dtypes.uint32)
x = Tensor.ones(30,30, dtype=dtypes.uint32)
x >> starts

In [None]:
y = Tensor.rand(1, 1, 4096)
tiny = nn.Linear(4096, 4096, bias=False)
with Context(DEBUG=4):
    ll = tiny(y).realize()

In [None]:
mlx = MLXQuantizedLinear(4096, 4096)
tiny = nn.Linear(4096, 4096)
for i in [1]*10:
    x = Tensor.rand(1, i, 4096)
    st = time.time()
    tiny(x).realize()
    st1 = time.time()
    mlx(x).realize()
    st2 = time.time()
    print("-"*20)
    print(f"{i}\nmlx: {st2-st1}\n tiny: {st1-st}")

In [None]:
mlx_nn.QuantizedLinear

In [None]:
ll = Tensor.cat(*[Tensor.arange(512)[..., None]]*8, dim=-1).realize()

In [None]:
ll.numpy()

In [None]:
lm = ll.reshape(64,64).numpy()

In [None]:
lm

In [None]:
lm * lm

In [None]:
504 * 504

In [None]:
l1 = Tensor.arange(6400).reshape(100,64,1)
l2 = Tensor.ones(100,64,64)
kp = (l1 * l2).realize().numpy()

In [None]:
kp[:,:,0]

In [None]:
import requests
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor

model_id = "unsloth/Llama-3.2-11B-Vision-Instruct"
# model = MllamaForConditionalGeneration.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
processor = AutoProcessor.from_pretrained(model_id)

messages = [
    [
        {
            "role": "user", 
            "content": [
                {"type": "image"},
                {"type": "text", "text": "What does the image show?"}
            ]
        }
    ],
]
text = processor.apply_chat_template(messages, add_generation_prompt=True)

url = "https://llava-vl.github.io/static/images/view.jpg"
image = Image.open(requests.get(url, stream=True).raw)

print(text, image)

# inputs = processor(text=text, images=image, return_tensors="np")
inputs = processor(text, image, return_tensors="np")
# output = model.generate(**inputs, max_new_tokens=25)
# print(processor.decode(output[0]))

In [None]:
inputs

In [None]:
messages = [
    [
        {
            "role": "user", 
            "content": [
                {"type": "text", "text": "hi explain life?"}
            ]
        }
    ],
]
text = processor.apply_chat_template(messages, add_generation_prompt=True)

# url = "https://llava-vl.github.io/static/images/view.jpg"
# image = Image.open(requests.get(url, stream=True).raw)

inputs = processor(text=text, images=None, return_tensors="pt").to(model.device)
output = model.language_model.generate(**inputs, max_new_tokens=25)
print(processor.decode(output[0]))

In [None]:
model.language_model.save_pretrained("/Users/varb/mllama_language/", save_peft_format=False)

In [None]:
model.language_model.model.embed_tokens.num_embeddings

In [None]:
from tinygrad.tensor import Tensor

def select_bits(w, bits):
    """Split each 32-bit word of ``w`` into ``bits``-wide fields, highest field first.

    A new last axis is appended to ``w`` with one entry per shift amount;
    the shift amounts are created with ``Tensor.arange``'s default dtype.
    """
    word_size = 32
    field_mask = (1 << bits) - 1
    # Shift amounts count down from the top field to 0 in steps of `bits`.
    shift_amounts = Tensor.arange(word_size - bits, -1, -bits)
    # The extra trailing axis lets every word broadcast against all shift amounts.
    per_field = w[..., None].rshift(shift_amounts)
    return per_field & field_mask

# Example usage
# Example usage
weight = Tensor(np.random.randint(0, 2**32, (32,), dtype=np.uint32))  # Example weight tensor as uint32
bits = 8

# Apply the function. tinygrad tensors convert dtype with `cast`;
# `astype` is the numpy spelling.
w_full = select_bits(weight.cast(dtypes.uint32), bits)

# Output the shape and selected bits
print(w_full.shape)
print(w_full)

In [None]:
from tinygrad.tensor import Tensor

def select_bits(w, bits):
    """Split each 32-bit word of ``w`` into ``bits``-wide fields, highest field first.

    A new last axis is appended to ``w`` with one entry per shift amount.
    """
    word_size = 32
    field_mask = (1 << bits) - 1
    # Shift amounts count down from the top field to 0, stored as uint32.
    shift_amounts = Tensor.arange(word_size - bits, -1, -bits, dtype='uint32')
    # The extra trailing axis lets every word broadcast against all shift amounts.
    return (w[..., None] >> shift_amounts) & field_mask

# Example usage
weight = Tensor(np.random.randint(0, 2**32, (32,), dtype=np.uint32))  # Example weight tensor as uint32
bits = 8

# Apply the function
w_full = select_bits(weight, bits)

# Output the shape and selected bits
print(w_full.shape)
print(w_full)

In [None]:
bin(15)

In [None]:
512 / 64

In [None]:
shifts = mx.array([2**i for i in range(0, 32, 4)], dtype=mx.uint32)

In [None]:
shifts.shape

In [None]:
shifts[None:None].shape

In [None]:
ll = mx.ones((100,100), dtype=mx.float16)

In [None]:
ll.sum(-1).shape

In [None]:
m_ll.contiguous()

In [None]:
mx.tanh(m_ll)

In [None]:
ll.size()

In [None]:
ll.shape

In [None]:
np.arange(10000)[ll.flatten() == int(1)].shape

In [None]:
torch.nn.functional.pad

In [None]:
import torch

In [None]:
cross_attention_mask = torch.ones((100, 100), dtype=torch.float32)
# inverted_cross_attn_mask = 0.0 - cross_attention_mask
# cross_attention_mask = inverted_cross_attn_mask.masked_fill(
#     inverted_cross_attn_mask.to(torch.bool), torch.finfo(torch.float32).min
# )

In [None]:
cross_attention_mask

In [None]:
cross_attention_mask[cross_attention_mask==1.0] = -1e9

In [None]:
cross_attention_mask

In [None]:
cross_attention_mask = np.ones((100, 100), dtype=np.float32)
cross_attention_mask[cross_attention_mask==1.0] = -1e9

In [None]:
ll = cross_attention_mask == 1.0

In [None]:
cross_attention_mask[ll] = 100

In [None]:
mx.repeat

In [None]:
import mlx.core as mx

In [None]:
ll = torch.randint(0, 10, (10, 10, 10))

In [None]:
lm = ll.repeat_interleave(4, dim=2)

In [None]:
m_ll = mx.array(ll.numpy())

In [None]:
m_lm = mx.repeat(m_ll, 4, axis=2)

In [None]:
mx.reshape

In [None]:
mx.tile(m_ll, (1, 1, 10, 1)).shape, m_ll.shape

In [None]:
mx.expand_dims(m_ll, axis=1).shape

In [None]:
m_ll.transpose

In [None]:
torch.transpose

In [None]:
(lm.numpy() == m_lm).all()

In [None]:
(ll.repeat(1,1,4,1).numpy() == mx.tile(m_ll, (1,1,4,1))).all()

In [None]:
ll.repeat(1,1,4,1)

In [None]:
mx.tile(m_ll, (1,1,4,1))

In [14]:
def check(l, m, *_, **__):
    """Print ``l``, ``m`` and the tuple of extra positional arguments.

    Extra keyword arguments are accepted and ignored.
    """
    print(l, m, _)

l = 1
m = 2
# The keyword argument takes precedence over the global `m`, so this prints "1 1 ()".
check(l, m=1)

1 1 ()


In [15]:
from transformers import AutoTokenizer

In [20]:
tok = AutoTokenizer.from_pretrained("mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx")

tokenizer_config.json:   0%|          | 0.00/4.36k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

In [25]:
import mlx.core as mx

# Example input array
flat_mask = mx.array([0, 1, 0, 2, 3, 0])  # Replace with your actual data

# Use mlx.core.where to get indices of non-zero elements
indices = mx.where(flat_mask != 0)

print(indices)

TypeError: where(): incompatible function arguments. The following argument types are supported:
    1. where(condition: Union[scalar, array], x: Union[scalar, array], y: Union[scalar, array], /, *, stream: Union[None, Stream, Device] = None) -> array

Invoked with types: mlx.core.array

In [27]:
ll = mx.array([1, 2, 3])
ll[[True, False, True]]

ValueError: boolean indices are not yet supported

In [31]:
mx.where(ll > 1)

array([True, False, False], dtype=bool)

In [41]:
import numpy as np

In [36]:
ll = mx.zeros((4, 40, 40 ,1280))

In [42]:
np.pad(ll, [(0,0), (0,0)])

ValueError: operands could not be broadcast together with remapped shapes [original->remapped]: (2,2)  and requested shape (4,2)

In [48]:
import torch
ll = torch.zeros((4, 40, 40 ,1280))
ans = torch.functional.F.pad(ll, (0, 0, 0, 10))

In [50]:
ll = mx.zeros((4, 40, 40 ,1280))
ans_mx = mx.pad(ll, [(0,0), (0,0), (0,10), (0,0)])

In [51]:
ans.numpy().shape

(4, 40, 50, 1280)

In [52]:
ans_mx.shape

(4, 40, 50, 1280)

In [54]:
np.equal(ans, ans_mx).all()

  np.equal(ans, ans_mx).all()


tensor(1, dtype=torch.uint8)

In [55]:
ans_mx.shape

(4, 40, 50, 1280)

In [57]:
ans.shape

torch.Size([4, 40, 50, 1280])

In [60]:
ll @ ll.T

ValueError: [matmul] Last dimension of first input with shape (4,40,40,1280) must match second to last dimension of second input with shape (1280,40,40,4).

In [95]:
npam = np.random.randint(low=0, high=2, size=(1, 6432, 1))
am = mx.array(npam).astype(mx.bfloat16)

In [105]:
ans = am @ am.transpose(0, 2, 1) * -3.3895313892515355e+38

In [99]:
tt = torch.tensor(npam, dtype=torch.bfloat16)

In [104]:
ttans = tt @ tt.transpose(-1, -2) * torch.finfo(torch.bfloat16).min

In [106]:
ans

array([[[-0, -0, -0, ..., -0, -0, -0],
        [-0, -3.38953e+38, -0, ..., -0, -0, -3.38953e+38],
        [-0, -0, -0, ..., -0, -0, -0],
        ...,
        [-0, -0, -0, ..., -0, -0, -0],
        [-0, -0, -0, ..., -0, -0, -0],
        [-0, -3.38953e+38, -0, ..., -0, -0, -3.38953e+38]]], dtype=bfloat16)

In [109]:
ttans

TypeError: Got unsupported ScalarType BFloat16

In [88]:
-3.3895313892515355e+38

-3.3895313892515355e+38

In [90]:
import numpy as np

print(np.finfo(np.float16).tiny)

6.104e-05


In [None]:
from 