In [1]:
import random
from quickgrad.engine import Number
from quickgrad.nn import MLP, Layer, Neuron, Module, create_tensor
import math

In [2]:
from typing import *
import numpy as np

In [None]:
# class vector:
#     def __init__(self, data, _children=(), _op='', label=''):
#         self.data = np.array(data)
#         self.grad = np.zeros_like(data)
#         self.label = label
#         self._backward = lambda : None
#         self._prev = set(_children)
#         self._op = _op

#     def __repr__(self):
#         return f"vector(data={self.data})"

#     def __add__(self, other):
#         # assert self.s==len(other), "should be same shape"
#         other = other if isinstance(other, vector) else vector(other)
#         out = vector(self.data + other.data, (self, other), '+')

#         def _backward():
#             self.grad += 1.0 * out.grad
#             other.grad += 1.0 * out.grad
#         out._backward = _backward
#         return out

#     def exp(self):
#         # print(math.exp(self.data))
#         out = vector(np.exp(self.data), (self,), 'exp')

#         def _backward():
#             self.grad += out.data * out.grad
#         out._backward = _backward
#         return out

#     def log(self):
#         out = vector(np.log(self.data), (self,), 'log')

#         def _backward():
#             self.grad += (1 / self.data ) * out.grad
#         out._backward = _backward
#         return out

#     def __pow__(self, other):
#         other = other if isinstance(other, Number) else Number(other)
#         out = vector(np.power(self.data, other.data), (self, other), '_pow')

#         def _backward():
#             self.grad += (other.data * (self.data ** (other.data -1))) * out.grad
#         out._backward = _backward
#         return out
    
#     def sum(self, axis=None, keepdims=True):
#         out = np.sum(self.data, axis=axis, keepdims=keepdims)
#         out = vector(out, (self,), 'sum')

#         def _backward():
#             self.grad += np.ones_like(self.data, dtype=np.float32) * out.grad
#         out._backward = _backward
#         return out
    
#     def __mul__(self, other):
#         other = other if isinstance(other, Number) else vector(other)
#         out = vector(self.data * other.data, (self, other), '*')

#         def _backward():
#             self.grad += other.data * out.grad
#         out._backward = _backward
#         return out
    
#     def broadcast_to(self, target_shape):
#         input_shape = self.shape()
#         out = vector(np.broadcast_to(self, target_shape), (self,), 'broadcast')

#         def _backward():
#             broadcast_axes = get_broadcast_axes(input_shape, target_shape)[0]
#             ans = out.grad
#             print(broadcast_axes)
#             print(ans)
#             new_ans = ans.sum(broadcast_axes, keepdims=True)
#             print(vector(new_ans).shape())
#             print(vector(self.grad).shape())
#             self.grad += vector(ans.sum(broadcast_axes, keepdims=True)) * out.grad

#         # def _backward():
#         #     broadcast_axes = get_broadcast_axes(input_shape, target_shape)[0]
#         #     ans = out.grad
#         #     print(broadcast_axes)
#         #     print(ans)
#         #     new_ans = ans.sum(broadcast_axes, keepdims=True)
#         #     print(vector(new_ans).shape())
#         #     print(vector(self.grad).shape())
#         #     self.grad += vector(ans.sum(broadcast_axes, keepdims=True))
#         out._backward = _backward
#         return out

    
        
#     def mean(self, axis=None):
#         n = self.shape()[axis]
#         out = self.sum(axis, keepdims=False) * (n ** -1)
#         return out

#     # def std(self, axis):
#     #     # mean = vector(self.mean(axis))
#     #     mean = self.mean(axis)
#     #     print(mean)
#     #     ans = self - mean
#     #     print(ans.shape())
#     #     print(ans)
#     #     numer = ans.sum(axis=axis, keepdims=False)
#     #     denom = self.shape()[axis] - 1
#     #     # print(numer.shape())
#     #     # print(denom)
#     #     out = numer / denom
#     #     return out
        
#     def __neg__(self):
#         return self * -1
    
#     def __sub__(self, other):
#         return self + (- other)
    
#     def __rsub__(self, other):
#         return other - self
    
#     def __radd__(self, other):
#         return - self + other
    
#     def __truediv__(self, other):
#         return self * (other ** -1)
    
#     def __rtruediv__(self, other):
#         return (self ** -1) * (other)

#     def shape(self):
#         return np.shape(self.data)
    
#     def backward(self):
#         topo = []
#         visited = set()
#         def build_topo(v):
#             if v not in visited:
#                 visited.add(v)
#                 for child in v._prev:
#                     build_topo(child)
#                 topo.append(v)
#         build_topo(self)
        
#         self.grad = vector(np.ones_like(self.data))
#         for node in reversed(topo):
#             node._backward()

In [None]:
import numpy as np

class vector:
    def __init__(self, data, _children=(), _op='', label=''):
        self.data = np.array(data)
        self.grad = np.zeros_like(self.data, dtype=np.float64)  # Keep as numpy array
        self.label = label
        self._backward = lambda : None
        self._prev = set(_children)
        self._op = _op

    def __repr__(self):
        return f"vector(data={self.data})"

    # Add __iadd__ to support += operations
    def __iadd__(self, other):
        if isinstance(other, vector):
            self.data += other.data
        else:
            self.data += other
        return self

    def __add__(self, other):
        other = other if isinstance(other, vector) else vector(other)
        out = vector(self.data + other.data, (self, other), '+')

        def _backward():
            self.grad +=  out.grad
            other.grad +=  out.grad
        out._backward = _backward
        return out

    def exp(self):
        out = vector(np.exp(self.data), (self,), 'exp')

        def _backward():
            self.grad += out.data * out.grad
        out._backward = _backward
        return out

    def log(self):
        out = vector(np.log(self.data), (self,), 'log')

        def _backward():
            self.grad += (1 / self.data) * out.grad
        out._backward = _backward
        return out

    def __pow__(self, other):
        if isinstance(other, (int, float)):
            other_data = other
        elif hasattr(other, 'data'):
            other_data = other.data
        else:
            other_data = other
            
        out = vector(np.power(self.data, other_data), (self,), '_pow')

        def _backward():
            self.grad += (other_data * (self.data ** (other_data - 1))) * out.grad
        out._backward = _backward
        return out
    
    def sum(self, axis=None, keepdims=True):
        out = np.sum(self.data, axis=axis, keepdims=keepdims)
        out = vector(out, (self,), 'sum')

        def _backward():
            self.grad += np.ones_like(self.data, dtype=np.float64) * out.grad
        out._backward = _backward
        return out
    
    def __mul__(self, other):
        other = other if isinstance(other, vector) else vector(other)
        other = other.broadcast_to(self.shape())
        out = vector(self.data * other.data, (self, other), '*')

        def _backward():
            self.grad += other.data * out.grad 
            other.grad += self.data * out.grad
            
        out._backward = _backward
        return out
    
    def broadcast_to(self, target_shape):
        input_shape = self.shape()
        out = vector(np.broadcast_to(self.data, target_shape), (self,), 'broadcast')

        def _backward():
            broadcast_axes_result = get_broadcast_axes(input_shape, target_shape)
            broadcast_axes = broadcast_axes_result[0]
    
            if broadcast_axes and isinstance(broadcast_axes[0], list):
                broadcast_axes = broadcast_axes[0]
            ans = out.grad
            reduced_grad = ans
            
            if broadcast_axes:  
                axes_tuple = tuple(broadcast_axes)
                reduced_grad = np.sum(ans, axis=axes_tuple, keepdims=True)
                reduced_grad = reduced_grad.reshape(input_shape)
            self.grad += reduced_grad
            
        out._backward = _backward
        return out
    
    def expand_dims(self, axis):
        return vector(np.expand_dims(self, axis=axis))
    
    def transpose(self, axis):
        out = vector(np.transpose(self.data, axes=axis), (self,), 'T')

        def _backward():
            self.grad += np.transpose(out.grad, axes=axis)
        out._backward = _backward
        return out

    def var(self, axis):
        mean = self.mean(axis)    
        new_ans = mean.broadcast_to(self.shape())
        ans = (self - mean) ** 2
        numer = ans.sum(axis=axis, keepdims=False)
        denom = self.shape()[axis] 
        out = (numer / denom)
        return out 

    def std(self, axis):
        return self.var(axis) ** (1/2)
        
    def mean(self, axis=None):
        n = self.shape()[axis] if axis is not None else self.data.size
        out = self.sum(axis, keepdims=True) * (n ** -1)
        return out

    def reshape(self, target_shape):
        out = vector(np.reshape(self.data, target_shape))

        def _backward():
            self.grad += out.grad.reshape(self.data.shape)
        out._backward = _backward
        return out

    def __setitem__(self, index, val):
        self.data[index] = val.data.copy()
        self.grad[index] = val.grad.copy()
    
    def __getitem__(self, index):
        out = vector(self.data[index], (self,), 'get_item')

        def _backward():
            self.grad[index] += out.grad
        out._backward = _backward
        return out

    # def split(self, indices, axis):
    #     out = vector(np.split(self, indices_or_sections=indices, axis=axis))

    #     def _backward():
    #         grad_pieces = [piece.grad for piece in out]
    #         self.grad += np.concatenate(grad_pieces, axis=axis)

    #     out._backward = _backward
    #     return out

    def concat(self, other, axis):
        self_shape = self.shape()
        axis_self_shape = self_shape[axis]
        out = vector(np.concatenate((self.data, other.data), axis=axis), (self, other), 'cat')

        def _backward():
            self_grad, other_grad = np.array_split(out.grad, [axis_self_shape], axis=axis)
            self.grad += self_grad
            other.grad += other_grad
        out._backward = _backward
        return out

    # def __2dmatmul__(self, other):
    #     out = vector(np.matmul(self.data, other.data), (self, other), '@')

    #     def _backward():
    #         other_data = other.transpose((1,0)).data
    #         self_data = self.transpose((1,0)).data
    #         self.grad += np.matmul(out.grad, other_data)
    #         other.grad += np.matmul(self_data, out.grad)

    #     out._backward = _backward
    #     return out
    
    def __matmul__(self, other):
        out = vector(np.matmul(self.data, other.data), (self, other), '@')
        
        def _backward():        
            other_transposed = np.swapaxes(other.data, -2, -1)
            self_transposed = np.swapaxes(self.data, -2, -1)
            
            self.grad += np.matmul(out.grad, other_transposed)
            other.grad += np.matmul(self_transposed, out.grad)
        
        out._backward = _backward
        return out
    
    def masked_fill(self, condition, val):
        out = vector(np.where(condition, self.data, val.data))
        
        def _backward():
            self.grad += np.where(condition, 0, out.grad)
            val.grad += np.where(condition, out.grad, 0)
        out._backward = _backward
        return out
    
    # def clip(self, min_val, max_val):
    #     out = vector(min(max(self.data, min_val), max_val))

    #     def _backward():
    #         self.grad = np.clip(self.grad, min_val, max_val)
    #     out._backward = _backward
    #     return out
    
    def __neg__(self):
        return self * -1
    
    def __sub__(self, other):
        return self + (-other)
    
    def __rsub__(self, other):
        return vector(other) + (-self)
    
    def __radd__(self, other):
        return vector(other) + self
    
    def __truediv__(self, other):
        return self * (other ** -1)
    
    def __rtruediv__(self, other):
        return vector(other) * (self ** -1)

    def __rmul__(self, other):
        return self * other

    def shape(self):
        return self.data.shape
    
    def backward(self):
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)
        
        # FIXED: Keep grad as numpy array, not vector
        self.grad = np.ones_like(self.data)
        for node in reversed(topo):
            node._backward()

# Helper function (you'll need to implement this based on your needs)
def get_broadcast_axes(input_shape, target_shape):
    """
    Returns the axes that were broadcasted from input_shape to target_shape
    """
    # Simple implementation - you may need to adjust based on your specific needs
    input_ndim = len(input_shape)
    target_ndim = len(target_shape)
    
    # Pad input_shape with 1s at the beginning if needed
    padded_input = (1,) * (target_ndim - input_ndim) + input_shape
    
    broadcast_axes = []
    for i, (inp_dim, tgt_dim) in enumerate(zip(padded_input, target_shape)):
        if inp_dim == 1 and tgt_dim > 1:
            broadcast_axes.append(i)
    
    return [broadcast_axes], []  # Return in expected format

In [257]:
# aa = np.random.randn(2, 1, 2)
# bb = np.random.randn(2, 2, 1)
# a = vector(aa)
# b = vector(bb)

a = vector(np.array([[1, 2],      # shape (2, 3)
     [3, 4]]))
b = vector(np.array([[5, 6],      # shape (2, 2) 
     [7, 8]]))

In [258]:
c = a.__matmul__(b)

In [259]:
c.shape(), c

((2, 2),
 vector(data=[[19 22]
  [43 50]]))

In [260]:
c.backward()

In [261]:
c.grad

array([[1, 1],
       [1, 1]])

In [262]:
a.grad, b.grad

(array([[11., 15.],
        [11., 15.]]),
 array([[4., 4.],
        [6., 6.]]))

In [231]:
aa = np.arange(9)
a = vector(aa)

In [236]:
np.linalg.matmul(np.random.randn(3, 1), np.random.randn(3,1))

AttributeError: module 'numpy.linalg' has no attribute 'matmul'

In [237]:
a = np.random.randn(10, 3, 4)
b = np.random.randn(10, 4, 5)

# result shape: (10, 3, 5)
result = np.matmul(a, b)

In [223]:
aa = np.random.randint(0, 2, (2,2)),
np.where(aa, np.array(0), -float('inf'))

array([[[  0.,   0.],
        [-inf, -inf]]])

In [225]:
np.array(0)

array(0)

In [None]:
np.where(aa, )

In [224]:
aa

(array([[1, 1],
        [0, 0]]),)

In [None]:
np.random.randn(2,2)

In [None]:
np.where()

In [211]:
aa = np.random.randn(2, 3)
bb = np.random.randn(3, 2)

In [212]:
a = vector(aa)
b = vector(bb)

In [213]:
c = a.__2dmatmul__(b)

In [214]:
a

vector(data=[[ 0.76828079 -0.03428902 -0.33034202]
 [-2.1442991  -0.65824457 -0.32136415]])

In [215]:
c.backward()

vector(data=[[-0.48954855  1.18329686]
 [ 0.31236722  0.31378672]
 [-0.31682724 -0.81748092]])


In [216]:
a.grad

array([[ 0.69374831,  0.62615394, -1.13430817],
       [ 0.69374831,  0.62615394, -1.13430817]])

In [217]:
b.grad

array([[-1.37601831, -1.37601831],
       [-0.69253359, -0.69253359],
       [-0.65170616, -0.65170616]])

In [201]:
a@b

TypeError: unsupported operand type(s) for @: 'vector' and 'vector'

In [None]:
vector(aa)@2dvector()

In [187]:
np.matmul(np.random.randn(3, 1), np.random.randn(3, 1))

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 3 is different from 1)