In [5]:
import numpy as np
import torch

In [6]:
class OP:
    """Base class for one differentiable operation (a node of the dynamic graph).

    Subclasses implement ``forward`` (compute the result from the operands)
    and ``backward`` (turn the gradient of the result into one gradient per
    operand). Calling an instance runs ``forward`` and records the operands
    and the result so the gradient can later be routed back.
    """

    def __init__(self):
        # Concrete class name, e.g. "AddOP".
        self.name = self.__class__.__name__

    def __call__(self, *args):
        """Run ``forward(*args)``, remember operands/result, and tag the result.

        Returns:
            Whatever ``forward`` returned, with its ``op`` attribute set to
            this instance.
        """
        self.input = args
        self.output = self.forward(*args)
        self.output.op = self
        return self.output

    def forward(self, *args):
        """Compute the operation's result; must be overridden."""
        raise NotImplementedError()

    def backward(self, *args):
        """Map the result's gradient to the operands' gradients; must be overridden."""
        raise NotImplementedError()

    @staticmethod
    def _sum_to_shape(grad, shape):
        """Undo NumPy broadcasting on ``grad`` so that it has shape ``shape``.

        When an operand was broadcast in the forward pass, every copy of an
        element contributed to the result, so its gradient is the sum over the
        broadcast axes. ``grad`` is returned untouched when it already has the
        requested shape, when it has fewer dimensions than ``shape`` (e.g. the
        scalar seed gradient), or when it is not a broadcast of ``shape``.
        """
        shape = tuple(shape)
        grad_shape = np.shape(grad)
        if grad_shape == shape:
            return grad

        extra = len(grad_shape) - len(shape)
        if extra < 0:
            return grad

        aligned = grad_shape[extra:]
        if any(g != s and s != 1 for g, s in zip(aligned, shape)):
            return grad

        reduced = np.asarray(grad)
        if extra:
            # Leading axes that the operand did not have at all.
            reduced = reduced.sum(axis=tuple(range(extra)))
        stretched = tuple(
            axis for axis, (g, s) in enumerate(zip(aligned, shape)) if s == 1 and g != 1
        )
        if stretched:
            # Axes of length 1 in the operand that were stretched by broadcasting.
            reduced = reduced.sum(axis=stretched, keepdims=True)
        return reduced

    def backward_native(self, grad):
        """Propagate ``grad`` to every Tensor operand of this operation.

        ``backward(grad)`` may return a single gradient or a tuple with one
        gradient per operand. Non-Tensor operands (plain numbers/arrays) are
        skipped. Each gradient is reduced to its operand's shape first, so a
        broadcast operand does not accumulate a gradient of the result's shape.

        Raises:
            AssertionError: if the number of gradients differs from the number
                of recorded operands.
        """
        input_grads = self.backward(grad)
        if not isinstance(input_grads, tuple):
            input_grads = (input_grads, )

        assert len(input_grads) == len(self.input), "Number grads mismatch number input"

        for ig, ip in zip(input_grads, self.input):
            if isinstance(ip, Tensor):
                ip.backward(self._sum_to_shape(ig, ip.data.shape))

    def get_data(self, item):
        """Return ``item.data`` for a Tensor, or ``item`` itself otherwise."""
        if isinstance(item, Tensor):
            return item.data
        else:
            return item


class AddOP(OP):
    """Addition node: ``out = a + b``."""

    def forward(self, a, b):
        """Add the raw values of both operands and wrap the sum in a Tensor."""
        lhs = self.get_data(a)
        rhs = self.get_data(b)
        return Tensor(lhs + rhs)

    def backward(self, grad):
        """Forward the incoming gradient unchanged to both operands.

        d(a + b)/da = d(a + b)/db = 1.
        """
        grad_a = grad_b = grad
        return grad_a, grad_b


class SubOP(OP):
    """Subtraction node: ``out = a - b``."""

    def forward(self, a, b):
        """Subtract the raw value of ``b`` from that of ``a``; wrap in a Tensor."""
        minuend = self.get_data(a)
        subtrahend = self.get_data(b)
        return Tensor(minuend - subtrahend)

    def backward(self, grad):
        """Return ``(grad, -grad)``: d(a - b)/da = 1 and d(a - b)/db = -1."""
        grad_a = grad
        grad_b = -1 * grad
        return grad_a, grad_b


class MulOP(OP):
    """Multiplication node: ``out = a * b``."""

    def forward(self, a, b):
        """Multiply the raw values of both operands and wrap the product."""
        lhs = self.get_data(a)
        rhs = self.get_data(b)
        return Tensor(lhs * rhs)

    def backward(self, grad):
        """Product rule: each operand's gradient is ``grad`` times the other operand."""
        lhs, rhs = (self.get_data(operand) for operand in self.input)
        return grad * rhs, grad * lhs


class DivOP(OP):
    """Division node: ``out = a / b``."""

    def forward(self, a, b):
        """Divide the raw value of ``a`` by that of ``b``; wrap in a Tensor."""
        numerator = self.get_data(a)
        denominator = self.get_data(b)
        return Tensor(numerator / denominator)

    def backward(self, grad):
        """Quotient rule: ``grad / b`` for ``a`` and ``-grad * a / b**2`` for ``b``."""
        top, bottom = self.input
        num = self.get_data(top)
        den = self.get_data(bottom)
        grad_num = grad / den
        grad_den = grad * num / (den ** 2) * -1
        return grad_num, grad_den


class ExpOP(OP):
    """Exponential node: ``out = exp(a)``."""

    def forward(self, a):
        """Apply ``np.exp`` to the operand's raw value and wrap the result."""
        exponent = self.get_data(a)
        return Tensor(np.exp(exponent))

    def backward(self, grad):
        """Chain rule with d exp(a)/da = exp(a), recomputed from the recorded operand."""
        exponent = self.get_data(self.input[0])
        return grad * np.exp(exponent)


class LogOP(OP):
    """Natural-logarithm node: ``out = log(a)``."""

    def forward(self, a):
        """Apply ``np.log`` to the operand's raw value and wrap the result."""
        argument = self.get_data(a)
        return Tensor(np.log(argument))

    def backward(self, grad):
        """Chain rule with d log(a)/da = 1 / a."""
        argument = self.get_data(self.input[0])
        return grad / argument


class MatMulOP(OP):
    """Matrix-product node: ``out = a @ b``."""

    def forward(self, a, b):
        """Matrix-multiply the raw values of both operands and wrap the result."""
        left = self.get_data(a)
        right = self.get_data(b)
        return Tensor(left @ right)

    def backward(self, grad):
        """Return ``(grad @ b.T, a.T @ grad)``.

        NOTE(review): these formulas look valid only for 2-D operands, because
        ``.T`` is used as the matrix transpose -- confirm before using with
        1-D or batched inputs.
        """
        left, right = (self.get_data(operand) for operand in self.input)
        grad_left = grad @ right.T
        grad_right = left.T @ grad
        return grad_left, grad_right


class SumOP(OP):
    """Full reduction node: ``out = sum of all elements of a``."""

    def forward(self, a):
        """Sum every element of the operand's raw value and wrap the total."""
        total = np.sum(self.get_data(a))
        return Tensor(total)

    def backward(self, grad):
        """Every element contributes with weight 1, so fill the operand's shape with ``grad``."""
        source = self.get_data(self.input[0])
        return np.full_like(source, grad)


class MeanOP(OP):
    """Full reduction node: ``out = mean of all elements of a``."""

    def forward(self, a):
        """Average every element of the operand's raw value and wrap the result."""
        average = np.mean(self.get_data(a))
        return Tensor(average)

    def backward(self, grad):
        """Every element contributes with weight 1/size, so fill with ``grad / size``."""
        source = self.get_data(self.input[0])
        per_element = grad / source.size
        return np.full_like(source, per_element)


class Tensor:
    """Array value that remembers which operation produced it.

    Arithmetic on a Tensor builds the computation graph on the fly: every
    operator creates a fresh op object, calls it, and returns the resulting
    Tensor. ``backward`` then walks that graph in reverse.

    Attributes are documented inline in ``__init__``.
    """

    # Opt out of NumPy's ufunc handling. Without this, ``ndarray + Tensor``
    # (and the other binary operators) is handled by NumPy, which treats the
    # Tensor as an opaque scalar and builds an object array of per-element
    # results. With it, the ndarray operator returns NotImplemented and Python
    # falls back to the reflected methods below, which see the whole array.
    __array_ufunc__ = None

    def __init__(self, data, op=None):
        # Payload, always stored as a float32 ndarray.
        self.data = np.array(data, dtype=np.float32)
        # Operation that produced this tensor; None for a leaf.
        self.op = op
        # Accumulated gradient; starts at 0 and is added to by backward().
        self.grad = 0

    def __radd__(self, other):
        return AddOP()(other, self)

    def __add__(self, other):
        return AddOP()(self, other)

    def __rsub__(self, other):
        return SubOP()(other, self)

    def __sub__(self, other):
        return SubOP()(self, other)

    def __rmul__(self, other):
        return MulOP()(other, self)

    def __mul__(self, other):
        return MulOP()(self, other)

    def __rtruediv__(self, other):
        return DivOP()(other, self)

    def __truediv__(self, other):
        return DivOP()(self, other)

    def __neg__(self):
        # Negation is expressed as multiplication by -1.
        return MulOP()(self, -1)

    def __matmul__(self, other):
        return MatMulOP()(self, other)

    def __rmatmul__(self, other):
        # Reflected twin of __matmul__, so ``array @ tensor`` also builds a graph node.
        return MatMulOP()(other, self)

    def __repr__(self):
        if self.op is not None:
            return f"tensor({self.data}, grad_fn=<{self.op.name}>)"
        else:
            return f"{self.data}"

    def backward(self, grad=1):
        """Accumulate ``grad`` into ``self.grad`` and pass it on to the producing op.

        Args:
            grad: gradient of the final result with respect to this tensor;
                defaults to 1, the seed used when called on the final result.

        Each call adds to ``self.grad`` rather than overwriting it, so a
        tensor that receives several ``backward`` calls ends up with the sum
        of all the gradients it was given.
        """
        self.grad = self.grad + grad
        if self.op is not None:
            self.op.backward_native(grad)


# Mimics a package namespace: function-style entry points for the ops.
class morch:
    """Namespace of static helpers that build a fresh op and apply it to ``value``."""

    @staticmethod
    def exp(value):
        """Apply a new ``ExpOP`` to ``value`` and return its result."""
        op = ExpOP()
        return op(value)

    @staticmethod
    def log(value):
        """Apply a new ``LogOP`` to ``value`` and return its result."""
        op = LogOP()
        return op(value)

    @staticmethod
    def sum(value):
        """Apply a new ``SumOP`` to ``value`` and return its result."""
        op = SumOP()
        return op(value)

    @staticmethod
    def mean(value):
        """Apply a new ``MeanOP`` to ``value`` and return its result."""
        op = MeanOP()
        return op(value)

In [8]:
torch.set_printoptions(precision=5)
np.set_printoptions(precision=5)


# Shared inputs for both implementations.
value = np.arange(9).reshape(3, 3).astype(np.float32)
mul_value = np.linspace(0, 1, 9).reshape(3, 3)


# Reference run: PyTorch autograd on sum(sigmoid(a) @ b).
print("基于PyTorch的自动微分-动态图版本：==========================================")
a = torch.tensor(value, dtype=torch.float32, requires_grad=True)
b = torch.tensor(mul_value, dtype=torch.float32, requires_grad=True)
t = torch.sum(1 / (1 + torch.exp(-a)) @ b)
t.backward()

print("计算结果是：", t)
print("a的导数是：", a.grad.numpy())
print("b的导数是：", b.grad.numpy())

# Keep the reference numbers: a, b and t are rebound below.
torch_result = t.item()
torch_grad_a = a.grad.numpy().copy()
torch_grad_b = b.grad.numpy().copy()

print("\n\n")
print("自己写的自动微分-动态图版本：==========================================")
# Evaluating the expression builds the computation graph as a side effect;
# walking that graph backwards yields the gradients.
a = Tensor(value)
b = Tensor(mul_value)
t = morch.sum(1 / (1 + morch.exp(-a)) @ b)
t.backward()

print("计算结果是：", t)
print("a的导数是：", a.grad)
print("b的导数是：", b.grad)

# Check the agreement numerically instead of comparing the printouts by eye.
np.testing.assert_allclose(t.data, torch_result, rtol=1e-4)
np.testing.assert_allclose(a.grad, torch_grad_a, rtol=1e-4, atol=1e-6)
np.testing.assert_allclose(b.grad, torch_grad_b, rtol=1e-4, atol=1e-6)

计算结果是： tensor(12.53067, grad_fn=<SumBackward0>)
a的导数是： [[0.09375 0.29492 0.27561]
 [0.01694 0.02649 0.01745]
 [0.00092 0.00137 0.00088]]
b的导数是： [[2.4501  2.4501  2.4501 ]
 [2.71216 2.71216 2.71216]
 [2.87377 2.87377 2.87377]]



tensor([[-0. -1. -2.]
 [-3. -4. -5.]
 [-6. -7. -8.]], grad_fn=<MulOP>)
计算结果是： tensor(12.530673027038574, grad_fn=<SumOP>)
a的导数是： [[0.09375 0.29492 0.27561]
 [0.01694 0.02649 0.01745]
 [0.00092 0.00137 0.00088]]
b的导数是： [[2.4501  2.4501  2.4501 ]
 [2.71216 2.71216 2.71216]
 [2.87377 2.87377 2.87377]]
