<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Step11" data-toc-modified-id="Step11-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Step11</a></span></li><li><span><a href="#Step12" data-toc-modified-id="Step12-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Step12</a></span></li><li><span><a href="#Step13" data-toc-modified-id="Step13-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Step13</a></span></li><li><span><a href="#Step14" data-toc-modified-id="Step14-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Step14</a></span></li><li><span><a href="#Step15,16" data-toc-modified-id="Step15,16-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Step15,16</a></span><ul class="toc-item"><li><span><a href="#Step17" data-toc-modified-id="Step17-5.1"><span class="toc-item-num">5.1&nbsp;&nbsp;</span>Step17</a></span></li><li><span><a href="#Step18" data-toc-modified-id="Step18-5.2"><span class="toc-item-num">5.2&nbsp;&nbsp;</span>Step18</a></span></li><li><span><a href="#Step19" data-toc-modified-id="Step19-5.3"><span class="toc-item-num">5.3&nbsp;&nbsp;</span>Step19</a></span></li></ul></li></ul></div>

## Step11

In [3]:
import numpy as np

class Variable:
    """Hold an ndarray value, its gradient, and the function that produced it.

    Args:
        data: An ``np.ndarray`` payload, or ``None``.

    Raises:
        TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
    """

    def __init__(self, data):
        # Validate the input type up front.
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)} type is not supported.")

        self.data = data
        self.grad = None
        self.creator = None

    def set_creator(self, func):
        """Record ``func`` as the function that created this variable."""
        self.creator = func

    def backward(self):
        """Propagate gradients from this variable back through its creators.

        Each creator is expected to expose ``input``, ``output`` and
        ``backward(gy)``.
        """
        # Seed the gradient when none was set yet. The previous check tested
        # ``self.creator`` instead, so an output variable never got a seed.
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable without a creator has nothing to propagate to.
        if self.creator is None:
            return

        funcs = [self.creator]
        while funcs:
            f = funcs.pop()              # 1. take the next creator function
            x, y = f.input, f.output     # 2. its input and output variables
            x.grad = f.backward(y.grad)  # 3. run the function's backward pass

            if x.creator is not None:
                funcs.append(x.creator)

def as_array(x):
    """Return ``x`` as an ``np.ndarray`` if it is a scalar, otherwise ``x`` itself."""
    return np.array(x) if np.isscalar(x) else x

In [50]:
class Function:
    """Base class for single-input, single-output functions.

    Subclasses provide ``forward`` and ``backward``; calling an instance runs
    the forward computation and links the result back to this function.
    """

    def __call__(self, input):
        """Apply the function to ``input`` and return the output ``Variable``."""
        result = self.forward(input.data)
        wrapped = Variable(as_array(result))
        # Remember who produced the output so backprop can walk backwards.
        wrapped.set_creator(self)
        self.input = input
        self.output = wrapped

        return wrapped

    def forward(self, x):
        """Compute the output from the raw input; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

    def backward(self, gy):
        """Compute the input gradient from ``gy``; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")
        
def as_array(x):
    """Wrap a scalar in ``np.array``; pass any non-scalar through untouched."""
    if not np.isscalar(x):
        return x
    return np.array(x)

In [41]:
class Variable:
    """Hold an ndarray value, its gradient, and the function that produced it.

    Args:
        data: An ``np.ndarray`` payload, or ``None``.

    Raises:
        TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
    """

    def __init__(self, data):
        # Validate the type of the incoming data.
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)} dtype is not supported!")

        self.data = data
        self.grad = None     # gradient of this variable
        self.creator = None  # function that created this variable

    def set_creator(self, func):
        """Record ``func`` as the function that created this variable."""
        self.creator = func

    def backward(self):
        """Propagate gradients from this variable back through its creators.

        Each creator is expected to expose ``input``, ``output`` and
        ``backward(gy)``.
        """
        # A missing gradient means backprop starts here, so seed it with ones.
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable that no function produced has nothing to propagate;
        # without this guard the loop below would dereference ``None``.
        if self.creator is None:
            return

        funcs = [self.creator]
        while funcs:
            f = funcs.pop()
            x, y = f.input, f.output     # input/output of the creator
            x.grad = f.backward(y.grad)

            # An input without a creator is where the walk stops.
            if x.creator is not None:
                funcs.append(x.creator)
                
def as_array(x):
    """Return ``x`` as an ``np.ndarray`` if it is a scalar, otherwise ``x`` itself."""
    return np.array(x) if np.isscalar(x) else x

In [24]:
class Function:
    """Base class for functions taking a list of inputs and producing several outputs."""

    def __call__(self, inputs: list):
        """Apply the function to a list of input Variables.

        Args:
            inputs: [Variable(..), Variable(..), ...]

        Returns:
            The list of output Variables.
        """
        arrays = [variable.data for variable in inputs]
        results = self.forward(arrays)
        wrapped = [Variable(as_array(result)) for result in results]

        # Every output remembers this function as its creator.
        for variable in wrapped:
            variable.set_creator(self)
        self.inputs = inputs
        self.outputs = wrapped
        return wrapped

    def forward(self, xs):
        """Compute outputs from the raw input arrays; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

    def backward(self, gys):
        """Compute input gradients from output gradients; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")
        
def as_array(x):
    """Wrap a scalar in ``np.array``; pass any non-scalar through untouched."""
    if not np.isscalar(x):
        return x
    return np.array(x)

In [25]:
class Add(Function):
    """Function that adds the two arrays it receives as a list."""

    def forward(self, xs):
        """Return a one-element tuple holding the sum of the two entries of ``xs``."""
        lhs, rhs = xs
        return (lhs + rhs,)  # outputs are always returned as a tuple
    
def add(xs):
    """Apply a fresh ``Add`` instance to the list of inputs ``xs``."""
    op = Add()
    return op(xs)

In [29]:
# Wrap two scalars as Variables and hand them to add() as a single list.
xs = [Variable(np.array(2)), Variable(np.array(5))]
ys = add(xs)
# The call returns a list of output Variables; the first entry holds the sum.
y = ys[0]
y.data

array(7)

In [53]:
def func(*inputs):
    """Print the tuple into which the positional arguments were collected."""
    print(inputs)
    
# Three positional arguments arrive inside func as one tuple.
func(1,2,3)

(1, 2, 3)


## Step12

In [46]:
class Function:
    """Base class for functions called with inputs as separate positional arguments."""

    def __call__(self, *inputs):
        """Apply the function and return one Variable or a list of Variables."""
        print('type:', type(inputs[0]))
        arrays = [variable.data for variable in inputs]
        results = self.forward(arrays)
        wrapped = [Variable(as_array(result)) for result in results]

        # Register this function as the creator of every output.
        for variable in wrapped:
            variable.set_creator(self)
        self.inputs = inputs
        self.outputs = wrapped

        # A single output is returned bare rather than inside a list.
        if len(wrapped) > 1:
            return wrapped
        return wrapped[0]

    def forward(self, xs):
        """Compute outputs from the raw input arrays; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

    def backward(self, gys):
        """Compute input gradients from output gradients; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")
    
    
class Add(Function):
    """Function that adds the two arrays it receives as a list."""

    def forward(self, xs):
        """Return a one-element tuple holding the sum of the two entries of ``xs``."""
        lhs, rhs = xs
        return (lhs + rhs,)
    
def add(*xs):
    """Apply a fresh ``Add`` instance to the positional inputs ``xs``."""
    op = Add()
    return op(*xs)

In [48]:
# Inputs are now passed as separate arguments instead of one list.
x0 = Variable(np.array(2))
x1 = Variable(np.array(5))

ys = add(x0, x1)
# With a single output the call yields the Variable itself, not a list.
print(type(ys), ys)
ys.data

type: <class '__main__.Variable'>
<class '__main__.Variable'> <__main__.Variable object at 0x7fbb8aa38150>


array(7)

In [52]:
class Variable:
    """Hold an ndarray value, its gradient, and the function that produced it.

    Args:
        data: An ``np.ndarray`` payload, or ``None``.

    Raises:
        TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
    """

    def __init__(self, data):
        # Validate the type of the incoming data.
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)} dtype is not supported!")

        self.data = data
        self.grad = None     # gradient of this variable
        self.creator = None  # function that created this variable

    def set_creator(self, func):
        """Record ``func`` as the function that created this variable."""
        self.creator = func

    def backward(self):
        """Propagate gradients from this variable back through its creators.

        This version reads the singular ``input`` / ``output`` attributes of
        each creator, i.e. it walks a chain of single-input functions.
        """
        # A missing gradient means backprop starts here, so seed it with ones.
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable that no function produced has nothing to propagate;
        # without this guard the loop below would dereference ``None``.
        if self.creator is None:
            return

        funcs = [self.creator]
        while funcs:
            f = funcs.pop()
            x, y = f.input, f.output     # input/output of the creator
            x.grad = f.backward(y.grad)

            # An input without a creator is where the walk stops.
            if x.creator is not None:
                funcs.append(x.creator)
                
def as_array(x):
    """Return ``x`` as an ``np.ndarray`` if it is a scalar, otherwise ``x`` itself."""
    return np.array(x) if np.isscalar(x) else x

class Function:
    """Base class whose ``forward`` receives the unpacked input arrays.

    Compared with a list-based ``forward``, subclasses can declare ordinary
    positional parameters and return a bare value instead of a tuple.
    """

    def __call__(self, *inputs):
        """Apply the function to the given Variables.

        Args:
            *inputs: Variable(..), Variable(..), ...

        Returns:
            A single Variable when there is one output, else a list of them.
        """
        arrays = [variable.data for variable in inputs]
        results = self.forward(*arrays)
        # Normalise a bare return value to a one-element tuple.
        if not isinstance(results, tuple):
            results = (results,)
        wrapped = [Variable(as_array(result)) for result in results]

        for variable in wrapped:
            variable.set_creator(self)

        self.inputs = inputs
        self.outputs = wrapped
        if len(wrapped) > 1:
            return wrapped
        return wrapped[0]

    def forward(self, xs):
        """Compute outputs from the raw input arrays; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

    def backward(self, gys):
        """Compute input gradients from output gradients; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

class Add(Function):
    """Function that adds its two input arrays."""

    def forward(self, x0, x1):
        """Return ``x0 + x1``."""
        return x0 + x1
    

def add(x0, x1):
    """Apply a fresh ``Add`` instance to ``x0`` and ``x1``."""
    op = Add()
    return op(x0, x1)

In [54]:
# Add two Variables through the two-argument add() wrapper and show the sum.
x0 = Variable(np.array(1))
x1 = Variable(np.array(10))
y = add(x0, x1)
print(y.data)

<class '__main__.Variable'> <__main__.Variable object at 0x7fbb8aadb910>
11


## Step13

In [55]:
# Next step: make backprop handle a variable number of inputs/outputs, which
# is implemented in the Variable class.

# Existing class (single-input backprop), kept for comparison.
class Variable:
    """Hold an ndarray value, its gradient, and the function that produced it.

    Args:
        data: An ``np.ndarray`` payload, or ``None``.

    Raises:
        TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
    """

    def __init__(self, data):
        # Validate the *argument*: ``self.data`` does not exist yet at this
        # point, so checking it raised AttributeError on every construction.
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)} dtype is not supported!")

        self.data = data
        self.grad = None
        self.creator = None

    def set_creator(self, func):
        """Record ``func`` as the function that created this variable."""
        self.creator = func

    def backward(self):
        """Propagate gradients back through a chain of single-input creators.

        Each creator is expected to expose ``input``, ``output`` and
        ``backward(gy)``.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # Nothing to propagate from a variable that no function produced.
        if self.creator is None:
            return

        funcs = [self.creator]
        while funcs:
            f = funcs.pop()
            x, y = f.input, f.output
            x.grad = f.backward(y.grad)

            if x.creator is not None:
                funcs.append(x.creator)

In [63]:
# Updated class: backprop through functions with several inputs/outputs.
class Variable:
    """Hold an ndarray value, its gradient, and the function that produced it.

    Args:
        data: An ``np.ndarray`` payload, or ``None``.

    Raises:
        TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
    """

    def __init__(self, data):
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)} dtype is not supported!")

        self.data = data
        self.grad = None
        self.creator = None

    def set_creator(self, func):
        """Record ``func`` as the function that created this variable."""
        self.creator = func

    def backward(self):
        """Propagate gradients back through multi-input/multi-output creators.

        Each creator is expected to expose ``inputs``, ``outputs`` and
        ``backward(*gys)``. Gradients are assigned, not accumulated, so a
        variable used more than once keeps only the last contribution.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # Nothing to propagate from a variable that no function produced;
        # without this guard the loop below would dereference ``None``.
        if self.creator is None:
            return

        funcs = [self.creator]
        while funcs:
            f = funcs.pop()
            # Collect the gradient of every output of this function.
            gys = [output.grad for output in f.outputs]
            gxs = f.backward(*gys)
            # Normalise a bare return value to a one-element tuple.
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            # Assign each gradient to its matching input variable.
            for x, gx in zip(f.inputs, gxs):
                x.grad = gx

                if x.creator is not None:
                    funcs.append(x.creator)

In [66]:
class Function:
    """Base class for functions with any number of inputs and outputs."""

    def __call__(self, *inputs):
        """Apply the function; return one Variable or a list of Variables."""
        arrays = [variable.data for variable in inputs]
        results = self.forward(*arrays)
        # Normalise a bare return value to a one-element tuple.
        if not isinstance(results, tuple):
            results = (results,)
        wrapped = [Variable(as_array(result)) for result in results]

        # Record this function as the creator of every output variable.
        for variable in wrapped:
            variable.set_creator(self)

        self.inputs = inputs
        self.outputs = wrapped
        if len(wrapped) > 1:
            return wrapped
        return wrapped[0]

    def forward(self, xs):
        """Compute outputs from the raw input arrays; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

    def backward(self, gys):
        """Compute input gradients from output gradients; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")
        
class Square(Function):
    """Function that squares its input."""

    def forward(self, x):
        """Return ``x ** 2``."""
        squared = x ** 2
        return squared

    def backward(self, gy):
        """Return ``2 * x * gy`` using the input recorded at call time."""
        source = self.inputs[0].data
        return 2 * source * gy
    
class Add(Function):
    """Function that adds its two input arrays."""

    def forward(self, x0, x1):
        """Return ``x0 + x1``."""
        return x0 + x1

    def backward(self, gy):
        """Pass the upstream gradient unchanged to both inputs."""
        return (gy, gy)
    
# Expose the function class as a plain Python function.
def add(x0, x1):
    """Apply a fresh ``Add`` instance to ``x0`` and ``x1``."""
    op = Add()
    return op(x0, x1)

def square(x):
    """Apply a fresh ``Square`` instance to ``x``."""
    op = Square()
    return op(x)

In [72]:
# Build z = x**2 + y**2 from the square/add wrappers and backpropagate from z.
x = Variable(np.array(2.0))
y = Variable(np.array(3.0))

z = add(square(x), square(y))
z.backward()

# z.grad is the seed gradient; x.grad and y.grad are dz/dx and dz/dy.
print(z.grad)
print(z.data)
print(x.grad)
print(y.grad)

1.0
13.0
4.0
6.0


In [60]:
import numpy as np

# Compare object identity across an augmented assignment and a rebinding.
a = np.array(5)
print(id(a))

# ``+=`` updates the existing array, so the id printed next is unchanged
# (see the recorded output).
a += a
print(id(a))

# ``a + a`` builds a new array and rebinds the name, so the id differs.
a = a + a
print(id(a))

140457002131792
140457002131792
140456948425456


## Step14

In [77]:
# Using the same variable more than once used to give a wrong derivative;
# this version accumulates gradients instead of overwriting them.
class Variable:
    """Hold an ndarray value, its gradient, and the function that produced it.

    Args:
        data: An ``np.ndarray`` payload, or ``None``.

    Raises:
        TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
    """

    def __init__(self, data):
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)} dtype is not supported!")

        self.data = data
        self.grad = None
        self.creator = None

    def set_creator(self, func):
        """Record ``func`` as the function that created this variable."""
        self.creator = func

    def backward(self):
        """Propagate gradients back, summing contributions per input variable.

        Each creator is expected to expose ``inputs``, ``outputs`` and
        ``backward(*gys)``.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # Nothing to propagate from a variable that no function produced;
        # without this guard the loop below would dereference ``None``.
        if self.creator is None:
            return

        funcs = [self.creator]
        while funcs:
            f = funcs.pop()
            gys = [output.grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            # Map each gradient onto its input variable.
            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    # Build a new array rather than ``+=`` so a gradient
                    # object shared with another variable is not mutated.
                    x.grad = x.grad + gx

                if x.creator is not None:
                    funcs.append(x.creator)
                    
class Function:
    """Base class for functions with any number of inputs and outputs."""

    def __call__(self, *inputs):
        """Apply the function; return one Variable or a list of Variables."""
        arrays = [variable.data for variable in inputs]
        results = self.forward(*arrays)
        if not isinstance(results, tuple):
            results = (results,)
        wrapped = [Variable(as_array(result)) for result in results]
        # Register this function as the creator of every output.
        for variable in wrapped:
            variable.set_creator(self)

        self.inputs = inputs
        self.outputs = wrapped
        if len(wrapped) > 1:
            return wrapped
        return wrapped[0]

    def forward(self, xs):
        """Compute outputs from the raw input arrays; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

    def backward(self, gys):
        """Compute input gradients from output gradients; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")
        

class Square(Function):
    """Function that squares its input."""

    def forward(self, x):
        """Return ``x ** 2``."""
        squared = x ** 2
        return squared

    def backward(self, gy):
        """Return ``2 * x * gy`` using the input recorded at call time."""
        source = self.inputs[0].data
        return 2 * source * gy
    
    
class Add(Function):
    """Function that adds its two input arrays."""

    def forward(self, x0, x1):
        """Return ``x0 + x1``."""
        total = x0 + x1
        return total

    def backward(self, gy):
        """Pass the upstream gradient unchanged to both inputs."""
        return (gy, gy)
    

def square(x):
    """Apply a fresh ``Square`` instance to ``x``."""
    op = Square()
    return op(x)

def add(x0, x1):
    """Apply a fresh ``Add`` instance to ``x0`` and ``x1``."""
    op = Add()
    return op(x0, x1)

In [80]:
# Use the same variable for both operands: y = x + x.
x = Variable(np.array(3.0))
y = add(x, x)

print(y.data)
y.backward()
print(y.grad)
# Both operands contribute to x.grad (recorded output: 2.0).
print(x.grad)

6.0
1.0
2.0


In [81]:
# Use the same variable three times: y = (x + x) + x.
x = Variable(np.array(3.0))
y = add(add(x, x), x)

print(y.data)
y.backward()
print(y.grad)
# All three uses contribute to x.grad (recorded output: 3.0).
print(x.grad)

9.0
1.0
3.0


In [83]:
# When one variable is reused across separate computations, the gradient left
# over from the earlier one leaks into the next and gives a wrong result.
# A method that resets the gradient is added here.

class Variable:
    """Hold an ndarray value, its gradient, and the function that produced it.

    Args:
        data: An ``np.ndarray`` payload, or ``None``.

    Raises:
        TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
    """

    def __init__(self, data):
        # The check must run for non-None data. The previous ``data is None``
        # condition rejected ``None`` and let every other type through.
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)} dtype is not supported")

        self.data = data
        self.grad = None
        self.creator = None

    def set_creator(self, func):
        """Record ``func`` as the function that created this variable."""
        self.creator = func

    def cleargrad(self):
        """Reset the stored gradient so the variable can be reused."""
        self.grad = None

    def backward(self):
        """Propagate gradients back, summing contributions per input variable.

        Each creator is expected to expose ``inputs``, ``outputs`` and
        ``backward(*gys)``.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # Nothing to propagate from a variable that no function produced;
        # without this guard the loop below would dereference ``None``.
        if self.creator is None:
            return

        funcs = [self.creator]
        while funcs:
            f = funcs.pop()
            gys = [output.grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                if x.creator is not None:
                    funcs.append(x.creator)
                    

class Function:
    """Base class for functions with any number of inputs and outputs."""

    def __call__(self, *inputs):
        """Apply the function; return one Variable or a list of Variables."""
        arrays = [variable.data for variable in inputs]
        results = self.forward(*arrays)
        if not isinstance(results, tuple):
            results = (results,)
        wrapped = [Variable(as_array(result)) for result in results]
        # Register this function as the creator of every output.
        for variable in wrapped:
            variable.set_creator(self)

        self.inputs = inputs
        self.outputs = wrapped
        if len(wrapped) > 1:
            return wrapped
        return wrapped[0]

    def forward(self, xs):
        """Compute outputs from the raw input arrays; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

    def backward(self, gys):
        """Compute input gradients from output gradients; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")
        

class Square(Function):
    """Function that squares its input."""

    def forward(self, x):
        """Return ``x ** 2``."""
        squared = x ** 2
        return squared

    def backward(self, gy):
        """Return ``2 * x * gy`` using the input recorded at call time."""
        source = self.inputs[0].data
        return 2 * source * gy
    
    
class Add(Function):
    """Function that adds its two input arrays."""

    def forward(self, x0, x1):
        """Return ``x0 + x1``."""
        total = x0 + x1
        return total

    def backward(self, gy):
        """Pass the upstream gradient unchanged to both inputs."""
        return (gy, gy)
    

def square(x):
    """Apply a fresh ``Square`` instance to ``x``."""
    op = Square()
    return op(x)

def add(x0, x1):
    """Apply a fresh ``Add`` instance to ``x0`` and ``x1``."""
    op = Add()
    return op(x0, x1)

In [85]:
# First computation: y = x + x.
x = Variable(np.array(3.0))
y = add(x, x)
print(y.data)
y.backward()
print(y.grad)
print(x.grad)
print('-'*50)
# Second computation reuses x without clearing its gradient, so the new
# contributions are added to the value left by the first pass
# (recorded output: 5.0 rather than 3.0).
y = add(add(x, x), x)
print(y.data)
y.backward()
print(y.grad)
print(x.grad)

6.0
1.0
2.0
--------------------------------------------------
9.0
1.0
5.0


In [86]:
# Same experiment, now using the gradient-reset method between computations.
x = Variable(np.array(3.0))
y = add(x, x)
print(y.data)
y.backward()
print(y.grad)
print(x.grad)
print('-'*50)
# Clear the gradient left by the first pass before reusing x.
x.cleargrad()
y = add(add(x, x), x)
print(y.data)
y.backward()
print(y.grad)
# With the reset, x.grad reflects only the second computation
# (recorded output: 3.0).
print(x.grad)

6.0
1.0
2.0
--------------------------------------------------
9.0
1.0
3.0


## Step15,16

In [88]:
# Pair a list (keeps insertion order) with a set (membership test) so that
# each function object is queued at most once.
funcs = []
seen_set = set()

func1 = Function()
func2 = Function()
func3 = Function()

creators = [func1, func2, func3]

for f in creators:
    # Only queue a function that has not been seen before.
    if f not in seen_set:
        funcs.append(f)
        seen_set.add(f)
        print(funcs)
        print(seen_set)

[<__main__.Function object at 0x7fbb8abe2710>]
{<__main__.Function object at 0x7fbb8abe2710>}
[<__main__.Function object at 0x7fbb8abe2710>, <__main__.Function object at 0x7fbb8abe2f50>]
{<__main__.Function object at 0x7fbb8abe2710>, <__main__.Function object at 0x7fbb8abe2f50>}
[<__main__.Function object at 0x7fbb8abe2710>, <__main__.Function object at 0x7fbb8abe2f50>, <__main__.Function object at 0x7fbb8abe2510>]
{<__main__.Function object at 0x7fbb8abe2510>, <__main__.Function object at 0x7fbb8abe2710>, <__main__.Function object at 0x7fbb8abe2f50>}


In [90]:
# Add a "generation" number to order functions during backprop.
# A function shares the generation of the inputs it was called with.

class Variable:
    """Hold an ndarray value, its gradient, its creator and its generation.

    Args:
        data: An ``np.ndarray`` payload, or ``None``.

    Raises:
        TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
    """

    def __init__(self, data):
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)} dtype is not supported!")

        self.data = data
        self.grad = None
        self.creator = None
        self.generation = 0

    def set_creator(self, func):
        """Record the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def cleargrad(self):
        """Reset the stored gradient so the variable can be reused."""
        self.grad = None

    def backward(self):
        """Propagate gradients back, visiting functions from the highest generation down.

        Each creator is expected to expose ``inputs``, ``outputs``,
        ``generation`` and ``backward(*gys)``.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # Nothing to propagate from a variable that no function produced;
        # without this guard ``add_func(None)`` would fail on ``generation``.
        if self.creator is None:
            return

        funcs = []
        seen_sets = set()

        # Nested because it is only used here and needs the two locals above.
        def add_func(f):
            if f not in seen_sets:
                funcs.append(f)
                seen_sets.add(f)
                # Ascending sort, so ``pop()`` yields the highest generation.
                funcs.sort(key=lambda x: x.generation)

        # Start from the function that produced this variable.
        add_func(self.creator)

        while funcs:
            f = funcs.pop()
            # Fixed: this comprehension used ``in`` where ``for`` was meant,
            # which raised NameError on ``output``.
            gys = [output.grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            # Accumulate each gradient onto its input variable.
            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

---
- 복습

In [14]:
import numpy as np

class Variable:
    """Hold an ndarray value, its gradient, its creator and its generation.

    Args:
        data: An ``np.ndarray`` payload, or ``None``.

    Raises:
        TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
    """

    def __init__(self, data):
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)} dtype is not supported")

        self.data = data
        self.grad = None
        self.creator = None
        self.generation = 0

    def set_creator(self, func):
        """Record the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def cleargrad(self):
        """Reset the stored gradient so the variable can be reused."""
        self.grad = None

    def backward(self):
        """Propagate gradients back, visiting functions from the highest generation down.

        Each creator is expected to expose ``inputs``, ``outputs``,
        ``generation`` and ``backward(*gys)``.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # Nothing to propagate from a variable that no function produced;
        # without this guard ``add_func(None)`` would fail on ``generation``.
        if self.creator is None:
            return

        funcs = []
        seen_sets = set()

        def add_func(f):
            if f not in seen_sets:
                funcs.append(f)
                seen_sets.add(f)
                # Ascending sort, so ``pop()`` yields the highest generation.
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()
            # Gather the gradient of every output, then run the backward pass.
            gys = [output.grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            # Accumulate each gradient onto its input variable.
            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                # Queue the creator of each input.
                if x.creator is not None:
                    add_func(x.creator)
            
def as_array(x):
    """Return ``x`` as an ``np.ndarray`` if it is a scalar, otherwise ``x`` itself."""
    return np.array(x) if np.isscalar(x) else x
    

In [15]:
import numpy as np
import heapq

class Variable:
    """Hold an ndarray value, its gradient, its creator and its generation.

    Backprop orders functions with a ``heapq`` priority queue.

    Args:
        data: An ``np.ndarray`` payload, or ``None``.

    Raises:
        TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
    """

    def __init__(self, data):
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)} dtype is not supported")

        self.data = data
        self.grad = None
        self.creator = None
        self.generation = 0

    def set_creator(self, func):
        """Record the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def cleargrad(self):
        """Reset the stored gradient so the variable can be reused."""
        self.grad = None

    def backward(self):
        """Propagate gradients back, visiting functions from the highest generation down.

        Each creator is expected to expose ``inputs``, ``outputs``,
        ``generation`` and ``backward(*gys)``.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # Nothing to propagate from a variable that no function produced.
        if self.creator is None:
            return

        funcs = []         # heap of (-generation, sequence number, function)
        seen_sets = set()

        def add_func(f):
            if f not in seen_sets:
                # ``len(seen_sets)`` grows with every push, so it is a unique
                # tie-breaker: heapq never has to compare two function
                # objects. The previous per-iteration counter could repeat
                # for equal generations and raise TypeError.
                heapq.heappush(funcs, (-f.generation, len(seen_sets), f))
                seen_sets.add(f)

        add_func(self.creator)

        while funcs:
            f = heapq.heappop(funcs)[2]
            gys = [output.grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)
            
def as_array(x):
    """Wrap a scalar in ``np.array``; pass any non-scalar through untouched."""
    if not np.isscalar(x):
        return x
    return np.array(x)

In [16]:
class Function:
    """Base class for functions that also track a generation number."""

    def __call__(self, *inputs):
        """Apply the function; return one Variable or a list of Variables."""
        # Forward pass on the raw arrays.
        arrays = [variable.data for variable in inputs]
        results = self.forward(*arrays)
        if not isinstance(results, tuple):
            results = (results,)
        wrapped = [Variable(as_array(result)) for result in results]
        self.inputs = inputs
        self.outputs = wrapped

        # The function takes the largest generation among its inputs.
        self.generation = max(variable.generation for variable in inputs)

        # Linking each output to this function also sets the output's
        # generation from the value assigned just above.
        for variable in wrapped:
            variable.set_creator(self)

        if len(wrapped) > 1:
            return wrapped
        return wrapped[0]

    def forward(self, xs):
        """Compute outputs from the raw input arrays; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

    def backward(self, gy):
        """Compute input gradients from output gradients; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

In [17]:
class Add(Function):
    """Function that adds its two input arrays."""

    def forward(self, x0, x1):
        """Return ``x0 + x1``."""
        total = x0 + x1
        return total

    def backward(self, gy):
        """Pass the upstream gradient unchanged to both inputs."""
        return (gy, gy)
    

class Square(Function):
    """Function that squares its input."""

    def forward(self, x):
        """Return ``x ** 2``."""
        squared = x ** 2
        return squared

    def backward(self, gy):
        """Return ``2 * x * gy`` using the input recorded at call time."""
        source = self.inputs[0].data
        return 2 * source * gy

def add(x0, x1):
    """Apply a fresh ``Add`` instance to ``x0`` and ``x1``."""
    op = Add()
    return op(x0, x1)

def square(x):
    """Apply a fresh ``Square`` instance to ``x``."""
    op = Square()
    return op(x)

In [18]:
# Build a graph in which ``a`` feeds two branches: y = (x**2)**2 + (x**2)**2.
x = Variable(np.array(2.0))
a = square(x)
b = square(a)
c = square(a)
y = add(b, c)
print(y.data)

y.backward()
print(y.grad)
print(c.grad, b.grad)
# a receives one contribution from each branch.
print(a.grad)
# y = 2 * x**4, so dy/dx = 8 * x**3, i.e. 64 at x = 2.
print(x.grad)

32.0
1.0
1.0 1.0
16.0
64.0


In [19]:
# Same graph as the cell above, run again: y = (x**2)**2 + (x**2)**2.
x = Variable(np.array(2.0))
a = square(x)
b = square(a)
c = square(a)
y = add(b, c)
print(y.data)

y.backward()
print(y.grad)
print(c.grad, b.grad)
# a receives one contribution from each branch.
print(a.grad)
# y = 2 * x**4, so dy/dx = 8 * x**3, i.e. 64 at x = 2.
print(x.grad)

32.0
1.0
1.0 1.0
16.0
64.0


---
- 깃헙 기록 이자 복습용

In [28]:
import numpy as np
import heapq


def as_array(x):
    """Return ``x`` as an ``np.ndarray`` if it is a scalar, otherwise ``x`` itself."""
    return np.array(x) if np.isscalar(x) else x


class Variable:
    """Hold an ndarray value, its gradient, its creator and its generation.

    Args:
        data: An ``np.ndarray`` payload, or ``None``.

    Raises:
        TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
    """

    def __init__(self, data):
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)} dtype is not supported.")

        self.data = data
        self.grad = None
        self.creator = None
        self.generation = 0

    def set_creator(self, func):
        """Record the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def cleargrad(self):
        """Reset the stored gradient so the variable can be reused."""
        self.grad = None

    def backward(self, use_heapq=False):
        """Propagate gradients back, visiting functions from the highest generation down.

        Args:
            use_heapq: If true, order pending functions with a ``heapq``
                priority queue; otherwise re-sort a plain list on each insert.

        Each creator is expected to expose ``inputs``, ``outputs``,
        ``generation`` and ``backward(*gys)``.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # Nothing to propagate from a variable that no function produced.
        if self.creator is None:
            return

        funcs = []
        seen_sets = set()

        def add_func(f):
            if f in seen_sets:
                return
            if use_heapq:
                # Priority queue: ``len(seen_sets)`` grows with every push,
                # so it is a unique tie-breaker and heapq never has to
                # compare two function objects.
                heapq.heappush(funcs, (-f.generation, len(seen_sets), f))
            else:
                # Plain list kept sorted ascending; ``pop()`` takes the top.
                funcs.append(f)
                funcs.sort(key=lambda x: x.generation)
            seen_sets.add(f)

        add_func(self.creator)

        while funcs:
            # Fixed: this branch used to test the ``heapq`` module itself
            # (always truthy) instead of the ``use_heapq`` flag, so the
            # sort-based mode popped with heappop and crashed.
            if use_heapq:
                f = heapq.heappop(funcs)[2]
            else:
                f = funcs.pop()
            gys = [output.grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)


class Function:
    """Base class for functions that also track a generation number."""

    def __call__(self, *inputs):
        """Apply the function; return one Variable or a list of Variables."""
        arrays = [variable.data for variable in inputs]
        results = self.forward(*arrays)
        if not isinstance(results, tuple):
            results = (results,)
        wrapped = [Variable(as_array(result)) for result in results]

        # The function takes the largest generation among its inputs; it must
        # be set before the outputs read it in ``set_creator``.
        self.generation = max(variable.generation for variable in inputs)
        for variable in wrapped:
            variable.set_creator(self)

        self.inputs = inputs
        self.outputs = wrapped
        if len(wrapped) > 1:
            return wrapped
        return wrapped[0]

    def forward(self, xs):
        """Compute outputs from the raw input arrays; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

    def backward(self, gy):
        """Compute input gradients from output gradients; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")
        
        
class Square(Function):
    """Function that squares its input."""

    def forward(self, x):
        """Return ``x ** 2``."""
        return x ** 2

    def backward(self, gy):
        """Return ``2 * x * gy`` using the input recorded at call time."""
        source = self.inputs[0].data
        return 2 * source * gy


class Add(Function):
    """Function that adds its two input arrays."""

    def forward(self, x0, x1):
        """Return ``x0 + x1``."""
        return x0 + x1

    def backward(self, gy):
        """Pass the upstream gradient unchanged to both inputs."""
        return (gy, gy)

def square(x):
    """Apply a fresh ``Square`` instance to ``x``."""
    op = Square()
    return op(x)
def add(x0, x1):
    """Apply a fresh ``Add`` instance to ``x0`` and ``x1``."""
    op = Add()
    return op(x0, x1)

In [29]:
class Function:
    """Base class for functions that also track a generation number."""

    def __call__(self, *inputs):
        """Apply the function; return one Variable or a list of Variables."""
        arrays = [variable.data for variable in inputs]
        results = self.forward(*arrays)
        if not isinstance(results, tuple):
            results = (results,)
        wrapped = [Variable(as_array(result)) for result in results]

        # The function takes the largest generation among its inputs; it must
        # be set before the outputs read it in ``set_creator``.
        self.generation = max(variable.generation for variable in inputs)
        for variable in wrapped:
            variable.set_creator(self)

        self.inputs = inputs
        self.outputs = wrapped
        if len(wrapped) > 1:
            return wrapped
        return wrapped[0]

    def forward(self, xs):
        """Compute outputs from the raw input arrays; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

    def backward(self, gy):
        """Compute input gradients from output gradients; must be overridden."""
        raise NotImplementedError("This method should be called in other function class")

In [30]:
class Square(Function):
    """Function that squares its input."""

    def forward(self, x):
        """Return ``x ** 2``."""
        return x ** 2

    def backward(self, gy):
        """Return ``2 * x * gy`` using the input recorded at call time."""
        source = self.inputs[0].data
        return 2 * source * gy


def square(x):
    """Apply a fresh ``Square`` instance to ``x``."""
    op = Square()
    return op(x)


class Add(Function):
    """Function that adds its two input arrays."""

    def forward(self, x0, x1):
        """Return ``x0 + x1``."""
        return x0 + x1

    def backward(self, gy):
        """Pass the upstream gradient unchanged to both inputs."""
        return (gy, gy)


def add(x0, x1):
    """Apply a fresh ``Add`` instance to ``x0`` and ``x1``."""
    op = Add()
    return op(x0, x1)


In [34]:
# y = x + x, with backprop run in priority-queue mode.
x = Variable(np.array(2.0))

y = add(x, x)
print(y.data)

y.backward(use_heapq=True)
print(y.grad)
# Both operands contribute to x.grad (recorded output: 2.0).
print(x.grad)

4.0
1.0
2.0


In [33]:
# Graph in which ``a`` feeds two branches, with backprop in priority-queue
# mode: y = (x**2)**2 + (x**2)**2.
x = Variable(np.array(2.0))
a = square(x)
b = square(a)
c = square(a)
y = add(b, c)
print(y.data)

y.backward(use_heapq=True)
print(y.grad)
print(c.grad, b.grad)
# a receives one contribution from each branch.
print(a.grad)
# y = 2 * x**4, so dy/dx = 8 * x**3, i.e. 64 at x = 2.
print(x.grad)

32.0
1.0
1.0 1.0
16.0
64.0


---
- 다시한번 복습(4.25)

In [20]:
import numpy as np
import heapq


def as_array(x):
    """Wrap a scalar in ``np.array``; pass any non-scalar through untouched."""
    if not np.isscalar(x):
        return x
    return np.array(x)


class Variable:
    """Hold an ndarray value, its gradient, its creator and its generation.

    Args:
        data: An ``np.ndarray`` payload, or ``None``.

    Raises:
        TypeError: If ``data`` is neither ``None`` nor an ``np.ndarray``.
    """

    def __init__(self, data):
        # Validate the input type up front.
        if data is not None and not isinstance(data, np.ndarray):
            raise TypeError(f"{type(data)} dtype is not supported.")

        self.data = data
        self.grad = None
        self.creator = None
        self.generation = 0

    def set_creator(self, func):
        """Record the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def cleargrad(self):
        """Reset the stored gradient so the variable can be reused."""
        self.grad = None

    def backward(self, use_heapq=False):
        """Propagate gradients back, visiting functions from the highest generation down.

        Args:
            use_heapq: If true, order pending functions with a ``heapq``
                priority queue; otherwise re-sort a plain list on each insert.

        Each creator is expected to expose ``inputs``, ``outputs``,
        ``generation`` and ``backward(*gys)``.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # Nothing to propagate from a variable that no function produced.
        if self.creator is None:
            return

        funcs = []
        seen_sets = set()

        def add_func(f):
            if f in seen_sets:
                return
            if use_heapq:
                # ``len(seen_sets)`` grows with every push, so it is a unique
                # tie-breaker: heapq never has to compare two function
                # objects. The previous per-iteration counter could repeat
                # for equal generations and raise TypeError.
                heapq.heappush(funcs, (-f.generation, len(seen_sets), f))
            else:
                # Plain list kept sorted ascending; ``pop()`` takes the top.
                funcs.append(f)
                funcs.sort(key=lambda x: x.generation)
            seen_sets.add(f)

        add_func(self.creator)

        while funcs:
            if use_heapq:
                f = heapq.heappop(funcs)[2]
            else:
                f = funcs.pop()
            gys = [y.grad for y in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            # Accumulate each gradient onto its input variable.
            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                # Queue the creator of each input.
                if x.creator is not None:
                    add_func(x.creator)

In [21]:
class Function:
    """Base class for differentiable operations.

    Calling an instance unwraps the input variables, runs ``forward`` on the
    raw data, wraps the results in new ``Variable`` objects and links them to
    this function so that ``Variable.backward`` can walk the graph.
    """

    def __call__(self, *inputs):
        """Apply the function to ``inputs`` and return the output variable(s).

        Returns:
            A list of ``Variable`` when ``forward`` yields several results,
            otherwise the single ``Variable``.
        """
        results = self.forward(*[var.data for var in inputs])
        if not isinstance(results, tuple):
            results = (results,)
        outputs = [Variable(as_array(res)) for res in results]

        # A function's generation equals the largest generation among its
        # input variables.
        self.generation = max(var.generation for var in inputs)
        for out in outputs:
            out.set_creator(self)

        self.inputs = inputs
        self.outputs = outputs

        if len(outputs) > 1:
            return outputs
        return outputs[0]

    def forward(self, xs):
        """Compute the raw result; must be overridden by subclasses."""
        raise NotImplementedError("This method should be done outside of this Function class")

    def backward(self, gy):
        """Compute input gradients from ``gy``; must be overridden by subclasses."""
        raise NotImplementedError("This method should be done outside of this Function class")

In [22]:
class Add(Function):
    """Element-wise addition of two operands."""

    def forward(self, x0, x1):
        """Return the sum of the two raw operands."""
        total = x0 + x1
        return total

    def backward(self, gy):
        """Pass the upstream gradient unchanged to both operands."""
        return (gy, gy)
    

class Square(Function):
    """Element-wise square."""

    def forward(self, x):
        """Return ``x`` raised to the power of two."""
        return x ** 2

    def backward(self, gy):
        """Return ``2 * x * gy`` where ``x`` is the data of the first input."""
        return 2 * self.inputs[0].data * gy
    
    
def add(x0, x1):
    """Apply a fresh ``Add`` function to ``x0`` and ``x1`` and return its output."""
    op = Add()
    return op(x0, x1)

def square(x):
    """Apply a fresh ``Square`` function to ``x`` and return its output."""
    op = Square()
    return op(x)

In [23]:
# The same variable is passed as both operands of add.
x = Variable(np.array(2.0))

y = add(x, x)
print(y.data)

# Backpropagate with the heap-based scheduler, then show both gradients.
y.backward(use_heapq=True)
print(y.grad)
print(x.grad)

4.0
1.0
2.0


In [24]:
# Test case 1: the same variable feeds both operands of add.
x = Variable(np.array(2.0))

y = add(x, x)
print(y.data)

y.backward(use_heapq=True)
print(y.grad)
print(x.grad)

# Test case 2: a = x^2 branches into b = a^2 and c = a^2, which are joined
# again by add, so a receives gradient contributions from both branches.
x = Variable(np.array(2.0))
a = square(x)
b = square(a)
c = square(a)
y = add(b, c)
print(y.data)

y.backward(use_heapq=True)
print(y.grad)
print(c.grad, b.grad)
print(a.grad)
print(x.grad)

4.0
1.0
2.0
32.0
1.0
1.0 1.0
16.0
64.0


### Step17

In [2]:
# 순환 참조 문제 즉, 함수와 출력변수 간의 순환참조 문제를 해결하기 위해 약한 참조 사용!
# 약한 참조는 다른 객체가 참조할 때마다 참조카운트를 1증가시키지 않으면서 데이터 참조만 하는 방법!
import numpy as np
import heapq
import weakref

class Variable:
    """Node of the computation graph: an ndarray plus backprop bookkeeping.

    In this version each entry of a function's ``outputs`` is expected to be
    a callable (a weak reference) that returns the output variable.
    """

    def __init__(self, data):
        """Store ``data`` and reset the graph-related attributes.

        Args:
            data: ``np.ndarray`` payload, or ``None``.

        Raises:
            TypeError: if ``data`` is neither ``None`` nor an ``np.ndarray``.
        """
        if data is not None:
            if not isinstance(data, np.ndarray):
                raise TypeError(f"{type(data)} dtype is not supported.")

        self.data = data
        self.grad = None      # gradient; filled in by backward()
        self.creator = None   # function that produced this variable, if any
        self.generation = 0   # stays 0 until set_creator() is called

    def set_creator(self, func):
        """Record ``func`` as the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def cleargrad(self):
        """Reset the stored gradient to ``None``."""
        self.grad = None

    def backward(self, use_heap=False):
        """Propagate gradients from this variable back through its creators.

        Functions are processed from the highest generation to the lowest.

        Args:
            use_heap: when true, pending functions are kept in a ``heapq``
                priority queue; otherwise in a list that is re-sorted by
                generation after every insertion.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A leaf variable has no creator, so there is nothing to traverse.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Each function is scheduled at most once.
            if f in seen_set:
                return
            seen_set.add(f)
            if use_heap:
                # len(seen_set) grows with every push, so it is a unique
                # tie-breaker: heapq never falls through to comparing the
                # function objects themselves (which would raise TypeError).
                heapq.heappush(funcs, (-f.generation, len(seen_set), f))
            else:
                funcs.append(f)
                funcs.sort(key=lambda fn: fn.generation)

        add_func(self.creator)

        while funcs:
            f = heapq.heappop(funcs)[2] if use_heap else funcs.pop()
            # Each entry of f.outputs is called to obtain the output variable.
            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                # Accumulate rather than overwrite, so a variable that is
                # used more than once receives the sum of its gradients.
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)
            
def as_array(x):
    """Convert scalars to ndarrays; leave every other object untouched.

    Args:
        x: any object.

    Returns:
        ``x`` itself unless ``np.isscalar(x)`` is true, in which case
        ``np.array(x)``.
    """
    if not np.isscalar(x):
        return x
    return np.array(x)

In [3]:
class Function:
    """Base class for differentiable operations (weak-reference version).

    Outputs are stored as weak references so that a function and its output
    variables do not keep each other alive through a reference cycle.
    """

    def __call__(self, *inputs):
        """Apply the function to ``inputs`` and return the output variable(s).

        Returns:
            A list of ``Variable`` when ``forward`` yields several results,
            otherwise the single ``Variable``.
        """
        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]

        # A function's generation equals the largest generation among its
        # input variables.
        self.generation = max(x.generation for x in inputs)
        for output in outputs:
            output.set_creator(self)

        self.inputs = inputs
        # One weak reference per output: a plain list cannot be weakly
        # referenced, and Variable.backward iterates over this attribute and
        # calls each entry to obtain the variable.
        self.outputs = [weakref.ref(output) for output in outputs]

        return outputs if len(outputs) > 1 else outputs[0]

### Step18
- 메모리 절약 모드
- 1.필요없는 미분값 삭제
- 2.학습시에는 역전파 활성을, 추론시에는 역전파 비활성 모드로 왔다갔다 하도록 하기

In [4]:
import numpy as np
import heapq
import weakref

class Variable:
    """Node of the computation graph: an ndarray plus backprop bookkeeping.

    In this version each entry of a function's ``outputs`` is expected to be
    a callable (a weak reference) that returns the output variable, and
    ``backward`` can discard the gradients of intermediate variables.
    """

    def __init__(self, data):
        """Store ``data`` and reset the graph-related attributes.

        Args:
            data: ``np.ndarray`` payload, or ``None``.

        Raises:
            TypeError: if ``data`` is neither ``None`` nor an ``np.ndarray``.
        """
        if data is not None:
            if not isinstance(data, np.ndarray):
                raise TypeError(f"{type(data)} dtype is not supported.")

        self.data = data
        self.grad = None      # gradient; filled in by backward()
        self.creator = None   # function that produced this variable, if any
        self.generation = 0   # stays 0 until set_creator() is called

    def set_creator(self, func):
        """Record ``func`` as the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def cleargrad(self):
        """Reset the stored gradient to ``None``."""
        self.grad = None

    def backward(self, retain_grad=False, use_heap=False):
        """Propagate gradients from this variable back through its creators.

        Functions are processed from the highest generation to the lowest.

        Args:
            retain_grad: when false, the gradient of every processed
                function's outputs is reset to ``None`` once it has been
                consumed, so only variables that are never a function output
                keep their gradient.
            use_heap: when true, pending functions are kept in a ``heapq``
                priority queue; otherwise in a list that is re-sorted by
                generation after every insertion.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A leaf variable has no creator, so there is nothing to traverse.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Each function is scheduled at most once.
            if f in seen_set:
                return
            seen_set.add(f)
            if use_heap:
                # len(seen_set) grows with every push, so it is a unique
                # tie-breaker: heapq never falls through to comparing the
                # function objects themselves (which would raise TypeError).
                heapq.heappush(funcs, (-f.generation, len(seen_set), f))
            else:
                funcs.append(f)
                funcs.sort(key=lambda fn: fn.generation)

        add_func(self.creator)

        while funcs:
            f = heapq.heappop(funcs)[2] if use_heap else funcs.pop()
            # Each entry of f.outputs is called to obtain the output variable.
            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                # Accumulate rather than overwrite, so a variable that is
                # used more than once receives the sum of its gradients.
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

            # Drop the gradients of this function's outputs once they have
            # been propagated; the outputs are reached through weak references.
            if not retain_grad:
                for y in f.outputs:
                    y().grad = None
            
def as_array(x):
    """Promote a scalar to a zero-dimensional ndarray.

    Args:
        x: any object.

    Returns:
        ``np.array(x)`` when ``np.isscalar(x)`` is true, else ``x`` unchanged.
    """
    is_scalar = np.isscalar(x)
    return np.array(x) if is_scalar else x

In [5]:
class Config:
    """Global switches consulted by ``Function.__call__``."""

    # When true, functions record creators/inputs/outputs so that backward
    # can run; when false, that bookkeeping is skipped.
    enable_backprop: bool = True

In [6]:
class Function:
    """Base class for differentiable operations with a backprop on/off switch.

    Graph bookkeeping is only recorded while ``Config.enable_backprop`` is
    true. Outputs are stored as weak references so that a function and its
    output variables do not keep each other alive through a reference cycle.
    """

    def __call__(self, *inputs):
        """Apply the function to ``inputs`` and return the output variable(s).

        Returns:
            A list of ``Variable`` when ``forward`` yields several results,
            otherwise the single ``Variable``.
        """
        xs = [x.data for x in inputs]
        ys = self.forward(*xs)
        if not isinstance(ys, tuple):
            ys = (ys,)
        outputs = [Variable(as_array(y)) for y in ys]

        # Record the graph links only when backprop is enabled.
        if Config.enable_backprop:
            self.generation = max(x.generation for x in inputs)
            for output in outputs:
                output.set_creator(self)

            self.inputs = inputs
            # One weak reference per output: a plain list cannot be weakly
            # referenced, and Variable.backward iterates over this attribute
            # and calls each entry to obtain the variable.
            self.outputs = [weakref.ref(output) for output in outputs]

        return outputs if len(outputs) > 1 else outputs[0]


In [7]:
# 위 코드에서 모드 전환을 더 쉽게 해줄수 있도록 변경해보기! -> contextlib의 contextmanager 를 활용하기!
import contextlib

@contextlib.contextmanager
def config_test():
    """Demo context manager that prints a message on entry and on exit.

    The closing message is printed from a ``finally`` clause, so it is
    emitted even if the body of the ``with`` statement raises.
    """
    print("Start...")
    try:
        yield
    finally:
        print("Done")

# Prints "Start...", then the body's "Process", then "Done".
with config_test():
    print('Process')

Start...
Process
Done


In [8]:
# 역전파 활성 모드 전환하는 contextmanager 만들기
import contextlib

@contextlib.contextmanager
def using_config(name: str, value):
    """Temporarily override one attribute of ``Config``.

    The previous value is restored when the ``with`` block exits, including
    when it exits through an exception.

    Args:
        name: name of the ``Config`` attribute to override.
        value: value the attribute takes inside the ``with`` block.
    """
    # Remember the current value of Config.<name> so it can be put back.
    previous = getattr(Config, name)
    setattr(Config, name, value)
    try:
        yield
    finally:
        setattr(Config, name, previous)

In [None]:
# Config.enable_backprop is False only while this block runs.
with using_config('enable_backprop', False):
    x = Variable(np.array(2.0))
    y = square(x)

In [None]:
# 더 편하게 하나의 함수로 만들기
def no_grad():
    """Return a context manager that sets ``Config.enable_backprop`` to False."""
    manager = using_config('enable_backprop', False)
    return manager

# Shorthand for using_config('enable_backprop', False).
with no_grad():
    x = Variable(np.array(2.0))
    y = square(x)

---

### Step19
- 먼저 복습

In [56]:
import numpy as np
import heapq
import weakref


class Config:
    """Global switches consulted by ``Function.__call__``."""

    # When true, functions record creators/inputs/outputs so that backward
    # can run; when false, that bookkeeping is skipped.
    enable_backprop: bool = True
    

class Variable:
    """Node of the computation graph: an ndarray plus backprop bookkeeping.

    Supports ``*`` and ``+`` through the module-level ``mul`` and ``add``
    functions and exposes a few ndarray-like read-only properties.
    """

    # Raised operator priority so that, when an ndarray is the left operand,
    # the operation is handed over to this class's operator methods.
    # (``__array_priority__`` is an attribute NumPy itself looks up.)
    __array_priority__ = 200

    def __init__(self, data, name=None):
        """Store ``data`` and reset the graph-related attributes.

        Args:
            data: ``np.ndarray`` payload, or ``None``.
            name: optional label for the variable.

        Raises:
            TypeError: if ``data`` is neither ``None`` nor an ``np.ndarray``.
        """
        if data is not None:
            if not isinstance(data, np.ndarray):
                raise TypeError(f"{type(data)} dtype is not supported.")

        self.name = name
        self.data = data
        self.grad = None      # gradient; filled in by backward()
        self.creator = None   # function that produced this variable, if any
        self.generation = 0   # stays 0 until set_creator() is called

    # Magic methods
    def __len__(self):
        """Return ``len(self.data)``."""
        return len(self.data)

    def __repr__(self):
        """Return ``variable(<data>)`` with continuation lines indented to align."""
        if self.data is None:
            return 'variable(None)'
        # Indent continuation lines by the width of the 'variable(' prefix.
        p = str(self.data).replace('\n', '\n' + ' ' * 9)
        return 'variable(' + p + ')'

    def __mul__(self, other):
        """Return ``mul(self, other)``."""
        return mul(self, other)

    def __add__(self, other):
        """Return ``add(self, other)``."""
        return add(self, other)

    def set_creator(self, func):
        """Record ``func`` as the creator and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def clear_grad(self):
        """Reset the stored gradient to ``None``."""
        self.grad = None

    # Alias matching the method name used by the earlier Variable versions.
    cleargrad = clear_grad

    def backward(self, retain_grad=False, use_heap=False):
        """Propagate gradients from this variable back through its creators.

        Functions are processed from the highest generation to the lowest.

        Args:
            retain_grad: when false, the gradient of every processed
                function's outputs is reset to ``None`` once it has been
                consumed, so only variables that are never a function output
                keep their gradient.
            use_heap: when true, pending functions are kept in a ``heapq``
                priority queue; otherwise in a list that is re-sorted by
                generation after every insertion.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A leaf variable has no creator, so there is nothing to traverse.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Each function is scheduled at most once.
            if f in seen_set:
                return
            seen_set.add(f)
            if use_heap:
                # Priority is the negated generation (heapq is a min-heap).
                # len(seen_set) grows with every push, so it is a unique
                # tie-breaker: heapq never falls through to comparing the
                # function objects themselves (which would raise TypeError).
                heapq.heappush(funcs, (-f.generation, len(seen_set), f))
            else:
                funcs.append(f)
                funcs.sort(key=lambda fn: fn.generation)

        add_func(self.creator)

        while funcs:
            f = heapq.heappop(funcs)[2] if use_heap else funcs.pop()
            # Each entry of f.outputs is a weak reference; call it to get
            # the output variable.
            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                # Accumulate rather than overwrite, so a variable that is
                # used more than once receives the sum of its gradients.
                if x.grad is None:
                    x.grad = gx
                else:
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

            # Drop the gradients of this function's outputs once they have
            # been propagated.
            if not retain_grad:
                for y in f.outputs:
                    y().grad = None

    # Convenience accessors that forward to the wrapped ndarray.
    @property
    def shape(self):
        """Shape of the wrapped ndarray."""
        return self.data.shape

    @property
    def ndim(self):
        """Number of dimensions of the wrapped ndarray."""
        return self.data.ndim

    @property
    def size(self):
        """Number of elements of the wrapped ndarray."""
        return self.data.size

    @property
    def dtype(self):
        """dtype of the wrapped ndarray."""
        return self.data.dtype
    
    
class Function:
    """Base class for differentiable operations.

    Calling an instance converts every input to a ``Variable``, runs
    ``forward`` on the raw data and wraps the results in new variables.
    Graph links are recorded only while ``Config.enable_backprop`` is true.
    """

    def __call__(self, *inputs):
        """Apply the function to ``inputs`` and return the output variable(s).

        Returns:
            A list of ``Variable`` when ``forward`` yields several results,
            otherwise the single ``Variable``.
        """
        # Inputs that are not already Variable instances are wrapped first.
        inputs = [as_variable(item) for item in inputs]
        results = self.forward(*[var.data for var in inputs])
        if not isinstance(results, tuple):
            results = (results,)
        outputs = [Variable(as_array(res)) for res in results]

        if Config.enable_backprop:
            self.generation = max(var.generation for var in inputs)
            for out in outputs:
                out.set_creator(self)

            self.inputs = inputs
            # Weak references, one per output variable.
            self.outputs = [weakref.ref(out) for out in outputs]

        if len(outputs) > 1:
            return outputs
        return outputs[0]

    def forward(self, xs):
        """Compute the raw result; must be overridden by subclasses."""
        raise NotImplementedError("This method should be done outside of Function class")

    def backward(self, gy):
        """Compute input gradients from ``gy``; must be overridden by subclasses."""
        raise NotImplementedError("This method should be done outside of Function class")
        

class Add(Function):
    """Element-wise addition of two operands."""

    def forward(self, x0, x1):
        """Return the sum of the two raw operands."""
        total = x0 + x1
        return total

    def backward(self, gy):
        """Pass the upstream gradient unchanged to both operands."""
        return (gy, gy)


class Square(Function):
    """Element-wise square."""

    def forward(self, x):
        """Return ``x`` raised to the power of two."""
        return x ** 2

    def backward(self, gy):
        """Return ``2 * x * gy`` where ``x`` is the data of the first input."""
        return 2 * self.inputs[0].data * gy


class Mul(Function):
    """Element-wise multiplication of two operands."""

    def forward(self, x0, x1):
        """Return the product of the two raw operands."""
        product = x0 * x1
        return product

    def backward(self, gy):
        """Return the gradients for both operands.

        Each operand's gradient is the upstream gradient times the data of
        the *other* operand.
        """
        first = self.inputs[0].data
        second = self.inputs[1].data
        grad_first = gy * second
        grad_second = gy * first
        return grad_first, grad_second


def add(x0, x1):
    """Add ``x0`` and ``x1`` through a fresh ``Add`` function.

    A right operand for which ``np.isscalar`` is true is first converted to
    an ndarray by ``as_array``.
    """
    rhs = as_array(x1)
    op = Add()
    return op(x0, rhs)

def square(x):
    """Apply a fresh ``Square`` function to ``x`` and return its output."""
    op = Square()
    return op(x)

def mul(x0, x1):
    """Multiply ``x0`` and ``x1`` through a fresh ``Mul`` function.

    A right operand for which ``np.isscalar`` is true is first converted to
    an ndarray by ``as_array``.
    """
    rhs = as_array(x1)
    op = Mul()
    return op(x0, rhs)
          
def as_array(x):
    """Return ``x`` as an ndarray when it is a scalar, otherwise unchanged.

    Args:
        x: any object; only values for which ``np.isscalar`` is true are
            converted.

    Returns:
        ``np.array(x)`` for scalars, or ``x`` itself for everything else.
    """
    return np.array(x) if np.isscalar(x) else x

# nd-array가 들어왔을 때 Variable 클래스 적용해주기
def as_variable(obj):
    """Return ``obj`` itself if it is a ``Variable``, else wrap it in one."""
    return obj if isinstance(obj, Variable) else Variable(obj)

In [57]:
# contextlib 복습
import contextlib

@contextlib.contextmanager
def using_config(name: str, value):
    """Temporarily override one attribute of ``Config``.

    The previous value is restored when the ``with`` block exits, including
    when it exits through an exception.

    Args:
        name: name of the ``Config`` attribute to override.
        value: value the attribute takes inside the ``with`` block.
    """
    previous = getattr(Config, name)
    setattr(Config, name, value)
    try:
        yield
    finally:
        setattr(Config, name, previous)

# with using_config('enable_backprop', False):
#     a = Variable(np.array(20))
#     a = square(a)
    
# def no_grad():
#     return using_config('enable_backprop', False)

# with no_grad():
#     a = Variable(np.array(20))
#     a = square(a)

In [58]:
# Functions are objects too, so they can be assigned directly as magic methods.
Variable.__mul__ = mul
Variable.__add__ = add


# When the left operand is a scalar (np.isscalar is true), the reflected
# methods __radd__ / __rmul__ are the ones that get used, so they are set here.
# Addition and multiplication give the same result with the operands swapped,
# so the same add / mul functions can be reused for the reflected versions.
Variable.__rmul__ = mul
Variable.__radd__ = add

In [59]:
a = Variable(np.array(3.0))
b = Variable(np.array(2.0))
c = Variable(np.array(1.0))

# Operator form of: y = add(mul(a, b), c)
# y = add(mul(a, b), c)
y = (a * b) + c
print(y)
# backward() is called with the default retain_grad=False, so the gradients
# of function outputs (including y itself) are reset to None afterwards.
y.backward()

print(y.grad)
print(c.grad)
print(b.grad)
print(a.grad)

variable(7.0)
None
1.0
3.0
2.0


In [60]:
# Variable + ndarray: the ndarray operand is wrapped by as_variable inside
# Function.__call__.
x = Variable(np.array(2.))
y = x + np.array(3)
print(y.data)

5.0


In [61]:
# Variable * Python scalar: mul converts the scalar with as_array.
x = Variable(np.array(2.0))
y = x * 3
print(y.data)

6.0


In [62]:
# Python scalar * Variable: handled by Variable.__rmul__ (bound to mul).
x = Variable(np.array(2.0))
y = 3 * x
print(y.data)

6.0


In [65]:
# ndarray * Variable with a zero-dimensional ndarray on the left.
# NOTE(review): presumably this reaches Variable.__rmul__ because of
# Variable.__array_priority__ — confirm against the NumPy docs.
x0 = np.array(3.0)
x1 = Variable(np.array(2.0))
y = x0 * x1
print(y.data)

# Same operation with one-element, one-dimensional arrays.
x0 = np.array([3.0])
x1 = Variable(np.array([2.0]))
y = x0 * x1
print(y.data)

6.0
[6.]
