#### 코드 7-1 피보나치 수열 계산

In [1]:
def fibonacci(n):
    """Return the n-th term of the sequence 1, 1, 2, 3, 5, 8, ...

    The sequence is indexed from zero with both base cases equal to 1, and
    is computed by plain (un-memoised) recursion, so the running time grows
    exponentially with n.

    Args:
        n: Non-negative integer index of the term to compute.

    Returns:
        The n-th term as an int.

    Raises:
        ValueError: If n is negative (which includes any non-integer value
            once the recursion steps past zero).
    """
    # Without this guard a negative argument never reaches a base case and
    # recurses until the interpreter raises RecursionError.
    if n < 0:
        raise ValueError("n must be a non-negative integer")
    if n in (0, 1):
        return 1
    return fibonacci(n - 1) + fibonacci(n - 2)

# Print the first eight terms, one per line.
for term in map(fibonacci, range(8)):
    print(term)

1
1
2
3
5
8
13
21


#### 코드 7-2 중복 계산을 하지 않는 피보나치 수열 계산

In [2]:
# Cache of already-computed terms, seeded with the two base cases.
memo = {0: 1, 1: 1}

def fibonacci(n):
    """Return the n-th term of the sequence 1, 1, 2, 3, 5, 8, ... using a cache.

    Every computed term is stored in the module-level ``memo`` dict, so each
    term is calculated at most once across all calls.

    Args:
        n: Non-negative integer index of the term to compute.

    Returns:
        The n-th term as an int.

    Raises:
        ValueError: If n is negative and not already present in ``memo``.
    """
    if n in memo:
        return memo[n]
    if n < 0:
        raise ValueError("n must be a non-negative integer")
    # Fill the cache bottom-up instead of recursing: a first call with a
    # large n would otherwise exceed the interpreter's recursion limit.
    for k in range(2, n + 1):
        if k not in memo:
            memo[k] = memo[k - 1] + memo[k - 2]
    return memo[n]

# Print the first eight terms, one per line.
for term in (fibonacci(k) for k in range(8)):
    print(term)

1
1
2
3
5
8
13
21


#### 코드 7-3 Sarsa 학습 구현

In [3]:
import numpy as np

# States. Episodes start in S[0]; state 3 has no outgoing transitions in S1.
S = np.arange(4)

# Actions, used as the column index into R, S1 and Q.
A = np.arange(2)

# R[s, a]: reward for taking action a in state s.
R = np.array([
    [1, -20],
    [4, -1],
    [0, 25],
    [0, 0],
])

# S1[s, a]: state reached by taking action a in state s (None = no transition).
S1 = np.array([
    [1, 2],
    [3, 0],
    [0, 3],
    [None, None],
])

# Threshold passed to pi(): a uniform draw at or below it selects action 0.
p = 0.5

# Learning rate
alpha = 0.01

# Discount factor
gamma = 0.8

# Number of training episodes
n = 3000

# Action-value table, one zero-initialised entry per (state, action) pair.
Q = np.zeros_like(R, dtype=float)

# Random policy: choose between the two actions by a single uniform draw.
def pi(p):
    """Return action 0 if a uniform draw from [0, 1) is at most p, else 1."""
    draw = np.random.uniform(0, 1)
    return 0 if draw <= p else 1

def sarsa():
    """Run one Sarsa episode from S[0], updating the global Q table in place.

    Actions are drawn from pi(p). Each step applies the on-policy update
    Q[s, a] += alpha * (R[s, a] + gamma * Q[s', a'] - Q[s, a]), where a' is
    the action that will actually be taken in the next state s'. The episode
    ends when the chosen transition in S1 is None. Afterwards the two action
    values of state 0 are printed.
    """
    s = S[0]
    a = pi(p)
    s_next = S1[s, a]
    # S1 stores None where no transition exists; identity comparison is the
    # correct (and PEP 8 recommended) way to test for that sentinel.
    while s_next is not None:
        a_next = pi(p)
        td = R[s, a] + gamma * Q[s_next, a_next] - Q[s, a]
        Q[s, a] += alpha * td
        s, a = s_next, a_next
        s_next = S1[s, a]
    print(Q[0, 0], Q[0, 1])

# Run the configured number of Sarsa episodes.
for _episode in range(n):
    sarsa()

0.01 -0.2
0.0199 -0.398
0.030021 -0.398
0.04035759 -0.78811781728
0.0509044461 -0.78811781728
0.0509044461 -0.9782366391071999
0.0509044461 -1.164474272716128
0.0509044461 -1.3468893299889666
0.061656329319 -1.3468893299889666
0.061656329319 -1.525539638689077
0.0818058147687519 -1.525539638689077
0.09286039184023238 -1.525539638689077
0.11898820189894989 -1.8834147684647324
0.11898820189894989 -2.052876650660285
0.15392032340564724 -2.052876650660285
0.15392032340564724 -2.5725341206369823
0.15392032340564724 -2.9042098171787583
0.1648532899498973 -2.9042098171787583
0.18345430670258878 -2.9042098171787583
0.18345430670258878 -3.395842470260184
0.19467953723528114 -3.395842470260184
0.19467953723528114 -3.542760460559343
0.2060819177266494 -3.542760460559343
0.224670303278674 -3.542760460559343
0.224670303278674 -3.686400506805493
0.224670303278674 -4.150193740966289
0.23634292750992028 -4.150193740966289
0.24817963222621378 -4.150193740966289
0.2728463669969456 -4.468305239541153
0.2

0.30631149917701755 -12.98140344562354
0.2624066197032521 -12.852189440488216
0.30170862575168367 -12.852189440488216
0.30170862575168367 -12.724261575111035
0.3406183510171763 -12.724261575111035
0.37913971091478393 -12.84065701019406
0.4172765817793377 -12.84065701019406
0.4172765817793377 -12.712838528829543
0.2829059565934701 -12.7011833219997
0.3220058823215 -12.816898610222133
0.277222852545112 -12.913680563151315
0.23249569804844408 -12.897101116416923
0.2278731282022672 -12.897101116416923
0.26752479540685925 -12.897101116416923
0.3067806419545392 -12.897101116416923
0.3067806419545392 -12.76869502094745
0.3067806419545392 -12.754553544541308
0.34564461909172484 -12.754553544541308
0.3841206386219713 -12.754553544541308
0.3383858519252661 -12.853880807763392
0.2866144842949258 -13.193439827484838
0.2866144842949258 -13.062053566343637
0.2802406673214133 -13.173976648881485
0.1878692032973366 -13.152038917891241
0.2279249828211427 -13.152038917891241
0.2279249828211427 -13.02105

-0.6092213468484408 -11.92469644997145
-0.5611293339665663 -12.172940772372304
-0.5611293339665663 -12.172018511605367
-0.5611293339665663 -12.171460420345849
-0.5135182392076445 -12.171460420345849
-0.5007883316051669 -12.171460420345849
-0.5352844463859139 -12.04974674347983
-0.5352844463859139 -11.929250194109096
-0.5352844463859139 -11.809958601051429
-0.5221017327987267 -11.809958601051429
-0.5221017327987267 -11.691859914835504
-0.5745160666519377 -11.691859914835504
-0.5590654140184528 -11.819392297929298
-0.5749678079126554 -11.819392297929298
-0.6373635723895655 -11.825790091030663
-0.6373635723895655 -11.707533072009035
-0.6373635723895655 -11.590458614358736
-0.6373635723895655 -11.474554892554242
-0.6682853155119299 -11.488135737340107
-0.6502470532982826 -11.746003877967029
-0.6017447678555299 -11.746003877967029
-0.5537275034163024 -11.746003877967029
-0.538312643278616 -11.746003877967029
-0.4909296964386951 -11.746003877967029
-0.47640580768969126 -11.746003877967029
-0

#### 코드 7-4 Q 학습 구현

In [4]:
import numpy as np

# States. Episodes start in S[0]; state 3 has no outgoing transitions in S1.
S = np.array(range(4))

# Actions, used as the column index into R, S1 and Q.
A = np.array(range(2))

# R[s, a]: reward for taking action a in state s.
R = np.array([(1, -20), (4, -1), (0, 25), (0, 0)])

# S1[s, a]: state reached by taking action a in state s (None = no transition).
S1 = np.array([(1, 2), (3, 0), (0, 3), (None, None)])

# Threshold passed to pi(): a uniform draw at or below it selects action 0.
p = 0.5

# Learning rate
alpha = 0.01

# Discount factor
gamma = 0.8

# Number of training episodes
n = 3000

# Action-value table, one zero-initialised entry per (state, action) pair.
Q = np.zeros((len(S), len(A)))

# Random policy: choose between the two actions by a single uniform draw.
def pi(p):
    """Return 0 when a uniform sample from [0, 1) does not exceed p; otherwise 1."""
    picked_first = np.random.uniform(0, 1) <= p
    if picked_first:
        return 0
    return 1

def q_learning():
    """Run one Q-learning episode from S[0], updating the global Q table in place.

    Actions are drawn from pi(p). Each step applies the off-policy update
    Q[s, a] += alpha * (R[s, a] + gamma * max(Q[s', :]) - Q[s, a]), i.e. the
    target uses the best action value of the next state s' regardless of
    which action is taken there. Afterwards the two action values of state 0
    are printed.
    """
    s = S[0]
    # A state with no outgoing transition ends the episode. Such rows in S1
    # hold None for every action, so inspecting column 0 is sufficient; this
    # avoids testing the guard against a dummy or stale action.
    while S1[s, 0] is not None:
        a = pi(p)
        s_next = S1[s, a]
        # Row maximum rather than a hard-coded pair, so the number of
        # actions is not baked into the update.
        td = R[s, a] + gamma * Q[s_next].max() - Q[s, a]
        Q[s, a] += alpha * td
        s = s_next
    print(Q[0, 0], Q[0, 1])

# Run the configured number of Q-learning episodes.
for _episode in range(n):
    q_learning()

0.01 -0.2
0.0199 -0.59004
0.029701 -0.7801596000000001
0.03940399 -1.693010128375562
0.0490099501 -1.693010128375562
0.058839850599 -1.693010128375562
0.06888825209301 -1.693010128375562
0.0791498015720799 -2.0416364388008885
0.0896192312363591 -2.0416364388008885
0.10029135732719552 -2.2133392764128796
0.10029135732719552 -2.3814038936287507
0.11085676215712356 -2.3814038936287507
0.11085676215712356 -2.7087230556071367
0.12162082975472033 -2.7087230556071367
0.12162082975472033 -2.868048894632463
0.13257853032414946 -2.868048894632463
0.17649973256331133 -3.0239173445717227
0.23042268881079978 -3.1782271100115893
0.24117823552241002 -3.1782271100115893
0.252115629030907 -3.3309937777970573
0.252115629030907 -3.628286866117385
0.252115629030907 -3.772880412457972
0.2736633879819969 -4.054144717445028
0.2845624382072608 -4.192670921122321
0.30659910694236203 -4.192670921122321
0.3177332498643311 -4.192670921122321
0.3177332498643311 -4.329811862762841
0.3177332498643311 -4.463790718478

4.174036063877197 -0.10425094811836152
4.1741550482753675 -0.10425094811836152
4.174274249379187 -0.10425094811836152
4.174511858517249 -0.10425094811836152
4.1746302625620775 -0.10425094811836152
4.1746302625620775 -0.1037244988676567
4.1746302625620775 -0.10319815350715413
4.174748847340157 -0.10215103360982734
4.174748847340157 -0.10163025599930241
4.174866246270456 -0.10163025599930241
4.174866246270456 -0.10008408977141026
4.174866246270456 -0.0995740170180306
4.175100222643705 -0.0995740170180306
4.175216796561635 -0.0995740170180306
4.175216796561635 -0.0990641373107414
4.175333528978942 -0.09855449779589617
4.175333528978942 -0.0980499546761994
4.175333528978942 -0.09557834512028388
4.175450405068246 -0.09557834512028388
4.175911452298933 -0.09509399159036379
4.1760251319485935 -0.09413976670106823
4.1760251319485935 -0.0936697989553403
4.176138946860031 -0.0936697989553403
4.176138946860031 -0.09319981658785685
4.176252882960045 -0.09319981658785685
4.176479829491868 -0.092264

4.19993338134243 -0.0002062835541269394
4.199933725831427 -0.00020520569680309428
4.199934066875533 -0.00020306260902284683
4.199934066875533 -0.00020199736008353052
4.199934066875533 -0.00020093310986210074
4.199934066875533 -0.00019986994490908978
4.199934404509199 -0.00019880794994414573
4.199934404509199 -0.00019774720788400468
4.199935069681285 -0.00019774720788400468
4.199935728018226 -0.00019460066014183704
4.19993605225744 -0.00019356353696469187
4.199936373254262 -0.00019356353696469187
4.199936694225922 -0.00019356353696469187
4.199937015140824 -0.00019356353696469187
4.199937015140824 -0.00019252769618506406
4.199937968029634 -0.00019046908035272629
4.1999382793279265 -0.00019046908035272629
4.199938590603451 -0.00019046908035272629
4.199938901825532 -0.00019046908035272629
4.199938901825532 -0.00018843370412220987
4.199938901825532 -0.00018742243687185097
4.199939520991304 -0.0001854127651730843
4.199939825938225 -0.0001854127651730843
4.199940130834109 -0.00018541276517308