In [0]:
# 三目並べの作成
import random

# Game state
class State:
  """Tic-tac-toe position, stored from the point of view of the side to move.

  Both boards are flat sequences of nine 0/1 flags; cell (x, y) lives at
  index ``x + y * 3``. ``pieces`` holds the stones of the player who moves
  next and ``enemy_pieces`` those of the opponent. ``next()`` swaps the two
  boards, so every state is always read from the mover's side.
  """

  def __init__(self, pieces=None, enemy_pieces=None):
    """Create a state; a board passed as None starts out empty.

    Args:
      pieces: nine 0/1 flags for the side to move, or None.
      enemy_pieces: nine 0/1 flags for the opponent, or None.
    """
    # Identity test rather than `!= None`: it never invokes a custom
    # comparison on the argument and is the recommended None check.
    self.pieces = pieces if pieces is not None else [0] * 9
    self.enemy_pieces = enemy_pieces if enemy_pieces is not None else [0] * 9

  def piece_count(self, pieces):
    """Return how many cells of `pieces` are equal to 1."""
    return sum(1 for cell in pieces if cell == 1)

  def is_lose(self):
    """Return True when the opponent owns a complete line of three."""
    def is_comp(x, y, dx, dy):
      # Walk three cells from (x, y) in direction (dx, dy); the line is
      # complete only if every visited cell is on the board and non-empty
      # on the opponent's board.
      for _ in range(3):
        if y < 0 or y > 2 or x < 0 or x > 2 or self.enemy_pieces[x + y * 3] == 0:
          return False
        x, y = x + dx, y + dy
      return True

    # The two diagonals.
    if is_comp(0, 0, 1, 1) or is_comp(0, 2, 1, -1):
      return True
    # Row i (left to right) and column i (top to bottom).
    return any(is_comp(0, i, 1, 0) or is_comp(i, 0, 0, 1) for i in range(3))

  def is_draw(self):
    """Return True when all nine cells are occupied.

    This does not look at lines of three, so callers that need to tell a
    draw from a loss must test is_lose() first.
    """
    return self.piece_count(self.pieces) + self.piece_count(self.enemy_pieces) == 9

  def is_done(self):
    """Return True when the game is over (lost or board full)."""
    return self.is_lose() or self.is_draw()

  def next(self, action):
    """Return the state reached after the side to move plays `action`.

    Args:
      action: cell index (0-8) to place a stone on.

    Returns:
      A new State with the boards swapped, so it is again seen from the
      side to move. This state is left unmodified.
    """
    # list(...) copies any sequence (list, tuple, ...), not only objects
    # that provide a .copy() method.
    pieces = list(self.pieces)
    pieces[action] = 1
    return State(self.enemy_pieces, pieces)

  def legal_actions(self):
    """Return the indices of all cells that are empty on both boards."""
    return [i for i in range(9)
            if self.pieces[i] == 0 and self.enemy_pieces[i] == 0]

  def is_first_player(self):
    """Return True when both sides have the same number of stones."""
    return self.piece_count(self.pieces) == self.piece_count(self.enemy_pieces)

  def __str__(self):
    """Render the board as three lines; the first player's stones are 'o'."""
    mine, theirs = ('o', 'x') if self.is_first_player() else ('x', 'o')
    cells = []
    for i in range(9):
      if self.pieces[i] == 1:
        cells.append(mine)
      elif self.enemy_pieces[i] == 1:
        cells.append(theirs)
      else:
        cells.append('-')

      # End of a board row.
      if i % 3 == 2:
        cells.append('\n')

    return ''.join(cells)

In [0]:
# Pick an action at random
def random_action(state):
  """Return one entry of `state.legal_actions()` chosen uniformly at random.

  Args:
    state: object exposing `legal_actions()`, which returns a sequence of
      actions.

  Returns:
    One element of the legal-action sequence.

  Raises:
    ValueError: if there is no legal action to choose from.
  """
  legal_actions = state.legal_actions()
  if not legal_actions:
    # Keep the ValueError the old randint-based code raised here, but with
    # a message that names the actual problem.
    raise ValueError('no legal actions available')
  return random.choice(legal_actions)

In [0]:
# Random vs. random

# Start from the empty board
state = State()

# Keep playing random moves until the game is over,
# printing the board after every move
while not state.is_done():
  action = random_action(state)
  state = state.next(action)
  print(state)

---
o--
---

---
o--
--x

---
oo-
--x

x--
oo-
--x

x--
oo-
o-x

xx-
oo-
o-x

xxo
oo-
o-x



In [0]:
# State value via minimax (negamax form)
def mini_max(state):
  """Return the value of `state` for the side to move: -1 loss, 0 draw, 1 win.

  Args:
    state: object exposing is_lose(), is_draw(), legal_actions() and
      next(action).

  Returns:
    -1 if the side to move has lost, 0 if the position is a draw, otherwise
    the best negated value over all successor states (negative infinity if
    there are no legal actions).
  """
  # Terminal positions: the loss check comes first, then the draw check
  if state.is_lose():
    return -1
  if state.is_draw():
    return 0

  # A successor is valued from the opponent's side, so negate it and keep
  # the largest result
  return max(
    (-mini_max(state.next(action)) for action in state.legal_actions()),
    default=-float('inf'),
  )

In [0]:
# Action selection via minimax
def mini_max_action(state):
  """Return the legal action with the highest minimax value and print a table.

  Every legal action is scored with `-mini_max(state.next(action))`. The
  first action reaching the highest score is returned; 0 is returned when
  there are no legal actions. As a side effect, one line of actions and one
  line of their scores are printed.

  Args:
    state: object exposing legal_actions() and next(action).

  Returns:
    The selected action.
  """
  chosen, top_score = 0, -float('inf')
  action_cells = []
  score_cells = []

  for candidate in state.legal_actions():
    value = -mini_max(state.next(candidate))
    if value > top_score:
      chosen, top_score = candidate, value

    # `{:2d}`: integer padded with spaces to a minimum width of 2
    action_cells.append('{:2d},'.format(candidate))
    score_cells.append('{:2d},'.format(value))

  # Show every candidate next to its score
  print('action: ', ''.join(action_cells), '\nscore: ', ''.join(score_cells), '\n')

  return chosen

In [0]:
# Minimax vs. random

# Start from the empty board
state = State()

# Play until the game is over: minimax moves on the first player's turns,
# the random player moves otherwise
while not state.is_done():
  action = mini_max_action(state) if state.is_first_player() else random_action(state)

  # Advance to the next state and show the board
  state = state.next(action)
  print(state)

action:   0, 1, 2, 3, 4, 5, 6, 7, 8, 
score:   0, 0, 0, 0, 0, 0, 0, 0, 0, 

o--
---
---

o-x
---
---

action:   1, 3, 4, 5, 6, 7, 8, 
score:  -1, 1, 0, 0, 1, 0, 1, 

o-x
o--
---

oxx
o--
---

action:   4, 5, 6, 7, 8, 
score:   1, 1, 1, 1, 1, 

oxx
oo-
---

oxx
oo-
--x

action:   5, 6, 7, 
score:   1, 1,-1, 

oxx
ooo
--x



In [0]:
# Play minimax vs. random n times
def minmax_vs_random(n):
  """Play n games of minimax (first player) against random (second player).

  The board is printed after every move.

  Args:
    n: number of games to play.

  Returns:
    The string 'minmax won {count} times!', where count is the number of
    games minimax actually won. Draws are not counted as wins.
  """
  minmax_wins_count = 0  # number of games won by minimax

  for _ in range(n):
    # Start each game from the empty board
    state = State()

    # Play until the game is over
    while not state.is_done():
      if state.is_first_player():
        action = mini_max_action(state)
      else:
        action = random_action(state)

      state = state.next(action)
      print(state)

    # A finished state is seen from the side that would move next. Minimax
    # (the first player) has won only if that side has lost AND that side is
    # the second player. Checking the turn alone is not enough: a full-board
    # draw also ends on the second player's turn and was previously counted
    # as a win.
    if state.is_lose() and not state.is_first_player():
      minmax_wins_count += 1

  return 'minmax won {:d} times!'.format(minmax_wins_count)

In [42]:
print(minmax_vs_random(10)) # play 10 games

action:   0, 1, 2, 3, 4, 5, 6, 7, 8, 
score:   0, 0, 0, 0, 0, 0, 0, 0, 0, 

o--
---
---

ox-
---
---

action:   2, 3, 4, 5, 6, 7, 8, 
score:   0, 1, 1, 0, 1, 0, 0, 

ox-
o--
---

ox-
o--
x--

action:   2, 4, 5, 7, 8, 
score:  -1, 1,-1, 0,-1, 

ox-
oo-
x--

ox-
oox
x--

action:   2, 7, 8, 
score:   0, 0, 1, 

ox-
oox
x-o

action:   0, 1, 2, 3, 4, 5, 6, 7, 8, 
score:   0, 0, 0, 0, 0, 0, 0, 0, 0, 

o--
---
---

ox-
---
---

action:   2, 3, 4, 5, 6, 7, 8, 
score:   0, 1, 1, 0, 1, 0, 0, 

ox-
o--
---

ox-
o-x
---

action:   2, 4, 6, 7, 8, 
score:   0, 1, 1, 0, 1, 

ox-
oox
---

oxx
oox
---

action:   6, 7, 8, 
score:   1,-1, 1, 

oxx
oox
o--

action:   0, 1, 2, 3, 4, 5, 6, 7, 8, 
score:   0, 0, 0, 0, 0, 0, 0, 0, 0, 

o--
---
---

o--
x--
---

action:   1, 2, 4, 5, 6, 7, 8, 
score:   1, 1, 1, 0, 0, 0, 0, 

oo-
x--
---

oo-
x--
x--

action:   2, 4, 5, 7, 8, 
score:   1, 1, 1, 1, 1, 

ooo
x--
x--

action:   0, 1, 2, 3, 4, 5, 6, 7, 8, 
score:   0, 0, 0, 0, 0, 0, 0, 0, 0, 

o--
---
---

o--
--x
