| Keyboard key | GameBoy equivalant |
| ---          | ---                |
| Up           | Up                 |
| Down         | Down               |
| Left         | Left               |
| Right        | Right              |
| A            | A                  |
| S            | B                  |
| Return       | Start              |
| Backspace    | Select             |

| Keyboard key | Emulator function       |
| ---          | ---                     |
| F11          | Toggle fullscreen       |
| Escape       | Quit                    |
| D            | Debug                   |
| Space        | Unlimited FPS           |
| Z            | Save state              |
| X            | Load state              |
| I            | Toggle screen recording |
| O            | Save screenshot         |
| ,            | Rewind backwards        |
| .            | Rewind forward          |
| J            | Memory Window + 0x100   |
| K            | Memory Window - 0x100   |
| Shift + J    | Memory Window + 0x1000  |
| Shift + K    | Memory Window - 0x1000  |

In [2]:
# Just The Imports
from pyboy import PyBoy
import random
import threading
import json
import os
pyboy = PyBoy(r"rompath")

# Mutes The Sound. I Want To Listen To My Own Music While Running This
pyboy.memory[0xFF26] = 0x00  # NR52 - Sound on/off
pyboy.memory[0xFF24] = 0x00  # NR50 - Channel control / volume
pyboy.memory[0xFF25] = 0x00  # NR51 - Sound panning

# Round Timer Starting From 99
TIMER_CHANNEL = 0xcbb3

# Player Health
P1_HEALTH_CHANNEL = 0xc4b5
P2_HEALTH_CHANNEL = 0xc6b5

# Player Distances
P1_DISTANCE_CHANNEL = 0xc40e
P2_DISTANCE_CHANNEL = 0xc60e

# Just Figures Out What The Current Distance Is
def get_game_state():
    # Gets PLayer Position
    p1_pos = pyboy.memory[P1_DISTANCE_CHANNEL]
    p2_pos = pyboy.memory[P2_DISTANCE_CHANNEL]

    # Handles Cases Where Distance Changes Abnormally When In Close Distance
    # When A Player Gets Too Close, The Game Sometimes Adds The Ones Place In The Number Again, Turning It Into A Hundreds Number. Ex: 48 --> 488
    if p1_pos > 200:
        p1_pos = p1_pos // 10
    if p2_pos > 200:
        p2_pos = p2_pos // 10

    # Finds The Distance Between The Players
    distance = abs(p1_pos - p2_pos)

    # Classified Range Distances I Made Up
    # Close - Determined By Farthest Point A Hit Could Register
    if distance < 62:
        return 0
    # Medium
    elif distance < 101:
        return 1
    # Far
    else:
        return 2

def load_q_table():
    if os.path.exists('q_table.json'):
        try:
            # with Opens The File And Closes The File Once Done
            # 'r' means Read Mode. 
            with open('q_table.json', 'r') as table_file:
                data = json.load(table_file)
                q_table = {}
                for key, value in data.items():
                    q_table[int(key)] = value
                return q_table
        except:
            print('Failed to load previously trained data. Starting with an empty q_table')
            return {}
    else:
        print('No exisiting q_table found. Starting fresh')
        return {}

def save_q_table(q_table):
    data = {}
    for key, value in q_table.items():
        # JSON Doesn't Support Integer Keys, So Convert To A String
        data[str(key)] = value
    # 'w' means Write Mode
    with open('q_table.json', 'w') as table_file:
        # dump Writes The Contents Onto The File
        json.dump(data, table_file, indent = 2)

# Q-Learning Logic. Also A Form Of Reinforcement Learning Under Value-Based Methods
# Exploration Rate. 0.3 Means 30% Of The Time, Explore. Other 70% Of The Time, Use Q-Table. Start High Then Decrease As AI Learns
epsilon = 0.3
# Learning Rate. 0.1 Means Adjust Q-Values By 10%. 0.01 Means Slow Learning. 0.99 Basically Means Forget What The AI Previously Learned
alpha = 0.1
# Discount Factor. Future Rewards vs Immediate Rewards. 0.0 Immediate Only. 1.0 Balances Future Rewards Equally to Immediate Rewards.
gamma = 0.9

# Current Game State Is None As There Is Nothing To Begin With
previous_game_state = None

# Used To Track Frames Specifically For Attacking
action_counter = 0

# Used To Determine What Action The AI Will Do
current_action = 0

# Load Existing Training. If Not, Starts A New Table To Train AI
q_table = load_q_table()
if q_table:
    print(f"Loaded previous training: {q_table}")

# Used To Track Most Recent p1_health. By Default, Health Is 144
last_p1_health = 144
last_p2_health = 144

# Used For The First Two Seconds Of The Game Booting Up
initial_frames = 0
# Keeps Track Of Frames Per Second
frames_passed = 0
# Keeps Track Of What Match We're On
match_number = 1
# . . . Round Ended? . . .
round_ended = False
# . . . Round 1? Or Round 2? . . .
round_number = 1
# Used To Determine What Match We're On
p1_rounds_won = 0
p2_rounds_won = 0

# "while pyboy.tick()" Is The Coding Version Of "while pyboy is running"
while pyboy.tick():
    # Disables Sound For The First Two Seconds
    if initial_frames < 120:
        pyboy.memory[0xFF26] = 0x00
        initial_frames += 1
    else:
        # Don't Want The Frame Count Get Unnecessarily Humongous
        initial_frames = 120

    # Increases Frame Count
    frames_passed += 1
    # Increases Frame Count For Actions
    action_counter += 1

    # Chooses A Random Action Every 10 Frames
    if action_counter == 10 and not round_ended:

        # Just Gets The Current State Of The Game
        current_game_state = get_game_state()

        # Grabs Player Health
        p1_health = pyboy.memory[P1_HEALTH_CHANNEL]
        p2_health = pyboy.memory[P2_HEALTH_CHANNEL]

        # Calculates The Reward Based On The Latest Action
        if p1_health <= 144 and p2_health <= 144:
            damage_dealt = last_p2_health - p2_health
            damage_taken = last_p1_health - p1_health

            # Updates Q-Table If It Has A Previous State (aka: not None)
            if previous_game_state is not None:
                # Creates A Reward Where More Damage Is Rewarded More. If Only Damage Was Taken, Then A Negative Reward
                reward = (damage_dealt * 2) - damage_taken

                # If We Haven't Seen The Previous State Before, Create It In The Table
                if previous_game_state not in q_table:
                    q_table[previous_game_state] = [0] * 4
                # If We Haven't Seen The Current State Before, Create It In The Table
                if current_game_state not in q_table:
                    q_table[current_game_state] = [0] * 4

                # 4 Lines Of Code Below Updates Q-Learning
                # Grabs The Previous Value In The q_table
                old_value = q_table[previous_game_state][current_action]
                # Finds What The Best Value Of The Current State Is
                next_max = max(q_table[current_game_state])
                # The Bellman Equation!!!!!!!!!
                # New Estimate = Old Estimate + Learning From Mistake
                new_value = old_value + alpha * (reward + gamma * next_max - old_value)
                # Updates q_table With New Learned Value
                q_table[previous_game_state][current_action] = new_value
                # What Basically Happened Above Is It Calculated What The Future Actions Will Lead To. So It's Using Multiple Rewards For The Most Optimal Final Reward

            # Just Updates The Health
            last_p1_health = p1_health
            last_p2_health = p2_health
            
        # Choose Next Action To Do
        # Explores
        if random.random() < epsilon:
            current_action = random.randint(0, 3)
        # Uses Q-Table
        else:
            # Creates Values In q_table If Not Already In There
            if current_game_state not in q_table:
                q_table[current_game_state] = [0] * 4
            # Finds Best Action By Finding Which Has Highest Value In q_table
            current_action = q_table[current_game_state].index(max(q_table[current_game_state]))

        # Action Activators
        if current_action == 0:
            pyboy.button_press('left')
        elif current_action == 1:
            pyboy.button_press('right')
        elif current_action == 2:
            pyboy.button_press('b')
        elif current_action == 3:
            pyboy.button_press('a')

        # Remembers Current Game State For Later Use
        previous_game_state = current_game_state

    # Release Buttons
    if action_counter == 15:
        pyboy.button_release('left')
        pyboy.button_release('right')
        pyboy.button_release('b')
        pyboy.button_release('a')
        action_counter = 0
    
    # Because Of How Frequently pyboy.tick() Runs, It Results In 60 Frames Per Second, or 60 Ticks Per Second
    if frames_passed == 60:
        timer = pyboy.memory[TIMER_CHANNEL]
        p1_health = pyboy.memory[P1_HEALTH_CHANNEL]
        p2_health = pyboy.memory[P2_HEALTH_CHANNEL]

        if not round_ended:
            # Determines Who Won
            if p1_health == 255:
                print('PLAYER 2 WINS!', end='\r')
                p2_rounds_won += 1
                round_ended = True
                previous_game_state = None
            elif p2_health == 255:
                print('PLAYER 1 WINS!', end='\r')
                p1_rounds_won += 1
                round_ended = True
                previous_game_state = None

        # Deactivates round_ended
        if round_ended and p1_health == 144 and p2_health == 144:
            round_number += 1
            round_ended = False
            
        # Increases The Match Number Once A Player Has Won Twice
        if p1_health == 144 and p2_health == 144 and (p1_rounds_won == 2 or p2_rounds_won == 2):
            match_number += 1
            p1_rounds_won = 0
            p2_rounds_won = 0
            round_number = 1


        p1_distance = pyboy.memory[P1_DISTANCE_CHANNEL]
        p2_distance = pyboy.memory[P2_DISTANCE_CHANNEL]
        # Prints A Live Chart Of Stuff Happening
        print(f"Match: {match_number} | Round: {round_number} | Timer: {timer:2} | P1 Health: {p1_health:3} | P2 Health: {p2_health:3} | P1 Distance: {p1_distance} | P2 Distance: {p2_distance}", end='\r')
        # Resets Frame Count 
        frames_passed = 0

save_q_table(q_table)
pyboy.stop()

Loaded previous training: {0: [14.17944228557316, 12.006324421353902, 21.717196924876124, 6.5597068877220766], 1: [6.5187805169679836, 6.606340524463887, 7.857512069241469, 6.59339578953752], 2: [3.060889393479474, 5.008312451840862, 4.24176287379106, 2.664607044180602]}
Match: 3 | Round: 2 | Timer: 74 | P1 Health:   0 | P2 Health:   0 | P1 Distance: 154 | P2 Distance: 682

In [3]:
# Stuff I Can Do To Improve AI
# Learn Combos
# Change The Distance Tracker To Also Know If On The Left or Right Side Of The Opponent To Then Know What Direction To Move
# Be Able To Jump
# Use Special Moves
# Respond To Certain Moves For A Counter Attack/Combo/Block
# Extra Reward Points For Winning
# Detect What Match The AI Is On. Currently Just Adding A New Match Even If Constantly Stucky On Same Battle With A Fighter
# Add Live Tracker Of Q-Table
# Add Live Tracker Of Action Performed
# Include Q-Table For Each Character In The Game (play style might differ between characters)
# Adjust AI So It Learns What To Do At Its Distance Rather Than Me Telling It