# Chess RL - Full Training Pipeline

A 3-phase training approach for faster and stronger learning:

1. **Phase 1: Supervised Learning** - Learn from Stockfish moves (~1-2 hours)
2. **Phase 2: Curriculum Learning** - Play against Stockfish at increasing difficulty (~2-4 hours)
3. **Phase 3: Self-Play** - Refine strategies through self-play (~2-4 hours)

Total estimated time: **5-10 hours** for a competent bot (the sum of the three phase estimates above)

## 1. Setup

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install Stockfish (required for Phase 1 & 2)
!apt-get install -y stockfish
!which stockfish

In [None]:
# Clone repository
!git clone https://github.com/zhihaohong52/chess-rl.git
%cd chess-rl

In [None]:
# Install dependencies
!pip install -q python-chess tqdm

In [None]:
# Verify setup: report the TensorFlow version, the GPUs TensorFlow can see,
# and that the Stockfish binary can be launched as a UCI engine.
import tensorflow as tf
print(f"TensorFlow: {tf.__version__}")
print(f"GPU: {tf.config.list_physical_devices('GPU')}")

import chess.engine
# If the engine cannot be started, popen_uci fails here and the "OK" line
# below is never printed.
engine = chess.engine.SimpleEngine.popen_uci("/usr/games/stockfish")
try:
    print("Stockfish: OK")
finally:
    # Always shut the engine subprocess down, even if the cell is interrupted.
    engine.quit()

## 2. Configuration

In [None]:
import sys
# Make the cloned repository importable (config, src.*) from this notebook.
sys.path.insert(0, '/content/chess-rl')

from config import Config

config = Config()
# Checkpoints live under the Google Drive mount point (/content/drive).
CHECKPOINT_DIR = '/content/drive/MyDrive/chess-rl/checkpoints'
# Same binary path that the setup-verification cell launches.
STOCKFISH_PATH = '/usr/games/stockfish'

# Training parameters (adjust based on available time)
# These per-phase iteration counts are passed to pipeline.train_full_pipeline().
PHASE1_ITERATIONS = 20      # Supervised learning iterations
PHASE2_ITERATIONS = 50      # Curriculum learning iterations
PHASE3_ITERATIONS = 50      # Self-play iterations

print(f"Checkpoint directory: {CHECKPOINT_DIR}")

## 3. Full Pipeline Training

Run all three phases sequentially:

In [None]:
from src.training.full_pipeline import FullTrainingPipeline

# Initialize pipeline
# The resulting `pipeline` object (and its `pipeline.network`) is reused by
# the training, evaluation, and save cells later in this notebook.
pipeline = FullTrainingPipeline(
    config=config,
    checkpoint_dir=CHECKPOINT_DIR,
    stockfish_path=STOCKFISH_PATH,
)

print(f"Network has {pipeline.network.trainable_params:,} trainable parameters")

In [None]:
# Run full training pipeline
# A single call covers all three phases, using the iteration counts from the
# configuration cell; the return value is kept in `results`.
# NOTE(review): the per-phase hour figures below are this notebook's rough
# guidance, not measured values — confirm on your own hardware.
results = pipeline.train_full_pipeline(
    # Phase 1: Supervised (~1-2 hours)
    supervised_iterations=PHASE1_ITERATIONS,
    supervised_batches=100,
    
    # Phase 2: Curriculum (~2-4 hours)
    curriculum_iterations=PHASE2_ITERATIONS,
    curriculum_games=20,
    curriculum_max_depth=6,
    
    # Phase 3: Self-play (~2-4 hours)
    selfplay_iterations=PHASE3_ITERATIONS,
    selfplay_games=64,
    selfplay_simulations=200,
)

## Alternative: Run Phases Individually

If you want more control, run each phase separately by uncommenting the cells below (use them instead of the full-pipeline cell above):

In [None]:
# # Phase 1: Supervised Learning
# phase1_history = pipeline.phase1_supervised(
#     num_iterations=20,
#     batch_size=256,
#     batches_per_iteration=100,
#     stockfish_depth=10,
# )

In [None]:
# # Phase 2: Curriculum Learning
# phase2_history = pipeline.phase2_curriculum(
#     num_iterations=50,
#     games_per_iteration=20,
#     training_steps=100,
#     initial_depth=1,
#     max_depth=6,
#     promotion_threshold=0.55,
#     num_simulations=100,
# )

In [None]:
# # Phase 3: Self-Play
# phase3_history = pipeline.phase3_self_play(
#     num_iterations=50,
#     games_per_iteration=64,
#     training_steps=200,
#     num_simulations=200,
#     num_parallel=16,
# )

## 4. Test the Trained Model

In [None]:
from src.game.chess_game import ChessGame
from src.mcts.mcts import MCTS

# Test against Stockfish
def test_against_stockfish(network, stockfish_depth=3, num_games=10):
    """Play evaluation games between the network (via MCTS) and Stockfish.

    The bot plays White in even-numbered games and Black in odd-numbered
    ones. A game stops when it is terminal or once ``game.move_count``
    reaches 200. One result line is printed per game, followed by a summary;
    nothing is returned.

    Args:
        network: Model handed to ``MCTS`` to choose the bot's moves.
        stockfish_depth: Depth limit given to Stockfish for each of its moves.
        num_games: Number of games to play.
    """
    import chess.engine

    engine = chess.engine.SimpleEngine.popen_uci(STOCKFISH_PATH)
    wins, losses, draws = 0, 0, 0

    # try/finally guarantees the Stockfish subprocess is shut down even when
    # a game raises or the cell is interrupted.
    try:
        mcts = MCTS(network, num_simulations=200)

        for game_num in range(num_games):
            game = ChessGame()
            bot_is_white = (game_num % 2 == 0)

            while not game.is_terminal() and game.move_count < 200:
                is_bot_turn = (game.turn == chess.WHITE) == bot_is_white

                if is_bot_turn:
                    action, _, _ = mcts.get_action(game, temperature=0.1)
                    if action < 0:
                        # No move was produced, so the position would never
                        # change; stop this game instead of looping forever
                        # (same handling as the self-play demo cell).
                        break
                    game.apply_move_index(action)
                else:
                    result = engine.play(
                        game.board, chess.engine.Limit(depth=stockfish_depth)
                    )
                    game.apply_move(result.move)

            # The sign is flipped when the bot played Black, so a positive
            # value always counts as a bot win below.
            outcome = game.get_outcome()
            bot_outcome = outcome if bot_is_white else -outcome

            if bot_outcome > 0:
                wins += 1
                label = 'Win'
            elif bot_outcome < 0:
                losses += 1
                label = 'Loss'
            else:
                draws += 1
                label = 'Draw'

            print(f"Game {game_num + 1}: {label}")
    finally:
        engine.quit()

    # Guard the division so num_games=0 reports 0.0% instead of raising.
    win_rate = wins / num_games if num_games else 0.0

    print(f"\nResults vs Stockfish depth {stockfish_depth}:")
    print(f"  Wins: {wins}, Losses: {losses}, Draws: {draws}")
    print(f"  Win rate: {win_rate:.1%}")

# Evaluate the trained network against two Stockfish depths, five games each.
# Every header after the first is preceded by a blank line.
for run_index, depth in enumerate((1, 3)):
    separator = "\n" if run_index else ""
    print(f"{separator}Testing against Stockfish depth {depth}:")
    test_against_stockfish(pipeline.network, stockfish_depth=depth, num_games=5)

In [None]:
# Watch the bot play against itself
# One MCTS instance chooses the moves for both sides. Each move is recorded
# for display, then the result and the final position are printed.
game = ChessGame()
mcts = MCTS(pipeline.network, num_simulations=200)

moves = []
while not game.is_terminal() and game.move_count < 100:
    action, _, _ = mcts.get_action(game, temperature=0.1)
    if action < 0:
        break
    move = game.move_encoder.decode(action)
    # SAN depends on the position before the move, so it is computed before
    # the move is applied. parse_uci raises ValueError for a move that is not
    # legal on the current board; fall back to plain UCI text in that case.
    try:
        san = game.board.san(game.board.parse_uci(move.uci()))
    except ValueError:
        san = move.uci()
    moves.append(san)
    game.apply_move_index(action)

# Query the outcome once and reuse it for the result label.
outcome = game.get_outcome()
if outcome > 0:
    result_text = 'White wins'
elif outcome < 0:
    result_text = 'Black wins'
else:
    result_text = 'Draw'

print(f"Self-play game ({game.move_count} moves):")
print(f"Result: {result_text}")
# Only the first 60 recorded moves are shown; an ellipsis marks truncation.
print(f"\nMoves: {' '.join(moves[:60])}{'...' if len(moves) > 60 else ''}")
print(f"\nFinal position:\n{game}")

## 5. Save Final Model

In [None]:
import os

# Save final model
# NOTE(review): the message below appends '.weights.h5' to the path, which
# presumably mirrors a suffix added inside network.save() — confirm.
final_path = os.path.join(CHECKPOINT_DIR, 'model_final')
pipeline.network.save(final_path)
print(f"Model saved to: {final_path}.weights.h5")

# Save as Keras model
# Written next to the weights file, in the same checkpoint directory.
keras_path = os.path.join(CHECKPOINT_DIR, 'model_final.keras')
pipeline.network.save_full_model(keras_path)
print(f"Keras model saved to: {keras_path}")

## 6. Resume Training (Optional)

In [None]:
# # To resume from a checkpoint:
# from src.model.network import ChessNetwork
# 
# # Load the checkpoint
# network = ChessNetwork(config)
# network.compile()
# network.load(os.path.join(CHECKPOINT_DIR, 'phase2_curriculum_depth4'))
# 
# # Continue with Phase 3
# pipeline.network = network
# pipeline.phase3_self_play(num_iterations=50)