## Data Preprocessing

In [1]:
import pandas as pd 
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

In [2]:
sudoku = pd.read_csv("sudoku.csv")

In [3]:
sudoku.head()

Unnamed: 0,puzzle,solution
0,0700000430400096108006349000940520003584600200...,6795182435437296188216349577943521863584617292...
1,3010865040465210705000000014008000020803479000...,3719865248465213795924738614638197522853479167...
2,0483015603600080909106700030200009355090102006...,7483915623652487919126754834217869355894132766...
3,0083170000042051090000400703271609049014500000...,2983176457642851391539462783271689549814537266...
4,0408906300001368208007405190004670524500207002...,1428956379751368248367425193984671524513287962...


In [4]:
sudoku.describe()

Unnamed: 0,puzzle,solution
count,9000000,9000000
unique,9000000,9000000
top,0700000430400096108006349000940520003584600200...,6795182435437296188216349577943521863584617292...
freq,1,1


In [5]:
sampled_sudoku = sudoku.sample(n = 100000, random_state = 24)

In [6]:
sampled_sudoku.describe()

Unnamed: 0,puzzle,solution
count,100000,100000
unique,100000,100000
top,0070400398020000640401000570092643183019070000...,1576428398925731646431892575792643183819576424...
freq,1,1


## Initial Model

In [7]:
puzzles = np.array([list(p) for p in sampled_sudoku['puzzle']], dtype=np.int8)
solutions = np.array([list(s) for s in sampled_sudoku['solution']], dtype=np.int8)

In [8]:
puzzles = puzzles.reshape(-1, 9, 9)
solutions = solutions.reshape(-1, 9, 9)

In [9]:
puzzles.shape

(100000, 9, 9)

In [10]:
solutions.shape

(100000, 9, 9)

In [3]:
pipeline = Pipeline([
    ('normalization', MinMaxScaler(feature_range = (0,1))),
    ('one_hot_encoding', OneHotEncoder(categories = [range(1,10)], sparse_output = False))
])

In [15]:
puzzles_flat = puzzles.reshape(puzzles.shape[0], -1)
puzzles_normalized = pipeline.named_steps['normalization'].fit_transform(puzzles_flat)
puzzles_normalized = puzzles_normalized.reshape(puzzles.shape)

In [16]:
solutions_flat = solutions.reshape(-1, 1)
solutions_onehot = pipeline.named_steps['one_hot_encoding'].fit_transform(solutions_flat)
solutions_onehot = solutions_onehot.reshape(solutions.shape[0], 9, 9, 9)

In [4]:
import torch
import torch.nn as nn

In [5]:
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

In [32]:
train_puzzles, test_puzzles, train_solutions, test_solutions = train_test_split(
    puzzles_normalized, solutions_onehot, test_size=0.1, random_state=42
)

train_puzzles_tensor = torch.tensor(train_puzzles, dtype=torch.float32).unsqueeze(1)
test_puzzles_tensor = torch.tensor(test_puzzles, dtype=torch.float32).unsqueeze(1)

train_solutions_tensor = torch.tensor(train_solutions, dtype=torch.float32).permute(0, 3, 1, 2)
test_solutions_tensor = torch.tensor(test_solutions, dtype=torch.float32).permute(0, 3, 1, 2)

In [33]:
train_dataset = TensorDataset(train_puzzles_tensor, train_solutions_tensor)
test_dataset = TensorDataset(test_puzzles_tensor, test_solutions_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

In [34]:
class SudokuSolverCNN(nn.Module):
    def __init__(self, num_layers=16):
        super(SudokuSolverCNN, self).__init__()
        self.layers = nn.ModuleList()
        
        self.layers.append(nn.Conv2d(1, 512, kernel_size=3, padding=1))
        

        for i in range(num_layers - 2):
            self.layers.append(nn.Conv2d(512, 512, kernel_size=3, padding=1))
            self.layers.append(nn.BatchNorm2d(512))
            self.layers.append(nn.ReLU())
        
        self.final = nn.Conv2d(512, 9, kernel_size=1)
        
    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        x = self.final(x)
        return x

In [None]:
def create_sudoku_cnn_pipeline():
    return nn.Sequential(
        nn.Conv2d(1, 512, kernel_size=3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        *[nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU()
        ) for _ in range(14)],
        nn.Conv2d(512, 9, kernel_size=1),
    )

model = create_sudoku_cnn_pipeline()

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [17]:
device

device(type='cuda')

In [30]:
model = create_sudoku_cnn_pipeline().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for puzzles, solutions in train_loader:
        puzzles, solutions = puzzles.to(device), solutions.to(device)
        optimizer.zero_grad()
        outputs = model(puzzles)
        loss = criterion(outputs, solutions)
        loss.backward()
        optimizer.step()
    
model.eval()
correct_cells = 0
total_cells = 0
with torch.no_grad():
    for puzzles, solutions in test_loader:
        puzzles, solutions = puzzles.to(device), solutions.to(device)
        outputs = model(puzzles)
        
        _, predicted = torch.max(outputs, 1)
        _, labels = torch.max(solutions, 1)
        
        total_cells += labels.numel()
        correct_cells += (predicted == labels).sum().item()

print(f'Epoch [{epoch+1}/{num_epochs}], Test Accuracy: {100 * correct_cells / total_cells:.2f}%')



Epoch [10/10], Test Accuracy: 73.00%


In [31]:
device

device(type='cuda')

## Enahced Model

In [6]:
sampled_sudoku = sudoku.sample(n = 200000, random_state = 123)

In [7]:
puzzles = np.array([list(p) for p in sampled_sudoku['puzzle']], dtype=np.int8)
solutions = np.array([list(s) for s in sampled_sudoku['solution']], dtype=np.int8)

In [8]:
puzzles = puzzles.reshape(-1, 9, 9)
solutions = solutions.reshape(-1, 9, 9)

In [9]:
puzzles_flat = puzzles.reshape(puzzles.shape[0], -1)
puzzles_normalized = pipeline.named_steps['normalization'].fit_transform(puzzles_flat)
puzzles_normalized = puzzles_normalized.reshape(puzzles.shape)

In [10]:
solutions_flat = solutions.reshape(-1, 1)
solutions_onehot = pipeline.named_steps['one_hot_encoding'].fit_transform(solutions_flat)
solutions_onehot = solutions_onehot.reshape(solutions.shape[0], 9, 9, 9)

In [11]:
train_puzzles, test_puzzles, train_solutions, test_solutions = train_test_split(
    puzzles_normalized, solutions_onehot, test_size=0.2, random_state=42
)

train_puzzles_tensor = torch.tensor(train_puzzles, dtype=torch.float32).unsqueeze(1)
test_puzzles_tensor = torch.tensor(test_puzzles, dtype=torch.float32).unsqueeze(1)

train_solutions_tensor = torch.tensor(train_solutions, dtype=torch.float32).permute(0, 3, 1, 2)
test_solutions_tensor = torch.tensor(test_solutions, dtype=torch.float32).permute(0, 3, 1, 2)

In [12]:
train_dataset = TensorDataset(train_puzzles_tensor, train_solutions_tensor)
test_dataset = TensorDataset(test_puzzles_tensor, test_solutions_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

In [15]:
puzzles.shape

(200000, 9, 9)

In [16]:
solutions.shape

(200000, 9, 9)

In [13]:
class SudokuSolverCNN(nn.Module):
    def __init__(self, num_layers= 20):
        super(SudokuSolverCNN, self).__init__()
        self.layers = nn.ModuleList()

        self.layers.append(nn.Conv2d(1, 512, kernel_size=3, padding=1))
        self.layers.append(nn.BatchNorm2d(512))
        self.layers.append(nn.ReLU())
        
        for i in range(num_layers - 2):
            self.layers.append(nn.Conv2d(512, 512, kernel_size=3, padding=1))
            self.layers.append(nn.BatchNorm2d(512))
            self.layers.append(nn.ReLU())

        self.final_conv = nn.Conv2d(512, 9, kernel_size=1)
    
    def forward(self, x):
        residual = x
        for layer in self.layers:
            x = layer(x)
            if isinstance(layer, nn.ReLU):
                x += residual
                residual = x
        x = self.final_conv(x)
        return x



In [14]:
def create_sudoku_cnn_pipeline():
    return nn.Sequential(
        nn.Conv2d(1, 512, kernel_size=3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        *[
            nn.Sequential(
                nn.Conv2d(512, 512, kernel_size=3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU()
            ) for _ in range(18)
        ],
        nn.Conv2d(512, 9, kernel_size=1),
    )
    
model = create_sudoku_cnn_pipeline()

In [18]:
model = create_sudoku_cnn_pipeline().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    for puzzles, solutions in train_loader:
        puzzles, solutions = puzzles.to(device), solutions.to(device)
        outputs = model(puzzles)
        loss = criterion(outputs, solutions)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

model.eval()
correct_train_cells = 0
total_train_cells = 0

with torch.no_grad():
    for puzzles, solutions in train_loader:
        puzzles, solutions = puzzles.to(device), solutions.to(device)
        outputs = model(puzzles)
        
        _, predicted = torch.max(outputs, 1)
        _, labels = torch.max(solutions, 1)
        
        total_train_cells += labels.numel()
        correct_train_cells += (predicted == labels).sum().item()

train_accuracy = 100 * correct_train_cells / total_train_cells
print(f'Overall Training Accuracy: {train_accuracy:.2f}%')
correct_cells = 0
total_cells = 0

with torch.no_grad():
    for puzzles, solutions in test_loader:
        puzzles, solutions = puzzles.to(device), solutions.to(device)
        outputs = model(puzzles)
        
        _, predicted = torch.max(outputs, 1)
        _, labels = torch.max(solutions, 1)
        
        total_cells += labels.numel()
        correct_cells += (predicted == labels).sum().item()

test_accuracy = 100 * correct_cells / total_cells
print(f'Overall Test Accuracy: {test_accuracy:.2f}%')


Overall Training Accuracy: 83.28%
Overall Test Accuracy: 79.55%


In [None]:
puzzle