# Libraries

In [58]:
# imports
import pandas as pd
import numpy as np
import argparse

from sklearn.model_selection import train_test_split

# Import models from another notebook
%run model.ipynb

In [None]:
# params
# Hyperparameters are exposed as optional command-line style flags; each one
# falls back to the default given here when it is not supplied.
parser = argparse.ArgumentParser(description='Process inputs for training a model')
parser.add_argument('--model_id', default='dense_model')
parser.add_argument('--dropout', default=0.4, type=float)
parser.add_argument('--layers_count', default=2, type=int)
parser.add_argument('--dense_units', default=64, type=int)
parser.add_argument('--hidden_activation', default='relu')

# parse_known_args() is used instead of parse_args(): inside a Jupyter kernel
# sys.argv holds the launcher's own arguments (e.g. "-f <connection file>"),
# which parse_args() rejects by raising SystemExit. Anything this parser does
# not recognise is collected in `unknown_args` instead of aborting the run.
args, unknown_args = parser.parse_known_args()
print("Args: " + str(args))

# Preprocessing

### Load sudoku

In [16]:
# load inputs
# Read every CSV cell as text (dtype=str). Later cells iterate over each value
# character by character and reshape it to 9x9, so each grid must stay an
# 81-character string; forcing str keeps leading zeros and does not rely on
# pandas' dtype inference for all-digit columns.
sudokus = pd.read_csv('./datasets/sudoku-kaggle.csv', dtype=str).values
print("Full shape:", sudokus.shape)

Full shape: (1000000, 2)


### Split into data and labels

In [50]:
# Split into puzzles and solutions
# Column 0 of the loaded table holds the puzzles, column 1 the solutions.
puzzles = sudokus[:, 0]
solutions = sudokus[:, 1]
print("Number of puzzles:", puzzles.shape[0])
print("Number of solutions:", solutions.shape[0])

Number of puzzles: 1000000
Number of solutions: 1000000


### Reshape the data

In [51]:
# Reshape to 9x9
def reshape_f(x):
    """Turn one sequence of 81 digit characters into a 9x9 integer grid."""
    return np.array([int(digit) for digit in x]).reshape(9, 9)


# Apply the conversion to every puzzle/solution and stack the grids into one
# array whose first axis indexes the samples.
reshaped_puzzles = np.array([reshape_f(puzzle) for puzzle in puzzles])
reshaped_solutions = np.array([reshape_f(solution) for solution in solutions])
print("Shape of puzzles:", reshaped_puzzles.shape)
print("Shape of solutions:", reshaped_solutions.shape)

Shape of puzzles: (1000000, 9, 9)
Shape of solutions: (1000000, 9, 9)


In [53]:
# To one-hot encoding
# NOTE(review): `to_categorical` is not imported in this notebook; presumably it
# is brought into scope by the `%run model.ipynb` cell (Keras' utility) - confirm.
def one_hot_f(x, num_classes=None):
    """One-hot encode an integer array along a new trailing axis as float32.

    x: integer array of class indices.
    num_classes: width of the one-hot axis. When None, `to_categorical` infers
        it from the largest value found in `x`.
    """
    # copy=False avoids duplicating the (very large) encoded array when it
    # already has dtype float32.
    return to_categorical(x, num_classes=num_classes).astype('float32', copy=False)


# The class counts are given explicitly so the resulting shapes do not depend on
# which digits happen to occur in the data (e.g. when working on a subset).
# Puzzles keep their raw values 0..9 -> 10 classes; solutions are shifted by -1
# so their digits map onto classes 0..8 -> 9 classes.
one_hot_puzzles = one_hot_f(reshaped_puzzles, num_classes=10)
one_hot_solutions = one_hot_f(reshaped_solutions - 1, num_classes=9)
print("Shape of puzzles:", one_hot_puzzles.shape)
print("Shape of solutions:", one_hot_solutions.shape)

Shape of puzzles: (1000000, 9, 9, 10)
Shape of solutions: (1000000, 9, 9, 9)


### Train test split

In [55]:
# Split into train and test set
# 20% of the samples are held out for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    one_hot_puzzles,
    one_hot_solutions,
    test_size=0.2,
    random_state=42,
)
for caption, split in (
    ("Training data shape:", X_train),
    ("Training labels shape:", y_train),
    ("Testing data shape:", X_test),
    ("Testing labels shape:", y_test),
):
    print(caption, split.shape)

Training data shape: (800000, 9, 9, 10)
Training labels shape: (800000, 9, 9, 9)
Testing data shape: (200000, 9, 9, 10)
Testing labels shape: (200000, 9, 9, 9)


### Function that randomly deletes some digits from the solution

In [None]:
def delete_digits(X, digits_to_delete=1):
    """Blank out randomly chosen cells of one-hot encoded sudoku grids.

    X: 4-D one-hot array (samples, rows, cols, classes). Two encodings are
        handled:
        * 9 classes  - class k stands for digit k + 1 (the encoding this
          notebook uses for solutions);
        * otherwise  - the class index is taken as the cell value itself, with
          0 meaning "blank" (the encoding this notebook uses for puzzles).
    digits_to_delete: number of random cell positions drawn per grid. Positions
        are drawn with replacement, so a grid may end up with fewer blanked
        cells than requested if the same position is drawn twice.

    Returns the grids re-encoded with `to_categorical`, where class 0 marks a
    blank cell. For 9-class input the result has 10 classes; for any other
    input the class count equals that of `X`. `X` itself is not modified.
    """
    # With the 9-class encoding argmax yields digit - 1, so value 0 would mean
    # "digit 1" rather than "blank". Shift back to real digits (1..9) so that
    # 0 is free to act as the blank marker.
    digit_offset = 1 if X.shape[3] == 9 else 0
    grids = X.argmax(3) + digit_offset
    for grid in grids:
        # `grid` is a view into `grids`; writing through .flat updates it in place.
        grid.flat[np.random.randint(0, grid.size, digits_to_delete)] = 0
    # Fix the class count explicitly so the output shape does not depend on
    # which digits happen to survive in this particular batch.
    return to_categorical(grids, num_classes=X.shape[3] + digit_offset)

In [56]:
# initialize model

In [None]:
# train model