In [1]:
import pandas as pd
import numpy as np

In [10]:
# The puzzle to solve, written row by row exactly as it appears on the board.
# np.nan marks an empty cell; columns are labelled 'a' through 'i' left to right.
sudoku = pd.DataFrame(
    [
        [np.nan, 3, 4, np.nan, np.nan, 6, np.nan, 8, np.nan],
        [5, 2, 7, np.nan, 1, 8, 6, np.nan, 3],
        [np.nan, 6, np.nan, np.nan, np.nan, 9, 4, np.nan, 2],
        [np.nan, np.nan, 5, np.nan, 4, np.nan, np.nan, np.nan, np.nan],
        [3, 9, np.nan, 8, np.nan, np.nan, 5, np.nan, np.nan],
        [np.nan, 4, np.nan, np.nan, np.nan, 2, 9, 3, 6],
        [1, np.nan, np.nan, 6, 8, 4, np.nan, 2, np.nan],
        [2, np.nan, np.nan, np.nan, np.nan, np.nan, 8, 1, np.nan],
        [np.nan, np.nan, np.nan, 2, 7, 1, 3, np.nan, np.nan],
    ],
    columns=list('abcdefghi'),
    dtype=float,
)
sudoku

Unnamed: 0,a,b,c,d,e,f,g,h,i
0,,3.0,4.0,,,6.0,,8.0,
1,5.0,2.0,7.0,,1.0,8.0,6.0,,3.0
2,,6.0,,,,9.0,4.0,,2.0
3,,,5.0,,4.0,,,,
4,3.0,9.0,,8.0,,,5.0,,
5,,4.0,,,,2.0,9.0,3.0,6.0
6,1.0,,,6.0,8.0,4.0,,2.0,
7,2.0,,,,,,8.0,1.0,
8,,,,2.0,7.0,1.0,3.0,,


Attempt to solve a set:
-identify the cells in the target set
-identify the numbers already contained in this set, and the numbers to search for
-for each free number, check if there is a cell that must contain that number
    -create a list of cell positions to check: all empty cells in this set
    -cycle through this list and remove a cell from the list if the target number can't go there (i.e. it already exists elsewhere in that cell's row or column)
    -if there is only one cell remaining in list, fill with target number, otherwise move onto next target number
-once all free numbers in a set have been explored, move on to the next set

System for order of sets:
-start with sets with most cells already filled
    -order list of sets by number of filled cells

harder version:
-focus on sets with more numbers in adjacent sets

Overall process:
-create first ordered list of sets to go through
-attempt to solve each set in order
-create new ordered list of sets
-attempt to solve each, and continue

---

In [4]:
def create_ordered_subset_list(df):
    """Rank the nine 3x3 subsets of a 9x9 sudoku by how many cells are filled.

    Subsets are labelled 'A' through 'I' walking down each band of three
    columns in turn: A, B, C are the left-hand subsets from top to bottom,
    D, E, F the middle ones, and G, H, I the right-hand ones.

    Parameters
    ----------
    df : pandas.DataFrame
        The sudoku grid; empty cells are NaN.

    Returns
    -------
    pandas.DataFrame
        One row per subset with columns 'subset' (label), 'nums_count'
        (number of non-NaN cells), 'top_row' and 'left_col' (position of the
        subset's top-left cell). Rows are sorted by 'nums_count', fullest
        first, with a fresh 0..8 index. The three numeric columns are floats.
    """
    labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']

    # Top-left corner of every subset, in label order (rows vary fastest).
    corners = [(top, left) for left in (0, 3, 6) for top in (0, 3, 6)]

    # count() ignores NaN, so this is the number of filled cells per subset.
    filled = [
        df.iloc[top:top + 3, left:left + 3].count().sum()
        for top, left in corners
    ]

    ranking = pd.DataFrame({
        'subset': labels,
        'nums_count': np.array(filled, dtype=float),
        'top_row': np.array([top for top, _ in corners], dtype=float),
        'left_col': np.array([left for _, left in corners], dtype=float),
    })

    ranking = ranking.sort_values(by='nums_count', ascending=False)
    return ranking.reset_index(drop=True)

In [5]:
def create_nums_to_search_list(df):
    """Return the digits 1-9 that do not appear anywhere in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Any frame of sudoku cells (typically one 3x3 subset); NaN cells are
        treated as empty and ignored.

    Returns
    -------
    list of int
        The missing digits, in ascending order. An empty frame yields
        ``[1, 2, ..., 9]``.
    """
    cell_values = df.to_numpy().ravel()

    # Build the lookup from an ndarray rather than calling pd.unique on a
    # plain Python list, which pandas has deprecated.
    nums_contained = set(cell_values[~pd.isna(cell_values)])

    return [x for x in range(1, 10) if x not in nums_contained]

In [6]:
def search_target_set_for_num(num, top_row, left_col, board=None):
    """Place ``num`` in a 3x3 subset if exactly one empty cell can hold it.

    An empty (NaN) cell of the subset is ruled out when ``num`` already
    appears anywhere in that cell's row or column. If exactly one cell
    survives, ``num`` is written into it; otherwise the board is untouched.
    The function does not check whether ``num`` is already present inside
    the subset itself, so the caller must only pass numbers missing from it.

    Parameters
    ----------
    num : int
        The number to try to place.
    top_row, left_col : int
        Position of the top-left cell of the target subset.
    board : pandas.DataFrame, optional
        The sudoku grid to read and modify in place. When omitted, the
        notebook-level ``sudoku`` DataFrame is used.

    Returns
    -------
    None
        The result is the in-place assignment on the board.
    """
    if board is None:
        # Fall back to the notebook's global puzzle.
        board = sudoku

    possible_cells = []
    for row in range(top_row, top_row + 3):
        for col in range(left_col, left_col + 3):
            # Only empty cells are candidates.
            if not np.isnan(board.iloc[row, col]):
                continue

            in_row = (board.iloc[row, :] == num).any()
            in_col = (board.iloc[:, col] == num).any()
            if not (in_row or in_col):
                possible_cells.append((row, col))

    # A single surviving candidate means the number is forced into that cell.
    if len(possible_cells) == 1:
        row, col = possible_cells[0]
        board.iloc[row, col] = num

In [7]:
def attempt_to_solve_sudoku(sudoku):
    """Run one pass of the subset-by-subset fill strategy over a sudoku.

    The 3x3 subsets are visited from most-filled to least-filled (as ranked
    by ``create_ordered_subset_list``). For every subset that is not yet
    full, each missing number is handed to ``search_target_set_for_num``,
    which fills a cell when the number can only go in one place.

    Parameters
    ----------
    sudoku : pandas.DataFrame
        The sudoku grid (the subset ranking assumes 9x9); empty cells are NaN.

    Returns
    -------
    None
        Nothing is returned. Call this repeatedly and re-count the empty
        cells to track progress.

    Notes
    -----
    Cells are filled by ``search_target_set_for_num`` as it is called here,
    which operates on the notebook-level ``sudoku`` variable rather than on
    this function's argument. Pass that same DataFrame so that reads and
    writes refer to one board.
    """
    subset_list = create_ordered_subset_list(sudoku)

    # Use named fields: integer keys on a label-indexed row Series are
    # deprecated as positional lookups in pandas.
    for subset_info in subset_list.itertuples(index=False):
        # A full subset has nothing left to place.
        if subset_info.nums_count >= 9:
            continue

        # The ranking stores positions as floats; iloc needs ints.
        top_row, left_col = int(subset_info.top_row), int(subset_info.left_col)

        isolated_subset = sudoku.iloc[top_row:top_row + 3, left_col:left_col + 3]
        nums_to_search = create_nums_to_search_list(isolated_subset)

        for num in nums_to_search:
            search_target_set_for_num(num, top_row, left_col)

---

In [8]:
# Show the current state of the puzzle before the solver loop in the next cell runs.
sudoku

Unnamed: 0,a,b,c,d,e,f,g,h,i
0,,3.0,4.0,,,6.0,,8.0,
1,5.0,2.0,7.0,,1.0,8.0,6.0,,3.0
2,,6.0,,,,9.0,4.0,,2.0
3,,,5.0,,4.0,,,,
4,3.0,9.0,,8.0,,,5.0,,
5,,4.0,,,,2.0,9.0,3.0,6.0
6,1.0,,,6.0,8.0,4.0,,2.0,
7,2.0,,,,,,8.0,1.0,
8,,,,2.0,7.0,1.0,3.0,,


In [9]:
# Drive the solver: call attempt_to_solve_sudoku repeatedly until no empty
# cells remain or a pass fills nothing, recording the empty-cell count after
# every pass.
number_of_iterations = 0
nan_count = int(sudoku.isna().sum().sum())

# One record per pass; the results frame is built once at the end so that its
# index runs 0, 1, 2, ... instead of repeating 0 for every row.
progress = [{'Iteration #': number_of_iterations, 'Empty cell count': nan_count}]

while nan_count != 0:

    attempt_to_solve_sudoku(sudoku)
    number_of_iterations += 1

    previous_nan_count = nan_count
    nan_count = int(sudoku.isna().sum().sum())
    progress.append({'Iteration #': number_of_iterations, 'Empty cell count': nan_count})

    # Check if the solver has stalled: if the empty cell count is unchanged
    # after a pass, the sudoku is unsolvable with the current method.
    if nan_count == previous_nan_count:
        print('Solver has failed to converge')
        break

results = pd.DataFrame(progress)

print(results,'\n')
sudoku

   Iteration #  Empty cell count
0            0                43
0            1                20
0            2                 9
0            3                 1
0            4                 0 



Unnamed: 0,a,b,c,d,e,f,g,h,i
0,9.0,3.0,4.0,5.0,2.0,6.0,1.0,8.0,7.0
1,5.0,2.0,7.0,4.0,1.0,8.0,6.0,9.0,3.0
2,8.0,6.0,1.0,7.0,3.0,9.0,4.0,5.0,2.0
3,6.0,1.0,5.0,9.0,4.0,3.0,2.0,7.0,8.0
4,3.0,9.0,2.0,8.0,6.0,7.0,5.0,4.0,1.0
5,7.0,4.0,8.0,1.0,5.0,2.0,9.0,3.0,6.0
6,1.0,5.0,3.0,6.0,8.0,4.0,7.0,2.0,9.0
7,2.0,7.0,6.0,3.0,9.0,5.0,8.0,1.0,4.0
8,4.0,8.0,9.0,2.0,7.0,1.0,3.0,6.0,5.0


---