In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# other functions we need
from ast import literal_eval

## String of pearls: index

The output CSV records, for every puzzle, the index of each cell that was filled in. Each row is one puzzle and each column is one filled-in cell, so column *n* holds the index of the *n*-th cell filled in for that puzzle.

In [130]:
# SQLite driver from the standard library
import sqlite3

# open the sudoku database file and grab a cursor on the same connection
con = sqlite3.connect("../data-collection/nyt-sudoku.db")
cur = con.cursor()

# read both tables in full, one DataFrame per table
doks_perf = pd.read_sql(sql="SELECT * from doks_performance;", con=con)
doks_easy = pd.read_sql(sql="SELECT * from doks_easy;", con=con)

In [131]:
# one-column frame with the still-unparsed `doks_order` values
doks_order = doks_perf['doks_order'].to_frame()

In [132]:
# one-column frame with the still-unparsed `times` values
doks_times = doks_perf['times'].to_frame()

In [133]:
# parse every stored list literal, then spread each list across the columns
# of its own row (shorter lists are padded with NaN by the constructor)
garland_data = pd.DataFrame(
    [literal_eval(raw_order) for raw_order in doks_order['doks_order']]
)

In [5]:
# write the parsed cell indices to disk, leaving the row index out of the file
garland_data.to_csv(path_or_buf='string_of_pearls_data.csv', index=False)

## String of pearls: section

In [6]:
def get_sudoku_section(cell_index, box_size=3):
    """Return the zero-indexed box (section) that contains a sudoku cell.

    Cells are numbered row by row: the row is ``cell_index // side`` and the
    column is ``cell_index % side``, where ``side = box_size ** 2``. Boxes are
    numbered the same way, left to right and then top to bottom.

    Parameters
    ----------
    cell_index : int or float
        Flat cell index, from 0 to ``side ** 2 - 1`` (0 to 80 for the default
        9x9 grid). NaN is returned unchanged so that missing entries survive
        an element-wise application over a NaN-padded DataFrame.
    box_size : int, default 3
        Edge length of one box. The default describes the classic 9x9 grid
        with 3x3 boxes; ``box_size=2`` describes a 4x4 grid, and so on.

    Returns
    -------
    int or float
        Box number from 0 to ``side - 1``, with the same numeric type as the
        input, or the NaN that was passed in.

    Raises
    ------
    ValueError
        If ``box_size`` is smaller than 1 or ``cell_index`` lies outside the
        grid.
    """
    if box_size < 1:
        raise ValueError("box_size must be a positive integer.")

    # NaN is the only value that is not equal to itself. Pass it through
    # explicitly instead of relying on NaN comparisons being False and the
    # arithmetic below happening to propagate it.
    if cell_index != cell_index:
        return cell_index

    side = box_size * box_size
    last_index = side * side - 1

    # Reject anything that does not address a cell of the grid
    if cell_index < 0 or cell_index > last_index:
        raise ValueError(f"Invalid index. Cell index should be between 0 and {last_index}.")

    # Split the flat index into its grid coordinates
    row_index, col_index = divmod(cell_index, side)

    # Coarsen both coordinates to box resolution and flatten them again
    return (row_index // box_size) * box_size + (col_index // box_size)

In [20]:
# Translate every cell index into its section number, element by element.
# `DataFrame.applymap` is deprecated since pandas 2.1 in favour of the
# equivalent `DataFrame.map`, so use `map` when this pandas has it and fall
# back to `applymap` on older versions.
garland_data__section = (
    garland_data.map(get_sudoku_section)
    if hasattr(pd.DataFrame, "map")
    else garland_data.applymap(get_sudoku_section)
)

In [85]:
# show the section table (one row per puzzle, one column per filled-in cell);
# columns a puzzle never reached show up as missing values
garland_data__section

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,49,50,51,52,53,54,55,56,57,58
0,8,7,1,6,0,0,0,0,1,1,...,,,,,,,,,,
1,1,7,6,8,7,6,0,0,0,3,...,,,,,,,,,,
2,2,6,3,3,0,1,2,2,5,5,...,,,,,,,,,,
3,0,3,1,0,0,0,0,2,2,1,...,,,,,,,,,,
4,1,2,5,3,6,7,6,3,7,8,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,1,4,2,1,1,1,0,0,7,8,...,,,,,,,,,,
84,0,2,2,1,4,8,5,5,5,5,...,,,,,,,,,,
85,1,3,3,4,7,7,4,5,5,5,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,2.0
86,2,8,5,7,1,0,6,7,7,2,...,,,,,,,,,,


In [36]:
from collections import Counter

In [93]:
# look for most common sections I start in
# tally the section of the very first filled cell of each puzzle
# (the slice `:1` keeps only column 0; widen it to cover more steps)
np.unique(
    garland_data__section.iloc[:, :1].to_numpy().ravel(),
    return_counts=True,
)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int64),
 array([22, 21, 21,  4,  6,  4,  3,  5,  2], dtype=int64))

In [181]:
# section tallies over columns 0-4 (the first five filled cells)
np.unique(garland_data__section.iloc[:, :5].to_numpy(), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int64),
 array([55, 56, 63, 40, 44, 49, 50, 39, 44], dtype=int64))

In [182]:
# section tallies over columns 5-9
np.unique(garland_data__section.iloc[:, 5:10].to_numpy(), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int64),
 array([50, 70, 49, 44, 43, 51, 45, 45, 43], dtype=int64))

In [183]:
# section tallies over columns 10-14
np.unique(garland_data__section.iloc[:, 10:15].to_numpy(), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int64),
 array([65, 45, 52, 63, 49, 47, 42, 34, 43], dtype=int64))

In [184]:
# section tallies over columns 15-19
np.unique(garland_data__section.iloc[:, 15:20].to_numpy(), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int64),
 array([69, 56, 70, 50, 42, 50, 41, 31, 31], dtype=int64))

In [185]:
# section tallies over columns 20-24
np.unique(garland_data__section.iloc[:, 20:25].to_numpy(), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int64),
 array([36, 49, 47, 64, 54, 52, 52, 36, 50], dtype=int64))

In [186]:
# section tallies over columns 25-29
np.unique(garland_data__section.iloc[:, 25:30].to_numpy(), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int64),
 array([33, 30, 27, 62, 58, 64, 53, 63, 50], dtype=int64))

In [187]:
# section tallies over columns 30-34
np.unique(garland_data__section.iloc[:, 30:35].to_numpy(), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int64),
 array([24, 28, 34, 57, 61, 41, 67, 67, 61], dtype=int64))

In [188]:
# section tallies over columns 35-39
np.unique(garland_data__section.iloc[:, 35:40].to_numpy(), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int64),
 array([26, 28, 30, 34, 50, 52, 45, 84, 91], dtype=int64))

In [189]:
# section tallies over columns 40-44; missing entries in this window are not
# dropped, so NaN is counted as a value of its own
np.unique(garland_data__section.iloc[:, 40:45].to_numpy(), return_counts=True)

(array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8., nan]),
 array([23, 28, 31, 25, 41, 34, 50, 71, 51, 86], dtype=int64))

In [190]:
# section tallies over columns 45-49; missing entries in this window are not
# dropped, so NaN is counted as a value of its own
np.unique(garland_data__section.iloc[:, 45:50].to_numpy(), return_counts=True)

(array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8., nan]),
 array([  2,   6,   5,   1,   3,   7,   5,   9,   9, 393], dtype=int64))

In [168]:
# look for most common sections I end in
# tally the section each puzzle finishes in
Counter(
    # drop the NaN entries of the row, then keep its final remaining value
    steps[~np.isnan(steps)][-1]
    for steps in garland_data__section.to_numpy()
)

Counter({5.0: 11,
         6.0: 8,
         7.0: 15,
         4.0: 10,
         3.0: 7,
         8.0: 16,
         1.0: 12,
         0.0: 5,
         2.0: 4})

## String of pearls: times

In [135]:
# parse every stored `times` list literal, then spread each list across the
# columns of its own row
garland_times = pd.DataFrame(
    [literal_eval(raw_times) for raw_times in doks_perf['times']]
)

In [136]:
# write the parsed times to disk, leaving the row index out of the file
garland_times.to_csv(path_or_buf='string_of_pearls_times_data.csv', index=False)