## Import statements

In [87]:
import ast
import os
import urllib.parse

import cv2
import numpy as np
import openai
from PIL import Image
from pytesseract import image_to_string

## This bit separates the crossword itself from the screenshot as grid.png

In [64]:
# Where is our image?
image_path = 'Screenshot.png'

# Preparing for cropping: grayscale -> blur -> adaptive threshold
im = cv2.imread(image_path)
# cv2.imread returns None (it does not raise) when the file is missing or unreadable
if im is None:
    raise FileNotFoundError(f"Could not read image '{image_path}'")
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
im = cv2.GaussianBlur(im, (5, 5), 0)
im = cv2.adaptiveThreshold(im, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 9, 4)
cv2.imwrite("preprocess.jpg", im)

# Grid is the largest contour that covers less than half of the image
contours, _ = cv2.findContours(im, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
max_allowed_area = im.shape[0] * im.shape[1] / 2
max_area = -1
max_idx = -1
for i, contour in enumerate(contours):
    area = cv2.contourArea(contour)
    if max_area < area < max_allowed_area:
        max_area = area
        max_idx = i

# Without this guard max_idx == -1 would silently index the last contour
# (or fail with an unhelpful IndexError when no contour exists at all)
if max_idx < 0:
    raise ValueError("No contour smaller than half of the image was found; cannot locate the grid")

# Saving grid.png: crop the original (colour) screenshot to the contour's bounding box
rect = cv2.boundingRect(contours[max_idx])
print(rect)
with Image.open(image_path) as screenshot:
    screenshot.crop((rect[0], rect[1], rect[0] + rect[2], rect[1] + rect[3])).save("grid.png")


(41, 29, 716, 720)


## Crossword hint extraction

In [71]:
def extract_hints_from_image(image_path):
    """Run OCR on an image and return the raw across and down clue text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A tuple ``(across_hints, down_hints)`` of strings. ``across_hints`` is
        the text before the first "DOWN" heading with every "ACROSS" removed
        and surrounding whitespace stripped; ``down_hints`` is the text after
        that heading, unmodified.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If the OCR output does not contain a "DOWN" heading.
    """
    # Load the image; cv2.imread returns None instead of raising on failure
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image '{image_path}'")

    # Convert the image to grayscale for OCR
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Use pytesseract to extract text from the image
    extracted_text = str(image_to_string(gray))

    # Only consider text after "Prize crossword"
    if "Prize crossword" in extracted_text:
        extracted_text = extracted_text.split("Prize crossword")[1].strip()

    # Split on the first "DOWN" heading. partition() avoids the unpacking
    # error that split() caused when "DOWN" occurred more than once.
    across_hints, heading, down_hints = extracted_text.partition("DOWN")
    if not heading:
        raise ValueError("OCR output does not contain a 'DOWN' heading")
    across_hints = across_hints.replace("ACROSS", "").strip()

    return across_hints, down_hints

# OCR the full screenshot; `across` and `down` hold the raw clue text per direction
across, down = extract_hints_from_image(image_path)
# print(across)
# print(down)



1 Tender, painful (4)

2 Abrade (3)

3 Measuring device (5)

4 --- Bradbury (pictured),
TV presenter (5)

5 Eat, take in (6)

6 Time of crisis (9)

9 Finest (4)
11 Air (@ programme) (9)
13 Fraught (5)

14 Damp (5)

18 Stupor (6)

20 Nuisance (4)

22 Marine predator (5)
23 Force forward (5)

24 Wide smile (4)

27 Small drink (3)


This bit finds the green squares and gets coordinates for them
## Do we need this?
Actually no

In [15]:
# crossword_path = 'grid.png'
# 
# def detect_green_squares_and_overlay(crossword_path):
#     # Load the image
#     image = cv2.imread(crossword_path)
#     
#     # Convert to HSV for color-based detection
#     hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
#     
#     # Define range for green color
#     lower_green = (40, 25, 25)
#     upper_green = (45, 255,255)
# 
#     
#     # Threshold the HSV image to get only green colors
#     mask = cv2.inRange(hsv, lower_green, upper_green)
#     
#     # Convert the binary mask to a 3-channel image
#     mask_colored = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
#     
#     # Overlay the mask on the original image
#     alpha = 0.5  # Define the opacity for overlay
#     overlay = cv2.addWeighted(image, 1 - alpha, mask_colored, alpha, 0)
#     
#     # Find contours to detect squares
#     contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#     
#     # Placeholder list to store positions of green squares
#     positions = []
#     
#     for contour in contours:
#         # Compute the center of the contour
#         M = cv2.moments(contour)
#         if M["m00"] != 0:
#             cX = int(M["m10"] / M["m00"])
#             cY = int(M["m01"] / M["m00"])
#             
#             # Ignore the center element (based on contour area, adjust the threshold as needed)
#             if cv2.contourArea(contour) < 5000:  # 5000 is an example threshold, adjust as necessary
#                 positions.append((cX, cY))
#                 
#                 # Draw a circle at the detected position on the overlay
#                 cv2.circle(overlay, (cX, cY), 5, (0, 0, 255), -1)  # Drawing in red for visibility
# 
#     # Display the overlay
#     cv2.imshow("Overlay", overlay)
#     cv2.waitKey(0)
#     cv2.destroyAllWindows()
#     
#     print(type(positions))
#     print(positions)
#     return positions
#     
# 
# detect_green_squares_and_overlay(crossword_path)
# 


<class 'list'>
[(308, 703), (85, 591), (587, 422), (252, 254), (29, 29)]


[(308, 703), (85, 591), (587, 422), (252, 254), (29, 29)]

## Experimental bit turned core with the following steps:
#### 1. Use contours to detect every square
#### 2. Get coordinates for every square
#### 3. Group squares whose X coordinates agree to within ±2–3 px to get the size and shape of the grid
#### 4. Read contents of every square one by one, and based on findings append symbol to matrix
#### 5. Return matrix
Except in our use case it's always 13 X 13 so we're simplifying

In [22]:
# Load the cropped grid; cv2.imread returns None instead of raising on failure
img = cv2.imread('grid.png')
if img is None:
    raise FileNotFoundError("Could not read 'grid.png'; run the cropping cell first")

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply adaptive thresholding
# NOTE(review): adaptive_thresh / cleaned_thresh are not read by any code visible in
# this notebook; kept in case other cells rely on them
adaptive_thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

# Use morphological operations to clean up the image
kernel = np.ones((5, 5), np.uint8)
cleaned_thresh = cv2.morphologyEx(adaptive_thresh, cv2.MORPH_CLOSE, kernel)

# Divide the image into a 13x13 grid to get the coordinates of each cell.
# Entries are (x, y, width, height) in row-major order; integer division means
# up to 12 trailing pixels per axis are left out of the last cells.
cell_width = img.shape[1] // 13
cell_height = img.shape[0] // 13
grid_coordinates = [(j*cell_width, i*cell_height, cell_width, cell_height) for i in range(13) for j in range(13)]

# Analyze each cell to determine its type (black, white, numbered, green)
# Function to determine the content of a square based on its mean pixel value and presence of small details (numbers)
def determine_square_content_v2(roi_gray, roi_thresh):
    """Classify one grid cell from its grayscale pixels and its binary threshold.

    Args:
        roi_gray: Grayscale pixels of the cell.
        roi_thresh: Thresholded version of the same cell; pixels equal to 255
            count as white.

    Returns:
        'X' when the mean is below 50 and under 20% of pixels are white;
        'O' or "(num)" when the mean is above 200 and over 75% are white;
        'G' or "(numG)" otherwise. The "(num…)" variants are returned when the
        largest external contour covers less than 15% of the cell.
    """
    def largest_contour_is_small():
        # Shared by the white and green branches: is there a contour, and is
        # the biggest one smaller than 15% of the cell?
        found, _ = cv2.findContours(roi_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not found:
            return False
        biggest = max(found, key=cv2.contourArea)
        return cv2.contourArea(biggest) < 0.15 * roi_thresh.size

    brightness = np.mean(roi_gray)
    pixel_count = roi_thresh.shape[0] * roi_thresh.shape[1]
    white_ratio = np.sum(roi_thresh == 255) / pixel_count

    # Dark and almost no white pixels: black square
    if brightness < 50 and white_ratio < 0.2:
        return 'X'

    # Bright and mostly white: white square, possibly numbered
    if brightness > 200 and white_ratio > 0.75:
        return "(num)" if largest_contour_is_small() else 'O'

    # Anything in between is treated as a green square, possibly numbered
    return "(numG)" if largest_contour_is_small() else 'G'

# Classify every cell in row-major order...
cell_labels = []
for x, y, w, h in grid_coordinates:
    roi_gray = gray[y:y+h, x:x+w]
    _, roi_thresh = cv2.threshold(roi_gray, 127, 255, cv2.THRESH_BINARY)
    cell_labels.append(determine_square_content_v2(roi_gray, roi_thresh))

# ...then cut the flat list into complete rows of 13 cells
complete_rows = len(cell_labels) // 13
text_matrix = [cell_labels[r * 13:(r + 1) * 13] for r in range(complete_rows)]

text_matrix


[['G', 'O', 'O', 'O', 'O', 'O', 'X', 'O', 'O', 'O', 'O', 'O', 'O'],
 ['O', 'X', 'O', 'X', 'X', 'O', 'O', 'O', 'X', 'O', 'X', 'O', 'X'],
 ['O', 'O', 'O', 'O', 'O', 'O', 'X', 'O', 'X', 'O', 'O', 'O', 'O'],
 ['O', 'X', 'X', 'O', 'X', 'O', 'X', 'O', 'X', 'O', 'X', 'O', 'X'],
 ['X', 'O', 'X', 'O', 'G', 'O', 'O', 'O', 'O', 'O', 'X', 'O', 'X'],
 ['O', 'O', 'O', 'O', 'O', 'G', 'G', 'G', 'O', 'O', 'O', 'O', 'O'],
 ['X', 'O', 'X', 'X', 'O', 'G', 'G', 'O', 'O', 'X', 'X', 'O', 'X'],
 ['O', 'O', 'O', 'O', 'O', 'G', 'G', 'O', 'O', 'O', 'G', 'O', 'O'],
 ['X', 'O', 'X', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'X', 'O', 'X'],
 ['X', 'O', 'X', 'O', 'X', 'O', 'X', 'O', 'X', 'O', 'X', 'X', 'O'],
 ['O', 'G', 'O', 'O', 'X', 'O', 'X', 'O', 'O', 'O', 'O', 'O', 'O'],
 ['X', 'O', 'X', 'O', 'X', 'O', 'O', 'O', 'X', 'X', 'O', 'X', 'O'],
 ['O', 'O', 'O', 'O', 'O', 'G', 'X', 'O', 'O', 'O', 'O', 'O', 'O']]

## This saves every single cell as a separate image for OCR

In [None]:
# Function to save every single square except the middle 3x3 squares
save_dir = "numbered_squares/"
def save_all_squares(img, coords):
    """Write every grid cell except the middle 3x3 block to ``save_dir`` as a PNG.

    Args:
        img: Image array the cells are sliced from.
        coords: Row-major sequence of 169 ``(x, y, w, h)`` tuples (13 x 13 grid).

    Returns:
        List of the file paths written, in row-major order.

    Raises:
        OSError: If a cell image cannot be written.
    """
    # cv2.imwrite does not create missing directories; it only returns False,
    # so make sure the target directory exists before writing
    os.makedirs(save_dir, exist_ok=True)

    saved_files_all = []  # To keep track of saved files
    for i in range(13):
        for j in range(13):
            # Ignoring the middle 3x3 squares
            if 5 <= i <= 7 and 5 <= j <= 7:
                continue
            x, y, w, h = coords[i*13 + j]
            roi = img[y:y+h, x:x+w]

            filename = save_dir + f"all_squares_{i}_{j}.png"

            # cv2.imwrite reports failure through its return value
            if not cv2.imwrite(filename, roi):
                raise OSError(f"Could not write image at position {i}, {j} to {filename}")

            saved_files_all.append(filename)
    return saved_files_all  # Return the list of saved files


# Save every square except the middle 3x3 ones
saved_files_all = save_all_squares(img, grid_coordinates)

saved_files_all[:5]  # Display only the first 5 saved file paths for verification


## This generates the OCR matrix with numbers and their locations

In [52]:
import pytesseract
from PIL import Image

# Initialize the matrix; 0 means "no clue number recognised in this cell"
ocr_matrix = [[0 for _ in range(13)] for _ in range(13)]

# Process saved squares using pytesseract
for i in range(13):
    for j in range(13):
        # Skip the middle 3x3 squares (they were never saved) and mark them 'X'
        if 5 <= i <= 7 and 5 <= j <= 7:
            ocr_matrix[i][j] = 'X'
            continue

        filename = f"numbered_squares/all_squares_{i}_{j}.png"

        # Open the image inside a context manager so the file handle is closed
        # after OCR instead of leaking one handle per cell
        with Image.open(filename) as image:
            # Pre-crop to the box (4, 3, 25, 27) before OCR
            # NOTE(review): presumably the corner where the clue number is printed - confirm
            cropped_image = image.crop((4, 3, 25, 27))
            result = pytesseract.image_to_string(cropped_image, config='--psm 6').strip()

        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() rejects with ValueError
        if result.isdecimal():
            ocr_matrix[i][j] = int(result)

# Print the resulting matrix
for row in ocr_matrix:
    print(row)


[1, 0, 2, 0, 0, 3, 0, 4, 0, 5, 0, 6, 0]
[0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0]
[8, 0, 0, 9, 0, 0, 0, 0, 0, 10, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 11, 0, 12, 13, 0, 0, 0, 14, 0, 0, 0, 0]
[15, 0, 0, 0, 0, 'X', 'X', 'X', 16, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 'X', 'X', 'X', 0, 0, 0, 0, 0]
[17, 0, 0, 18, 0, 'X', 'X', 'X', 19, 20, 0, 0, 0]
[0, 0, 0, 21, 0, 22, 0, 23, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24]
[25, 0, 0, 0, 0, 0, 0, 26, 0, 0, 27, 0, 0]
[0, 0, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0]
[29, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0]


## This will join the two matrices together into combined_matrix

In [67]:

# Combining the two matrices cell by cell
def _merge_cell(ocr_element, text_element):
    """Merge one OCR value and one square type into a single token.

    'X' in either input wins; otherwise a non-zero integer OCR value is wrapped
    as '(n)' (or '(nG)' on a green square), and cells without a number become
    'G' or 'O'.
    """
    if 'X' in (ocr_element, text_element):
        return 'X'
    has_number = isinstance(ocr_element, int) and ocr_element != 0
    if text_element == 'G':
        return f'({ocr_element}G)' if has_number else 'G'
    return f'({ocr_element})' if has_number else 'O'


combined_matrix = [
    [_merge_cell(ocr_element, text_element) for ocr_element, text_element in zip(ocr_row, text_row)]
    for ocr_row, text_row in zip(ocr_matrix, text_matrix)
]
print(combined_matrix)



[['(1G)', 'O', '(2)', 'O', 'O', '(3)', 'X', '(4)', 'O', '(5)', 'O', '(6)', 'O'], ['O', 'X', 'O', 'X', 'X', '(7)', 'O', 'O', 'X', 'O', 'X', 'O', 'X'], ['(8)', 'O', 'O', '(9)', 'O', 'O', 'X', 'O', 'X', '(10)', 'O', 'O', 'O'], ['O', 'X', 'X', 'O', 'X', 'O', 'X', 'O', 'X', 'O', 'X', 'O', 'X'], ['X', '(11)', 'X', '(12)', '(13G)', 'O', 'O', 'O', '(14)', 'O', 'X', 'O', 'X'], ['(15)', 'O', 'O', 'O', 'O', 'X', 'X', 'X', '(16)', 'O', 'O', 'O', 'O'], ['X', 'O', 'X', 'X', 'O', 'X', 'X', 'X', 'O', 'X', 'X', 'O', 'X'], ['(17)', 'O', 'O', '(18)', 'O', 'X', 'X', 'X', '(19)', '(20)', 'G', 'O', 'O'], ['X', 'O', 'X', '(21)', 'O', '(22)', 'O', '(23)', 'O', 'O', 'X', 'O', 'X'], ['X', 'O', 'X', 'O', 'X', 'O', 'X', 'O', 'X', 'O', 'X', 'X', '(24)'], ['(25)', 'G', 'O', 'O', 'X', 'O', 'X', '(26)', 'O', 'O', '(27)', 'O', 'O'], ['X', 'O', 'X', 'O', 'X', '(28)', 'O', 'O', 'X', 'X', 'O', 'X', 'O'], ['(29)', 'O', 'O', 'O', 'O', 'G', 'X', '(30)', 'O', 'O', 'O', 'O', 'O']]


## This will test if our manually made matrix matches the automated matrix

In [49]:
# def test_matrices(matrix1, matrix2):
#     for i in range(len(matrix1)):
#         for j in range(len(matrix1[0])):
#             if matrix1[i][j] != matrix2[i][j]:
#                 return False
#     return True
# 
# # Test
# result = test_matrices(combined_matrix, intended_matrix)
# print(result)
# print("Matrices are identical." if result else "Matrices are not identical.")


True
Matrices are identical:


## Alternative approach
###### Potentially could use crossword-solver.io instead of GPT-4, as it may be more accurate when solving word by word. Could also:
1. Identify which clues give any green letters at all
2. Resolve those
3. Use logic/GPT to find which letters correspond to green squares
4. Use GPT to come up with words with these letters 

In [72]:
# Taking 'across' and 'down' contents and making it into a dictionary
def parse_section(section_str):
    """Turn the raw text of one clue section into ``{clue number: clue text}``.

    A line that starts with a digit begins a new clue; any other non-blank line
    is appended (space-separated) to the clue before it. Text that appears
    before the first numbered line is ignored, because it cannot belong to any
    clue. A trailing '.' on the clue number is dropped.

    Args:
        section_str: Multi-line string holding the clues of one direction.

    Returns:
        Dict mapping the clue number (as a string) to the clue text. Empty
        input gives an empty dict; a line holding only a number maps to ''.
    """
    lines = [line.strip() for line in section_str.split('\n') if line.strip()]

    # Combine multi-line clues
    combined_lines = []
    for line in lines:
        if line[0].isdigit():
            # A leading digit marks the start of a new clue
            combined_lines.append(line)
        elif combined_lines:
            # Continuation of the previous clue
            combined_lines[-1] += " " + line
        # else: stray text before the first clue - nothing to attach it to

    # Parse the combined lines; partition() (unlike unpacking split()) does not
    # fail when a line consists of a number with no clue text
    section_dict = {}
    for line in combined_lines:
        number, _, clue = line.partition(" ")
        section_dict[number.rstrip('.')] = clue
    return section_dict

# Parse the raw OCR text of each direction into {clue number: clue text}
parsed_clues_from_vars = {
    "ACROSS": parse_section(across),
    "DOWN": parse_section(down)
}

parsed_clues_from_vars

{'ACROSS': {'1': 'Shriek, yell (6)',
  '4': 'Skilled woodworker (6)',
  '7': 'Flightless bird (3)',
  '8': 'Warren dweller (6)',
  '10': 'Joy, happiness (4)',
  '12': 'Brooks, rills (7)',
  '15': 'Inscribe (5)',
  '16': 'Fish-eating mammal (5)',
  '17': 'Pub sport (5)',
  '19': 'Dot, tiny particle (5)',
  '21': 'Let-up (7)',
  '25': 'Farm building (4)',
  '26': 'Golf club (6)',
  '28': 'Fish eggs (3)',
  '29': 'Run naked (6)',
  '30': 'Pay attention (6)'},
 'DOWN': {'1': 'Tender, painful (4)',
  '2': 'Abrade (3)',
  '3': 'Measuring device (5)',
  '4': '--- Bradbury (pictured), TV presenter (5)',
  '5': 'Eat, take in (6)',
  '6': 'Time of crisis (9)',
  '9': 'Finest (4)',
  '11': 'Air (@ programme) (9)',
  '13': 'Fraught (5)',
  '14': 'Damp (5)',
  '18': 'Stupor (6)',
  '20': 'Nuisance (4)',
  '22': 'Marine predator (5)',
  '23': 'Force forward (5)',
  '24': 'Wide smile (4)',
  '27': 'Small drink (3)'}}

In [92]:
# Identifying which clues potentially give any green letters at all
def find_green_clues_refined(matrix):
    """List the clues whose numbered start cell can be reached from a green cell.

    For every cell containing 'G', the row is scanned leftwards and the column
    upwards (starting at the green cell itself) until an 'X' cell or the edge
    is reached. Every bracketed, numbered cell met on the way is reported as
    "<n> across" (row scan) or "<n> down" (column scan). The scan does not check
    whether that cell really starts a word in that direction, so the result is
    a superset that callers are expected to filter against the real clue lists.

    Args:
        matrix: Rectangular list of rows of string tokens such as 'X', 'O',
            'G', '(12)' or '(12G)'.

    Returns:
        List of unique "<n> across" / "<n> down" strings, sorted by clue number
        and then direction so the result is the same on every run.
    """
    def clue_number(cell):
        return ''.join(ch for ch in cell if ch.isdigit())

    def scan(cells, direction, found):
        # `cells` runs from the green cell back towards the edge
        for cell in cells:
            if cell == 'X':
                break
            # startswith() is safe on an empty token, unlike cell[0]
            if cell.startswith('('):
                number = clue_number(cell)
                if number:  # ignore bracketed tokens that carry no number
                    found.add(f"{number} {direction}")

    clues = set()
    for i, row in enumerate(matrix):
        for j, cell in enumerate(row):
            if 'G' not in cell:
                continue
            scan((matrix[i][col] for col in range(j, -1, -1)), "across", clues)
            scan((matrix[r][j] for r in range(i, -1, -1)), "down", clues)

    # Iterating a set of strings has no stable order between interpreter runs;
    # sort so downstream dictionaries are built reproducibly
    return sorted(clues, key=lambda clue: (int(clue.split(" ", 1)[0]), clue))


def filter_given_clues(refined_clue_list, clues):
    """Keep only the candidate clues that exist in the parsed clue lists.

    Args:
        refined_clue_list: Iterable of strings shaped like "12 across" or "3 down".
        clues: Dict with "ACROSS" and "DOWN" keys, each mapping clue number
            (string) to clue text.

    Returns:
        Dict with "ACROSS" and "DOWN" keys holding the matching
        ``{number: clue text}`` entries, in the order they were encountered.
        Entries with any other direction, or an unknown number, are dropped.
    """
    section_for = {"across": "ACROSS", "down": "DOWN"}
    filtered_clues = {"ACROSS": {}, "DOWN": {}}

    for entry in refined_clue_list:
        number, direction = entry.split(" ", 1)
        section = section_for.get(direction)
        if section is None:
            continue
        if number in clues[section]:
            filtered_clues[section][number] = clues[section][number]

    return filtered_clues


# Identify candidate clues reachable from the green boxes ("<n> across" / "<n> down")
refined_clue_list = find_green_clues_refined(combined_matrix)

# Keep only the candidates that exist in the parsed ACROSS / DOWN clue lists
filtered_clues_final = filter_given_clues(refined_clue_list, parsed_clues_from_vars)

filtered_clues_final

{'ACROSS': {'1': 'Shriek, yell (6)',
  '12': 'Brooks, rills (7)',
  '19': 'Dot, tiny particle (5)',
  '25': 'Farm building (4)',
  '29': 'Run naked (6)'},
 'DOWN': {'1': 'Tender, painful (4)',
  '13': 'Fraught (5)',
  '11': 'Air (@ programme) (9)',
  '22': 'Marine predator (5)'}}

## Alternative is using GPT 4 directly with each relevant clue. We can compare results

In [82]:


# Read the API key from the environment instead of hard-coding it in the notebook.
# NOTE: the key that used to be written here has been exposed and should be revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Ask GPT-4 for one answer per clue, returned as a list in the same order as
# the clues appear in filtered_clues_final
response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[
        {
            "role": "user",
            "content": f"For every crossword clue contained in the following dictionary, which word could this clue correspond to? Are there any others? Please only give the most likely word making sure the number of letters matches the clue. Only return the most likely word, nothing else, no punctuation, no other text, just the word itself, as a list. {filtered_clues_final}"
        },
    ],
  temperature=1,
  max_tokens=4523,
  top_p=1,
  frequency_penalty=0,
  presence_penalty=0
)

In [89]:
# Parse the model's reply into a Python list.
# ast.literal_eval only accepts literals, so unlike eval() it cannot execute
# code that happens to be contained in the (untrusted) reply.
solution_list = ast.literal_eval(response["choices"][0]["message"]["content"])

# The answers are matched to clues purely by position, so there must be at
# least one answer per clue
expected_count = sum(len(clues) for clues in filtered_clues_final.values())
if not isinstance(solution_list, (list, tuple)) or len(solution_list) < expected_count:
    raise ValueError(
        f"Expected a list of at least {expected_count} answers, got: {solution_list!r}"
    )

# Creating the solutions dictionary: answers are consumed in the iteration
# order of filtered_clues_final (all ACROSS entries, then all DOWN entries)
remaining_solutions = iter(solution_list)
solutions_dict = {}
for direction, clues in filtered_clues_final.items():
    solutions_dict[direction] = {number: next(remaining_solutions) for number in clues}

solutions_dict

{'ACROSS': {'12': 'Streams',
  '19': 'Speck',
  '1': 'Scream',
  '25': 'Barn',
  '29': 'Streak'},
 'DOWN': {'11': 'Breath', '22': 'Shark', '1': 'Sore', '13': 'Tense'}}

Now I need to somehow find where the green squares are and find the letters corresponding to the green squares, and list them out

In [94]:
def fill_crossword(matrix, clues):
    """Lay the given answers onto the grid and collect the letters on green cells.

    Every bracketed, numbered cell is treated as the start of an ACROSS word if
    the cell to its right is playable and as the start of a DOWN word if the
    cell below is playable; when ``clues`` has an answer for that number and
    direction, its letters are walked along the grid until the answer ends, the
    grid edge is reached or a non-playable cell blocks the way.

    Playability and greenness are always judged on the original ``matrix``.
    Judging them on a partly filled copy made across words stop at any cell a
    crossing word had already filled, and made a filled letter "G" look like a
    green square.

    Args:
        matrix: Rectangular list of rows of tokens ('X', 'O', 'G', '(n)', '(nG)').
            It is not modified.
        clues: Dict that may hold "ACROSS" and "DOWN" keys, each mapping clue
            number (string) to the answer word.

    Returns:
        Dict mapping ``(row, column)`` of each reached green cell to its letter.
        If two answers cross on a green cell, the first one processed
        (row-major order, ACROSS before DOWN) wins.
    """
    green_letters = {}
    n_rows = len(matrix)
    n_cols = len(matrix[0]) if matrix else 0

    # Helper function to check if a cell is playable
    def is_playable(cell):
        return cell == 'O' or cell.startswith('(') or 'G' in cell

    # Helper function to get the clue number from a cell
    def get_clue_number(cell):
        return ''.join(ch for ch in cell if ch.isdigit())

    # Helper function to walk a word through the grid from (i, j)
    def fill_word(i, j, direction, word):
        step_i, step_j = (0, 1) if direction == "ACROSS" else (1, 0)
        for letter in word:
            if 'G' in matrix[i][j]:
                # setdefault keeps the letter from the first answer that got here
                green_letters.setdefault((i, j), letter)

            # Move to the next cell; stop at the edge or at a blocked cell
            i += step_i
            j += step_j
            if i >= n_rows or j >= n_cols or not is_playable(matrix[i][j]):
                break

    across_answers = clues.get("ACROSS", {})
    down_answers = clues.get("DOWN", {})

    # Go through each cell in the matrix to place words
    for i in range(n_rows):
        for j in range(n_cols):
            cell = matrix[i][j]
            if not cell.startswith('('):  # Not a clue cell
                continue
            clue_number = get_clue_number(cell)

            if j + 1 < n_cols and is_playable(matrix[i][j + 1]):  # Possible ACROSS start
                if clue_number in across_answers:
                    fill_word(i, j, "ACROSS", across_answers[clue_number])

            if i + 1 < n_rows and is_playable(matrix[i + 1][j]):  # Possible DOWN start
                if clue_number in down_answers:
                    fill_word(i, j, "DOWN", down_answers[clue_number])

    return green_letters

# Place the solved answers on the combined grid and collect the letters that
# land on green squares, keyed by (row, column)


green_letters_result = fill_crossword(combined_matrix, solutions_dict)
green_letters_result

# Make into a list

def extract_green_letters(green_letters_dict):
    """Return the dictionary's values as a list, in the dictionary's iteration order."""
    return [letter for letter in green_letters_dict.values()]

# Plain list of the green-square letters (positions dropped), shown as the cell output
green_letters_list = extract_green_letters(green_letters_result)
green_letters_list


['S', 't', 'e', 'k', 'a']

Getting answers from Waitrose website

In [98]:
import requests
from bs4 import BeautifulSoup

def extract_answers_from_url(url, timeout=10):
    """Fetch a page and return the values of its ``answer`` radio buttons.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests.get can block indefinitely.

    Returns:
        List of the ``value`` attributes of every
        ``<input type="radio" name="answer">`` element, in document order.
        Inputs without a ``value`` attribute are skipped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # Fetch the content of the URL
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raise an exception for HTTP errors

    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Look for the radio input elements
    radio_inputs = soup.find_all('input', {'type': 'radio', 'name': 'answer'})

    # Extract the value attribute of each radio input; indexing a tag that has
    # no such attribute would raise KeyError, so those are filtered out
    return [input_elem['value'] for input_elem in radio_inputs if input_elem.has_attr('value')]

# Download the competition page and collect the candidate answers it offers
url = "https://www.waitrose.com/ecom/content/competitions/win-with-waitrose-weekend"
answers = extract_answers_from_url(url)
# print(answers)

Now we're gonna ask GPT to figure out what the most likely word is from the following prompt, and based on these letters:

In [95]:


# Read the API key from the environment instead of hard-coding it in the notebook.
# NOTE: the key that used to be written here has been exposed and should be revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Ask GPT-4 which of the candidate answers can be formed from the green letters
response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[
        {
            "role": "user",
            "content": f"Take the letters from the following list: '{green_letters_list}' and return the most likely word that can be made with these letters and also  can only be one of these words: {answers}. Please only return just the word, nothing else, no punctuation, no other text, just the word itself"
        },
    ],
  temperature=1,
  max_tokens=4523,
  top_p=1,
  frequency_penalty=0,
  presence_penalty=0
)

Let's just get the answer itself

In [100]:
# Print the reply text verbatim; as the output below shows, the model may wrap
# the word in a sentence despite the prompt asking for the word only
print(response["choices"][0]["message"]["content"])

The most likely word that can be made with these letters is "Steak".


## The final bit attempts to feed the summarized text data above (textual matrix representation of puzzle and hints) into GPT-4 and prompt it to resolve the puzzle and return the characters that correspond to the green letters

Steps to be taken:
1. Set up the OpenAI API key
2. Write prompt with f strings that will pass our data
3. Display result, potentially both all the solutions and the green letters, or just the final solution

Another thing would be to directly fetch the right page from the weekly magazine update,
and then automatically submit the solution and autofill the data on the form

Now, for each of the clues contained in "filtered_clues_final" I want to:
1. Take the clue and modify the following link: https://crossword-solver.io/clue/shriek%2c-yell/pattern/______/
Where "shriek%2c-yell" corresponds to the clue bit in our clue, and "pattern/______" corresponds to the (6) part in our clue (hence 6 underscores) 
2. Iterate over every clue in our dictionary and return a new dictionary with the corresponding links

In [77]:

# def generate_links_for_clues(filtered_clues):
#     links = {"ACROSS": {}, "DOWN": {}}
# 
#     for direction, clues in filtered_clues.items():
#         for number, clue in clues.items():
#             # Extracting the clue description and answer length
#             clue_description = clue.rsplit(" ", 1)[0]  # Everything except the last part
#             answer_length = int(clue.split("(")[-1].rstrip(")"))  # Extract the number between parentheses
#             
#             # Formatting the clue description to be URL-friendly
#             formatted_description = urllib.parse.quote(clue_description)
#             
#             # Generating the link
#             link = f"https://crossword-solver.io/clue/{formatted_description}/pattern/{'_' * answer_length}/"
#             links[direction][number] = link
#     
#     return links
# 
# clue_links = generate_links_for_clues(filtered_clues_final)
# clue_links


{'ACROSS': {'12': 'https://crossword-solver.io/clue/Brooks%2C%20rills/pattern/_______/',
  '19': 'https://crossword-solver.io/clue/Dot%2C%20tiny%20particle/pattern/_____/',
  '1': 'https://crossword-solver.io/clue/Shriek%2C%20yell/pattern/______/',
  '25': 'https://crossword-solver.io/clue/Farm%20building/pattern/____/',
  '29': 'https://crossword-solver.io/clue/Run%20naked/pattern/______/'},
 'DOWN': {'11': 'https://crossword-solver.io/clue/Air%20%28%40%20programme%29/pattern/_________/',
  '22': 'https://crossword-solver.io/clue/Marine%20predator/pattern/_____/',
  '1': 'https://crossword-solver.io/clue/Tender%2C%20painful/pattern/____/',
  '13': 'https://crossword-solver.io/clue/Fraught/pattern/_____/'}}