In [269]:
#pip install selenium
#install if not available

In [270]:
#import python libraries
import numpy as np
import pandas as pd
import time
import re

In [271]:
# setup selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC





In [272]:
#locate the webdriver
service = Service('pathtochromedriver') #mention the path to the chrome driver in your computer
# Set up the WebDriver (this project uses Chrome)
driver = webdriver.Chrome(service = service)


In [273]:
# Navigating to the website
#enter the wordle link
driver.get('https://www.nytimes.com/games/wordle/index.html') 
#find and press the play button
button = driver.find_element(By.XPATH,'/html/body/div[2]/div/div/div/div/div[2]/button[2]')
button.click()



In [274]:
#close the wordle tutorial
button = driver.find_element(By.XPATH,'/html/body/div[2]/div/dialog/div/div/button')
button.click()

In [275]:
# Locate the body or the main container where key inputs are expected
addText = driver.find_element(By.TAG_NAME, 'body')


In [276]:
import chardet

# Read the file in binary mode and detect encoding (if needed)
with open('full_wordlist.txt', 'rb') as file:
    raw_data = file.read()
    result = chardet.detect(raw_data)
    encoding = result['encoding']

print(f"Detected encoding: {encoding}")


Detected encoding: ascii


In [277]:
# importint the words list
file_path = 'full_wordlist.txt'

# Open the file and read lines into a list
with open(file_path, 'r', encoding='ascii') as file:
    # Read all lines and strip newline characters
    words = [line.strip() for line in file]
len(words)

14855

In [242]:
#seperating each word string by letters
wordsCharList = [list(word) for word in words]

wordsCharList[0]

['a', 'a', 'h', 'e', 'd']

In [278]:
#finding frequency of letters
alphaList = [item for sublist in words for item in sublist]

from collections import Counter

#creating a df and sort letters by frequency
alphaFreq = Counter(alphaList)
alphaFreqDf = pd.DataFrame(list(alphaFreq.items()), columns=['letter', 'frequency']).sort_values(by = 'frequency', ascending = False)
alphaFreqDf["probability"] = alphaFreqDf["frequency"]/sum(alphaFreqDf["frequency"]) #calculating the probability
alphaFreqDf

Unnamed: 0,letter,frequency,probability
2,e,7455,0.10037
7,s,7319,0.098539
0,a,7128,0.095968
18,o,5212,0.070172
8,r,4714,0.063467
5,i,4381,0.058984
4,l,3780,0.050892
10,t,3707,0.049909
16,n,3478,0.046826
19,u,2927,0.039408


In [279]:
#finding starter word
#I am using the words that are only made of the 7 most commonly occuring letters
#the letters can be tweaked to refine accuracy

commonLetters = alphaFreqDf["letter"].head(7)

#finding words that have the most common letters
#getting a list of words that has at least 3 or more of the letters from the list 
#this allows words with other letters not in the list of common letters.
starterWords = [word for word in words if sum(letter in word for letter in commonLetters) >4]

In [280]:
#picking a random starter word
#right now picking the starter word randomly which has worked the best for me
#rules to pick the starter words can be changed if it makes the accuracy better

import random
n = len(starterWords)
randomNum = random.randint(0,n-1)
print(randomNum) #this is the index of the word in the list it is picking randomly.


58


In [281]:
#identifyng the starter word
starterWord = starterWords[randomNum]
starterWord

'soral'

In [282]:
#introducing sleep time as selenuim is slow and unpredictable at times
time.sleep(20)

#adding the letters in the first row
for l in starterWord:
    addText.send_keys(l)

#press enter to confirm
addText.send_keys(Keys.RETURN)    


In [288]:
#creating a function to find a list of words for next row

def filter_words(words, presentInOtherPosition=None, discardLetters=None, correctLetters=None, repeatLetter = None):
    if discardLetters is None:
        discardLetters = [] # list of letters that should not be in the word
    if correctLetters is None:
        correctLetters = {} #dictionary of letters witht he positions that are guessed at the correct places in the word
    if presentInOtherPosition is None:
        presentInOtherPosition = {} #dictionary of letters that are in the word but with the incorrect positions
    if repeatLetter is None:
        repeatLetter = [] #the letter that is repeated in a word if the word has more than one occurence of the same letter
    

    filtered_list = [] # list of words that meet the hints from all the previous rounds
    
 
        #the logic is to check each letter and its position of a word from the word pool against... 
        #...the hints/rules from the previous rounds.
        
        
        #if the placeCondition is true after all the checks the word is consiered to be included in the filtered list
        #Setting the initial condition to be true
        
    placeCondition = True
        
    #print("analysing" + str(word))
    for letter in range(0,5):
        key = word[letter]
        #for example for testing the word "DEBIT"
        #for iteration 1 
        #check if we have already guessed a correct letter at the position we are testing.
        #if yes, is the correct letter 'D'?

        if correctLetters[letter] is not None and correctLetters[letter] != key:
            placeCondition = False #reject the word if no, and go to the next word in the list 
            break
        elif correctLetters[letter] == key:
            placeCondition = True #condition remains true, and we go to iteration 2 to test 'E'

        #OR

        #if a correct letter is not yet guessed at all (1:None) in correct letter dict
        elif correctLetters[letter] is None:
            #check if "D" is in the discard list
            if key in discardLetters:
                placeCondition = False #reject the word if yes, and go to the next word in the list 
                break

            #if not, there is a chance that we have used 'D'in our previous guess but at a different location
            #in other words 'D' is in the presentInOtherPositio dictionary

            #let's say the previous guess was "REDUB", and D is included in the dictionary as {d:[3]}
            #this means 'D' is in the word but not at the third position

            #we test in "DEBIT" in iteration 1 for letter 'D' if the D is not in location 3
            elif key in presentInOtherPosition and letter not in presentInOtherPosition[key]:
                placeCondition = True #if yes, the we go to iteration 2 to test 'E'
            elif key in presentInOtherPosition and letter in presentInOtherPosition[key]:
                placeCondition = False #if no, then we reject the word, go to the next word in the list
                break

                #if the letter 'D' is not present in any of the lists/dictionaries, ... 
                #the condition remains true and we move to next letter of the word 'R'
                #this means, 'D' is completely new guess

            #after testing all the letters of the word, if the placeCondition still remains true ... 
            #then we can consider the word to be added to the filtered list

            #like 'REBEL'


    
        #print("letter " + str(key) + " checked: "  + str(placeCondition))
        #we need to make sure that the placeCondition is true and 
        # the word has all the letters from the presentInOtherPosition list 
        if all(a in word  for a in presentInOtherPosition) and placeCondition == True:
        #the alogrithm set will get confused and produce wrong results if the correct word has letters occuring more than once
        #if we do not have a repeat Letter
            if not repeatLetter:
                filtered_list.append(word) #we add the word that meet the previous condition to our new pool
        #if we have a letter repeating    
            else:
                if all(word.count(r)>1 for r in repeatLetter): 
                    filtered_list.append(word) # we add only the words that have the letter appearing more than once
            
    #the new list is the output of the funciton        
    return(filtered_list)
                
            

In [None]:
#the code identifies the hints, sorts the letters into diferent lists/dictionaries and picks new guess word.

import re
pattern = r'aria-label="[^,]*,[^,]*,([^"]*)"' #regex to identify the hint
rowResults = {} #all the hints from all rounds get placed ito this dictionary
selectedWordList =[starterWord] #making a list of words already used in the guesses
discardLetters = [] #explained above
presentInOtherPosition = {} #explained above
correctLetters = {0:None ,1:None,2:None,3:None,4:None} #explained above

correctList = [] # list of values from the correctLetters dictionary
repeatLetter = [] #the list of letters that occur more than once in the final word, empty if it does not occur


for a in range(0,6): #range 6 as we have 6 gusses for the wordle
    time.sleep(20)
    xPath0 = None
    xPath0 = "/html/body/div[2]/div/div[4]/main/div[1]/div/div[" + str(a+1) + "]/div[" #path on the site that has the hints
    #print(xPath)
    result=[]
    correctList = []
    for l in correctLetters.values():
        correctList.append(l)
    #selectedWordList.append(SecondWord)
    for b in range (0,5): #range 5 as the words are 5 letters each
        xPath = xPath0 + str(b+1) + "]/div" 
        element = driver.find_element(By.XPATH, xPath) #finding the element that has hints
        full_html = element.get_attribute('outerHTML') #getting the element text
        match = re.search(pattern, full_html) #finding the part of elemt that has hints
        if match:
            value_after_second_comma = match.group(1).strip()  # Extract and strip any extra spaces
        else:
            value_after_second_comma = 'Not found'  # Handle case where pattern does not match
        #adding the hints to a dictionary
        result.append(value_after_second_comma) 
        rowResults[a] = result
    
    #example results
    #{0: [Correct,present in Other Position, Absent, Absent, Correct] } The key is the attempt number of the guess
    for b in range (0,5): #for all the letters and hints
        
        #if the hint to the corresponding letter is "Absent"
        if rowResults[a][b] == "absent" and selectedWordList[a][b] not in discardLetters :
            discardLetters.append(selectedWordList[a][b]) #include the letter in discardLetters list of not already present
        
        #if the hint to the corresponding letter ins "Present in Other position"
        elif rowResults[a][b] == "present in another position":
            if selectedWordList[a][b] in presentInOtherPosition: #the letter could be guessed wrong twice,
                presentInOtherPosition[selectedWordList[a][b]].append(b) #add only the incorrect location if the letter is yes
            else: #if it is not present
                presentInOtherPosition[selectedWordList[a][b]] = [b]#add both the guessed letter and the location it should not be in
        elif rowResults[a][b] == "correct":
            correctLetters[b] = selectedWordList[a][b]
            
        #identifying if the word has repeat letters
        #letter is repeating in the final word if the hint is "Present in anothe poisition" while it is already included in the correct list
        if rowResults[a][b] == "present in another position" and selectedWordList[a][b] in correctList:
            repeatLetter.append(selectedWordList[a][b]) #add letter to repeat letter list
    #print(rowResults)
    #print(discardLetters, presentInOtherPosition,correctLetters)
    
    #using the custom function to filter words based on the conditions
    words = filter_words(words, presentInOtherPosition, discardLetters, correctLetters, repeatLetter)
    
    #selecting a word from the filtered list at random
    #not the best way for selection, probabilities can be used to refine the process
    selectWord = wordsCharList[random.randint(0,len(wordsCharList)-1)]
    
    time.sleep(7)
    
    #input the letters in the boxes
    for l in selectWord:
        addText.send_keys(l)
    #press enter   
    addText.send_keys(Keys.RETURN)
    
    #Pause to see the result (optional)
    time.sleep(10)
    #add the word to the selected word list
    selectedWordList.append(selectWord)
    print(selectedWordList)


['soral', ['r', 'e', 'b', 'e', 'l']]
