In [1]:
def green_score(word, chances):
    """Return the position-aware score of ``word``.

    ``chances`` is indexed by letter position; each entry maps a letter to
    its weight at that position (missing letters weigh 0). A word with fewer
    than five distinct letters always scores 0.
    """
    distinct_letters = set(word)
    if len(distinct_letters) < 5:
        return 0

    total = 0
    for position, letter in enumerate(word):
        weights_here = chances[position]
        total = total + weights_here.get(letter, 0)
    return total

def yellow_score(word, chances, duplicates=False):
    """Return the position-independent score of ``word``.

    Every letter occurrence contributes ``chances[letter]`` (0 when the
    letter is absent from ``chances``). Unless ``duplicates`` is true, a word
    with fewer than five distinct letters scores 0.
    """
    if duplicates or len(set(word)) >= 5:
        total = 0
        for letter in word:
            total = total + chances.get(letter, 0)
        return total
    return 0

def legal_words(words, available_letters, green_letters, yellows):
    """Yield the stripped words that are consistent with the known hints.

    A word is kept when all of the following hold:
    * each of its letters occurs in ``available_letters``;
    * at every position where ``green_letters`` is not a space, the word has
      exactly that letter;
    * for every pattern in ``yellows`` and every non-space entry of it, the
      word contains that letter, but not at that position.
    """
    for raw in words:
        candidate = raw.strip()
        present = set(candidate)

        # Reject words using a letter that has been ruled out.
        if any(ch not in available_letters for ch in present):
            continue

        # Reject words that contradict a confirmed (green) position.
        if any(g != " " and g != ch for ch, g in zip(candidate, green_letters)):
            continue

        # Reject words that contradict any misplaced-letter (yellow) hint.
        contradicts_yellow = False
        for pattern in yellows:
            for ch, hint in zip(candidate, pattern):
                if hint == " ":
                    continue
                if ch == hint or hint not in present:
                    contradicts_yellow = True
                    break
            if contradicts_yellow:
                break
        if contradicts_yellow:
            continue

        yield candidate

def get_letter_frequencies(words):
    """Count, for each letter, how many of ``words`` contain it.

    Each word is stripped first and a letter is counted at most once per
    word. Returns ``(counts, number_of_words)``.
    """
    total_words = len(words)
    counts = {}
    for raw_word in words:
        for letter in set(raw_word.strip()):
            counts[letter] = counts.get(letter, 0) + 1
    return counts, total_words

def next_prediction(available_letters, green, yellow, verbose=False,
                    solutions_path='hasła.txt', guesses_path='5liter.txt'):
    """Suggest the next guesses for the current game state.

    Parameters
    ----------
    available_letters : str
        Letters that have not been ruled out; candidates containing any
        other letter are discarded.
    green : str
        Per-position pattern of confirmed letters, ``" "`` where unknown.
    yellow : iterable
        Per-position patterns of letters known to be in the word but not at
        that position, ``" "`` where there is no hint.
    verbose : bool
        When true, print the unknown-letter table and the remaining
        candidate words.
    solutions_path : str
        File with one possible solution per line (UTF-8).
    guesses_path : str
        File with one allowed guess per line (UTF-8).

    Returns
    -------
    dict
        ``best_elimination``: allowed guess covering the most frequent
        still-unknown letters; ``best_guess``: remaining candidate with the
        highest letter-frequency score; ``best_balanced``: allowed guess
        maximising half the elimination score plus the positional score;
        ``available_words_count``: number of remaining candidates.
    """
    # Read and close the file before doing any work on its contents.
    with open(solutions_path, 'r', encoding='utf-8') as file:
        words = file.readlines()
    available_words = list(legal_words(words, available_letters, green, yellow))

    letter_frequency, words_count = get_letter_frequencies(available_words)
    # Share of remaining candidates containing each letter, most frequent first.
    sorted_letters = {
        k: round(letter_frequency[k] / words_count, 2)
        for k in sorted(letter_frequency, key=letter_frequency.get, reverse=True)
    }
    # A letter is still "unknown" unless every candidate contains it. The
    # comparison uses the raw counts: testing the rounded share against 1
    # would wrongly drop letters present in e.g. 452 of 453 candidates.
    sorted_unknown_letters = {
        k: share
        for k, share in sorted_letters.items()
        if letter_frequency[k] < words_count
    }
    if verbose:
        print(sorted_unknown_letters)
        print(available_words)

    # positions[i][letter] = number of candidates with `letter` at index i.
    positions = [dict() for _ in range(5)]
    for word in available_words:
        for i, letter in enumerate(word):
            positions[i][letter] = positions[i].get(letter, 0) + 1

    with open(guesses_path, 'r', encoding='utf-8') as file:
        guess_words = [line.strip() for line in file]

    best_elimination_score = 0
    best_elimination_word = ""
    best_balanced_score = 0
    best_balanced_word = ""
    # Strict ">" keeps the first word in file order on ties.
    for word in guess_words:
        score = yellow_score(word, sorted_unknown_letters)
        if score > best_elimination_score:
            best_elimination_score = score
            best_elimination_word = word
        balanced_score = score / 2 + green_score(word, positions)
        if balanced_score > best_balanced_score:
            best_balanced_score = balanced_score
            best_balanced_word = word

    best_score = 0
    best_guess = ""
    for word in available_words:
        score = yellow_score(word, sorted_letters, duplicates=True)
        if score > best_score:
            best_score = score
            best_guess = word

    return {
        "best_elimination": best_elimination_word,
        "best_guess": best_guess,
        "best_balanced": best_balanced_word,
        "available_words_count": len(available_words),
    }

In [41]:
# Opening position: every letter is still available and there are no
# green or yellow hints yet.
letters = "abcdefghijklmnoprstuvwxyzóąćęłńśźż"
available_letters = letters
haslo = "KAJAK"  # not used by the call below
green = " " * 5
yellow = [" " * 5]
next_prediction(available_letters, green, yellow)

{'best_elimination': 'okrai',
 'best_guess': 'arara',
 'best_balanced': 'soria',
 'available_words_count': 4893}

In [2]:
# State with i, o, r, s ruled out and 'a' known to be in the word, but not
# in the last position.
green = " " * 5
yellow = ["    a"]
available_letters = "abcdefghjklmnptuvwxyzóąćęłńśźż"

next_prediction(available_letters, green, yellow)

{'best_elimination': 'kenty',
 'best_guess': 'kanak',
 'best_balanced': 'kapeć',
 'available_words_count': 453}

In [6]:
# State with 'k' misplaced in the first position and 'a' misplaced in the
# last; verbose output lists the unknown letters and remaining candidates.
green = " " * 5
yellow = ["k   a"]
available_letters = "abcdfghjklmpuvwxzóąćęłńśźż"

next_prediction(available_letters, green, yellow, verbose=True)

{'u': 0.43, 'p': 0.31, 'ć': 0.29, 'z': 0.26, 'j': 0.17, 'ł': 0.17, 'm': 0.14, 'd': 0.14, 'b': 0.14, 'l': 0.11, 'c': 0.11, 'ą': 0.11, 'ę': 0.11, 'h': 0.09, 'w': 0.06, 'f': 0.03}
['ajmak', 'alkad', 'bakch', 'bąkać', 'bujak', 'bukal', 'cudak', 'czkać', 'dukać', 'fukać', 'hamak', 'hukać', 'jąkać', 'lękać', 'luzak', 'łapak', 'majak', 'makak', 'mazak', 'pająk', 'pajuk', 'pakuł', 'pałąk', 'pęcak', 'pękać', 'pukać', 'układ', 'ukwap', 'wkład', 'zakał', 'zakaz', 'zakuć', 'zakup', 'zębak', 'zupak']
{'u': 0.43, 'p': 0.31, 'ć': 0.29, 'z': 0.26, 'j': 0.17, 'ł': 0.17, 'm': 0.14, 'd': 0.14, 'b': 0.14, 'l': 0.11, 'c': 0.11, 'ą': 0.11, 'ę': 0.11, 'h': 0.09, 'w': 0.06, 'f': 0.03}


{'best_elimination': 'płucz',
 'best_guess': 'makak',
 'best_balanced': 'pukać',
 'available_words_count': 35}

In [7]:
# Same yellow hints as the previous state, with a smaller set of letters
# still available.
green = " " * 5
yellow = ["k   a"]
available_letters = "abdfghjklmvwxóąćęńśźż"

next_prediction(available_letters, green, yellow, verbose=True)

{'m': 0.5, 'j': 0.38, 'ć': 0.38, 'l': 0.25, 'ą': 0.25, 'd': 0.12, 'b': 0.12, 'h': 0.12, 'ę': 0.12}
['ajmak', 'alkad', 'bąkać', 'hamak', 'jąkać', 'lękać', 'majak', 'makak']
{'m': 0.5, 'j': 0.38, 'ć': 0.38, 'l': 0.25, 'ą': 0.25, 'd': 0.12, 'b': 0.12, 'h': 0.12, 'ę': 0.12}


{'best_elimination': 'ćmiej',
 'best_guess': 'makak',
 'best_balanced': 'jąkać',
 'available_words_count': 8}

In [69]:
def game(solution, max_guesses=6, elimination_threshold=8):
    """Play one simulated game against ``solution``.

    The first guess is the "best_balanced" suggestion. Afterwards the
    "best_elimination" suggestion is used while more than
    ``elimination_threshold`` candidates remain, and "best_guess" otherwise.

    Parameters
    ----------
    solution : str
        The word to be found.
    max_guesses : int
        Number of guesses allowed before the game counts as lost.
    elimination_threshold : int
        Candidate count above which an elimination guess is preferred.

    Returns
    -------
    int
        Number of guesses needed, or ``max_guesses + 1`` when the game is
        lost (guess limit reached, or no candidate word is left).
    """
    failed = max_guesses + 1
    letters = set("abcdefghijklmnoprstuvwxyzóąćęłńśźż")
    green = list("     ")
    yellows = ["     "]
    prediction = next_prediction("".join(letters), "".join(green), yellows)["best_balanced"]
    guesses = 1
    while prediction != solution:
        if not prediction:
            # No word fits the hints (e.g. the solution is not in the word
            # list); indexing the empty prediction below would raise.
            return failed
        new_yellow = list("     ")
        for i in range(5):
            if prediction[i] == solution[i]:
                green[i] = prediction[i]
            elif prediction[i] in solution:
                new_yellow[i] = prediction[i]
            else:
                letters.discard(prediction[i])
        yellow_pattern = "".join(new_yellow)
        if yellow_pattern != "     ":
            # Stored as a string so every pattern in `yellows` has one type.
            yellows.append(yellow_pattern)
        predictions = next_prediction("".join(letters), "".join(green), yellows)
        if predictions["available_words_count"] > elimination_threshold:
            prediction = predictions["best_elimination"]
        else:
            prediction = predictions["best_guess"]
        guesses += 1
        if guesses > max_guesses:
            return failed
    return guesses

In [62]:
# Single simulated game; the cell displays the number of guesses used.
game(solution="flora")

5

In [84]:
import random

# Play 200 randomly sampled solutions and tally how many guesses each took.
with open('hasła.txt', 'r', encoding='utf-8') as file:
    results = {}
    words = file.readlines()

for i, word in enumerate(random.sample(words, 200)):
    # Progress marker after every tenth game.
    if i % 10 == 9:
        print(i + 1)
    word = word.strip()
    guesses = game(word)
    results[guesses] = results.get(guesses, 0) + 1
print(results)

10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
{3: 61, 4: 96, 6: 11, 5: 30, 2: 2}


In [85]:
# Re-key the tally in ascending order of guess count for readability.
results = dict(sorted(results.items()))
print(results)

{2: 2, 3: 61, 4: 96, 5: 30, 6: 11}
