In [42]:
import ast
import os

import cv2
import lxml.html
import numpy as np
import pandas as pd

In [7]:
# Directory that holds the inputs for the selected sample folder.
folder = 0
filepath = f"score_prediction/{folder}"

# NOTE: cv2.imread returns None (it does not raise) when the file cannot be read.
image = cv2.imread(f"{filepath}/sample.png")

# Both CSVs are whitespace-delimited. `sep=r"\s+"` is the supported spelling;
# `delim_whitespace=True` is deprecated in recent pandas releases.
paths_df = pd.read_csv(f"{filepath}/paths.csv", sep=r"\s+")
matrix_df = pd.read_csv(f"{filepath}/matrix.csv", sep=r"\s+")

In [92]:
# Preview only the first rows instead of dumping the whole frame into the notebook.
paths_df.head(10)

Unnamed: 0,score,word,path
0,104,mesost,"[(1, 0), (2, 1), (2, 2), (1, 3), (1, 2), (0, 2)]"
1,79,jatos,"[(0, 0), (1, 1), (0, 2), (1, 3), (2, 2)]"
2,78,meso,"[(1, 0), (2, 1), (2, 2), (1, 3)]"
3,72,amatols,"[(2, 0), (1, 0), (1, 1), (0, 2), (1, 3), (0, 3..."
4,69,loiasis,"[(0, 3), (1, 3), (2, 3), (3, 3), (2, 2), (3, 1..."
5,65,amatol,"[(2, 0), (1, 0), (1, 1), (0, 2), (1, 3), (0, 3)]"
6,63,james,"[(0, 0), (1, 1), (1, 0), (2, 1), (2, 2)]"
7,60,losses,"[(0, 3), (1, 3), (1, 2), (2, 2), (2, 1), (3, 0)]"
8,59,asemasia,"[(2, 0), (3, 0), (2, 1), (1, 0), (1, 1), (2, 2..."
9,59,santols,"[(2, 2), (1, 1), (0, 1), (0, 2), (1, 3), (0, 3..."


In [93]:
# Map each candidate word to its path (a list of (row, col) tuples).
# The 'path' column is stored as text, so it is parsed with ast.literal_eval,
# which only accepts Python literals and cannot execute arbitrary code the way
# eval() would on a tampered CSV. If a word occurs more than once, the last row wins.
selected_paths = {
    word: ast.literal_eval(path_text)
    for word, path_text in zip(paths_df['word'], paths_df['path'])
}

In [103]:
# Load the saved results page and parse it into an lxml element tree.
with open(f"{filepath}/results.html", "r") as f:
    results_html_string = f.read()
results_html = lxml.html.fromstring(results_html_string)

# Walk the children of the first "div.points-result" element; each child
# contributes one (word, points) pair. The pairs are kept both as an ordered
# list and as a word -> points lookup.
score_list = []
scores = {}
rows = results_html.cssselect("div.points-result")[0]
for row in rows:
    word_node = row.cssselect("div.word")[0]
    word = word_node.text_content()
    left_node = row.cssselect("div.points")[0].cssselect("div.left")[0]
    points = int(left_node.text_content())
    scores[word] = points
    score_list.append((word, points))

In [87]:
# Tabulate the scraped (word, points) pairs and save them as a space-separated CSV.
score_columns = ["word", "points"]
score_df = pd.DataFrame(data=score_list, columns=score_columns)
score_df.to_csv(f"{filepath}/actual_scores.csv", sep=" ")

In [79]:
class Cell:
    """A single grid cell holding a character, its value and a bonus tag.

    All three attributes start out "empty" (0 / "" / "") and are meant to be
    assigned after construction.
    """

    def __init__(self):
        self.value, self.char, self.bonus = 0, "", ""

    def __str__(self):
        # Rendered as "<bonus> <char> <value>".
        bonus, char, value = self.bonus, self.char, self.value
        return f"{bonus} {char} {value}"

    def __repr__(self):
        # The repr is deliberately the same text as str().
        return self.__str__()

In [81]:
def construct_matrix(matrix_df, shape=(4, 4)):
    """Build a grid of ``Cell`` objects from the matrix description table.

    Parameters
    ----------
    matrix_df : pandas.DataFrame
        One row per cell with the columns ``index`` (the cell position, either
        a tuple or its textual form such as ``"(1, 2)"``), ``character``,
        ``value`` and ``bonus``.
    shape : tuple of int, optional
        Dimensions of the grid. Defaults to ``(4, 4)``, the size that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Object array of the given shape. Every position holds its own ``Cell``;
        positions not listed in ``matrix_df`` keep the ``Cell`` defaults.

    Raises
    ------
    ValueError, SyntaxError
        If a textual ``index`` is not a plain Python literal.
    """
    # Fill position by position so that every slot gets a distinct Cell instance.
    cells = np.empty(shape, dtype=object)
    for position in np.ndindex(*shape):
        cells[position] = Cell()

    for _, row in matrix_df.iterrows():
        index = row['index']
        if isinstance(index, str):
            # literal_eval only parses literals, so a tampered CSV cannot run
            # arbitrary code the way eval() would.
            index = ast.literal_eval(index)

        # A tuple selects exactly one element; a list would trigger numpy
        # fancy indexing and return an array instead of a Cell.
        cell = cells[tuple(index)]
        cell.char = row['character']
        cell.value = row['value']
        cell.bonus = row['bonus']

    return cells

In [82]:
# Build the grid of Cell objects that the scoring functions index into.
matrix = construct_matrix(matrix_df=matrix_df)

In [95]:
def get_dictionary_fix(scores, selected_paths):
    """Compare the scored words with the selected words.

    Parameters
    ----------
    scores : mapping
        Words that have an actual score (keys are the words).
    selected_paths : mapping
        Words that were selected (keys are the words).

    Returns
    -------
    tuple of (list, list)
        ``(invalid_words, missed_words)``: selected words that have no score,
        in ``selected_paths`` order, followed by scored words that were never
        selected, in ``scores`` order.
    """
    missed_words = [candidate for candidate in scores if candidate not in selected_paths]
    invalid_words = [candidate for candidate in selected_paths if candidate not in scores]
    return invalid_words, missed_words

In [99]:
def predict_scores(calculator):
    """Score every selected word that also has an actual score.

    Reads the module-level ``selected_paths``, ``scores`` and ``matrix``.

    Parameters
    ----------
    calculator : callable
        Called as ``calculator(path, matrix)``; its return value becomes the
        predicted score for the word.

    Returns
    -------
    dict
        ``word -> predicted score`` in ``selected_paths`` order. Words that are
        missing from ``scores`` are treated as invalid and left out.
    """
    return {
        candidate: calculator(cells_path, matrix)
        for candidate, cells_path in selected_paths.items()
        if candidate in scores
    }

In [100]:
def sze_calculator(path, matrix):
    """Score a path as (sum of letter value * letter bonus) * word bonuses.

    Parameters
    ----------
    path : iterable
        Indices into ``matrix``, one per letter of the word.
    matrix : indexable
        ``matrix[index]`` must yield an object with ``value`` and ``bonus``
        attributes.

    Returns
    -------
    The letter total multiplied by the product of all word multipliers, plus a
    constant offset that is currently 0.

    Bonus tags: ``'2L'``/``'3L'`` double/triple that cell's value, while
    ``'2W'``/``'3W'`` double/triple the whole word. Any other tag has no effect.
    """
    word_factors = {'2W': 2, '3W': 3}
    letter_factors = {'2L': 2, '3L': 3}
    offset = 0

    multiplier = 1
    letters_total = 0
    for position in path:
        square = matrix[position]
        tag = square.bonus
        multiplier *= word_factors.get(tag, 1)
        letters_total += square.value * letter_factors.get(tag, 1)

    return (letters_total * multiplier) + offset

In [106]:
def extract_metadata(path):
    """Collect the value and bonus of every cell along a path.

    Reads the module-level ``matrix``.

    Parameters
    ----------
    path : iterable
        Indices into ``matrix``.

    Returns
    -------
    tuple of (list, list)
        ``(values, bonuses)``, both in path order.
    """
    visited = [matrix[index] for index in path]
    values = [cell.value for cell in visited]
    bonuses = [cell.bonus for cell in visited]
    return values, bonuses

In [104]:
# Score every valid word with sze_calculator and tabulate it against the
# actual score. error = target - predicted, so a positive error means the
# calculator under-predicted.
predicted_scores = predict_scores(sze_calculator)

comparison_data = []
for word, predicted_score in predicted_scores.items():
    target_score = scores[word]
    error = target_score - predicted_score
    comparison_data.append((word, target_score, predicted_score, error))

# A dangling `values, ` expression used to sit inside the loop; it referenced
# an undefined name and raised NameError on a fresh kernel, so it was removed.
headers = ["word", "target", "predicted", "error"]

df = pd.DataFrame(comparison_data, columns=headers)

In [105]:
# Save the target/predicted comparison table as a space-separated CSV.
df.to_csv(path_or_buf=f"{filepath}/sze_calculator_predictions.csv", sep=" ")

Unnamed: 0,word,target,predicted,error
0,jatos,81,72,9
1,amatols,74,72,2
2,amatol,65,66,-1
3,james,65,56,9
4,losses,59,72,-13
5,santols,62,60,2
6,matlo,57,60,-3
7,siamese,54,48,6
8,loses,51,66,-15
9,messias,54,48,6
