# Attempt to work on Ratings

# Trying to define a word evaluator

In [88]:
import re
from enum import Enum

# Matches any run of one or more whitespace characters.
__CLEANER = re.compile('\\s+')


def clean_word(word):
    """Return *word* in normalised form for synonym lookup.

    The text is trimmed of surrounding whitespace and lower-cased, after
    which every remaining run of whitespace is squeezed into one space.
    """
    normalised = word.strip().lower()
    return __CLEANER.sub(' ', normalised)

class WordEvalType(Enum):
    """Scoring strategies that ``WordEval.eval`` selects through ``WordEval.type``."""
    # Score drops in equal steps from 1 (first key) down to 0 (last key).
    LINEAR = 1
    # Score is meant to be ``WordEval.base`` raised to the key's position.
    EXPONENTIAL = 2
    # Not implemented: ``WordEval.eval`` raises ValueError for this mode.
    MANUAL = 3
    
class WordEval:
    """Translate rating words (and their synonyms) into numeric scores.

    ``vals`` maps each canonical rating name to an iterable of synonyms.
    The iteration order of ``vals`` is the ranking: the first key is the
    best rating and the last key the worst.

    Attributes:
        names_to_synonyms: the mapping handed to the constructor.
        keys: canonical names in ranking order.
        lookup_synonym: maps every canonical name and every synonym to its
            canonical name.
        base: base of the power used in EXPONENTIAL mode (0.9 by default).
        type: the ``WordEvalType`` used by ``eval`` (LINEAR by default).
    """

    def __init__(self, vals):
        self.names_to_synonyms = vals
        self.keys = list(vals)
        self.lookup_synonym = {}
        self.base = 0.9
        self.type = WordEvalType.LINEAR
        for key, synonyms in vals.items():
            self.lookup_synonym[key] = key
            # A value written as ('abc') is a plain string, not a tuple;
            # treat it as one synonym instead of iterating its characters.
            if isinstance(synonyms, str):
                synonyms = (synonyms,)
            for synonym in synonyms:
                self.lookup_synonym[synonym] = key

    def get_key(self, word):
        """Return the canonical name for *word*, or None if it is unknown.

        The lookup is exact; *word* is not normalised here.
        """
        return self.lookup_synonym.get(word)

    def keys_match_score(self, word_list):
        """Score how well *word_list* fits this evaluator's vocabulary.

        The result is the fraction of canonical names hit by at least one
        word, minus one full point for every non-empty word that is not
        recognised.  ``None`` entries and empty strings are ignored.

        NOTE: words are looked up as given (no ``clean_word`` call), so
        differently-cased input counts as unrecognised.
        """
        keys_used = set()
        none_count = 0
        for word in word_list:
            if word is None or len(word) == 0:
                continue
            key = self.get_key(word)
            if key is None:
                none_count += 1
            else:
                keys_used.add(key)
        # An evaluator built from an empty mapping has nothing to cover;
        # report zero coverage instead of dividing by zero.
        coverage = len(keys_used) / len(self.keys) if self.keys else 0.0
        return coverage - none_count

    def eval(self, word):
        """Return the numeric score of *word*, or None if it is unknown.

        *word* is normalised with ``clean_word`` before the lookup.

        * LINEAR: equally spaced scores from 1 (first key) to 0 (last key).
        * EXPONENTIAL: ``base ** position`` where the first key has
          position 0.
        * With a single canonical name the score is always 1.

        Raises:
            ValueError: if ``self.type`` is neither LINEAR nor EXPONENTIAL.
        """
        word = clean_word(word)
        key = self.get_key(word)
        if key is None:
            return None
        nitems = len(self.names_to_synonyms)
        if self.type is WordEvalType.LINEAR:
            index = self.keys.index(key)
            if nitems <= 1:
                # Single-entry scale: avoid dividing by (nitems - 1) == 0.
                return nitems
            return (nitems - 1 - index) / (nitems - 1)
        if self.type is WordEvalType.EXPONENTIAL:
            # The position must be looked up by calling index(key); using the
            # bound method itself as the exponent raises TypeError.
            index = self.keys.index(key)
            if nitems <= 1:
                return nitems
            return self.base ** index
        raise ValueError("Have not done manual case yet")

In [81]:
# Example rating scales.  In every mapping the key order is the ranking
# (best first) and each value lists the accepted synonyms for that key.

# Three-level scale: high / medium / low.
hml = WordEval({
    'high': ('h', 'hi'),
    'medium': ('medi', 'med', 'me', 'm'),
    'low': ('lowe', 'lo', 'l'),
})

# Five-level scale: very high / high / medium / low / very low.
vhhmlvl = WordEval({
    'very high': ('ver high', 'vy high', 'v high', 'vhigh', 'very hi', 'very h', 'v h', 'vh'),
    'high': ('hig', 'hi', 'h'),
    'medium': ('mediu', 'med', 'me', 'm', 'okay', 'ok', 'o', 'average', 'aver', 'avg'),
    'low': ('lo', 'l', 'lw', 'bad', 'bd', 'not high', 'not hi', 'not h'),
    'very low': ('ver low', 'vy low', 'v low', 'vlow', 'vlo', 'vl', 'v lo'),
})

# Letter grades.
abcdf = WordEval({
    'a': (),
    'b': (),
    'c': (),
    'd': (),
    # The trailing comma matters: ('e') is just the string 'e', which only
    # works by accident because iterating a one-character string yields
    # that character.
    'f': ('e',),
})

In [82]:
# 'l' is a synonym of 'low', the last of hml's three keys, so the linear score is 0.
hml.eval('l')

0.0

In [83]:
# 'l' -> 'low' (position 3 of 0..4) scores 0.25; 'oK' is normalised to 'ok' -> 'medium' (position 2) scores 0.5.
vhhmlvl.eval('l'), vhhmlvl.eval('oK')

(0.25, 0.5)

In [84]:
# 'high'/'hi' and 'm' hit two of the three keys; the empty string is skipped, giving 2/3.
hml.keys_match_score(['high', 'hi', 'm', ''])

0.6666666666666666

In [86]:
# None entries are skipped just like empty strings, so the score is still 2/3.
hml.keys_match_score(['high', 'hi', 'm', '', None])

0.6666666666666666

In [87]:
# "biL" matches no key or synonym and costs one full point: 2/3 - 1.
hml.keys_match_score(['high', 'hi', 'm', '', None, "biL"])

-0.33333333333333337