# Notebook that does grammar correction.

In [1]:
### Imports
import imports as i

In [2]:
df = i.pd.read_csv('../data/google_reviews.csv')
text_rev = df['text'].tolist()

corrected_text = []

In [3]:
### function that does grammar correction

sym_spell = i.SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
dictionary_path = i.pkg_resources.resource_filename("symspellpy", "frequency_dictionary_en_82_765.txt")
sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)

def grammar_corrector(text:str) -> str:
    """
    Corrects spelling and grammar in the given text.

    Args:
        text (str or list): The input text to be corrected. It can be a single string or a list of strings.

    Returns:
        str or list: The corrected text, with spelling and grammar issues fixed.
    """
    cleaned_text = []

    if isinstance(text, str):
        text = [text]  # Convert a single string to a list of strings for consistency.

    for line in text:
        temp_line = []
        words = line.split()
        for _, word in enumerate(words):
            # Check if the word contains a numeric character
            has_numeric = any(char.isdigit() for char in word)
            
            if has_numeric:
                # If the word contains a numeric character, keep the original word
                corrected_word = word
            else:
                # If the word does not contain a numeric character, perform correction
                corrected_word = sym_spell.lookup(word.lower(), i.Verbosity.CLOSEST, max_edit_distance=2)
                corrected_word = corrected_word[0].term if corrected_word else corrected_word

            # Append the punctuation back to the corrected word if the original word had it
            if word[-1] in ['!', '?', '.']:
                corrected_word += word[-1]

            temp_line.append(corrected_word)

            # Add space between words, except for the last word
            if _ < len(words) - 1:
                temp_line.append(' ')

        cleaned_text.append(''.join(map(str, temp_line)))
       

    if isinstance(text, str):
        return cleaned_text[0]  # Return the corrected string.
    else:
        return cleaned_text


In [4]:
for _ in text_rev:
    corrected_text.append(grammar_corrector(_))


In [5]:
df['corrected_review'] = corrected_text

In [6]:
for _, row in df.iterrows():
    if _ == 1:
        print(row['text'], row['corrected_review'])
        break

Big gym spanning 3 floors. Good selection of benches and power racks. All equipment is of the lowest quality possible (Technogym) and has clearly been compromised to maximize profits. This lack of care for traning quality also becomes strenghtend by obscure rules, such as backpacks/gymbags or similar being forbidden in the gym. Receptionist staff are all super friendly and helpful, and the ALL-IN and heatwave yoga are excellent offers. This gym is very good at everthing not related to actual training. Would recommend if your looking for a cheap place to just do cardio and some strength training. ['big gym spanning 3 floors. good selection of benches and power racks. all equipment is of the lowest quality possible [] and has clearly been compromised to maximize profits. this lack of care for training quality also becomes strengthen by obscure rules such as [] or similar being forbidden in the gym. receptionist staff are all super friendly and helpful and the allen and heatwave yoga are 

In [7]:
df.head(5)

Unnamed: 0,place_id,type,name,lat,lng,author_name,rating,text,opening_hours,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday,corrected_review
0,ChIJnebu2e7fTEYRl2jmqFgqVZo,PureGym Odense,PureGym,55.394857,10.366625,Lea Hansen,5,Can't complain. They do their job very well an...,"{'Monday': '05:00AM - 12:00AM', 'Tuesday': '05...",05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 11:59PM,05:00AM - 12:00AM,05:00AM - 12:00AM,[can't complaint. they do their job very well ...
1,ChIJnebu2e7fTEYRl2jmqFgqVZo,PureGym Odense,PureGym,55.394857,10.366625,Marcus Nygård,2,Big gym spanning 3 floors. Good selection of b...,"{'Monday': '05:00AM - 12:00AM', 'Tuesday': '05...",05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 11:59PM,05:00AM - 12:00AM,05:00AM - 12:00AM,[big gym spanning 3 floors. good selection of ...
2,ChIJnebu2e7fTEYRl2jmqFgqVZo,PureGym Odense,PureGym,55.394857,10.366625,Maj Vangsø Simonsen,5,Love this gym! Best instructors and facilities...,"{'Monday': '05:00AM - 12:00AM', 'Tuesday': '05...",05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 11:59PM,05:00AM - 12:00AM,05:00AM - 12:00AM,[love this gym! best instructors and facilitie...
3,ChIJnebu2e7fTEYRl2jmqFgqVZo,PureGym Odense,PureGym,55.394857,10.366625,Henrik Lambert,4,Fine place but watch your parking meter.,"{'Monday': '05:00AM - 12:00AM', 'Tuesday': '05...",05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 11:59PM,05:00AM - 12:00AM,05:00AM - 12:00AM,[fine place but watch your parking meter.]
4,ChIJnebu2e7fTEYRl2jmqFgqVZo,PureGym Odense,PureGym,55.394857,10.366625,Fresh Andrew,5,Lots of space and it's nice and clean,"{'Monday': '05:00AM - 12:00AM', 'Tuesday': '05...",05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 12:00AM,05:00AM - 11:59PM,05:00AM - 12:00AM,05:00AM - 12:00AM,[lots of space and it's nice and clean]
