In [2]:
import spacy
import nltk
import json
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet


nlp = spacy.load("en_core_web_sm")

text = "masage spa NY, oil change in michigan, oli change in New York"

doc = nlp(text)

places = [ent.text for ent in doc.ents if ent.label_ in ['GPE', 'LOC']]

print('Detected locations:')
for place in places:
    print(place)

Detected locations:
michigan
New York


#### Here is an example of the results produced by the entire query preprocessor
##### Please note that this is just an example!!! Some parts should be addressed differently for improved accuracy and effectiveness in real scenarios. It’s important to continuously refine the preprocessor to align better with specific use cases and the unique requirements of our application.

In [3]:
nlp = spacy.load("en_core_web_sm")

text = "masage spa NY, oil change in michigan, oli change in New York"

tokens = word_tokenize(text)

def get_synonyms(word):
    synsets = wordnet.synsets(word)
    synonyms = set()
    for syn in synsets:
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
    return list(synonyms)

doc = nlp(text)

localities = [ent.text for ent in doc.ents if ent.label_ in ['GPE', 'LOC']]

result = {
    "query": text,
    "localities": localities,
    "tokens": tokens,
    "synonyms": [{"word": token, "synonyms": get_synonyms(token)} for token in tokens]
}

print(json.dumps(result, indent=4))

{
    "query": "masage spa NY, oil change in michigan, oli change in New York",
    "localities": [
        "michigan",
        "New York"
    ],
    "tokens": [
        "masage",
        "spa",
        "NY",
        ",",
        "oil",
        "change",
        "in",
        "michigan",
        ",",
        "oli",
        "change",
        "in",
        "New",
        "York"
    ],
    "synonyms": [
        {
            "word": "masage",
            "synonyms": []
        },
        {
            "word": "spa",
            "synonyms": [
                "health_spa",
                "watering_hole",
                "health_club",
                "resort_hotel",
                "spa",
                "watering_place"
            ]
        },
        {
            "word": "NY",
            "synonyms": [
                "New_York_State",
                "Empire_State",
                "New_York",
                "NY"
            ]
        },
        {
            "word": ",",
           