In [14]:
import nltk
from nltk.corpus import wordnet as wn
import re

In [15]:
# Sample question used as the demo input for query_expansion in a later cell.
# The bare `query` expression on the last line makes the notebook echo the value.
query = "What are the ins/outs of writing equipment purchases off as business expenses in a home based business?"
query

'What are the ins/outs of writing equipment purchases off as business expenses in a home based business?'

In [16]:
def basic_cleaning(query):
    """
    Normalise a query string for downstream processing.

    Steps, in order: coerce to ``str``, lowercase, delete every character
    that is neither a word character nor whitespace, then collapse runs of
    whitespace into single spaces (also trimming both ends).

    Note that punctuation is deleted, not replaced by a space, so
    ``"ins/outs"`` becomes ``"insouts"``.
    """
    lowered = str(query).lower()
    punctuation_free = re.sub(r'[^\w\s]', '', lowered)
    tokens = punctuation_free.split()
    return ' '.join(tokens)

In [17]:
def nouns_only(query):
    """
    Return the distinct nouns found in a query, in order of first appearance.

    The query is split on whitespace and tagged with ``nltk.tag.pos_tag``;
    tokens tagged ``NN``, ``NNS``, ``NNP`` or ``NNPS`` are kept.

    Parameters
    ----------
    query : str
        Text to analyse (typically already passed through ``basic_cleaning``).

    Returns
    -------
    list
        Unique noun tokens, ordered by first occurrence so that the result
        is reproducible from run to run. An empty list is returned if the
        query cannot be split or tagged (best-effort behaviour).
    """
    noun_tags = {"NN", "NNS", "NNP", "NNPS"}
    try:
        tagged_text = nltk.tag.pos_tag(query.split())
    except Exception:
        # Best-effort: a non-string query or a tagging failure yields "no nouns".
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so the notebook can still be interrupted.
        return []
    # dict.fromkeys de-duplicates while keeping first-seen order; set() would
    # give an arbitrary order that changes between interpreter runs.
    return list(dict.fromkeys(word for word, tag in tagged_text if tag in noun_tags))

In [18]:
def query_noun_mapping(query_nouns):
    """
    Map each noun to a close synonym taken from WordNet.

    For every noun, the first synset returned by ``wn.synsets`` is used and
    its second lemma is taken as the synonym. If that second lemma is the
    noun itself (the noun is not the head lemma of its synset), the head
    lemma is used instead so a word is never mapped to itself.

    Parameters
    ----------
    query_nouns : iterable of str
        Nouns to look up.

    Returns
    -------
    dict
        ``{noun: synonym}`` for the nouns that have one. Nouns with no
        synset, or whose first synset has fewer than two lemmas, are omitted.
    """
    synonym_dict = {}
    for query_noun in query_nouns:
        try:
            lemma_names = [lemma.name() for lemma in wn.synsets(query_noun)[0].lemmas()]
        except Exception:
            # Best-effort lookup: no synset (IndexError) or a failed lookup
            # simply means this noun gets no synonym. KeyboardInterrupt and
            # SystemExit are not swallowed.
            continue
        if len(lemma_names) < 2:
            # Only one way to say it in this synset: nothing to expand with.
            continue

        target = str(query_noun).lower()
        closest_noun = lemma_names[1]
        if closest_noun.lower() == target:
            # The noun is the synset's second lemma; fall back to the head lemma.
            closest_noun = lemma_names[0]
        if closest_noun.lower() == target:
            continue
        synonym_dict[query_noun] = closest_noun
    return synonym_dict

In [19]:
def query_expansion(query):
    """
    Expand a query by inserting a synonym in front of each noun.

    Pipeline: ``basic_cleaning`` -> ``nouns_only`` -> ``query_noun_mapping``.
    For every noun that has a synonym, the text ``"<synonym> and "`` is
    inserted immediately before the first whole-word, case-insensitive
    occurrence of that noun in the original query.

    Parameters
    ----------
    query : str
        The raw user query.

    Returns
    -------
    str
        The expanded query. The input is returned unchanged when no nouns or
        no synonyms are found. Nouns that cannot be located in the raw query
        (for example because cleaning removed punctuation inside them) are
        skipped rather than raising.
    """
    clean_query = basic_cleaning(query)
    query_nouns_list = nouns_only(clean_query)
    if not query_nouns_list:
        return query

    synonym_dict = query_noun_mapping(query_nouns_list)
    if not synonym_dict:
        return query

    original = str(query)

    # Locate every insertion point against the ORIGINAL text so that earlier
    # insertions cannot shift, or be matched by, later look-ups.
    insertions = {}
    for noun, synonym in synonym_dict.items():
        # Lookarounds enforce whole-word matches, so "home" is not found
        # inside "homeless"; IGNORECASE avoids index drift from str.lower().
        pattern = r'(?<!\w)' + re.escape(str(noun)) + r'(?!\w)'
        match = re.search(pattern, original, flags=re.IGNORECASE)
        if match is None:
            continue
        insertions.setdefault(match.start(), []).append(f"{synonym} and ")

    if not insertions:
        return query

    # Stitch the result together in a single left-to-right pass.
    pieces = []
    cursor = 0
    for position in sorted(insertions):
        pieces.append(original[cursor:position])
        pieces.extend(insertions[position])
        cursor = position
    pieces.append(original[cursor:])
    return ''.join(pieces)

In [20]:
# Run the end-to-end expansion on the sample query; the returned string is
# shown as this cell's output.
query_expansion(query)

'What are the ins/outs of writing equipment purchases off as concern and business disbursal and expenses in a place and home based business?'