In [1]:
from itertools import product, chain
import re

# Time-horizon phrases: each qualifier is combined with each period noun.
first_words = [
    "next", "incoming", "coming",
    "upcoming", "subsequent", "following",
]
second_words = ["fiscal", "month", "period", "quarter", "year"]

# Note that "'ll" is an addition to original list
first_search = ["'ll", "will", "future"]
# product() varies the second word fastest, so all "next ..." phrases
# come first, then all "incoming ..." phrases, and so on.
first_search.extend(" ".join(pair)
                    for pair in product(first_words, second_words))

In [2]:
# Phrase patterns in string.Template syntax. complete_template() fills the
# placeholders for one base verb:
#   $word       the base verb itself
#   $words      the base verb with "s" appended
#   $past_tense the result of past_tense(verb)
#   $pres_part  the result of present_participle(verb)
# The order of the entries determines the order of the generated phrases.
template = [
    "we $word",
    "and $word",
    "but $word",
    "do not $word",
    "company $words",
    "corporation $words",
    "firm $words",
    "management $words",
    "and $words",
    "but $words",
    "does not $word",
    "is $past_tense",
    "are $past_tense",
    "not $past_tense",
    "is $pres_part",
    "are $pres_part",
    "not $pres_part",
    "normally $word",
    "normally $words",
    "currently $word",
    "currently $words",
    "also $word",
    "also $words",
]

# Base verbs that are expanded through every pattern above.
verbs = [
    "aim", "anticipate", "assume", "commit", "estimate", "expect", "forecast",
    "foresee", "hope", "intend", "plan", "project", "seek", "target",
]

def past_tense(word):
    """Return the "-ed" style form of the verb ``word``.

    Verbs whose form cannot be built by appending a suffix (doubled final
    consonant or irregular form) are looked up in a small table. Every other
    verb gets "d" when it already ends in "e" and "ed" otherwise.
    """
    special_forms = {
        "commit": "committed",
        "plan": "planned",
        "foresee": "foreseen",
        "seek": "sought",
    }
    if word in special_forms:
        return special_forms[word]

    # Regular verbs: avoid a double "e" ("hope" -> "hoped", not "hopeed").
    suffix = "d" if re.search("e$", word) else "ed"
    return word + suffix

def present_participle(word):
    """Return the "-ing" form of the verb ``word``.

    Verbs that double their final consonant are looked up in a small table.
    A verb ending in a single "e" has that "e" replaced by "ing"; any other
    verb simply gets "ing" appended.
    """
    doubled_consonant = {
        "commit": "committing",
        "plan": "planning",
    }
    if word in doubled_consonant:
        return doubled_consonant[word]

    # Only a lone trailing "e" is dropped ("hope" -> "hoping"); a verb
    # ending in "ee" keeps both letters ("foresee" -> "foreseeing").
    if re.search("[^e]e$", word):
        return re.sub("e$", "ing", word)
    return word + "ing"

def complete_template(word):
    """Fill every pattern in ``template`` for the base verb ``word``.

    Returns a list with one phrase per entry of the module-level
    ``template`` list, in the same order.
    """
    from string import Template

    # One value per placeholder name used in the patterns.
    substitutions = {
        "word": word,
        "words": word + "s",
        "past_tense": past_tense(word),
        "pres_part": present_participle(word),
    }
    return [Template(pattern).substitute(substitutions)
            for pattern in template]

def flatten(list_of_lists):
    """Concatenate the items of each inner iterable into one flat list.

    Only one level of nesting is removed; the items keep their order.
    """
    return [item for inner in list_of_lists for item in inner]
    
# Every verb expanded through every pattern, grouped verb by verb.
second_search = flatten(map(complete_template, verbs))

In [3]:
# Full generated term list: time-horizon phrases first, then verb phrases.
all_search = list(chain(first_search, second_search))

In [4]:
def term_list(terms_file):
    """Read search terms from a text file, one term per line.

    Parameters
    ----------
    terms_file : str
        Path of the text file to read.

    Returns
    -------
    list of str
        The terms in file order with surrounding whitespace removed.
        Lines that are empty or contain only whitespace are skipped, so the
        result never contains an empty term (which would match any text).
    """
    # Open the file in "r" (read) mode; the handle is closed on leaving the
    # ``with`` block.
    with open(terms_file, "r") as terms_handle:
        raw_lines = terms_handle.read().splitlines()

    stripped_lines = (line.strip() for line in raw_lines)
    return [term for term in stripped_lines if term]
    
# Reference term list to compare against; the relative path is resolved
# against the current working directory.
all_search_yy = term_list(r"fls_terms2.txt")

In [5]:
# Generated terms that do not appear in the reference file, in generation
# order. A set keeps each membership test constant-time instead of
# rescanning the whole reference list for every term.
reference_lookup = set(all_search_yy)
[term for term in all_search if term not in reference_lookup]

['we estimate',
 'and estimate',
 'but estimate',
 'do not estimate',
 'company estimates',
 'corporation estimates',
 'firm estimates',
 'management estimates',
 'and estimates',
 'but estimates',
 'does not estimate',
 'is estimated',
 'are estimated',
 'not estimated',
 'is estimating',
 'are estimating',
 'not estimating',
 'normally estimate',
 'normally estimates',
 'currently estimate',
 'currently estimates',
 'also estimate',
 'also estimates',
 'is planned',
 'are planned',
 'not planned',
 'is planning',
 'are planning',
 'not planning']

In [6]:
# Reference terms that the generated list does not contain, in file order.
# A set keeps each membership test constant-time instead of rescanning the
# whole generated list for every term.
generated_lookup = set(all_search)
[term for term in all_search_yy if term not in generated_lookup]

['is planed',
 'are planed',
 'not planed',
 'is planing',
 'are planing',
 'not planing']