In [10]:
#!/usr/bin/env python
# coding: utf-8
import warnings
warnings.filterwarnings("ignore") # just to ingore gensim deprecated numpy conversion warning
import sys
from pathlib import Path
import string
import gensim
import gensim.downloader as api
from gensim.test.utils import get_tmpfile
from gensim.models import KeyedVectors

## Loading the model

In [14]:
def load_model(model_path='./fasttext.model'):
    """Load previously saved gensim word vectors from disk.

    The file is expected to have been produced once beforehand, e.g.::

        vectors = api.load('fasttext-wiki-news-subwords-300')  # downloads the pretrained model
        vectors.save('fasttext.model')

    Args:
        model_path: Location of the saved model. Relative paths are resolved
            against the current working directory. Defaults to
            ``./fasttext.model``.

    Returns:
        The object returned by ``KeyedVectors.load``.

    Raises:
        FileNotFoundError: If ``model_path`` does not point to an existing file.
    """
    # Resolve the path directly. The previous version routed it through
    # gensim.test.utils.get_tmpfile, a test helper for building paths under a
    # temp directory; it only yielded the right path because the argument was
    # already absolute.
    model_path = Path(model_path).absolute()
    if not model_path.is_file():
        raise FileNotFoundError(
            'No saved model at {}. Download the pretrained vectors and save '
            'them to this path first.'.format(model_path)
        )

    print('Loading model from {}...'.format(model_path))
    model = KeyedVectors.load(str(model_path))
    print('Model loaded from', model_path)
    print('\n')
    return model

In [17]:
def print_single_prediction(pred_text, model, no_preds=15):
    """Print the nearest neighbours of one word.

    Multi-word inputs are expected to be broken into single words by the
    caller, which then invokes this helper once per word.

    Args:
        pred_text: The word to look up.
        model: Object exposing ``most_similar(word, topn=...)``.
        no_preds: How many neighbours to request (default 15).
    """
    heading = 'Keywords closely related to \"{}\":'.format(pred_text)
    print(heading)
    neighbours = model.most_similar(pred_text, topn=no_preds)
    print(neighbours)
    print()  # blank line separating consecutive results

In [18]:
def predict(pred_text, model):
    """Print keyword suggestions for a word or a phrase.

    If ``pred_text`` as a whole is a vocabulary entry, its neighbours are
    printed directly. Otherwise punctuation is replaced by spaces, the text is
    split on whitespace, and each resulting word is looked up on its own.
    Words missing from the vocabulary are reported after all found words.

    Args:
        pred_text: Word or phrase to get suggestions for.
        model: Loaded word vectors exposing ``most_similar`` plus either
            ``key_to_index`` or ``vocab`` for membership checks.

    Returns:
        None. All results are printed.
    """
    # Newer gensim releases expose the vocabulary as `key_to_index`; older
    # ones use `vocab`. Prefer the former and fall back to the latter.
    vocab = getattr(model, 'key_to_index', None)
    if vocab is None:
        vocab = model.vocab

    if pred_text in vocab:  # the whole input is a known vocabulary entry
        print_single_prediction(pred_text, model)
        return

    print('\"{}\" is not in vocab; splitting into multiple words...\n'.format(pred_text))

    # Treat every punctuation character as a word separator so inputs such as
    # "dui_lawyer" split into "dui" and "lawyer". (The previous version
    # referenced an undefined name here and raised NameError.)
    separators = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    words = pred_text.translate(separators).split()  # split() drops empty tokens

    no_pred_words = []
    for word in words:
        if word in vocab:
            print_single_prediction(word, model)
        else:
            no_pred_words.append(word)

    # Report misses last so they are not buried between result lists.
    for word in no_pred_words:
        print('\nNo predictions found for \"{}\". Please try another word.\n'.format(word))

## How to get new keyword suggestions

 ### Load Model

In [23]:
# Load the saved vectors once; the resulting `model` is passed to predict() below.
model = load_model()

Loading model from C:\Users\lewys\Documents\Freelance\suggest-keywords\fasttext.model...
Model loaded from C:\Users\lewys\Documents\Freelance\suggest-keywords\fasttext.model




### Now pass an input string and the loaded model to the predict function

In [25]:
# Example: a two-word phrase. When the phrase as a whole is not a vocabulary
# entry, predict() looks up each word separately.
input_string = "divorce lawyer"
predict(input_string, model)

"divorce lawyer" is not in vocab; splitting into multiple words...

Keywords closely related to "divorce":
[('divorces', 0.8392422199249268), ('pre-divorce', 0.8014459609985352), ('post-divorce', 0.7984651327133179), ('marriage', 0.7579370737075806), ('divorcing', 0.757463812828064), ('remarriage', 0.7570939064025879), ('re-marriage', 0.7552791237831116), ('postdivorce', 0.735708475112915), ('divorcement', 0.720141589641571), ('divorce-related', 0.7192463874816895), ('divorce.', 0.7122068405151367), ('annulment', 0.6923553943634033), ('divorcés', 0.6922506093978882), ('divorced', 0.6849225759506226), ('Divorce', 0.6791139841079712)]

Keywords closely related to "lawyer":
[('attorney', 0.8739438056945801), ('ex-lawyer', 0.7932926416397095), ('lawyers', 0.7913783192634583), ('non-lawyer', 0.784548819065094), ('nonlawyer', 0.782863438129425), ('solicitor', 0.7826274633407593), ('attorney-at-law', 0.7760663032531738), ('litigator', 0.772079348564148), ('non-attorney', 0.748200535774231), (