## Testing spacy

In [0]:
import spacy
import en_core_web_sm

# Load the small English pipeline; the entities printed at the end of this
# cell come from the `ents` of the processed document.
nlp = spacy.load("en_core_web_sm")

# Build a text-classification component and append it to the pipeline.
# The object created here is the one that must be registered with `add_pipe`.
textcat = nlp.create_pipe(
    "textcat",
    config={"exclusive_classes": True, "architecture": "simple_cnn"},
)
nlp.add_pipe(textcat, last=True)
# NOTE(review): this textcat gets no labels and is never trained in this
# notebook; presumably spaCy needs it initialised before `nlp(text)` can run
# it -- confirm, or drop the component if only NER is needed.

# Show the component names so the pipeline layout is visible in the output.
print(nlp.pipe_names)

text = 'That is the will of Parliament and the nation. The British Empire and the French Republic, linked together in their cause and in their need, will defend to the death their native soil, aiding each other like good comrades to the utmost of their strength. Even though large tracts of Europe and many old and famous States have fallen or may fall into the grip of the Gestapo and all the odious apparatus of Nazi rule, we shall not flag or fail. We shall go on to the end, we shall fight in France, we shall fight on the seas and oceans, we shall fight with growing confidence and growing strength in the air, we shall defend our Island, whatever the cost may be, we shall fight on the beaches, we shall fight on the landing grounds, we shall fight in the fields and in the streets, we shall fight in the hills; we shall never surrender, and even if, which I do not for a moment believe, this Island or a large part of it were subjugated and starving, then our Empire beyond the seas, armed and guarded by the British Fleet, would carry on the struggle, until, in God’s good time, the New World, with all its power and might, steps forth to the rescue and the liberation of the old.'

# Run the pipeline once and list every entity with its character span and label.
t = nlp(text)
for ent in t.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

## Saving model and doing test

In [0]:
import os

# Directory that receives the serialized spaCy pipeline.
nlp_model_path = 'spacy-nlp-model'
nlp.to_disk(nlp_model_path)

# Show each entity and its label as produced by the in-memory pipeline.
for entity in nlp(text).ents:
    print(entity.text, entity.label_)

## Load model and get same results

In [0]:
# Restore the pipeline from the directory written by `nlp.to_disk` and list
# the entities it finds in the same text.
mod = spacy.load(nlp_model_path)
reloaded_doc = mod(text)
for entity in reloaded_doc.ents:
    print(entity.text, entity.label_)

## Install mlflow

## Save SpacyWrapper model, load and test

In [0]:
import mlflow
import pip

# Map the artifact name "nlp_model" to the saved spaCy pipeline directory.
# The dictionary is handed to `mlflow.pyfunc.save_model`, and the same key is
# read back through `context.artifacts` when the wrapped model is loaded.
artifacts = {"nlp_model": nlp_model_path}

# Define the model class
import mlflow.pyfunc
class SpacyWrapper(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc model that runs a spaCy pipeline and reports its entities as JSON."""

    def load_context(self, context):
        """Load the spaCy pipeline stored under the ``nlp_model`` artifact into ``self.nlp``."""
        import spacy

        model_dir = context.artifacts["nlp_model"]
        self.nlp = spacy.load(model_dir)

    def predict(self, context, model_input):
        """Extract entities from every entry of ``model_input.text``.

        Each entry is mapped to a JSON string encoding a list of
        ``[text, start_char, end_char, label]`` items, one per entity.
        If a ``TypeError`` is raised while processing, the string
        ``"DataFrame must contain strings"`` is returned instead of a result.
        """
        import json

        def entities_of(raw_text):
            # One tuple per entity; json.dumps later renders tuples as arrays.
            doc = self.nlp(raw_text)
            return [
                (span.text, span.start_char, span.end_char, span.label_)
                for span in doc.ents
            ]

        try:
            per_row = model_input.text.apply(entities_of)
            return per_row.apply(json.dumps)
        except TypeError:
            return "DataFrame must contain strings"

# Create a Conda environment for the new MLflow Model that pins pip, MLflow,
# spaCy and CloudPickle to the versions installed in this runtime.
import platform

import cloudpickle

# Let's create our own conda environment
conda_env = {
    'channels': ['defaults', 'pytorch'],
    'dependencies': [
        # Pin Python to the interpreter running this notebook so it stays
        # consistent with the package versions captured below, instead of a
        # hard-coded release.
        f'python={platform.python_version()}',
        {
            'pip': [
                f'pip=={pip.__version__}',
                f'mlflow=={mlflow.__version__}',
                f'spacy=={spacy.__version__}',
                f'cloudpickle=={cloudpickle.__version__}'
            ]
        }
    ],
    'name': 'mlflow-env-spacy'
}

# Save the MLflow Model
mlflow_pyfunc_model_path = "spacy_mlflow_pyfunc"

# Remove the folder left by a previous run, if any. On a fresh environment the
# folder does not exist yet, so a missing directory is not an error here.
import shutil
try:
    shutil.rmtree(mlflow_pyfunc_model_path)
except FileNotFoundError:
    pass

# Package the wrapper class, the saved spaCy pipeline directory and the conda
# environment into one pyfunc model directory.
mlflow.pyfunc.save_model(
    path=mlflow_pyfunc_model_path,
    python_model=SpacyWrapper(),
    artifacts=artifacts,
    conda_env=conda_env,
)

# Reload the saved model in `python_function` format.
loaded_model = mlflow.pyfunc.load_model(mlflow_pyfunc_model_path)

# Score two sample sentences supplied through a `text` column.
import pandas as pd
sample_texts = [
    'What a beautiful day',
    'That is the will of Parliament and the nation. The British Empire and the French Republic, linked together in their cause and in their need',
]
test_predictions = loaded_model.predict(pd.DataFrame(data={'text': sample_texts}))
print(test_predictions)
