In [1]:
import watson_nlp

In [2]:
import io, os, types, zipfile
from ibm_watson_studio_lib import access_project_or_space
# Handle used for reading and writing assets (project or space, as the helper's name says).
wslib = access_project_or_space()

# Zip archive stored as an asset; it is expected to contain labeled_data.json.
raw_data_0 = wslib.load_data('Financial+demo+extractor-labeled_data.zip')

training_data_path = 'training_data'
training_zip_filename = 'training.zip'

# Write the downloaded bytes to a local file so zipfile can open it.
# A buffered binary file (instead of raw io.FileIO) writes every chunk in full.
with open(training_zip_filename, 'wb') as file:
    for b in raw_data_0:
        file.write(b)

with zipfile.ZipFile(training_zip_filename) as zObject:
    zObject.extractall(path=".")

# exist_ok=True keeps the cell re-runnable without a separate existence check.
os.makedirs(training_data_path, exist_ok=True)
# os.replace overwrites an existing destination on every platform, so running
# the cell a second time does not fail where os.rename would (e.g. Windows).
os.replace('./labeled_data.json', f"./{training_data_path}/labeled_data.json")



In [3]:
from watson_nlp.toolkit.entity_mentions_utils.train_util import prepare_stream_of_train_records_from_JSON_collection

# Syntax model(s) handed to training through the `syntax_models` argument.
syntax_models = [watson_nlp.load('syntax_izumo_en_stock')]

# Build the stream of training records from the JSON collection in `training_data_path`.
train_data_stream = prepare_stream_of_train_records_from_JSON_collection(training_data_path)

# Split the stream into train/dev parts; the fixed seed keeps the split repeatable.
train_data_stream, dev_data_stream = train_data_stream.train_test_split(test_split=0.2, seed=123)

# Pretrained resource passed to training as `template_resource`.
pretrained_model_resource = watson_nlp.load(
    'pretrained-model_slate.153m.distilled_many_transformer_multilingual_uncased'
)

# Hyperparameters collected in one place so they are easy to find and tune.
training_hyperparameters = {
    'learning_rate': 1e-4,
    'num_train_epochs': 20,
    'warmup_ratio': 0.1,
    'per_device_train_batch_size': 64,
    'per_device_eval_batch_size': 32,
}

trained_workflow = watson_nlp.workflows.entity_mentions.transformer.Transformer.train(
    train_data_stream=train_data_stream,
    dev_data_stream=dev_data_stream,
    syntax_models=syntax_models,
    template_resource=pretrained_model_resource,
    **training_hyperparameters,
)

You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


***** train metrics *****
  epoch                    =       20.0
  total_flos               =     7559GF
  train_loss               =     0.8268
  train_runtime            = 0:02:07.68
  train_samples            =         73
  train_samples_per_second =     11.434
  train_steps_per_second   =      0.313


In [4]:
# Sample passage used to exercise the model, both locally and through the deployment.
# Written as adjacent string literals (one per paragraph) so that the line break
# before the final paragraph is an explicit "\n" escape rather than a raw newline
# inside a single-quoted literal, which Python rejects as an unterminated string.
text = (
    "Fiscal 2010 Financial Results\r\r"
    "Fiscal 2010 was a year of record performance for our company. Each of our brands ended the year stronger than it began, and aggressive and proactive initiatives across the organization led to new milestones in profitability. We are pleased with the progress we made in merchandising, marketing, customer acquisition and customer service, as it was these initiatives that we believe have allowed us to attract new customers to our brands and gain profitable market share all year. In fiscal 2010, our net revenues increased 12.9% to $3,504,158,000 compared to $3,102,704,000 in fiscal 2009 and we increased our fiscal 2010 diluted earnings per share to $1.83 versus $0.72 in fiscal 2009. We also ended the year with $628,403,000 in cash after returning nearly $185,000,000 to our shareholders through stock repurchases and dividends.\r\r"
    "In our direct-to-customer channel, net revenues in fiscal 2010 increased by $227,902,000, or 18.6%, compared to fiscal 2009. This increase was driven by 26.9% growth in Internet net revenues in fiscal 2010 compared to fiscal 2009. Increased net revenues during fiscal 2010 were driven by the Pottery Barn, Pottery Barn Kids and PBteen brands.\r\r"
    "Retail net revenues in fiscal 2010 increased by $173,552,000, or 9.2%, compared to fiscal 2009. This increase was driven by growth of 9.8% in comparable store sales, partially offset by a 4.1% year-over-year reduction in retail leased square footage, including 18 net fewer stores. Increased net revenues during fiscal 2010 were driven by the Pottery Barn, West Elm and Williams-Sonoma brands.\r\r"
    "In our core brands, net revenues increased 12.3% compared to fiscal 2009 led by growth in the Pottery Barn brand. Sales trends improved in every concept and we saw significant growth in new customer acquisition.\r\r"
    "In our emerging brands, net revenues increased 17.1% led by West Elm and PBteen, which continued to see net revenue and operating margin increases. \n"
    "In Williams-Sonoma Home, we completed the retail restructuring of the brand by closing all stand-alone retail stores."
)

In [5]:
# Run the in-memory trained workflow on the sample text ("en" is passed as the
# second argument); as the cell's last expression, the result is displayed.
trained_workflow.run(text, "en")

{
  "mentions": [
    {
      "span": {
        "begin": 0,
        "end": 11,
        "text": "Fiscal 2010"
      },
      "type": "PRODUCT_SERVICE",
      "producer_id": null,
      "confidence": 0.523390297380134,
      "mention_type": "MENTT_UNSET",
      "mention_class": "MENTC_UNSET",
      "role": ""
    },
    {
      "span": {
        "begin": 31,
        "end": 42,
        "text": "Fiscal 2010"
      },
      "type": "EARNINGS_PERIOD",
      "producer_id": null,
      "confidence": 0.4390660249160806,
      "mention_type": "MENTT_UNSET",
      "mention_class": "MENTC_UNSET",
      "role": ""
    },
    {
      "span": {
        "begin": 122,
        "end": 126,
        "text": "year"
      },
      "type": "EARNINGS_PERIOD",
      "producer_id": null,
      "confidence": 0.29904913902282715,
      "mention_type": "MENTT_UNSET",
      "mention_class": "MENTC_UNSET",
      "role": ""
    },
    {
      "span": {
        "begin": 514,
        "end": 525,
        "text": "fiscal 

In [6]:
from ibm_watson_machine_learning import APIClient

# Placeholder value: it is used as the client "url" below, so replace it first.
cp4d_hostname = "#### REPLACE WITH YOUR CP4D HOSTNAME, e.g. https://cpd-cp4d-instances.apps.yourdomain.cp.com ####"

# The token comes from the current session via wslib, so no secret is typed
# into the notebook.
wml_credentials = dict(
    url=cp4d_hostname,
    token=wslib.auth.get_current_token(),
    instance_id="wml_local",
    version="4.7",
)
client = APIClient(wml_credentials)

# List the spaces this client can see; one of the IDs is needed as `space_id` later.
client.spaces.list()

Note: 'limit' is not provided. Only first 50 records will be displayed if the number of records exceed 50
------------------------------------  ----  ------------------------
ID                                    NAME  CREATED
f0610e22-de4e-47d0-86e3-898f5425fa1d  test  2023-10-17T03:15:04.366Z
------------------------------------  ----  ------------------------


Unnamed: 0,ID,NAME,CREATED
0,f0610e22-de4e-47d0-86e3-898f5425fa1d,test,2023-10-17T03:15:04.366Z


In [7]:
# Name under which the trained workflow is stored in the space.
model_name = "Financial demo"
# Placeholder value: replace it with the ID of the target space before running.
space_id = "#### REPLACe WITH YOUR SPACE ID, e.g. f0610e22-de4e-47d0-86e3-898f5425fa1d in the above list ####"

# NOTE: from here on `wslib` is rebound to a handle created with `space_id`;
# it no longer refers to the handle created at the top of the notebook.
wslib = access_project_or_space(dict(space_id=space_id))

# Store the serialized workflow under `model_name`; overwrite=True is passed so
# an existing asset with the same name is replaced.
wslib.save_data(
    model_name,
    trained_workflow.as_bytes(),
    overwrite=True,
)

{'name': 'Financial demo',
 'asset_type': 'data_asset',
 'asset_id': 'f8f18836-3301-4189-9399-95bad3a5b811',
 'attachment_id': '29374eff-0091-48f2-9969-781caed4cc52',
 'filepath': 'Financial demo.',
 'data_size': None,
 'mime': 'application/binary',
 'summary': ['created or overwritten file',
  'created data asset',
  'created attachment']}

In [8]:
# Make `space_id` the client's default space before the repository and
# deployment calls that follow.
client.set.default_space(space_id)

'SUCCESS'

In [26]:
def extract_entities():
    """Load the saved entity model and return a scoring function.

    The imports and the model load happen inside this function, not at notebook
    level. NOTE(review): presumably this is because the function is stored and
    executed outside this notebook; confirm against the deployment runtime.

    Returns:
        The inner ``score`` callable, which maps a scoring payload to a dict
        of predictions.
    """
    import watson_nlp
    from ibm_watson_studio_lib import access_project_or_space
    wslib = access_project_or_space()
    loaded = wslib.load_data("Financial demo") # change here if you change model_name
    entity_model = watson_nlp.load(loaded)

    def score(input):
        """Run the entity model on every text of every ``input_data`` entry.

        Args:
            input: dict with an ``"input_data"`` list; each entry holds a
                ``"values"`` list of rows, and each row is a list of texts.

        Returns:
            ``{'predictions': [...]}`` with one ``{'fields', 'values'}`` entry
            per ``input_data`` entry, in the same order. ``values`` mirrors the
            row/column layout of the input, with each text replaced by the
            model result converted through ``to_dict()``.
        """
        predictions = []
        # Walk every entry instead of only input["input_data"][0], so extra
        # entries are not silently dropped and an empty list yields no predictions.
        for input_entry in input["input_data"]:
            entry_values = [
                [entity_model.run(input_data, 'en').to_dict() for input_data in input_data_row]
                for input_data_row in input_entry["values"]
            ]
            predictions.append({'fields': ['nlp_prediction'], 'values': entry_values})
        return {'predictions': predictions}

    return score

In [27]:
# Look up the ID of the software specification named below; it is attached to
# the stored function through SOFTWARE_SPEC_UID.
pyfunc_swspec_id = client.software_specifications.get_uid_by_name("runtime-22.2-py3.10")

# Name and description are derived from `model_name`.
function_meta_names = client.repository.FunctionMetaNames
metadata = {
    function_meta_names.NAME: f"{model_name}-function",
    function_meta_names.DESCRIPTION: f"function using {model_name} model",
    function_meta_names.SOFTWARE_SPEC_UID: pyfunc_swspec_id,
}

# Store `extract_entities` in the repository; the returned details are kept so
# the function's UID can be read from them afterwards.
entity_extractor_details = client.repository.store_function(
    function=extract_entities,
    meta_props=metadata,
)

In [28]:
# UID of the function stored in the previous step.
entity_extractor_uid = client.repository.get_function_uid(entity_extractor_details)

# Deployment configuration: name derived from `model_name`, hardware spec 'S',
# and an ONLINE entry with no extra options.
deployment_meta_names = client.deployments.ConfigurationMetaNames
meta_props = {
    deployment_meta_names.NAME: f"{model_name}-deploy",
    deployment_meta_names.HARDWARE_SPEC: {'name': 'S'},
    deployment_meta_names.ONLINE: {},
}

# Create the deployment and keep its ID for scoring.
entity_deployment_details = client.deployments.create(
    entity_extractor_uid,
    meta_props=meta_props,
)
entity_deployment_id = client.deployments.get_uid(entity_deployment_details)



#######################################################################################

Synchronous deployment creation for uid: '35bb7573-549e-4130-a774-ed16f24644c7' started

#######################################################################################


initializing
Note: online_url is deprecated and will be removed in a future release. Use serving_urls instead.
.........
ready


------------------------------------------------------------------------------------------------
Successfully finished deployment creation, deployment_uid='973046bd-2f4e-4afa-90e6-6d831c7d9aa2'
------------------------------------------------------------------------------------------------




In [29]:
# A single input entry: one "text" field and one row holding the sample text.
text_input_entry = {'fields': ["text"], 'values': [[text]]}
scoring_payload = {
    client.deployments.ScoringMetaNames.INPUT_DATA: [text_input_entry],
}

# Send the payload to the deployment and display the response.
predictions = client.deployments.score(entity_deployment_id, scoring_payload)
predictions

{'predictions': [{'fields': ['nlp_prediction'],
   'values': [[{'mentions': [{'span': {'begin': 0,
         'end': 11,
         'text': 'Fiscal 2010'},
        'type': 'PRODUCT_SERVICE',
        'producer_id': None,
        'confidence': 0.5233900182453832,
        'mention_type': 'MENTT_UNSET',
        'mention_class': 'MENTC_UNSET',
        'role': ''},
       {'span': {'begin': 31, 'end': 42, 'text': 'Fiscal 2010'},
        'type': 'EARNINGS_PERIOD',
        'producer_id': None,
        'confidence': 0.43906629349821147,
        'mention_type': 'MENTT_UNSET',
        'mention_class': 'MENTC_UNSET',
        'role': ''},
       {'span': {'begin': 122, 'end': 126, 'text': 'year'},
        'type': 'EARNINGS_PERIOD',
        'producer_id': None,
        'confidence': 0.2990490794181824,
        'mention_type': 'MENTT_UNSET',
        'mention_class': 'MENTC_UNSET',
        'role': ''},
       {'span': {'begin': 514, 'end': 525, 'text': 'fiscal 2010'},
        'type': 'EARNINGS_PERIOD',
  