### Please install the required Python modules/SDKs

In [None]:
# Shell escape: run `activate` for the ai-azure-c1 conda environment.
# NOTE(review): `!` commands presumably run in a subshell, so this likely does not
# change the running kernel's own environment — confirm.
! activate ai-azure-c1

import sys

# Make the ai-azure-c1 environment's site-packages importable from this kernel,
# so the Azure SDK imports in the following cells can be resolved.
sys.path.append("/opt/conda/envs/ai-azure-c1/lib/python3.8/site-packages")

## Importing Azure Form Recognizer python modules

In [None]:
import os
from azure.core.exceptions import ResourceNotFoundError
from azure.ai.formrecognizer import FormRecognizerClient
from azure.ai.formrecognizer import FormTrainingClient
from azure.core.credentials import AzureKeyCredential

In [None]:
# Endpoint and API key of the Form Recognizer resource.
# Prefer setting the AZURE_FORM_RECOGNIZER_ENDPOINT / AZURE_FORM_RECOGNIZER_KEY
# environment variables so the secret key is never saved inside the notebook.
# If a variable is not set, the placeholder string is used; replace it with your own value.
AZURE_FORM_RECOGNIZER_ENDPOINT = os.environ.get(
    "AZURE_FORM_RECOGNIZER_ENDPOINT", "ENTER FORM RECOGNIZER ENDPOINT"
)
AZURE_FORM_RECOGNIZER_KEY = os.environ.get(
    "AZURE_FORM_RECOGNIZER_KEY", "ENTER FORM RECOGNIZER KEY"
)

In [None]:
# Short aliases used when constructing the Form Recognizer clients.
endpoint, key = AZURE_FORM_RECOGNIZER_ENDPOINT, AZURE_FORM_RECOGNIZER_KEY

In [None]:
# Client used for training and managing custom models, authenticated with the API key.
training_credential = AzureKeyCredential(key)
form_training_client = FormTrainingClient(endpoint=endpoint, credential=training_credential)

In [None]:
# Request the custom models already saved under this Form Recognizer resource.
# NOTE(review): saved_model_list is not read by any later cell shown here; it is
# presumably a lazy iterable, so loop over it to actually inspect the models — confirm.
saved_model_list = form_training_client.list_custom_models()

## Training Source Data URL

### Get training documents for this demo from the link below
- To test solution code, download the training documents to your local system: https://github.com/udacity/cd0461-building-computer-vision-solutions-with-azure-exercises/tree/main/resources/1040docs-solution
- Upload training documents `f1040-001.pdf` through `f1040-006.pdf`, together with their `labels.json` files, to a blob container at Azure Blob Storage.
- Generate a SAS URL for this training data container. The bottom of this page shows how to generate a SAS URL for your training data blob container.
- In the Form Recognizer portal, create a new project and connect it to this blob container.
- Since the documents are already labeled, all you need to do is run layout on them in the Form Recognizer portal so that the `ocr.json` files are auto-generated in the blob container.


In [None]:
# SAS URL of the blob container that holds the training documents;
# it is passed to begin_training() in the training cell.
trainingDataUrl = "ADD YOUR BLOB STORAGE SAS URL HERE"

## Performing Labeled Training
### use_training_labels=True
* You need at least 5 labeled documents with the `ocr.json` and `labels.json` files; otherwise, you will run into errors.

In [None]:
# Start training a custom model from the documents at trainingDataUrl, using the
# label files stored alongside them (use_training_labels=True).
training_process = form_training_client.begin_training(trainingDataUrl, use_training_labels=True)
# Fetch the trained model from the training operation.
# NOTE(review): presumably blocks until training has finished (Azure SDK poller
# semantics), so this cell may take a while — confirm.
custom_model = training_process.result()

## Getting Model Info

In [None]:
custom_model

In [None]:
custom_model.model_id

In [None]:
custom_model.status

In [None]:
custom_model.training_started_on

In [None]:
custom_model.training_completed_on

In [None]:
custom_model.training_documents

In [None]:
# Report the per-document training outcome: name, status, page count, and any errors.
for doc in custom_model.training_documents:
    print(f"Document name: {doc.name}")
    print(f"Document status: {doc.status}")
    print(f"Document page count: {doc.page_count}")
    print(f"Document errors: {doc.errors}")

In [None]:
custom_model.properties

In [None]:
custom_model.submodels

In [None]:
# Summarize each submodel: its form type plus the fields it recognizes.
for submodel in custom_model.submodels:
    # Use a field's label when it has one; otherwise fall back to its dictionary key.
    field_names = [field.label or name for name, field in submodel.fields.items()]
    print(
        f"The submodel with form type '{submodel.form_type}' "
        f"has recognized the following fields: {', '.join(field_names)}"
    )

In [None]:
custom_model.model_id

In [None]:
# Look the trained model up again by its ID and print a short summary of its metadata.
custom_model_info = form_training_client.get_custom_model(
    model_id=custom_model.model_id,
)
print(f"Model ID: {custom_model_info.model_id}")
print(f"Status: {custom_model_info.status}")
print(f"Training started on: {custom_model_info.training_started_on}")
print(f"Training completed on: {custom_model_info.training_completed_on}")

## Using an Image/PDF document as test document URL (PDF in this example)
## Save PDF to a blob container and use SAS URL of the test PDF document as target url

### Sample Test Documents:
- You can use one or more of these documents for testing/prediction. Note that a test document should **not** have been used in training.
    - https://github.com/udacity/cd0461-building-computer-vision-solutions-with-azure-exercises/blob/main/resources/1040docs-solution/f1040-007.pdf
    - https://github.com/udacity/cd0461-building-computer-vision-solutions-with-azure-exercises/blob/main/resources/1040docs-solution/f1040-008.pdf
    - https://github.com/udacity/cd0461-building-computer-vision-solutions-with-azure-exercises/blob/main/resources/1040docs-solution/f1040-009.pdf

1. Please download and save the above files locally, and then upload to your Azure blob container. 
2. After that, please create an Azure SAS URL with only **READ access** for each test document, to use below when testing the model.

The bottom of this page shows how to generate a SAS URL for your test PDF document.

In [None]:
# SAS URL of a single test document; it is passed as form_url to the recognition call.
new_test_url = "USE Azure BLOB SAS URL of test document here"

In [None]:
new_test_url

In [None]:
# Client used for recognizing forms with a model, authenticated with the same endpoint and key.
recognizer_credential = AzureKeyCredential(key)
form_recognizer_client = FormRecognizerClient(endpoint=endpoint, credential=recognizer_credential)

In [None]:
custom_model.model_id

In [None]:
custom_model_info.model_id

In [None]:
# Start recognizing the test document at new_test_url with the trained custom model.
custom_test_action = form_recognizer_client.begin_recognize_custom_forms_from_url(
    model_id=custom_model_info.model_id,
    form_url=new_test_url,
)

In [None]:
# NOTE: without parentheses this only displays the bound `result` method of the
# operation object; it does not fetch anything. The recognized forms are obtained
# by calling custom_test_action.result(), as a later cell does.
custom_test_action.result

In [None]:
custom_test_action.status()

In [None]:
# Collect the recognized forms from the recognition operation.
# NOTE(review): presumably waits until the operation has completed — confirm.
custom_test_action_result = custom_test_action.result()

In [None]:
# Print every recognized form: its type, then each field's label, value, and confidence.
for recognized_content in custom_test_action_result:
    print(f"Form type: {recognized_content.form_type}")
    for name, field in recognized_content.fields.items():
        # Use the label text found in the document when present; otherwise the field name.
        label = field.label_data.text if field.label_data else name
        print(
            f"Field '{name}' has label '{label}' with value '{field.value}' "
            f"and a confidence score of {field.confidence}"
        )

### If all goes well, your results should look like the following:
- Form type: custom:9ca8996d-91d1-4b28-8bb9-b3ace2756dc2
- Field 'Main SSN' has label 'Main SSN' with value '100 40 6000' and a confidence score of 0.99
- Field 'Spouse SSN' has label 'Spouse SSN' with value '41. 9009' and a confidence score of 0.981
- Field 'Main Presidential Campaign' has label 'Main Presidential Campaign' with value 'unselected' and a confidence score of 0.941
- Field 'Spouse Presidential Campaign' has label 'Spouse Presidential Campaign' with value 'selected' and a confidence score of 0.992
- Field 'Crypto Interest Yes' has label 'Crypto Interest Yes' with value 'selected' and a confidence score of 0.991
- Field 'Spouse First Name' has label 'Spouse First Name' with value 'Joan' and a confidence score of 0.99
- Field 'Spouse Last Name' has label 'Spouse Last Name' with value 'Madan' and a confidence score of 0.99
- Field 'Crypto Interest No' has label 'Crypto Interest No' with value 'unselected' and a confidence score of 0.991
- Field 'Main Last Name' has label 'Main Last Name' with value 'Baxter' and a confidence score of 0.99
- Field 'Main First Name' has label 'Main First Name' with value 'Philppe' and a confidence score of 0.99