diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-fill-mask-fill-mask.yml b/.github/workflows/sdk-foundation-models-system-evaluation-fill-mask-fill-mask.yml
new file mode 100644
index 0000000000..fefc37004f
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-evaluation-fill-mask-fill-mask.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-evaluation-fill-mask-fill-mask
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:  # manual runs from the Actions tab
+  schedule:
+    - cron: "34 11/12 * * *"  # minute 34 of hours 11 and 23 (UTC)
+  pull_request:  # PRs targeting main that touch any path listed below
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/evaluation/fill-mask/**
+      - .github/workflows/sdk-foundation-models-system-evaluation-fill-mask-fill-mask.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:  # one active run per workflow and PR (or ref, outside PRs)
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run cancels the one still in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # v2 is built for the retired Node 12 runtime
+    - name: setup python
+      uses: actions/setup-python@v5  # v2 is built for the retired Node 12 runtime
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: run foundation-models/system/evaluation/fill-mask/fill-mask.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "fill-mask.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python fill-mask.ipynb fill-mask.output.ipynb
+      working-directory: sdk/python/foundation-models/system/evaluation/fill-mask  # NOTE(review): script checks .azureml/config but generates .azureml/config.json - confirm
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v2 is deprecated and no longer accepted by GitHub
+      with:
+        name: fill-mask
+        path: sdk/python/foundation-models/system/evaluation/fill-mask
diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-question-answering-question-answering.yml b/.github/workflows/sdk-foundation-models-system-evaluation-question-answering-question-answering.yml
new file mode 100644
index 0000000000..da046dae81
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-evaluation-question-answering-question-answering.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-evaluation-question-answering-question-answering
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:  # manual runs from the Actions tab
+  schedule:
+    - cron: "12 3/12 * * *"  # minute 12 of hours 3 and 15 (UTC)
+  pull_request:  # PRs targeting main that touch any path listed below
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/evaluation/question-answering/**
+      - .github/workflows/sdk-foundation-models-system-evaluation-question-answering-question-answering.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:  # one active run per workflow and PR (or ref, outside PRs)
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run cancels the one still in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # v2 is built for the retired Node 12 runtime
+    - name: setup python
+      uses: actions/setup-python@v5  # v2 is built for the retired Node 12 runtime
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: run foundation-models/system/evaluation/question-answering/question-answering.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "question-answering.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python question-answering.ipynb question-answering.output.ipynb
+      working-directory: sdk/python/foundation-models/system/evaluation/question-answering  # NOTE(review): script checks .azureml/config but generates .azureml/config.json - confirm
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v2 is deprecated and no longer accepted by GitHub
+      with:
+        name: question-answering
+        path: sdk/python/foundation-models/system/evaluation/question-answering
diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-summarization-abstractive-and-extractive-summarization.yml b/.github/workflows/sdk-foundation-models-system-evaluation-summarization-abstractive-and-extractive-summarization.yml
new file mode 100644
index 0000000000..9cfb152b4b
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-evaluation-summarization-abstractive-and-extractive-summarization.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-evaluation-summarization-abstractive-and-extractive-summarization
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:  # manual runs from the Actions tab
+  schedule:
+    - cron: "28 9/12 * * *"  # minute 28 of hours 9 and 21 (UTC)
+  pull_request:  # PRs targeting main that touch any path listed below
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/evaluation/summarization/**
+      - .github/workflows/sdk-foundation-models-system-evaluation-summarization-abstractive-and-extractive-summarization.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:  # one active run per workflow and PR (or ref, outside PRs)
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run cancels the one still in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # v2 is built for the retired Node 12 runtime
+    - name: setup python
+      uses: actions/setup-python@v5  # v2 is built for the retired Node 12 runtime
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: run foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "abstractive-and-extractive-summarization.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python abstractive-and-extractive-summarization.ipynb abstractive-and-extractive-summarization.output.ipynb
+      working-directory: sdk/python/foundation-models/system/evaluation/summarization  # NOTE(review): script checks .azureml/config but generates .azureml/config.json - confirm
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v2 is deprecated and no longer accepted by GitHub
+      with:
+        name: abstractive-and-extractive-summarization
+        path: sdk/python/foundation-models/system/evaluation/summarization
diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-text-classification-entailment-contradiction.yml b/.github/workflows/sdk-foundation-models-system-evaluation-text-classification-entailment-contradiction.yml
new file mode 100644
index 0000000000..b84c8324f2
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-evaluation-text-classification-entailment-contradiction.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-evaluation-text-classification-entailment-contradiction
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:  # manual runs from the Actions tab
+  schedule:
+    - cron: "56 5/12 * * *"  # minute 56 of hours 5 and 17 (UTC)
+  pull_request:  # PRs targeting main that touch any path listed below
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/evaluation/text-classification/**
+      - .github/workflows/sdk-foundation-models-system-evaluation-text-classification-entailment-contradiction.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:  # one active run per workflow and PR (or ref, outside PRs)
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run cancels the one still in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # v2 is built for the retired Node 12 runtime
+    - name: setup python
+      uses: actions/setup-python@v5  # v2 is built for the retired Node 12 runtime
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: run foundation-models/system/evaluation/text-classification/entailment-contradiction.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "entailment-contradiction.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python entailment-contradiction.ipynb entailment-contradiction.output.ipynb
+      working-directory: sdk/python/foundation-models/system/evaluation/text-classification  # NOTE(review): script checks .azureml/config but generates .azureml/config.json - confirm
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v2 is deprecated and no longer accepted by GitHub
+      with:
+        name: entailment-contradiction
+        path: sdk/python/foundation-models/system/evaluation/text-classification
diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-text-classification-sentiment-analysis.yml b/.github/workflows/sdk-foundation-models-system-evaluation-text-classification-sentiment-analysis.yml
new file mode 100644
index 0000000000..e01c8ff96f
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-evaluation-text-classification-sentiment-analysis.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-evaluation-text-classification-sentiment-analysis
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:  # manual runs from the Actions tab
+  schedule:
+    - cron: "20 7/12 * * *"  # minute 20 of hours 7 and 19 (UTC)
+  pull_request:  # PRs targeting main that touch any path listed below
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/evaluation/text-classification/**
+      - .github/workflows/sdk-foundation-models-system-evaluation-text-classification-sentiment-analysis.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:  # one active run per workflow and PR (or ref, outside PRs)
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run cancels the one still in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # v2 is built for the retired Node 12 runtime
+    - name: setup python
+      uses: actions/setup-python@v5  # v2 is built for the retired Node 12 runtime
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: run foundation-models/system/evaluation/text-classification/sentiment-analysis.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "sentiment-analysis.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python sentiment-analysis.ipynb sentiment-analysis.output.ipynb
+      working-directory: sdk/python/foundation-models/system/evaluation/text-classification  # NOTE(review): script checks .azureml/config but generates .azureml/config.json - confirm
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v2 is deprecated and no longer accepted by GitHub
+      with:
+        name: sentiment-analysis
+        path: sdk/python/foundation-models/system/evaluation/text-classification
diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-text-generation-text-generation.yml b/.github/workflows/sdk-foundation-models-system-evaluation-text-generation-text-generation.yml
new file mode 100644
index 0000000000..5b964e14a4
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-evaluation-text-generation-text-generation.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-evaluation-text-generation-text-generation
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:  # manual runs from the Actions tab
+  schedule:
+    - cron: "57 9/12 * * *"  # minute 57 of hours 9 and 21 (UTC)
+  pull_request:  # PRs targeting main that touch any path listed below
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/evaluation/text-generation/**
+      - .github/workflows/sdk-foundation-models-system-evaluation-text-generation-text-generation.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:  # one active run per workflow and PR (or ref, outside PRs)
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run cancels the one still in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # v2 is built for the retired Node 12 runtime
+    - name: setup python
+      uses: actions/setup-python@v5  # v2 is built for the retired Node 12 runtime
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: run foundation-models/system/evaluation/text-generation/text-generation.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "text-generation.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python text-generation.ipynb text-generation.output.ipynb
+      working-directory: sdk/python/foundation-models/system/evaluation/text-generation  # NOTE(review): script checks .azureml/config but generates .azureml/config.json - confirm
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v2 is deprecated and no longer accepted by GitHub
+      with:
+        name: text-generation
+        path: sdk/python/foundation-models/system/evaluation/text-generation
diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-token-classification-news-articles-entity-recognition.yml b/.github/workflows/sdk-foundation-models-system-evaluation-token-classification-news-articles-entity-recognition.yml
new file mode 100644
index 0000000000..23479dfa24
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-evaluation-token-classification-news-articles-entity-recognition.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-evaluation-token-classification-news-articles-entity-recognition
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:  # manual runs from the Actions tab
+  schedule:
+    - cron: "8 2/12 * * *"  # minute 8 of hours 2 and 14 (UTC)
+  pull_request:  # PRs targeting main that touch any path listed below
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/evaluation/token-classification/**
+      - .github/workflows/sdk-foundation-models-system-evaluation-token-classification-news-articles-entity-recognition.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:  # one active run per workflow and PR (or ref, outside PRs)
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run cancels the one still in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # v2 is built for the retired Node 12 runtime
+    - name: setup python
+      uses: actions/setup-python@v5  # v2 is built for the retired Node 12 runtime
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: run foundation-models/system/evaluation/token-classification/news-articles-entity-recognition.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "news-articles-entity-recognition.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python news-articles-entity-recognition.ipynb news-articles-entity-recognition.output.ipynb
+      working-directory: sdk/python/foundation-models/system/evaluation/token-classification  # NOTE(review): script checks .azureml/config but generates .azureml/config.json - confirm
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v2 is deprecated and no longer accepted by GitHub
+      with:
+        name: news-articles-entity-recognition
+        path: sdk/python/foundation-models/system/evaluation/token-classification
diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-translation-translation-romanian-to-english.yml b/.github/workflows/sdk-foundation-models-system-evaluation-translation-translation-romanian-to-english.yml
new file mode 100644
index 0000000000..4cf2f733af
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-evaluation-translation-translation-romanian-to-english.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-evaluation-translation-translation-romanian-to-english
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:  # manual runs from the Actions tab
+  schedule:
+    - cron: "28 3/12 * * *"  # minute 28 of hours 3 and 15 (UTC)
+  pull_request:  # PRs targeting main that touch any path listed below
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/evaluation/translation/**
+      - .github/workflows/sdk-foundation-models-system-evaluation-translation-translation-romanian-to-english.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:  # one active run per workflow and PR (or ref, outside PRs)
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run cancels the one still in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # v2 is built for the retired Node 12 runtime
+    - name: setup python
+      uses: actions/setup-python@v5  # v2 is built for the retired Node 12 runtime
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: run foundation-models/system/evaluation/translation/translation-romanian-to-english.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "translation-romanian-to-english.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python translation-romanian-to-english.ipynb translation-romanian-to-english.output.ipynb
+      working-directory: sdk/python/foundation-models/system/evaluation/translation  # NOTE(review): script checks .azureml/config but generates .azureml/config.json - confirm
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v2 is deprecated and no longer accepted by GitHub
+      with:
+        name: translation-romanian-to-english
+        path: sdk/python/foundation-models/system/evaluation/translation
diff --git a/.github/workflows/sdk-foundation-models-system-finetune-question-answering-extractive-qa.yml b/.github/workflows/sdk-foundation-models-system-finetune-question-answering-extractive-qa.yml
new file mode 100644
index 0000000000..3735776e07
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-finetune-question-answering-extractive-qa.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-finetune-question-answering-extractive-qa
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:  # manual runs from the Actions tab
+  schedule:
+    - cron: "0 0 * * *"  # once a day at 00:00 (UTC)
+  pull_request:  # PRs targeting main that touch any path listed below
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/finetune/question-answering/**
+      - .github/workflows/sdk-foundation-models-system-finetune-question-answering-extractive-qa.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:  # one active run per workflow and PR (or ref, outside PRs)
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run cancels the one still in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # v2 is built for the retired Node 12 runtime
+    - name: setup python
+      uses: actions/setup-python@v5  # v2 is built for the retired Node 12 runtime
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # best-effort: later steps still run if this fails
+    - name: run foundation-models/system/finetune/question-answering/extractive-qa.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "extractive-qa.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python extractive-qa.ipynb extractive-qa.output.ipynb
+      working-directory: sdk/python/foundation-models/system/finetune/question-answering  # NOTE(review): script checks .azureml/config but generates .azureml/config.json - confirm
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v2 is deprecated and no longer accepted by GitHub
+      with:
+        name: extractive-qa
+        path: sdk/python/foundation-models/system/finetune/question-answering
diff --git a/.github/workflows/sdk-foundation-models-system-finetune-summarization-news-summary.yml b/.github/workflows/sdk-foundation-models-system-finetune-summarization-news-summary.yml
new file mode 100644
index 0000000000..de55d05410
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-finetune-summarization-news-summary.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-finetune-summarization-news-summary
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 0 * * *"  # once a day at 00:00 UTC
+  pull_request:
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/finetune/summarization/**
+      - .github/workflows/sdk-foundation-models-system-finetune-summarization-news-summary.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run in the same group cancels the one still running
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # bumped from v2; keep the sdk/python/readme.py template in sync
+    - name: setup python
+      uses: actions/setup-python@v5  # bumped from v2
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # a failure in this step does not fail the job
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # a failure in this step does not fail the job
+    - name: run foundation-models/system/finetune/summarization/news-summary.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "news-summary.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python news-summary.ipynb news-summary.output.ipynb
+      working-directory: sdk/python/foundation-models/system/finetune/summarization
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v1/v2 of this action are retired
+      with:
+        name: news-summary
+        path: sdk/python/foundation-models/system/finetune/summarization
diff --git a/.github/workflows/sdk-foundation-models-system-finetune-text-classification-emotion-detection.yml b/.github/workflows/sdk-foundation-models-system-finetune-text-classification-emotion-detection.yml
new file mode 100644
index 0000000000..93afea236f
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-finetune-text-classification-emotion-detection.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-finetune-text-classification-emotion-detection
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 0 * * *"  # once a day at 00:00 UTC
+  pull_request:
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/finetune/text-classification/**
+      - .github/workflows/sdk-foundation-models-system-finetune-text-classification-emotion-detection.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run in the same group cancels the one still running
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # bumped from v2; keep the sdk/python/readme.py template in sync
+    - name: setup python
+      uses: actions/setup-python@v5  # bumped from v2
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # a failure in this step does not fail the job
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # a failure in this step does not fail the job
+    - name: run foundation-models/system/finetune/text-classification/emotion-detection.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "emotion-detection.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python emotion-detection.ipynb emotion-detection.output.ipynb
+      working-directory: sdk/python/foundation-models/system/finetune/text-classification
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v1/v2 of this action are retired
+      with:
+        name: emotion-detection
+        path: sdk/python/foundation-models/system/finetune/text-classification
diff --git a/.github/workflows/sdk-foundation-models-system-finetune-token-classification-token-classification.yml b/.github/workflows/sdk-foundation-models-system-finetune-token-classification-token-classification.yml
new file mode 100644
index 0000000000..5330b7a783
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-finetune-token-classification-token-classification.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-finetune-token-classification-token-classification
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 0 * * *"  # once a day at 00:00 UTC
+  pull_request:
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/finetune/token-classification/**
+      - .github/workflows/sdk-foundation-models-system-finetune-token-classification-token-classification.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run in the same group cancels the one still running
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # bumped from v2; keep the sdk/python/readme.py template in sync
+    - name: setup python
+      uses: actions/setup-python@v5  # bumped from v2
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # a failure in this step does not fail the job
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # a failure in this step does not fail the job
+    - name: run foundation-models/system/finetune/token-classification/token-classification.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "token-classification.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python token-classification.ipynb token-classification.output.ipynb
+      working-directory: sdk/python/foundation-models/system/finetune/token-classification
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v1/v2 of this action are retired
+      with:
+        name: token-classification
+        path: sdk/python/foundation-models/system/finetune/token-classification
diff --git a/.github/workflows/sdk-foundation-models-system-finetune-translation-translation.yml b/.github/workflows/sdk-foundation-models-system-finetune-translation-translation.yml
new file mode 100644
index 0000000000..5347d7be3a
--- /dev/null
+++ b/.github/workflows/sdk-foundation-models-system-finetune-translation-translation.yml
@@ -0,0 +1,77 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: sdk-foundation-models-system-finetune-translation-translation
+# This file is created by sdk/python/readme.py.
+# Please do not edit directly.
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 0 * * *"  # once a day at 00:00 UTC
+  pull_request:
+    branches:
+      - main
+    paths:
+      - sdk/python/foundation-models/system/finetune/translation/**
+      - .github/workflows/sdk-foundation-models-system-finetune-translation-translation.yml
+      - sdk/python/dev-requirements.txt
+      - infra/**
+      - sdk/python/setup.sh
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run in the same group cancels the one still running
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v4  # bumped from v2; keep the sdk/python/readme.py template in sync
+    - name: setup python
+      uses: actions/setup-python@v5  # bumped from v2
+      with:
+        python-version: "3.8"
+    - name: pip install notebook reqs
+      run: pip install -r sdk/python/dev-requirements.txt
+    - name: pip install mlflow reqs
+      run: pip install -r sdk/python/mlflow-requirements.txt
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra
+      continue-on-error: false
+    - name: setup SDK
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: sdk/python
+      continue-on-error: true  # a failure in this step does not fail the job
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # a failure in this step does not fail the job
+    - name: run foundation-models/system/finetune/translation/translation.ipynb
+      run: |
+          source "${{ github.workspace }}/infra/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/init_environment.sh";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
+          bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "translation.ipynb";
+          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
+          papermill -k python translation.ipynb translation.output.ipynb
+      working-directory: sdk/python/foundation-models/system/finetune/translation
+    - name: upload notebook's working folder as an artifact
+      if: ${{ always() }}  # upload even when an earlier step failed
+      uses: actions/upload-artifact@v4  # v1/v2 of this action are retired
+      with:
+        name: translation
+        path: sdk/python/foundation-models/system/finetune/translation
diff --git a/cli/foundation-models/system/evaluation/fill-mask/fill-mask-pipeline.yml b/cli/foundation-models/system/evaluation/fill-mask/fill-mask-pipeline.yml
new file mode 100644
index 0000000000..b9c73a605c
--- /dev/null
+++ b/cli/foundation-models/system/evaluation/fill-mask/fill-mask-pipeline.yml
@@ -0,0 +1,20 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+
+experiment_name: fill-mask-model-evaluation-subgraph
+
+compute: gpu-cluster-big
+
+jobs:
+  pipeline_component_job:  # single child job: runs the registry's model_evaluation_pipeline component
+    type: pipeline
+    component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest
+    inputs:
+      task: fill-mask
+      test_data:  # local file; the path is written relative to this YAML's directory, five levels below the repo root
+        path: "../../../../../sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask/small-test.jsonl"
+        type: uri_file
+      mlflow_model:
+        path: azureml://registries/azureml-preview/models/bert-based-uncased/versions/3
+      label_column_name: title
+      device: gpu
diff --git a/cli/foundation-models/system/evaluation/question-answering/extractive-qa-pipeline.yml b/cli/foundation-models/system/evaluation/question-answering/extractive-qa-pipeline.yml
new file mode 100644
index 0000000000..ee7d8b4931
--- /dev/null
+++ b/cli/foundation-models/system/evaluation/question-answering/extractive-qa-pipeline.yml
@@ -0,0 +1,20 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+
+experiment_name: question-answering-model-evaluation-subgraph
+
+compute: gpu-cluster-big
+
+jobs:
+  pipeline_component_job:  # single child job: runs the registry's model_evaluation_pipeline component
+    type: pipeline
+    component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest
+    inputs:
+      task: question-answering
+      test_data:  # local file; the path is written relative to this YAML's directory, five levels below the repo root
+        path: "../../../../../sdk/python/foundation-models/system/evaluation/question-answering/squad-v2/small-test.jsonl"
+        type: uri_file
+      mlflow_model:
+        path: azureml://registries/azureml-preview/models/distilbert-base-uncased-distilled-squad/versions/3
+      label_column_name: answer_text
+      device: gpu
diff --git a/cli/foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization-pipeline.yml b/cli/foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization-pipeline.yml
new file mode 100644
index 0000000000..e17aba156d
--- /dev/null
+++ b/cli/foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization-pipeline.yml
@@ -0,0 +1,20 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+
+experiment_name: text-summarization-model-evaluation-subgraph
+
+compute: gpu-cluster-big
+
+jobs:
+  pipeline_component_job:  # single child job: runs the registry's model_evaluation_pipeline component
+    type: pipeline
+    component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest
+    inputs:
+      task: text-summarization
+      test_data:  # local file; the path is written relative to this YAML's directory, five levels below the repo root
+        path: "../../../../../sdk/python/foundation-models/system/evaluation/summarization/cnn_dailymail/small-test.jsonl"
+        type: uri_file
+      mlflow_model:
+        path: azureml://registries/azureml-preview/models/sshleifer-distilbart-cnn-12-6/versions/3
+      label_column_name: summary
+      device: gpu
diff --git a/cli/foundation-models/system/evaluation/text-classification/entailment-contradiction-pipeline.yml b/cli/foundation-models/system/evaluation/text-classification/entailment-contradiction-pipeline.yml
new file mode 100644
index 0000000000..087c090e65
--- /dev/null
+++ b/cli/foundation-models/system/evaluation/text-classification/entailment-contradiction-pipeline.yml
@@ -0,0 +1,20 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+
+experiment_name: text-classification-mnli-model-evaluation-subgraph
+
+compute: gpu-cluster-big
+
+jobs:
+  pipeline_component_job:  # single child job: runs the registry's model_evaluation_pipeline component
+    type: pipeline
+    component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest
+    inputs:
+      task: text-classification
+      test_data:  # local file; the path is written relative to this YAML's directory, five levels below the repo root
+        path: "../../../../../sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/small_train.jsonl"  # NOTE(review): a train split is used as test data here; confirm this is intended
+        type: uri_file
+      mlflow_model:
+        path: azureml://registries/azureml-preview/models/bert-based-uncased/versions/3
+      label_column_name: label_string
+      device: gpu
diff --git a/cli/foundation-models/system/evaluation/text-generation/text-generation-pipeline.yml b/cli/foundation-models/system/evaluation/text-generation/text-generation-pipeline.yml
new file mode 100644
index 0000000000..76264bc297
--- /dev/null
+++ b/cli/foundation-models/system/evaluation/text-generation/text-generation-pipeline.yml
@@ -0,0 +1,20 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+
+experiment_name: text-generation-model-evaluation-subgraph
+
+compute: gpu-cluster-big
+
+jobs:
+  pipeline_component_job:  # single child job: runs the registry's model_evaluation_pipeline component
+    type: pipeline
+    component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest
+    inputs:
+      task: text-generation
+      test_data:  # local file; the path is written relative to this YAML's directory, five levels below the repo root
+        path: "../../../../../sdk/python/foundation-models/system/evaluation/text-generation/text-generation/small-test.jsonl"
+        type: uri_file
+      mlflow_model:
+        path: azureml://registries/azureml-preview/models/gpt2/versions/3
+      label_column_name: ground_truth
+      device: gpu
diff --git a/cli/foundation-models/system/evaluation/token-classification/news-articles-entity-recognition-pipeline.yml b/cli/foundation-models/system/evaluation/token-classification/news-articles-entity-recognition-pipeline.yml
new file mode 100644
index 0000000000..fa7207f2c2
--- /dev/null
+++ b/cli/foundation-models/system/evaluation/token-classification/news-articles-entity-recognition-pipeline.yml
@@ -0,0 +1,20 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+
+experiment_name: named-entity-recognition-model-evaluation-subgraph
+
+compute: gpu-cluster-big
+
+jobs:
+  pipeline_component_job:  # single child job: runs the registry's model_evaluation_pipeline component
+    type: pipeline
+    component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest
+    inputs:
+      task: text-named-entity-recognition
+      test_data:  # local file; the path is written relative to this YAML's directory, five levels below the repo root
+        path: "../../../../../sdk/python/foundation-models/system/evaluation/token-classification/conll2003/small-test.jsonl"
+        type: uri_file
+      mlflow_model:
+        path: azureml://registries/azureml-preview/models/jean-baptiste-camembert-ner/versions/3
+      label_column_name: ner_tags_str
+      device: gpu
diff --git a/cli/foundation-models/system/evaluation/translation/translation-pipeline.yml b/cli/foundation-models/system/evaluation/translation/translation-pipeline.yml
new file mode 100644
index 0000000000..74bf800333
--- /dev/null
+++ b/cli/foundation-models/system/evaluation/translation/translation-pipeline.yml
@@ -0,0 +1,20 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+
+experiment_name: text-translation-model-evaluation-subgraph
+
+compute: gpu-cluster-big
+
+jobs:
+  pipeline_component_job:  # single child job: runs the registry's model_evaluation_pipeline component
+    type: pipeline
+    component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest
+    inputs:
+      task: text-translation
+      test_data:  # local file; the path is written relative to this YAML's directory, five levels below the repo root
+        path: "../../../../../sdk/python/foundation-models/system/evaluation/translation/wmt16_ro-en/small-test.jsonl"
+        type: uri_file
+      mlflow_model:
+        path: azureml://registries/azureml-preview/models/t5-base/versions/4
+      label_column_name: ro
+      device: gpu
diff --git a/cli/foundation-models/system/finetune/question-answering/deploy.yml b/cli/foundation-models/system/finetune/question-answering/deploy.yml
new file mode 100644
index 0000000000..40b0d93f09
--- /dev/null
+++ b/cli/foundation-models/system/finetune/question-answering/deploy.yml
@@ -0,0 +1,4 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo  # deployment name; extractive-qa.sh supplies endpoint_name and model through --set
+instance_type: Standard_DS3_v2  # extractive-qa.sh overrides this with --set instance_type=$deployment_sku
+instance_count: 1
diff --git a/cli/foundation-models/system/finetune/question-answering/extractive-qa-pipeline.yml b/cli/foundation-models/system/finetune/question-answering/extractive-qa-pipeline.yml
new file mode 100644
index 0000000000..9db2405668
--- /dev/null
+++ b/cli/foundation-models/system/finetune/question-answering/extractive-qa-pipeline.yml
@@ -0,0 +1,90 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+
+experiment_name: question-answering-extractive-qna
+
+inputs:
+  compute_model_import: gpu-cluster-big
+  compute_preprocess: gpu-cluster-big
+  compute_finetune: gpu-cluster-big
+  compute_model_evaluation: gpu-cluster-big
+
+  # specify the foundation model available in the azureml system registry
+  mlflow_model_path:
+    path: azureml://registries/azureml-preview/models/bert-based-uncased/versions/3  # NOTE(review): "bert-based-uncased" differs from "bert-base-uncased" used in extractive-qa.sh; confirm the registry model name
+    # huggingface_id: 'bert-base-uncased' # if you want to use a huggingface model, uncomment this line and comment the above line
+
+  # map the dataset files to parameters
+  train_file_path:
+    type: uri_file
+    path: "../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/small_train.jsonl"
+  validation_file_path:
+    type: uri_file
+    path: "../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/small_validation.jsonl"
+  test_file_path:
+    type: uri_file
+    path: "../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/small_test.jsonl"
+  evaluation_config_path:
+    type: uri_file
+    path: "../../../../../sdk/python/foundation-models/system/finetune/question-answering/question-answering-config.json"
+
+  # The following parameters map to the dataset fields
+  # the question whose answer needs to be extracted from the provided context
+  # question_key parameter maps to the "question" field in the SQuAD dataset
+  question_key: "question"
+  # the context that contains the answer to the question
+  # context_key parameter maps to the "context" field in the SQuAD dataset
+  context_key: "context"
+  # The value of this field is text in json format with two nested keys, answer_start_key and answer_text_key with their corresponding values
+  # answers_key parameter maps to the "answers" field in the SQuAD dataset
+  answers_key: "answers"
+  # Refers to the position where the answer begins in context. Needs a value that maps to a nested key in the values of the answers_key parameter.
+  # in the SQuAD dataset, the answer_start_key maps to "answer_start" under "answers"
+  answer_start_key: "answer_start"
+  # Contains the answer to the question. Needs a value that maps to a nested key in the values of the answers_key parameter
+  # in the SQuAD dataset, the answer_text_key maps to "text" under "answers"
+  answer_text_key: "text"
+
+  # training settings
+  number_of_gpu_to_use_finetuning: 2
+  num_train_epochs: 3
+  learning_rate: 2e-5
+
+outputs:
+  # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model
+  # registering the model is required to deploy the model to an online or batch endpoint
+  trained_model:  # the finetune job's mlflow_model_folder output is bound to this in the jobs section
+    type: mlflow_model
+
+settings:
+  force_rerun: true  # presumably disables reuse of results from earlier runs; confirm against the pipeline job schema
+
+jobs:
+  extractive_qna_finetune_job:  # forwards every pipeline-level input to the registry's question_answering_pipeline component
+    type: pipeline
+    # component: azureml://registries/azureml-preview/components/question_answering_pipeline/versions/0.0.3
+    component: azureml://registries/azureml-preview/components/question_answering_pipeline/labels/latest
+    inputs:
+      mlflow_model_path: ${{parent.inputs.mlflow_model_path}}
+
+      compute_model_import: ${{parent.inputs.compute_model_import}}
+      compute_preprocess: ${{parent.inputs.compute_preprocess}}
+      compute_finetune: ${{parent.inputs.compute_finetune}}
+      compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}}
+
+      train_file_path: ${{parent.inputs.train_file_path}}
+      validation_file_path: ${{parent.inputs.validation_file_path}}
+      test_file_path: ${{parent.inputs.test_file_path}}
+      evaluation_config: ${{parent.inputs.evaluation_config_path}}  # names differ: the job input is evaluation_config, the pipeline input is evaluation_config_path
+
+      question_key: ${{parent.inputs.question_key}}
+      context_key: ${{parent.inputs.context_key}}
+      answers_key: ${{parent.inputs.answers_key}}
+      answer_start_key: ${{parent.inputs.answer_start_key}}
+      answer_text_key: ${{parent.inputs.answer_text_key}}
+
+      number_of_gpu_to_use_finetuning: ${{parent.inputs.number_of_gpu_to_use_finetuning}}
+      num_train_epochs: ${{parent.inputs.num_train_epochs}}
+      learning_rate: ${{parent.inputs.learning_rate}}
+    outputs:
+      mlflow_model_folder: ${{parent.outputs.trained_model}}
diff --git a/cli/foundation-models/system/finetune/question-answering/extractive-qa.sh b/cli/foundation-models/system/finetune/question-answering/extractive-qa.sh
new file mode 100644
index 0000000000..20dd673bf3
--- /dev/null
+++ b/cli/foundation-models/system/finetune/question-answering/extractive-qa.sh
@@ -0,0 +1,186 @@
+set -x
+# the commands in this file map to steps in this notebook: https://aka.ms/azureml-ft-sdk-emotion-detection
+# the data files are available in the same folder as the above notebook
+
+# script inputs
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+registry_name="azureml"
+
+compute_cluster="gpu-cluster-big"
+# if above compute cluster does not exist, create it with the following vm size
+compute_sku="Standard_ND40rs_v2"
+# This is the number of GPUs in a single node of the selected 'vm_size' compute. 
+# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
+# Setting this to more than the number of GPUs will result in an error.
+gpus_per_node=2 
+# This is the foundation model for finetuning
+model_name="bert-base-uncased"
+# using the latest version of the model - not working yet
+model_version=1
+
+version=$(date +%s)
+finetuned_model_name=$model_name"-extractive-qna"
+endpoint_name="ext-qna-$version"
+deployment_sku="Standard_DS3_v2"
+
+
+# training data
+train_data="../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/small_train.jsonl"
+# validation data
+validation_data="../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/small_validation.jsonl"
+# test data
+test_data="../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/small_test.jsonl"
+# evaluation config
+evaluation_config="../../../../../sdk/python/foundation-models/system/finetune/question-answering/question-answering-config.json"
+# scoring_file
+scoring_file="../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/sample_score.json"
+
+# finetuning job parameters
+finetuning_pipeline_component="question_answering_pipeline"
+# The following parameters map to the dataset fields
+# the question whose answer needs to be extracted from the provided context 
+# question_key parameter maps to the "question" field in the SQuAD dataset
+question_key="question"
+# the context that contains the answer to the question
+# context_key parameter maps to the "context" field in the SQuAD dataset
+context_key="context"
+# The value of this field is text in json format with two nested keys, answer_start_key and answer_text_key with their corresponding values
+# answers_key parameter maps to the "answers" field in the SQuAD dataset
+answers_key="answers"
+# Refers to the position where the answer beings in context. Needs a value that maps to a nested key in the values of the answers_key parameter.
+# in the SQuAD dataset, the answer_start_key maps "answer_start" under "answer"
+answer_start_key="answer_start"
+# Contains the answer to the question. Needs a value that maps to a nested key in the values of the answers_key parameter
+# in the SQuAD dataset, the answer_text_key maps to "text" under "answer"
+answer_text_key="text"
+# Training settings
+number_of_gpu_to_use_finetuning=$gpus_per_node # set to the number of GPUs available in the compute
+num_train_epochs=3
+learning_rate=2e-5
+
+# 1. Setup pre-requisites
+
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   [ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s "$subscription_id" || { echo "az account set failed"; exit 1; }
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"  # expanded unquoted below on purpose so it splits into separate arguments
+
+# check if $compute_cluster exists, else create it
+if az ml compute show --name "$compute_cluster" $workspace_info
+then
+    echo "Compute cluster $compute_cluster already exists"
+else
+    echo "Creating compute cluster $compute_cluster"
+    az ml compute create --name "$compute_cluster" --type amlcompute --min-instances 0 --max-instances 2 --size "$compute_sku" $workspace_info || {
+        echo "Failed to create compute cluster $compute_cluster"
+        exit 1
+    }
+fi
+
+# 2. Check if the model exists in the registry
+# need to confirm model show command works for registries outside the tenant (aka system registry)
+if ! az ml model show --name "$model_name" --version "$model_version" --registry-name "$registry_name"
+then
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+fi
+
+# 3. Check if training data, validation data and test data exist
+if [ ! -f $train_data ]; then
+    echo "Training data $train_data does not exist"
+    exit 1
+fi
+if [ ! -f $validation_data ]; then
+    echo "Validation data $validation_data does not exist"
+    exit 1
+fi
+if [ ! -f $test_data ]; then
+    echo "Test data $test_data does not exist"
+    exit 1
+fi
+
+# 4. Submit finetuning job using pipeline.yml
+
+# check if the finetuning pipeline component exists
+if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name
+then
+    echo "Finetuning pipeline component $finetuning_pipeline_component does not exist"
+    exit 1
+fi
+
+# need to switch to using latest version for model, currently blocked with a bug.
+# submit finetuning job
+parent_job_name=$( az ml job create --file ./extractive-qa-pipeline.yml $workspace_info --query name -o tsv --set \
+  jobs.extractive_qna_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \
+  inputs.compute_model_import=$compute_cluster \
+  inputs.compute_preprocess=$compute_cluster \
+  inputs.compute_finetune=$compute_cluster \
+  inputs.compute_model_evaluation=$compute_cluster \
+  inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$model_name/versions/$model_version" \
+  inputs.train_file_path.path=$train_data \
+  inputs.validation_file_path.path=$validation_data \
+  inputs.test_file_path.path=$test_data \
+  inputs.evaluation_config.path=$evaluation_config \
+  inputs.question_key=$question_key \
+  inputs.context_key=$context_key \
+  inputs.answers_key=$answers_key \
+  inputs.answer_start_key=$answer_start_key \
+  inputs.answer_text_key=$answer_text_key \
+  inputs.number_of_gpu_to_use_finetuning=$number_of_gpu_to_use_finetuning \
+  inputs.num_train_epochs=$num_train_epochs \
+  inputs.learning_rate=$learning_rate ) || {
+    echo "Failed to submit finetuning job"
+    exit 1
+  }
+
+az ml job stream --name $parent_job_name $workspace_info || {
+    echo "job stream failed"; exit 1;
+}
+
+# 5. Create model in workspace from train job output
+az ml model create --name $finetuned_model_name --version $version --type mlflow_model \
+ --path azureml://jobs/$parent_job_name/outputs/trained_model $workspace_info  || {
+    echo "model create in workspace failed"; exit 1;
+}
+
+# 6. Deploy the model to an endpoint
+# create online endpoint
+az ml online-endpoint create --name "$endpoint_name" $workspace_info  || {
+    echo "endpoint create failed"; exit 1;
+}
+
+# deploy the model registered in step 5 to the endpoint in the workspace; deploy.yml values are overridden through --set
+# You can find here the list of SKU's supported for deployment - https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list
+az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name="$endpoint_name" model="azureml:$finetuned_model_name:$version" \
+  instance_type="$deployment_sku" || {
+    echo "deployment create failed"; exit 1;
+}
+
+# 7. Try a sample scoring request
+
+# Check if scoring data file exists
+if [ -f $scoring_file ]; then
+    echo "Invoking endpoint $endpoint_name with following input:\n\n"
+    cat $scoring_file
+    echo "\n\n"
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 8. Delete the endpoint
+az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
diff --git a/cli/foundation-models/system/finetune/summarization/deploy.yml b/cli/foundation-models/system/finetune/summarization/deploy.yml
new file mode 100644
index 0000000000..40b0d93f09
--- /dev/null
+++ b/cli/foundation-models/system/finetune/summarization/deploy.yml
@@ -0,0 +1,4 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo  # deployment name; endpoint_name and model are supplied by news-summary.sh via --set
+instance_type: Standard_DS3_v2  # default VM size; news-summary.sh overrides it via --set instance_type=$deployment_sku
+instance_count: 1
diff --git a/cli/foundation-models/system/finetune/summarization/news-summary-pipeline.yml b/cli/foundation-models/system/finetune/summarization/news-summary-pipeline.yml
new file mode 100644
index 0000000000..77490c046e
--- /dev/null
+++ b/cli/foundation-models/system/finetune/summarization/news-summary-pipeline.yml
@@ -0,0 +1,77 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+# Pipeline job that runs the summarization finetuning pipeline component on the news summary dataset.
+experiment_name: summarization-news-summary
+
+inputs:
+  compute_model_import: gpu-cluster-big
+  compute_preprocess: gpu-cluster-big
+  compute_finetune: gpu-cluster-big
+  compute_model_evaluation: gpu-cluster-big
+
+  # foundation model to finetune, referenced by its azureml registry path
+  mlflow_model_path:
+    path: azureml://registries/azureml-preview/models/t5-small/versions/4
+    # huggingface_id: 't5-small' # if you want to use a huggingface model, uncomment this line and comment the above line
+
+  # dataset and evaluation-config files, each passed as a single-file input
+  train_file_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/small_train.jsonl'
+  validation_file_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/small_validation.jsonl'
+  test_file_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/small_test.jsonl'
+  evaluation_config_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/summarization/summarization-config.json'
+
+
+  # Dataset field mapping
+  # document_key names the dataset field holding the text to summarize ("article")
+  document_key: 'article'
+  # summary_key names the dataset field holding the reference summary ("highlights")
+  summary_key: 'highlights'
+
+  # training settings
+  number_of_gpu_to_use_finetuning: 2
+  num_train_epochs: 3
+  learning_rate: 2e-5
+
+outputs:
+  # expose the finetuning job's model output as a pipeline-level output so the finetuned model is easy to register
+  # (registration is required before the model can be deployed to an online or batch endpoint)
+  trained_model:
+    type: mlflow_model
+
+settings:
+  force_rerun: true
+
+jobs:
+  news_summary_finetune_job:
+    type: pipeline
+    # component: azureml://registries/azureml-preview/components/summarization_pipeline/versions/0.0.3
+    component: azureml://registries/azureml-preview/components/summarization_pipeline/labels/latest
+    inputs:
+      mlflow_model_path: '${{parent.inputs.mlflow_model_path}}'
+
+      compute_model_import: '${{parent.inputs.compute_model_import}}'
+      compute_preprocess: '${{parent.inputs.compute_preprocess}}'
+      compute_finetune: '${{parent.inputs.compute_finetune}}'
+      compute_model_evaluation: '${{parent.inputs.compute_model_evaluation}}'
+
+      train_file_path: '${{parent.inputs.train_file_path}}'
+      validation_file_path: '${{parent.inputs.validation_file_path}}'
+      test_file_path: '${{parent.inputs.test_file_path}}'
+      evaluation_config: '${{parent.inputs.evaluation_config_path}}'
+
+      document_key: '${{parent.inputs.document_key}}'
+      summary_key: '${{parent.inputs.summary_key}}'
+
+      number_of_gpu_to_use_finetuning: '${{parent.inputs.number_of_gpu_to_use_finetuning}}'
+      num_train_epochs: '${{parent.inputs.num_train_epochs}}'
+      learning_rate: '${{parent.inputs.learning_rate}}'
+    outputs:
+      mlflow_model_folder: '${{parent.outputs.trained_model}}'
diff --git a/cli/foundation-models/system/finetune/summarization/news-summary.sh b/cli/foundation-models/system/finetune/summarization/news-summary.sh
new file mode 100644
index 0000000000..3078fd2abf
--- /dev/null
+++ b/cli/foundation-models/system/finetune/summarization/news-summary.sh
@@ -0,0 +1,172 @@
+set -x  # trace each command as it executes
+# End-to-end CLI sample: finetune, register, deploy, score, delete. Steps map to this notebook: https://aka.ms/azureml-ft-sdk-emotion-detection
+# NOTE(review): that link names the emotion-detection notebook - confirm it is the intended one. The data files sit in the notebook's folder.
+
+# script inputs - replace the <...> placeholders before running
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+registry_name="azureml"
+
+compute_cluster="gpu-cluster-big"
+# if above compute cluster does not exist, create it with the following vm size
+compute_sku="Standard_ND40rs_v2"
+# This is the number of GPUs in a single node of the selected 'vm_size' compute.
+# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
+# Setting this to more than the number of GPUs will result in an error.
+gpus_per_node=2
+# This is the foundation model for finetuning
+model_name="t5-small"
+# using the latest version of the model - not working yet
+model_version=1
+
+version=$(date +%s) # epoch seconds, reused as the model version and the endpoint name suffix
+finetuned_model_name="${model_name}-news-summary"
+endpoint_name="news-summary-$version"
+deployment_sku="Standard_DS3_v2"
+
+
+# training data
+train_data="../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/small_train.jsonl"
+# validation data
+validation_data="../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/small_validation.jsonl"
+# test data
+test_data="../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/small_test.jsonl"
+# evaluation config
+evaluation_config="../../../../../sdk/python/foundation-models/system/finetune/summarization/summarization-config.json"
+# scoring_file
+scoring_file="../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/sample_score.json"
+
+# finetuning job parameters
+finetuning_pipeline_component="summarization_pipeline"
+# The following parameters map to the dataset fields
+# document_key parameter maps to the "article" field in the news summary dataset
+document_key="article"
+# summary_key parameter maps to the "highlights" field in the news summary dataset
+summary_key="highlights"
+# Training settings
+number_of_gpu_to_use_finetuning=$gpus_per_node # set to the number of GPUs available in the compute
+num_train_epochs=3
+learning_rate=2e-5
+
+# 1. Setup pre-requisites
+
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   [ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s "$subscription_id" || { echo "Failed to set subscription $subscription_id"; exit 1; }
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" # expanded unquoted below on purpose: it must split into separate arguments
+
+# check if $compute_cluster exists, else create it
+if az ml compute show --name "$compute_cluster" $workspace_info
+then
+    echo "Compute cluster $compute_cluster already exists"
+else
+    echo "Creating compute cluster $compute_cluster"
+    az ml compute create --name "$compute_cluster" --type amlcompute --min-instances 0 --max-instances 2 --size "$compute_sku" $workspace_info || {
+        echo "Failed to create compute cluster $compute_cluster"
+        exit 1
+    }
+fi
+
+# 2. Check if the model exists in the registry
+# need to confirm model show command works for registries outside the tenant (aka system registry)
+if ! az ml model show --name "$model_name" --version "$model_version" --registry-name "$registry_name"
+then
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+fi
+
+# 3. Check if training data, validation data and test data exist
+if [ ! -f "$train_data" ]; then
+    echo "Training data $train_data does not exist"
+    exit 1
+fi
+if [ ! -f "$validation_data" ]; then
+    echo "Validation data $validation_data does not exist"
+    exit 1
+fi
+if [ ! -f "$test_data" ]; then
+    echo "Test data $test_data does not exist"
+    exit 1
+fi
+
+# 4. Submit finetuning job using pipeline.yml
+
+# check if the finetuning pipeline component exists
+if ! az ml component show --name "$finetuning_pipeline_component" --label latest --registry-name "$registry_name"
+then
+    echo "Finetuning pipeline component $finetuning_pipeline_component does not exist"
+    exit 1
+fi
+
+# need to switch to using latest version for model, currently blocked with a bug.
+# submit finetuning job; each inputs.* override must name an input declared in news-summary-pipeline.yml (e.g. evaluation_config_path)
+parent_job_name=$( az ml job create --file ./news-summary-pipeline.yml $workspace_info --query name -o tsv --set \
+  jobs.news_summary_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \
+  inputs.compute_model_import="$compute_cluster" \
+  inputs.compute_preprocess="$compute_cluster" \
+  inputs.compute_finetune="$compute_cluster" \
+  inputs.compute_model_evaluation="$compute_cluster" \
+  inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$model_name/versions/$model_version" \
+  inputs.train_file_path.path="$train_data" \
+  inputs.validation_file_path.path="$validation_data" \
+  inputs.test_file_path.path="$test_data" \
+  inputs.evaluation_config_path.path="$evaluation_config" \
+  inputs.document_key="$document_key" \
+  inputs.summary_key="$summary_key" \
+  inputs.number_of_gpu_to_use_finetuning="$number_of_gpu_to_use_finetuning" \
+  inputs.num_train_epochs="$num_train_epochs" \
+  inputs.learning_rate="$learning_rate" ) || {
+    echo "Failed to submit finetuning job"
+    exit 1
+  }
+
+az ml job stream --name "$parent_job_name" $workspace_info || {
+    echo "job stream failed"; exit 1;
+}
+
+# 5. Create model in workspace from train job output
+az ml model create --name "$finetuned_model_name" --version "$version" --type mlflow_model \
+ --path "azureml://jobs/$parent_job_name/outputs/trained_model" $workspace_info  || {
+    echo "model create in workspace failed"; exit 1;
+}
+
+# 6. Deploy the model to an endpoint
+# create online endpoint
+az ml online-endpoint create --name "$endpoint_name" $workspace_info  || {
+    echo "endpoint create failed"; exit 1;
+}
+
+# deploy the finetuned model registered in the workspace (step 5) to the endpoint
+# You can find here the list of SKU's supported for deployment - https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list
+az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name="$endpoint_name" model="azureml:$finetuned_model_name:$version" \
+  instance_type="$deployment_sku" || {
+    echo "deployment create failed"; exit 1;
+}
+
+# 7. Try a sample scoring request
+
+# Check if scoring data file exists (printf is used because bash's echo does not expand \n)
+if [ -f "$scoring_file" ]; then
+    printf 'Invoking endpoint %s with following input:\n\n' "$endpoint_name"
+    cat "$scoring_file"
+    printf '\n\n'
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name "$endpoint_name" --request-file "$scoring_file" $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 8. Delete the endpoint
+az ml online-endpoint delete --name "$endpoint_name" $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
diff --git a/cli/foundation-models/system/finetune/text-classification/deploy.yml b/cli/foundation-models/system/finetune/text-classification/deploy.yml
new file mode 100644
index 0000000000..40b0d93f09
--- /dev/null
+++ b/cli/foundation-models/system/finetune/text-classification/deploy.yml
@@ -0,0 +1,4 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo  # deployment name; endpoint_name and model are supplied by emotion-detection.sh via --set
+instance_type: Standard_DS3_v2  # default VM size; emotion-detection.sh overrides it via --set instance_type=$deployment_sku
+instance_count: 1
diff --git a/cli/foundation-models/system/finetune/text-classification/emotion-detection-pipeline.yml b/cli/foundation-models/system/finetune/text-classification/emotion-detection-pipeline.yml
new file mode 100644
index 0000000000..c7f88973d4
--- /dev/null
+++ b/cli/foundation-models/system/finetune/text-classification/emotion-detection-pipeline.yml
@@ -0,0 +1,75 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+# Pipeline job that runs the text classification finetuning pipeline component on the emotion dataset.
+experiment_name: text-classification-emotion-detection
+
+inputs:
+  compute_model_import: gpu-cluster-big
+  compute_preprocess: gpu-cluster-big
+  compute_finetune: gpu-cluster-big
+  compute_model_evaluation: gpu-cluster-big
+
+  # foundation model to finetune, referenced by its azureml registry path
+  mlflow_model_path:
+    path: azureml://registries/azureml-preview/models/bert-base-uncased/versions/3
+    # huggingface_id: 'bert-base-uncased' # if you want to use a huggingface model, uncomment this line and comment the above line
+
+  # dataset and evaluation-config files, each passed as a single-file input
+  train_file_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/small_train.jsonl'
+  validation_file_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/small_validation.jsonl'
+  test_file_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/small_test.jsonl'
+  evaluation_config_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/text-classification/text-classification-config.json'
+
+
+  # The following parameters map to the dataset fields
+  sentence1_key: 'text'
+  label_key: 'label_string'
+
+  # training settings
+  number_of_gpu_to_use_finetuning: 2
+  num_train_epochs: 3
+  learning_rate: 2e-5
+
+outputs:
+  # expose the finetuning job's model output as a pipeline-level output so the finetuned model is easy to register
+  # (registration is required before the model can be deployed to an online or batch endpoint)
+  trained_model:
+    type: mlflow_model
+
+settings:
+  force_rerun: true
+
+jobs:
+  emotion_detection_finetune_job:
+    type: pipeline
+    # component: azureml://registries/azureml-preview/components/text_classification_pipeline/versions/0.0.3
+    component: azureml://registries/azureml-preview/components/text_classification_pipeline/labels/latest
+    inputs:
+      mlflow_model_path: '${{parent.inputs.mlflow_model_path}}'
+
+      compute_model_import: '${{parent.inputs.compute_model_import}}'
+      compute_preprocess: '${{parent.inputs.compute_preprocess}}'
+      compute_finetune: '${{parent.inputs.compute_finetune}}'
+      compute_model_evaluation: '${{parent.inputs.compute_model_evaluation}}'
+
+      train_file_path: '${{parent.inputs.train_file_path}}'
+      validation_file_path: '${{parent.inputs.validation_file_path}}'
+      test_file_path: '${{parent.inputs.test_file_path}}'
+      evaluation_config: '${{parent.inputs.evaluation_config_path}}'
+
+      sentence1_key: '${{parent.inputs.sentence1_key}}'
+      label_key: '${{parent.inputs.label_key}}'
+
+      number_of_gpu_to_use_finetuning: '${{parent.inputs.number_of_gpu_to_use_finetuning}}'
+      num_train_epochs: '${{parent.inputs.num_train_epochs}}'
+      learning_rate: '${{parent.inputs.learning_rate}}'
+    outputs:
+      mlflow_model_folder: '${{parent.outputs.trained_model}}'
diff --git a/cli/foundation-models/system/finetune/text-classification/emotion-detection.sh b/cli/foundation-models/system/finetune/text-classification/emotion-detection.sh
new file mode 100644
index 0000000000..f09d212324
--- /dev/null
+++ b/cli/foundation-models/system/finetune/text-classification/emotion-detection.sh
@@ -0,0 +1,170 @@
+set -x  # trace each command as it executes
+# End-to-end CLI sample: finetune, register, deploy, score, delete. Steps map to this notebook: https://aka.ms/azureml-ft-sdk-emotion-detection
+# the data files are available in the same folder as the above notebook
+
+# script inputs - replace the <...> placeholders before running
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+registry_name="azureml"
+
+compute_cluster="gpu-cluster-big"
+# if above compute cluster does not exist, create it with the following vm size
+compute_sku="Standard_ND40rs_v2"
+# This is the number of GPUs in a single node of the selected 'vm_size' compute.
+# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
+# Setting this to more than the number of GPUs will result in an error.
+gpus_per_node=2
+# This is the foundation model for finetuning
+model_name="bert-base-uncased"
+# using the latest version of the model - not working yet
+model_version=1
+
+version=$(date +%s) # epoch seconds, reused as the model version and the endpoint name suffix
+finetuned_model_name="${model_name}-emotion-detection"
+endpoint_name="emotion-$version"
+deployment_sku="Standard_DS3_v2"
+
+
+# training data
+train_data="../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/small_train.jsonl"
+# validation data
+validation_data="../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/small_validation.jsonl"
+# test data
+test_data="../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/small_test.jsonl"
+# evaluation config
+evaluation_config="../../../../../sdk/python/foundation-models/system/finetune/text-classification/text-classification-config.json"
+# scoring_file
+scoring_file="../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/sample_score.json"
+
+# finetuning job parameters
+finetuning_pipeline_component="text_classification_pipeline"
+# The following parameters map to the dataset fields
+sentence1_key="text"
+label_key="label_string"
+# Training settings
+number_of_gpu_to_use_finetuning=$gpus_per_node # set to the number of GPUs available in the compute
+num_train_epochs=3
+learning_rate=2e-5
+
+# 1. Setup pre-requisites
+
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   [ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s "$subscription_id" || { echo "Failed to set subscription $subscription_id"; exit 1; }
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" # expanded unquoted below on purpose: it must split into separate arguments
+
+# check if $compute_cluster exists, else create it
+if az ml compute show --name "$compute_cluster" $workspace_info
+then
+    echo "Compute cluster $compute_cluster already exists"
+else
+    echo "Creating compute cluster $compute_cluster"
+    az ml compute create --name "$compute_cluster" --type amlcompute --min-instances 0 --max-instances 2 --size "$compute_sku" $workspace_info || {
+        echo "Failed to create compute cluster $compute_cluster"
+        exit 1
+    }
+fi
+
+# 2. Check if the model exists in the registry
+# need to confirm model show command works for registries outside the tenant (aka system registry)
+if ! az ml model show --name "$model_name" --version "$model_version" --registry-name "$registry_name"
+then
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+fi
+
+# 3. Check if training data, validation data and test data exist
+if [ ! -f "$train_data" ]; then
+    echo "Training data $train_data does not exist"
+    exit 1
+fi
+if [ ! -f "$validation_data" ]; then
+    echo "Validation data $validation_data does not exist"
+    exit 1
+fi
+if [ ! -f "$test_data" ]; then
+    echo "Test data $test_data does not exist"
+    exit 1
+fi
+
+# 4. Submit finetuning job using pipeline.yml
+
+# check if the finetuning pipeline component exists
+if ! az ml component show --name "$finetuning_pipeline_component" --label latest --registry-name "$registry_name"
+then
+    echo "Finetuning pipeline component $finetuning_pipeline_component does not exist"
+    exit 1
+fi
+
+# need to switch to using latest version for model, currently blocked with a bug.
+# submit finetuning job; each inputs.* override must name an input declared in emotion-detection-pipeline.yml (e.g. evaluation_config_path)
+parent_job_name=$( az ml job create --file ./emotion-detection-pipeline.yml $workspace_info --query name -o tsv --set \
+  jobs.emotion_detection_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \
+  inputs.compute_model_import="$compute_cluster" \
+  inputs.compute_preprocess="$compute_cluster" \
+  inputs.compute_finetune="$compute_cluster" \
+  inputs.compute_model_evaluation="$compute_cluster" \
+  inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$model_name/versions/$model_version" \
+  inputs.train_file_path.path="$train_data" \
+  inputs.validation_file_path.path="$validation_data" \
+  inputs.test_file_path.path="$test_data" \
+  inputs.evaluation_config_path.path="$evaluation_config" \
+  inputs.sentence1_key="$sentence1_key" \
+  inputs.label_key="$label_key" \
+  inputs.number_of_gpu_to_use_finetuning="$number_of_gpu_to_use_finetuning" \
+  inputs.num_train_epochs="$num_train_epochs" \
+  inputs.learning_rate="$learning_rate" ) || {
+    echo "Failed to submit finetuning job"
+    exit 1
+  }
+
+az ml job stream --name "$parent_job_name" $workspace_info || {
+    echo "job stream failed"; exit 1;
+}
+
+# 5. Create model in workspace from train job output
+az ml model create --name "$finetuned_model_name" --version "$version" --type mlflow_model \
+ --path "azureml://jobs/$parent_job_name/outputs/trained_model" $workspace_info  || {
+    echo "model create in workspace failed"; exit 1;
+}
+
+# 6. Deploy the model to an endpoint
+# create online endpoint
+az ml online-endpoint create --name "$endpoint_name" $workspace_info  || {
+    echo "endpoint create failed"; exit 1;
+}
+
+# deploy the finetuned model registered in the workspace (step 5) to the endpoint
+# You can find here the list of SKU's supported for deployment - https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list
+az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name="$endpoint_name" model="azureml:$finetuned_model_name:$version" \
+  instance_type="$deployment_sku" || {
+    echo "deployment create failed"; exit 1;
+}
+
+# 7. Try a sample scoring request
+
+# Check if scoring data file exists (printf is used because bash's echo does not expand \n)
+if [ -f "$scoring_file" ]; then
+    printf 'Invoking endpoint %s with following input:\n\n' "$endpoint_name"
+    cat "$scoring_file"
+    printf '\n\n'
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name "$endpoint_name" --request-file "$scoring_file" $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 8. Delete the endpoint
+az ml online-endpoint delete --name "$endpoint_name" $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
diff --git a/cli/foundation-models/system/finetune/token-classification/deploy.yml b/cli/foundation-models/system/finetune/token-classification/deploy.yml
new file mode 100644
index 0000000000..40b0d93f09
--- /dev/null
+++ b/cli/foundation-models/system/finetune/token-classification/deploy.yml
@@ -0,0 +1,4 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo  # deployment name; endpoint_name and model are supplied by token-classification.sh via --set
+instance_type: Standard_DS3_v2  # default VM size; token-classification.sh overrides it via --set instance_type=$deployment_sku
+instance_count: 1
diff --git a/cli/foundation-models/system/finetune/token-classification/token-classification-pipeline.yml b/cli/foundation-models/system/finetune/token-classification/token-classification-pipeline.yml
new file mode 100644
index 0000000000..eeba47f899
--- /dev/null
+++ b/cli/foundation-models/system/finetune/token-classification/token-classification-pipeline.yml
@@ -0,0 +1,75 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+# Pipeline job that runs the token classification finetuning pipeline component on the conll2003 dataset.
+experiment_name: token-classification-ner
+
+inputs:
+  compute_model_import: gpu-cluster-big
+  compute_preprocess: gpu-cluster-big
+  compute_finetune: gpu-cluster-big
+  compute_model_evaluation: gpu-cluster-big
+
+  # foundation model to finetune, referenced by its azureml registry path
+  mlflow_model_path:
+    path: azureml://registries/azureml-preview/models/bert-base-uncased/versions/3
+    # huggingface_id: 'bert-base-uncased' # if you want to use a huggingface model, uncomment this line and comment the above line
+
+  # dataset and evaluation-config files, each passed as a single-file input
+  train_file_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/small_train.jsonl'
+  validation_file_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/small_validation.jsonl'
+  test_file_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/small_test.jsonl'
+  evaluation_config_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/token-classification/token-classification-config.json'
+
+
+  # The following parameters map to the dataset fields
+  token_key: 'tokens'
+  tag_key: 'ner_tags_str'
+
+  # training settings
+  number_of_gpu_to_use_finetuning: 2
+  num_train_epochs: 3
+  learning_rate: 2e-5
+
+outputs:
+  # expose the finetuning job's model output as a pipeline-level output so the finetuned model is easy to register
+  # (registration is required before the model can be deployed to an online or batch endpoint)
+  trained_model:
+    type: mlflow_model
+
+settings:
+  force_rerun: true
+
+jobs:
+  ner_finetune_job:
+    type: pipeline
+    # component: azureml://registries/azureml-preview/components/token_classification_pipeline/versions/0.0.3
+    component: azureml://registries/azureml-preview/components/token_classification_pipeline/labels/latest
+    inputs:
+      mlflow_model_path: '${{parent.inputs.mlflow_model_path}}'
+
+      compute_model_import: '${{parent.inputs.compute_model_import}}'
+      compute_preprocess: '${{parent.inputs.compute_preprocess}}'
+      compute_finetune: '${{parent.inputs.compute_finetune}}'
+      compute_model_evaluation: '${{parent.inputs.compute_model_evaluation}}'
+
+      train_file_path: '${{parent.inputs.train_file_path}}'
+      validation_file_path: '${{parent.inputs.validation_file_path}}'
+      test_file_path: '${{parent.inputs.test_file_path}}'
+      evaluation_config: '${{parent.inputs.evaluation_config_path}}'
+
+      token_key: '${{parent.inputs.token_key}}'
+      tag_key: '${{parent.inputs.tag_key}}'
+
+      number_of_gpu_to_use_finetuning: '${{parent.inputs.number_of_gpu_to_use_finetuning}}'
+      num_train_epochs: '${{parent.inputs.num_train_epochs}}'
+      learning_rate: '${{parent.inputs.learning_rate}}'
+    outputs:
+      mlflow_model_folder: '${{parent.outputs.trained_model}}'
diff --git a/cli/foundation-models/system/finetune/token-classification/token-classification.sh b/cli/foundation-models/system/finetune/token-classification/token-classification.sh
new file mode 100644
index 0000000000..40151b3c5b
--- /dev/null
+++ b/cli/foundation-models/system/finetune/token-classification/token-classification.sh
@@ -0,0 +1,170 @@
+set -x  # trace each command as it executes
+# End-to-end CLI sample: finetune, register, deploy, score, delete. Steps map to this notebook: https://aka.ms/azureml-ft-sdk-emotion-detection
+# NOTE(review): that link names the emotion-detection notebook - confirm it is the intended one. The data files sit in the notebook's folder.
+
+# script inputs - replace the <...> placeholders before running
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+registry_name="azureml"
+
+compute_cluster="gpu-cluster-big"
+# if above compute cluster does not exist, create it with the following vm size
+compute_sku="Standard_ND40rs_v2"
+# This is the number of GPUs in a single node of the selected 'vm_size' compute.
+# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
+# Setting this to more than the number of GPUs will result in an error.
+gpus_per_node=2
+# This is the foundation model for finetuning
+model_name="bert-base-uncased"
+# using the latest version of the model - not working yet
+model_version=1
+
+version=$(date +%s) # epoch seconds, reused as the model version and the endpoint name suffix
+finetuned_model_name="${model_name}-ner"
+endpoint_name="ner-$version"
+deployment_sku="Standard_DS3_v2"
+
+
+# training data
+train_data="../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/small_train.jsonl"
+# validation data
+validation_data="../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/small_validation.jsonl"
+# test data
+test_data="../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/small_test.jsonl"
+# evaluation config
+evaluation_config="../../../../../sdk/python/foundation-models/system/finetune/token-classification/token-classification-config.json"
+# scoring_file
+scoring_file="../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/sample_score.json"
+
+# finetuning job parameters
+finetuning_pipeline_component="token_classification_pipeline"
+# The following parameters map to the dataset fields
+token_key="tokens"
+tag_key="ner_tags_str"
+# Training settings
+number_of_gpu_to_use_finetuning=$gpus_per_node # set to the number of GPUs available in the compute
+num_train_epochs=3
+learning_rate=2e-5
+
+# 1. Setup pre-requisites
+
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   [ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s "$subscription_id" || { echo "Failed to set subscription $subscription_id"; exit 1; }
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" # expanded unquoted below on purpose: it must split into separate arguments
+
+# check if $compute_cluster exists, else create it
+if az ml compute show --name "$compute_cluster" $workspace_info
+then
+    echo "Compute cluster $compute_cluster already exists"
+else
+    echo "Creating compute cluster $compute_cluster"
+    az ml compute create --name "$compute_cluster" --type amlcompute --min-instances 0 --max-instances 2 --size "$compute_sku" $workspace_info || {
+        echo "Failed to create compute cluster $compute_cluster"
+        exit 1
+    }
+fi
+
+# 2. Check if the model exists in the registry
+# need to confirm model show command works for registries outside the tenant (aka system registry)
+if ! az ml model show --name "$model_name" --version "$model_version" --registry-name "$registry_name"
+then
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+fi
+
+# 3. Check if training data, validation data and test data exist
+if [ ! -f "$train_data" ]; then
+    echo "Training data $train_data does not exist"
+    exit 1
+fi
+if [ ! -f "$validation_data" ]; then
+    echo "Validation data $validation_data does not exist"
+    exit 1
+fi
+if [ ! -f "$test_data" ]; then
+    echo "Test data $test_data does not exist"
+    exit 1
+fi
+
+# 4. Submit finetuning job using pipeline.yml
+
+# check if the finetuning pipeline component exists
+if ! az ml component show --name "$finetuning_pipeline_component" --label latest --registry-name "$registry_name"
+then
+    echo "Finetuning pipeline component $finetuning_pipeline_component does not exist"
+    exit 1
+fi
+
+# need to switch to using latest version for model, currently blocked with a bug.
+# submit finetuning job; each inputs.* override must name an input declared in token-classification-pipeline.yml (e.g. evaluation_config_path)
+parent_job_name=$( az ml job create --file ./token-classification-pipeline.yml $workspace_info --query name -o tsv --set \
+  jobs.ner_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \
+  inputs.compute_model_import="$compute_cluster" \
+  inputs.compute_preprocess="$compute_cluster" \
+  inputs.compute_finetune="$compute_cluster" \
+  inputs.compute_model_evaluation="$compute_cluster" \
+  inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$model_name/versions/$model_version" \
+  inputs.train_file_path.path="$train_data" \
+  inputs.validation_file_path.path="$validation_data" \
+  inputs.test_file_path.path="$test_data" \
+  inputs.evaluation_config_path.path="$evaluation_config" \
+  inputs.token_key="$token_key" \
+  inputs.tag_key="$tag_key" \
+  inputs.number_of_gpu_to_use_finetuning="$number_of_gpu_to_use_finetuning" \
+  inputs.num_train_epochs="$num_train_epochs" \
+  inputs.learning_rate="$learning_rate" ) || {
+    echo "Failed to submit finetuning job"
+    exit 1
+  }
+
+az ml job stream --name "$parent_job_name" $workspace_info || {
+    echo "job stream failed"; exit 1;
+}
+
+# 5. Create model in workspace from train job output
+az ml model create --name "$finetuned_model_name" --version "$version" --type mlflow_model \
+ --path "azureml://jobs/$parent_job_name/outputs/trained_model" $workspace_info  || {
+    echo "model create in workspace failed"; exit 1;
+}
+
+# 6. Deploy the model to an endpoint
+# create online endpoint
+az ml online-endpoint create --name "$endpoint_name" $workspace_info  || {
+    echo "endpoint create failed"; exit 1;
+}
+
+# deploy the finetuned model registered in the workspace (step 5) to the endpoint
+# You can find here the list of SKU's supported for deployment - https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list
+az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name="$endpoint_name" model="azureml:$finetuned_model_name:$version" \
+  instance_type="$deployment_sku" || {
+    echo "deployment create failed"; exit 1;
+}
+
+# 7. Try a sample scoring request
+
+# Check if scoring data file exists (printf is used because bash's echo does not expand \n)
+if [ -f "$scoring_file" ]; then
+    printf 'Invoking endpoint %s with following input:\n\n' "$endpoint_name"
+    cat "$scoring_file"
+    printf '\n\n'
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name "$endpoint_name" --request-file "$scoring_file" $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 8. Delete the endpoint
+az ml online-endpoint delete --name "$endpoint_name" $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
diff --git a/cli/foundation-models/system/finetune/translation/deploy.yml b/cli/foundation-models/system/finetune/translation/deploy.yml
new file mode 100644
index 0000000000..40b0d93f09
--- /dev/null
+++ b/cli/foundation-models/system/finetune/translation/deploy.yml
@@ -0,0 +1,4 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo
+instance_type: Standard_DS3_v2
+instance_count: 1
\ No newline at end of file
diff --git a/cli/foundation-models/system/finetune/translation/translation-pipeline.yml b/cli/foundation-models/system/finetune/translation/translation-pipeline.yml
new file mode 100644
index 0000000000..6392a655a8
--- /dev/null
+++ b/cli/foundation-models/system/finetune/translation/translation-pipeline.yml
@@ -0,0 +1,77 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+
+experiment_name: translation-wmt16-en-ro
+
+inputs:
+  compute_model_import: gpu-cluster-big
+  compute_preprocess: gpu-cluster-big
+  compute_finetune: gpu-cluster-big
+  compute_model_evaluation: gpu-cluster-big
+
+  # foundation model to finetune, referenced from the azureml system registry
+  mlflow_model_path:
+    path: azureml://registries/azureml-preview/models/t5-small/versions/4
+    # huggingface_id: 't5-small' # to use a huggingface model instead, uncomment this line and comment out the path above
+
+  # dataset files passed to the pipeline as uri_file inputs
+  train_file_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/small_train.jsonl'
+  validation_file_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/small_validation.jsonl'
+  test_file_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/small_test.jsonl'
+  evaluation_config_path:
+    type: uri_file
+    path: '../../../../../sdk/python/foundation-models/system/finetune/translation/translation-config.json'
+
+
+  # The following parameters map to the dataset fields
+  # source_lang is the "en" field of the wmt16 dataset
+  source_lang: 'en'
+  # target_lang is the "ro" field of the wmt16 dataset
+  target_lang: 'ro'
+
+  # training settings
+  number_of_gpu_to_use_finetuning: 2
+  num_train_epochs: 3
+  learning_rate: 2e-5
+
+outputs:
+  # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model
+  # registering the model is required to deploy the model to an online or batch endpoint
+  trained_model:
+    type: mlflow_model
+
+settings:
+  force_rerun: true
+
+jobs:
+  translation_en_ro_finetune_job:
+    type: pipeline
+    # to pin a component version instead of following the latest label, use:
+    # component: azureml://registries/azureml-preview/components/translation_pipeline/versions/0.0.3
+    component: azureml://registries/azureml-preview/components/translation_pipeline/labels/latest
+    inputs:
+      mlflow_model_path: ${{parent.inputs.mlflow_model_path}}
+      # compute target for each pipeline stage
+      compute_model_import: ${{parent.inputs.compute_model_import}}
+      compute_preprocess: ${{parent.inputs.compute_preprocess}}
+      compute_finetune: ${{parent.inputs.compute_finetune}}
+      compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}}
+      # data files; the component input evaluation_config is fed from the pipeline input evaluation_config_path
+      train_file_path: ${{parent.inputs.train_file_path}}
+      validation_file_path: ${{parent.inputs.validation_file_path}}
+      test_file_path: ${{parent.inputs.test_file_path}}
+      evaluation_config: ${{parent.inputs.evaluation_config_path}}
+      # dataset field names of the language pair
+      source_lang: ${{parent.inputs.source_lang}}
+      target_lang: ${{parent.inputs.target_lang}}
+      # training settings
+      number_of_gpu_to_use_finetuning: ${{parent.inputs.number_of_gpu_to_use_finetuning}}
+      num_train_epochs: ${{parent.inputs.num_train_epochs}}
+      learning_rate: ${{parent.inputs.learning_rate}}
+    outputs:
+      mlflow_model_folder: ${{parent.outputs.trained_model}}
diff --git a/cli/foundation-models/system/finetune/translation/translation.sh b/cli/foundation-models/system/finetune/translation/translation.sh
new file mode 100644
index 0000000000..494addff0d
--- /dev/null
+++ b/cli/foundation-models/system/finetune/translation/translation.sh
@@ -0,0 +1,171 @@
+#! /bin/bash
+set -x
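+# the commands in this file map to steps in the translation finetuning notebook (TODO: confirm the link - it currently points at the emotion-detection sample): https://aka.ms/azureml-ft-sdk-emotion-detection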
+# the data files are available in the same folder as the above notebook
+
+# script inputs
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+registry_name="azureml"
+
+compute_cluster="gpu-cluster-big"
+# if above compute cluster does not exist, create it with the following vm size
+compute_sku="Standard_ND40rs_v2"
+# This is the number of GPUs in a single node of the selected 'vm_size' compute. 
+# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
+# Setting this to more than the number of GPUs will result in an error.
+gpus_per_node=2 
+# This is the foundation model for finetuning
+model_name="t5-small"
+# using the latest version of the model - not working yet
+model_version=1
+
+version=$(date +%s)
+finetuned_model_name=$model_name"-wmt16-en-ro"
+endpoint_name="translation-en-ro-$version"
+deployment_sku="Standard_DS3_v2"
+
+
+# training data
+train_data="../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/small_train.jsonl"
+# validation data
+validation_data="../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/small_validation.jsonl"
+# test data
+test_data="../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/small_test.jsonl"
+# evaluation config
+evaluation_config="../../../../../sdk/python/foundation-models/system/finetune/translation/translation-config.json"
+# scoring_file
+scoring_file="../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/sample_score.json"
+
+# finetuning job parameters
+finetuning_pipeline_component="translation_pipeline"
+# The following parameters map to the dataset fields
+# source_lang parameter maps to the "en" field in the wmt16 dataset
+source_lang="en"
+# target_lang parameter maps to the "ro" field in the wmt16 dataset
+target_lang="ro"
+# Training settings
+number_of_gpu_to_use_finetuning=$gpus_per_node # set to the number of GPUs available in the compute
+num_train_epochs=3
+learning_rate=2e-5
+
+# 1. Setup pre-requisites
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   [ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+az account set -s $subscription_id
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"
+
+# check if $compute_cluster exists, else create it
+if az ml compute show --name $compute_cluster $workspace_info
+then
+    echo "Compute cluster $compute_cluster already exists"
+else
+    echo "Creating compute cluster $compute_cluster"
+    az ml compute create --name $compute_cluster --type amlcompute --min-instances 0 --max-instances 2 --size $compute_sku $workspace_info || {
+        echo "Failed to create compute cluster $compute_cluster"
+        exit 1
+    }
+fi
+
+# 2. Make sure the foundation model version is available in the registry
+# TODO: confirm that model show works for registries outside the tenant (aka system registry)
+az ml model show --name $model_name --version $model_version \
+  --registry-name $registry_name || {
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+}
+
+# 3. Require the training, validation and test data files to be present
+[ -f $train_data ] || {
+    echo "Training data $train_data does not exist"
+    exit 1
+}
+[ -f $validation_data ] || {
+    echo "Validation data $validation_data does not exist"
+    exit 1
+}
+[ -f $test_data ] || {
+    echo "Test data $test_data does not exist"
+    exit 1
+}
+
+# 4. Submit finetuning job using pipeline.yml
+
+# The pipeline component must be resolvable by its latest label in the registry
+az ml component show --name $finetuning_pipeline_component --label latest \
+  --registry-name $registry_name || {
+    echo "Finetuning pipeline component $finetuning_pipeline_component does not exist"
+    exit 1
+}
+
+# need to switch to using latest version for model, currently blocked with a bug.
+# submit finetuning job
+parent_job_name=$( az ml job create --file ./translation-pipeline.yml $workspace_info --query name -o tsv --set \
+  jobs.translation_en_ro_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \
+  inputs.compute_model_import=$compute_cluster \
+  inputs.compute_preprocess=$compute_cluster \
+  inputs.compute_finetune=$compute_cluster \
+  inputs.compute_model_evaluation=$compute_cluster \
+  inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$model_name/versions/$model_version" \
+  inputs.train_file_path.path=$train_data \
+  inputs.validation_file_path.path=$validation_data \
+  inputs.test_file_path.path=$test_data \
+  inputs.evaluation_config.path=$evaluation_config \
+  inputs.source_lang=$source_lang \
+  inputs.target_lang=$target_lang \
+  inputs.number_of_gpu_to_use_finetuning=$number_of_gpu_to_use_finetuning \
+  inputs.num_train_epochs=$num_train_epochs \
+  inputs.learning_rate=$learning_rate ) || {
+    echo "Failed to submit finetuning job"
+    exit 1
+  }
+
+az ml job stream --name $parent_job_name $workspace_info || {
+    echo "job stream failed"; exit 1;
+}
+
+# 5. Create model in workspace from train job output
+az ml model create --name $finetuned_model_name --version $version --type mlflow_model \
+ --path azureml://jobs/$parent_job_name/outputs/trained_model $workspace_info  || {
+    echo "model create in workspace failed"; exit 1;
+}
+
+# 6. Deploy the model to an endpoint
+# create online endpoint 
+az ml online-endpoint create --name $endpoint_name $workspace_info  || {
+    echo "endpoint create failed"; exit 1;
+}
+
+# deploy model from registry to endpoint in workspace
+# You can find here the list of SKU's supported for deployment - https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list
+az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name=$endpoint_name model=azureml:$finetuned_model_name:$version \
+  instance_type=$deployment_sku || {
+    echo "deployment create failed"; exit 1;
+}
+
+# 7. Try a sample scoring request
+
+# Check if scoring data file exists
+if [ -f $scoring_file ]; then
+    echo "Invoking endpoint $endpoint_name with following input:\n\n"
+    cat $scoring_file
+    echo "\n\n"
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 8. Delete the endpoint
+az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
diff --git a/cli/foundation-models/system/inference/automatic-speech-recognition/asr-online-endpoint.sh b/cli/foundation-models/system/inference/automatic-speech-recognition/asr-online-endpoint.sh
new file mode 100644
index 0000000000..bf79df063d
--- /dev/null
+++ b/cli/foundation-models/system/inference/automatic-speech-recognition/asr-online-endpoint.sh
@@ -0,0 +1,79 @@
+set -x
+# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-asr
+# the sample scoring file available in the same folder as the above notebook
+
+# script inputs
+registry_name="azureml-preview"
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+
+# This is the model from system registry that needs to be deployed
+model_name="openai-whisper-large"
+# using the latest version of the model - not working yet
+model_version=1
+
+version=$(date +%s)
+endpoint_name="asr-$version"
+
+# todo: fetch deployment_sku from the min_inference_sku tag of the model
+deployment_sku="Standard_DS4_v2"
+
+# scoring_file
+scoring_file="../../../../../sdk/python/foundation-models/system/inference/automatic-speech-recognition/sample-request/sample_score.json"
+
+# 1. Setup pre-requisites
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   ["$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then 
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s $subscription_id
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"
+
+# 2. Make sure the model version is available in the registry
+# TODO: confirm that model show works for registries outside the tenant (aka system registry)
+az ml model show --name $model_name --version $model_version \
+  --registry-name $registry_name || {
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+}
+
+# 3. Deploy the model to an endpoint
+# Start by creating the online endpoint
+if ! az ml online-endpoint create --name $endpoint_name $workspace_info; then
+    echo "endpoint create failed"; exit 1;
+fi
+
+# Deploy the registry model to that endpoint and route all traffic to it
+if ! az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
+  instance_type=$deployment_sku; then
+    echo "deployment create failed"; exit 1;
+fi
+
+# 4. Try a sample scoring request
+
+# Check if scoring data file exists
+if [ -f $scoring_file ]; then
+    echo "Invoking endpoint $endpoint_name with following input:\n\n"
+    cat $scoring_file
+    echo "\n\n"
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 6. Delete the endpoint
+az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
+
+
+
diff --git a/cli/foundation-models/system/inference/automatic-speech-recognition/deploy.yml b/cli/foundation-models/system/inference/automatic-speech-recognition/deploy.yml
new file mode 100644
index 0000000000..48bce7ade6
--- /dev/null
+++ b/cli/foundation-models/system/inference/automatic-speech-recognition/deploy.yml
@@ -0,0 +1,6 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo
+instance_type: Standard_DS4_v2
+instance_count: 1
+request_settings:
+  request_timeout_ms: 60000
\ No newline at end of file
diff --git a/cli/foundation-models/system/inference/fill-mask/deploy.yml b/cli/foundation-models/system/inference/fill-mask/deploy.yml
new file mode 100644
index 0000000000..336e5519f5
--- /dev/null
+++ b/cli/foundation-models/system/inference/fill-mask/deploy.yml
@@ -0,0 +1,6 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo
+instance_type: Standard_DS3_v2
+instance_count: 1
+request_settings:
+  request_timeout_ms: 60000
\ No newline at end of file
diff --git a/cli/foundation-models/system/inference/fill-mask/fill-mask-online-endpoint.sh b/cli/foundation-models/system/inference/fill-mask/fill-mask-online-endpoint.sh
new file mode 100644
index 0000000000..fbfe2d68bb
--- /dev/null
+++ b/cli/foundation-models/system/inference/fill-mask/fill-mask-online-endpoint.sh
@@ -0,0 +1,79 @@
+set -x
+# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-fill-mask
+# the sample scoring file available in the same folder as the above notebook
+
+# script inputs
+registry_name="azureml-preview"
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+
+# This is the model from system registry that needs to be deployed
+model_name="bert-base-uncased"
+# using the latest version of the model - not working yet
+model_version=3
+
+version=$(date +%s)
+endpoint_name="fill-mask-$version"
+
+# todo: fetch deployment_sku from the min_inference_sku tag of the model
+deployment_sku="Standard_DS2_v2"
+
+# scoring_file
+scoring_file="../../../../../sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/sample_score.json"
+
+# 1. Setup pre-requisites
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   ["$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then 
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s $subscription_id
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"
+
+# 2. Make sure the model version is available in the registry
+# TODO: confirm that model show works for registries outside the tenant (aka system registry)
+az ml model show --name $model_name --version $model_version \
+  --registry-name $registry_name || {
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+}
+
+# 3. Deploy the model to an endpoint
+# Start by creating the online endpoint
+if ! az ml online-endpoint create --name $endpoint_name $workspace_info; then
+    echo "endpoint create failed"; exit 1;
+fi
+
+# Deploy the registry model to that endpoint and route all traffic to it
+if ! az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
+  instance_type=$deployment_sku; then
+    echo "deployment create failed"; exit 1;
+fi
+
+# 4. Try a sample scoring request
+
+# Check if scoring data file exists
+if [ -f $scoring_file ]; then
+    echo "Invoking endpoint $endpoint_name with following input:\n\n"
+    cat $scoring_file
+    echo "\n\n"
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 6. Delete the endpoint
+az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
+
+
+
diff --git a/cli/foundation-models/system/inference/question-answering/deploy.yml b/cli/foundation-models/system/inference/question-answering/deploy.yml
new file mode 100644
index 0000000000..336e5519f5
--- /dev/null
+++ b/cli/foundation-models/system/inference/question-answering/deploy.yml
@@ -0,0 +1,6 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo
+instance_type: Standard_DS3_v2
+instance_count: 1
+request_settings:
+  request_timeout_ms: 60000
\ No newline at end of file
diff --git a/cli/foundation-models/system/inference/question-answering/question-answering-online-endpoint.sh b/cli/foundation-models/system/inference/question-answering/question-answering-online-endpoint.sh
new file mode 100644
index 0000000000..d0a8579621
--- /dev/null
+++ b/cli/foundation-models/system/inference/question-answering/question-answering-online-endpoint.sh
@@ -0,0 +1,79 @@
+set -x
+# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-question-answering
+# the sample scoring file available in the same folder as the above notebook
+
+# script inputs
+registry_name="azureml-preview"
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+
+# This is the model from system registry that needs to be deployed
+model_name="deepset-minilm-uncased-squad2"
+# using the latest version of the model - not working yet
+model_version=3
+
+version=$(date +%s)
+endpoint_name="question-answering-$version"
+
+# todo: fetch deployment_sku from the min_inference_sku tag of the model
+deployment_sku="Standard_DS2_v2"
+
+# scoring_file
+scoring_file="../../../../../sdk/python/foundation-models/system/inference/question-answering/squad-dataset/sample_score.json"
+
+# 1. Setup pre-requisites
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   ["$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then 
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s $subscription_id
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"
+
+# 2. Make sure the model version is available in the registry
+# TODO: confirm that model show works for registries outside the tenant (aka system registry)
+az ml model show --name $model_name --version $model_version \
+  --registry-name $registry_name || {
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+}
+
+# 3. Deploy the model to an endpoint
+# Start by creating the online endpoint
+if ! az ml online-endpoint create --name $endpoint_name $workspace_info; then
+    echo "endpoint create failed"; exit 1;
+fi
+
+# Deploy the registry model to that endpoint and route all traffic to it
+if ! az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
+  instance_type=$deployment_sku; then
+    echo "deployment create failed"; exit 1;
+fi
+
+# 4. Try a sample scoring request
+
+# Check if scoring data file exists
+if [ -f $scoring_file ]; then
+    echo "Invoking endpoint $endpoint_name with following input:\n\n"
+    cat $scoring_file
+    echo "\n\n"
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 6. Delete the endpoint
+az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
+
+
+
diff --git a/cli/foundation-models/system/inference/summarization/deploy.yml b/cli/foundation-models/system/inference/summarization/deploy.yml
new file mode 100644
index 0000000000..336e5519f5
--- /dev/null
+++ b/cli/foundation-models/system/inference/summarization/deploy.yml
@@ -0,0 +1,6 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo
+instance_type: Standard_DS3_v2
+instance_count: 1
+request_settings:
+  request_timeout_ms: 60000
\ No newline at end of file
diff --git a/cli/foundation-models/system/inference/summarization/summarization-online-endpoint.sh b/cli/foundation-models/system/inference/summarization/summarization-online-endpoint.sh
new file mode 100644
index 0000000000..6948d59502
--- /dev/null
+++ b/cli/foundation-models/system/inference/summarization/summarization-online-endpoint.sh
@@ -0,0 +1,79 @@
+set -x
+# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-summarization
+# the sample scoring file available in the same folder as the above notebook
+
+# script inputs
+registry_name="azureml-preview"
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+
+# This is the model from system registry that needs to be deployed
+model_name="sshleifer-distilbart-cnn-12-6"
+# using the latest version of the model - not working yet
+model_version=3
+
+version=$(date +%s)
+endpoint_name="summarization-$version"
+
+# todo: fetch deployment_sku from the min_inference_sku tag of the model
+deployment_sku="Standard_DS3_v2"
+
+# scoring_file
+scoring_file="../../../../../sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/sample_score.json"
+
+# 1. Setup pre-requisites
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   ["$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then 
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s $subscription_id
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"
+
+# 2. Make sure the model version is available in the registry
+# TODO: confirm that model show works for registries outside the tenant (aka system registry)
+az ml model show --name $model_name --version $model_version \
+  --registry-name $registry_name || {
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+}
+
+# 3. Deploy the model to an endpoint
+# Start by creating the online endpoint
+if ! az ml online-endpoint create --name $endpoint_name $workspace_info; then
+    echo "endpoint create failed"; exit 1;
+fi
+
+# Deploy the registry model to that endpoint and route all traffic to it
+if ! az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
+  instance_type=$deployment_sku; then
+    echo "deployment create failed"; exit 1;
+fi
+
+# 4. Try a sample scoring request
+
+# Check if scoring data file exists
+if [ -f $scoring_file ]; then
+    echo "Invoking endpoint $endpoint_name with following input:\n\n"
+    cat $scoring_file
+    echo "\n\n"
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 6. Delete the endpoint
+az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
+
+
+
diff --git a/cli/foundation-models/system/inference/text-classification/deploy.yml b/cli/foundation-models/system/inference/text-classification/deploy.yml
new file mode 100644
index 0000000000..336e5519f5
--- /dev/null
+++ b/cli/foundation-models/system/inference/text-classification/deploy.yml
@@ -0,0 +1,6 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo
+instance_type: Standard_DS3_v2
+instance_count: 1
+request_settings:
+  request_timeout_ms: 60000
\ No newline at end of file
diff --git a/cli/foundation-models/system/inference/text-classification/text-classification-online-endpoint.sh b/cli/foundation-models/system/inference/text-classification/text-classification-online-endpoint.sh
new file mode 100644
index 0000000000..75d193e047
--- /dev/null
+++ b/cli/foundation-models/system/inference/text-classification/text-classification-online-endpoint.sh
@@ -0,0 +1,79 @@
+set -x
+# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-text-classification
+# the sample scoring file available in the same folder as the above notebook
+
+# script inputs
+registry_name="azureml-preview"
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+
+# This is the model from system registry that needs to be deployed
+model_name="microsoft-deberta-base-mnli"
+# using the latest version of the model - not working yet
+model_version=3
+
+version=$(date +%s)
+endpoint_name="text-classification-$version"
+
+# todo: fetch deployment_sku from the min_inference_sku tag of the model
+deployment_sku="Standard_DS3_v2"
+
+# scoring_file
+scoring_file="../../../../../sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/sample_score.json"
+
+# 1. Setup pre-requisites
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   ["$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then 
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s $subscription_id
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"
+
+# 2. Make sure the model version is available in the registry
+# TODO: confirm that model show works for registries outside the tenant (aka system registry)
+az ml model show --name $model_name --version $model_version \
+  --registry-name $registry_name || {
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+}
+
+# 3. Deploy the model to an endpoint
+# Start by creating the online endpoint
+if ! az ml online-endpoint create --name $endpoint_name $workspace_info; then
+    echo "endpoint create failed"; exit 1;
+fi
+
+# Deploy the registry model to that endpoint and route all traffic to it
+if ! az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
+  instance_type=$deployment_sku; then
+    echo "deployment create failed"; exit 1;
+fi
+
+# 4. Try a sample scoring request
+
+# Check if scoring data file exists
+if [ -f $scoring_file ]; then
+    echo "Invoking endpoint $endpoint_name with following input:\n\n"
+    cat $scoring_file
+    echo "\n\n"
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 6. Delete the endpoint
+az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
+
+
+
diff --git a/cli/foundation-models/system/inference/text-generation/deploy.yml b/cli/foundation-models/system/inference/text-generation/deploy.yml
new file mode 100644
index 0000000000..336e5519f5
--- /dev/null
+++ b/cli/foundation-models/system/inference/text-generation/deploy.yml
@@ -0,0 +1,6 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo
+instance_type: Standard_DS3_v2
+instance_count: 1
+request_settings:
+  request_timeout_ms: 60000
\ No newline at end of file
diff --git a/cli/foundation-models/system/inference/text-generation/text-generation-online-endpoint.sh b/cli/foundation-models/system/inference/text-generation/text-generation-online-endpoint.sh
new file mode 100644
index 0000000000..6b3428d639
--- /dev/null
+++ b/cli/foundation-models/system/inference/text-generation/text-generation-online-endpoint.sh
@@ -0,0 +1,79 @@
+set -x
+# Deploys a registry model to a new online endpoint, scores one sample request, then deletes the endpoint.
+# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-text-generation
+# the sample scoring file available in the same folder as the above notebook
+
+# script inputs
+registry_name="azureml-preview"
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+
+# This is the model from system registry that needs to be deployed
+model_name="gpt2"
+# using the latest version of the model - not working yet
+model_version=3
+
+version=$(date +%s)  # epoch seconds, so every run gets its own endpoint name
+endpoint_name="text-generation-$version"
+
+# todo: fetch deployment_sku from the min_inference_sku tag of the model
+deployment_sku="Standard_DS2_v2"
+
+# sample request payload; relative path, so run this script from its own folder
+scoring_file="../../../../../sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/sample_score.json"
+
+# 1. Setup pre-requisites
+# stop if any placeholder above is still unedited
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   [ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s "$subscription_id"
+# expanded unquoted below on purpose so it splits into separate CLI options
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"
+
+# 2. Check if the model exists in the registry
+# need to confirm model show command works for registries outside the tenant (aka system registry)
+if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name
+then
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+fi
+
+# 3. Deploy the model to an endpoint
+# create online endpoint
+az ml online-endpoint create --name $endpoint_name $workspace_info  || {
+    echo "endpoint create failed"; exit 1;
+}
+
+# deploy model from registry to endpoint in workspace; --set overrides the values in deploy.yml
+az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
+  instance_type=$deployment_sku || {
+    echo "deployment create failed"; exit 1;
+}
+
+# 4. Try a sample scoring request
+# Check if scoring data file exists
+if [ -f "$scoring_file" ]; then
+    # printf, because echo does not portably turn "\n" into a newline
+    printf 'Invoking endpoint %s with following input:\n\n' "$endpoint_name"
+    cat "$scoring_file"
+    printf '\n\n'
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name $endpoint_name --request-file "$scoring_file" $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 6. Delete the endpoint
+az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
diff --git a/cli/foundation-models/system/inference/token-classification/deploy.yml b/cli/foundation-models/system/inference/token-classification/deploy.yml
new file mode 100644
index 0000000000..336e5519f5
--- /dev/null
+++ b/cli/foundation-models/system/inference/token-classification/deploy.yml
@@ -0,0 +1,6 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo  # deployment name
+instance_type: Standard_DS3_v2  # default SKU; token-classification-online-endpoint.sh overrides it via --set instance_type=...
+instance_count: 1
+request_settings:
+  request_timeout_ms: 60000  # 60 seconds
\ No newline at end of file
diff --git a/cli/foundation-models/system/inference/token-classification/token-classification-online-endpoint.sh b/cli/foundation-models/system/inference/token-classification/token-classification-online-endpoint.sh
new file mode 100644
index 0000000000..7ca6b1e351
--- /dev/null
+++ b/cli/foundation-models/system/inference/token-classification/token-classification-online-endpoint.sh
@@ -0,0 +1,79 @@
+set -x
+# Deploys a registry model to a new online endpoint, scores one sample request, then deletes the endpoint.
+# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-token-classification
+# the sample scoring file available in the same folder as the above notebook
+
+# script inputs
+registry_name="azureml-preview"
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+
+# This is the model from system registry that needs to be deployed
+model_name="Jean-Baptiste-camembert-ner"
+# using the latest version of the model - not working yet
+model_version=3
+
+version=$(date +%s)  # epoch seconds, so every run gets its own endpoint name
+endpoint_name="token-classification-$version"
+
+# todo: fetch deployment_sku from the min_inference_sku tag of the model
+deployment_sku="Standard_DS2_v2"
+
+# sample request payload; relative path, so run this script from its own folder
+scoring_file="../../../../../sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/sample_score.json"
+
+# 1. Setup pre-requisites
+# stop if any placeholder above is still unedited
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   [ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s "$subscription_id"
+# expanded unquoted below on purpose so it splits into separate CLI options
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"
+
+# 2. Check if the model exists in the registry
+# need to confirm model show command works for registries outside the tenant (aka system registry)
+if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name
+then
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+fi
+
+# 3. Deploy the model to an endpoint
+# create online endpoint
+az ml online-endpoint create --name $endpoint_name $workspace_info  || {
+    echo "endpoint create failed"; exit 1;
+}
+
+# deploy model from registry to endpoint in workspace; --set overrides the values in deploy.yml
+az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
+  instance_type=$deployment_sku || {
+    echo "deployment create failed"; exit 1;
+}
+
+# 4. Try a sample scoring request
+# Check if scoring data file exists
+if [ -f "$scoring_file" ]; then
+    # printf, because echo does not portably turn "\n" into a newline
+    printf 'Invoking endpoint %s with following input:\n\n' "$endpoint_name"
+    cat "$scoring_file"
+    printf '\n\n'
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name $endpoint_name --request-file "$scoring_file" $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 6. Delete the endpoint
+az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
diff --git a/cli/foundation-models/system/inference/translation/deploy.yml b/cli/foundation-models/system/inference/translation/deploy.yml
new file mode 100644
index 0000000000..9c7951062c
--- /dev/null
+++ b/cli/foundation-models/system/inference/translation/deploy.yml
@@ -0,0 +1,6 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: demo  # deployment name
+instance_type: Standard_DS3_v2  # default SKU; translation-online-endpoint.sh overrides it via --set instance_type=...
+instance_count: 1
+request_settings:
+  request_timeout_ms: 60000  # 60 seconds
diff --git a/cli/foundation-models/system/inference/translation/translation-online-endpoint.sh b/cli/foundation-models/system/inference/translation/translation-online-endpoint.sh
new file mode 100644
index 0000000000..61e1e8337e
--- /dev/null
+++ b/cli/foundation-models/system/inference/translation/translation-online-endpoint.sh
@@ -0,0 +1,79 @@
+set -x
+# Deploys a registry model to a new online endpoint, scores one sample request, then deletes the endpoint.
+# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-translation
+# the sample scoring file available in the same folder as the above notebook
+
+# script inputs
+registry_name="azureml-preview"
+subscription_id="<SUBSCRIPTION_ID>"
+resource_group_name="<RESOURCE_GROUP>"
+workspace_name="<WORKSPACE_NAME>"
+
+# This is the model from system registry that needs to be deployed
+model_name="t5-small"
+# using the latest version of the model - not working yet
+model_version=4
+
+version=$(date +%s)  # epoch seconds, so every run gets its own endpoint name
+endpoint_name="translation-$version"
+
+# todo: fetch deployment_sku from the min_inference_sku tag of the model
+deployment_sku="Standard_DS2_v2"
+
+# sample request payload; relative path, so run this script from its own folder
+scoring_file="../../../../../sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/sample_score.json"
+
+# 1. Setup pre-requisites
+# stop if any placeholder above is still unedited
+if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
+   [ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
+   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s "$subscription_id"
+# expanded unquoted below on purpose so it splits into separate CLI options
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"
+
+# 2. Check if the model exists in the registry
+# need to confirm model show command works for registries outside the tenant (aka system registry)
+if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name
+then
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+fi
+
+# 3. Deploy the model to an endpoint
+# create online endpoint
+az ml online-endpoint create --name $endpoint_name $workspace_info  || {
+    echo "endpoint create failed"; exit 1;
+}
+
+# deploy model from registry to endpoint in workspace; --set overrides the values in deploy.yml
+az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \
+  endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
+  instance_type=$deployment_sku || {
+    echo "deployment create failed"; exit 1;
+}
+
+# 4. Try a sample scoring request
+# Check if scoring data file exists
+if [ -f "$scoring_file" ]; then
+    # printf, because echo does not portably turn "\n" into a newline
+    printf 'Invoking endpoint %s with following input:\n\n' "$endpoint_name"
+    cat "$scoring_file"
+    printf '\n\n'
+else
+    echo "Scoring file $scoring_file does not exist"
+    exit 1
+fi
+
+az ml online-endpoint invoke --name $endpoint_name --request-file "$scoring_file" $workspace_info || {
+    echo "endpoint invoke failed"; exit 1;
+}
+
+# 6. Delete the endpoint
+az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
diff --git a/cli/jobs/pipelines-with-components/image_classification_with_densenet/image_cnn_train/requirements.txt b/cli/jobs/pipelines-with-components/image_classification_with_densenet/image_cnn_train/requirements.txt
index e69de29bb2..d605e3bcc0 100644
--- a/cli/jobs/pipelines-with-components/image_classification_with_densenet/image_cnn_train/requirements.txt
+++ b/cli/jobs/pipelines-with-components/image_classification_with_densenet/image_cnn_train/requirements.txt
@@ -0,0 +1 @@
+git+https://github.com/NVIDIA/dllogger.git@26a0f8f1958de2c0c460925ff6102a4d2486d6cc#egg=dllogger
diff --git a/sdk/python/README.md b/sdk/python/README.md
index 805aec46c4..1dcb3a5f44 100644
--- a/sdk/python/README.md
+++ b/sdk/python/README.md
@@ -68,6 +68,19 @@ Test Status is for branch - **_main_**
 |endpoints|online|[online-endpoints-deploy-mlflow-model](endpoints/online/mlflow/online-endpoints-deploy-mlflow-model.ipynb)|Deploy an mlflow model to an online endpoint. This will be a no-code-deployment. It doesn't require scoring script and environment.|[![online-endpoints-deploy-mlflow-model](https://github.com/Azure/azureml-examples/actions/workflows/sdk-endpoints-online-mlflow-online-endpoints-deploy-mlflow-model.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-endpoints-online-mlflow-online-endpoints-deploy-mlflow-model.yml)|
 |endpoints|online|[online-endpoints-triton](endpoints/online/triton/single-model/online-endpoints-triton.ipynb)|Deploy a custom container as an online endpoint. Use web servers other than the default Python Flask server used by Azure ML without losing the benefits of Azure ML's built-in monitoring, scaling, alerting, and authentication.|[![online-endpoints-triton](https://github.com/Azure/azureml-examples/actions/workflows/sdk-endpoints-online-triton-single-model-online-endpoints-triton.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-endpoints-online-triton-single-model-online-endpoints-triton.yml)|
 |foundation-models|system|[import_model_into_registry](foundation-models/system/import/import_model_into_registry.ipynb)|*no description*|[![import_model_into_registry](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-import-import_model_into_registry.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-import-import_model_into_registry.yml)|
+|foundation-models|system|[fill-mask](foundation-models/system/evaluation/fill-mask/fill-mask.ipynb)|*no description*|[![fill-mask](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-fill-mask-fill-mask.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-fill-mask-fill-mask.yml)|
+|foundation-models|system|[question-answering](foundation-models/system/evaluation/question-answering/question-answering.ipynb)|*no description*|[![question-answering](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-question-answering-question-answering.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-question-answering-question-answering.yml)|
+|foundation-models|system|[abstractive-and-extractive-summarization](foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization.ipynb)|*no description*|[![abstractive-and-extractive-summarization](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-summarization-abstractive-and-extractive-summarization.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-summarization-abstractive-and-extractive-summarization.yml)|
+|foundation-models|system|[entailment-contradiction](foundation-models/system/evaluation/text-classification/entailment-contradiction.ipynb)|*no description*|[![entailment-contradiction](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-text-classification-entailment-contradiction.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-text-classification-entailment-contradiction.yml)|
+|foundation-models|system|[sentiment-analysis](foundation-models/system/evaluation/text-classification/sentiment-analysis.ipynb)|*no description*|[![sentiment-analysis](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-text-classification-sentiment-analysis.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-text-classification-sentiment-analysis.yml)|
+|foundation-models|system|[text-generation](foundation-models/system/evaluation/text-generation/text-generation.ipynb)|*no description*|[![text-generation](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-text-generation-text-generation.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-text-generation-text-generation.yml)|
+|foundation-models|system|[news-articles-entity-recognition](foundation-models/system/evaluation/token-classification/news-articles-entity-recognition.ipynb)|*no description*|[![news-articles-entity-recognition](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-token-classification-news-articles-entity-recognition.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-token-classification-news-articles-entity-recognition.yml)|
+|foundation-models|system|[translation-romanian-to-english](foundation-models/system/evaluation/translation/translation-romanian-to-english.ipynb)|*no description*|[![translation-romanian-to-english](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-translation-translation-romanian-to-english.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-evaluation-translation-translation-romanian-to-english.yml)|
+|foundation-models|system|[extractive-qa](foundation-models/system/finetune/question-answering/extractive-qa.ipynb)|*no description*|[![extractive-qa](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-finetune-question-answering-extractive-qa.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-finetune-question-answering-extractive-qa.yml)|
+|foundation-models|system|[news-summary](foundation-models/system/finetune/summarization/news-summary.ipynb)|*no description*|[![news-summary](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-finetune-summarization-news-summary.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-finetune-summarization-news-summary.yml)|
+|foundation-models|system|[emotion-detection](foundation-models/system/finetune/text-classification/emotion-detection.ipynb)|*no description*|[![emotion-detection](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-finetune-text-classification-emotion-detection.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-finetune-text-classification-emotion-detection.yml)|
+|foundation-models|system|[token-classification](foundation-models/system/finetune/token-classification/token-classification.ipynb)|*no description*|[![token-classification](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-finetune-token-classification-token-classification.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-finetune-token-classification-token-classification.yml)|
+|foundation-models|system|[translation](foundation-models/system/finetune/translation/translation.ipynb)|*no description*|[![translation](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-finetune-translation-translation.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-foundation-models-system-finetune-translation-translation.yml)|
 |jobs|automl-standalone-jobs|[automl-classification-task-bankmarketing](jobs/automl-standalone-jobs/automl-classification-task-bankmarketing/automl-classification-task-bankmarketing.ipynb)|*no description*|[![automl-classification-task-bankmarketing](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-automl-standalone-jobs-automl-classification-task-bankmarketing-automl-classification-task-bankmarketing.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-automl-standalone-jobs-automl-classification-task-bankmarketing-automl-classification-task-bankmarketing.yml)|
 |jobs|automl-standalone-jobs|[mlflow-model-local-inference-test](jobs/automl-standalone-jobs/automl-classification-task-bankmarketing/mlflow-model-local-inference-test.ipynb)|*no description* - _This sample is excluded from automated tests_|[![mlflow-model-local-inference-test](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-automl-standalone-jobs-automl-classification-task-bankmarketing-mlflow-model-local-inference-test.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-automl-standalone-jobs-automl-classification-task-bankmarketing-mlflow-model-local-inference-test.yml)|
 |jobs|automl-standalone-jobs|[auto-ml-forecasting-github-dau](jobs/automl-standalone-jobs/automl-forecasting-github-dau/auto-ml-forecasting-github-dau.ipynb)|*no description*|[![auto-ml-forecasting-github-dau](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-github-dau-auto-ml-forecasting-github-dau.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-github-dau-auto-ml-forecasting-github-dau.yml)|
diff --git a/sdk/python/foundation-models/system/evaluation/fill-mask/README.md b/sdk/python/foundation-models/system/evaluation/fill-mask/README.md
new file mode 100644
index 0000000000..5609ee1a3b
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/fill-mask/README.md
@@ -0,0 +1,15 @@
+## Fill Mask
+
+### List of supported keyword arguments:
+
+| Keyword Argument  | Description                                                                                                      | Type      | Sample                      |
+|:-----------------:|:-----------------------------------------------------------------------------------------------------------------|-----------|-----------------------------|
+|      metrics      | List for subset of metrics to be computed. All supported metrics listed below.                                   | list<str> | ["perplexities"]            |
+|     model_id      | Model used for calculating Perplexity. Perplexity can only be calculated for causal language models.             | str       | "gpt2", "bert-base-uncased" |
+|    batch_size     | The batch size to run texts through the model                                                                    | int       | 16                          |
+|  add_start_token  | Boolean flag to add the start token to the texts so the perplexity can include the probability of the first word | boolean   | true, false                 |
+| custom_dimensions | Used to report telemetry data (can later be used to perform PII scrubbing)                                       | dict      |                             |
+
+### List of supported metrics:
+
+* perplexities
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/fill-mask/eval-config.json b/sdk/python/foundation-models/system/evaluation/fill-mask/eval-config.json
new file mode 100644
index 0000000000..81c4c0061b
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/fill-mask/eval-config.json
@@ -0,0 +1,5 @@
+{
+  "metrics": ["perplexities"],
+  "model_id": "gpt2",
+  "add_start_token": true
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask-eval-dashboard.png
new file mode 100644
index 0000000000..3eeb20923f
Binary files /dev/null and b/sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask-eval-dashboard.png differ
diff --git a/sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask.ipynb b/sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask.ipynb
new file mode 100644
index 0000000000..221ee60f9d
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask.ipynb
@@ -0,0 +1,468 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Fill Mask Evaluation\n",
+    "\n",
+    "This sample shows how to evaluate a group of models against a given set of metrics for the `fill-mask` task. \n",
+    "\n",
+    "### Evaluation dataset\n",
+    "Contains ~70k pages from wikipedia, each describing a person. For each page, the person described in the text is masked with a mask token. The WikiText language modeling dataset is a collection of over 100 million tokens extracted from the set of verified Good and Featured articles on Wikipedia. The dataset is available under the Creative Commons Attribution-ShareAlike License. Compared to the preprocessed version of Penn Treebank (PTB), WikiText-2 is over 2 times larger and WikiText-103 is over 110 times larger. The WikiText dataset also features a far larger vocabulary and retains the original case, punctuation and numbers - all of which are removed in PTB. As it is composed of full articles, the dataset is well suited for models that can take advantage of long term dependencies. A copy of the [rcds/wikipedia-for-mask-filling](https://huggingface.co/datasets/rcds/wikipedia-for-mask-filling/viewer/original_512/train) dataset is available in the [fill-mask](./fill-mask) folder.\n",
+    "\n",
+    "### Model\n",
+    "The goal of evaluating models is to compare their performance on a variety of metrics. `fill-mask` is a generic task type that can be used for predicting which words should replace some of the words that were masked in a sentence based on the context provided. As such, the models you pick to compare must be finetuned for the same scenario. Given that we have the rcds/wikipedia-for-mask-filling dataset, we would like to look for models finetuned for this specific scenario. We will compare `bert-base-uncased`, `distilbert-base-uncased` and `microsoft-deberta-large` in this sample, which are available in the `azureml` system registry.\n",
+    "\n",
+    "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). \n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick the models to evaluate.\n",
+    "* Pick and explore evaluation data.\n",
+    "* Configure the evaluation jobs.\n",
+    "* Run the evaluation jobs.\n",
+    "* Review the evaluation metrics. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running the cell below. This step is required when running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "%pip install --upgrade azure-ai-ml\n",
+    "%pip install --upgrade azure-identity\n",
+    "%pip install --upgrade datasets==2.9.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319346668
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = None\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential)\n",
+    "    subscription_id = workspace_ml_client.subscription_id\n",
+    "    workspace = workspace_ml_client.workspace_name\n",
+    "    resource_group = workspace_ml_client.resource_group_name\n",
+    "except Exception as ex:\n",
+    "    print(ex)\n",
+    "    # Enter details of your AML workspace\n",
+    "    subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "    resource_group = \"<RESOURCE_GROUP>\"\n",
+    "    workspace = \"<AML_WORKSPACE_NAME>\"\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential, subscription_id, resource_group, workspace\n",
+    "    )\n",
+    "\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "preview_registry = \"azureml-staging\"\n",
+    "registry = \"azureml\"\n",
+    "\n",
+    "preview_registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=preview_registry\n",
+    ")\n",
+    "print(preview_registry_ml_client)\n",
+    "\n",
+    "registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=registry\n",
+    ")\n",
+    "registry_ml_client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "    print(f\"GPU compute '{compute_cluster}' found.\")\n",
+    "except Exception as ex:\n",
+    "    print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The snippet below queries the number of GPUs present on the compute. We can use it to set `gpus_per_node` to ensure utilization of all GPUs in the node."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpus_per_node = 1  # default value\n",
+    "gpu_count_found = False\n",
+    "ws_computes = workspace_ml_client.compute.list_sizes()\n",
+    "for ws_compute in ws_computes:\n",
+    "    if ws_compute.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = ws_compute.gpus\n",
+    "        print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n",
+    "# if gpu_count_found not found, then print an error\n",
+    "if gpus_per_node > 0:\n",
+    "    gpu_count_found = True\n",
+    "else:\n",
+    "    gpu_count_found = False\n",
+    "    print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick the models to evaluate\n",
+    "\n",
+    "Verify that the models selected for evaluation are available in system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319354708
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n",
+    "models = [\n",
+    "    {\"name\": \"bert-base-cased\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n",
+    "    {\"name\": \"bert-base-uncased\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n",
+    "    {\"name\": \"bert-large-cased\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n",
+    "    {\"name\": \"bert-large-uncased\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n",
+    "    {\"name\": \"camembert-base\", \"version\": \"1\", \"mask\": \"<mask>\"},\n",
+    "    {\"name\": \"distilbert-base-cased\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n",
+    "    {\"name\": \"distilbert-base-uncased\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n",
+    "    {\"name\": \"distilroberta-base\", \"version\": \"1\", \"mask\": \"<mask>\"},\n",
+    "    {\"name\": \"microsoft-deberta-base\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n",
+    "    {\"name\": \"microsoft-deberta-large\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n",
+    "    {\"name\": \"microsoft-deberta-xlarge\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n",
+    "    {\"name\": \"roberta-base\", \"version\": \"1\", \"mask\": \"<mask>\"},\n",
+    "    {\"name\": \"roberta-large\", \"version\": \"1\", \"mask\": \"<mask>\"},\n",
+    "]\n",
+    "for model in models:\n",
+    "    model = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    print(model.id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the test dataset for evaluation\n",
+    "A copy of the Wikipedia For Mask Filling is available in the [fill-mask](./fill-mask/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n",
+    "* To use the entire dataset, uncomment the cells below and run."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# !pip install datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from datasets import load_dataset\n",
+    "\n",
+    "# hf_test_data = load_dataset('rcds/wikipedia-for-mask-filling', 'original_512')\n",
+    "\n",
+    "# hf_test_data['train'].to_pandas().head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_data_mask_1 = \"./fill-mask/small-test-[MASK].jsonl\"  # [MASK]\n",
+    "test_data_mask_2 = \"./fill-mask/small-test-mask.jsonl\"  # <mask>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "pd.read_json(test_data_mask_1, lines=True).head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "pd.read_json(test_data_mask_2, lines=True).head()"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the evaluation jobs using the model and data as inputs\n",
+    "\n",
+    "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model.\n",
+    "\n",
+    "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval-config.json](./eval-config.json) file. We calculate `perplexities` in this sample.\n",
+    "\n",
+    "All supported evaluation configurations for `fill-mask` can be found in [README](./README.md)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml import Input\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"model_evaluation_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def evaluation_pipeline(test_data, mlflow_model):\n",
+    "    evaluation_job = pipeline_component_func(\n",
+    "        # specify the foundation model available in the azureml system registry or a model from the workspace\n",
+    "        # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n",
+    "        mlflow_model=mlflow_model,\n",
+    "        # test data\n",
+    "        test_data=test_data,\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        input_column_names=\"input_string\",\n",
+    "        label_column_name=\"title\",\n",
+    "        # Evaluation settings\n",
+    "        task=\"fill-mask\",\n",
+    "        # config file containing the details of evaluation metrics to calculate\n",
+    "        evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n",
+    "        # config cluster/device job is running on\n",
+    "        # set device to GPU/CPU on basis if GPU count was found\n",
+    "        device=\"gpu\" if gpu_count_found else \"cpu\",\n",
+    "    )\n",
+    "    return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the jobs, passing the model as a parameter to the pipeline created in the above step."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job for each model that we want to evaluate\n",
+    "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n",
+    "pipeline_jobs = []\n",
+    "\n",
+    "experiment_name = \"fill-mask-evaluation\"\n",
+    "\n",
+    "for model in models:\n",
+    "    model_object = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    if model[\"mask\"] == \"[MASK]\":\n",
+    "        test_data = Input(type=AssetTypes.URI_FILE, path=test_data_mask_1)\n",
+    "    else:\n",
+    "        test_data = Input(type=AssetTypes.URI_FILE, path=test_data_mask_2)\n",
+    "    pipeline_object = evaluation_pipeline(\n",
+    "        test_data=test_data,\n",
+    "        mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n",
+    "    )\n",
+    "    # don't reuse cached results from previous jobs\n",
+    "    pipeline_object.settings.force_rerun = True\n",
+    "    pipeline_object.settings.default_compute = compute_cluster\n",
+    "    pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "        pipeline_object, experiment_name=experiment_name\n",
+    "    )\n",
+    "    # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n",
+    "    pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n",
+    "    # wait for the pipeline job to complete\n",
+    "    workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more.\n",
+    "\n",
+    "![Model evaluation dashboard in AzureML studio](./fill-mask-eval-dashboard.png)\n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "\n",
+    "metrics_df = pd.DataFrame()\n",
+    "for job in pipeline_jobs:\n",
+    "    # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "    filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n",
+    "    runs = mlflow.search_runs(\n",
+    "        experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    "    )\n",
+    "    # get the compute_metrics runs.\n",
+    "    # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "    for run in runs:\n",
+    "        # else, check if run.data.metrics.accuracy exists\n",
+    "        if \"exact_match\" in run.data.metrics:\n",
+    "            # get the metrics from the mlflow run\n",
+    "            run_metric = run.data.metrics\n",
+    "            # add the model name to the run_metric dictionary\n",
+    "            run_metric[\"model_name\"] = job[\"model_name\"]\n",
+    "            # convert the run_metric dictionary to a pandas dataframe\n",
+    "            temp_df = pd.DataFrame(run_metric, index=[0])\n",
+    "            # concat the temp_df to the metrics_df\n",
+    "            metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n",
+    "\n",
+    "# move the model_name columns to the first column\n",
+    "cols = metrics_df.columns.tolist()\n",
+    "cols = cols[-1:] + cols[:-1]\n",
+    "metrics_df = metrics_df[cols]\n",
+    "metrics_df.head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10 - SDK V2",
+   "language": "python",
+   "name": "python310-sdkv2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/sdk/python/foundation-models/system/evaluation/question-answering/README.md b/sdk/python/foundation-models/system/evaluation/question-answering/README.md
new file mode 100644
index 0000000000..e6020e1d50
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/question-answering/README.md
@@ -0,0 +1,20 @@
+## Question Answering
+
+### List of supported keyword arguments:
+
+|  Keyword Argument  | Description                                                                    | Type      | Sample                      |
+|:------------------:|:-------------------------------------------------------------------------------|-----------|-----------------------------|
+|      metrics       | List for subset of metrics to be computed. All supported metrics listed below. | list<str> | ["exact_match", "f1_score"] |
+|     tokenizer      | Tokenizer object to perform tokenization on provided input text                | boolean   | false, true                 |
+| regexes_to_ignore  | List of regex to ignore in our input data points                               | list      | ["$[A-Z]+"]                 |
+|    ignore_case     | Boolean flag to indicate whether we need to ignore case                        | boolean   | false, true                 |
+| ignore_punctuation | Boolean flag to indicate whether we need to ignore punctuation                 | boolean   | false, true                 |
+|   ignore_numbers   | Boolean flag to indicate whether we need to ignore numbers                     | boolean   | false, true                 |
+| custom_dimensions  | Used to report telemetry data (can later be used to perform PII scrubbing)     | dict      |                             |
+
+### List of supported metrics:
+
+* exact_match
+* f1_score
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/question-answering/eval-config.json b/sdk/python/foundation-models/system/evaluation/question-answering/eval-config.json
new file mode 100644
index 0000000000..15165acfe5
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/question-answering/eval-config.json
@@ -0,0 +1,7 @@
+{
+  "metrics": ["exact_match", "f1_score"],
+  "regexes_to_ignore": ["$[A-Z]+"],
+  "ignore_case": false,
+  "ignore_numbers": false,
+  "ignore_punctuations": true
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/question-answering/question-answering-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/question-answering/question-answering-eval-dashboard.png
new file mode 100644
index 0000000000..dc76733da1
Binary files /dev/null and b/sdk/python/foundation-models/system/evaluation/question-answering/question-answering-eval-dashboard.png differ
diff --git a/sdk/python/foundation-models/system/evaluation/question-answering/question-answering.ipynb b/sdk/python/foundation-models/system/evaluation/question-answering/question-answering.ipynb
new file mode 100644
index 0000000000..24fb314bc0
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/question-answering/question-answering.ipynb
@@ -0,0 +1,462 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Question Answering Evaluation\n",
+    "\n",
+    "This sample shows how to evaluate a group of models against a given set of metrics for the `question-answering` task. \n",
+    "\n",
+    "### Evaluation dataset\n",
+    "The version 2 of Stanford Question Answering Dataset (SQuAD), SQuAD 2.0, combines the 100,000 questions in SQuAD 1.1 with over 50,000 unanswerable questions written adversarially by crowdworkers to look similar to answerable ones. To do well on SQuAD2.0, systems must not only answer questions when possible, but also determine when no answer is supported by the paragraph and abstain from answering. A copy of the [SQuAD_v2](https://huggingface.co/datasets/squad_v2) dataset is available in the [squad-v2](./squad-v2) folder.\n",
+    "\n",
+    "### Model\n",
+    "The goal of evaluating models is to compare their performance on a variety of metrics. `question-answering` is a generic task type that can be used for scenarios to answer questions based on the context provided. As such, the models you pick to compare must be finetuned for the same scenario. Given that we have the SQuAD_v2 dataset, we would like to look for models finetuned for this specific scenario. We will compare `deepset-minilm-uncased-squad2`, `deepset-roberta-base-squad2`, `distilbert-base-cased-distilled-squad` and `distilbert-base-uncased-distilled-squad` in this sample, which are available in the `azureml` system registry.\n",
+    "\n",
+    "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). \n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick the models to evaluate.\n",
+    "* Pick and explore evaluation data.\n",
+    "* Configure the evaluation jobs.\n",
+    "* Run the evaluation jobs.\n",
+    "* Review the evaluation metrics. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace `<AML_WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running the cell below. This step is required when running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "%pip install --upgrade azure-ai-ml\n",
+    "%pip install --upgrade azure-identity\n",
+    "%pip install --upgrade datasets==2.9.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319346668
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = None\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential)\n",
+    "    subscription_id = workspace_ml_client.subscription_id\n",
+    "    workspace = workspace_ml_client.workspace_name\n",
+    "    resource_group = workspace_ml_client.resource_group_name\n",
+    "except Exception as ex:\n",
+    "    print(ex)\n",
+    "    # Enter details of your AML workspace\n",
+    "    subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "    resource_group = \"<RESOURCE_GROUP>\"\n",
+    "    workspace = \"<AML_WORKSPACE_NAME>\"\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential, subscription_id, resource_group, workspace\n",
+    "    )\n",
+    "\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "preview_registry = \"azureml-staging\"\n",
+    "registry = \"azureml\"\n",
+    "\n",
+    "preview_registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=preview_registry\n",
+    ")\n",
+    "print(preview_registry_ml_client)\n",
+    "\n",
+    "registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=registry\n",
+    ")\n",
+    "registry_ml_client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "    print(f\"GPU compute '{compute_cluster}' found.\")\n",
+    "except Exception as ex:\n",
+    "    print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The snippet below queries the number of GPUs present on the compute. We can use it to set `gpus_per_node` to ensure utilization of all GPUs in the node."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpus_per_node = 1  # default value\n",
+    "gpu_count_found = False\n",
+    "ws_computes = workspace_ml_client.compute.list_sizes()\n",
+    "for ws_compute in ws_computes:\n",
+    "    if ws_compute.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = ws_compute.gpus\n",
+    "        print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n",
+    "# if gpu_count_found not found, then print an error\n",
+    "if gpus_per_node > 0:\n",
+    "    gpu_count_found = True\n",
+    "else:\n",
+    "    gpu_count_found = False\n",
+    "    print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick the models to evaluate\n",
+    "\n",
+    "Verify that the models selected for evaluation are available in system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319354708
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    },
+    "pycharm": {
+     "is_executing": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n",
+    "models = [\n",
+    "    {\"name\": \"deepset-minilm-uncased-squad2\", \"version\": \"1\"},\n",
+    "    {\"name\": \"deepset-roberta-base-squad2\", \"version\": \"1\"},\n",
+    "    {\"name\": \"distilbert-base-cased-distilled-squad\", \"version\": \"1\"},\n",
+    "    {\"name\": \"distilbert-base-uncased-distilled-squad\", \"version\": \"1\"},\n",
+    "]\n",
+    "for model in models:\n",
+    "    model = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    print(model.id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the test dataset for evaluation\n",
+    "A copy of the Squad v2 is available in the [squad-v2](./squad-v2/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n",
+    "* To use the entire dataset, uncomment the cells below and run."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# !pip install datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from datasets import load_dataset\n",
+    "\n",
+    "# hf_test_data = load_dataset('squad_v2')\n",
+    "\n",
+    "# hf_test_data['train'].to_pandas().head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_data = \"./squad-v2/small-test.jsonl\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "pd.read_json(test_data, lines=True).head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the evaluation jobs using the model and data as inputs\n",
+    "\n",
+    "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model.\n",
+    "\n",
+    "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval-config.json](./eval-config.json) file. We calculate `exact_match` and `f1_score` in this sample.\n",
+    "\n",
+    "All supported evaluation configurations for `question-answering` can be found in [README](./README.md)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml import Input\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"model_evaluation_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def evaluation_pipeline(mlflow_model):\n",
+    "    evaluation_job = pipeline_component_func(\n",
+    "        # specify the foundation model available in the azureml system registry or a model from the workspace\n",
+    "        # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n",
+    "        mlflow_model=mlflow_model,\n",
+    "        # test data\n",
+    "        test_data=Input(type=AssetTypes.URI_FILE, path=test_data),\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        input_column_names=\"context,question\",\n",
+    "        label_column_name=\"answer_text\",\n",
+    "        # Evaluation settings\n",
+    "        task=\"question-answering\",\n",
+    "        # config file containing the details of evaluation metrics to calculate\n",
+    "        evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n",
+    "        # config cluster/device job is running on\n",
+    "        # set device to GPU/CPU on basis if GPU count was found\n",
+    "        device=\"gpu\" if gpu_count_found else \"cpu\",\n",
+    "    )\n",
+    "    return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the jobs, passing the model as a parameter to the pipeline created in the above step."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job for each model that we want to evaluate\n",
+    "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n",
+    "pipeline_jobs = []\n",
+    "\n",
+    "experiment_name = \"question-answering-evaluation\"\n",
+    "\n",
+    "for model in models:\n",
+    "    model_object = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    pipeline_object = evaluation_pipeline(\n",
+    "        mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n",
+    "    )\n",
+    "    # don't reuse cached results from previous jobs\n",
+    "    pipeline_object.settings.force_rerun = True\n",
+    "    pipeline_object.settings.default_compute = compute_cluster\n",
+    "    pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n",
+    "    pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "        pipeline_object, experiment_name=experiment_name\n",
+    "    )\n",
+    "    # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n",
+    "    pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n",
+    "    # wait for the pipeline job to complete\n",
+    "    workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more.\n",
+    "\n",
+    "![Model evaluation dashboard in AzureML studio](./question-answering-eval-dashboard.png)\n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "\n",
+    "metrics_df = pd.DataFrame()\n",
+    "for job in pipeline_jobs:\n",
+    "    # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "    filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n",
+    "    runs = mlflow.search_runs(\n",
+    "        experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    "    )\n",
+    "    # get the compute_metrics runs.\n",
+    "    # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "    for run in runs:\n",
+    "        # else, check if run.data.metrics.accuracy exists\n",
+    "        if \"exact_match\" in run.data.metrics:\n",
+    "            # get the metrics from the mlflow run\n",
+    "            run_metric = run.data.metrics\n",
+    "            # add the model name to the run_metric dictionary\n",
+    "            run_metric[\"model_name\"] = job[\"model_name\"]\n",
+    "            # convert the run_metric dictionary to a pandas dataframe\n",
+    "            temp_df = pd.DataFrame(run_metric, index=[0])\n",
+    "            # concat the temp_df to the metrics_df\n",
+    "            metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n",
+    "\n",
+    "# move the model_name columns to the first column\n",
+    "cols = metrics_df.columns.tolist()\n",
+    "cols = cols[-1:] + cols[:-1]\n",
+    "metrics_df = metrics_df[cols]\n",
+    "metrics_df.head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernel_info": {
+   "name": "python310-sdkv2"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.10 - SDK V2",
+   "language": "python",
+   "name": "python310-sdkv2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  },
+  "microsoft": {
+   "host": {
+    "AzureML": {
+     "notebookHasBeenCompleted": true
+    }
+   },
+   "ms_spell_check": {
+    "ms_spell_check_language": "en"
+   }
+  },
+  "nteract": {
+   "version": "nteract-front-end@1.0.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/evaluation/summarization/README.md b/sdk/python/foundation-models/system/evaluation/summarization/README.md
new file mode 100644
index 0000000000..4973b2a713
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/summarization/README.md
@@ -0,0 +1,17 @@
+## Summarization
+
+### List of supported keyword arguments:
+
+| Keyword Argument  | Description                                                                           | Type      | Sample                                      |
+|:-----------------:|:--------------------------------------------------------------------------------------|-----------|---------------------------------------------|
+|      metrics      | List for subset of metrics to be computed. All supported metrics listed below.        | list<str> | ["rouge1", "rouge2", "rougeL", "rougeLsum"] |
+|    aggregator     | Boolean flag to indicate whether to aggregate rouge scores for individual data points | boolean   | true, false                                 |
+|      stemmer      | Boolean flag to indicate whether to use Porter Stemmer for suffixes                   | boolean   | true, false                                 |
+| custom_dimensions | Used to report telemetry data (can later be used to perform PII scrubbing)            | dict      |                                             |
+
+### List of supported metrics:
+
+* rouge1
+* rouge2
+* rougeLsum
+* rougeL
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization.ipynb b/sdk/python/foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization.ipynb
new file mode 100644
index 0000000000..81cfaa38b2
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization.ipynb
@@ -0,0 +1,451 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Text Summarization Evaluation - Abstractive and Extractive Summarization\n",
+    "\n",
+    "This sample shows how to evaluate a group of models against a given set of metrics for the `text-summarization` task. \n",
+    "\n",
+    "### Evaluation dataset\n",
+    "The CNN / DailyMail Dataset is an English-language dataset containing just over 300k unique news articles as written by journalists at CNN and the Daily Mail. The current version supports both extractive and abstractive summarization, though the original version was created for machine reading and comprehension and abstractive question answering. A copy of the [cnn_dailymail](https://huggingface.co/datasets/cnn_dailymail) dataset is available in the [cnn_dailymail](./cnn_dailymail) folder.\n",
+    "\n",
+    "### Model\n",
+    "The goal of evaluating models is to compare their performance on a variety of metrics. `text-summarization` is a generic task type that can be used for scenarios such as abstractive and extractive summarization. As such, the models you pick to compare must be finetuned for the same scenario. Given that we have the CNN_Dailymail dataset, we would like to look for models finetuned for this specific scenario. We will compare `sshleifer-distilbart-cnn-12-6` and `facebook-bart-large-cnn` in this sample, which are available in the `azureml` system registry.\n",
+    "\n",
+    "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). \n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick the models to evaluate.\n",
+    "* Pick and explore evaluation data.\n",
+    "* Configure the evaluation jobs.\n",
+    "* Run the evaluation jobs.\n",
+    "* Review the evaluation metrics. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running the cell below. This step is required when running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "%pip install --upgrade azure-ai-ml\n",
+    "%pip install --upgrade azure-identity\n",
+    "%pip install --upgrade datasets==2.9.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319346668
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = None\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential)\n",
+    "    subscription_id = workspace_ml_client.subscription_id\n",
+    "    workspace = workspace_ml_client.workspace_name\n",
+    "    resource_group = workspace_ml_client.resource_group_name\n",
+    "except Exception as ex:\n",
+    "    print(ex)\n",
+    "    # Enter details of your AML workspace\n",
+    "    subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "    resource_group = \"<RESOURCE_GROUP>\"\n",
+    "    workspace = \"<AML_WORKSPACE_NAME>\"\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential, subscription_id, resource_group, workspace\n",
+    "    )\n",
+    "\n",
+    "# the models are fetched from the \"azureml-staging\" preview registry; the evaluation pipeline component is fetched from the \"azureml\" system registry\n",
+    "preview_registry = \"azureml-staging\"\n",
+    "registry = \"azureml\"\n",
+    "\n",
+    "preview_registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=preview_registry\n",
+    ")\n",
+    "print(preview_registry_ml_client)\n",
+    "\n",
+    "registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=registry\n",
+    ")\n",
+    "registry_ml_client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "    print(f\"GPU compute '{compute_cluster}' found.\")\n",
+    "except Exception as ex:\n",
+    "    print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The snippet below queries the number of GPUs present on the compute. We can use it to set `gpus_per_node` to ensure utilization of all GPUs in the node."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpus_per_node = 1  # default value\n",
+    "gpu_count_found = False\n",
+    "ws_computes = workspace_ml_client.compute.list_sizes()\n",
+    "for ws_compute in ws_computes:\n",
+    "    if ws_compute.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = ws_compute.gpus\n",
+    "        print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n",
+    "# record whether the compute has GPUs (gpus_per_node keeps its default of 1 if no size matched above); print a message if it has none\n",
+    "if gpus_per_node > 0:\n",
+    "    gpu_count_found = True\n",
+    "else:\n",
+    "    gpu_count_found = False\n",
+    "    print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick the models to evaluate\n",
+    "\n",
+    "Verify that the models selected for evaluation are available in system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319354708
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n",
+    "models = [\n",
+    "    {\"name\": \"facebook-bart-large-cnn\", \"version\": \"1\"},\n",
+    "    {\"name\": \"sshleifer-distilbart-cnn-12-6\", \"version\": \"1\"},\n",
+    "]\n",
+    "for model in models:\n",
+    "    model = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    print(model.id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the test dataset for evaluation\n",
+    "A copy of the cnn_dailymail is available in the [cnn_dailymail](./cnn_dailymail/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n",
+    "* To use the entire dataset, uncomment the cells below and run."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !pip install datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from datasets import load_dataset\n",
+    "\n",
+    "# hf_test_data = load_dataset('cnn_dailymail')\n",
+    "\n",
+    "# hf_test_data['train'].to_pandas().head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_data = \"./cnn_dailymail/small-test.jsonl\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "pd.read_json(test_data, lines=True).head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the evaluation jobs using the model and data as inputs\n",
+    "\n",
+    "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model.\n",
+    "\n",
+    "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval-config.json](./eval-config.json) file. We calculate `rouge1`, `rouge2`, `rougeL` and `rougeLsum` in this sample.\n",
+    "\n",
+    "All supported evaluation configurations for `text-summarization` can be found in [README](./README.md)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml import Input\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"model_evaluation_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def evaluation_pipeline(mlflow_model):\n",
+    "    evaluation_job = pipeline_component_func(\n",
+    "        # specify the foundation model available in the azureml system registry or a model from the workspace\n",
+    "        # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n",
+    "        mlflow_model=mlflow_model,\n",
+    "        # test data\n",
+    "        test_data=Input(type=AssetTypes.URI_FILE, path=test_data),\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        input_column_names=\"input_string\",\n",
+    "        label_column_name=\"summary\",\n",
+    "        # Evaluation settings\n",
+    "        task=\"text-summarization\",\n",
+    "        # config file containing the details of evaluation metrics to calculate\n",
+    "        evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n",
+    "        # config cluster/device job is running on\n",
+    "        # set device to GPU/CPU on basis if GPU count was found\n",
+    "        device=\"gpu\" if gpu_count_found else \"cpu\",\n",
+    "    )\n",
+    "    return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the jobs, passing the model as a parameter to the pipeline created in the above step."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job for each model that we want to evaluate\n",
+    "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n",
+    "pipeline_jobs = []\n",
+    "\n",
+    "experiment_name = \"summarization-evaluation\"\n",
+    "\n",
+    "for model in models:\n",
+    "    model_object = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    pipeline_object = evaluation_pipeline(\n",
+    "        mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n",
+    "    )\n",
+    "    # don't reuse cached results from previous jobs\n",
+    "    pipeline_object.settings.force_rerun = True\n",
+    "    pipeline_object.settings.default_compute = compute_cluster\n",
+    "    pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n",
+    "    pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "        pipeline_object, experiment_name=experiment_name\n",
+    "    )\n",
+    "    # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n",
+    "    pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n",
+    "    # wait for the pipeline job to complete\n",
+    "    workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more.\n",
+    "\n",
+    "![Model evaluation dashboard in AzureML studio](./text-summarization-eval-dashboard.png)\n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "\n",
+    "metrics_df = pd.DataFrame()\n",
+    "for job in pipeline_jobs:\n",
+    "    # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "    filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n",
+    "    runs = mlflow.search_runs(\n",
+    "        experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    "    )\n",
+    "    # get the compute_metrics runs.\n",
+    "    # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "    for run in runs:\n",
+    "        # only keep runs that logged the rouge1 metric\n",
+    "        if \"rouge1\" in run.data.metrics:\n",
+    "            # get the metrics from the mlflow run\n",
+    "            run_metric = run.data.metrics\n",
+    "            # add the model name to the run_metric dictionary\n",
+    "            run_metric[\"model_name\"] = job[\"model_name\"]\n",
+    "            # convert the run_metric dictionary to a pandas dataframe\n",
+    "            temp_df = pd.DataFrame(run_metric, index=[0])\n",
+    "            # concat the temp_df to the metrics_df\n",
+    "            metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n",
+    "\n",
+    "# move the model_name columns to the first column\n",
+    "cols = metrics_df.columns.tolist()\n",
+    "cols = cols[-1:] + cols[:-1]\n",
+    "metrics_df = metrics_df[cols]\n",
+    "metrics_df.head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernel_info": {
+   "name": "python310-sdkv2"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.10 - SDK V2",
+   "language": "python",
+   "name": "python310-sdkv2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  },
+  "microsoft": {
+   "host": {
+    "AzureML": {
+     "notebookHasBeenCompleted": true
+    }
+   },
+   "ms_spell_check": {
+    "ms_spell_check_language": "en"
+   }
+  },
+  "nteract": {
+   "version": "nteract-front-end@1.0.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/evaluation/summarization/eval-config.json b/sdk/python/foundation-models/system/evaluation/summarization/eval-config.json
new file mode 100644
index 0000000000..899d0c33b8
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/summarization/eval-config.json
@@ -0,0 +1,5 @@
+{
+    "metrics": ["rouge1", "rouge2", "rougeL", "rougeLsum"],
+    "aggregator": true,
+    "stemmer": true
+}
diff --git a/sdk/python/foundation-models/system/evaluation/summarization/text-summarization-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/summarization/text-summarization-eval-dashboard.png
new file mode 100644
index 0000000000..ff381d293d
Binary files /dev/null and b/sdk/python/foundation-models/system/evaluation/summarization/text-summarization-eval-dashboard.png differ
diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/README.md b/sdk/python/foundation-models/system/evaluation/text-classification/README.md
new file mode 100644
index 0000000000..c444319df0
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/text-classification/README.md
@@ -0,0 +1,48 @@
+## Single Label Classification
+
+### List of supported keyword arguments:
+
+|     Keyword Argument     | Description                                                                    | Type             | Sample                                                          |
+|:------------------------:|:-------------------------------------------------------------------------------|------------------|-----------------------------------------------------------------|
+|         metrics          | List for subset of metrics to be computed. All supported metrics listed below. | list<str>        | ["accuracy", "f1_score_micro", "average_precision_score_macro"] |
+|       class_labels       | List for superset of all existing labels in our dataset                        | list, np.ndarray | [0, 1, 2, 3], ["CONTRADICTION", "NEUTRAL", "ENTAILMENT"]        |
+|       train_labels       | List for labels on which model is trained                                      | list, np.ndarray | [0, 1, 2, 3], ["CONTRADICTION", "NEUTRAL", "ENTAILMENT"]        |
+|      sample_weights      | List containing the weight associated with each data sample                    | list, np.ndarray | [1, 2, 3, 4, 5, 6]                                              |
+|      y_transformer       | Transformer object to be applied on y_pred                                     |                  |                                                                 |
+|        use_binary        | Compute metrics only on the true class for binary classification               | boolean          | true, false                                                     |
+| enable_metric_confidence | Computes confidence interval for supported metrics                             | boolean          | true, false                                                     |
+|        multilabel        | Boolean variable that computes multilabel metrics when set to True             | boolean          | false (Should be false for single label classification)         |
+|      positive_label      | Label to be treated as positive label                                          | int/str          | 0, "CONTRADICTION"                                              |
+|    confidence_metrics    | List of metrics to compute confidence intervals                                | list<str>        | ["accuracy", "f1_score_micro"]                                  |
+|    custom_dimensions     | Used to report telemetry data (can later be used to perform PII scrubbing)     | dict             |                                                                 |
+
+### List of supported metrics:
+
+* log_loss
+* average_precision_score_binary
+* weighted_accuracy
+* AUC_weighted
+* f1_score_micro
+* f1_score_binary
+* precision_score_micro
+* precision_score_binary
+* recall_score_weighted
+* f1_score_weighted
+* confusion_matrix
+* average_precision_score_micro
+* recall_score_binary
+* recall_score_macro
+* average_precision_score_weighted
+* AUC_binary
+* matthews_correlation
+* precision_score_macro
+* accuracy
+* average_precision_score_macro
+* AUC_macro
+* recall_score_micro
+* balanced_accuracy
+* f1_score_macro
+* precision_score_weighted
+* accuracy_table
+* AUC_micro
+* norm_macro_recall
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/entailment-contradiction.ipynb b/sdk/python/foundation-models/system/evaluation/text-classification/entailment-contradiction.ipynb
new file mode 100644
index 0000000000..efacf9fa46
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/text-classification/entailment-contradiction.ipynb
@@ -0,0 +1,484 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Text Classification Evaluation - Entailment v/s Contradiction\n",
+    "\n",
+    "This sample shows how to evaluate a group of models against a given set of metrics for the `text-classification` task. \n",
+    "\n",
+    "### Evaluation dataset\n",
+    "The Multi-Genre Natural Language Inference Corpus, or MNLI is a crowd sourced collection of sentence pairs with textual entailment annotations. Given a premise sentence and a hypothesis sentence, the task is to predict whether the premise entails the hypothesis (entailment), contradicts the hypothesis (contradiction), or neither (neutral). The [MNLI](https://huggingface.co/datasets/glue) dataset is a subset of the larger [General Language Understanding Evaluation](https://gluebenchmark.com/) dataset. A copy of this dataset is available in the [glue-mnli-dataset](./glue-mnli-dataset/) folder. \n",
+    "\n",
+    "### Model\n",
+    "The goal of evaluating models is to compare their performance on a variety of metrics. `text-classification` is a generic task type that can be used for scenarios such as sentiment analysis, emotion detection, grammar checking, spam filtering, etc. As such, the models you pick to compare must be finetuned for the same scenario. Given that we have the entailment v/s contradiction dataset, we would like to look for models finetuned for this specific scenario. We will compare `roberta-large-mnli`, `microsoft-deberta-large-mnli` and `microsoft-deberta-base-mnli` in this sample, which are available in the `azureml` system registry.\n",
+    "\n",
+    "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). \n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick the models to evaluate.\n",
+    "* Pick and explore the evaluation data.\n",
+    "* Configure the evaluation jobs.\n",
+    "* Run the evaluation jobs.\n",
+    "* Review the evaluation metrics. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace `<AML_WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running the cell below. This step is required when running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install azure-ai-ml\n",
+    "%pip install azure-identity\n",
+    "%pip install datasets==2.9.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319346668
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = None\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential)\n",
+    "    subscription_id = workspace_ml_client.subscription_id\n",
+    "    workspace = workspace_ml_client.workspace_name\n",
+    "    resource_group = workspace_ml_client.resource_group_name\n",
+    "except Exception as ex:\n",
+    "    print(ex)\n",
+    "    # Enter details of your AML workspace\n",
+    "    subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "    resource_group = \"<RESOURCE_GROUP>\"\n",
+    "    workspace = \"<AML_WORKSPACE_NAME>\"\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential, subscription_id, resource_group, workspace\n",
+    "    )\n",
+    "\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "preview_registry = \"azureml-staging\"\n",
+    "registry = \"azureml\"\n",
+    "\n",
+    "preview_registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=preview_registry\n",
+    ")\n",
+    "print(preview_registry_ml_client)\n",
+    "\n",
+    "registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=registry\n",
+    ")\n",
+    "registry_ml_client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "    print(f\"GPU compute '{compute_cluster}' found.\")\n",
+    "except Exception as ex:\n",
+    "    print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The snippet below queries the number of GPUs present on the compute. We can use it to set `gpus_per_node` to ensure utilization of all GPUs in the node."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpus_per_node = 1  # default value\n",
+    "gpu_count_found = False\n",
+    "ws_computes = workspace_ml_client.compute.list_sizes()\n",
+    "for ws_compute in ws_computes:\n",
+    "    if ws_compute.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = ws_compute.gpus\n",
+    "        print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n",
+    "# if gpu_count_found not found, then print an error\n",
+    "if gpus_per_node > 0:\n",
+    "    gpu_count_found = True\n",
+    "else:\n",
+    "    gpu_count_found = False\n",
+    "    print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick the models to evaluate\n",
+    "\n",
+    "Verify that the models selected for evaluation are available in system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n",
+    "models = [\n",
+    "    {\"name\": \"microsoft-deberta-base-mnli\", \"version\": \"1\"},\n",
+    "    {\"name\": \"microsoft-deberta-large-mnli\", \"version\": \"1\"},\n",
+    "    {\"name\": \"roberta-large-mnli\", \"version\": \"1\"},\n",
+    "    # please prepare appropriate dataset and config in similar way to run evaluation on this dataset\n",
+    "    #     {\"name\": \"roberta-large-openai-detector\", \"version\": \"1\"},\n",
+    "    #     {\"name\": \"roberta-base-openai-detector\", \"version\": \"1\"},\n",
+    "    #     {\"name\": \"distilbert-base-uncased-finetuned-sst-2-english\", \"version\": \"1\"},\n",
+    "    #     {\"name\": \"finiteautomata-bertweet-base-sentiment-analysis\", \"version\": \"1\"},\n",
+    "]\n",
+    "for model in models:\n",
+    "    model = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    print(model.id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the test dataset for evaluation\n",
+    "A copy of the MNLI is available in the [glue-mnli-dataset](./glue-mnli-dataset/)  folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* Replace numerical categories in data with the actual string labels. This mapping is available in the [./glue-mnli-dataset/label.json](./glue-mnli-dataset/label.json). This step is needed because the selected models will return labels such as `ENTAILMENT`, `CONTRADICTION`, etc. when running prediction. If the labels in your ground truth data are left as `0`, `1`, `2`, etc., then they would not match with prediction labels returned by the models.\n",
+    "* The dataset contains `premise` and `hypothesis` as two different columns. However, the models expect a single string for prediction in the format `[CLS] <premise text> [SEP] <hypothesis text> [SEP]`. Hence we merge the columns and drop the original columns.\n",
+    "* We want this sample to run quickly, so we save a smaller dataset containing 10% of the original. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "dataset_dir = \"./glue-mnli-dataset\"\n",
+    "data_file = \"train.jsonl\"\n",
+    "\n",
+    "# load the train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "df = pd.read_json(os.path.join(dataset_dir, data_file), lines=True)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the id2label json element of the label.json file into pandas table with keys as 'label' column of int64 type and values as 'label_string' column as string type\n",
+    "import json\n",
+    "\n",
+    "label_file = \"label.json\"\n",
+    "with open(os.path.join(dataset_dir, label_file)) as f:\n",
+    "    id2label = json.load(f)\n",
+    "    id2label = id2label[\"id2label\"]\n",
+    "    label_df = pd.DataFrame.from_dict(\n",
+    "        id2label, orient=\"index\", columns=[\"label_string\"]\n",
+    "    )\n",
+    "    label_df[\"label\"] = label_df.index.astype(\"int64\")\n",
+    "    label_df = label_df[[\"label\", \"label_string\"]]\n",
+    "label_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# join the train, validation and test dataframes with the id2label dataframe to get the label_string column\n",
+    "df = df.merge(label_df, on=\"label\", how=\"left\")\n",
+    "# concat the premise and hypothesis columns to with \"[CLS]\" in the beginning and \"[SEP]\" in the middle and end to get the text column\n",
+    "df[\"input_string\"] = \"[CLS] \" + df[\"premise\"] + \" [SEP] \" + df[\"hypothesis\"] + \" [SEP]\"\n",
+    "# drop the idx, premise and hypothesis columns as they are not needed\n",
+    "df = df.drop(columns=[\"idx\", \"premise\", \"hypothesis\"])\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# save 10% of the rows from the train, validation and test dataframes into files with small_ prefix in the ./dataset_dir folder\n",
+    "small_data_file = \"small_train.jsonl\"\n",
+    "df.sample(frac=0.1).to_json(\n",
+    "    os.path.join(dataset_dir, small_data_file), orient=\"records\", lines=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the evaluation jobs using the model and data as inputs\n",
+    " \n",
+    "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model. \n",
+    "\n",
+    "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval-config.json](./eval-config.json) file.\n",
+    "\n",
+    "All supported evaluation configurations for `text-classification` can be found in [README](./README.md)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n",
+    "from azure.ai.ml import PyTorchDistribution, Input\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"model_evaluation_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def evaluation_pipeline(mlflow_model):\n",
+    "    evaluation_job = pipeline_component_func(\n",
+    "        # specify the foundation model available in the azureml system registry or a model from the workspace\n",
+    "        # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n",
+    "        mlflow_model=mlflow_model,\n",
+    "        # test data\n",
+    "        test_data=Input(\n",
+    "            type=AssetTypes.URI_FILE, path=os.path.join(dataset_dir, small_data_file)\n",
+    "        ),\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        input_column_names=\"input_string\",\n",
+    "        label_column_name=\"label_string\",\n",
+    "        # Evaluation settings\n",
+    "        task=\"text-classification\",\n",
+    "        # config file containing the details of evaluation metrics to calculate\n",
+    "        evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n",
+    "        # config cluster/device job is running on\n",
+    "        # set device to GPU/CPU on basis if GPU count was found\n",
+    "        device=\"gpu\" if gpu_count_found else \"cpu\",\n",
+    "    )\n",
+    "    return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the jobs, passing the model as a parameter to the pipeline created in the above step."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job for each model that we want to evaluate\n",
+    "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n",
+    "pipeline_jobs = []\n",
+    "\n",
+    "experiment_name = \"text-classification-mnli-evaluation\"\n",
+    "\n",
+    "for model in models:\n",
+    "    model_object = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    pipeline_object = evaluation_pipeline(\n",
+    "        mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n",
+    "    )\n",
+    "    # don't reuse cached results from previous jobs\n",
+    "    pipeline_object.settings.force_rerun = True\n",
+    "    pipeline_object.settings.default_compute = compute_cluster\n",
+    "    pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n",
+    "    pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "        pipeline_object, experiment_name=experiment_name\n",
+    "    )\n",
+    "    # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n",
+    "    pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n",
+    "    display(pipeline_jobs)\n",
+    "    # wait for the pipeline job to complete\n",
+    "#     workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pipeline_job"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. \n",
+    "\n",
+    "![Model evaluation dashboard in AzureML studio](./mnli-eval-dashboard.png)\n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "\n",
+    "metrics_df = pd.DataFrame()\n",
+    "for job in pipeline_jobs:\n",
+    "    # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "    filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n",
+    "    runs = mlflow.search_runs(\n",
+    "        experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    "    )\n",
+    "    # get the compute_metrics runs.\n",
+    "    # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "    for run in runs:\n",
+    "        # else, check if run.data.metrics.accuracy exists\n",
+    "        if \"accuracy\" in run.data.metrics:\n",
+    "            # get the metrics from the mlflow run\n",
+    "            run_metric = run.data.metrics\n",
+    "            # add the model name to the run_metric dictionary\n",
+    "            run_metric[\"model_name\"] = job[\"model_name\"]\n",
+    "            # convert the run_metric dictionary to a pandas dataframe\n",
+    "            temp_df = pd.DataFrame(run_metric, index=[0])\n",
+    "            # concat the temp_df to the metrics_df\n",
+    "            metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n",
+    "\n",
+    "# move the model_name columns to the first column\n",
+    "cols = metrics_df.columns.tolist()\n",
+    "cols = cols[-1:] + cols[:-1]\n",
+    "metrics_df = metrics_df[cols]\n",
+    "metrics_df.head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10 - SDK V2",
+   "language": "python",
+   "name": "python310-sdkv2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/eval-config.json b/sdk/python/foundation-models/system/evaluation/text-classification/eval-config.json
new file mode 100644
index 0000000000..4f852bdcb1
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/text-classification/eval-config.json
@@ -0,0 +1,9 @@
+{
+  "metrics": ["average_precision_score_macro", "AUC_macro", "recall_score_macro", "average_precision_score_binary", "average_precision_score_micro", "AUC_binary", "recall_score_micro", "AUC_micro", "norm_macro_recall", "average_precision_score_weighted", "weighted_accuracy", "precision_score_micro", "f1_score_binary", "accuracy_table", "precision_score_macro", "f1_score_micro", "precision_score_weighted", "f1_score_weighted", "confusion_matrix", "recall_score_binary", "matthews_correlation", "log_loss", "accuracy", "precision_score_binary", "balanced_accuracy", "AUC_weighted", "f1_score_macro", "recall_score_weighted"],
+  "class_labels": ["CONTRADICTION", "NEUTRAL", "ENTAILMENT"],
+  "train_labels": ["CONTRADICTION", "NEUTRAL", "ENTAILMENT"],
+  "multilabel": false,
+  "enable_metric_confidence": true,
+  "confidence_metrics": ["accuracy", "f1_score_micro"],
+  "use_binary": false
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/download-dataset.py b/sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/download-dataset.py
new file mode 100644
index 0000000000..b6794c4b4f
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/download-dataset.py
@@ -0,0 +1,43 @@
+# import library to parse command line arguments
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# add an argument to specify a dataset name to download
+parser.add_argument("--dataset", type=str, default="glue", help="dataset name")
+# add an argument to specify the config name of the dataset
+parser.add_argument(
+    "--config_name", type=str, default="mnli", help="config name of the dataset"
+)
+# argument to save a fraction of the dataset
+parser.add_argument(
+    "--fraction", type=float, default=0.1, help="fraction of the dataset to save"
+)
+# add an argument to specify the directory to download the dataset to
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="./",
+    help="directory to download the dataset to",
+)
+# add an argument to specify the split of the dataset to download
+parser.add_argument(
+    "--split", type=str, default="train", help="split of the dataset to download"
+)
+args = parser.parse_args()
+
+# create the download directory if it does not exist
+if not os.path.exists(args.download_dir):
+    os.makedirs(args.download_dir)
+
+# import hugging face datasets library
+from datasets import load_dataset, get_dataset_split_names
+
+for split in get_dataset_split_names(args.dataset, config_name=args.config_name):
+    if split == args.split:
+        print(f"Loading {split} split of {args.dataset} dataset...")
+        # load the split of the dataset
+        dataset = load_dataset(args.dataset, args.config_name, split=split)
+        # save the split of the dataset to the download directory as json lines file
+        dataset.select(range(int(dataset.num_rows * args.fraction))).to_json(
+            os.path.join(args.download_dir, f"{split}.jsonl")
+        )
diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/label.json b/sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/label.json
new file mode 100644
index 0000000000..b836faff17
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/label.json
@@ -0,0 +1,12 @@
+{
+  "id2label": {
+    "0": "ENTAILMENT",
+    "1": "NEUTRAL",
+    "2": "CONTRADICTION"
+  },
+  "label2id": {
+    "ENTAILMENT": 0,
+    "CONTRADICTION": 2,
+    "NEUTRAL": 1
+  }
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/glue-sst2-dataset/download-dataset.py b/sdk/python/foundation-models/system/evaluation/text-classification/glue-sst2-dataset/download-dataset.py
new file mode 100644
index 0000000000..b6794c4b4f
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/text-classification/glue-sst2-dataset/download-dataset.py
@@ -0,0 +1,43 @@
+# import library to parse command line arguments
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# add an argument to specify a dataset name to download
+parser.add_argument("--dataset", type=str, default="glue", help="dataset name")
+# add an argument to specify the config name of the dataset
+parser.add_argument(
+    "--config_name", type=str, default="mnli", help="config name of the dataset"
+)
+# argument to save a fraction of the dataset
+parser.add_argument(
+    "--fraction", type=float, default=0.1, help="fraction of the dataset to save"
+)
+# add an argument to specify the directory to download the dataset to
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="./",
+    help="directory to download the dataset to",
+)
+# add an argument to specify the split of the dataset to download
+parser.add_argument(
+    "--split", type=str, default="train", help="split of the dataset to download"
+)
+args = parser.parse_args()
+
+# create the download directory if it does not exist
+if not os.path.exists(args.download_dir):
+    os.makedirs(args.download_dir)
+
+# import hugging face datasets library
+from datasets import load_dataset, get_dataset_split_names
+
+for split in get_dataset_split_names(args.dataset, config_name=args.config_name):
+    if split == args.split:
+        print(f"Loading {split} split of {args.dataset} dataset...")
+        # load the split of the dataset
+        dataset = load_dataset(args.dataset, args.config_name, split=split)
+        # save the split of the dataset to the download directory as json lines file
+        dataset.select(range(int(dataset.num_rows * args.fraction))).to_json(
+            os.path.join(args.download_dir, f"{split}.jsonl")
+        )
diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/glue-sst2-dataset/label.json b/sdk/python/foundation-models/system/evaluation/text-classification/glue-sst2-dataset/label.json
new file mode 100644
index 0000000000..67b701dbfd
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/text-classification/glue-sst2-dataset/label.json
@@ -0,0 +1,10 @@
+{
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE"
+  },
+  "label2id": {
+    "NEGATIVE": 0,
+    "POSITIVE": 1
+  }
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/mnli-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/text-classification/mnli-eval-dashboard.png
new file mode 100644
index 0000000000..5bb523f46d
Binary files /dev/null and b/sdk/python/foundation-models/system/evaluation/text-classification/mnli-eval-dashboard.png differ
diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/sentiment-analysis.ipynb b/sdk/python/foundation-models/system/evaluation/text-classification/sentiment-analysis.ipynb
new file mode 100644
index 0000000000..e185690d05
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/text-classification/sentiment-analysis.ipynb
@@ -0,0 +1,477 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Text Classification Evaluation - Sentiment Analysis\n",
+    "\n",
+    "This sample shows how to evaluate a group of models against a given set of metrics for the `text-classification` task. \n",
+    "\n",
+    "### Evaluation dataset\n",
+    "The Stanford Sentiment Treebank consists of sentences from movie reviews and human annotations of their sentiment. The task is to predict the sentiment of a given sentence. It uses the two-way (positive/negative) class split, with only sentence-level labels. The [SST2](https://huggingface.co/datasets/glue/viewer/sst2/validation) dataset is a subset of the larger [General Language Understanding Evaluation](https://gluebenchmark.com/) dataset. A copy of this dataset is available in the [glue-sst2-dataset](./glue-sst2-dataset/) folder.\n",
+    "\n",
+    "### Model\n",
+    "The goal of evaluating models is to compare their performance on a variety of metrics. `text-classification` is a generic task type that can be used for scenarios such as sentiment analysis, emotion detection, grammar checking, spam filtering, etc. As such, the models you pick to compare must be finetuned for the same scenario. Given that we have the sentiment analysis dataset, we would like to look for models finetuned for this specific scenario. We will compare `distilbert-base-uncased-finetuned-sst-2-english` and `finiteautomata-bertweet-base-sentiment-analysis` in this sample, which are available in the `azureml` system registry.\n",
+    "\n",
+    "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). \n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick the models to evaluate.\n",
+    "* Pick and explore the evaluation data.\n",
+    "* Configure the evaluation jobs.\n",
+    "* Run the evaluation jobs.\n",
+    "* Review the evaluation metrics. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace `<AML_WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running the cell below. This step is required when running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install azure-ai-ml\n",
+    "%pip install azure-identity\n",
+    "%pip install datasets==2.9.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319346668
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = None\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential)\n",
+    "    subscription_id = workspace_ml_client.subscription_id\n",
+    "    workspace = workspace_ml_client.workspace_name\n",
+    "    resource_group = workspace_ml_client.resource_group_name\n",
+    "except Exception as ex:\n",
+    "    print(ex)\n",
+    "    # Enter details of your AML workspace\n",
+    "    subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "    resource_group = \"<RESOURCE_GROUP>\"\n",
+    "    workspace = \"<AML_WORKSPACE_NAME>\"\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential, subscription_id, resource_group, workspace\n",
+    "    )\n",
+    "\n",
+    "# models are fetched from the preview registry (\"azureml-staging\"); the evaluation pipeline component comes from the \"azureml\" system registry\n",
+    "preview_registry = \"azureml-staging\"\n",
+    "registry = \"azureml\"\n",
+    "\n",
+    "preview_registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=preview_registry\n",
+    ")\n",
+    "print(preview_registry_ml_client)\n",
+    "\n",
+    "registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=registry\n",
+    ")\n",
+    "registry_ml_client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "    print(f\"GPU compute '{compute_cluster}' found.\")\n",
+    "except Exception as ex:\n",
+    "    print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The snippet below queries the number of GPUs present on the compute. We can use it to set `gpus_per_node` to ensure utilization of all GPUs in the node."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpus_per_node = 1  # default value\n",
+    "gpu_count_found = False\n",
+    "ws_computes = workspace_ml_client.compute.list_sizes()\n",
+    "for ws_compute in ws_computes:\n",
+    "    if ws_compute.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = ws_compute.gpus\n",
+    "        print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n",
+    "# if no GPUs were found for the compute size, print an error\n",
+    "if gpus_per_node > 0:\n",
+    "    gpu_count_found = True\n",
+    "else:\n",
+    "    gpu_count_found = False\n",
+    "    print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick the models to evaluate\n",
+    "\n",
+    "Verify that the models selected for evaluation are available in system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n",
+    "models = [\n",
+    "    {\"name\": \"distilbert-base-uncased-finetuned-sst-2-english\", \"version\": \"1\"},\n",
+    "    # please prepare appropriate dataset and config in similar way to run evaluation on this dataset\n",
+    "    #     {\"name\": \"finiteautomata-bertweet-base-sentiment-analysis\", \"version\": \"1\"},\n",
+    "]\n",
+    "for model in models:\n",
+    "    model = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    print(model.id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the test dataset for evaluation\n",
+    "A copy of the SST2 dataset is available in the [glue-sst2-dataset](./glue-sst2-dataset/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* Replace numerical categories in data with the actual string labels. This mapping is available in the [./glue-sst2-dataset/label.json](./glue-sst2-dataset/label.json). This step is needed because the selected models will return labels such as `POSITIVE`, `NEGATIVE`, etc. when running prediction. If the labels in your ground truth data are left as `0`, `1`, etc., then they would not match with prediction labels returned by the models.\n",
+    "* The dataset contains `sentence` and `label` as two different columns. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "dataset_dir = \"./glue-sst2-dataset\"\n",
+    "data_file = \"validation.jsonl\"\n",
+    "\n",
+    "# load the validation.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "df = pd.read_json(os.path.join(dataset_dir, data_file), lines=True)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the id2label json element of the label.json file into pandas table with keys as 'label' column of int64 type and values as 'label_string' column as string type\n",
+    "import json\n",
+    "\n",
+    "label_file = \"label.json\"\n",
+    "with open(os.path.join(dataset_dir, label_file)) as f:\n",
+    "    id2label = json.load(f)\n",
+    "    id2label = id2label[\"id2label\"]\n",
+    "    label_df = pd.DataFrame.from_dict(\n",
+    "        id2label, orient=\"index\", columns=[\"label_string\"]\n",
+    "    )\n",
+    "    label_df[\"label\"] = label_df.index.astype(\"int64\")\n",
+    "    label_df = label_df[[\"label\", \"label_string\"]]\n",
+    "label_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# join the validation dataframe with the id2label dataframe to get the label_string column\n",
+    "df = df.merge(label_df, on=\"label\", how=\"left\")\n",
+    "# creating a new column to match the signature of mlflow base model\n",
+    "df[\"input_string\"] = df[\"sentence\"]\n",
+    "# drop the idx, sentence columns as they are not needed\n",
+    "df = df.drop(columns=[\"idx\", \"sentence\"])\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# save a random 10% sample of the validation rows into a file with the small_ prefix in the dataset_dir folder\n",
+    "small_data_file = \"small_validation.jsonl\"\n",
+    "df.sample(frac=0.1).to_json(\n",
+    "    os.path.join(dataset_dir, small_data_file), orient=\"records\", lines=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the evaluation jobs using the model and data as inputs\n",
+    " \n",
+    "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model. \n",
+    "\n",
+    "Note that the metrics that the evaluation jobs need to calculate are specified in the [sst2-eval-config.json](./sst2-eval-config.json) file.\n",
+    "\n",
+    "All supported evaluation configurations for `text-classification` can be found in [README](./README.md)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n",
+    "from azure.ai.ml import PyTorchDistribution, Input\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"model_evaluation_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def evaluation_pipeline(mlflow_model):\n",
+    "    evaluation_job = pipeline_component_func(\n",
+    "        # specify the foundation model available in the azureml system registry or a model from the workspace\n",
+    "        # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n",
+    "        mlflow_model=mlflow_model,\n",
+    "        # test data\n",
+    "        test_data=Input(\n",
+    "            type=AssetTypes.URI_FILE, path=os.path.join(dataset_dir, small_data_file)\n",
+    "        ),\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        input_column_names=\"input_string\",\n",
+    "        label_column_name=\"label_string\",\n",
+    "        # Evaluation settings\n",
+    "        task=\"text-classification\",\n",
+    "        # config file containing the details of evaluation metrics to calculate\n",
+    "        evaluation_config=Input(\n",
+    "            type=AssetTypes.URI_FILE, path=\"./sst2-eval-config.json\"\n",
+    "        ),\n",
+    "        # config cluster/device job is running on\n",
+    "        # set device to GPU/CPU on basis if GPU count was found\n",
+    "        device=\"gpu\" if gpu_count_found else \"cpu\",\n",
+    "    )\n",
+    "    return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the jobs, passing the model as a parameter to the pipeline created in the above step."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job for each model that we want to evaluate\n",
+    "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n",
+    "pipeline_jobs = []\n",
+    "\n",
+    "experiment_name = \"text-classification-sentiment-analysis\"\n",
+    "\n",
+    "for model in models:\n",
+    "    model_object = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    pipeline_object = evaluation_pipeline(\n",
+    "        mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n",
+    "    )\n",
+    "    # don't reuse cached results from previous jobs\n",
+    "    pipeline_object.settings.force_rerun = True\n",
+    "    pipeline_object.settings.default_compute = compute_cluster\n",
+    "    pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n",
+    "    pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "        pipeline_object, experiment_name=experiment_name\n",
+    "    )\n",
+    "    # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n",
+    "    pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n",
+    "    # wait for the pipeline job to complete\n",
+    "    workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. \n",
+    "\n",
+    "![Model evaluation dashboard in AzureML studio](./sst2-eval-dashboard.png)\n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "\n",
+    "metrics_df = pd.DataFrame()\n",
+    "for job in pipeline_jobs:\n",
+    "    # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "    filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n",
+    "    runs = mlflow.search_runs(\n",
+    "        experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    "    )\n",
+    "    # get the compute_metrics runs.\n",
+    "    # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "    for run in runs:\n",
+    "        # only use runs that logged an accuracy metric\n",
+    "        if \"accuracy\" in run.data.metrics:\n",
+    "            # get the metrics from the mlflow run\n",
+    "            run_metric = run.data.metrics\n",
+    "            # add the model name to the run_metric dictionary\n",
+    "            run_metric[\"model_name\"] = job[\"model_name\"]\n",
+    "            # convert the run_metric dictionary to a pandas dataframe\n",
+    "            temp_df = pd.DataFrame(run_metric, index=[0])\n",
+    "            # concat the temp_df to the metrics_df\n",
+    "            metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n",
+    "\n",
+    "# move the model_name columns to the first column\n",
+    "cols = metrics_df.columns.tolist()\n",
+    "cols = cols[-1:] + cols[:-1]\n",
+    "metrics_df = metrics_df[cols]\n",
+    "metrics_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10 - SDK V2",
+   "language": "python",
+   "name": "python310-sdkv2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/sst2-eval-config.json b/sdk/python/foundation-models/system/evaluation/text-classification/sst2-eval-config.json
new file mode 100644
index 0000000000..869f0860f1
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/text-classification/sst2-eval-config.json
@@ -0,0 +1,9 @@
+{
+  "metrics": ["average_precision_score_macro", "AUC_macro", "recall_score_macro", "average_precision_score_binary", "average_precision_score_micro", "AUC_binary", "recall_score_micro", "AUC_micro", "norm_macro_recall", "average_precision_score_weighted", "weighted_accuracy", "precision_score_micro", "f1_score_binary", "accuracy_table", "precision_score_macro", "f1_score_micro", "precision_score_weighted", "f1_score_weighted", "confusion_matrix", "recall_score_binary", "matthews_correlation", "log_loss", "accuracy", "precision_score_binary", "balanced_accuracy", "AUC_weighted", "f1_score_macro", "recall_score_weighted"],
+  "class_labels": ["NEGATIVE", "POSITIVE"],
+  "train_labels": ["NEGATIVE", "POSITIVE"],
+  "multilabel": false,
+  "enable_metric_confidence": true,
+  "confidence_metrics": ["accuracy", "f1_score_micro"],
+  "use_binary": false
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/sst2-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/text-classification/sst2-eval-dashboard.png
new file mode 100644
index 0000000000..fc389b52b2
Binary files /dev/null and b/sdk/python/foundation-models/system/evaluation/text-classification/sst2-eval-dashboard.png differ
diff --git a/sdk/python/foundation-models/system/evaluation/text-generation/README.md b/sdk/python/foundation-models/system/evaluation/text-generation/README.md
new file mode 100644
index 0000000000..2f67947277
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/text-generation/README.md
@@ -0,0 +1,23 @@
+## Text Generation
+
+### List of supported keyword arguments:
+
+| Keyword Argument  | Description                                                                           | Type      | Sample                                   |
+|:-----------------:|:--------------------------------------------------------------------------------------|-----------|------------------------------------------|
+|      metrics      | List for subset of metrics to be computed. All supported metrics listed below.        | list<str> | ["bleu_1", "bleu_2", "rouge1", "rouge2"] |
+|     tokenizer     | Tokenizer object to perform tokenization on provided input text                       |           |                                          |
+|     smoothing     | Boolean flag to indicate if bleu score needs to be smoothened                         | boolean   | false, true                              |
+|    aggregator     | Boolean flag to indicate if need to aggregate rouge scores for individual data points | boolean   | true, false                              |
+|      stemmer      | Boolean flag to indicate whether to use Porter Stemmer for suffixes                   | boolean   | true, false                              |
+| custom_dimensions | Used to report telemetry data (can later be used to perform PII scrubbing)            | dict      |                                          |
+
+### List of supported metrics:
+
+* rouge1
+* rouge2
+* rougeLsum
+* rougeL
+* bleu_1
+* bleu_2
+* bleu_3
+* bleu_4
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/text-generation/eval-config.json b/sdk/python/foundation-models/system/evaluation/text-generation/eval-config.json
new file mode 100644
index 0000000000..8dd4358113
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/text-generation/eval-config.json
@@ -0,0 +1,6 @@
+{
+  "metrics": ["rouge1", "rouge2", "bleu_3", "bleu_4"],
+  "aggregator": true,
+  "stemmer": true,
+  "smoothing": false
+}
diff --git a/sdk/python/foundation-models/system/evaluation/text-generation/text-generation-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/text-generation/text-generation-eval-dashboard.png
new file mode 100644
index 0000000000..e9f076781f
Binary files /dev/null and b/sdk/python/foundation-models/system/evaluation/text-generation/text-generation-eval-dashboard.png differ
diff --git a/sdk/python/foundation-models/system/evaluation/text-generation/text-generation.ipynb b/sdk/python/foundation-models/system/evaluation/text-generation/text-generation.ipynb
new file mode 100644
index 0000000000..466762c3f4
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/text-generation/text-generation.ipynb
@@ -0,0 +1,442 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Text Generation Evaluation\n",
+    "\n",
+    "This sample shows how to evaluate a group of models against a given set of metrics for the `text-generation` task.\n",
+    "\n",
+    "### Evaluation dataset\n",
+    "The CNN / DailyMail Dataset is an English-language dataset containing just over 300k unique news articles as written by journalists at CNN and the Daily Mail. The current version supports both extractive and abstractive summarization, though the original version was created for machine reading and comprehension and abstractive question answering. A copy of the [cnn_dailymail](https://huggingface.co/datasets/cnn_dailymail) dataset is available in the [text-generation](./text-generation) folder.\n",
+    "\n",
+    "### Model\n",
+    "The goal of evaluating models is to compare their performance on a variety of metrics. `text-generation` is a generic task type that can be used for scenarios that generate text based on the context provided. As such, the models you pick to compare must be finetuned for the same scenario. Given that we have the cnn_dailymail dataset, we would like to look for models finetuned for this specific scenario. We will compare `distilgpt2`, `gpt2`, `gpt2-medium` and `gpt2-large` in this sample, which are available in the `azureml` system registry.\n",
+    "\n",
+    "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb).\n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick the models to evaluate.\n",
+    "* Pick and explore evaluation data.\n",
+    "* Configure the evaluation jobs.\n",
+    "* Run the evaluation jobs.\n",
+    "* Review the evaluation metrics."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace `<AML_WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running below cell. This is not an optional step if running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "%pip install --upgrade azure-ai-ml\n",
+    "%pip install --upgrade azure-identity\n",
+    "%pip install --upgrade datasets==2.9.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319346668
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = None\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential)\n",
+    "    subscription_id = workspace_ml_client.subscription_id\n",
+    "    workspace = workspace_ml_client.workspace_name\n",
+    "    resource_group = workspace_ml_client.resource_group_name\n",
+    "except Exception as ex:\n",
+    "    print(ex)\n",
+    "    # Enter details of your AML workspace\n",
+    "    subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "    resource_group = \"<RESOURCE_GROUP>\"\n",
+    "    workspace = \"<AML_WORKSPACE_NAME>\"\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential, subscription_id, resource_group, workspace\n",
+    "    )\n",
+    "\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registries, \"azureml\" and \"azureml-staging\"\n",
+    "preview_registry = \"azureml-staging\"\n",
+    "registry = \"azureml\"\n",
+    "\n",
+    "preview_registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=preview_registry\n",
+    ")\n",
+    "print(preview_registry_ml_client)\n",
+    "\n",
+    "registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=registry\n",
+    ")\n",
+    "registry_ml_client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "    print(f\"GPU compute '{compute_cluster}' found.\")\n",
+    "except Exception as ex:\n",
+    "    print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The snippet below queries the number of GPUs present on the compute. We can use it to set `gpus_per_node` to ensure utilization of all GPUs in the node."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpus_per_node = 1  # default value\n",
+    "gpu_count_found = False\n",
+    "ws_computes = workspace_ml_client.compute.list_sizes()\n",
+    "for ws_compute in ws_computes:\n",
+    "    if ws_compute.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = ws_compute.gpus\n",
+    "        print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n",
+    "# record whether the compute has GPUs; print a message if it has none\n",
+    "if gpus_per_node > 0:\n",
+    "    gpu_count_found = True\n",
+    "else:\n",
+    "    gpu_count_found = False\n",
+    "    print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick the models to evaluate\n",
+    "\n",
+    "Verify that the models selected for evaluation are available in system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319354708
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n",
+    "models = [\n",
+    "    {\"name\": \"distilgpt2\", \"version\": \"1\"},\n",
+    "    {\"name\": \"gpt2\", \"version\": \"1\"},\n",
+    "    {\"name\": \"gpt2-large\", \"version\": \"1\"},\n",
+    "    {\"name\": \"gpt2-medium\", \"version\": \"1\"},\n",
+    "]\n",
+    "for model in models:\n",
+    "    model = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    print(model.id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the test dataset for evaluation\n",
+    "A copy of the cnn_dailymail is available in the [text-generation](./text-generation/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n",
+    "* To use the entire dataset, uncomment the cells below and run."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# !pip install datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from datasets import load_dataset\n",
+    "\n",
+    "# hf_test_data = load_dataset('cnn_dailymail')\n",
+    "\n",
+    "# hf_test_data['train'].to_pandas().head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_data = \"./text-generation/small-test.jsonl\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "pd.read_json(test_data, lines=True).head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the evaluation jobs using the model and data as inputs\n",
+    "\n",
+    "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model.\n",
+    "\n",
+    "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval-config.json](./eval-config.json) file. We calculate `rouge1`, `rouge2`, `bleu_3` and `bleu_4` in this sample.\n",
+    "\n",
+    "All supported evaluation configurations for `text-generation` can be found in [README](./README.md)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml import Input\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"model_evaluation_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def evaluation_pipeline(mlflow_model):\n",
+    "    evaluation_job = pipeline_component_func(\n",
+    "        # specify the foundation model available in the azureml system registry or a model from the workspace\n",
+    "        # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n",
+    "        mlflow_model=mlflow_model,\n",
+    "        # test data\n",
+    "        test_data=Input(type=AssetTypes.URI_FILE, path=test_data),\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        input_column_names=\"input_string\",\n",
+    "        label_column_name=\"ground_truth\",\n",
+    "        # Evaluation settings\n",
+    "        task=\"text-generation\",\n",
+    "        # config file containing the details of evaluation metrics to calculate\n",
+    "        evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n",
+    "        # config cluster/device job is running on\n",
+    "        # set device to GPU/CPU on basis if GPU count was found\n",
+    "        device=\"gpu\" if gpu_count_found else \"cpu\",\n",
+    "    )\n",
+    "    return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the jobs, passing the model as a parameter to the pipeline created in the above step."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job for each model that we want to evaluate\n",
+    "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n",
+    "pipeline_jobs = []\n",
+    "\n",
+    "experiment_name = \"text-generation-evaluation\"\n",
+    "\n",
+    "for model in models:\n",
+    "    model_object = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    pipeline_object = evaluation_pipeline(\n",
+    "        mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n",
+    "    )\n",
+    "    # don't reuse cached results from previous jobs\n",
+    "    pipeline_object.settings.force_rerun = True\n",
+    "    pipeline_object.settings.default_compute = compute_cluster\n",
+    "    pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "        pipeline_object, experiment_name=experiment_name\n",
+    "    )\n",
+    "    # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n",
+    "    pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n",
+    "    # wait for the pipeline job to complete\n",
+    "    workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more.\n",
+    "\n",
+    "![Model evaluation dashboard in AzureML studio](./text-generation-eval-dashboard.png)\n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "\n",
+    "metrics_df = pd.DataFrame()\n",
+    "for job in pipeline_jobs:\n",
+    "    # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "    filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n",
+    "    runs = mlflow.search_runs(\n",
+    "        experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    "    )\n",
+    "    # get the compute_metrics runs.\n",
+    "    # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "    for run in runs:\n",
+    "        # only runs that logged `exact_match` are kept; NOTE(review): eval-config.json requests rouge/bleu only - confirm this key is logged\n",
+    "        if \"exact_match\" in run.data.metrics:\n",
+    "            # get the metrics from the mlflow run\n",
+    "            run_metric = run.data.metrics\n",
+    "            # add the model name to the run_metric dictionary\n",
+    "            run_metric[\"model_name\"] = job[\"model_name\"]\n",
+    "            # convert the run_metric dictionary to a pandas dataframe\n",
+    "            temp_df = pd.DataFrame(run_metric, index=[0])\n",
+    "            # concat the temp_df to the metrics_df\n",
+    "            metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n",
+    "\n",
+    "# move the model_name columns to the first column\n",
+    "cols = metrics_df.columns.tolist()\n",
+    "cols = cols[-1:] + cols[:-1]\n",
+    "metrics_df = metrics_df[cols]\n",
+    "metrics_df.head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10 - SDK V2",
+   "language": "python",
+   "name": "python310-sdkv2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/sdk/python/foundation-models/system/evaluation/token-classification/README.md b/sdk/python/foundation-models/system/evaluation/token-classification/README.md
new file mode 100644
index 0000000000..6f82df11eb
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/token-classification/README.md
@@ -0,0 +1,22 @@
+## Named Entity Recognition
+
+### List of supported keyword arguments:
+
+|     Keyword Argument     | Description                                                                    | Type      | Sample                                                        |
+|:------------------------:|:-------------------------------------------------------------------------------|-----------|---------------------------------------------------------------|
+|         metrics          | List for subset of metrics to be computed. All supported metrics listed below. | list<str> | ["accuracy", "f1_score_macro", "f1_score_micro"]              |
+|       labels_list        | List for supported labels for tokens                                           | list<str> | ["B-PER", "I-PER", "O", "B-LOC", "I-LOC", "B-MISC", "I-MISC"] |
+|    custom_dimensions     | Used to report telemetry data (can later be used to perform PII scrubbing)     | dict      |                                                               |
+
+### List of supported metrics:
+
+* f1_score_macro
+* precision_score_weighted
+* precision_score_macro
+* f1_score_weighted
+* precision_score_micro
+* recall_score_weighted
+* f1_score_micro
+* accuracy
+* recall_score_micro
+* recall_score_macro
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/token-classification/eval-config.json b/sdk/python/foundation-models/system/evaluation/token-classification/eval-config.json
new file mode 100644
index 0000000000..360dd40365
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/token-classification/eval-config.json
@@ -0,0 +1,3 @@
+{
+  "metrics": ["accuracy", "f1_score_macro", "f1_score_micro"]
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/token-classification/news-articles-entity-recognition.ipynb b/sdk/python/foundation-models/system/evaluation/token-classification/news-articles-entity-recognition.ipynb
new file mode 100644
index 0000000000..a1154858de
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/token-classification/news-articles-entity-recognition.ipynb
@@ -0,0 +1,450 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Text Named Entity Recognition Evaluation\n",
+    "\n",
+    "This sample shows how to evaluate a group of models against a given set of metrics for the `text-named-entity-recognition` task. \n",
+    "\n",
+    "### Evaluation dataset\n",
+    "The CoNLL-2003 shared task data files contain four columns separated by a single space. Each word has been put on a separate line and there is an empty line after each sentence. The first item on each line is a word, the second a part-of-speech (POS) tag, the third a syntactic chunk tag and the fourth the named entity tag. The chunk tags and the named entity tags have the format I-TYPE which means that the word is inside a phrase of type TYPE. Only if two phrases of the same type immediately follow each other, the first word of the second phrase will have tag B-TYPE to show that it starts a new phrase. A word with tag O is not part of a phrase. Note the dataset uses IOB2 tagging scheme, whereas the original dataset uses IOB1. A copy of the [CoNLL-2003](https://huggingface.co/datasets/conll2003) dataset is available in the [conll2003](./conll2003) folder.\n",
+    "\n",
+    "### Model\n",
+    "The goal of evaluating models is to compare their performance on a variety of metrics. `text-named-entity-recognition` is a generic task type that can be used for scenarios that recognise named entities such as persons, locations, organizations, etc. As such, the models you pick to compare must be finetuned for the same scenario. Given that we have the CoNLL-2003 dataset, we would like to look for models finetuned for this specific scenario. We will review `jean-baptiste-camembert-ner` in this sample, which is available in the `azureml` system registry.\n",
+    "\n",
+    "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). \n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick the models to evaluate.\n",
+    "* Pick and explore evaluation data.\n",
+    "* Configure the evaluation jobs.\n",
+    "* Run the evaluation jobs.\n",
+    "* Review the evaluation metrics. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace `<AML_WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running below cell. This is not an optional step if running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "%pip install --upgrade azure-ai-ml\n",
+    "%pip install --upgrade azure-identity\n",
+    "%pip install --upgrade datasets==2.9.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319346668
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = None\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential)\n",
+    "    subscription_id = workspace_ml_client.subscription_id\n",
+    "    workspace = workspace_ml_client.workspace_name\n",
+    "    resource_group = workspace_ml_client.resource_group_name\n",
+    "except Exception as ex:\n",
+    "    print(ex)\n",
+    "    # Enter details of your AML workspace\n",
+    "    subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "    resource_group = \"<RESOURCE_GROUP>\"\n",
+    "    workspace = \"<AML_WORKSPACE_NAME>\"\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential, subscription_id, resource_group, workspace\n",
+    "    )\n",
+    "\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registries, \"azureml\" and \"azureml-staging\"\n",
+    "preview_registry = \"azureml-staging\"\n",
+    "registry = \"azureml\"\n",
+    "\n",
+    "preview_registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=preview_registry\n",
+    ")\n",
+    "print(preview_registry_ml_client)\n",
+    "\n",
+    "registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=registry\n",
+    ")\n",
+    "registry_ml_client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "    print(f\"GPU compute '{compute_cluster}' found.\")\n",
+    "except Exception as ex:\n",
+    "    print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The snippet below queries the number of GPUs present on the compute. We can use it to set `gpus_per_node` to ensure utilization of all GPUs in the node."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpus_per_node = 1  # default value\n",
+    "gpu_count_found = False\n",
+    "ws_computes = workspace_ml_client.compute.list_sizes()\n",
+    "for ws_compute in ws_computes:\n",
+    "    if ws_compute.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = ws_compute.gpus\n",
+    "        print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n",
+    "# record whether the compute has GPUs; print a message if it has none\n",
+    "if gpus_per_node > 0:\n",
+    "    gpu_count_found = True\n",
+    "else:\n",
+    "    gpu_count_found = False\n",
+    "    print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick the models to evaluate\n",
+    "\n",
+    "Verify that the models selected for evaluation are available in system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319354708
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n",
+    "models = [\n",
+    "    {\"name\": \"Jean-Baptiste-camembert-ner\", \"version\": \"1\"},\n",
+    "]\n",
+    "for model in models:\n",
+    "    model = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    print(model.id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the test dataset for evaluation\n",
+    "A copy of the CoNLL2003 is available in the [conll2003](./conll2003/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n",
+    "* To use the entire dataset, uncomment the cells below and run."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !pip install datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from datasets import load_dataset\n",
+    "\n",
+    "# hf_test_data = load_dataset('conll2003')\n",
+    "\n",
+    "# hf_test_data['train'].to_pandas().head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_data = \"./conll2003/small-test.jsonl\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "pd.read_json(test_data, lines=True).head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the evaluation jobs using the model and data as inputs\n",
+    "\n",
+    "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model.\n",
+    "\n",
+    "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval-config.json](./eval-config.json) file. We calculate `accuracy`, `f1_score_macro` and `f1_score_micro` in this sample.\n",
+    "\n",
+    "All supported evaluation configurations for `text-named-entity-recognition` can be found in [README](./README.md)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml import Input\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"model_evaluation_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def evaluation_pipeline(mlflow_model):\n",
+    "    evaluation_job = pipeline_component_func(\n",
+    "        # specify the foundation model available in the azureml system registry or a model from the workspace\n",
+    "        # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n",
+    "        mlflow_model=mlflow_model,\n",
+    "        # test data\n",
+    "        test_data=Input(type=AssetTypes.URI_FILE, path=test_data),\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        input_column_names=\"input_string\",\n",
+    "        label_column_name=\"ner_tags_str\",\n",
+    "        # Evaluation settings\n",
+    "        task=\"text-named-entity-recognition\",\n",
+    "        # config file containing the details of evaluation metrics to calculate\n",
+    "        evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n",
+    "        # config cluster/device job is running on\n",
+    "        # set device to GPU/CPU on basis if GPU count was found\n",
+    "        device=\"gpu\" if gpu_count_found else \"cpu\",\n",
+    "    )\n",
+    "    return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the jobs, passing the model as a parameter to the pipeline created in the above step."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job for each model that we want to evaluate\n",
+    "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n",
+    "pipeline_jobs = []\n",
+    "\n",
+    "experiment_name = \"text-named-entity-recognition-evaluation\"\n",
+    "\n",
+    "for model in models:\n",
+    "    model_object = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    pipeline_object = evaluation_pipeline(\n",
+    "        mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n",
+    "    )\n",
+    "    # don't reuse cached results from previous jobs\n",
+    "    pipeline_object.settings.force_rerun = True\n",
+    "    pipeline_object.settings.default_compute = compute_cluster\n",
+    "    pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n",
+    "    pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "        pipeline_object, experiment_name=experiment_name\n",
+    "    )\n",
+    "    # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n",
+    "    pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n",
+    "    # wait for the pipeline job to complete\n",
+    "    workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more.\n",
+    "\n",
+    "![Model evaluation dashboard in AzureML studio](./text-named-entity-recognition-eval-dashboard.png)\n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "\n",
+    "metrics_df = pd.DataFrame()\n",
+    "for job in pipeline_jobs:\n",
+    "    # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "    filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n",
+    "    runs = mlflow.search_runs(\n",
+    "        experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    "    )\n",
+    "    # get the compute_metrics runs.\n",
+    "    # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "    for run in runs:\n",
+    "        # else, check if run.data.metrics.accuracy exists\n",
+    "        if \"accuracy\" in run.data.metrics:\n",
+    "            # get the metrics from the mlflow run\n",
+    "            run_metric = run.data.metrics\n",
+    "            # add the model name to the run_metric dictionary\n",
+    "            run_metric[\"model_name\"] = job[\"model_name\"]\n",
+    "            # convert the run_metric dictionary to a pandas dataframe\n",
+    "            temp_df = pd.DataFrame(run_metric, index=[0])\n",
+    "            # concat the temp_df to the metrics_df\n",
+    "            metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n",
+    "\n",
+    "# move the model_name columns to the first column\n",
+    "cols = metrics_df.columns.tolist()\n",
+    "cols = cols[-1:] + cols[:-1]\n",
+    "metrics_df = metrics_df[cols]\n",
+    "metrics_df.head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernel_info": {
+   "name": "python310-sdkv2"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.10 - SDK V2",
+   "language": "python",
+   "name": "python310-sdkv2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  },
+  "microsoft": {
+   "host": {
+    "AzureML": {
+     "notebookHasBeenCompleted": true
+    }
+   },
+   "ms_spell_check": {
+    "ms_spell_check_language": "en"
+   }
+  },
+  "nteract": {
+   "version": "nteract-front-end@1.0.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/evaluation/token-classification/text-named-entity-recognition-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/token-classification/text-named-entity-recognition-eval-dashboard.png
new file mode 100644
index 0000000000..088d148de6
Binary files /dev/null and b/sdk/python/foundation-models/system/evaluation/token-classification/text-named-entity-recognition-eval-dashboard.png differ
diff --git a/sdk/python/foundation-models/system/evaluation/translation/README.md b/sdk/python/foundation-models/system/evaluation/translation/README.md
new file mode 100644
index 0000000000..6c6c0383e8
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/translation/README.md
@@ -0,0 +1,17 @@
+## Translation
+
+### List of supported keyword arguments:
+
+| Keyword Argument  | Description                                                                    | Type      | Sample                                   |
+|:-----------------:|:-------------------------------------------------------------------------------|-----------|------------------------------------------|
+|      metrics      | List for subset of metrics to be computed. All supported metrics listed below. | list<str> | ["bleu_1", "bleu_2", "bleu_3", "bleu_4"] |
+|     tokenizer     | Tokenizer object to perform tokenization on provided input text                |           |                                          |
+|     smoothing     | Boolean flag to indicate if bleu score needs to be smoothened                  | boolean   | false, true                              |
+| custom_dimensions | Used to report telemetry data (can later be used to perform PII scrubbing)     | dict      |                                          |
+
+### List of supported metrics:
+
+* bleu_1
+* bleu_2
+* bleu_3
+* bleu_4
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/translation/eval-config.json b/sdk/python/foundation-models/system/evaluation/translation/eval-config.json
new file mode 100644
index 0000000000..213d60cd45
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/translation/eval-config.json
@@ -0,0 +1,4 @@
+{
+  "metrics": ["bleu_1", "bleu_2", "bleu_3", "bleu_4"],
+  "smoothing": false
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/evaluation/translation/text-translation-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/translation/text-translation-eval-dashboard.png
new file mode 100644
index 0000000000..24a97420d0
Binary files /dev/null and b/sdk/python/foundation-models/system/evaluation/translation/text-translation-eval-dashboard.png differ
diff --git a/sdk/python/foundation-models/system/evaluation/translation/translation-romanian-to-english.ipynb b/sdk/python/foundation-models/system/evaluation/translation/translation-romanian-to-english.ipynb
new file mode 100644
index 0000000000..19b81a66ad
--- /dev/null
+++ b/sdk/python/foundation-models/system/evaluation/translation/translation-romanian-to-english.ipynb
@@ -0,0 +1,454 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Text Translation Evaluation - Translation between Romanian and English\n",
+    "\n",
+    "This sample shows how to evaluate a group of models against a given set of metrics for the `text-translation` task. \n",
+    "\n",
+    "### Evaluation dataset\n",
+    "Translation dataset based on the data from statmt.org. Versions exist for different years using a combination of data sources. The base wmt allows you to create a custom dataset by choosing your own data/language pair. A copy of the [wmt16/ro-en](https://huggingface.co/datasets/wmt16/viewer/ro-en) dataset is available in the [wmt16_ro-en](./wmt16_ro-en) folder.\n",
+    "\n",
+    "### Model\n",
+    "The goal of evaluating models is to compare their performance on a variety of metrics. `text-translation` is a generic task type that can be used for translation between two languages. As such, the models you pick to compare must be finetuned for the same scenario. Given that we have the WMT16-RO-EN dataset, we would like to look for models finetuned for this specific scenario. We will compare `t5-base`, `t5-small` and `t5-large` in this sample, which are available in the `azureml` system registry.\n",
+    "\n",
+    "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). \n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick the models to evaluate.\n",
+    "* Pick and explore the evaluation data.\n",
+    "* Configure the evaluation jobs.\n",
+    "* Run the evaluation jobs.\n",
+    "* Review the evaluation metrics. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running the cell below. This step is required when running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "%pip install --upgrade azure-ai-ml\n",
+    "%pip install --upgrade azure-identity\n",
+    "%pip install --upgrade datasets==2.9.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319346668
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = None\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential)\n",
+    "    subscription_id = workspace_ml_client.subscription_id\n",
+    "    workspace = workspace_ml_client.workspace_name\n",
+    "    resource_group = workspace_ml_client.resource_group_name\n",
+    "except Exception as ex:\n",
+    "    print(ex)\n",
+    "    # Enter details of your AML workspace\n",
+    "    subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "    resource_group = \"<RESOURCE_GROUP>\"\n",
+    "    workspace = \"<AML_WORKSPACE_NAME>\"\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential, subscription_id, resource_group, workspace\n",
+    "    )\n",
+    "\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "preview_registry = \"azureml-staging\"\n",
+    "registry = \"azureml\"\n",
+    "\n",
+    "preview_registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=preview_registry\n",
+    ")\n",
+    "print(preview_registry_ml_client)\n",
+    "\n",
+    "registry_ml_client = MLClient(\n",
+    "    credential, subscription_id, resource_group, registry_name=registry\n",
+    ")\n",
+    "registry_ml_client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "    print(f\"GPU compute '{compute_cluster}' found.\")\n",
+    "except Exception as ex:\n",
+    "    print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The snippet below lets us query the number of GPUs present on the compute. We can use it to set `gpus_per_node` to ensure utilization of all GPUs in the node."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpus_per_node = 1  # default value\n",
+    "gpu_count_found = False\n",
+    "ws_computes = workspace_ml_client.compute.list_sizes()\n",
+    "for ws_compute in ws_computes:\n",
+    "    if ws_compute.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = ws_compute.gpus\n",
+    "        print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n",
+    "# if gpu_count_found not found, then print an error\n",
+    "if gpus_per_node > 0:\n",
+    "    gpu_count_found = True\n",
+    "else:\n",
+    "    gpu_count_found = False\n",
+    "    print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick the models to evaluate\n",
+    "\n",
+    "Verify that the models selected for evaluation are available in system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "gather": {
+     "logged": 1679319354708
+    },
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n",
+    "models = [\n",
+    "    {\"name\": \"t5-base\", \"version\": \"1\"},\n",
+    "    {\"name\": \"t5-large\", \"version\": \"1\"},\n",
+    "    {\"name\": \"t5-small\", \"version\": \"1\"},\n",
+    "]\n",
+    "for model in models:\n",
+    "    model = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    print(model.id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the test dataset for evaluation\n",
+    "A copy of the wmt16/ro-en dataset is available in the [wmt16_ro-en](./wmt16_ro-en/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n",
+    "* To use the entire dataset, uncomment the cells below and run."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !pip install datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from datasets import load_dataset\n",
+    "\n",
+    "# hf_test_data = load_dataset('wmt16', 'ro-en')\n",
+    "\n",
+    "# hf_test_data['train'].to_pandas().head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_data = \"./wmt16_ro-en/small-test.jsonl\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "pd.read_json(test_data, lines=True).head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the evaluation jobs using the model and data as inputs\n",
+    "\n",
+    "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model.\n",
+    "\n",
+    "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval-config.json](./eval-config.json) file. We calculate `bleu_1`, `bleu_2`, `bleu_3` and `bleu_4` in this sample.\n",
+    "\n",
+    "All supported evaluation configurations for `text-translation` can be found in [README](./README.md)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml import Input\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"model_evaluation_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def evaluation_pipeline(mlflow_model):\n",
+    "    evaluation_job = pipeline_component_func(\n",
+    "        # specify the foundation model available in the azureml system registry or a model from the workspace\n",
+    "        # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n",
+    "        mlflow_model=mlflow_model,\n",
+    "        # test data\n",
+    "        test_data=Input(type=AssetTypes.URI_FILE, path=test_data),\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        input_column_names=\"input_string\",\n",
+    "        label_column_name=\"ro\",\n",
+    "        # Evaluation settings\n",
+    "        task=\"text-translation\",\n",
+    "        # config file containing the details of evaluation metrics to calculate\n",
+    "        evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n",
+    "        # config cluster/device job is running on\n",
+    "        # set device to GPU/CPU on basis if GPU count was found\n",
+    "        device=\"gpu\" if gpu_count_found else \"cpu\",\n",
+    "    )\n",
+    "    return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the jobs, passing the model as a parameter to the pipeline created in the above step."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job for each model that we want to evaluate\n",
+    "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n",
+    "pipeline_jobs = []\n",
+    "\n",
+    "experiment_name = \"text-translation-evaluation\"\n",
+    "\n",
+    "for model in models:\n",
+    "    model_object = preview_registry_ml_client.models.get(\n",
+    "        model[\"name\"], version=model[\"version\"]\n",
+    "    )\n",
+    "    pipeline_object = evaluation_pipeline(\n",
+    "        mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n",
+    "    )\n",
+    "    # don't reuse cached results from previous jobs\n",
+    "    pipeline_object.settings.force_rerun = True\n",
+    "    pipeline_object.settings.default_compute = compute_cluster\n",
+    "    pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n",
+    "    pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "        pipeline_object, experiment_name=experiment_name\n",
+    "    )\n",
+    "    # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n",
+    "    pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n",
+    "    # wait for the pipeline job to complete\n",
+    "    workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more.\n",
+    "\n",
+    "![Model evaluation dashboard in AzureML studio](./text-translation-eval-dashboard.png)\n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "\n",
+    "metrics_df = pd.DataFrame()\n",
+    "for job in pipeline_jobs:\n",
+    "    # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "    filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n",
+    "    runs = mlflow.search_runs(\n",
+    "        experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    "    )\n",
+    "    # get the compute_metrics runs.\n",
+    "    # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "    for run in runs:\n",
+    "        # check if run.data.metrics.bleu_1 exists\n",
+    "        if \"bleu_1\" in run.data.metrics:\n",
+    "            # get the metrics from the mlflow run\n",
+    "            run_metric = run.data.metrics\n",
+    "            # add the model name to the run_metric dictionary\n",
+    "            run_metric[\"model_name\"] = job[\"model_name\"]\n",
+    "            # convert the run_metric dictionary to a pandas dataframe\n",
+    "            temp_df = pd.DataFrame(run_metric, index=[0])\n",
+    "            # concat the temp_df to the metrics_df\n",
+    "            metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n",
+    "\n",
+    "# move the model_name columns to the first column\n",
+    "cols = metrics_df.columns.tolist()\n",
+    "cols = cols[-1:] + cols[:-1]\n",
+    "metrics_df = metrics_df[cols]\n",
+    "metrics_df.head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernel_info": {
+   "name": "python310-sdkv2"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.10 - SDK V2",
+   "language": "python",
+   "name": "python310-sdkv2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  },
+  "microsoft": {
+   "host": {
+    "AzureML": {
+     "notebookHasBeenCompleted": true
+    }
+   },
+   "ms_spell_check": {
+    "ms_spell_check_language": "en"
+   }
+  },
+  "nteract": {
+   "version": "nteract-front-end@1.0.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/finetune/question-answering/extractive-qa.ipynb b/sdk/python/foundation-models/system/finetune/question-answering/extractive-qa.ipynb
new file mode 100644
index 0000000000..bda741c304
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/question-answering/extractive-qa.ipynb
@@ -0,0 +1,625 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Question Answering - Extractive Q&A with the SQUAD (Wikipedia Q&A) dataset\n",
+    "\n",
+    "This sample shows how to use `question-answering` components from the `azureml` system registry to fine tune a model to extract answers from a given context using the SQUAD dataset. We then deploy it to an online endpoint for real time inference. The model is trained on a tiny sample of the dataset with a small number of epochs to illustrate the fine tuning approach.\n",
+    "\n",
+    "### Training data\n",
+    "We will use the [SQUAD](https://huggingface.co/datasets/squad) dataset. A copy of this dataset is available in the [squad-dataset](./squad-dataset/) folder for easy access. The [original source](https://rajpurkar.github.io/SQuAD-explorer/) of dataset describes it as follows: _\"Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable.\"_\n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `fill-mask` task are generally good foundation models to fine tune for `question-answering`, specifically the extractive Q&A type. We will use the `bert-base-uncased` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. Optionally, if you need to fine tune a model that is available on HuggingFace, but not available in the `azureml` system registry, you can either [import](https://github.com/Azure/azureml-examples) the model or use the `huggingface_id` parameter to instruct the components to pull the model directly from HuggingFace.  \n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick a model to fine tune.\n",
+    "* Pick and explore training data.\n",
+    "* Configure the fine tuning job.\n",
+    "* Run the fine tuning job.\n",
+    "* Register the fine tuned model. \n",
+    "* Deploy the fine tuned model for real time inference.\n",
+    "* Clean up resources. \n",
+    "\n"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running the cell below. This step is required when running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install azure-ai-ml\n",
+    "%pip install azure-identity\n",
+    "%pip install datasets==2.9.0\n",
+    "%pip install mlflow\n",
+    "%pip install azureml-mlflow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential=credential)\n",
+    "except:\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential,\n",
+    "        subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "        resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "        workspace_name=\"<WORKSPACE_NAME>\",\n",
+    "    )\n",
+    "\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml\")\n",
+    "\n",
+    "experiment_name = \"question-answering-extractive-qna\"\n",
+    "\n",
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "except Exception as ex:\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpu_count_found = False\n",
+    "workspace_compute_sku_list = workspace_ml_client.compute.list_sizes()\n",
+    "available_sku_sizes = []\n",
+    "for compute_sku in workspace_compute_sku_list:\n",
+    "    available_sku_sizes.append(compute_sku.name)\n",
+    "    if compute_sku.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = compute_sku.gpus\n",
+    "        gpu_count_found = True\n",
+    "# report the GPU count when the SKU was found, otherwise raise an error\n",
+    "if gpu_count_found:\n",
+    "    print(f\"Number of GPU's in compute {compute.size}: {gpus_per_node}\")\n",
+    "else:\n",
+    "    raise ValueError(\n",
+    "        f\"Number of GPU's in compute {compute.size} not found. Available skus are: {available_sku_sizes}. \"\n",
+    "        f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n",
+    "    )\n",
+    "# CPU based finetune works only for single-node single-process\n",
+    "if gpus_per_node == 0:\n",
+    "    print(\n",
+    "        \"WARNING! Selected compute doesn't have GPU. CPU based finetune is experimental and works on a single process in a single node\"\n",
+    "    )\n",
+    "    gpus_per_node = 1\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a foundation model to fine tune\n",
+    "\n",
+    "Models that support `fill-mask` tasks are good candidates to fine tune for extractive Q&A style `question answering`. You can browse these models in the Model Catalog in the AzureML Studio, filtering by the `fill-mask` task. In this example, we use the `bert-base-uncased` model. If you have opened this notebook for a different model, replace the model name and version accordingly. \n",
+    "\n",
+    "Note the model id property of the model. This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in model details page in AzureML Studio Model Catalog. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"bert-base-uncased\"\n",
+    "model_version = \"1\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the dataset for fine-tuning the model\n",
+    "\n",
+    "A copy of the SQUAD dataset is available in the [squad-dataset](./squad-dataset/) folder. The next few cells show basic data preparation for fine tuning:\n",
+    "* Visualize some data rows. Take note of the dataset fields: `question`, `context`, `answers`, `id` and `title`. The `answers` field holds the nested keys `answer_start` and `text` in json format. The keys `question`, `context`, `answers`, `answer_start` and `text` are the relevant fields that need to be mapped to the parameters of the fine tuning pipeline.\n",
+    "* The dataset does not have a test split, so split the validation set into two halves, one for validation and the other for test.\n",
+    "* We want this sample to run quickly, so save smaller `train` and `validation` files containing 5% of the original. This means the fine tuned model will have lower accuracy, hence it should not be put to real-world use. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the train.jsonl and validation.jsonl files from the ./squad-dataset/ folder and show first 5 rows\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(\"squad-dataset/train.jsonl\", lines=True)\n",
+    "validation_df = pd.read_json(\"squad-dataset/validation.jsonl\", lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# save 5% of the rows from the train dataframe into files with small_ prefix in the ./squad-dataset folder\n",
+    "train_df.sample(frac=0.05).to_json(\n",
+    "    \"./squad-dataset/small_train.jsonl\", orient=\"records\", lines=True\n",
+    ")\n",
+    "# the original dataset does not have a test split, so split the validation dataframe into validation and test dataframes equally\n",
+    "validation_df, test_df = (\n",
+    "    validation_df[: len(validation_df) // 2],\n",
+    "    validation_df[len(validation_df) // 2 :],\n",
+    ")\n",
+    "# save 5% of the rows from the validation and test dataframes into files with small_ prefix in the ./squad-dataset folder\n",
+    "validation_df.sample(frac=0.05).to_json(\n",
+    "    \"./squad-dataset/small_validation.jsonl\", orient=\"records\", lines=True\n",
+    ")\n",
+    "test_df.sample(frac=0.05).to_json(\n",
+    "    \"./squad-dataset/small_test.jsonl\", orient=\"records\", lines=True\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the fine tuning job using the model and data as inputs\n",
+    " \n",
+    "Create the job that uses the `question-answering` pipeline component. [Learn more]() about all the parameters supported for fine tuning."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n",
+    "from azure.ai.ml import PyTorchDistribution, Input\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"question_answering_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def create_pipeline():\n",
+    "    finetuning_job = pipeline_component_func(\n",
+    "        # specify the id of the foundation model from the azureml system registry, picked in step #2\n",
+    "        mlflow_model_path=foundation_model.id,\n",
+    "        # huggingface_id = 'bert-base-uncased', # if you want to use a huggingface model, uncomment this line and comment the above line\n",
+    "        compute_model_import=compute_cluster,\n",
+    "        compute_preprocess=compute_cluster,\n",
+    "        compute_finetune=compute_cluster,\n",
+    "        compute_model_evaluation=compute_cluster,\n",
+    "        # map the dataset splits to parameters\n",
+    "        train_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./squad-dataset/small_train.jsonl\"\n",
+    "        ),\n",
+    "        validation_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./squad-dataset/small_validation.jsonl\"\n",
+    "        ),\n",
+    "        test_file_path=Input(type=\"uri_file\", path=\"./squad-dataset/small_test.jsonl\"),\n",
+    "        evaluation_config=Input(\n",
+    "            type=\"uri_file\", path=\"./question-answering-config.json\"\n",
+    "        ),\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        # the question whose answer needs to be extracted from the provided context\n",
+    "        # question_key parameter maps to the \"question\" field in the SQuAD dataset\n",
+    "        question_key=\"question\",\n",
+    "        # the context that contains the answer to the question\n",
+    "        # context_key parameter maps to the \"context\" field in the SQuAD dataset\n",
+    "        context_key=\"context\",\n",
+    "        # The value of this field is text in json format with two nested keys, answer_start_key and answer_text_key with their corresponding values\n",
+    "        # answers_key parameter maps to the \"answers\" field in the SQuAD dataset\n",
+    "        answers_key=\"answers\",\n",
+    "        # Refers to the position where the answer begins in the context. Needs a value that maps to a nested key in the values of the answers_key parameter.\n",
+    "        # in the SQuAD dataset, the answer_start_key maps to \"answer_start\" under \"answers\"\n",
+    "        answer_start_key=\"answer_start\",\n",
+    "        # Contains the answer to the question. Needs a value that maps to a nested key in the values of the answers_key parameter\n",
+    "        # in the SQuAD dataset, the answer_text_key maps to \"text\" under \"answers\"\n",
+    "        answer_text_key=\"text\",\n",
+    "        # training settings\n",
+    "        number_of_gpu_to_use_finetuning=gpus_per_node,  # set to the number of GPUs available in the compute\n",
+    "        num_train_epochs=2,\n",
+    "        learning_rate=2e-5,\n",
+    "    )\n",
+    "    return {\n",
+    "        # map the output of the fine tuning job to the output of the pipeline job so that we can easily register the fine tuned model\n",
+    "        # registering the model is required to deploy the model to an online or batch endpoint\n",
+    "        \"trained_model\": finetuning_job.outputs.mlflow_model_folder\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "pipeline_object = create_pipeline()\n",
+    "\n",
+    "# don't use cached results from previous jobs\n",
+    "pipeline_object.settings.force_rerun = True"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the job"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job\n",
+    "pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "    pipeline_object, experiment_name=experiment_name\n",
+    ")\n",
+    "# wait for the pipeline job to complete\n",
+    "workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review training and evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. \n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "filter = \"tags.mlflow.rootRunId='\" + pipeline_job.name + \"'\"\n",
+    "runs = mlflow.search_runs(\n",
+    "    experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    ")\n",
+    "training_run = None\n",
+    "evaluation_run = None\n",
+    "# get the training and evaluation runs.\n",
+    "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "for run in runs:\n",
+    "    # check if run.data.metrics.epoch exists\n",
+    "    if \"epoch\" in run.data.metrics:\n",
+    "        training_run = run\n",
+    "    # else, check if run.data.metrics.exact_match exists\n",
+    "    elif \"exact_match\" in run.data.metrics:\n",
+    "        evaluation_run = run"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if training_run:\n",
+    "    print(\"Training metrics:\\n\\n\")\n",
+    "    print(json.dumps(training_run.data.metrics, indent=2))\n",
+    "else:\n",
+    "    print(\"No Training job found\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if evaluation_run:\n",
+    "    print(\"Evaluation metrics:\\n\\n\")\n",
+    "    print(json.dumps(evaluation_run.data.metrics, indent=2))\n",
+    "else:\n",
+    "    print(\"No Evaluation job found\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Register the fine tuned model with the workspace\n",
+    "\n",
+    "We will register the model from the output of the fine tuning job. This will track lineage between the fine tuned model and the fine tuning job. The fine tuning job, further, tracks lineage to the foundation model, data and training code."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.entities import Model\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# check if the `trained_model` output is available\n",
+    "print(\"pipeline job outputs: \", workspace_ml_client.jobs.get(pipeline_job.name).outputs)\n",
+    "\n",
+    "# build the path to the `trained_model` output of the pipeline job\n",
+    "model_path_from_job = \"azureml://jobs/{0}/outputs/{1}\".format(\n",
+    "    pipeline_job.name, \"trained_model\"\n",
+    ")\n",
+    "\n",
+    "finetuned_model_name = model_name + \"-extractive-qna\"\n",
+    "finetuned_model_name = finetuned_model_name.replace(\"/\", \"-\")\n",
+    "print(\"path to register model: \", model_path_from_job)\n",
+    "prepare_to_register_model = Model(\n",
+    "    path=model_path_from_job,\n",
+    "    type=AssetTypes.MLFLOW_MODEL,\n",
+    "    name=finetuned_model_name,\n",
+    "    version=timestamp,  # use timestamp as version to avoid version conflict\n",
+    "    description=model_name + \" fine tuned model for extractive Q&A\",\n",
+    ")\n",
+    "print(\"prepare to register model: \\n\", prepare_to_register_model)\n",
+    "# register the model from pipeline job output\n",
+    "registered_model = workspace_ml_client.models.create_or_update(\n",
+    "    prepare_to_register_model\n",
+    ")\n",
+    "print(\"registered model: \\n\", registered_model)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 7. Deploy the fine tuned model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "\n",
+    "online_endpoint_name = \"ext-qna-\" + timestamp\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \"\n",
+    "    + registered_model.name\n",
+    "    + \", fine tuned model for extractive Q&A\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can find here the list of SKU's supported for deployment - [Managed online endpoints SKU list](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=registered_model.id,\n",
+    "    instance_type=\"Standard_DS3_v2\",\n",
+    "    instance_count=1,\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 8. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch some sample data from the test dataset and submit it to the online endpoint for inference. We will then display the scored answers alongside the ground truth answers."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read ./squad-dataset/small_test.jsonl into a pandas dataframe\n",
+    "import pandas as pd\n",
+    "import json\n",
+    "\n",
+    "test_df = pd.read_json(\"./squad-dataset/small_test.jsonl\", orient=\"records\", lines=True)\n",
+    "# take 10 random samples\n",
+    "test_df = test_df.sample(n=10)\n",
+    "# rebuild index\n",
+    "test_df.reset_index(drop=True, inplace=True)\n",
+    "# flatten the json object in the \"answers\" column with the keys \"answer_start\" and \"text\"\n",
+    "json_struct = json.loads(test_df.to_json(orient=\"records\"))\n",
+    "test_df = pd.json_normalize(json_struct)\n",
+    "# drop id and title columns\n",
+    "test_df = test_df.drop(columns=[\"id\", \"title\"])\n",
+    "test_df.head(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a json object with \"inputs\" as key, holding parallel lists under the \"question\" and \"context\" keys\n",
+    "test_json = {\n",
+    "    \"inputs\": {\n",
+    "        \"question\": test_df[\"question\"].tolist(),\n",
+    "        \"context\": test_df[\"context\"].tolist(),\n",
+    "    }\n",
+    "}\n",
+    "print(test_json)\n",
+    "# write the json object to a file named sample_score.json in the ./squad-dataset folder\n",
+    "with open(\"./squad-dataset/sample_score.json\", \"w\") as f:\n",
+    "    json.dump(test_json, f)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=\"./squad-dataset/sample_score.json\",\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the response to a pandas dataframe and rename column 0 to scored_answer\n",
+    "response_df = pd.read_json(response)\n",
+    "response_df = response_df.rename(columns={0: \"scored_answer\"})\n",
+    "response_df.head(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# merge the test dataframe and the response dataframe on the index\n",
+    "merged_df = pd.merge(test_df, response_df, left_index=True, right_index=True)\n",
+    "# drop the answers.answer_start column\n",
+    "merged_df = merged_df.drop(columns=[\"answers.answer_start\"])\n",
+    "# rename the answers.text column to ground_truth_answers\n",
+    "merged_df = merged_df.rename(columns={\"answers.text\": \"ground_truth_answers\"})\n",
+    "merged_df.head(10)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 9. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "notebooks-venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/finetune/question-answering/question-answering-config.json b/sdk/python/foundation-models/system/finetune/question-answering/question-answering-config.json
new file mode 100644
index 0000000000..15165acfe5
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/question-answering/question-answering-config.json
@@ -0,0 +1,7 @@
+{
+  "metrics": ["exact_match", "f1_score"],
+  "regexes_to_ignore": ["$[A-Z]+"],
+  "ignore_case": false,
+  "ignore_numbers": false,
+  "ignore_punctuations": true
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/download-dataset.py b/sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/download-dataset.py
new file mode 100644
index 0000000000..e9ffb0b999
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/download-dataset.py
@@ -0,0 +1,28 @@
+# import library to parse command line arguments
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# add an argument to specify a dataset name to download
+parser.add_argument("--dataset", type=str, default="squad", help="dataset name")
+# add an argument to specify the directory to download the dataset to
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="data",
+    help="directory to download the dataset to",
+)
+args = parser.parse_args()
+
+# create the download directory if it does not exist
+if not os.path.exists(args.download_dir):
+    os.makedirs(args.download_dir)
+
+# import hugging face datasets library
+from datasets import load_dataset, get_dataset_split_names
+
+for split in get_dataset_split_names(args.dataset):
+    # load the split of the dataset
+    dataset = load_dataset(args.dataset, split=split)
+    # save the split of the dataset to the download directory as json lines file
+    dataset.to_json(os.path.join(args.download_dir, f"{split}.jsonl"))
+    # print dataset features
diff --git a/sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/sample_score.json b/sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/sample_score.json
new file mode 100644
index 0000000000..56a73ed392
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/sample_score.json
@@ -0,0 +1 @@
+{"inputs": {"question": ["Who would have been the lowest-ranked class?", "What is the Republic of Kenya named after?", "Who was the creator of the theme for the 50th Anniversary special?", "What is the only divisor besides 1 that a prime number can have?", "In what year did ENR compile data in nine market segments?", "When was Iqbal elected president of the Muslim League?", "How fast were the winds around St. Augustine in the 1964 hurricane?", "Who assisted Wesley with ordaining Whatcoat and Vasey as presbyters?", "What goal does Islamism have when it comes to society and government?", "What group is Newcastle native Andy Taylor the former lead guitarist of?"], "context": ["Kublai's government after 1262 was a compromise between preserving Mongol interests in China and satisfying the demands of his Chinese subjects. He instituted the reforms proposed by his Chinese advisers by centralizing the bureaucracy, expanding the circulation of paper money, and maintaining the traditional monopolies on salt and iron. He restored the Imperial Secretariat and left the local administrative structure of past Chinese dynasties unchanged. However, Kublai rejected plans to revive the Confucian imperial examinations and divided Yuan society into three, later four, classes with the Han Chinese occupying the lowest rank. Kublai's Chinese advisers still wielded significant power in the government, but their official rank was nebulous.", "The Republic of Kenya is named after Mount Kenya. The origin of the name Kenya is not clear, but perhaps linked to the Kikuyu, Embu and Kamba words Kirinyaga, Kirenyaa and Kiinyaa which mean \"God's resting place\" in all three languages. If so, then the British may not so much have mispronounced it ('Keenya'), as misspelled it. 
Prehistoric volcanic eruptions of Mount Kenya (now extinct) may have resulted in its association with divinity and creation among the indigenous Bantu ethnic groups, who are the native inhabitants of the agricultural land surrounding Mount Kenya.[original research?]", "A new arrangement of the theme, once again by Gold, was introduced in the 2007 Christmas special episode, \"Voyage of the Damned\"; Gold returned as composer for the 2010 series. He was responsible for a new version of the theme which was reported to have had a hostile reception from some viewers. In 2011, the theme tune charted at number 228 of radio station Classic FM's Hall of Fame, a survey of classical music tastes. A revised version of Gold's 2010 arrangement had its debut over the opening titles of the 2012 Christmas special \"The Snowmen\", and a further revision of the arrangement was made for the 50th Anniversary special \"The Day of the Doctor\" in November 2013.[citation needed]", "A prime number (or a prime) is a natural number greater than 1 that has no positive divisors other than 1 and itself. A natural number greater than 1 that is not a prime number is called a composite number. For example, 5 is prime because 1 and 5 are its only positive integer factors, whereas 6 is composite because it has the divisors 2 and 3 in addition to 1 and 6. The fundamental theorem of arithmetic establishes the central role of primes in number theory: any integer greater than 1 can be expressed as a product of primes that is unique up to ordering. The uniqueness in this theorem requires excluding 1 as a prime because one can include arbitrarily many instances of 1 in any factorization, e.g., 3, 1 \u00b7 3, 1 \u00b7 1 \u00b7 3, etc. are all valid factorizations of 3.", "Engineering News-Record (ENR) is a trade magazine for the construction industry. Each year, ENR compiles and reports on data about the size of design and construction companies. 
They publish a list of the largest companies in the United States (Top-40) and also a list the largest global firms (Top-250, by amount of work they are doing outside their home country). In 2014, ENR compiled the data in nine market segments. It was divided as transportation, petroleum, buildings, power, industrial, water, manufacturing, sewer/waste, telecom, hazardous waste plus a tenth category for other projects. In their reporting on the Top 400, they used data on transportation, sewer, hazardous waste and water to rank firms as heavy contractors.", "Iqbal expressed fears that not only would secularism and secular nationalism weaken the spiritual foundations of Islam and Muslim society, but that India's Hindu-majority population would crowd out Muslim heritage, culture and political influence. In his travels to Egypt, Afghanistan, Palestine and Syria, he promoted ideas of greater Islamic political co-operation and unity, calling for the shedding of nationalist differences. Sir Muhammad Iqbal was elected president of the Muslim League in 1930 at its session in Allahabad as well as for the session in Lahore in 1932. In his Allahabad Address on 29 December 1930, Iqbal outlined a vision of an independent state for Muslim-majority provinces in northwestern India. This address later inspired the Pakistan movement.", "Jacksonville has suffered less damage from hurricanes than most other east coast cities, although the threat does exist for a direct hit by a major hurricane. The city has only received one direct hit from a hurricane since 1871; however, Jacksonville has experienced hurricane or near-hurricane conditions more than a dozen times due to storms crossing the state from the Gulf of Mexico to the Atlantic Ocean, or passing to the north or south in the Atlantic and brushing past the area. The strongest effect on Jacksonville was from Hurricane Dora in 1964, the only recorded storm to hit the First Coast with sustained hurricane-force winds. 
The eye crossed St. Augustine with winds that had just barely diminished to 110 mph (180 km/h), making it a strong Category 2 on the Saffir-Simpson Scale. Jacksonville also suffered damage from 2008's Tropical Storm Fay which crisscrossed the state, bringing parts of Jacksonville under darkness for four days. Similarly, four years prior to this, Jacksonville was inundated by Hurricane Frances and Hurricane Jeanne, which made landfall south of the area. These tropical cyclones were the costliest indirect hits to Jacksonville. Hurricane Floyd in 1999 caused damage mainly to Jacksonville Beach. During Floyd, the Jacksonville Beach pier was severely damaged, and later demolished. The rebuilt pier was later damaged by Fay, but not destroyed. Tropical Storm Bonnie would cause minor damage in 2004, spawning a minor tornado in the process. On May 28, 2012, Jacksonville was hit by Tropical Storm Beryl, packing winds up to 70 miles per hour (113 km/h) which made landfall near Jacksonville Beach.", "Some argue that The United Methodist Church can lay a claim on apostolic succession, as understood in the traditional sense. As a result of the American Revolution, John Wesley was compelled in 1784 to break with standard practice and ordain two of his lay preachers as presbyters, Thomas Vasey and Richard Whatcoat. Dr. Thomas Coke, already an Anglican priest, assisted Wesley in this action. Coke was then \"set apart\" as a superintendent (bishop) by Wesley and dispatched with Vasey and Whatcoat to America to take charge of Methodist activities there. In defense of his action to ordain, Wesley himself cited an ancient opinion from the Church of Alexandria, which held that bishops and presbyters constituted one order and therefore, bishops are to be elected from and by the presbyterate. He knew that for two centuries the succession of bishops in the Church of Alexandria was preserved through ordination by presbyters alone and was considered valid by the ancient church. 
Methodists today who would argue for apostolic succession would do so on these grounds.", "Islamism, also known as Political Islam (Arabic: \u0625\u0633\u0644\u0627\u0645 \u0633\u064a\u0627\u0633\u064a\u200e isl\u0101m siy\u0101s\u012b), is an Islamic revival movement often characterized by moral conservatism, literalism, and the attempt \"to implement Islamic values in all spheres of life.\" Islamism favors the reordering of government and society in accordance with the Shari'a. The different Islamist movements have been described as \"oscillating between two poles\": at one end is a strategy of Islamization of society through state power seized by revolution or invasion; at the other \"reformist\" pole Islamists work to Islamize society gradually \"from the bottom up\". The movements have \"arguably altered the Middle East more than any trend since the modern states gained independence\", redefining \"politics and even borders\" according to one journalist (Robin Wright).", "Lindisfarne are a folk-rock group with a strong Tyneside connection. Their most famous song, \"Fog on the Tyne\" (1971), was covered by Geordie ex-footballer Paul Gascoigne in 1990. Venom, reckoned by many to be the originators of black metal and extremely influential to the extreme metal scene as a whole, formed in Newcastle in 1979. Folk metal band Skyclad, often regarded as the first folk metal band, also formed in Newcastle after the break-up of Martin Walkyier thrash metal band, Sabbat. Andy Taylor, former lead guitarist of Duran Duran was born here in 1961. Brian Johnson was a member of local rock band Geordie before becoming the lead vocalist of AC/DC."]}}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/download-dataset.py b/sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/download-dataset.py
new file mode 100644
index 0000000000..c25ad19702
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/download-dataset.py
@@ -0,0 +1,38 @@
+# Save the leading fraction of every split of a dataset (CNN DailyMail by default).
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# dataset to download; defaults to the dataset used by the news-summary sample
+parser.add_argument("--dataset", type=str, default="cnn_dailymail", help="dataset name")
+# config name of the dataset ("3.0.0" is a config of cnn_dailymail)
+parser.add_argument(
+    "--config_name", type=str, default="3.0.0", help="config name of the dataset"
+)
+# fraction of the rows of every split to save, taken from the start of the split
+parser.add_argument(
+    "--fraction", type=float, default=0.05, help="fraction of the dataset to save"
+)
+# directory that receives one json lines file per split
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="data",
+    help="directory to download the dataset to",
+)
+args = parser.parse_args()
+
+# create the download directory; exist_ok avoids the check-then-create race
+# and makes re-running against an existing directory a no-op
+os.makedirs(args.download_dir, exist_ok=True)
+
+# import hugging face datasets library (done after the arguments are parsed)
+from datasets import load_dataset, get_dataset_split_names
+
+for split in get_dataset_split_names(args.dataset, config_name=args.config_name):
+    print(f"Loading {split} split of {args.dataset} dataset...")
+    # load the split of the dataset
+    dataset = load_dataset(args.dataset, args.config_name, split=split)
+    # keep the first `fraction` of the rows and save them as a json lines file
+    dataset.select(range(int(dataset.num_rows * args.fraction))).to_json(
+        os.path.join(args.download_dir, f"{split}.jsonl")
+    )
diff --git a/sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/sample_score.json b/sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/sample_score.json
new file mode 100644
index 0000000000..fea5783af5
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/sample_score.json
@@ -0,0 +1 @@
+{"inputs": {"input_string": ["(CNN)Deputies rushed Kenneth Morgan Stancil III from court Thursday after the 20-year-old murder suspect swore at a judge and tried to flip over a table. Stancil is accused of killing an employee Monday at Wayne Community College in Goldsboro, North Carolina. Relatives have said victim Ron Lane was gay, CNN affiliate WNCN reported, and investigators are looking into whether the shooting was a hate crime. Authorities arrested Stancil after he was found sleeping on a Florida beach on Tuesday. Just a few minutes into Thursday's hearing on the first-degree murder charge he faces, Stancil snapped back at the judge after he was offered a court-appointed lawyer. \"No, I don't need one,\" said Stancil, who stood before the judge with his legs shackled and his arms handcuffed in front of him. \"You know what I'm saying? I knew I would get life anyway.\" Superior Court Judge Arnold O. Jones interjected, pointing out that the maximum sentence Stancil faces is the death penalty. \"Yes, I know that,\" Stancil fired back. \"But when I knew what I had to do and I knew when I got caught, you know, I knew in my mind that I could get life, I could get the death penalty. You know what I'm saying? Do you follow my topic? I would have killed you, you know what I'm saying, if you're a f---ing child molester.\" The judge told him not to swear. \"I don't give a f--- what you want,\" Stancil said, lunging forward and lifting up the table in front of him. Deputies quickly corralled him and hustled him from the courtroom. The hearing resumed about 25 minutes later, when Stancil was brought back into the courtroom, this time with his arms handcuffed behind him. When asked again by Jones whether he wanted a lawyer, his response was quick -- and calm. \"Yes, sir,\" he said. 
In an interview with CNN affiliate WRAL, Stancil described himself as a neo-Nazi and said he hates gay people \"with a passion.\" Stancil had worked for Lane, the school's print shop operator, as part of a work-study program, but was let go from the program in early March because of poor attendance, college officials said. During the interview, and during a court appearance in Florida on Tuesday, Stancil said Lane deserved to die, accusing him of being a child molester who'd made advances in online messages to Stancil's 16-year-old brother. Lane's family has described those accusations as untrue and slanderous. His cousin, Steve Smith, told WRAL that Lane never made sexual advances toward children or anyone with whom he worked. He described him as a loving man who was dedicated to family and friends. \"Yes, Ron was gay. But people need to get over it,\" Smith said. \"That's between him and the Lord, him and his savior.\""]}}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/finetune/summarization/news-summary.ipynb b/sdk/python/foundation-models/system/finetune/summarization/news-summary.ipynb
new file mode 100644
index 0000000000..a4c9433fb3
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/summarization/news-summary.ipynb
@@ -0,0 +1,613 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summarization - Generate news headlines style summary \n",
+    "\n",
+    "This sample shows how to use `summarization` components from the `azureml` system registry to fine tune a model to generate a summary of a news article. We then deploy it to an online endpoint for real time inference. The model is trained on a tiny sample of the dataset with a small number of epochs to illustrate the fine tuning approach.\n",
+    "\n",
+    "### Training data\n",
+    "We will use the [CNN DailyMail](https://huggingface.co/datasets/cnn_dailymail) dataset. A copy of this dataset is available in the [news-summary-dataset](./news-summary-dataset/) folder for easy access. \n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `translation` task are generally good foundation models to fine tune for `summarization`. We will use the `t5-small` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. Optionally, if you need to fine tune a model that is available on HuggingFace, but not available in `azureml` system registry, you can either [import](https://github.com/Azure/azureml-examples) the model or use the `huggingface_id` parameter to instruct the components to pull the model directly from HuggingFace.  \n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick a model to fine tune.\n",
+    "* Pick and explore training data.\n",
+    "* Configure the fine tuning job.\n",
+    "* Run the fine tuning job.\n",
+    "* Register the fine tuned model. \n",
+    "* Deploy the fine tuned model for real time inference.\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running the cell below. This step is required when running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install azure-ai-ml\n",
+    "%pip install azure-identity\n",
+    "%pip install datasets==2.9.0\n",
+    "%pip install mlflow\n",
+    "%pip install azureml-mlflow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential=credential)\n",
+    "except:\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential,\n",
+    "        subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "        resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "        workspace_name=\"<WORKSPACE_NAME>\",\n",
+    "    )\n",
+    "\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml\")\n",
+    "\n",
+    "experiment_name = \"summarization-news-summary\"\n",
+    "\n",
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "except Exception as ex:\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpu_count_found = False\n",
+    "workspace_compute_sku_list = workspace_ml_client.compute.list_sizes()\n",
+    "available_sku_sizes = []\n",
+    "for compute_sku in workspace_compute_sku_list:\n",
+    "    available_sku_sizes.append(compute_sku.name)\n",
+    "    if compute_sku.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = compute_sku.gpus\n",
+    "        gpu_count_found = True\n",
+    "# if gpu_count_found not found, then print an error\n",
+    "if gpu_count_found:\n",
+    "    print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n",
+    "else:\n",
+    "    raise ValueError(\n",
+    "        f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
+    "        f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n",
+    "    )\n",
+    "# CPU based finetune works only for single-node single-process\n",
+    "if gpus_per_node == 0:\n",
+    "    print(\n",
+    "        \"WARNING! Selected compute doesn't have GPU. CPU based finetune is experimental and works on a single process in a single node\"\n",
+    "    )\n",
+    "    gpus_per_node = 1\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a foundation model to fine tune\n",
+    "\n",
+    "Models that support `translation` tasks are good candidates to fine tune for `summarization`. You can browse these models in the Model Catalog in the AzureML Studio, filtering by the `translation` task. In this example, we use the `t5-small` model. If you have opened this notebook for a different model, replace the model name and version accordingly. \n",
+    "\n",
+    "Note the model id property of the model. This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in model details page in AzureML Studio Model Catalog. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"t5-small\"\n",
+    "model_version = \"1\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the dataset for fine-tuning the model\n",
+    "\n",
+    "> The [CNN DailyMail](https://huggingface.co/datasets/cnn_dailymail) dataset is larger than 1GB when uncompressed. The [download-dataset.py](./news-summary-dataset/download-dataset.py) script supports downloading a smaller fraction of the dataset. The files in the [news-summary-dataset](./news-summary-dataset/) folder contain about 3% of the original dataset rows.  \n",
+    "\n",
+    "A copy of the dataset is available in the [news-summary-dataset](./news-summary-dataset/) folder. \n",
+    "* Visualize some data rows. \n",
+    "* We want this sample to run quickly, so save smaller `train`, `validation` and `test` files containing 20% of the already trimmed rows. This means the fine tuned model will have lower accuracy, hence it should not be put to real-world use. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "# load the train.jsonl, test.jsonl and validation.jsonl files from the ./news-summary-dataset/ folder and show first 5 rows\n",
+    "train_df = pd.read_json(\"./news-summary-dataset/train.jsonl\", lines=True)\n",
+    "validation_df = pd.read_json(\"./news-summary-dataset/validation.jsonl\", lines=True)\n",
+    "test_df = pd.read_json(\"./news-summary-dataset/test.jsonl\", lines=True)\n",
+    "# drop the id column as it is not needed for fine tuning\n",
+    "train_df.drop(columns=[\"id\"], inplace=True)\n",
+    "validation_df.drop(columns=[\"id\"], inplace=True)\n",
+    "test_df.drop(columns=[\"id\"], inplace=True)\n",
+    "train_df.head(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# save 20% of the rows from the dataframes into files with small_ prefix in the ./news-summary-dataset folder\n",
+    "train_df.sample(frac=0.2).to_json(\n",
+    "    \"./news-summary-dataset/small_train.jsonl\", orient=\"records\", lines=True\n",
+    ")\n",
+    "validation_df.sample(frac=0.2).to_json(\n",
+    "    \"./news-summary-dataset/small_validation.jsonl\", orient=\"records\", lines=True\n",
+    ")\n",
+    "test_df.sample(frac=0.2).to_json(\n",
+    "    \"./news-summary-dataset/small_test.jsonl\", orient=\"records\", lines=True\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the fine tuning job using the model and data as inputs\n",
+    " \n",
+    "Create the job that uses the `summarization` pipeline component. [Learn more]() about all the parameters supported for fine tuning."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n",
+    "from azure.ai.ml import PyTorchDistribution, Input\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"summarization_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def create_pipeline():\n",
+    "    finetuning_job = pipeline_component_func(\n",
+    "        # specify the id of the foundation model from the azureml system registry, identified in step #2\n",
+    "        mlflow_model_path=foundation_model.id,\n",
+    "        # huggingface_id = 'bert-base-uncased', # if you want to use a huggingface model, uncomment this line and comment the above line\n",
+    "        compute_model_import=compute_cluster,\n",
+    "        compute_preprocess=compute_cluster,\n",
+    "        compute_finetune=compute_cluster,\n",
+    "        compute_model_evaluation=compute_cluster,\n",
+    "        # map the dataset splits to parameters\n",
+    "        train_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./news-summary-dataset/small_train.jsonl\"\n",
+    "        ),\n",
+    "        validation_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./news-summary-dataset/small_validation.jsonl\"\n",
+    "        ),\n",
+    "        test_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./news-summary-dataset/small_test.jsonl\"\n",
+    "        ),\n",
+    "        evaluation_config=Input(type=\"uri_file\", path=\"./summarization-config.json\"),\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        # document_key parameter maps to the \"article\" field in the news summary dataset\n",
+    "        document_key=\"article\",\n",
+    "        # summary_key parameter maps to the \"highlights\" field in the news summary dataset\n",
+    "        summary_key=\"highlights\",\n",
+    "        # training settings\n",
+    "        number_of_gpu_to_use_finetuning=gpus_per_node,  # set to the number of GPUs available in the compute\n",
+    "        num_train_epochs=2,\n",
+    "        learning_rate=2e-5,\n",
+    "    )\n",
+    "    return {\n",
+    "        # map the output of the fine tuning job to the output of the pipeline job so that we can easily register the fine tuned model\n",
+    "        # registering the model is required to deploy the model to an online or batch endpoint\n",
+    "        \"trained_model\": finetuning_job.outputs.mlflow_model_folder\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "pipeline_object = create_pipeline()\n",
+    "\n",
+    "# don't use cached results from previous jobs\n",
+    "pipeline_object.settings.force_rerun = True"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the job"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job\n",
+    "pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "    pipeline_object, experiment_name=experiment_name\n",
+    ")\n",
+    "# wait for the pipeline job to complete\n",
+    "workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review training and evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. \n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "filter = \"tags.mlflow.rootRunId='\" + pipeline_job.name + \"'\"\n",
+    "runs = mlflow.search_runs(\n",
+    "    experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    ")\n",
+    "training_run = None\n",
+    "evaluation_run = None\n",
+    "# get the training and evaluation runs.\n",
+    "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "for run in runs:\n",
+    "    # check if run.data.metrics.epoch exists\n",
+    "    if \"epoch\" in run.data.metrics:\n",
+    "        training_run = run\n",
+    "    # else, check if run.data.metrics.rouge1 exists\n",
+    "    elif \"rouge1\" in run.data.metrics:\n",
+    "        evaluation_run = run"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if training_run:\n",
+    "    print(\"Training metrics:\\n\\n\")\n",
+    "    print(json.dumps(training_run.data.metrics, indent=2))\n",
+    "else:\n",
+    "    print(\"No Training job found\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if evaluation_run:\n",
+    "    print(\"Evaluation metrics:\\n\\n\")\n",
+    "    print(json.dumps(evaluation_run.data.metrics, indent=2))\n",
+    "else:\n",
+    "    print(\"No Evaluation job found\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Register the fine tuned model with the workspace\n",
+    "\n",
+    "We will register the model from the output of the fine tuning job. This will track lineage between the fine tuned model and the fine tuning job. The fine tuning job, further, tracks lineage to the foundation model, data and training code."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.entities import Model\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# check if the `trained_model` output is available\n",
+    "print(\"pipeline job outputs: \", workspace_ml_client.jobs.get(pipeline_job.name).outputs)\n",
+    "\n",
+    "# fetch the model from pipeline job output - not working, hence fetching from fine tune child job\n",
+    "model_path_from_job = \"azureml://jobs/{0}/outputs/{1}\".format(\n",
+    "    pipeline_job.name, \"trained_model\"\n",
+    ")\n",
+    "\n",
+    "finetuned_model_name = model_name + \"-news-summary\"\n",
+    "finetuned_model_name = finetuned_model_name.replace(\"/\", \"-\")\n",
+    "print(\"path to register model: \", model_path_from_job)\n",
+    "prepare_to_register_model = Model(\n",
+    "    path=model_path_from_job,\n",
+    "    type=AssetTypes.MLFLOW_MODEL,\n",
+    "    name=finetuned_model_name,\n",
+    "    version=timestamp,  # use timestamp as version to avoid version conflict\n",
+    "    description=model_name + \" fine tuned model for summarizing news articles\",\n",
+    ")\n",
+    "print(\"prepare to register model: \\n\", prepare_to_register_model)\n",
+    "# register the model from pipeline job output\n",
+    "registered_model = workspace_ml_client.models.create_or_update(\n",
+    "    prepare_to_register_model\n",
+    ")\n",
+    "print(\"registered model: \\n\", registered_model)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 7. Deploy the fine tuned model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "\n",
+    "online_endpoint_name = \"news-summary-\" + timestamp\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \"\n",
+    "    + registered_model.name\n",
+    "    + \", fine tuned model for emotion detection\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can find here the list of SKU's supported for deployment - [Managed online endpoints SKU list](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=registered_model.id,\n",
+    "    instance_type=\"Standard_DS3_v2\",\n",
+    "    instance_count=1,\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 8. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch some sample data from the test dataset and submit it to the online endpoint for inference. We will then display the scored summaries alongside the ground truth summaries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read ./news-summary-dataset/test.jsonl into a pandas dataframe\n",
+    "import pandas as pd\n",
+    "import json\n",
+    "\n",
+    "test_df = pd.read_json(\n",
+    "    \"./news-summary-dataset/test.jsonl\", orient=\"records\", lines=True\n",
+    ")\n",
+    "# drop the id column\n",
+    "test_df.drop(columns=[\"id\"], inplace=True)\n",
+    "# take 1 random sample\n",
+    "test_df = test_df.sample(n=1)\n",
+    "# rebuild index\n",
+    "test_df.reset_index(drop=True, inplace=True)\n",
+    "# rename the highlights column to ground_truth_summary\n",
+    "test_df.rename(columns={\"highlights\": \"ground_truth_summary\"}, inplace=True)\n",
+    "test_df.head(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a json object with the key as \"inputs\" and value as a list of values from the article column of the test dataframe\n",
+    "test_json = {\"inputs\": {\"input_string\": test_df[\"article\"].tolist()}}\n",
+    "# save the json object to a file named sample_score.json in the ./news-summary-dataset folder\n",
+    "with open(\"./news-summary-dataset/sample_score.json\", \"w\") as f:\n",
+    "    json.dump(test_json, f)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> If the input data is long or there are too many records, you may run into the following error: \"Failed to test real-time endpoint\n",
+    "upstream request timeout Please check this guide to understand why this error code might have been returned [https://docs.microsoft.com/en-us/azure/machine-learning/how-to-troubleshoot-online-endpoints#http-status-codes]\". Try to submit smaller and fewer inputs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=\"./news-summary-dataset/sample_score.json\",\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the response to a pandas dataframe\n",
+    "response_df = pd.read_json(response)\n",
+    "# rename summary_text column to scored_summary\n",
+    "response_df.rename(columns={0: \"scored_summary\"}, inplace=True)\n",
+    "response_df.head(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# merge the test dataframe and the response dataframe on the index\n",
+    "merged_df = pd.merge(test_df, response_df, left_index=True, right_index=True)\n",
+    "merged_df.head(1)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 9. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "notebooks-venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/finetune/summarization/summarization-config.json b/sdk/python/foundation-models/system/finetune/summarization/summarization-config.json
new file mode 100644
index 0000000000..899d0c33b8
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/summarization/summarization-config.json
@@ -0,0 +1,5 @@
+{
+    "metrics": ["rouge1", "rouge2", "rougeL", "rougeLsum"],
+    "aggregator": true,
+    "stemmer": true
+}
diff --git a/sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/label.json b/sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/label.json
new file mode 100644
index 0000000000..a3d64dfc98
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/label.json
@@ -0,0 +1,18 @@
+{
+  "id2label": {
+    "0": "anger",
+    "1": "fear",
+    "2": "joy",
+    "3": "love",
+    "4": "sadness",
+    "5": "surprise"
+  },
+  "label2id": {
+    "anger": 0,
+    "fear": 1,
+    "joy": 2,
+    "love": 3,
+    "sadness": 4,
+    "surprise": 5
+  }
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/sample_score.json b/sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/sample_score.json
new file mode 100644
index 0000000000..7e526c30ee
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/sample_score.json
@@ -0,0 +1 @@
+{"inputs": {"input_string": ["i plan to do so by obtaining an mba and from that mba program i feel that the most valuable outcomes i would like", "i came out of the airport that makes me feel irritable uncomfortable and even sadder", "i couldnt help feeling for him and this awful predicament he lives with on a daily and nightly basis and i was just so glad that once bel started to see the light he stuck it out and stood by daniel whilst no one else did including his family who im afraid i got really disgusted with", "i feel like i should have some sort of rockstar razzle dazzle lifestyle but i would at least like to spend a third of my life doing something i feel is worthwhile", "i love to dance but often feel inhibited by my own body unsure what i am capable of hyper concerned about other people watching me and having opinions on my style or just feeling awkward as if i have no idea what i am supposed to do here", "i feel so greedy so needy so helpless", "i feel is entirely more dangerous", "i mean i feel like a broke record sometimes", "i now im graduating in two days but i feel so sad right now", "i feel empty inside not surprising considering i havent eaten all day"]}}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/finetune/text-classification/emotion-detection.ipynb b/sdk/python/foundation-models/system/finetune/text-classification/emotion-detection.ipynb
new file mode 100644
index 0000000000..fd99a247ea
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/text-classification/emotion-detection.ipynb
@@ -0,0 +1,628 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Text Classification - Emotion Detection \n",
+    "\n",
+    "This sample shows how to use `text-classification` components from the `azureml` system registry to fine tune a model to detect emotions using the emotion dataset. We then deploy the fine tuned model to an online endpoint for real time inference. The model is trained on a tiny sample of the dataset with a small number of epochs to illustrate the fine tuning approach.\n",
+    "\n",
+    "### Training data\n",
+    "We will use the [emotion](https://huggingface.co/datasets/dair-ai/emotion) dataset. A copy of this dataset is available in the [emotion-dataset](./emotion-dataset/) folder. \n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `fill-mask` task are generally good foundation models to fine tune for `text-classification`. We will use the `bert-base-uncased` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. Optionally, if you need to fine tune a model that is available on HuggingFace, but not available in `azureml` system registry, you can either [import](https://github.com/Azure/azureml-examples) the model or use the `huggingface_id` parameter to instruct the components to pull the model directly from HuggingFace. \n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick a model to fine tune.\n",
+    "* Pick and explore training data.\n",
+    "* Configure the fine tuning job.\n",
+    "* Run the fine tuning job.\n",
+    "* Review training and evaluation metrics. \n",
+    "* Register the fine tuned model. \n",
+    "* Deploy the fine tuned model for real time inference.\n",
+    "* Clean up resources. "
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running below cell. This is not an optional step if running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install azure-ai-ml\n",
+    "%pip install azure-identity\n",
+    "%pip install datasets==2.9.0\n",
+    "%pip install mlflow\n",
+    "%pip install azureml-mlflow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential=credential)\n",
+    "except:\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential,\n",
+    "        subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "        resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "        workspace_name=\"<WORKSPACE_NAME>\",\n",
+    "    )\n",
+    "\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml\")\n",
+    "\n",
+    "experiment_name = \"text-classification-emotion-detection\"\n",
+    "\n",
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "except Exception as ex:\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpu_count_found = False\n",
+    "workspace_compute_sku_list = workspace_ml_client.compute.list_sizes()\n",
+    "available_sku_sizes = []\n",
+    "for compute_sku in workspace_compute_sku_list:\n",
+    "    available_sku_sizes.append(compute_sku.name)\n",
+    "    if compute_sku.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = compute_sku.gpus\n",
+    "        gpu_count_found = True\n",
+    "# if gpu_count_found not found, then print an error\n",
+    "if gpu_count_found:\n",
+    "    print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n",
+    "else:\n",
+    "    raise ValueError(\n",
+    "        f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
+    "        f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n",
+    "    )\n",
+    "# CPU based finetune works only for single-node single-process\n",
+    "if gpus_per_node == 0:\n",
+    "    print(\n",
+    "        \"WARNING! Selected compute doesn't have GPU. CPU based finetune is experimental and works on a single process in a single node\"\n",
+    "    )\n",
+    "    gpus_per_node = 1\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a foundation model to fine tune\n",
+    "\n",
+    "Models that support `fill-mask` tasks are good candidates to fine tune for `text-classification`. You can browse these models in the Model Catalog in the AzureML Studio, filtering by the `fill-mask` task. In this example, we use the `bert-base-uncased` model. If you have opened this notebook for a different model, replace the model name and version accordingly. \n",
+    "\n",
+    "Note the model id property of the model. This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in model details page in AzureML Studio Model Catalog. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"bert-base-uncased\"\n",
+    "model_version = \"1\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the dataset for fine-tuning the model\n",
+    "\n",
+    "A copy of the emotion dataset is available in the [emotion-dataset](./emotion-dataset/) folder. The next few cells show basic data preparation for fine tuning:\n",
+    "* Visualize some data rows\n",
+    "* Replace numerical categories in data with the actual string labels. This mapping is available in the [./emotion-dataset/label.json](./emotion-dataset/label.json). This step is needed if you want string labels such as `anger`, `joy`, etc. returned when scoring the model. If you skip this step, the model will return numerical categories such as 0, 1, 2, etc. and you will have to map them to what the category represents yourself. \n",
+    "* We want this sample to run quickly, so save smaller `train`, `validation` and `test` files containing 10% of the original. This means the fine tuned model will have lower accuracy, hence it should not be put to real-world use. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the ./emotion-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "df = pd.read_json(\"./emotion-dataset/train.jsonl\", lines=True)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the id2label json element of the ./emotion-dataset/label.json file into pandas table with keys as 'label' column of int64 type and values as 'label_string' column as string type\n",
+    "import json\n",
+    "\n",
+    "with open(\"./emotion-dataset/label.json\") as f:\n",
+    "    id2label = json.load(f)\n",
+    "    id2label = id2label[\"id2label\"]\n",
+    "    label_df = pd.DataFrame.from_dict(\n",
+    "        id2label, orient=\"index\", columns=[\"label_string\"]\n",
+    "    )\n",
+    "    label_df[\"label\"] = label_df.index.astype(\"int64\")\n",
+    "    label_df = label_df[[\"label\", \"label_string\"]]\n",
+    "label_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load test.jsonl, train.jsonl and validation.jsonl from the ./emotion-dataset folder into pandas dataframes\n",
+    "test_df = pd.read_json(\"./emotion-dataset/test.jsonl\", lines=True)\n",
+    "train_df = pd.read_json(\"./emotion-dataset/train.jsonl\", lines=True)\n",
+    "validation_df = pd.read_json(\"./emotion-dataset/validation.jsonl\", lines=True)\n",
+    "# join the train, validation and test dataframes with the id2label dataframe to get the label_string column\n",
+    "train_df = train_df.merge(label_df, on=\"label\", how=\"left\")\n",
+    "validation_df = validation_df.merge(label_df, on=\"label\", how=\"left\")\n",
+    "test_df = test_df.merge(label_df, on=\"label\", how=\"left\")\n",
+    "# show the first 5 rows of the train dataframe\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# save 10% of the rows from the train, validation and test dataframes into files with small_ prefix in the ./emotion-dataset folder\n",
+    "train_df.sample(frac=0.1).to_json(\n",
+    "    \"./emotion-dataset/small_train.jsonl\", orient=\"records\", lines=True\n",
+    ")\n",
+    "validation_df.sample(frac=0.1).to_json(\n",
+    "    \"./emotion-dataset/small_validation.jsonl\", orient=\"records\", lines=True\n",
+    ")\n",
+    "test_df.sample(frac=0.1).to_json(\n",
+    "    \"./emotion-dataset/small_test.jsonl\", orient=\"records\", lines=True\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the fine tuning job using the model and data as inputs\n",
+    " \n",
+    "Create the job that uses the `text-classification` pipeline component. [Learn more]() about all the parameters supported for fine tuning."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n",
+    "from azure.ai.ml import PyTorchDistribution, Input\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"text_classification_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def create_pipeline():\n",
+    "    finetuning_job = pipeline_component_func(\n",
+    "        # specify the id of the foundation model in the azureml system registry, identified in step #2\n",
+    "        mlflow_model_path=foundation_model.id,\n",
+    "        # huggingface_id = 'bert-base-uncased', # if you want to use a huggingface model, uncomment this line and comment the above line\n",
+    "        compute_model_import=compute_cluster,\n",
+    "        compute_preprocess=compute_cluster,\n",
+    "        compute_finetune=compute_cluster,\n",
+    "        compute_model_evaluation=compute_cluster,\n",
+    "        # map the dataset splits to parameters\n",
+    "        train_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./emotion-dataset/small_train.jsonl\"\n",
+    "        ),\n",
+    "        validation_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./emotion-dataset/small_validation.jsonl\"\n",
+    "        ),\n",
+    "        test_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./emotion-dataset/small_test.jsonl\"\n",
+    "        ),\n",
+    "        # evaluation_config=Input(\n",
+    "        #     type=\"uri_file\", path=\"./text-classification-config.json\"\n",
+    "        # ),\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        sentence1_key=\"text\",\n",
+    "        label_key=\"label_string\",\n",
+    "        # Training settings\n",
+    "        number_of_gpu_to_use_finetuning=gpus_per_node,  # set to the number of GPUs available in the compute\n",
+    "        num_train_epochs=3,\n",
+    "        learning_rate=2e-5,\n",
+    "    )\n",
+    "    return {\n",
+    "        # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model\n",
+    "        # registering the model is required to deploy the model to an online or batch endpoint\n",
+    "        \"trained_model\": finetuning_job.outputs.mlflow_model_folder\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "pipeline_object = create_pipeline()\n",
+    "\n",
+    "# don't use cached results from previous jobs\n",
+    "pipeline_object.settings.force_rerun = True"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the job"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job\n",
+    "pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "    pipeline_object, experiment_name=experiment_name\n",
+    ")\n",
+    "# wait for the pipeline job to complete\n",
+    "workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review training and evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. \n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "filter = \"tags.mlflow.rootRunId='\" + pipeline_job.name + \"'\"\n",
+    "runs = mlflow.search_runs(\n",
+    "    experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    ")\n",
+    "training_run = None\n",
+    "evaluation_run = None\n",
+    "# get the training and evaluation runs.\n",
+    "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "for run in runs:\n",
+    "    # check if run.data.metrics.epoch exists\n",
+    "    if \"epoch\" in run.data.metrics:\n",
+    "        training_run = run\n",
+    "    # else, check if run.data.metrics.accuracy exists\n",
+    "    elif \"accuracy\" in run.data.metrics:\n",
+    "        evaluation_run = run"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if training_run:\n",
+    "    print(\"Training metrics:\\n\\n\")\n",
+    "    print(json.dumps(training_run.data.metrics, indent=2))\n",
+    "else:\n",
+    "    print(\"No Training job found\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if evaluation_run:\n",
+    "    print(\"Evaluation metrics:\\n\\n\")\n",
+    "    print(json.dumps(evaluation_run.data.metrics, indent=2))\n",
+    "else:\n",
+    "    print(\"No Evaluation job found\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Register the fine tuned model with the workspace\n",
+    "\n",
+    "We will register the model from the output of the fine tuning job. This will track lineage between the fine tuned model and the fine tuning job. The fine tuning job, further, tracks lineage to the foundation model, data and training code."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.entities import Model\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# check if the `trained_model` output is available\n",
+    "print(\"pipeline job outputs: \", workspace_ml_client.jobs.get(pipeline_job.name).outputs)\n",
+    "\n",
+    "# fetch the model from pipeline job output - not working, hence fetching from fine tune child job\n",
+    "model_path_from_job = \"azureml://jobs/{0}/outputs/{1}\".format(\n",
+    "    pipeline_job.name, \"trained_model\"\n",
+    ")\n",
+    "\n",
+    "finetuned_model_name = model_name + \"-emotion-detection\"\n",
+    "finetuned_model_name = finetuned_model_name.replace(\"/\", \"-\")\n",
+    "print(\"path to register model: \", model_path_from_job)\n",
+    "prepare_to_register_model = Model(\n",
+    "    path=model_path_from_job,\n",
+    "    type=AssetTypes.MLFLOW_MODEL,\n",
+    "    name=finetuned_model_name,\n",
+    "    version=timestamp,  # use timestamp as version to avoid version conflict\n",
+    "    description=model_name + \" fine tuned model for emotion detection\",\n",
+    ")\n",
+    "print(\"prepare to register model: \\n\", prepare_to_register_model)\n",
+    "# register the model from pipeline job output\n",
+    "registered_model = workspace_ml_client.models.create_or_update(\n",
+    "    prepare_to_register_model\n",
+    ")\n",
+    "print(\"registered model: \\n\", registered_model)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 7. Deploy the fine tuned model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "\n",
+    "online_endpoint_name = \"emotion-\" + timestamp\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \"\n",
+    "    + registered_model.name\n",
+    "    + \", fine tuned model for emotion detection\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can find here the list of SKU's supported for deployment - [Managed online endpoints SKU list](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=registered_model.id,\n",
+    "    instance_type=\"Standard_DS3_v2\",\n",
+    "    instance_count=1,\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 8. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch some sample data from the test dataset and submit it to the online endpoint for inference. We will then display the scored labels alongside the ground truth labels."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read ./emotion-dataset/small_test.jsonl into a pandas dataframe\n",
+    "test_df = pd.read_json(\"./emotion-dataset/small_test.jsonl\", lines=True)\n",
+    "# take 10 random samples\n",
+    "test_df = test_df.sample(n=10)\n",
+    "# rebuild index\n",
+    "test_df.reset_index(drop=True, inplace=True)\n",
+    "# rename the label_string column to ground_truth_label\n",
+    "test_df = test_df.rename(columns={\"label_string\": \"ground_truth_label\"})\n",
+    "test_df.head(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a json object with the key as \"inputs\" and value as a list of values from the text column of the test dataframe\n",
+    "test_json = {\"inputs\": {\"input_string\": test_df[\"text\"].tolist()}}\n",
+    "# save the json object to a file named sample_score.json in the ./emotion-dataset folder\n",
+    "with open(\"./emotion-dataset/sample_score.json\", \"w\") as f:\n",
+    "    json.dump(test_json, f)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=\"./emotion-dataset/sample_score.json\",\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the response to a pandas dataframe and rename the label column as scored_label\n",
+    "response_df = pd.read_json(response)\n",
+    "response_df = response_df.rename(columns={0: \"scored_label\"})\n",
+    "response_df.head(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# merge the test dataframe and the response dataframe on the index\n",
+    "merged_df = pd.merge(test_df, response_df, left_index=True, right_index=True)\n",
+    "merged_df.head(10)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 9. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "notebooks-venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/finetune/text-classification/text-classification-config.json b/sdk/python/foundation-models/system/finetune/text-classification/text-classification-config.json
new file mode 100644
index 0000000000..597603459e
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/text-classification/text-classification-config.json
@@ -0,0 +1,7 @@
+{
+  "metrics": ["average_precision_score_macro", "AUC_macro", "recall_score_macro", "average_precision_score_binary", "average_precision_score_micro", "AUC_binary", "recall_score_micro", "AUC_micro", "norm_macro_recall", "average_precision_score_weighted", "weighted_accuracy", "precision_score_micro", "f1_score_binary", "accuracy_table", "precision_score_macro", "f1_score_micro", "precision_score_weighted", "f1_score_weighted", "confusion_matrix", "recall_score_binary", "matthews_correlation", "log_loss", "accuracy", "precision_score_binary", "balanced_accuracy", "AUC_weighted", "f1_score_macro", "recall_score_weighted"],
+  "multilabel": false,
+  "enable_metric_confidence": true,
+  "confidence_metrics": ["accuracy", "f1_score_micro"],
+  "use_binary": false
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/download-dataset.py b/sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/download-dataset.py
new file mode 100644
index 0000000000..3702e2a14f
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/download-dataset.py
@@ -0,0 +1,39 @@
+# import library to parse command line arguments
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# add an argument to specify a dataset name to download
+parser.add_argument("--dataset", type=str, default="conll2003", help="dataset name")
+# add an argument to specify the directory to download the dataset to
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="data",
+    help="directory to download the dataset to",
+)
+args = parser.parse_args()
+
+# create the download directory if it does not exist
+if not os.path.exists(args.download_dir):
+    os.makedirs(args.download_dir)
+
+
+def format_ner_tags(example, class_names):
+    example["text"] = " ".join(example["tokens"])
+    example["ner_tags_str"] = [class_names[id] for id in example["ner_tags"]]
+    return example
+
+
+# import hugging face datasets library
+from datasets import load_dataset, get_dataset_split_names
+from functools import partial
+
+for split in get_dataset_split_names(args.dataset):
+    # load the split of the dataset
+    dataset = load_dataset(args.dataset, split=split)
+    dataset = dataset.map(
+        partial(format_ner_tags, class_names=dataset.features["ner_tags"].feature.names)
+    )
+    # save the split of the dataset to the download directory as json lines file
+    dataset.to_json(os.path.join(args.download_dir, f"{split}.jsonl"))
+    # print dataset features
diff --git a/sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/sample_score.json b/sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/sample_score.json
new file mode 100644
index 0000000000..899e024028
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/sample_score.json
@@ -0,0 +1 @@
+{"inputs": {"input_string": ["BUENOS AIRES 1996-12-06", "TALLINN 1996-12-06", "Sunseeds 219 216 220 216", "The London club had been rocked by a two-goal burst from forwards Dean Sturridge and Darryl Powell in the 62nd and 71st minutes which overturned Arsenal 's 1-0 lead from a diving header by captain Tony Adams on the stroke of halftime .", "Gianluca Vialli ( Chelsea )", "Source : Manitoba Pork .", "( ( Chicago Newsdesk 312-408-8720 ) )", "On Thursday , overnight rates moved between 21.625 and 22.125 .", "Lazio ( 12 ) v AS Roma ( 7 ) 1930", "Previous World Cup victories : None"]}}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/finetune/token-classification/token-classification-config.json b/sdk/python/foundation-models/system/finetune/token-classification/token-classification-config.json
new file mode 100644
index 0000000000..23efa790c7
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/token-classification/token-classification-config.json
@@ -0,0 +1,3 @@
+{
+  "metrics": ["precision_score_macro", "f1_score_micro", "recall_score_macro", "f1_score_weighted", "recall_score_micro", "accuracy", "precision_score_weighted", "precision_score_micro", "f1_score_macro", "recall_score_weighted"]
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/finetune/token-classification/token-classification.ipynb b/sdk/python/foundation-models/system/finetune/token-classification/token-classification.ipynb
new file mode 100644
index 0000000000..c7377c17da
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/token-classification/token-classification.ipynb
@@ -0,0 +1,608 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Token Classification - Named Entity Recognition (NER)\n",
+    "\n",
+    "This sample shows how to use `token-classification` components from the `azureml` system registry to fine tune a model to detect entities using the conll2003 dataset. We then deploy the fine tuned model to an online endpoint for real time inference. The model is trained on a tiny sample of the dataset with a small number of epochs to illustrate the fine tuning approach.\n",
+    "\n",
+    "### Training data\n",
+    "We will use the [conll2003](https://huggingface.co/datasets/conll2003) dataset. A copy of this dataset is available in the [conll2003-dataset](./conll2003-dataset/) folder. \n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `fill-mask` task are generally good foundation models to fine tune for `token-classification`. We will use the `bert-base-uncased` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. Optionally, if you need to fine tune a model that is available on HuggingFace, but not available in `azureml` system registry, you can either [import](https://github.com/Azure/azureml-examples) the model or use the `huggingface_id` parameter to instruct the components to pull the model directly from HuggingFace. \n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick a model to fine tune.\n",
+    "* Pick and explore training data.\n",
+    "* Configure the fine tuning job.\n",
+    "* Run the fine tuning job.\n",
+    "* Register the fine tuned model. \n",
+    "* Deploy the fine tuned model for real time inference.\n",
+    "* Clean up resources. \n",
+    "\n"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running the cell below. This step is required when running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install azure-ai-ml\n",
+    "%pip install azure-identity\n",
+    "%pip install datasets==2.9.0\n",
+    "%pip install mlflow\n",
+    "%pip install azureml-mlflow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential=credential)\n",
+    "except:\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential,\n",
+    "        subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "        resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "        workspace_name=\"<WORKSPACE_NAME>\",\n",
+    "    )\n",
+    "\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml\")\n",
+    "\n",
+    "experiment_name = \"token-classification-ner\"\n",
+    "\n",
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "except Exception as ex:\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpu_count_found = False\n",
+    "workspace_compute_sku_list = workspace_ml_client.compute.list_sizes()\n",
+    "available_sku_sizes = []\n",
+    "for compute_sku in workspace_compute_sku_list:\n",
+    "    available_sku_sizes.append(compute_sku.name)\n",
+    "    if compute_sku.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = compute_sku.gpus\n",
+    "        gpu_count_found = True\n",
+    "# if gpu_count_found not found, then print an error\n",
+    "if gpu_count_found:\n",
+    "    print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n",
+    "else:\n",
+    "    raise ValueError(\n",
+    "        f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
+    "        f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n",
+    "    )\n",
+    "# CPU based finetune works only for single-node single-process\n",
+    "if gpus_per_node == 0:\n",
+    "    print(\n",
+    "        \"WARNING! Selected compute doesn't have GPU. CPU based finetune is experimental and works on a single process in a single node\"\n",
+    "    )\n",
+    "    gpus_per_node = 1\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a foundation model to fine tune\n",
+    "\n",
+    "Models that support `fill-mask` tasks are good candidates to fine tune for `token-classification`. You can browse these models in the Model Catalog in the AzureML Studio, filtering by the `fill-mask` task. In this example, we use the `bert-base-uncased` model. If you have opened this notebook for a different model, replace the model name and version accordingly. \n",
+    "\n",
+    "Note the model id property of the model. This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in model details page in AzureML Studio Model Catalog. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"bert-base-uncased\"\n",
+    "model_version = \"1\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the dataset for fine-tuning the model\n",
+    "\n",
+    "A copy of the conll2003 dataset is available in the [conll2003-dataset](./conll2003-dataset/) folder. The next few cells show basic data preparation for fine tuning:\n",
+    "* Visualize some data rows\n",
+    "* We want this sample to run quickly, so save smaller `train`, `validation` and `test` files containing 10% of the original. This means the fine tuned model will have lower accuracy, hence it should not be put to real-world use. \n",
+    "\n",
+    "> The [download-dataset.py](./conll2003-dataset/download-dataset.py) is used to download the conll2003 dataset and transform the dataset into finetune pipeline component consumable format."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the ./conll2003-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "df = pd.read_json(\"./conll2003-dataset/train.jsonl\", lines=True)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load test.jsonl, train.jsonl and validation.jsonl from the ./conll2003-dataset folder into pandas dataframes\n",
+    "test_df = pd.read_json(\"./conll2003-dataset/test.jsonl\", lines=True)\n",
+    "train_df = pd.read_json(\"./conll2003-dataset/train.jsonl\", lines=True)\n",
+    "validation_df = pd.read_json(\"./conll2003-dataset/validation.jsonl\", lines=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# save 10% of the rows from the train, validation and test dataframes into files with small_ prefix in the ./conll2003-dataset folder\n",
+    "train_df.sample(frac=0.1).to_json(\n",
+    "    \"./conll2003-dataset/small_train.jsonl\", orient=\"records\", lines=True\n",
+    ")\n",
+    "validation_df.sample(frac=0.1).to_json(\n",
+    "    \"./conll2003-dataset/small_validation.jsonl\", orient=\"records\", lines=True\n",
+    ")\n",
+    "test_df.sample(frac=0.1).to_json(\n",
+    "    \"./conll2003-dataset/small_test.jsonl\", orient=\"records\", lines=True\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the fine tuning job using the model and data as inputs\n",
+    " \n",
+    "Create the job that uses the `token-classification` pipeline component. [Learn more]() about all the parameters supported for fine tuning."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n",
+    "from azure.ai.ml import PyTorchDistribution, Input\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"token_classification_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def create_pipeline():\n",
+    "    finetuning_job = pipeline_component_func(\n",
+    "        # specify the id of the foundation model from the azureml system registry, identified in step #2\n",
+    "        mlflow_model_path=foundation_model.id,\n",
+    "        # huggingface_id = 'bert-base-uncased', # if you want to use a huggingface model, uncomment this line and comment the above line\n",
+    "        compute_model_import=compute_cluster,\n",
+    "        compute_preprocess=compute_cluster,\n",
+    "        compute_finetune=compute_cluster,\n",
+    "        compute_model_evaluation=compute_cluster,\n",
+    "        # map the dataset splits to parameters\n",
+    "        train_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./conll2003-dataset/small_train.jsonl\"\n",
+    "        ),\n",
+    "        validation_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./conll2003-dataset/small_validation.jsonl\"\n",
+    "        ),\n",
+    "        test_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./conll2003-dataset/small_test.jsonl\"\n",
+    "        ),\n",
+    "        evaluation_config=Input(\n",
+    "            type=\"uri_file\", path=\"./token-classification-config.json\"\n",
+    "        ),\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        token_key=\"tokens\",\n",
+    "        tag_key=\"ner_tags_str\",\n",
+    "        # Training settings\n",
+    "        number_of_gpu_to_use_finetuning=gpus_per_node,  # set to the number of GPUs available in the compute\n",
+    "        num_train_epochs=3,\n",
+    "        learning_rate=2e-5,\n",
+    "    )\n",
+    "    return {\n",
+    "        # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model\n",
+    "        # registering the model is required to deploy the model to an online or batch endpoint\n",
+    "        \"trained_model\": finetuning_job.outputs.mlflow_model_folder\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "pipeline_object = create_pipeline()\n",
+    "\n",
+    "# don't use cached results from previous jobs\n",
+    "pipeline_object.settings.force_rerun = True"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the job"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job\n",
+    "pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "    pipeline_object, experiment_name=experiment_name\n",
+    ")\n",
+    "# wait for the pipeline job to complete\n",
+    "workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review training and evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. \n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "filter = \"tags.mlflow.rootRunId='\" + pipeline_job.name + \"'\"\n",
+    "runs = mlflow.search_runs(\n",
+    "    experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    ")\n",
+    "training_run = None\n",
+    "evaluation_run = None\n",
+    "# get the training and evaluation runs.\n",
+    "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "for run in runs:\n",
+    "    # check if run.data.metrics.epoch exists\n",
+    "    if \"epoch\" in run.data.metrics:\n",
+    "        training_run = run\n",
+    "    # else, check if run.data.metrics.accuracy exists\n",
+    "    elif \"accuracy\" in run.data.metrics:\n",
+    "        evaluation_run = run"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if training_run:\n",
+    "    print(\"Training metrics:\\n\\n\")\n",
+    "    print(json.dumps(training_run.data.metrics, indent=2))\n",
+    "else:\n",
+    "    print(\"No Training job found\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if evaluation_run:\n",
+    "    print(\"Evaluation metrics:\\n\\n\")\n",
+    "    print(json.dumps(evaluation_run.data.metrics, indent=2))\n",
+    "else:\n",
+    "    print(\"No Evaluation job found\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Register the fine tuned model with the workspace\n",
+    "\n",
+    "We will register the model from the output of the fine tuning job. This will track lineage between the fine tuned model and the fine tuning job. The fine tuning job, further, tracks lineage to the foundation model, data and training code."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.entities import Model\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# check if the `trained_model` output is available\n",
+    "print(\"pipeline job outputs: \", workspace_ml_client.jobs.get(pipeline_job.name).outputs)\n",
+    "\n",
+    "# build the path to the `trained_model` output of the pipeline job\n",
+    "model_path_from_job = \"azureml://jobs/{0}/outputs/{1}\".format(\n",
+    "    pipeline_job.name, \"trained_model\"\n",
+    ")\n",
+    "\n",
+    "finetuned_model_name = model_name + \"-ner\"\n",
+    "finetuned_model_name = finetuned_model_name.replace(\"/\", \"-\")\n",
+    "print(\"path to register model: \", model_path_from_job)\n",
+    "prepare_to_register_model = Model(\n",
+    "    path=model_path_from_job,\n",
+    "    type=AssetTypes.MLFLOW_MODEL,\n",
+    "    name=finetuned_model_name,\n",
+    "    version=timestamp,  # use timestamp as version to avoid version conflict\n",
+    "    description=model_name + \" fine tuned model for named entity recognition\",\n",
+    ")\n",
+    "print(\"prepare to register model: \\n\", prepare_to_register_model)\n",
+    "# register the model from pipeline job output\n",
+    "registered_model = workspace_ml_client.models.create_or_update(\n",
+    "    prepare_to_register_model\n",
+    ")\n",
+    "print(\"registered model: \\n\", registered_model)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 7. Deploy the fine tuned model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "\n",
+    "online_endpoint_name = \"ner-\" + timestamp\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \"\n",
+    "    + registered_model.name\n",
+    "    + \", fine tuned model for named entity recognition\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can find here the list of SKU's supported for deployment - [Managed online endpoints SKU list](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=registered_model.id,\n",
+    "    instance_type=\"Standard_DS3_v2\",\n",
+    "    instance_count=1,\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 8. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch some sample data from the test dataset and submit it to the online endpoint for inference. We will then display the predicted tags alongside the ground truth tags."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read ./conll2003-dataset/small_test.jsonl into a pandas dataframe\n",
+    "test_df = pd.read_json(\"./conll2003-dataset/small_test.jsonl\", lines=True)\n",
+    "# take 10 random samples\n",
+    "test_df = test_df.sample(n=10)\n",
+    "# drop the id, pos_tags, chunk_tags, ner_tags column\n",
+    "test_df.drop(columns=[\"id\", \"pos_tags\", \"chunk_tags\", \"ner_tags\"], inplace=True)\n",
+    "# rebuild index\n",
+    "test_df.reset_index(drop=True, inplace=True)\n",
+    "# rename the ner_tags_str column to ground_truth_tags\n",
+    "test_df = test_df.rename(columns={\"ner_tags_str\": \"ground_truth_tags\"})\n",
+    "test_df.head(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "# create a json object with the key as \"inputs\" and value as a list of values from the text column of the test dataframe\n",
+    "test_json = {\"inputs\": {\"input_string\": test_df[\"text\"].tolist()}}\n",
+    "# save the json object to a file named sample_score.json in the ./conll2003-dataset folder\n",
+    "with open(\"./conll2003-dataset/sample_score.json\", \"w\") as f:\n",
+    "    json.dump(test_json, f)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=\"./conll2003-dataset/sample_score.json\",\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the response to a pandas dataframe\n",
+    "response_df = pd.read_json(response)\n",
+    "# rename the column to predicted_tags\n",
+    "response_df.rename(columns={0: \"predicted_tags\"}, inplace=True)\n",
+    "response_df.head(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# merge the test dataframe and the response dataframe on the index\n",
+    "merged_df = pd.merge(test_df, response_df, left_index=True, right_index=True)\n",
+    "merged_df.head(10)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 9. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "notebooks-venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/finetune/translation/translation-config.json b/sdk/python/foundation-models/system/finetune/translation/translation-config.json
new file mode 100644
index 0000000000..f293ed61bb
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/translation/translation-config.json
@@ -0,0 +1,4 @@
+{
+  "metrics": ["bleu_1", "bleu_2", "bleu_3", "bleu_4"],
+  "smoothing":false
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/finetune/translation/translation.ipynb b/sdk/python/foundation-models/system/finetune/translation/translation.ipynb
new file mode 100644
index 0000000000..1270692962
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/translation/translation.ipynb
@@ -0,0 +1,604 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Translation - Translate English to Romanian\n",
+    "\n",
+    "This sample shows how to use `translation` components from the `azureml` system registry to fine tune a model to translate English to Romanian. We then deploy it to an online endpoint for real time inference. The model is trained on a tiny sample of the dataset with a small number of epochs to illustrate the fine tuning approach.\n",
+    "\n",
+    "### Training data\n",
+    "We will use the [wmt16 (ro-en)](https://huggingface.co/datasets/wmt16) dataset. A copy of this dataset is available in the [wmt16-en-ro-dataset](./wmt16-en-ro-dataset/) folder for easy access. \n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `translation` task are used here. We will use the `t5-small` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. Optionally, if you need to fine tune a model that is available on HuggingFace, but not available in the `azureml` system registry, you can either [import](https://github.com/Azure/azureml-examples) the model or use the `huggingface_id` parameter to instruct the components to pull the model directly from HuggingFace.  \n",
+    "\n",
+    "### Outline\n",
+    "* Setup pre-requisites such as compute.\n",
+    "* Pick a model to fine tune.\n",
+    "* Pick and explore training data.\n",
+    "* Configure the fine tuning job.\n",
+    "* Run the fine tuning job.\n",
+    "* Register the fine tuned model. \n",
+    "* Deploy the fine tuned model for real time inference.\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Setup pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry\n",
+    "* Set an optional experiment name\n",
+    "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install dependencies by running the cell below. This step is required when running in a new environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install azure-ai-ml\n",
+    "%pip install azure-identity\n",
+    "%pip install datasets==2.9.0\n",
+    "%pip install mlflow\n",
+    "%pip install azureml-mlflow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "try:\n",
+    "    workspace_ml_client = MLClient.from_config(credential=credential)\n",
+    "except:\n",
+    "    workspace_ml_client = MLClient(\n",
+    "        credential,\n",
+    "        subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "        resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "        workspace_name=\"<WORKSPACE_NAME>\",\n",
+    "    )\n",
+    "\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml\")\n",
+    "\n",
+    "experiment_name = \"translation-wmt16-en-ro\"\n",
+    "\n",
+    "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n",
+    "compute_cluster = \"gpu-cluster-big\"\n",
+    "try:\n",
+    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
+    "except Exception as ex:\n",
+    "    compute = AmlCompute(\n",
+    "        name=compute_cluster,\n",
+    "        size=\"Standard_ND40rs_v2\",\n",
+    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
+    "    )\n",
+    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
+    "\n",
+    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
+    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
+    "# Setting this to more than the number of GPUs will result in an error.\n",
+    "gpu_count_found = False\n",
+    "workspace_compute_sku_list = workspace_ml_client.compute.list_sizes()\n",
+    "available_sku_sizes = []\n",
+    "for compute_sku in workspace_compute_sku_list:\n",
+    "    available_sku_sizes.append(compute_sku.name)\n",
+    "    if compute_sku.name.lower() == compute.size.lower():\n",
+    "        gpus_per_node = compute_sku.gpus\n",
+    "        gpu_count_found = True\n",
+    "# report the GPU count if it was found, otherwise raise an error\n",
+    "if gpu_count_found:\n",
+    "    print(f\"Number of GPU's in compute {compute.size}: {gpus_per_node}\")\n",
+    "else:\n",
+    "    raise ValueError(\n",
+    "        f\"Number of GPU's in compute {compute.size} not found. Available skus are: {available_sku_sizes}. \"\n",
+    "        f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n",
+    "    )\n",
+    "# CPU based finetune works only for single-node single-process\n",
+    "if gpus_per_node == 0:\n",
+    "    print(\n",
+    "        \"WARNING! Selected compute doesn't have GPU. CPU based finetune is experimental and works on a single process in a single node\"\n",
+    "    )\n",
+    "    gpus_per_node = 1\n",
+    "\n",
+    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
+    "timestamp = str(int(time.time()))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a foundation model to fine tune\n",
+    "\n",
+    "Models that support `translation` tasks are picked to fine tune. You can browse these models in the Model Catalog in the AzureML Studio, filtering by the `translation` task. In this example, we use the `t5-small` model. If you have opened this notebook for a different model, replace the model name and version accordingly. \n",
+    "\n",
+    "Note the model id property of the model. This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in model details page in AzureML Studio Model Catalog. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"t5-small\"\n",
+    "model_version = \"1\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Pick the dataset for fine-tuning the model \n",
+    "\n",
+    "A copy of the dataset is available in the [wmt16-en-ro-dataset](./wmt16-en-ro-dataset/) folder. \n",
+    "* Visualize some data rows. \n",
+    "* We want this sample to run quickly, so save smaller `train`, `validation` and `test` files containing 20% of the already trimmed rows. This means the fine tuned model will have lower accuracy, hence it should not be put to real-world use. \n",
+    "\n",
+    "> The [download-dataset.py](./wmt16-en-ro-dataset/download-dataset.py) script is used to download the wmt16 (ro-en) dataset and transform it into a format that the fine tuning pipeline component can consume. Because the dataset is large, only part of it is included here.\n",
+    "\n",
+    "> **Note** : Some language models have different language codes and hence the column names in the dataset should reflect the same."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "# load the train.jsonl, test.jsonl and validation.jsonl files from the ./wmt16-en-ro-dataset/ folder\n",
+    "train_df = pd.read_json(\"./wmt16-en-ro-dataset/train.jsonl\", lines=True)\n",
+    "validation_df = pd.read_json(\"./wmt16-en-ro-dataset/validation.jsonl\", lines=True)\n",
+    "test_df = pd.read_json(\"./wmt16-en-ro-dataset/test.jsonl\", lines=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# save 20% of the rows from the dataframes into files with small_ prefix in the ./wmt16-en-ro-dataset folder\n",
+    "train_df.sample(frac=0.2).to_json(\n",
+    "    \"./wmt16-en-ro-dataset/small_train.jsonl\", orient=\"records\", lines=True\n",
+    ")\n",
+    "validation_df.sample(frac=0.2).to_json(\n",
+    "    \"./wmt16-en-ro-dataset/small_validation.jsonl\", orient=\"records\", lines=True\n",
+    ")\n",
+    "test_df.sample(frac=0.2).to_json(\n",
+    "    \"./wmt16-en-ro-dataset/small_test.jsonl\", orient=\"records\", lines=True\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Submit the fine tuning job using the model and data as inputs\n",
+    " \n",
+    "Create the job that uses the `translation` pipeline component. [Learn more]() about all the parameters supported for fine tuning."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n",
+    "from azure.ai.ml import PyTorchDistribution, Input\n",
+    "\n",
+    "# fetch the pipeline component\n",
+    "pipeline_component_func = registry_ml_client.components.get(\n",
+    "    name=\"translation_pipeline\", label=\"latest\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# define the pipeline job\n",
+    "@pipeline()\n",
+    "def create_pipeline():\n",
+    "    finetuning_job = pipeline_component_func(\n",
+    "        # specify the id of the foundation model from the azureml system registry, identified in step #2\n",
+    "        mlflow_model_path=foundation_model.id,\n",
+    "        # huggingface_id = 't5-small', # if you want to use a huggingface model, uncomment this line and comment the above line\n",
+    "        compute_model_import=compute_cluster,\n",
+    "        compute_preprocess=compute_cluster,\n",
+    "        compute_finetune=compute_cluster,\n",
+    "        compute_model_evaluation=compute_cluster,\n",
+    "        # map the dataset splits to parameters\n",
+    "        train_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./wmt16-en-ro-dataset/small_train.jsonl\"\n",
+    "        ),\n",
+    "        validation_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./wmt16-en-ro-dataset/small_validation.jsonl\"\n",
+    "        ),\n",
+    "        test_file_path=Input(\n",
+    "            type=\"uri_file\", path=\"./wmt16-en-ro-dataset/small_test.jsonl\"\n",
+    "        ),\n",
+    "        evaluation_config=Input(type=\"uri_file\", path=\"./translation-config.json\"),\n",
+    "        # The following parameters map to the dataset fields\n",
+    "        # source_lang parameter maps to the \"en\" field in the wmt16 dataset\n",
+    "        source_lang=\"en\",\n",
+    "        # target_lang parameter maps to the \"ro\" field in the wmt16 dataset\n",
+    "        target_lang=\"ro\",\n",
+    "        # training settings\n",
+    "        number_of_gpu_to_use_finetuning=gpus_per_node,  # set to the number of GPUs available in the compute\n",
+    "        num_train_epochs=3,\n",
+    "        learning_rate=2e-5,\n",
+    "    )\n",
+    "    return {\n",
+    "        # map the output of the fine tuning job to the output of the pipeline job so that we can easily register the fine tuned model\n",
+    "        # registering the model is required to deploy the model to an online or batch endpoint\n",
+    "        \"trained_model\": finetuning_job.outputs.mlflow_model_folder\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "pipeline_object = create_pipeline()\n",
+    "\n",
+    "# don't use cached results from previous jobs\n",
+    "pipeline_object.settings.force_rerun = True"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Submit the job"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# submit the pipeline job\n",
+    "pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
+    "    pipeline_object, experiment_name=experiment_name\n",
+    ")\n",
+    "# wait for the pipeline job to complete\n",
+    "workspace_ml_client.jobs.stream(pipeline_job.name)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Review training and evaluation metrics\n",
+    "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metrics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. \n",
+    "\n",
+    "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlflow, json\n",
+    "\n",
+    "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n",
+    "    workspace_ml_client.workspace_name\n",
+    ").mlflow_tracking_uri\n",
+    "mlflow.set_tracking_uri(mlflow_tracking_uri)\n",
+    "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n",
+    "filter = \"tags.mlflow.rootRunId='\" + pipeline_job.name + \"'\"\n",
+    "runs = mlflow.search_runs(\n",
+    "    experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n",
+    ")\n",
+    "training_run = None\n",
+    "evaluation_run = None\n",
+    "# get the training and evaluation runs.\n",
+    "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n",
+    "for run in runs:\n",
+    "    # check if run.data.metrics.epoch exists\n",
+    "    if \"epoch\" in run.data.metrics:\n",
+    "        training_run = run\n",
+    "    # else, check if run.data.metrics.bleu_1 exists\n",
+    "    elif \"bleu_1\" in run.data.metrics:\n",
+    "        evaluation_run = run"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if training_run:\n",
+    "    print(\"Training metrics:\\n\\n\")\n",
+    "    print(json.dumps(training_run.data.metrics, indent=2))\n",
+    "else:\n",
+    "    print(\"No Training job found\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if evaluation_run:\n",
+    "    print(\"Evaluation metrics:\\n\\n\")\n",
+    "    print(json.dumps(evaluation_run.data.metrics, indent=2))\n",
+    "else:\n",
+    "    print(\"No Evaluation job found\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Register the fine tuned model with the workspace\n",
+    "\n",
+    "We will register the model from the output of the fine tuning job. This will track lineage between the fine tuned model and the fine tuning job. The fine tuning job, further, tracks lineage to the foundation model, data and training code."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.entities import Model\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "\n",
+    "# check if the `trained_model` output is available\n",
+    "print(\"pipeline job outputs: \", workspace_ml_client.jobs.get(pipeline_job.name).outputs)\n",
+    "\n",
+    "# build the path to the `trained_model` output of the pipeline job, from which the model is registered\n",
+    "model_path_from_job = \"azureml://jobs/{0}/outputs/{1}\".format(\n",
+    "    pipeline_job.name, \"trained_model\"\n",
+    ")\n",
+    "\n",
+    "finetuned_model_name = model_name + \"-wmt16-en-ro-src\"\n",
+    "finetuned_model_name = finetuned_model_name.replace(\"/\", \"-\")\n",
+    "print(\"path to register model: \", model_path_from_job)\n",
+    "prepare_to_register_model = Model(\n",
+    "    path=model_path_from_job,\n",
+    "    type=AssetTypes.MLFLOW_MODEL,\n",
+    "    name=finetuned_model_name,\n",
+    "    version=timestamp,  # use timestamp as version to avoid version conflict\n",
+    "    description=model_name + \" fine tuned model for translation wmt16 en to ro\",\n",
+    ")\n",
+    "print(\"prepare to register model: \\n\", prepare_to_register_model)\n",
+    "# register the model from pipeline job output\n",
+    "registered_model = workspace_ml_client.models.create_or_update(\n",
+    "    prepare_to_register_model\n",
+    ")\n",
+    "print(\"registered model: \\n\", registered_model)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 7. Deploy the fine tuned model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "\n",
+    "online_endpoint_name = \"translation-en-ro-\" + timestamp\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \"\n",
+    "    + registered_model.name\n",
+    "    + \", fine tuned model for translation\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can find here the list of SKU's supported for deployment - [Managed online endpoints SKU list](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=registered_model.id,\n",
+    "    instance_type=\"Standard_DS3_v2\",\n",
+    "    instance_count=1,\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 8. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch some sample data from the test dataset and submit it to the online endpoint for inference. We will then display the predicted translation alongside the ground truth translation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read ./wmt16-en-ro-dataset/test.jsonl into a pandas dataframe\n",
+    "import pandas as pd\n",
+    "import json\n",
+    "\n",
+    "test_df = pd.read_json(\"./wmt16-en-ro-dataset/test.jsonl\", orient=\"records\", lines=True)\n",
+    "# take 1 random sample\n",
+    "test_df = test_df.sample(n=1)\n",
+    "# rebuild index\n",
+    "test_df.reset_index(drop=True, inplace=True)\n",
+    "test_df.head(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a json object with the key as \"inputs\" and value as a list of values from the en column of the test dataframe\n",
+    "test_json = {\"inputs\": {\"input_string\": test_df[\"en\"].tolist()}}\n",
+    "# save the json object to a file named sample_score.json in the ./wmt16-en-ro-dataset folder\n",
+    "with open(\"./wmt16-en-ro-dataset/sample_score.json\", \"w\") as f:\n",
+    "    json.dump(test_json, f)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> If the input data is long or there are too many records, you may run into the following error: \"Failed to test real-time endpoint\n",
+    "upstream request timeout Please check this guide to understand why this error code might have been returned [https://docs.microsoft.com/en-us/azure/machine-learning/how-to-troubleshoot-online-endpoints#http-status-codes]\". Try to submit smaller and fewer inputs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=\"./wmt16-en-ro-dataset/sample_score.json\",\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the response to a pandas dataframe\n",
+    "response_df = pd.read_json(response)\n",
+    "# rename the column to predicted_translation\n",
+    "response_df.rename(columns={0: \"predicted_translation\"}, inplace=True)\n",
+    "response_df.head(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# merge the test dataframe and the response dataframe on the index\n",
+    "merged_df = pd.merge(test_df, response_df, left_index=True, right_index=True)\n",
+    "merged_df.head(1)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 9. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "notebooks-venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/download-dataset.py b/sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/download-dataset.py
new file mode 100644
index 0000000000..d945ea740e
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/download-dataset.py
@@ -0,0 +1,45 @@
+# import library to parse command line arguments
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# add an argument to specify a dataset name to download
+parser.add_argument("--dataset", type=str, default="wmt16", help="dataset name")
+# add an argument to specify the dataset subset name to download
+parser.add_argument(
+    "--dataset_subset", type=str, default="ro-en", help="dataset subset name"
+)
+# argument to save a fraction of the dataset
+parser.add_argument(
+    "--fraction", type=float, default=0.05, help="fraction of the dataset to save"
+)
+# add an argument to specify the directory to download the dataset to
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="data",
+    help="directory to download the dataset to",
+)
+args = parser.parse_args()
+
+# create the download directory if it does not exist
+if not os.path.exists(args.download_dir):
+    os.makedirs(args.download_dir)
+
+
+def format_translation(example):
+    for key in example["translation"]:
+        example[key] = example["translation"][key]
+    return example
+
+
+# import hugging face datasets library
+from datasets import load_dataset, get_dataset_split_names
+
+for split in get_dataset_split_names(args.dataset, args.dataset_subset):
+    # load the split of the dataset
+    dataset = load_dataset(args.dataset, args.dataset_subset, split=split)
+    dataset = dataset.map(format_translation, remove_columns=["translation"])
+    # save the split of the dataset to the download directory as json lines file
+    dataset.select(range(int(dataset.num_rows * args.fraction))).to_json(
+        os.path.join(args.download_dir, f"{split}.jsonl")
+    )
diff --git a/sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/sample_score.json b/sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/sample_score.json
new file mode 100644
index 0000000000..5a9a88d089
--- /dev/null
+++ b/sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/sample_score.json
@@ -0,0 +1 @@
+{"inputs": {"input_string": ["8 Poll Numbers That Show Donald Trump Is For Real"]}}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/inference/automatic-speech-recognition/asr-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/automatic-speech-recognition/asr-online-endpoint.ipynb
new file mode 100644
index 0000000000..ad5ae41c22
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/automatic-speech-recognition/asr-online-endpoint.ipynb
@@ -0,0 +1,234 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Automatic Speech Recognition Inference using Online Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `automatic-speech-recognition` type models to an online endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`automatic-speech-recognition` (ASR) converts a speech signal to text, mapping a sequence of audio inputs to text outputs. Virtual assistants like Siri and Alexa use ASR models to help users every day, and there are many other useful user-facing applications like live captioning and note-taking during meetings.\n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `automatic-speech-recognition` task are tagged with `task: automatic-speech-recognition`. We will use the `openai-whisper-large` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+    "\n",
+    "### Inference data\n",
+    "We will use custom audio files that have been uploaded to the cloud. \\\n",
+    "You can replace the links with any audio file stored on the cloud and verify inference.\n",
+    "- Most common audio formats (m4a, wav, flac, wma, mp3, etc.) are supported.\n",
+    "- The whisper model can process only 30 seconds of data at a time, so if the file you upload is longer than 30 seconds, only the first 30 seconds will be transcribed. This can be circumvented by splitting the file into 30 second chunks.\n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for real time inference.\n",
+    "* Test the endpoint\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "    resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "    workspace_name=\"<WORKSPACE_NAME>\",\n",
+    ")\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `automatic-speech-recognition` task. In this example, we use the `openai-whisper-large` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"openai-whisper-large\"\n",
+    "model_version = \"1\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Deploy the model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import (\n",
+    "    ManagedOnlineEndpoint,\n",
+    "    ManagedOnlineDeployment,\n",
+    "    OnlineRequestSettings,\n",
+    ")\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "online_endpoint_name = \"asr-\" + str(timestamp)\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \"\n",
+    "    + foundation_model.name\n",
+    "    + \", for automatic-speech-recognition task\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    instance_type=\"Standard_DS4_v2\",\n",
+    "    instance_count=1,\n",
+    "    request_settings=OnlineRequestSettings(\n",
+    "        request_timeout_ms=60000,\n",
+    "    ),\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch the sample scoring request and submit it to the online endpoint for inference. We will then display the raw response and load it into a dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=\"./sample-request/sample_score.json\",\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the json response to a pandas dataframe\n",
+    "response_df = pd.read_json(response)\n",
+    "response_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/automatic-speech-recognition/sample-request/sample_score.json b/sdk/python/foundation-models/system/inference/automatic-speech-recognition/sample-request/sample_score.json
new file mode 100644
index 0000000000..0c87bf55cd
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/automatic-speech-recognition/sample-request/sample_score.json
@@ -0,0 +1,6 @@
+{
+    "inputs": {
+        "audio": ["https://audiovisionfiles.blob.core.windows.net/audio/audio.m4a", "https://audiovisionfiles.blob.core.windows.net/audio/audio.m4a"],
+        "language": ["en", "fr"]
+    }
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/download-dataset.py b/sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/download-dataset.py
new file mode 100644
index 0000000000..dc41e786f3
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/download-dataset.py
@@ -0,0 +1,34 @@
+# import library to parse command line arguments
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# add an argument to specify a dataset name to download
+parser.add_argument("--dataset", type=str, default="bookcorpus", help="dataset name")
+# argument to save a fraction of the dataset
+parser.add_argument(
+    "--fraction", type=float, default=0.001, help="fraction of the dataset to save"
+)
+# add an argument to specify the directory to download the dataset to
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="./",
+    help="directory to download the dataset to",
+)
+args = parser.parse_args()
+
+# create the download directory if it does not exist
+if not os.path.exists(args.download_dir):
+    os.makedirs(args.download_dir)
+
+# import hugging face datasets library
+from datasets import load_dataset, get_dataset_split_names
+
+for split in get_dataset_split_names(args.dataset):
+    print(f"Loading {split} split of {args.dataset} dataset...")
+    # load the split of the dataset
+    dataset = load_dataset(args.dataset, split=split)
+    # save the split of the dataset to the download directory as json lines file
+    dataset.select(range(int(dataset.num_rows * args.fraction))).to_json(
+        os.path.join(args.download_dir, f"{split}.jsonl")
+    )
diff --git a/sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/sample_score.json b/sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/sample_score.json
new file mode 100644
index 0000000000..88135323be
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/sample_score.json
@@ -0,0 +1 @@
+{"inputs": {"input_string": ["`` was there no [MASK] between you two? \\'\\'"]}}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/inference/fill-mask/fill-mask-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/fill-mask/fill-mask-batch-endpoint.ipynb
new file mode 100644
index 0000000000..bd7d8ad217
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/fill-mask/fill-mask-batch-endpoint.ipynb
@@ -0,0 +1,517 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Fill Mask Inference using Batch Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `fill-mask` type models to a batch endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`fill-mask` task is about predicting masked words in a sentence. Models that perform this have a good understanding of the language structure and domain of the dataset that they are trained on. `fill-mask` models are typically used as foundation models for more scenario oriented tasks such as `text-classification` or `token-classification`.\n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `fill-mask` task are tagged with `task: fill-mask`. We will use the `bert-base-uncased` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+    "\n",
+    "### Inference data\n",
+    "We will use the [book corpus](https://huggingface.co/datasets/bookcorpus) dataset. A copy of this dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder. \n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for batch inference.\n",
+    "* Run a batch inference job.\n",
+    "* Review inference predictions.\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies.\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry.\n",
+    "* Create or update compute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import packages used by the following code snippets\n",
+    "import csv\n",
+    "import json\n",
+    "import os\n",
+    "import random\n",
+    "import time\n",
+    "\n",
+    "import pandas as pd\n",
+    "import urllib.request\n",
+    "\n",
+    "from azure.ai.ml import Input, MLClient\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import (\n",
+    "    AmlCompute,\n",
+    "    BatchDeployment,\n",
+    "    BatchEndpoint,\n",
+    "    BatchRetrySettings,\n",
+    "    Model,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "resource_group_name = \"<RESOURCE_GROUP>\"\n",
+    "workspace_name = \"<WORKSPACE_NAME>\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Connect to workspace and registry using ML clients."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=subscription_id,\n",
+    "    resource_group_name=resource_group_name,\n",
+    "    workspace_name=workspace_name,\n",
+    ")\n",
+    "# The models, fine tuning pipelines, and environments are available in the AzureML system registry, \"azureml\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create a compute cluster.\n",
+    "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as `size` below. If you already have a sufficient compute cluster, you can simply define the name in `compute_name` in the following code block. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_name = \"cpu-cluster\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_cluster = AmlCompute(\n",
+    "    name=compute_name,\n",
+    "    description=\"An AML compute cluster\",\n",
+    "    size=\"Standard_DS3_V2\",\n",
+    "    min_instances=0,\n",
+    "    max_instances=3,\n",
+    "    idle_time_before_scale_down=120,\n",
+    ")  # 120 seconds\n",
+    "\n",
+    "workspace_ml_client.begin_create_or_update(compute_cluster)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `fill-mask` task. In this example, we use the `bert-base-uncased` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"bert-base-uncased\"\n",
+    "model_version = \"1\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing.\"\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the book corpus dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows.\n",
+    "* We will `<mask>` one word in each sentence so that the model can predict the masked words.\n",
+    "* We want this sample to run quickly, so save a smaller dataset containing a fraction of the original."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define directories and filenames as variables\n",
+    "dataset_dir = \"book-corpus-dataset\"\n",
+    "training_datafile = \"train.jsonl\"\n",
+    "\n",
+    "batch_dir = \"batch\"\n",
+    "batch_inputs_dir = os.path.join(batch_dir, \"inputs\")\n",
+    "batch_input_file = \"batch_input.csv\"\n",
+    "os.makedirs(batch_dir, exist_ok=True)\n",
+    "os.makedirs(batch_inputs_dir, exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the ./book-corpus-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # Set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(os.path.join(\".\", dataset_dir, training_datafile), lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Transform the data using the masking token."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get the right mask token from huggingface\n",
+    "with urllib.request.urlopen(f\"https://huggingface.co/api/models/{model_name}\") as url:\n",
+    "    data = json.load(url)\n",
+    "    mask_token = data[\"mask_token\"]\n",
+    "\n",
+    "# Take the value of the \"text\" column, replace a random word with the mask token, and save the result in the \"masked_text\" column\n",
+    "train_df[\"masked_text\"] = train_df[\"text\"].apply(\n",
+    "    lambda x: x.replace(random.choice(x.split()), mask_token, 1)\n",
+    ")\n",
+    "\n",
+    "# Save the train_df dataframe to a jsonl file in the ./batch folder with the `masked_` prefix\n",
+    "masked_datafile = os.path.join(batch_dir, \"masked_\" + training_datafile)\n",
+    "train_df.to_json(masked_datafile, orient=\"records\", lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Save a fraction of the input data to files of smaller batches for testing. The MLflow model's signature specifies the input should be a column named `\"input_string\"`, so rename the transformed `\"masked_text\"` column.  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_df = (\n",
+    "    train_df[[\"masked_text\"]]\n",
+    "    .rename(columns={\"masked_text\": \"input_string\"})\n",
+    "    .sample(frac=0.1)\n",
+    ")\n",
+    "\n",
+    "# Divide this into files of 100 rows each\n",
+    "batch_size_per_predict = 100\n",
+    "for i in range(0, len(batch_df), batch_size_per_predict):\n",
+    "    j = i + batch_size_per_predict\n",
+    "    batch_df[i:j].to_csv(\n",
+    "        os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL\n",
+    "    )\n",
+    "\n",
+    "# Check out the first and last file name created\n",
+    "input_files = os.listdir(batch_inputs_dir)\n",
+    "print(f\"{input_files[0]} to {str(i)}{batch_input_file}.\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to a batch endpoint\n",
+    "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n",
+    "\n",
+    "* Create a batch endpoint.\n",
+    "* Create a batch deployment.\n",
+    "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name.\n",
+    "\n",
+    "#### Create the endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "endpoint_name = \"fill-mask-\" + str(timestamp)\n",
+    "\n",
+    "endpoint = BatchEndpoint(\n",
+    "    name=endpoint_name,\n",
+    "    description=\"Batch endpoint for \" + foundation_model.name + \", for fill-mask task\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create the deployment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "deployment_name = \"demo\"\n",
+    "\n",
+    "deployment = BatchDeployment(\n",
+    "    name=deployment_name,\n",
+    "    endpoint_name=endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    compute=compute_name,\n",
+    "    error_threshold=0,\n",
+    "    instance_count=1,\n",
+    "    logging_level=\"info\",\n",
+    "    max_concurrency_per_instance=2,\n",
+    "    mini_batch_size=10,\n",
+    "    output_file_name=\"predictions.csv\",\n",
+    "    retry_settings=BatchRetrySettings(max_retries=3, timeout=300),\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(deployment).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Set the deployment as default."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n",
+    "endpoint.defaults.deployment_name = deployment_name\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()\n",
+    "\n",
+    "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n",
+    "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Run a batch inference job.\n",
+    "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input = Input(path=batch_inputs_dir, type=AssetTypes.URI_FOLDER)\n",
+    "\n",
+    "job = workspace_ml_client.batch_endpoints.invoke(\n",
+    "    endpoint_name=endpoint.name, input=input\n",
+    ")\n",
+    "\n",
+    "workspace_ml_client.jobs.stream(job.name)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Review inference predictions.\n",
+    "Download the predictions from the job output and review the predictions using a dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n",
+    "\n",
+    "workspace_ml_client.jobs.download(\n",
+    "    name=scoring_job.name, download_path=batch_dir, output_name=\"score\"\n",
+    ")\n",
+    "\n",
+    "predictions_file = os.path.join(batch_dir, \"named-outputs\", \"score\", \"predictions.csv\")\n",
+    "\n",
+    "# Load the batch predictions file with no headers into a dataframe and set your column names\n",
+    "score_df = pd.read_csv(\n",
+    "    predictions_file,\n",
+    "    header=None,\n",
+    "    names=[\"row_number_per_file\", \"prediction\", \"batch_input_file_name\"],\n",
+    ")\n",
+    "score_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Record the input file name and set the original index value in the `'index'` column for each input file. Join the `train_df` with ground truth into the input dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_df = []\n",
+    "for file in input_files:\n",
+    "    input = pd.read_csv(os.path.join(batch_inputs_dir, file), index_col=0)\n",
+    "    input.reset_index(inplace=True)\n",
+    "    input[\"batch_input_file_name\"] = file\n",
+    "    input.reset_index(names=[\"row_number_per_file\"], inplace=True)\n",
+    "    input_df.append(input)\n",
+    "input_df = pd.concat(input_df)\n",
+    "input_df.set_index(\"index\", inplace=True)\n",
+    "input_df = input_df.join(train_df).drop(columns=[\"input_string\"])\n",
+    "\n",
+    "input_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Join the predictions with input data to compare them to ground truth."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.merge(\n",
+    "    input_df, score_df, how=\"inner\", on=[\"row_number_per_file\", \"batch_input_file_name\"]\n",
+    ")\n",
+    "\n",
+    "# Show the first few rows of the results\n",
+    "df.head(20)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 7. Clean up resources\n",
+    "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()\n",
+    "workspace_ml_client.compute.begin_delete(name=compute_name).result()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/fill-mask/fill-mask-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/fill-mask/fill-mask-online-endpoint.ipynb
new file mode 100644
index 0000000000..6466224c76
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/fill-mask/fill-mask-online-endpoint.ipynb
@@ -0,0 +1,323 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Fill Mask Inference using Online Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `fill-mask` type models to an online endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`fill-mask` task is about predicting masked words in a sentence. Models that perform this have a good understanding of the language structure and domain of the dataset that they are trained on. `fill-mask` models are typically used as foundation models for more scenario oriented tasks such as `text-classification` or `token-classification`.\n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `fill-mask` task are tagged with `task: fill-mask`. We will use the `bert-base-uncased` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+    "\n",
+    "### Inference data\n",
+    "We will use the [book corpus](https://huggingface.co/datasets/bookcorpus) dataset. A copy of this dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder. \n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for real time inference.\n",
+    "* Test the endpoint\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "    resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "    workspace_name=\"<WORKSPACE_NAME>\",\n",
+    ")\n",
+    "# The models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `fill-mask` task. In this example, we use the `bert-base-uncased` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"bert-base-uncased\"\n",
+    "model_version = \"3\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the book corpus dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* We will `<mask>` one word in each sentence so that the model can predict the masked words.\n",
+    "* Save a few samples in the format that can be passed as input to the online-inference endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the ./book-corpus-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(\"./book-corpus-dataset/train.jsonl\", lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get the right mask token from huggingface\n",
+    "import urllib.request, json\n",
+    "\n",
+    "with urllib.request.urlopen(f\"https://huggingface.co/api/models/{model_name}\") as url:\n",
+    "    data = json.load(url)\n",
+    "    mask_token = data[\"mask_token\"]\n",
+    "\n",
+    "# take the value of the \"text\" column, replace a random word with the mask token and save the result in the \"masked_text\" column\n",
+    "import random, os\n",
+    "\n",
+    "train_df[\"masked_text\"] = train_df[\"text\"].apply(\n",
+    "    lambda x: x.replace(random.choice(x.split()), mask_token, 1)\n",
+    ")\n",
+    "# save the train_df dataframe to a jsonl file in the ./book-corpus-dataset folder with the masked_ prefix\n",
+    "train_df.to_json(\n",
+    "    os.path.join(\".\", \"book-corpus-dataset\", \"masked_train.jsonl\"),\n",
+    "    orient=\"records\",\n",
+    "    lines=True,\n",
+    ")\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import (\n",
+    "    ManagedOnlineEndpoint,\n",
+    "    ManagedOnlineDeployment,\n",
+    "    OnlineRequestSettings,\n",
+    ")\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "online_endpoint_name = \"fill-mask-\" + str(timestamp)\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \" + foundation_model.name + \", for fill-mask task\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    instance_type=\"Standard_DS2_v2\",\n",
+    "    instance_count=1,\n",
+    "    request_settings=OnlineRequestSettings(\n",
+    "        request_timeout_ms=60000,\n",
+    "    ),\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch some sample data from the masked dataset prepared above and submit it to the online endpoint for inference. We will then display the predicted sequences alongside the ground truth text."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "# read the ./book-corpus-dataset/masked_train.jsonl file into a pandas dataframe\n",
+    "df = pd.read_json(\"./book-corpus-dataset/masked_train.jsonl\", lines=True)\n",
+    "# escape single and double quotes in the masked_text column\n",
+    "df[\"masked_text\"] = df[\"masked_text\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n",
+    "# pick 1 random row\n",
+    "sample_df = df.sample(1)\n",
+    "# create a json object with the key as \"inputs\" and value as a list of values from the masked_text column of the sample_df dataframe\n",
+    "test_json = {\"inputs\": {\"input_string\": sample_df[\"masked_text\"].tolist()}}\n",
+    "# save the json object to a file named sample_score.json in the ./book-corpus-dataset folder\n",
+    "with open(os.path.join(\".\", \"book-corpus-dataset\", \"sample_score.json\"), \"w\") as f:\n",
+    "    json.dump(test_json, f)\n",
+    "sample_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=\"./book-corpus-dataset/sample_score.json\",\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the json response to a pandas dataframe\n",
+    "response_df = pd.read_json(response)\n",
+    "response_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# compare the predicted sequences with the ground truth sequence\n",
+    "compare_df = pd.DataFrame(\n",
+    "    {\n",
+    "        \"ground_truth_sequence\": sample_df[\"text\"].tolist() * 5,\n",
+    "        \"predicted_sequence\": response_df[\"sequence\"].tolist(),\n",
+    "        \"score\": response_df[\"score\"].tolist(),\n",
+    "    }\n",
+    ")\n",
+    "compare_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint; otherwise, you will leave the billing meter running for the compute used by the endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/question-answering/question-answering-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/question-answering/question-answering-batch-endpoint.ipynb
new file mode 100644
index 0000000000..8ebdd2820f
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/question-answering/question-answering-batch-endpoint.ipynb
@@ -0,0 +1,485 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Question Answering Inference using Batch Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `question-answering` type models to a batch endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`question-answering` tasks return an answer given a question. There are two common types of `question-answering` tasks:\n",
+    "\n",
+    "* Extractive: extract the answer from the given context.\n",
+    "* Abstractive: generate an answer from the context that correctly answers the question.\n",
+    " \n",
+    "### Model\n",
+    "Models that can perform the `question-answering` task are tagged with `task: question-answering`. We will use the `deepset-minilm-uncased-squad2` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+    "\n",
+    "### Inference data\n",
+    "We will use the [SQUAD](https://huggingface.co/datasets/squad) dataset. A copy of this dataset is available in the [squad-dataset](./squad-dataset/) folder. The [original source](https://rajpurkar.github.io/SQuAD-explorer/) of the dataset describes it as follows: _\"Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable.\"_\n",
+    "\n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for batch inference.\n",
+    "* Run a batch inference job.\n",
+    "* Review inference predictions.\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies.\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry.\n",
+    "* Create or update compute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import packages used by the following code snippets\n",
+    "import csv\n",
+    "import os\n",
+    "import time\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "from azure.ai.ml import Input, MLClient\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import (\n",
+    "    AmlCompute,\n",
+    "    BatchDeployment,\n",
+    "    BatchEndpoint,\n",
+    "    BatchRetrySettings,\n",
+    "    Model,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "resource_group_name = \"<RESOURCE_GROUP>\"\n",
+    "workspace_name = \"<WORKSPACE_NAME>\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Connect to workspace and registry using ML clients."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=subscription_id,\n",
+    "    resource_group_name=resource_group_name,\n",
+    "    workspace_name=workspace_name,\n",
+    ")\n",
+    "# The models, fine tuning pipelines, and environments are available in the AzureML system registry, \"azureml\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create a compute cluster.\n",
+    "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as `size` below. If you already have a sufficient compute cluster, you can simply define the name in `compute_name` in the following code block. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_name = \"cpu-cluster\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_cluster = AmlCompute(\n",
+    "    name=compute_name,\n",
+    "    description=\"An AML compute cluster\",\n",
+    "    size=\"Standard_DS3_V2\",\n",
+    "    min_instances=0,\n",
+    "    max_instances=3,\n",
+    "    idle_time_before_scale_down=120,\n",
+    ")  # 120 seconds\n",
+    "\n",
+    "workspace_ml_client.begin_create_or_update(compute_cluster)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `question-answering` task. In this example, we use the `deepset-minilm-uncased-squad2` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"deepset-minilm-uncased-squad2\"\n",
+    "model_version = \"1\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing.\"\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the SQUAD dataset is available in the [squad-dataset](./squad-dataset/) folder.  The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* We want this sample to run quickly, so save a smaller dataset containing a fraction of the original."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define directories and filenames as variables\n",
+    "dataset_dir = \"squad-dataset\"\n",
+    "training_datafile = \"train.jsonl\"\n",
+    "\n",
+    "batch_dir = \"batch\"\n",
+    "batch_inputs_dir = os.path.join(batch_dir, \"inputs\")\n",
+    "batch_input_file = \"batch_input.csv\"\n",
+    "os.makedirs(batch_dir, exist_ok=True)\n",
+    "os.makedirs(batch_inputs_dir, exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the ./squad-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # Set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(os.path.join(\".\", dataset_dir, training_datafile), lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Save a fraction of the input data to files of smaller batches for testing. The MLflow model's signature specifies the input should be a column named `\"question\"` and a column named `\"context\"`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_df = train_df[[\"question\", \"context\"]].sample(frac=0.01)\n",
+    "\n",
+    "# Divide this into files of 50 rows each\n",
+    "batch_size_per_predict = 50\n",
+    "for i in range(0, len(batch_df), batch_size_per_predict):\n",
+    "    j = i + batch_size_per_predict\n",
+    "    batch_df[i:j].to_csv(\n",
+    "        os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL\n",
+    "    )\n",
+    "\n",
+    "# Check out the first and last file name created\n",
+    "input_files = os.listdir(batch_inputs_dir)\n",
+    "print(f\"{input_files[0]} to {str(i)}{batch_input_file}.\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to a batch endpoint\n",
+    "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n",
+    "\n",
+    "* Create a batch endpoint.\n",
+    "* Create a batch deployment.\n",
+    "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name.\n",
+    "\n",
+    "#### Create the endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "endpoint_name = \"question-answering-\" + str(timestamp)\n",
+    "\n",
+    "endpoint = BatchEndpoint(\n",
+    "    name=endpoint_name,\n",
+    "    description=\"Batch endpoint for \"\n",
+    "    + foundation_model.name\n",
+    "    + \", for question-answering task\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create the deployment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "deployment_name = \"demo\"\n",
+    "\n",
+    "deployment = BatchDeployment(\n",
+    "    name=deployment_name,\n",
+    "    endpoint_name=endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    compute=compute_name,\n",
+    "    error_threshold=0,\n",
+    "    instance_count=1,\n",
+    "    logging_level=\"info\",\n",
+    "    max_concurrency_per_instance=1,\n",
+    "    mini_batch_size=10,\n",
+    "    output_file_name=\"predictions.csv\",\n",
+    "    retry_settings=BatchRetrySettings(max_retries=3, timeout=300),\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(deployment).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Set the deployment as default."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n",
+    "endpoint.defaults.deployment_name = deployment_name\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()\n",
+    "\n",
+    "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n",
+    "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Run a batch inference job.\n",
+    "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input = Input(path=batch_inputs_dir, type=AssetTypes.URI_FOLDER)\n",
+    "\n",
+    "job = workspace_ml_client.batch_endpoints.invoke(\n",
+    "    endpoint_name=endpoint.name, input=input\n",
+    ")\n",
+    "\n",
+    "workspace_ml_client.jobs.stream(job.name)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Review inference predictions. \n",
+    "Download the predictions from the job output and review the predictions using a dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n",
+    "\n",
+    "workspace_ml_client.jobs.download(\n",
+    "    name=scoring_job.name, download_path=batch_dir, output_name=\"score\"\n",
+    ")\n",
+    "\n",
+    "predictions_file = os.path.join(batch_dir, \"named-outputs\", \"score\", \"predictions.csv\")\n",
+    "\n",
+    "# Load the batch predictions file with no headers into a dataframe and set your column names\n",
+    "score_df = pd.read_csv(\n",
+    "    predictions_file,\n",
+    "    header=None,\n",
+    "    names=[\"row_number_per_file\", \"prediction\", \"batch_input_file_name\"],\n",
+    ")\n",
+    "score_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Record the input file name and set the original index value in the `'index'` column for each input file. Join `train_df`, which contains the ground truth, into the input dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_df = []\n",
+    "for file in input_files:\n",
+    "    input = pd.read_csv(os.path.join(batch_inputs_dir, file), index_col=0)\n",
+    "    input.reset_index(inplace=True)\n",
+    "    input[\"batch_input_file_name\"] = file\n",
+    "    input.reset_index(names=[\"row_number_per_file\"], inplace=True)\n",
+    "    input_df.append(input)\n",
+    "input_df = pd.concat(input_df)\n",
+    "input_df.set_index(\"index\", inplace=True)\n",
+    "input_df = input_df.drop(columns=[\"question\", \"context\"]).join(train_df)\n",
+    "\n",
+    "input_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Join the predictions with input data to compare them to ground truth."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.merge(\n",
+    "    input_df, score_df, how=\"inner\", on=[\"row_number_per_file\", \"batch_input_file_name\"]\n",
+    ")\n",
+    "\n",
+    "# Show the first few rows of the results\n",
+    "df.head(20)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 7. Clean up resources\n",
+    "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()\n",
+    "workspace_ml_client.compute.begin_delete(name=compute_name).result()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/question-answering/question-answering-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/question-answering/question-answering-online-endpoint.ipynb
new file mode 100644
index 0000000000..d9f3c7da7e
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/question-answering/question-answering-online-endpoint.ipynb
@@ -0,0 +1,302 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Question Answering Inference using Online Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `question-answering` type models to an online endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`question-answering` tasks return an answer given a question. There are two common types of `question-answering` tasks:\n",
+    "\n",
+    "* Extractive: extract the answer from the given context.\n",
+    "* Abstractive: generate an answer from the context that correctly answers the question.\n",
+    " \n",
+    "### Model\n",
+    "Models that can perform the `question-answering` task are tagged with `task: question-answering`. We will use the `deepset-minilm-uncased-squad2` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+    "\n",
+    "### Inference data\n",
+    "We will use the [SQUAD](https://huggingface.co/datasets/squad) dataset. A copy of this dataset is available in the [squad-dataset](./squad-dataset/) folder. The [original source](https://rajpurkar.github.io/SQuAD-explorer/) of the dataset describes it as follows: _\"Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable.\"_\n",
+    "\n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for real time inference.\n",
+    "* Test the endpoint\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to the `azureml-preview` system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "    resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "    workspace_name=\"<WORKSPACE_NAME>\",\n",
+    ")\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `question-answering` task. In this example, we use the `deepset-minilm-uncased-squad2` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"deepset-minilm-uncased-squad2\"\n",
+    "model_version = \"3\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the SQUAD dataset is available in the [squad-dataset](./squad-dataset/) folder.  The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* Save a few samples in the format that can be passed as input to the online-inference endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the ./squad-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(\"./squad-dataset/train.jsonl\", lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import (\n",
+    "    ManagedOnlineEndpoint,\n",
+    "    ManagedOnlineDeployment,\n",
+    "    OnlineRequestSettings,\n",
+    ")\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "online_endpoint_name = \"question-answering-\" + str(timestamp)\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \"\n",
+    "    + foundation_model.name\n",
+    "    + \", for question-answering task\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    instance_type=\"Standard_DS2_v2\",\n",
+    "    instance_count=1,\n",
+    "    request_settings=OnlineRequestSettings(\n",
+    "        request_timeout_ms=60000,\n",
+    "    ),\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch some sample data from the dataset and submit it to the online endpoint for inference. We will then display the predicted answer alongside the ground truth answer."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import os\n",
+    "\n",
+    "# read the ./squad-dataset/train.jsonl file into a pandas dataframe\n",
+    "df = pd.read_json(\"./squad-dataset/train.jsonl\", lines=True)\n",
+    "# escape single and double quotes in the question and context columns\n",
+    "df[\"question\"] = df[\"question\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n",
+    "df[\"context\"] = df[\"context\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n",
+    "# pick 1 random row\n",
+    "sample_df = df.sample(1)\n",
+    "# create a json object with the key as \"inputs\" and value as a list of question-context pairs from columns of the sample_df dataframe\n",
+    "test_json = {\n",
+    "    \"inputs\": {\n",
+    "        \"question\": sample_df[\"question\"].to_list(),\n",
+    "        \"context\": sample_df[\"context\"].to_list(),\n",
+    "    }\n",
+    "}\n",
+    "# save the json object to a file named sample_score.json in the ./squad-dataset folder\n",
+    "with open(os.path.join(\".\", \"squad-dataset\", \"sample_score.json\"), \"w\") as f:\n",
+    "    json.dump(test_json, f)\n",
+    "sample_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=\"./squad-dataset/sample_score.json\",\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the json response to a pandas dataframe\n",
+    "response_df = pd.read_json(response, typ=\"series\")\n",
+    "response_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# compare the predicted answer with the actual answer\n",
+    "response_df = pd.DataFrame({\"predicted_answer\": [response_df[\"answer\"]]})\n",
+    "response_df[\"ground_truth_answer\"] = sample_df[\"answers\"].to_list()[0][\"text\"]\n",
+    "response_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/question-answering/squad-dataset/download-dataset.py b/sdk/python/foundation-models/system/inference/question-answering/squad-dataset/download-dataset.py
new file mode 100644
index 0000000000..e9ffb0b999
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/question-answering/squad-dataset/download-dataset.py
@@ -0,0 +1,28 @@
+# import library to parse command line arguments
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# add an argument to specify a dataset name to download
+parser.add_argument("--dataset", type=str, default="squad", help="dataset name")
+# add an argument to specify the directory to download the dataset to
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="data",
+    help="directory to download the dataset to",
+)
+args = parser.parse_args()
+
+# create the download directory if it does not exist
+if not os.path.exists(args.download_dir):
+    os.makedirs(args.download_dir)
+
+# import hugging face datasets library
+from datasets import load_dataset, get_dataset_split_names
+
+for split in get_dataset_split_names(args.dataset):
+    # load the split of the dataset
+    dataset = load_dataset(args.dataset, split=split)
+    # save the split of the dataset to the download directory as json lines file
+    dataset.to_json(os.path.join(args.download_dir, f"{split}.jsonl"))
+    # print dataset features
diff --git a/sdk/python/foundation-models/system/inference/question-answering/squad-dataset/sample_score.json b/sdk/python/foundation-models/system/inference/question-answering/squad-dataset/sample_score.json
new file mode 100644
index 0000000000..1d18e6ca1b
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/question-answering/squad-dataset/sample_score.json
@@ -0,0 +1 @@
+{"inputs": {"question": ["What high profile controversial project was Von Neumann a prinipal of?"], "context": ["He was a pioneer of the application of operator theory to quantum mechanics, in the development of functional analysis, a principal member of the Manhattan Project and the Institute for Advanced Study in Princeton (as one of the few originally appointed), and a key figure in the development of game theory and the concepts of cellular automata, the universal constructor and the digital computer. He published 150 papers in his life; 60 in pure mathematics, 20 in physics, and 60 in applied mathematics. His last work, an unfinished manuscript written while in the hospital, was later published in book form as The Computer and the Brain."]}}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/download-dataset.py b/sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/download-dataset.py
new file mode 100644
index 0000000000..c25ad19702
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/download-dataset.py
@@ -0,0 +1,38 @@
+# import library to parse command line arguments
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# add an argument to specify a dataset name to download
+parser.add_argument("--dataset", type=str, default="squad", help="dataset name")
+# add an argument to specify the config name of the dataset
+parser.add_argument(
+    "--config_name", type=str, default="plain_text", help="config name of the dataset"
+)
+# argument to save a fraction of the dataset
+parser.add_argument(
+    "--fraction", type=float, default=0.05, help="fraction of the dataset to save"
+)
+# add an argument to specify the directory to download the dataset to
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="data",
+    help="directory to download the dataset to",
+)
+args = parser.parse_args()
+
+# create the download directory if it does not exist
+if not os.path.exists(args.download_dir):
+    os.makedirs(args.download_dir)
+
+# import hugging face datasets library
+from datasets import load_dataset, get_dataset_split_names
+
+for split in get_dataset_split_names(args.dataset, config_name=args.config_name):
+    print(f"Loading {split} split of {args.dataset} dataset...")
+    # load the split of the dataset
+    dataset = load_dataset(args.dataset, args.config_name, split=split)
+    # save the split of the dataset to the download directory as json lines file
+    dataset.select(range(int(dataset.num_rows * args.fraction))).to_json(
+        os.path.join(args.download_dir, f"{split}.jsonl")
+    )
diff --git a/sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/sample_score.json b/sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/sample_score.json
new file mode 100644
index 0000000000..ff3e4f7286
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/sample_score.json
@@ -0,0 +1 @@
+{"inputs": {"input_string": ["NEW YORK (CNN) -- Nearly a year after being beaten into a coma, Bryan Steinhauer said Wednesday he does not hate the Serbian basketball player witnesses said brutally assaulted him. Bryan Steinhauer, who was beaten into a coma, is making significant progress in his rehabilitation. \\\"I am not full of hate; hatred kills progress,\\\" he said. Appearing alongside his parents and doctors at New York\\'s Mount Sinai Hospital, Steinhauer struggled to speak but his message was clear. \\\"Please don\\'t feel sorry for me,\\\" he said. \\\"Tragedy leads to wisdom, and this experience has taught me so much about life.\\\"  Watch Steinhauer talk about his recovery \u00bb . The 22-year-old from Brooklyn was about to graduate and had a job lined up at accounting giant KPMG when he got into an argument last May that nearly cost him his life. According to police, witnesses said Steinhauer and college basketball player Miladin Kovacevic had exchanged harsh words at an upstate New York bar near Binghamton University after Steinhauer danced with the girlfriend of one of Kovacevic\\'s friends. The witnesses said the fight went outside the bar, where several men attacked Steinhauer, with Kovacevic beating him about the head, according to police. Kovacevic is 6-foot-9 and 260 pounds while Steinhauer was 130 pounds. Kovacevic was arrested but jumped bail and fled to Serbia with the help of Serbian consular officials in New York. The case strained relations between the United States and Serbia.Hillary Clinton intervened, first as U.S. senator from New York and later as secretary of state, as did Sen. Charles Schumer, D-New York, to make sure Kovacevic was prosecuted. Serbia has no extradition treaty with the United States, but Serbian officials arrested Kovacevic last October and are working on prosecuting him with the assistance of the district attorney of Broome County, where the beating took place. 
In addition, the Serbian government recently paid the Steinhauer family $900,000 in recognition of the misconduct of Serbian government officials and the financial burdens placed upon the Steinhauer family as a result of the beating. Steinhauer awoke from his coma last August, three months after the beating that left him with skull fractures, a severe brain injury and no memory of the attack. He weighed less than 100 pounds, could not speak or walk, and was fed through a tube, doctors said Wednesday. \\\"He had hemorrhages and contusions affecting almost every lobe of his brain,\\\" said Dr. Brian Greenwald. Working with specialists and boosted by the support of his family, Steinhauer surprised even his doctors in his quick recovery, they said. He now has outpatient rehabilitation four times a week, goes to a gym, and receives acupuncture treatments. While he can eat on his own now and is making significant progress, Steinhauer continues to undergo intensive therapy. Steinhauer says he doesn\\'t think about Kovacevic because he\\'s not concerned about other people. \\\"I\\'ve had a second birth and raising at Mount Sinai,\\\" he said. \\\"Live long and prosper.\\\""]}}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/inference/summarization/summarization-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/summarization/summarization-online-endpoint.ipynb
new file mode 100644
index 0000000000..3370e3e4a5
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/summarization/summarization-online-endpoint.ipynb
@@ -0,0 +1,298 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summarization Inference using Online Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `summarization` type models to an online endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`summarization` creates a shorter version of a document or an article that captures all the important information. Along with translation, it is another example of a task that can be formulated as a sequence-to-sequence task. \n",
+    "`summarization` can be:\n",
+    "\n",
+    "* Extractive: extract the most relevant information from a document.\n",
+    "* Abstractive: generate new text that captures the most relevant information.\n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `summarization` task are tagged with `task: summarization`. We will use the `sshleifer-distilbart-cnn-12-6` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+    "\n",
+    "### Inference data\n",
+    "We will use the [CNN DailyMail](https://huggingface.co/datasets/cnn_dailymail) dataset. A copy of this dataset is available in the [news-summary-dataset](./news-summary-dataset/) folder.\n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for real time inference.\n",
+    "* Test the endpoint\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to the `azureml-preview` system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "    resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "    workspace_name=\"<WORKSPACE_NAME>\",\n",
+    ")\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `summarization` task. In this example, we use the `sshleifer-distilbart-cnn-12-6` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"sshleifer-distilbart-cnn-12-6\"\n",
+    "model_version = \"3\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the news summary dataset is available in the [news-summary-dataset](./news-summary-dataset/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* Save a few samples in the format that can be passed as input to the online-inference endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the ./news-summary-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(\"./news-summary-dataset/train.jsonl\", lines=True)\n",
+    "train_df.head(2)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import (\n",
+    "    ManagedOnlineEndpoint,\n",
+    "    ManagedOnlineDeployment,\n",
+    "    OnlineRequestSettings,\n",
+    ")\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "online_endpoint_name = \"summarization-\" + str(timestamp)\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \"\n",
+    "    + foundation_model.name\n",
+    "    + \", for summarization task\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    instance_type=\"Standard_DS3_v2\",\n",
+    "    instance_count=1,\n",
+    "    request_settings=OnlineRequestSettings(\n",
+    "        request_timeout_ms=60000,\n",
+    "    ),\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch some sample data from the dataset and submit it to the online endpoint for inference. We will then display the predicted summary alongside the ground truth summary."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import os\n",
+    "\n",
+    "# read the ./news-summary-dataset/train.jsonl file into a pandas dataframe\n",
+    "df = pd.read_json(\"./news-summary-dataset/train.jsonl\", lines=True)\n",
+    "# escape single and double quotes in the article column\n",
+    "df[\"article\"] = df[\"article\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n",
+    "# pick 1 random row\n",
+    "sample_df = df.sample(1)\n",
+    "# create a json object with the key as \"inputs\" and value as a list of values from the article column of the sample_df dataframe\n",
+    "sample_json = {\"inputs\": sample_df[\"article\"].tolist()}\n",
+    "# create the request payload: a json object whose \"inputs\" key holds an \"input_string\" list built from the article column\n",
+    "test_json = {\"inputs\": {\"input_string\": sample_df[\"article\"].tolist()}}\n",
+    "# save the json object to a file named sample_score.json in the ./news-summary-dataset folder\n",
+    "with open(os.path.join(\".\", \"news-summary-dataset\", \"sample_score.json\"), \"w\") as f:\n",
+    "    json.dump(test_json, f)\n",
+    "sample_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=\"./news-summary-dataset/sample_score.json\",\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the json response to a pandas dataframe\n",
+    "response_df = pd.read_json(response)\n",
+    "response_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# compare the true summary with the predicted summary\n",
+    "response_df.rename(columns={\"summary_text\": \"predicted_summary\"}, inplace=True)\n",
+    "response_df[\"ground_truth_summary\"] = [sample_df[\"highlights\"].tolist()[0]]\n",
+    "response_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/text-classification/entailment-contradiction-batch.ipynb b/sdk/python/foundation-models/system/inference/text-classification/entailment-contradiction-batch.ipynb
new file mode 100644
index 0000000000..329689fc0b
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/text-classification/entailment-contradiction-batch.ipynb
@@ -0,0 +1,614 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "source": [
+    "## Text Classification Inference using Batch Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `text-classification` type models to a batch endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`text-classification` is a generic task type that can be used for scenarios such as sentiment analysis, emotion detection, grammar checking, spam filtering, etc. In this example, we will test for entailment vs. contradiction: given a premise sentence and a hypothesis sentence, the task is to predict whether the premise entails the hypothesis (entailment), contradicts the hypothesis (contradiction), or neither (neutral). \n",
+    "\n",
+    "### Inference data\n",
+    "The Multi-Genre Natural Language Inference Corpus, or MNLI, is a crowd-sourced collection of sentence pairs with textual entailment annotations. The [MNLI](https://huggingface.co/datasets/glue) dataset is a subset of the larger [General Language Understanding Evaluation](https://gluebenchmark.com/) dataset. A copy of this dataset is available in the [glue-mnli-dataset](./glue-mnli-dataset/) folder.\n",
+    "\n",
+    "### Model\n",
+    "Look for models tagged with `text-classification` in the system registry. Looking for the `text-classification` tag alone is not sufficient; you also need to check whether the model is specifically fine-tuned for entailment vs. contradiction by studying the model card and looking at the input/output samples or signatures of the model. In this notebook, we use the `microsoft-deberta-base-mnli` model.\n",
+    "\n",
+    "  \n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for batch inference.\n",
+    "* Run a batch inference job.\n",
+    "* Review inference predictions.\n",
+    "* Clean up resources.\n"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies.\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry.\n",
+    "* Create or update compute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import packages used by the following code snippets\n",
+    "import csv\n",
+    "import json\n",
+    "import os\n",
+    "import time\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "from azure.ai.ml import Input, MLClient\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import (\n",
+    "    AmlCompute,\n",
+    "    BatchDeployment,\n",
+    "    BatchEndpoint,\n",
+    "    BatchRetrySettings,\n",
+    "    Model,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "resource_group_name = \"<RESOURCE_GROUP>\"\n",
+    "workspace_name = \"<WORKSPACE_NAME>\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Connect to workspace and registry using ML clients."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=subscription_id,\n",
+    "    resource_group_name=resource_group_name,\n",
+    "    workspace_name=workspace_name,\n",
+    ")\n",
+    "# The models, fine tuning pipelines, and environments are available in the AzureML system registry, \"azureml\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create a compute cluster.\n",
+    "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as `size` below. If you already have a sufficient compute cluster, you can simply define the name in `compute_name` in the following code block. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_name = \"cpu-cluster\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_cluster = AmlCompute(\n",
+    "    name=compute_name,\n",
+    "    description=\"An AML compute cluster\",\n",
+    "    size=\"Standard_DS3_V2\",\n",
+    "    min_instances=0,\n",
+    "    max_instances=3,\n",
+    "    idle_time_before_scale_down=120,\n",
+    ")  # 120 seconds\n",
+    "\n",
+    "workspace_ml_client.begin_create_or_update(compute_cluster)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `text-classification` task. In this example, we use the `microsoft-deberta-base-mnli` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "model_name = \"microsoft-deberta-base-mnli\"\n",
+    "model_version = \"1\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing.\"\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the MNLI dataset is available in the [glue-mnli-dataset](./glue-mnli-dataset/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* Replace numerical categories in data with the actual string labels. This mapping is available in the [./glue-mnli-dataset/label.json](./glue-mnli-dataset/label.json). This step is needed because the selected models will return labels such as `ENTAILMENT`, `CONTRADICTION`, etc. when running prediction. If the labels in your ground truth data are left as `0`, `1`, `2`, etc., then they would not match the prediction labels returned by the models.\n",
+    "* The dataset contains `premise` and `hypothesis` as two different columns. However, the models expect a single string for prediction in the format `[CLS] <premise text> [SEP] <hypothesis text> [SEP]`. Hence we merge the columns and drop the original columns.\n",
+    "* We want this sample to run quickly, so save a smaller dataset containing a fraction of the original.\n",
+    "* Since we are using an `mlflow` model, we don't need to write any inference code. However, we need the inference data to be in a shape that can be used for inference. Specifically, batch inference does not support JSON Lines (`jsonl`) files, but supports `csv` and `parquet`. We will dump a csv version from the pandas dataframe. Next, the rows of the batch inference csv file must strictly contain only the columns that will be passed to the model as input, and the column header must match the model signature. In our case, the model signature, which can be found in the `MLmodel` file in the model artifacts, expects `input_string` as input. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define directories and filenames as variables\n",
+    "dataset_dir = \"glue-mnli-dataset\"\n",
+    "training_datafile = \"train.jsonl\"\n",
+    "label_datafile = \"label.json\"\n",
+    "\n",
+    "batch_dir = \"batch\"\n",
+    "batch_inputs_dir = os.path.join(batch_dir, \"inputs\")\n",
+    "batch_input_file = \"batch_input.csv\"\n",
+    "os.makedirs(batch_dir, exist_ok=True)\n",
+    "os.makedirs(batch_inputs_dir, exist_ok=True)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the below cell, we load the input file and look at some sample data "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the ./glue-mnli-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # Set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(os.path.join(\".\", dataset_dir, training_datafile), lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Replace numerical labels with string labels and drop the columns not needed."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the id2label json element of the label.json file into pandas table with keys as 'label' column of int64 type and values as 'label_string' column as string type\n",
+    "with open(os.path.join(dataset_dir, label_datafile)) as f:\n",
+    "    id2label = json.load(f)\n",
+    "    id2label = id2label[\"id2label\"]\n",
+    "    label_df = pd.DataFrame.from_dict(\n",
+    "        id2label, orient=\"index\", columns=[\"label_string\"]\n",
+    "    )\n",
+    "    label_df[\"label\"] = label_df.index.astype(\"int64\")\n",
+    "    label_df = label_df[[\"label\", \"label_string\"]]\n",
+    "\n",
+    "# Join the train, validation and test dataframes with the id2label dataframe to get the label_string column\n",
+    "train_df = train_df.merge(label_df, on=\"label\", how=\"left\")\n",
+    "# Concat the premise and hypothesis columns to with \"[CLS]\" in the beginning and \"[SEP]\" in the middle and end to get the text column\n",
+    "train_df[\"text\"] = train_df.apply(\n",
+    "    lambda row: \"[CLS] \" + row.premise + \" [SEP] \" + row.hypothesis + \" [SEP]\", axis=1\n",
+    ")\n",
+    "# Drop the idx, premise and hypothesis columns as they are not needed\n",
+    "train_df.drop(columns=[\"idx\", \"premise\", \"hypothesis\", \"label\"], inplace=True)\n",
+    "# Rename the label_string column to ground_truth_label\n",
+    "train_df.rename(columns={\"label_string\": \"ground_truth_label\"}, inplace=True)\n",
+    "\n",
+    "# Save the train_df dataframe to a jsonl file in the ./glue-mnli-dataset/batch folder with the `cls_sep_` prefix\n",
+    "cls_sep_datafile = os.path.join(batch_dir, \"cls_sep_\" + training_datafile)\n",
+    "train_df.to_json(cls_sep_datafile, orient=\"records\", lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Save a fraction of the input data to files of smaller batches for testing. The MLflow model's signature specifies the input should be a column named `\"input_string\"`, so rename the transformed `\"text\"` column.  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_df = train_df[[\"text\"]].rename(columns={\"text\": \"input_string\"}).sample(frac=0.05)\n",
+    "\n",
+    "# Divide this into files of 100 rows each\n",
+    "batch_size_per_predict = 100\n",
+    "for i in range(0, len(batch_df), batch_size_per_predict):\n",
+    "    j = i + batch_size_per_predict\n",
+    "    batch_df[i:j].to_csv(\n",
+    "        os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL\n",
+    "    )\n",
+    "\n",
+    "# Check out the first and last file name created\n",
+    "input_files = os.listdir(batch_inputs_dir)\n",
+    "print(f\"{input_files[0]} to {str(i)}{batch_input_file}.\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to a batch endpoint\n",
+    "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n",
+    "\n",
+    "* Create a batch endpoint.\n",
+    "* Create a batch deployment.\n",
+    "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name.\n",
+    "\n",
+    "#### Create the endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "endpoint_name = \"text-classification-\" + str(timestamp)\n",
+    "\n",
+    "endpoint = BatchEndpoint(\n",
+    "    name=endpoint_name,\n",
+    "    description=\"Batch endpoint for \"\n",
+    "    + foundation_model.name\n",
+    "    + \", for text-classification task\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create the deployment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "deployment_name = \"demo\"\n",
+    "\n",
+    "deployment = BatchDeployment(\n",
+    "    name=deployment_name,\n",
+    "    endpoint_name=endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    compute=compute_name,\n",
+    "    error_threshold=0,\n",
+    "    instance_count=1,\n",
+    "    logging_level=\"info\",\n",
+    "    max_concurrency_per_instance=1,\n",
+    "    mini_batch_size=10,\n",
+    "    output_file_name=\"predictions.csv\",\n",
+    "    retry_settings=BatchRetrySettings(max_retries=3, timeout=300),\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(deployment).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Set the deployment as default."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n",
+    "endpoint.defaults.deployment_name = deployment_name\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()\n",
+    "\n",
+    "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n",
+    "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Run a batch inference job.\n",
+    "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "input = Input(path=batch_inputs_dir, type=AssetTypes.URI_FOLDER)\n",
+    "\n",
+    "job = workspace_ml_client.batch_endpoints.invoke(\n",
+    "    endpoint_name=endpoint.name, input=input\n",
+    ")\n",
+    "\n",
+    "workspace_ml_client.jobs.stream(job.name)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Review inference predictions. \n",
+    "Download the predictions from the job output and review the predictions using a dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "jupyter": {
+     "outputs_hidden": false,
+     "source_hidden": false
+    },
+    "nteract": {
+     "transient": {
+      "deleting": false
+     }
+    }
+   },
+   "outputs": [],
+   "source": [
+    "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n",
+    "\n",
+    "workspace_ml_client.jobs.download(\n",
+    "    name=scoring_job.name, download_path=batch_dir, output_name=\"score\"\n",
+    ")\n",
+    "\n",
+    "predictions_file = os.path.join(batch_dir, \"named-outputs\", \"score\", \"predictions.csv\")\n",
+    "\n",
+    "# Load the batch predictions file with no headers into a dataframe and set your column names\n",
+    "score_df = pd.read_csv(\n",
+    "    predictions_file,\n",
+    "    header=None,\n",
+    "    names=[\"row_number_per_file\", \"prediction\", \"batch_input_file_name\"],\n",
+    ")\n",
+    "score_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Record the input file name and set the original index value in the `'index'` column for each input file. Join the `train_df` with ground truth into the input dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_df = []\n",
+    "for file in input_files:\n",
+    "    input = pd.read_csv(os.path.join(batch_inputs_dir, file), index_col=0)\n",
+    "    input.reset_index(inplace=True)\n",
+    "    input[\"batch_input_file_name\"] = file\n",
+    "    input.reset_index(names=[\"row_number_per_file\"], inplace=True)\n",
+    "    input_df.append(input)\n",
+    "input_df = pd.concat(input_df)\n",
+    "input_df.set_index(\"index\", inplace=True)\n",
+    "input_df = input_df.join(train_df).drop(columns=[\"input_string\"])\n",
+    "\n",
+    "input_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Join the predictions with input data to compare them to ground truth."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.merge(\n",
+    "    input_df, score_df, how=\"inner\", on=[\"row_number_per_file\", \"batch_input_file_name\"]\n",
+    ")\n",
+    "\n",
+    "# Show the first few rows of the results\n",
+    "df.head(20)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 7. Clean up resources\n",
+    "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()\n",
+    "workspace_ml_client.compute.begin_delete(name=compute_name).result()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernel_info": {
+   "name": "amlv2"
+  },
+  "kernelspec": {
+   "display_name": "hf",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "nteract": {
+   "version": "nteract-front-end@1.0.0"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "429d412e307b288f3a8cba821a3ba110e77b02cf5672d0d0b14db25cc0bc89f4"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/sdk/python/foundation-models/system/inference/text-classification/entailment-contradiction-online.ipynb b/sdk/python/foundation-models/system/inference/text-classification/entailment-contradiction-online.ipynb
new file mode 100644
index 0000000000..ca258f8a8d
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/text-classification/entailment-contradiction-online.ipynb
@@ -0,0 +1,328 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Text Classification Inference using Online Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `text-classification` type models to an online endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`text-classification` is a generic task type that can be used for scenarios such as sentiment analysis, emotion detection, grammar checking, spam filtering, etc. In this example, we will test for entailment vs. contradiction: given a premise sentence and a hypothesis sentence, the task is to predict whether the premise entails the hypothesis (entailment), contradicts the hypothesis (contradiction), or neither (neutral). \n",
+    "\n",
+    "### Inference data\n",
+    "The Multi-Genre Natural Language Inference Corpus, or MNLI, is a crowd-sourced collection of sentence pairs with textual entailment annotations. The [MNLI](https://huggingface.co/datasets/glue) dataset is a subset of the larger [General Language Understanding Evaluation](https://gluebenchmark.com/) dataset. A copy of this dataset is available in the [glue-mnli-dataset](./glue-mnli-dataset/) folder.\n",
+    "\n",
+    "### Model\n",
+    "Look for models tagged with `text-classification` in the system registry. Looking for the `text-classification` tag alone is not sufficient; you also need to check whether the model is specifically fine-tuned for entailment vs. contradiction by studying the model card and looking at the input/output samples or signatures of the model. In this notebook, we use the `microsoft-deberta-base-mnli` model.\n",
+    "\n",
+    "   \n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for real time inference.\n",
+    "* Test the endpoint\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "    resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "    workspace_name=\"<WORKSPACE_NAME>\",\n",
+    ")\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `text-classification` task. In this example, we use the `microsoft-deberta-base-mnli` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"microsoft-deberta-base-mnli\"\n",
+    "model_version = \"2\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the MNLI dataset is available in the [glue-mnli-dataset](./glue-mnli-dataset/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* Replace numerical categories in data with the actual string labels. This mapping is available in the [./glue-mnli-dataset/label.json](./glue-mnli-dataset/label.json). This step is needed because the selected models will return labels such as `ENTAILMENT`, `CONTRADICTION`, etc. when running prediction. If the labels in your ground truth data are left as `0`, `1`, `2`, etc., then they would not match the prediction labels returned by the models.\n",
+    "* The dataset contains `premise` and `hypothesis` as two different columns. However, the models expect a single string for prediction in the format `[CLS] <premise text> [SEP] <hypothesis text> [SEP]`. Hence we merge the columns and drop the original columns.\n",
+    "* We want this sample to run quickly, so save a smaller dataset containing 10% of the original. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "dataset_dir = \"./glue-mnli-dataset\"\n",
+    "data_file = \"train.jsonl\"\n",
+    "\n",
+    "# load the train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "df = pd.read_json(os.path.join(dataset_dir, data_file), lines=True)\n",
+    "df.head()\n",
+    "\n",
+    "# load the id2label json element of the label.json file into pandas table with keys as 'label' column of int64 type and values as 'label_string' column as string type\n",
+    "import json\n",
+    "\n",
+    "label_file = \"label.json\"\n",
+    "with open(os.path.join(dataset_dir, label_file)) as f:\n",
+    "    id2label = json.load(f)\n",
+    "    id2label = id2label[\"id2label\"]\n",
+    "    label_df = pd.DataFrame.from_dict(\n",
+    "        id2label, orient=\"index\", columns=[\"label_string\"]\n",
+    "    )\n",
+    "    label_df[\"label\"] = label_df.index.astype(\"int64\")\n",
+    "    label_df = label_df[[\"label\", \"label_string\"]]\n",
+    "\n",
+    "# join the train, validation and test dataframes with the id2label dataframe to get the label_string column\n",
+    "df = df.merge(label_df, on=\"label\", how=\"left\")\n",
+    "# concat the premise and hypothesis columns to with \"[CLS]\" in the beginning and \"[SEP]\" in the middle and end to get the text column\n",
+    "df[\"text\"] = \"[CLS] \" + df[\"premise\"] + \" [SEP] \" + df[\"hypothesis\"] + \" [SEP]\"\n",
+    "# drop the idx, premise and hypothesis columns as they are not needed\n",
+    "df = df.drop(columns=[\"idx\", \"premise\", \"hypothesis\", \"label\"])\n",
+    "# rename the label_string column to ground_truth_label\n",
+    "df = df.rename(columns={\"label_string\": \"ground_truth_label\"})\n",
+    "\n",
+    "# save 10% of the rows from the train, validation and test dataframes into files with small_ prefix in the ./dataset_dir folder\n",
+    "small_data_file = \"small_train.jsonl\"\n",
+    "df.sample(frac=0.1).to_json(\n",
+    "    os.path.join(dataset_dir, small_data_file), orient=\"records\", lines=True\n",
+    ")\n",
+    "\n",
+    "df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "online_endpoint_name = \"entail-contra-\" + str(timestamp)\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \"\n",
+    "    + foundation_model.name\n",
+    "    + \", to detect entailment v/s contradiction\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    instance_type=\"Standard_DS2_v2\",\n",
+    "    instance_count=1,\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch some sample data from the test dataset and submit it to the online endpoint for inference. We will then display the scored labels alongside the ground truth labels."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "data_file_small = \"small_train.jsonl\"\n",
+    "score_file = \"sample_score.json\"\n",
+    "# read the data file into a pandas dataframe\n",
+    "df = pd.read_json(os.path.join(dataset_dir, data_file_small), lines=True)\n",
+    "# escape single and double quotes in the masked_text column\n",
+    "# pick 5 random rows\n",
+    "sample_df = df.sample(5)\n",
+    "# reset the index of sample_df\n",
+    "sample_df = sample_df.reset_index(drop=True)\n",
+    "\n",
+    "# build the request payload: the sampled text values go under inputs.input_string\n",
+    "test_json = {\"inputs\": {\"input_string\": sample_df[\"text\"].tolist()}}\n",
+    "# save the json object to a file named sample_score.json in the dataset_dir folder\n",
+    "with open(os.path.join(\".\", dataset_dir, score_file), \"w\") as f:\n",
+    "    json.dump(test_json, f)\n",
+    "sample_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=os.path.join(\".\", dataset_dir, score_file),\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the json response to a pandas dataframe\n",
+    "response_df = pd.read_json(response)\n",
+    "# rename label column to predicted_label\n",
+    "response_df = response_df.rename(columns={\"label\": \"predicted_label\"})\n",
+    "response_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# merge the sample_df and response_df dataframes\n",
+    "merged_df = sample_df.merge(response_df, left_index=True, right_index=True)\n",
+    "merged_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/download-dataset.py b/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/download-dataset.py
new file mode 100644
index 0000000000..b6794c4b4f
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/download-dataset.py
@@ -0,0 +1,43 @@
+# import library to parse command line arguments
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# add an argument to specify a dataset name to download
+parser.add_argument("--dataset", type=str, default="glue", help="dataset name")
+# add an argument to specify the config name of the dataset
+parser.add_argument(
+    "--config_name", type=str, default="mnli", help="config name of the dataset"
+)
+# argument to save a fraction of the dataset
+parser.add_argument(
+    "--fraction", type=float, default=0.1, help="fraction of the dataset to save"
+)
+# add an argument to specify the directory to download the dataset to
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="./",
+    help="directory to download the dataset to",
+)
+# add an argument to specify the split of the dataset to download
+parser.add_argument(
+    "--split", type=str, default="train", help="split of the dataset to download"
+)
+args = parser.parse_args()
+
+# create the download directory if it does not exist
+if not os.path.exists(args.download_dir):
+    os.makedirs(args.download_dir)
+
+# import hugging face datasets library
+from datasets import load_dataset, get_dataset_split_names
+
+for split in get_dataset_split_names(args.dataset, config_name=args.config_name):
+    if split == args.split:
+        print(f"Loading {split} split of {args.dataset} dataset...")
+        # load the split of the dataset
+        dataset = load_dataset(args.dataset, args.config_name, split=split)
+        # save the split of the dataset to the download directory as json lines file
+        dataset.select(range(int(dataset.num_rows * args.fraction))).to_json(
+            os.path.join(args.download_dir, f"{split}.jsonl")
+        )
diff --git a/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/label.json b/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/label.json
new file mode 100644
index 0000000000..b836faff17
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/label.json
@@ -0,0 +1,12 @@
+{
+  "id2label": {
+    "0": "ENTAILMENT",
+    "1": "NEUTRAL",
+    "2": "CONTRADICTION"
+  },
+  "label2id": {
+    "ENTAILMENT": 0,
+    "CONTRADICTION": 2,
+    "NEUTRAL": 1
+  }
+}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/sample_score.json b/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/sample_score.json
new file mode 100644
index 0000000000..e121a4c5ab
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/sample_score.json
@@ -0,0 +1 @@
+{"inputs": {"input_string": ["[CLS] These two separate increases in the fees lawyers pay is a step closer to achieving those two goals, McMorrow said in written statement.  [SEP] Two separate fees that lawyers pay are decreasing. [SEP]", "[CLS] In this particular case, let's just say when the original advice was given the wheel was spinning, but the hamster had gone. [SEP] There has been advice given.  [SEP]", "[CLS] The gardens are popular with children who enjoy watching the marionette shows, riding donkeys, and sailing boats on the circular ponds. [SEP] Children really like the marionette shows in the gardens. [SEP]", "[CLS] this is the kid who who really you you know barely made it through high school [SEP] He passed high school with straight A's.  [SEP]", "[CLS] good don't ever drink Scotch it's terrible i quit drinking Scotch when i found out about that but anyway but uh as far as as far as you know Central and South America we our policy pretty much uh it depends on who we're what government we're buying down there at the particular time [SEP] I no longer drink Scotch. [SEP]"]}}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/download-dataset.py b/sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/download-dataset.py
new file mode 100644
index 0000000000..dc41e786f3
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/download-dataset.py
@@ -0,0 +1,34 @@
+# import library to parse command line arguments
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# add an argument to specify a dataset name to download
+parser.add_argument("--dataset", type=str, default="bookcorpus", help="dataset name")
+# add an argument to specify the fraction of the dataset to save
+parser.add_argument(
+    "--fraction", type=float, default=0.001, help="fraction of the dataset to save"
+)
+# add an argument to specify the directory to download the dataset to
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="./",
+    help="directory to download the dataset to",
+)
+args = parser.parse_args()
+
+# create the download directory if it does not exist
+if not os.path.exists(args.download_dir):
+    os.makedirs(args.download_dir)
+
+# import hugging face datasets library
+from datasets import load_dataset, get_dataset_split_names
+
+for split in get_dataset_split_names(args.dataset):
+    print(f"Loading {split} split of {args.dataset} dataset...")
+    # load the split of the dataset
+    dataset = load_dataset(args.dataset, split=split)
+    # save the split of the dataset to the download directory as json lines file
+    dataset.select(range(int(dataset.num_rows * args.fraction))).to_json(
+        os.path.join(args.download_dir, f"{split}.jsonl")
+    )
diff --git a/sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/sample_score.json b/sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/sample_score.json
new file mode 100644
index 0000000000..f80ed6b523
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/sample_score.json
@@ -0,0 +1 @@
+{"inputs": {"input_string": ["rocco noticed the almost defeated look on her lovely face and did n\\'t like it ."]}}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/inference/text-generation/text-generation-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/text-generation/text-generation-batch-endpoint.ipynb
new file mode 100644
index 0000000000..c4a01ae97c
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/text-generation/text-generation-batch-endpoint.ipynb
@@ -0,0 +1,482 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Text Generation Inference using Batch Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `text-generation` type models to a batch endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`text-generation`  is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase. Some common applications of text generation are code generation and story generation.\n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `text-generation` task are tagged with `task: text-generation`. We will use the `gpt2` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+    "\n",
+    "### Inference data\n",
+    "We will use the [book corpus](https://huggingface.co/datasets/bookcorpus) dataset. A copy of this dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder.\n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for batch inference.\n",
+    "* Run a batch inference job.\n",
+    "* Review inference predictions.\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies.\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry.\n",
+    "* Create or update compute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import packages used by the following code snippets\n",
+    "import csv\n",
+    "import os\n",
+    "import time\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "from azure.ai.ml import Input, MLClient\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import (\n",
+    "    AmlCompute,\n",
+    "    BatchDeployment,\n",
+    "    BatchEndpoint,\n",
+    "    BatchRetrySettings,\n",
+    "    Model,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "resource_group_name = \"<RESOURCE_GROUP>\"\n",
+    "workspace_name = \"<WORKSPACE_NAME>\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Connect to workspace and registry using ML clients."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=subscription_id,\n",
+    "    resource_group_name=resource_group_name,\n",
+    "    workspace_name=workspace_name,\n",
+    ")\n",
+    "# The models, fine tuning pipelines, and environments are available in the AzureML system registry, \"azureml\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create a compute cluster.\n",
+    "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as `size` below. If you already have a sufficient compute cluster, you can simply define the name in `compute_name` in the following code block."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_name = \"cpu-cluster\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_cluster = AmlCompute(\n",
+    "    name=compute_name,\n",
+    "    description=\"An AML compute cluster\",\n",
+    "    size=\"Standard_DS3_V2\",\n",
+    "    min_instances=0,\n",
+    "    max_instances=3,\n",
+    "    idle_time_before_scale_down=120,\n",
+    ")  # 120 seconds\n",
+    "\n",
+    "workspace_ml_client.begin_create_or_update(compute_cluster)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `text-generation` task. In this example, we use the `gpt2` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"gpt2\"\n",
+    "model_version = \"1\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing.\"\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the book corpus dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* We want this sample to run quickly, so save a smaller dataset containing a fraction of the original."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define directories and filenames as variables\n",
+    "dataset_dir = \"book-corpus-dataset\"\n",
+    "training_datafile = \"train.jsonl\"\n",
+    "\n",
+    "batch_dir = \"batch\"\n",
+    "batch_inputs_dir = os.path.join(batch_dir, \"inputs\")\n",
+    "batch_input_file = \"batch_input.csv\"\n",
+    "os.makedirs(batch_dir, exist_ok=True)\n",
+    "os.makedirs(batch_inputs_dir, exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the ./book-corpus-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # Set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(os.path.join(\".\", dataset_dir, training_datafile), lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Save a fraction of the input data to files of smaller batches for testing. The MLflow model's signature specifies the input should be a column named `\"input_string\"`, so rename the `\"text\"` column. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_df = (\n",
+    "    train_df[[\"text\"]].rename(columns={\"text\": \"input_string\"}).sample(frac=0.001)\n",
+    ")\n",
+    "\n",
+    "# Divide this into files of 25 rows each\n",
+    "batch_size_per_predict = 25\n",
+    "for i in range(0, len(batch_df), batch_size_per_predict):\n",
+    "    j = i + batch_size_per_predict\n",
+    "    batch_df[i:j].to_csv(\n",
+    "        os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL\n",
+    "    )\n",
+    "\n",
+    "# Check out the first and last file name created\n",
+    "input_files = os.listdir(batch_inputs_dir)\n",
+    "print(f\"{input_files[0]} to {str(i)}{batch_input_file}.\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to a batch endpoint\n",
+    "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n",
+    "\n",
+    "* Create a batch endpoint.\n",
+    "* Create a batch deployment.\n",
+    "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name.\n",
+    "\n",
+    "#### Create the endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "endpoint_name = \"text-generation-\" + str(timestamp)\n",
+    "\n",
+    "endpoint = BatchEndpoint(\n",
+    "    name=endpoint_name,\n",
+    "    description=\"Batch endpoint for \"\n",
+    "    + foundation_model.name\n",
+    "    + \", for text-generation task\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create the deployment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "deployment_name = \"demo\"\n",
+    "\n",
+    "deployment = BatchDeployment(\n",
+    "    name=deployment_name,\n",
+    "    endpoint_name=endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    compute=compute_name,\n",
+    "    error_threshold=0,\n",
+    "    instance_count=1,\n",
+    "    logging_level=\"info\",\n",
+    "    max_concurrency_per_instance=1,\n",
+    "    mini_batch_size=10,\n",
+    "    output_file_name=\"predictions.csv\",\n",
+    "    retry_settings=BatchRetrySettings(max_retries=3, timeout=300),\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(deployment).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Set the deployment as default."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n",
+    "endpoint.defaults.deployment_name = deployment_name\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()\n",
+    "\n",
+    "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n",
+    "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Run a batch inference job.\n",
+    "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input = Input(path=batch_inputs_dir, type=AssetTypes.URI_FOLDER)\n",
+    "\n",
+    "job = workspace_ml_client.batch_endpoints.invoke(\n",
+    "    endpoint_name=endpoint.name, input=input\n",
+    ")\n",
+    "\n",
+    "workspace_ml_client.jobs.stream(job.name)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Review inference predictions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n",
+    "\n",
+    "workspace_ml_client.jobs.download(\n",
+    "    name=scoring_job.name, download_path=batch_dir, output_name=\"score\"\n",
+    ")\n",
+    "\n",
+    "predictions_file = os.path.join(batch_dir, \"named-outputs\", \"score\", \"predictions.csv\")\n",
+    "\n",
+    "# Load the batch predictions file with no headers into a dataframe and set your column names\n",
+    "score_df = pd.read_csv(\n",
+    "    predictions_file,\n",
+    "    header=None,\n",
+    "    names=[\"row_number_per_file\", \"prediction\", \"batch_input_file_name\"],\n",
+    ")\n",
+    "score_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Record the input file name and set the original index value in the `'index'` column for each input file. Join the `train_df` with ground truth into the input dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_df = []\n",
+    "for file in input_files:\n",
+    "    input = pd.read_csv(os.path.join(batch_inputs_dir, file), index_col=0)\n",
+    "    input.reset_index(inplace=True)\n",
+    "    input[\"batch_input_file_name\"] = file\n",
+    "    input.reset_index(names=[\"row_number_per_file\"], inplace=True)\n",
+    "    input_df.append(input)\n",
+    "input_df = pd.concat(input_df)\n",
+    "input_df.set_index(\"index\", inplace=True)\n",
+    "input_df = input_df.join(train_df).drop(columns=[\"input_string\"])\n",
+    "\n",
+    "input_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Join the predictions with input data to compare them to ground truth."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.merge(\n",
+    "    input_df, score_df, how=\"inner\", on=[\"row_number_per_file\", \"batch_input_file_name\"]\n",
+    ")\n",
+    "\n",
+    "# Show the first few rows of the results\n",
+    "df.head(20)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 7. Clean up resources\n",
+    "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()\n",
+    "workspace_ml_client.compute.begin_delete(name=compute_name).result()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/text-generation/text-generation-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/text-generation/text-generation-online-endpoint.ipynb
new file mode 100644
index 0000000000..fa46f0afa3
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/text-generation/text-generation-online-endpoint.ipynb
@@ -0,0 +1,282 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Text Generation Inference using Online Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `text-generation` type models to an online endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`text-generation`  is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase. Some common applications of text generation are code generation and story generation.\n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `text-generation` task are tagged with `task: text-generation`. We will use the `gpt2` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+    "\n",
+    "### Inference data\n",
+    "We will use the [book corpus](https://huggingface.co/datasets/bookcorpus) dataset. A copy of this dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder.\n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for real time inference.\n",
+    "* Test the endpoint\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "    resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "    workspace_name=\"<WORKSPACE_NAME>\",\n",
+    ")\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `text-generation` task. In this example, we use the `gpt2` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"gpt2\"\n",
+    "model_version = \"3\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the book corpus dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder. The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* Save a few samples in the format that can be passed as input to the online-inference endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the ./book-corpus-dataset/train.jsonl file into a pandas dataframe and show the first 2 rows\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(\"./book-corpus-dataset/train.jsonl\", lines=True)\n",
+    "train_df.head(2)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import (\n",
+    "    ManagedOnlineEndpoint,\n",
+    "    ManagedOnlineDeployment,\n",
+    "    OnlineRequestSettings,\n",
+    ")\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "online_endpoint_name = \"text-generation-\" + str(timestamp)\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \"\n",
+    "    + foundation_model.name\n",
+    "    + \", for text-generation task\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    instance_type=\"Standard_DS2_v2\",\n",
+    "    instance_count=1,\n",
+    "    request_settings=OnlineRequestSettings(\n",
+    "        request_timeout_ms=60000,\n",
+    "    ),\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch some sample data from the dataset and submit it to the online endpoint for inference. We will then display the response returned by the endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import os\n",
+    "\n",
+    "# read the ./book-corpus-dataset/train.jsonl file into a pandas dataframe\n",
+    "df = pd.read_json(\"./book-corpus-dataset/train.jsonl\", lines=True)\n",
+    "# escape single and double quotes in the text column\n",
+    "df[\"text\"] = df[\"text\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n",
+    "# pick 1 random row\n",
+    "sample_df = df.sample(1)\n",
+    "# create a json object with the key as \"inputs\" and value as a list of values from the text column of the sample_df dataframe\n",
+    "sample_json = {\"inputs\": sample_df[\"text\"].tolist()}\n",
+    "# create the request payload: the sampled text is nested under \"inputs\" -> \"input_string\"\n",
+    "test_json = {\"inputs\": {\"input_string\": sample_df[\"text\"].tolist()}}\n",
+    "# save the json object to a file named sample_score.json in the ./book-corpus-dataset folder\n",
+    "with open(os.path.join(\".\", \"book-corpus-dataset\", \"sample_score.json\"), \"w\") as f:\n",
+    "    json.dump(test_json, f)\n",
+    "sample_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=\"./book-corpus-dataset/sample_score.json\",\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the json response to a pandas dataframe\n",
+    "response_df = pd.read_json(response)\n",
+    "response_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint; otherwise you will leave the billing meter running for the compute used by the endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/download-dataset.py b/sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/download-dataset.py
new file mode 100644
index 0000000000..d3836a7456
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/download-dataset.py
@@ -0,0 +1,53 @@
+# import library to parse command line arguments
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# add an argument to specify a dataset name to download
+parser.add_argument(
+    "--dataset", type=str, default="Jean-Baptiste/wikiner_fr", help="dataset name"
+)
+# add an argument to specify the directory to download the dataset to
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="data",
+    help="directory to download the dataset to",
+)
+args = parser.parse_args()
+
+# create the download directory if it does not exist; exist_ok avoids the
+# check-then-create race and does not fail when the directory is already there
+os.makedirs(args.download_dir, exist_ok=True)
+
+
+def format_ner_tags(example, class_names):
+    """Add a joined "text" field and string NER labels to one dataset example.
+
+    Args:
+        example: mapping with a "tokens" list of strings and a "ner_tags" list
+            of class ids used to index ``class_names``.
+        class_names: sequence that maps each class id to its label name.
+
+    Returns:
+        The same ``example`` object, updated in place with "text" (tokens joined
+        by single spaces) and "ner_tags_str" (label name for each tag id).
+    """
+    example["text"] = " ".join(example["tokens"])
+    # tag_id (not id) so the builtin id() is not shadowed
+    example["ner_tags_str"] = [class_names[tag_id] for tag_id in example["ner_tags"]]
+    return example
+
+
+# import hugging face datasets library
+from datasets import load_dataset, get_dataset_split_names
+from functools import partial
+
+for split in get_dataset_split_names(args.dataset):
+    # load the split of the dataset
+    dataset = load_dataset(args.dataset, split=split)
+    # add the "text" and "ner_tags_str" columns using the label names of this split
+    dataset = dataset.map(
+        partial(format_ner_tags, class_names=dataset.features["ner_tags"].feature.names)
+    )
+    # save the split of the dataset to the download directory as json lines file
+    dataset.to_json(os.path.join(args.download_dir, f"{split}.jsonl"))
diff --git a/sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/sample_score.json b/sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/sample_score.json
new file mode 100644
index 0000000000..4fe2e37452
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/sample_score.json
@@ -0,0 +1 @@
+{"inputs": {"input_string": ["Il est difficile de se rendre compte de la taille de la Maison Blanche , car une grande partie est souterraine ou cach\u00e9e par le paysage ."]}}
\ No newline at end of file
diff --git a/sdk/python/foundation-models/system/inference/token-classification/token-classification-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/token-classification/token-classification-batch-endpoint.ipynb
new file mode 100644
index 0000000000..24b900a634
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/token-classification/token-classification-batch-endpoint.ipynb
@@ -0,0 +1,484 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Token Classification Inference using Batch Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `token-classification` type models to a batch endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`token-classification` assigns a label to individual tokens in a sentence. One of the most common `token-classification` tasks is Named Entity Recognition (NER). NER attempts to find a label for each entity in a sentence, such as a person, location, or organization.\n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `token-classification` task are tagged with `task: token-classification`. We will use the `Jean-Baptiste-camembert-ner` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+    "\n",
+    "### Inference data\n",
+    "We will use the [Jean-Baptiste/wikiner_fr](https://huggingface.co/datasets/Jean-Baptiste/wikiner_fr) dataset. A copy of this dataset is available in the [Jean-Baptiste-wikiner_fr](./Jean-Baptiste-wikiner_fr/) folder. \\\n",
+    "Please note that the dataset used here is a French dataset, as the Jean-Baptiste/camembert-ner model was trained in French.\n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for batch inference.\n",
+    "* Run a batch inference job.\n",
+    "* Review inference predictions.\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies.\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry.\n",
+    "* Create or update compute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import packages used by the following code snippets\n",
+    "import csv\n",
+    "import os\n",
+    "import time\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "from azure.ai.ml import Input, MLClient\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import (\n",
+    "    AmlCompute,\n",
+    "    BatchDeployment,\n",
+    "    BatchEndpoint,\n",
+    "    BatchRetrySettings,\n",
+    "    Model,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "resource_group_name = \"<RESOURCE_GROUP>\"\n",
+    "workspace_name = \"<WORKSPACE_NAME>\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Connect to workspace and registry using ML clients."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=subscription_id,\n",
+    "    resource_group_name=resource_group_name,\n",
+    "    workspace_name=workspace_name,\n",
+    ")\n",
+    "# The models, fine tuning pipelines, and environments are available in the AzureML system registry, \"azureml\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create a compute cluster.\n",
+    "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as `size` below. If you already have a sufficient compute cluster, you can simply define the name in `compute_name` in the following code block."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_name = \"cpu-cluster\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_cluster = AmlCompute(\n",
+    "    name=compute_name,\n",
+    "    description=\"An AML compute cluster\",\n",
+    "    size=\"Standard_DS3_V2\",\n",
+    "    min_instances=0,\n",
+    "    max_instances=3,\n",
+    "    idle_time_before_scale_down=120,\n",
+    ")  # 120 seconds\n",
+    "\n",
+    "workspace_ml_client.begin_create_or_update(compute_cluster)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `token-classification` task. In this example, we use the `Jean-Baptiste-camembert-ner` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"Jean-Baptiste-camembert-ner\"\n",
+    "model_version = \"1\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing.\"\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the Jean-Baptiste-wikiner_fr dataset is available in the [Jean-Baptiste-wikiner_fr](./Jean-Baptiste-wikiner_fr/) folder.  The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* We want this sample to run quickly, so save a smaller dataset containing a fraction of the original."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define directories and filenames as variables\n",
+    "dataset_dir = \"Jean-Baptiste-wikiner_fr\"\n",
+    "training_datafile = \"train.jsonl\"\n",
+    "\n",
+    "batch_dir = \"batch\"\n",
+    "batch_inputs_dir = os.path.join(batch_dir, \"inputs\")\n",
+    "batch_input_file = \"batch_input.csv\"\n",
+    "os.makedirs(batch_dir, exist_ok=True)\n",
+    "os.makedirs(batch_inputs_dir, exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the ./Jean-Baptiste-wikiner_fr/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # Set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(os.path.join(\".\", dataset_dir, training_datafile), lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Save a fraction of the input data to files of smaller batches for testing. The MLflow model's signature specifies the input should be a column named `\"input_string\"`, so rename the transformed `\"text\"` column. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_df = (\n",
+    "    train_df[[\"text\"]].rename(columns={\"text\": \"input_string\"}).sample(frac=0.001)\n",
+    ")\n",
+    "\n",
+    "# Divide this into files of 25 rows each\n",
+    "batch_size_per_predict = 25\n",
+    "for i in range(0, len(batch_df), batch_size_per_predict):\n",
+    "    j = i + batch_size_per_predict\n",
+    "    batch_df[i:j].to_csv(\n",
+    "        os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL\n",
+    "    )\n",
+    "\n",
+    "# Check out the first and last file name created\n",
+    "input_files = os.listdir(batch_inputs_dir)\n",
+    "print(f\"{input_files[0]} to {str(i)}{batch_input_file}.\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to a batch endpoint\n",
+    "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n",
+    "\n",
+    "* Create a batch endpoint.\n",
+    "* Create a batch deployment.\n",
+    "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name.\n",
+    "\n",
+    "#### Create the endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "endpoint_name = \"token-classification-\" + str(timestamp)\n",
+    "\n",
+    "endpoint = BatchEndpoint(\n",
+    "    name=endpoint_name,\n",
+    "    description=\"Batch endpoint for \"\n",
+    "    + foundation_model.name\n",
+    "    + \", for token-classification task\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create the deployment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "deployment_name = \"demo\"\n",
+    "\n",
+    "deployment = BatchDeployment(\n",
+    "    name=deployment_name,\n",
+    "    endpoint_name=endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    compute=compute_name,\n",
+    "    error_threshold=0,\n",
+    "    instance_count=1,\n",
+    "    logging_level=\"info\",\n",
+    "    max_concurrency_per_instance=1,\n",
+    "    mini_batch_size=10,\n",
+    "    output_file_name=\"predictions.csv\",\n",
+    "    retry_settings=BatchRetrySettings(max_retries=3, timeout=300),\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(deployment).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Set the deployment as default."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n",
+    "endpoint.defaults.deployment_name = deployment_name\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()\n",
+    "\n",
+    "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n",
+    "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Run a batch inference job.\n",
+    "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input = Input(path=batch_inputs_dir, type=AssetTypes.URI_FOLDER)\n",
+    "\n",
+    "job = workspace_ml_client.batch_endpoints.invoke(\n",
+    "    endpoint_name=endpoint.name, input=input\n",
+    ")\n",
+    "\n",
+    "workspace_ml_client.jobs.stream(job.name)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Review inference predictions. \n",
+    "Download the predictions from the job output and review the predictions using a dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n",
+    "\n",
+    "workspace_ml_client.jobs.download(\n",
+    "    name=scoring_job.name, download_path=batch_dir, output_name=\"score\"\n",
+    ")\n",
+    "\n",
+    "predictions_file = os.path.join(batch_dir, \"named-outputs\", \"score\", \"predictions.csv\")\n",
+    "\n",
+    "# Load the batch predictions file with no headers into a dataframe and set your column names\n",
+    "score_df = pd.read_csv(\n",
+    "    predictions_file,\n",
+    "    header=None,\n",
+    "    names=[\"row_number_per_file\", \"prediction\", \"batch_input_file_name\"],\n",
+    ")\n",
+    "score_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Record the input file name and set the original index value in the `'index'` column for each input file. Join the `train_df` with ground truth into the input dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_df = []\n",
+    "for file in input_files:\n",
+    "    input = pd.read_csv(os.path.join(batch_inputs_dir, file), index_col=0)\n",
+    "    input.reset_index(inplace=True)\n",
+    "    input[\"batch_input_file_name\"] = file\n",
+    "    input.reset_index(names=[\"row_number_per_file\"], inplace=True)\n",
+    "    input_df.append(input)\n",
+    "input_df = pd.concat(input_df)\n",
+    "input_df.set_index(\"index\", inplace=True)\n",
+    "input_df = input_df.join(train_df).drop(columns=[\"input_string\"])\n",
+    "\n",
+    "input_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Join the predictions with input data to compare them to ground truth."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.merge(\n",
+    "    input_df, score_df, how=\"inner\", on=[\"row_number_per_file\", \"batch_input_file_name\"]\n",
+    ")\n",
+    "\n",
+    "# Show the first few rows of the results\n",
+    "df.head(20)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 7. Clean up resources\n",
+    "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()\n",
+    "workspace_ml_client.compute.begin_delete(name=compute_name).result()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/token-classification/token-classification-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/token-classification/token-classification-online-endpoint.ipynb
new file mode 100644
index 0000000000..5e47958212
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/token-classification/token-classification-online-endpoint.ipynb
@@ -0,0 +1,304 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Token Classification Inference using Online Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `token-classification` type models to an online endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`token-classification` assigns a label to individual tokens in a sentence. One of the most common `token-classification` tasks is Named Entity Recognition (NER). NER attempts to find a label for each entity in a sentence, such as a person, location, or organization.\n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `token-classification` task are tagged with `task: token-classification`. We will use the `Jean-Baptiste-camembert-ner` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+    "\n",
+    "### Inference data\n",
+    "We will use the [Jean-Baptiste/wikiner_fr](https://huggingface.co/datasets/Jean-Baptiste/wikiner_fr) dataset. A copy of this dataset is available in the [Jean-Baptiste-wikiner_fr](./Jean-Baptiste-wikiner_fr/) folder. \\\n",
+    "Please note that the dataset used here is a French dataset, as the Jean-Baptiste/camembert-ner model was trained on French data.\n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for real time inference.\n",
+    "* Test the endpoint\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "    resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "    workspace_name=\"<WORKSPACE_NAME>\",\n",
+    ")\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `token-classification` task. In this example, we use the `Jean-Baptiste-camembert-ner` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"Jean-Baptiste-camembert-ner\"\n",
+    "model_version = \"3\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the Jean-Baptiste-wikiner_fr dataset is available in the [Jean-Baptiste-wikiner_fr](./Jean-Baptiste-wikiner_fr/) folder.  The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* Save a few samples in the format that can be passed as input to the online-inference endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the ./Jean-Baptiste-wikiner_fr/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(\"./Jean-Baptiste-wikiner_fr/train.jsonl\", lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import (\n",
+    "    ManagedOnlineEndpoint,\n",
+    "    ManagedOnlineDeployment,\n",
+    "    OnlineRequestSettings,\n",
+    ")\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "online_endpoint_name = \"token-classification-\" + str(timestamp)\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \"\n",
+    "    + foundation_model.name\n",
+    "    + \", for token-classification task\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    instance_type=\"Standard_DS2_v2\",\n",
+    "    instance_count=1,\n",
+    "    request_settings=OnlineRequestSettings(\n",
+    "        request_timeout_ms=60000,\n",
+    "    ),\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch some sample data from the `train.jsonl` file of the dataset and submit it to the online endpoint for inference. We will then display the scored labels alongside the ground truth labels."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import os\n",
+    "\n",
+    "# read the ./Jean-Baptiste-wikiner_fr/train.jsonl file into a pandas dataframe\n",
+    "df = pd.read_json(\"./Jean-Baptiste-wikiner_fr/train.jsonl\", lines=True)\n",
+    "# escape single and double quotes in the text column\n",
+    "df[\"text\"] = df[\"text\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n",
+    "# pick 1 random row\n",
+    "sample_df = df.sample(1)\n",
+    "# create a json object with the key as \"inputs\" and value as a list of values from the text column of the sample_df dataframe\n",
+    "sample_json = {\"inputs\": sample_df[\"text\"].tolist()}\n",
+    "# create the request payload with the sample text nested under the \"input_string\" key of \"inputs\"\n",
+    "test_json = {\"inputs\": {\"input_string\": sample_df[\"text\"].tolist()}}\n",
+    "# save the json object to a file named sample_score.json in the ./Jean-Baptiste-wikiner_fr folder\n",
+    "with open(os.path.join(\".\", \"Jean-Baptiste-wikiner_fr\", \"sample_score.json\"), \"w\") as f:\n",
+    "    json.dump(test_json, f)\n",
+    "sample_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=\"./Jean-Baptiste-wikiner_fr/sample_score.json\",\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the json response to a pandas dataframe\n",
+    "response_df = pd.read_json(response)\n",
+    "response_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# compare the predicted labels with the actual labels\n",
+    "num_tokens = len(sample_df[\"text\"].tolist()[0].split())\n",
+    "predicted_labels = [\"O\"] * num_tokens\n",
+    "for col in response_df.columns:\n",
+    "    prediction = response_df[col].tolist()[0]\n",
+    "    predicted_labels[prediction[\"index\"] - 1] = prediction[\"entity\"]\n",
+    "compare_df = pd.DataFrame(\n",
+    "    {\n",
+    "        \"ground_truth_labels\": sample_df[\"ner_tags_str\"].tolist(),\n",
+    "        \"predicted_labels\": [predicted_labels],\n",
+    "    }\n",
+    ")\n",
+    "compare_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/translation/translation-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/translation/translation-batch-endpoint.ipynb
new file mode 100644
index 0000000000..a5b2e41fdd
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/translation/translation-batch-endpoint.ipynb
@@ -0,0 +1,481 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Translation Inference using Batch Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `translation` type models to a batch endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`translation` converts a sequence of text from one language to another. It is one of several tasks you can formulate as a sequence-to-sequence problem, a powerful framework for returning some output from an input, like translation or summarization. `translation` systems are commonly used for translating text between different languages, but they can also be used for speech or some combination in between like text-to-speech or speech-to-text.\n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `translation` task are tagged with `task: translation`. We will use the `t5-small` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+    "\n",
+    "### Inference data\n",
+    "We will use the [wmt16 (ro-en)](https://huggingface.co/datasets/wmt16) dataset. A copy of this dataset is available in the [wmt16-en-ro-dataset](./wmt16-en-ro-dataset/) folder. \n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for batch inference.\n",
+    "* Run a batch inference job.\n",
+    "* Review inference predictions.\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies.\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to `azureml` system registry.\n",
+    "* Create or update compute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import packages used by the following code snippets\n",
+    "import csv\n",
+    "import os\n",
+    "import time\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "from azure.ai.ml import Input, MLClient\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "from azure.ai.ml.entities import (\n",
+    "    AmlCompute,\n",
+    "    BatchDeployment,\n",
+    "    BatchEndpoint,\n",
+    "    BatchRetrySettings,\n",
+    "    Model,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "resource_group_name = \"<RESOURCE_GROUP>\"\n",
+    "workspace_name = \"<WORKSPACE_NAME>\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Connect to workspace and registry using ML clients."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=subscription_id,\n",
+    "    resource_group_name=resource_group_name,\n",
+    "    workspace_name=workspace_name,\n",
+    ")\n",
+    "# The models, fine tuning pipelines, and environments are available in the AzureML system registry, \"azureml\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create a compute cluster.\n",
+    "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as `size` below. If you already have a sufficient compute cluster, you can simply define the name in `compute_name` in the following code block."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_name = \"cpu-cluster\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute_cluster = AmlCompute(\n",
+    "    name=compute_name,\n",
+    "    description=\"An AML compute cluster\",\n",
+    "    size=\"Standard_DS3_V2\",\n",
+    "    min_instances=0,\n",
+    "    max_instances=3,\n",
+    "    idle_time_before_scale_down=120,\n",
+    ")  # 120 seconds\n",
+    "\n",
+    "workspace_ml_client.begin_create_or_update(compute_cluster)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `translation` task. In this example, we use the `t5-small` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"t5-small\"\n",
+    "model_version = \"1\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing.\"\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the wmt16-en-ro dataset is available in the [wmt16-en-ro-dataset](./wmt16-en-ro-dataset/) folder.  The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* We want this sample to run quickly, so save a smaller dataset containing a fraction of the original."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define directories and filenames as variables\n",
+    "dataset_dir = \"wmt16-en-ro-dataset\"\n",
+    "training_datafile = \"train.jsonl\"\n",
+    "\n",
+    "batch_dir = \"batch\"\n",
+    "batch_inputs_dir = os.path.join(batch_dir, \"inputs\")\n",
+    "batch_input_file = \"batch_input.csv\"\n",
+    "os.makedirs(batch_dir, exist_ok=True)\n",
+    "os.makedirs(batch_inputs_dir, exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the ./wmt16-en-ro-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # Set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(os.path.join(\".\", dataset_dir, training_datafile), lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Save a fraction of the input data to files of smaller batches for testing. The MLflow model's signature specifies the input should be a column named `\"input_string\"`, so rename the transformed `\"en\"` column."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_df = train_df[[\"en\"]].rename(columns={\"en\": \"input_string\"}).sample(frac=0.01)\n",
+    "\n",
+    "# Divide this into files of 25 rows each\n",
+    "batch_size_per_predict = 25\n",
+    "for i in range(0, len(batch_df), batch_size_per_predict):\n",
+    "    j = i + batch_size_per_predict\n",
+    "    batch_df[i:j].to_csv(\n",
+    "        os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL\n",
+    "    )\n",
+    "\n",
+    "# Check out the first and last file name created\n",
+    "input_files = os.listdir(batch_inputs_dir)\n",
+    "print(f\"{input_files[0]} to {str(i)}{batch_input_file}.\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to a batch endpoint\n",
+    "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n",
+    "\n",
+    "* Create a batch endpoint.\n",
+    "* Create a batch deployment.\n",
+    "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name.\n",
+    "\n",
+    "#### Create the endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "endpoint_name = \"translation-\" + str(timestamp)\n",
+    "\n",
+    "endpoint = BatchEndpoint(\n",
+    "    name=endpoint_name,\n",
+    "    description=\"Batch endpoint for \"\n",
+    "    + foundation_model.name\n",
+    "    + \", for translation task\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create the deployment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "deployment_name = \"demo\"\n",
+    "\n",
+    "deployment = BatchDeployment(\n",
+    "    name=deployment_name,\n",
+    "    endpoint_name=endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    compute=compute_name,\n",
+    "    error_threshold=0,\n",
+    "    instance_count=1,\n",
+    "    logging_level=\"info\",\n",
+    "    max_concurrency_per_instance=1,\n",
+    "    mini_batch_size=10,\n",
+    "    output_file_name=\"predictions.csv\",\n",
+    "    retry_settings=BatchRetrySettings(max_retries=3, timeout=300),\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(deployment).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Set the deployment as default."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n",
+    "endpoint.defaults.deployment_name = deployment_name\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()\n",
+    "\n",
+    "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n",
+    "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Run a batch inference job.\n",
+    "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input = Input(path=batch_inputs_dir, type=AssetTypes.URI_FOLDER)\n",
+    "\n",
+    "job = workspace_ml_client.batch_endpoints.invoke(\n",
+    "    endpoint_name=endpoint.name, input=input\n",
+    ")\n",
+    "\n",
+    "workspace_ml_client.jobs.stream(job.name)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Review inference predictions. \n",
+    "Download the predictions from the job output and review the predictions using a dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n",
+    "\n",
+    "workspace_ml_client.jobs.download(\n",
+    "    name=scoring_job.name, download_path=batch_dir, output_name=\"score\"\n",
+    ")\n",
+    "\n",
+    "predictions_file = os.path.join(batch_dir, \"named-outputs\", \"score\", \"predictions.csv\")\n",
+    "\n",
+    "# Load the batch predictions file with no headers into a dataframe and set your column names\n",
+    "score_df = pd.read_csv(\n",
+    "    predictions_file,\n",
+    "    header=None,\n",
+    "    names=[\"row_number_per_file\", \"prediction\", \"batch_input_file_name\"],\n",
+    ")\n",
+    "score_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Record the input file name and set the original index value in the `'index'` column for each input file. Join the `train_df` with ground truth into the input dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_df = []\n",
+    "for file in input_files:\n",
+    "    input = pd.read_csv(os.path.join(batch_inputs_dir, file), index_col=0)\n",
+    "    input.reset_index(inplace=True)\n",
+    "    input[\"batch_input_file_name\"] = file\n",
+    "    input.reset_index(names=[\"row_number_per_file\"], inplace=True)\n",
+    "    input_df.append(input)\n",
+    "input_df = pd.concat(input_df)\n",
+    "input_df.set_index(\"index\", inplace=True)\n",
+    "input_df = input_df.join(train_df).drop(columns=[\"input_string\"])\n",
+    "\n",
+    "input_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Join the predictions with input data to compare them to ground truth."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.merge(\n",
+    "    input_df, score_df, how=\"inner\", on=[\"row_number_per_file\", \"batch_input_file_name\"]\n",
+    ")\n",
+    "\n",
+    "# Show the first few rows of the results\n",
+    "df.head(20)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 7. Clean up resources\n",
+    "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()\n",
+    "workspace_ml_client.compute.begin_delete(name=compute_name).result()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/translation/translation-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/translation/translation-online-endpoint.ipynb
new file mode 100644
index 0000000000..1e7c59c31f
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/translation/translation-online-endpoint.ipynb
@@ -0,0 +1,295 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Translation Inference using Online Endpoints\n",
+    "\n",
+    "This sample shows how to deploy `translation` type models to an online endpoint for inference.\n",
+    "\n",
+    "### Task\n",
+    "`translation` converts a sequence of text from one language to another. It is one of several tasks you can formulate as a sequence-to-sequence problem, a powerful framework for returning some output from an input, like translation or summarization. `translation` systems are commonly used for translation between different language texts, but they can also be used for speech or some combination in between like text-to-speech or speech-to-text.\n",
+    "\n",
+    "### Model\n",
+    "Models that can perform the `translation` task are tagged with `task: translation`. We will use the `t5-small` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n",
+    "\n",
+    "### Inference data\n",
+    "We will use the [wmt16 (ro-en)](https://huggingface.co/datasets/wmt16) dataset. A copy of this dataset is available in the [wmt16-en-ro-dataset](./wmt16-en-ro-dataset/) folder. \n",
+    "\n",
+    "### Outline\n",
+    "* Set up pre-requisites.\n",
+    "* Pick a model to deploy.\n",
+    "* Prepare data for inference. \n",
+    "* Deploy the model for real time inference.\n",
+    "* Test the endpoint\n",
+    "* Clean up resources."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Set up pre-requisites\n",
+    "* Install dependencies\n",
+    "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace  `<WORKSPACE_NAME>`, `<RESOURCE_GROUP>` and `<SUBSCRIPTION_ID>` below.\n",
+    "* Connect to the `azureml-preview` system registry"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.identity import (\n",
+    "    DefaultAzureCredential,\n",
+    "    InteractiveBrowserCredential,\n",
+    "    ClientSecretCredential,\n",
+    ")\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "import time\n",
+    "\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    credential = InteractiveBrowserCredential()\n",
+    "\n",
+    "workspace_ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=\"<SUBSCRIPTION_ID>\",\n",
+    "    resource_group_name=\"<RESOURCE_GROUP>\",\n",
+    "    workspace_name=\"<WORKSPACE_NAME>\",\n",
+    ")\n",
+    "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
+    "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Pick a model to deploy\n",
+    "\n",
+    "Browse models in the Model Catalog in the AzureML Studio, filtering by the `translation` task. In this example, we use the `t5-small` model. If you have opened this notebook for a different model, replace the model name and version accordingly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = \"t5-small\"\n",
+    "model_version = \"4\"\n",
+    "foundation_model = registry_ml_client.models.get(model_name, model_version)\n",
+    "print(\n",
+    "    \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n",
+    "        foundation_model.name, foundation_model.version, foundation_model.id\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Prepare data for inference.\n",
+    "\n",
+    "A copy of the wmt16-en-ro dataset is available in the [wmt16-en-ro-dataset](./wmt16-en-ro-dataset/) folder.  The next few cells show basic data preparation:\n",
+    "* Visualize some data rows\n",
+    "* Save a few samples in the format that can be passed as input to the online-inference endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the ./wmt16-en-ro-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option(\n",
+    "    \"display.max_colwidth\", 0\n",
+    ")  # set the max column width to 0 to display the full text\n",
+    "train_df = pd.read_json(\"./wmt16-en-ro-dataset/train.jsonl\", lines=True)\n",
+    "train_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Deploy the model to an online endpoint\n",
+    "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time, sys\n",
+    "from azure.ai.ml.entities import (\n",
+    "    ManagedOnlineEndpoint,\n",
+    "    ManagedOnlineDeployment,\n",
+    "    OnlineRequestSettings,\n",
+    ")\n",
+    "\n",
+    "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n",
+    "timestamp = int(time.time())\n",
+    "online_endpoint_name = \"translation-\" + str(timestamp)\n",
+    "# create an online endpoint\n",
+    "endpoint = ManagedOnlineEndpoint(\n",
+    "    name=online_endpoint_name,\n",
+    "    description=\"Online endpoint for \"\n",
+    "    + foundation_model.name\n",
+    "    + \", for translation task\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).wait()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a deployment\n",
+    "demo_deployment = ManagedOnlineDeployment(\n",
+    "    name=\"demo\",\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    model=foundation_model.id,\n",
+    "    instance_type=\"Standard_DS2_v2\",\n",
+    "    instance_count=1,\n",
+    "    request_settings=OnlineRequestSettings(\n",
+    "        request_timeout_ms=60000,\n",
+    "    ),\n",
+    ")\n",
+    "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n",
+    "endpoint.traffic = {\"demo\": 100}\n",
+    "workspace_ml_client.begin_create_or_update(endpoint).result()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Test the endpoint with sample data\n",
+    "\n",
+    "We will fetch some sample data from the dataset and submit it to the online endpoint for inference. We will then display the predicted translation alongside the ground truth translation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import os\n",
+    "\n",
+    "# read the ./wmt16-en-ro-dataset/train.jsonl file into a pandas dataframe\n",
+    "df = pd.read_json(\"./wmt16-en-ro-dataset/train.jsonl\", lines=True)\n",
+    "# escape single and double quotes in the text column\n",
+    "df[\"en\"] = df[\"en\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n",
+    "# pick 1 random row\n",
+    "sample_df = df.sample(1)\n",
+    "# build the request payload: \"inputs\" holds the en column values of sample_df under \"input_string\", and \"parameters\" selects the translation task\n",
+    "test_json = {\n",
+    "    \"inputs\": {\"input_string\": sample_df[\"en\"].tolist()},\n",
+    "    \"parameters\": {\"task_type\": \"translation_en_to_ro\"},\n",
+    "}\n",
+    "# save the json object to a file named sample_score.json in the ./wmt16-en-ro-dataset folder\n",
+    "with open(os.path.join(\".\", \"wmt16-en-ro-dataset\", \"sample_score.json\"), \"w\") as f:\n",
+    "    json.dump(test_json, f)\n",
+    "sample_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n",
+    "response = workspace_ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name=online_endpoint_name,\n",
+    "    deployment_name=\"demo\",\n",
+    "    request_file=\"./wmt16-en-ro-dataset/sample_score.json\",\n",
+    ")\n",
+    "print(\"raw response: \\n\", response, \"\\n\")\n",
+    "# convert the json response to a pandas dataframe\n",
+    "response_df = pd.read_json(response)\n",
+    "response_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# compare the predicted translation with the ground truth translation\n",
+    "response_df.rename(columns={\"translation_text\": \"predicted_translation\"}, inplace=True)\n",
+    "response_df[\"ground_truth_translation\"] = sample_df[\"ro\"].tolist()\n",
+    "response_df.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6. Delete the online endpoint\n",
+    "Don't forget to delete the online endpoint; otherwise you will leave the billing meter running for the compute used by the endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/download-dataset.py b/sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/download-dataset.py
new file mode 100644
index 0000000000..d945ea740e
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/download-dataset.py
@@ -0,0 +1,45 @@
+# import library to parse command line arguments
+import argparse, os
+
+parser = argparse.ArgumentParser()
+# add an argument to specify a dataset name to download
+parser.add_argument("--dataset", type=str, default="wmt16", help="dataset name")
+# add an argument to specify the dataset subset (configuration) to download
+parser.add_argument(
+    "--dataset_subset", type=str, default="ro-en", help="dataset subset name"
+)
+# argument to save a fraction of the dataset
+parser.add_argument(
+    "--fraction", type=float, default=0.05, help="fraction of the dataset to save"
+)
+# add an argument to specify the directory to download the dataset to
+parser.add_argument(
+    "--download_dir",
+    type=str,
+    default="data",
+    help="directory to download the dataset to",
+)
+args = parser.parse_args()
+
+# create the download directory if it does not exist; exist_ok=True avoids the
+# check-then-create race of a separate os.path.exists() test
+os.makedirs(args.download_dir, exist_ok=True)
+
+
+def format_translation(example):
+    """Hoist every entry of example["translation"] to the top level of example."""
+    example.update(example["translation"])
+    return example
+
+
+# import hugging face datasets library
+from datasets import load_dataset, get_dataset_split_names
+
+for name in get_dataset_split_names(args.dataset, args.dataset_subset):
+    # fetch the split and flatten its nested "translation" column
+    rows = load_dataset(args.dataset, args.dataset_subset, split=name).map(
+        format_translation, remove_columns=["translation"]
+    )
+    # keep the leading `fraction` of rows and write them to <split>.jsonl
+    keep = range(int(rows.num_rows * args.fraction))
+    rows.select(keep).to_json(os.path.join(args.download_dir, f"{name}.jsonl"))
diff --git a/sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/sample_score.json b/sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/sample_score.json
new file mode 100644
index 0000000000..83e42a0903
--- /dev/null
+++ b/sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/sample_score.json
@@ -0,0 +1 @@
+{"inputs": {"input_string": ["European and international systems for monitoring production and the market as an early warning system for identifying production trends;"]}, "parameters": {"task_type": "translation_en_to_ro"}}
\ No newline at end of file