In [None]:
import os
import pathlib
import logging
import time
import datetime as dt

from azureml.core import Workspace, Experiment, Environment, Datastore, Dataset, ComputeTarget
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration
from azureml.data.data_reference import DataReference
from azureml.core.runconfig import EggLibrary
from azureml.core.databricks import PyPiLibrary
from azureml.data import TabularDataset, FileDataset, OutputFileDatasetConfig
from azureml.pipeline.core import Pipeline, PipelineParameter, PipelineData, PipelineRun, PublishedPipeline, PortDataReference, StepRun, TrainingOutput
from azureml.pipeline.steps import DatabricksStep, PythonScriptStep, AutoMLStep
from azureml.widgets import RunDetails

In [None]:
# Resolve the Azure ML handles shared by the later cells: the workspace (read from the
# local config file), the registered datastore, and the compute target used for training.
workspace = Workspace.from_config(path="conf/azure_config.json")
datastore = Datastore.get(workspace=workspace, datastore_name="sten_blobstore")
compute_target = ComputeTarget(workspace, "npedignc12amlc01")

In [None]:
def get_or_create_env(ws, env_name, conda_packages, pip_packages, version=None):
    """
    Gets the environment from the workspace. If the lookup fails, builds a new environment
    from the given package lists and registers it.

    :param ws: Azure ML workspace containing the environment
    :param env_name: name of environment
    :param conda_packages: conda packages used when a new environment has to be created
    :param pip_packages: pip packages used when a new environment has to be created
    :param version: specific version of the environment to look up (passed through to
        Environment.get). It is only used for the lookup: a newly created environment
        gets whatever version registration assigns.
    :return: The environment that was found by the lookup, or the newly registered one
    """
    try:
        env = Environment.get(ws, env_name, version=version)
    except Exception as err:
        # Any lookup failure is treated as "not found". The reason is printed so that an
        # unrelated failure is not silently mistaken for a missing environment.
        print(f"Environment {env_name} not found, creating it ({err})")
        env = Environment(env_name)
        env.python.conda_dependencies = CondaDependencies.create(
            conda_packages=conda_packages,
            pip_packages=pip_packages)
        # register() returns the registered environment, so no second lookup is needed.
        return env.register(workspace=ws)

    print('Environment ' + env_name + ' found!')
    # Return the object fetched above so that the requested version is honoured instead
    # of being replaced by a second, version-less lookup.
    return env

In [None]:
# Conda-level dependencies used when the training environment has to be created.
conda_packages = [
    'python=3.7.10',
    "scikit-learn",
    "scipy",
    "pandas",
    "pip",
    "matplotlib",
]
# Pip-level dependencies used when the training environment has to be created.
# Each package is listed once. scikit-learn is provided through conda above; the
# "sklearn" name on PyPI is a deprecated placeholder and is therefore not listed here.
pip_packages = [
    "azureml-defaults",
    "azureml-dataprep[pandas]",
    "setuptools",
    "wheel",
    "joblib",
    "pyarrow",
    "numpy<=1.18.5",
    "Pillow",
    "imageio",
    "opencv-python",
    "seaborn",
    "click==7.1.2",
    "torch==1.10.0",
    "torchvision==0.11.0",
    "pycocotools",
]

In [None]:
# Working directory of the notebook kernel, as a plain string; the local wheel path
# is resolved against it.
curr_dir = str(pathlib.Path.cwd())

In [None]:
# Register the locally built apple_seg wheel with the workspace and keep the returned
# URL; exist_ok=True is passed so the call tolerates an existing wheel.
whl_url = Environment.add_private_pip_wheel(
    exist_ok=True,
    file_path=os.path.join(curr_dir, "dist/apple_seg-0.0.1-py3-none-any.whl"),
    workspace=workspace,
)
print("oip_whl_url: {}".format(whl_url))

In [None]:
# Get or create the "cv_train" environment, asking for version "5"; the package lists
# are only used if the environment has to be created.
env = get_or_create_env(
    ws=workspace,
    env_name="cv_train",
    conda_packages=conda_packages,
    pip_packages=pip_packages,
    version="5",
)

In [None]:
# Add the private wheel URL (whl_url) as a pip dependency of the environment object.
conda_dep = env.python.conda_dependencies
conda_dep.add_pip_package(whl_url)
# Assign the modified dependencies back onto the environment.
# NOTE(review): this only changes the in-memory `env`; nothing in this cell re-registers it.
env.python.conda_dependencies = conda_dep


In [None]:
# Run configuration handed to the pipeline step: which environment to use and which
# compute target to run on.
pipeline_run_config = RunConfiguration()
pipeline_run_config.environment = env
pipeline_run_config.target = compute_target

In [None]:
# Training hyperparameters and model selection.
epochs = 64
# Kept as a string so the output folder name reads "lr1e-3"; it is converted with
# float() where it is passed to the training script.
lr = "1e-3"
batch = 3
arch_type = "fcn"
backbone = "resnet50"
pretrained = True

model_name = f"{arch_type}_{backbone}"
model = PipelineParameter('model', model_name)

# Datastore-relative paths are joined with "/" instead of os.path.join: they name
# locations inside the datastore, not on the local file system, so they must not pick
# up the local OS separator (a backslash when the notebook runs on Windows).
data_path: FileDataset = Dataset.File.from_files(
    path=(datastore, "/".join(['Dataset', 'MinneApple', 'detection'])))

# Output location encodes the run settings, e.g. Models/<model>/Pretrained/lr1e-3_b3_ep64.
output_dir_name = f"lr{lr}_b{batch}_ep{epochs}"
ds_path = "/".join([
    "Models",
    model_name,
    "Pretrained" if pretrained else "Not_Pretrained",
    output_dir_name,
])
output_dir = OutputFileDatasetConfig(
    name="output_dir",
    destination=(datastore, ds_path),
).as_upload(overwrite=True)


In [None]:
# Pipeline step that runs train_fcn.py on the compute target with the mounted dataset
# as input and the upload-configured output directory as output.
train_step = PythonScriptStep(
    name="train model",
    source_directory='.',
    script_name="train_fcn.py",
    arguments=[
        '--model', model,
        '--data_path', data_path.as_named_input("data_path").as_mount(),
        '--output-dir', output_dir,
        '--epochs', epochs,
        '--lr', float(lr),
        '--batch-size', batch,
        # Pass the flag only when `pretrained` is set, so the script's behaviour matches
        # the "Pretrained"/"Not_Pretrained" folder chosen for the outputs.
        # NOTE(review): assumes train_fcn.py treats --pretrained as a value-less boolean
        # flag (it is passed without a value here) - confirm against the script.
        *(['--pretrained'] if pretrained else []),
        # NOTE(review): "cuda:1" presumably selects the GPU with index 1; confirm the
        # compute node has more than one GPU.
        '--device', "cuda:1",
    ],
    outputs=[output_dir],
    compute_target=compute_target,
    runconfig=pipeline_run_config,
    # Never reuse a previous run's results for this step.
    allow_reuse=False,
)

In [None]:
# Wrap the training step in a pipeline, submit it under the "app_seg" experiment with
# regenerate_outputs=True, and display the run widget for monitoring.
experiment = Experiment(name="app_seg", workspace=workspace)
pipeline = Pipeline(steps=[train_step], workspace=workspace)
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
RunDetails(pipeline_run).show()

In [None]:
# Wait for the submitted pipeline run to finish; show_output=True requests the run's
# output to be shown while waiting.
pipeline_run.wait_for_completion(show_output=True)
