In [None]:
import mlrun
from dotenv import load_dotenv

### Create `project_setup.py`
Note: the project context is `src`, so the file paths in `project_setup.py` are written relative to that directory.

In [None]:
%%writefile src/project_setup.py
import os

import mlrun


def setup(project: mlrun.projects.MlrunProject) -> mlrun.projects.MlrunProject:
    """Configure the project from its parameters and register its functions and workflow.

    The optional project parameters ``source``, ``secrets_file`` and
    ``default_image`` are read, and each one is applied only when it is set.
    The secrets file is additionally skipped when the path does not exist.

    :param project: the project to configure.

    :returns: the same project object, after it has been saved.
    """
    source = project.get_param("source")
    secrets_file = project.get_param("secrets_file")
    default_image = project.get_param("default_image")

    if source:
        print(f"Project Source: {source}")
        # Reuse the value fetched above rather than looking the parameter up again.
        project.set_source(source, pull_at_runtime=True)

    # Only load secrets when a file was configured and is actually present.
    if secrets_file and os.path.exists(secrets_file):
        project.set_secrets(file_path=secrets_file)
        mlrun.set_env_from_file(secrets_file)

    if default_image:
        project.set_default_image(default_image)

    # MLRun Functions
    project.set_function(
        name="get-data",
        func="data.py",
        kind="job",
        handler="get_data",
    )

    project.set_function(
        name="train",
        func="train.py",
        kind="job",
        handler="train_model",
    )

    # MLRun Workflows
    project.set_workflow("main", "main_workflow.py")

    # Save and return the project:
    project.save()
    return project


### Load project

In [None]:
# Values passed to the project as parameters and read back by `setup`.
SOURCE = "https://github.com/mlrun/test-notebooks#main"
SECRETS_FILE = "mock_secrets.env"
DEFAULT_IMAGE = "mlrun/mlrun"

# NOTE: the name is misspelled, but later cells reference it exactly as written.
PARAMTERS = dict(
    source=SOURCE,
    secrets_file=SECRETS_FILE,
    default_image=DEFAULT_IMAGE,
)

In [None]:
# Load (or create) the test project from the `src` context, passing the parameters above.
project = mlrun.get_or_create_project(
    name="test-project-setup",
    context="./src",
    parameters=PARAMTERS,
)

### Check source

In [None]:
# The project source must be the one supplied through the parameters.
assert project.source == SOURCE, f"unexpected project source: {project.source!r}"

In [None]:
# `setup` calls set_source(..., pull_at_runtime=True), so this flag is expected to be set.
assert project.spec.load_source_on_run, "project source is not flagged to load on run"

### Check functions

In [None]:
# `setup` registers exactly two functions: "get-data" and "train".
assert len(project.spec.functions) == 2, (
    f"expected 2 functions, found {len(project.spec.functions)}"
)

In [None]:
# Function names the project is expected to define, in registration order.
functions = ["get-data", "train"]
# Receives the names actually found on the project (filled in by the next cell).
functions_check = list()

In [None]:
# Rebuild the list from scratch so that re-running this cell cannot accumulate
# duplicate names (appending to the list created in the previous cell would).
functions_check = [f["name"] for f in project.spec.functions]

In [None]:
# Names and their order must match what `setup` registered.
assert functions == functions_check, (
    f"expected functions {functions}, found {functions_check}"
)

### Check workflows

In [None]:
# `setup` registers a single workflow named "main".
assert len(project.spec.workflows) == 1, (
    f"expected 1 workflow, found {len(project.spec.workflows)}"
)

In [None]:
# Workflow names the project is expected to define.
workflows = ["main"]
# Receives the names actually found on the project (filled in by the next cell).
workflows_check = list()

In [None]:
# Rebuild the list from scratch so that re-running this cell cannot accumulate
# duplicate names (appending to the list created in the previous cell would).
workflows_check = [w["name"] for w in project.spec.workflows]

In [None]:
# The workflow names found on the project must match the expected list.
assert workflows == workflows_check, (
    f"expected workflows {workflows}, found {workflows_check}"
)

### Check params

In [None]:
# The parameters stored on the project must be exactly the ones passed at load time.
assert project.spec.params == PARAMTERS, (
    f"unexpected project params: {project.spec.params}"
)

### Check project image

In [None]:
# `setup` applies the `default_image` parameter via set_default_image.
assert project.default_image == DEFAULT_IMAGE, (
    f"unexpected default image: {project.default_image!r}"
)

### Check project secrets

In [None]:
# Read the secrets file into a plain dict; its keys are compared with the project's below.
secret_dict = dict(
    mlrun.set_env_from_file(SECRETS_FILE, return_dict=True)
)

In [None]:
# The secret contents cannot be checked, so only the stored key names are fetched.
db = mlrun.get_run_db()
secret_keys = db.list_project_secret_keys(
    project=project.metadata.name,
)

In [None]:
# Order is irrelevant here, so compare as sets; on failure report the keys that differ.
assert set(secret_keys.secret_keys) == set(secret_dict.keys()), (
    "project secret keys differ from the secrets file: "
    f"{sorted(set(secret_keys.secret_keys) ^ set(secret_dict.keys()))}"
)

### Run workflow

In [None]:
# Run the "main" workflow registered by `setup`.
# NOTE(review): `watch=True` presumably waits for the workflow to finish and
# `dirty=True` presumably allows running with uncommitted changes; confirm against the MLRun docs.
run_uid = project.run(
    name="main",
    arguments={
        "dataset": "https://s3.wasabisys.com/iguazio/data/batch-predict/training_set.parquet",
        "label_column": "label",
        "model_name": "model",
        "test_size": 0.2,
        "random_state": 42,
    },
    watch=True,
    dirty=True,
)

### Check run outputs

In [None]:
# Collect the runs carrying this workflow's label, converted to run objects.
workflow_runs = project.list_runs(
    labels=f"workflow={run_uid}",
).to_objects()

In [None]:
# One run is expected per registered function ("get-data" and "train").
assert len(workflow_runs) == 2, (
    f"expected 2 workflow runs, found {len(workflow_runs)}"
)

In [None]:
# Expected output keys for each run; the check below compares them in this exact order.
DATA_OUTPUTS = ["num_rows", "cleaned_data"]
TRAIN_OUTPUTS = [
    "accuracy",
    "f1_score",
    "precision_score",
    "recall_score",
    "feature-importance",
    "test_set",
    "confusion-matrix",
    "roc-curves",
    "calibration-curve",
    "model",
]

In [None]:
# Map each expected run name to the output keys it must produce.
expected_outputs = {"get-data": DATA_OUTPUTS, "train": TRAIN_OUTPUTS}

for run in workflow_runs:
    run_name = run.metadata.name
    # Every run of the workflow must belong to one of the two registered functions.
    assert run_name in expected_outputs
    assert list(run.outputs.keys()) == expected_outputs[run_name]

### Clean up

In [None]:
# Delete the test project, using the "cascade" deletion strategy.
db.delete_project(
    name=project.metadata.name,
    deletion_strategy="cascade",
)