In [1]:
import os
import typing
from datetime import datetime, timezone

import pandas as pd
import pytz
from typing_extensions import Annotated

from flytekit import task, workflow, LaunchPlan
from flytekit.configuration import Config
from flytekit.core.artifact import Artifact
from flytekit.core.context_manager import FlyteContextManager
from flytekit.extend import TypeEngine
from flytekit.remote.remote import FlyteRemote
from flytekit.types.structured.structured_dataset import StructuredDataset

In [9]:
# Build the FlyteRemote client from the local admin config file, defaulting to the
# flytesnacks/development project and domain.
# expanduser() resolves "~" to the current user's home directory, so the notebook is
# not tied to a single developer's machine.
r = FlyteRemote(
    Config.auto(config_file=os.path.expanduser("~/.flyte/local_admin.yaml")),
    default_project="flytesnacks",
    default_domain="development",
)

### Existing Interaction Model

Ran the `run_gather_data` launch plan; fetch that execution and traverse its nodes.

In [3]:
# Fetch an existing execution by name. The saved output of the next cell shows it is a
# run of the `artifact_ux.basic_ml.run_gather_data` launch plan.
e1 = r.fetch_execution(name="ajwx5v6qldmtkdzbsgxl")

In [4]:
# Refresh the execution's state. sync_nodes=True is passed presumably so that node-level
# data is synced too (the following cell reads e1.node_executions).
r.sync_execution(e1, sync_nodes=True)

<FlyteLiteral id { project: "flytesnacks" domain: "development" name: "ajwx5v6qldmtkdzbsgxl" } spec { launch_plan { resource_type: LAUNCH_PLAN project: "flytesnacks" domain: "development" name: "artifact_ux.basic_ml.run_gather_data" version: "NRFyWGQyjORIYkH14yQ7IA==" } metadata { system_metadata { } } labels { } annotations { } auth_role { } } closure { outputs { uri: "s3://my-s3-bucket/metadata/propeller/flytesnacks-development-ajwx5v6qldmtkdzbsgxl/end-node/data/0/outputs.pb" } phase: SUCCEEDED started_at { seconds: 1693880538 nanos: 526056000 } duration { seconds: 31 nanos: 891672000 } created_at { seconds: 1693880538 nanos: 511048000 } updated_at { seconds: 1693880570 nanos: 417728000 } }>

In [5]:
# Take output "o0" of node "n0" from the synced execution.
v = e1.node_executions["n0"].outputs.get("o0")

In [6]:
# Open the output as a pandas DataFrame and load all of it.
df = v.open(pd.DataFrame).all()

In [7]:
df

Unnamed: 0,sectors,rides
0,SEA,696
1,SAE,253
2,ESA,397
3,EAS,816
4,ASE,532
5,AES,811


### Union Artifact Model

#### Data Access

In [8]:
# Execution id interpolated into the artifact URI in the next cell (same id as the
# execution fetched earlier in this notebook).
# NOTE(review): the saved output of the `a` display cell below shows a different id
# (amt7q2qbrz7j42hq48x5), so that output looks stale — re-run to confirm.
execid = "ajwx5v6qldmtkdzbsgxl"

In [9]:
# Look up an artifact by a flyte:// URI built from the execution id.
# The "n0/0/o/o0" suffix presumably means node n0, attempt 0, output o0 — TODO confirm.
a = r.get_artifact(f"flyte://av0.1/flytesnacks/development/{execid}/n0/0/o/o0")

In [23]:
a

Artifact: project=flytesnacks, domain=development, name=amt7q2qbrz7j42hq48x5/n0/0/o/o0, version=amt7q2qbrz7j42hq48x5
  name=amt7q2qbrz7j42hq48x5/n0/0/o/o0
  partitions=None
  tags=None
  literal_type=<FlyteLiteral structured_dataset_type { }>, literal=<FlyteLiteral scalar { structured_dataset { uri: "s3://my-s3-bucket/data/55/amt7q2qbrz7j42hq48x5-n0-0/7474760bb307fb2b2589365146d82c2f" metadata { structured_dataset_type { format: "parquet" } } } }>)

In [3]:
# Query the `ride_count_data` artifact by name plus `region`/`ds` query parameters
# instead of by execution id. This is a plain string: there is nothing to interpolate,
# so no f-prefix is needed.
# Note: this rebinds `a`, replacing the artifact that was fetched by execution id.
a = r.get_artifact("flyte://av0.1/flytesnacks/development/ride_count_data?region=SEA&ds=2023-09-05")

In [4]:
a

Artifact: project=flytesnacks, domain=development, name=ride_count_data, version=adb7ssdvvnjs8lchx4ld/n0/0/o0
  name=ride_count_data
  partitions=<flytekit.core.artifact.Partitions object at 0x158768650>
  tags=None
  literal_type=<FlyteLiteral structured_dataset_type { }>, literal=<FlyteLiteral scalar { structured_dataset { uri: "s3://my-s3-bucket/data/sb/adb7ssdvvnjs8lchx4ld-n0-0/917af49e434b530001eea264d2a064c1" metadata { structured_dataset_type { format: "parquet" } } } }>)

In [24]:
# Convert the artifact's literal into a pandas DataFrame with flytekit's TypeEngine,
# using the current Flyte context.
ctx = FlyteContextManager.current_context()
v = TypeEngine.to_python_value(ctx, a.literal, pd.DataFrame)

In [25]:
v

Unnamed: 0,sectors,rides
0,SEA,314
1,SAE,869
2,ESA,346
3,EAS,895
4,ASE,140
5,AES,256


#### Ability to Launch

Kick off a new execution with the fetched artifact, and confirm that it can be used and that querying doesn't fail.
Then kick it off again without passing any artifact at all.

In [10]:
# Fetch version "a4" of the `artifact_ux.basic_ml.run_train_model` workflow so that it
# can be launched in the cells below.
run_train_model_wf = r.fetch_workflow(
    project="flytesnacks",
    domain="development",
    name="artifact_ux.basic_ml.run_train_model",
    version="a4",
)

In [10]:
# Launch the workflow, passing the fetched artifact explicitly as the "data" input.
r.execute(run_train_model_wf, inputs={"region": "SEA", "data": a})

Execute without specifying the data. Note that the fetched artifact should match what was picked up by the query when it was kicked off.

In [16]:
# Kickoff time for the launch below: 2023-09-05 00:00 interpreted in the machine's local
# timezone (astimezone() treats a naive datetime as local time), converted to UTC, then
# stripped of tzinfo so that a naive UTC datetime is passed on.
# Uses the standard-library `timezone.utc`, imported in the top import cell.
dd = datetime(2023, 9, 5).astimezone(timezone.utc).replace(tzinfo=None)
             

In [17]:
# Launch again without a "data" input; only the region and the kickoff time are given.
r.execute(run_train_model_wf, inputs={"region": "SEA", "kickoff_time": dd})

<FlyteLiteral id { project: "flytesnacks" domain: "development" name: "f02f191a29dfc494bacf" } spec { launch_plan { resource_type: LAUNCH_PLAN project: "flytesnacks" domain: "development" name: "artifact_ux.basic_ml.run_train_model" version: "a4" } metadata { system_metadata { } } notifications { } labels { } annotations { } auth_role { } } closure { started_at { } duration { } created_at { seconds: 1695419932 nanos: 514329000 } updated_at { seconds: 1695419932 nanos: 514329000 } }>

In [10]:
# Fetch the `ride_count_data` artifact for region=SEA with the given `ds` value.
# This is a plain string: there is nothing to interpolate, so no f-prefix is needed.
# NOTE(review): "2023-09-58" is not a valid calendar date, but it matches the `ds`
# partition value shown in the saved output for this artifact — confirm how the
# producing workflow formats `ds`.
queried_artifact = r.get_artifact("flyte://av0.1/flytesnacks/development/ride_count_data?region=SEA&ds=2023-09-58")

In [11]:
queried_artifact

Artifact: project=flytesnacks, domain=development, name=ride_count_data, version=flytesnacks/development/allr5shv4s48682kqvzw/n0/0/o0
  name=ride_count_data
  partitions={'ds': '2023-09-58', 'region': 'SEA'}
  tags=None
  literal_type=<FlyteLiteral structured_dataset_type { }>, literal=<FlyteLiteral scalar { structured_dataset { uri: "s3://my-s3-bucket/data/yq/allr5shv4s48682kqvzw-n0-0/8b770555d240e71f4f25120b9bedc66d" metadata { structured_dataset_type { format: "parquet" } } } }>)

In [31]:
# Fetch the `my-model` artifact using the ":SEA" suffix. The saved output lists 'SEA'
# under tags, so the suffix appears to select by tag — TODO confirm.
# This is a plain string: there is nothing to interpolate, so no f-prefix is needed.
model_artifact = r.get_artifact("flyte://av0.1/flytesnacks/development/my-model:SEA")

In [32]:
model_artifact

Artifact: project=flytesnacks, domain=development, name=my-model, version=f68afeb27fef44450b74
  name=my-model
  partitions=None
  tags=['SEA']
  literal_type=<FlyteLiteral blob { }>, literal=<FlyteLiteral scalar { blob { metadata { type { } } uri: "s3://my-s3-bucket/data/i3/f68afeb27fef44450b74-n0-0/ec021ab359f2e8c36198b2a4b553dbd5/demo_ml.py" } }>)