In [14]:
import os
import typing

import pandas as pd
from typing_extensions import Annotated

from flytekit import task, workflow, LaunchPlan
from flytekit.configuration import Config
from flytekit.core.artifact import Artifact
from flytekit.core.context_manager import FlyteContextManager
from flytekit.extend import TypeEngine
from flytekit.remote.remote import FlyteRemote
from flytekit.types.structured.structured_dataset import StructuredDataset

In [15]:
# Remote handle used by every cell below to fetch executions/artifacts and to
# launch workflows. The admin config file is resolved relative to the current
# user's home directory instead of a hardcoded /Users/<name> path, so the
# notebook is not tied to one machine.
r = FlyteRemote(
    Config.auto(config_file=os.path.expanduser("~/.flyte/local_admin.yaml")),
    default_project="flytesnacks",
    default_domain="development",
)

### Existing Interaction Model

After running the `run_gather_data` launch plan, fetch the resulting execution and traverse its nodes.

In [3]:
# Fetch the single execution that the rest of this section inspects. Only one
# id is pinned so that a top-to-bottom run produces the execution shown in the
# sync output below, instead of depending on which of several reassignments
# of `e1` happened to run last.
# Other execution ids that were fetched here during development:
#   ab7g4qhpvz5c7r5lgh6w, atfrwpjddkzrfbcghxxq
e1 = r.fetch_execution(name="amt7q2qbrz7j42hq48x5")

In [17]:
# Sync the fetched execution, asking for its nodes as well; the next cell
# reads `e1.node_executions`.
r.sync_execution(
    e1,
    sync_nodes=True,
)

<FlyteLiteral id { project: "flytesnacks" domain: "development" name: "amt7q2qbrz7j42hq48x5" } spec { launch_plan { resource_type: LAUNCH_PLAN project: "flytesnacks" domain: "development" name: "demo_ml.run_gather_data" version: "DgsdQvpLg82OZonb2-cgag==" } metadata { system_metadata { } } labels { } annotations { } auth_role { } } closure { outputs { uri: "s3://my-s3-bucket/metadata/propeller/flytesnacks-development-amt7q2qbrz7j42hq48x5/end-node/data/0/outputs.pb" } phase: SUCCEEDED started_at { seconds: 1692294699 nanos: 190720000 } duration { seconds: 25 nanos: 690669000 } created_at { seconds: 1692294699 nanos: 177501000 } updated_at { seconds: 1692294724 nanos: 881389000 } }>

In [18]:
# Pick output "o0" of node "n0" from the synced execution.
v = (
    e1.node_executions["n0"]
    .outputs
    .get("o0")
)

In [19]:
# Open the node output as a pandas DataFrame and load all of it.
df = (
    v.open(pd.DataFrame)
    .all()
)

In [20]:
# Display the DataFrame loaded from the node output.
df

Unnamed: 0,sectors,rides
0,SEA,314
1,SAE,869
2,ESA,346
3,EAS,895
4,ASE,140
5,AES,256


### Union Artifact Model

#### Data Access

In [21]:
# Execution id interpolated into the artifact URI in the next cell.
execid = "amt7q2qbrz7j42hq48x5"

In [22]:
# Fetch the artifact addressed by a flyte:// URI built from `execid`.
a = r.get_artifact(
    f"flyte://av0.1/flytesnacks/development/{execid}/n0/0/o/o0"
)

In [23]:
# Show the fetched artifact.
a

Artifact: project=flytesnacks, domain=development, name=amt7q2qbrz7j42hq48x5/n0/0/o/o0, version=amt7q2qbrz7j42hq48x5
  name=amt7q2qbrz7j42hq48x5/n0/0/o/o0
  partitions=None
  tags=None
  literal_type=<FlyteLiteral structured_dataset_type { }>, literal=<FlyteLiteral scalar { structured_dataset { uri: "s3://my-s3-bucket/data/55/amt7q2qbrz7j42hq48x5-n0-0/7474760bb307fb2b2589365146d82c2f" metadata { structured_dataset_type { format: "parquet" } } } }>)

In [24]:
# Convert the artifact's literal into a pandas DataFrame via the type engine,
# using the current Flyte context.
ctx = FlyteContextManager.current_context()
v = TypeEngine.to_python_value(
    ctx,
    a.literal,
    pd.DataFrame,
)

In [25]:
# Display the DataFrame decoded from the artifact literal.
v

Unnamed: 0,sectors,rides
0,SEA,314
1,SAE,869
2,ESA,346
3,EAS,895
4,ASE,140
5,AES,256


#### Ability to Launch

Kick off a new execution with the fetched artifact to confirm that it can be used as an input and that querying doesn't fail.
Then kick it off again without passing any artifact at all.

In [26]:
# Fetch one specific version of the `demo_ml.run_train_model` workflow
# (arguments are project, domain, name, version).
run_train_model_wf = r.fetch_workflow(
    "flytesnacks",
    "development",
    "demo_ml.run_train_model",
    "QsqHt4khPRSWCBQR30ih4A==",
)

In [27]:
# Launch the workflow, passing the fetched artifact explicitly as `data`.
r.execute(
    run_train_model_wf,
    inputs={
        "region": "SEA",
        "data": a,
    },
)

<FlyteLiteral id { project: "flytesnacks" domain: "development" name: "f5c244f1138094b89ba5" } spec { launch_plan { resource_type: LAUNCH_PLAN project: "flytesnacks" domain: "development" name: "demo_ml.run_train_model" version: "QsqHt4khPRSWCBQR30ih4A==" } metadata { system_metadata { } } notifications { } labels { } annotations { } auth_role { } } closure { started_at { } duration { } created_at { seconds: 1692814409 nanos: 352738000 } updated_at { seconds: 1692814409 nanos: 352738000 } }>

Execute without specifying the data. Note that the fetched artifact should match what was picked up by the query when it was kicked off.

In [28]:
# Launch the workflow again with only `region`; no `data` input is supplied.
r.execute(
    run_train_model_wf,
    inputs={
        "region": "SEA",
    },
)

<FlyteLiteral id { project: "flytesnacks" domain: "development" name: "f68afeb27fef44450b74" } spec { launch_plan { resource_type: LAUNCH_PLAN project: "flytesnacks" domain: "development" name: "demo_ml.run_train_model" version: "QsqHt4khPRSWCBQR30ih4A==" } metadata { system_metadata { } } notifications { } labels { } annotations { } auth_role { } } closure { started_at { } duration { } created_at { seconds: 1692814439 nanos: 978113000 } updated_at { seconds: 1692814439 nanos: 978113000 } }>

In [29]:
# Query the `ride_count_data` artifact by its `region` and `ds` values.
# The URI is a plain string: nothing is interpolated, so no f-prefix is needed.
# NOTE(review): "2023-08-58" is not a valid calendar date, but it matches the
# `ds` partition value shown in the output below, so it is left unchanged.
queried_artifact = r.get_artifact(
    "flyte://av0.1/flytesnacks/development/ride_count_data?region=SEA&ds=2023-08-58"
)

In [30]:
# Show the artifact returned by the query.
queried_artifact

Artifact: project=flytesnacks, domain=development, name=ride_count_data, version=atfrwpjddkzrfbcghxxq
  name=ride_count_data
  partitions={'ds': '2023-08-58', 'region': 'SEA'}
  tags=None
  literal_type=<FlyteLiteral structured_dataset_type { }>, literal=<FlyteLiteral scalar { structured_dataset { uri: "s3://my-s3-bucket/data/u1/atfrwpjddkzrfbcghxxq-n0-0/fc74d6e685a15afb2398cf133ec7c4aa" metadata { structured_dataset_type { format: "parquet" } } } }>)

In [31]:
# Fetch the `my-model` artifact; the `:SEA` suffix appears to select it by tag
# (the output below lists tags=['SEA']).
# The URI is a plain string: nothing is interpolated, so no f-prefix is needed.
model_artifact = r.get_artifact(
    "flyte://av0.1/flytesnacks/development/my-model:SEA"
)

In [32]:
# Show the fetched model artifact.
model_artifact

Artifact: project=flytesnacks, domain=development, name=my-model, version=f68afeb27fef44450b74
  name=my-model
  partitions=None
  tags=['SEA']
  literal_type=<FlyteLiteral blob { }>, literal=<FlyteLiteral scalar { blob { metadata { type { } } uri: "s3://my-s3-bucket/data/i3/f68afeb27fef44450b74-n0-0/ec021ab359f2e8c36198b2a4b553dbd5/demo_ml.py" } }>)