In [4]:
%pip install "git+https://github.com/flyteorg/flytekit@3e3ec17ea" union "pydantic>2" pandas pyarrow

Collecting git+https://github.com/flyteorg/flytekit@3e3ec17ea
  Cloning https://github.com/flyteorg/flytekit (to revision 3e3ec17ea) to /private/var/folders/4q/frdnh9l10h53gggw1m59gr9m0000gp/T/pip-req-build-cdnu3ehs
  Running command git clone --filter=blob:none --quiet https://github.com/flyteorg/flytekit /private/var/folders/4q/frdnh9l10h53gggw1m59gr9m0000gp/T/pip-req-build-cdnu3ehs
[0m  Running command git checkout -q 3e3ec17ea
  Resolved https://github.com/flyteorg/flytekit to commit 3e3ec17ea
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Note: you may need to restart the kernel to use updated packages.


In [8]:
import flytekit as fk
import pandas as pd
import sys


# Container image for the data-loading task. It installs the same flytekit
# commit as the notebook and targets the kernel's major.minor Python version.
image = fk.ImageSpec(
    name="jupyter-notebook-workshop",
    python_version="{0.major}.{0.minor}".format(sys.version_info),
    apt_packages=["git"],  # presumably needed so pip can install the git+https package
    packages=[
        "pandas",
        "pyarrow",
        "git+https://github.com/flyteorg/flytekit@3e3ec17ea",
        "union==0.1.92",
    ],
)

# Task decorator pre-bound to `image`; used as `@task` on the functions below.
task = fk.task(container_image=image)


@task
def get_df() -> pd.DataFrame:
    """Build a five-row demo DataFrame with columns Name, Age, Grade and PassedTest."""
    columns = {
        "Name": ["Alice", "Bob", "Charlie", "David", "Eva"],
        "Age": [23, 25, 22, 24, 23],
        "Grade": ["A", "B", "A", "C", "B"],
        "PassedTest": [True, False, True, False, True],
    }
    return pd.DataFrame(data=columns)

In [9]:
from union.remote import UnionRemote
from union.ucimage._image_builder import _register_union_image_builder

# Private union helper (leading underscore), called before the remote client is
# created -- presumably so ImageSpec builds go through Union's image builder;
# TODO confirm and prefer a public entry point if one exists.
_register_union_image_builder() 
# Remote client, constructed with no arguments; used below to submit executions.
remote = UnionRemote()

In [10]:
# Submit get_df through the remote client; the task declares no parameters,
# hence the empty inputs dict.
exe = remote.execute(get_df, inputs={})
# Echo the execution handle as the cell output.
exe

[34mImage cr.union.ai/jupyter-notebook-workshop:Y6BvxH4QmsZJtXDChYQf7w not found.[0m
[34m[1m🐳 Build not found, submitting a new build...[0m
[33m[1m👍 Build submitted![0m
[1m⏳ Waiting for build to finish at: [36mhttps://serverless.union.ai/org/cosmicbboy/projects/system/domains/development/executions/a4pxnr8tfng4hzm7b9dc[0m[0m
[32m[1m✅ Build completed in 0:00:25![0m


In [12]:
# Wait for the get_df execution before reading its outputs
# (poll_interval is presumably in seconds -- confirm against the flytekit API).
exe.wait(poll_interval=1)

In [13]:
# Fetch the task's output into the notebook. 'o0' is presumably the default
# name flytekit assigns to the first unnamed output -- confirm.
dataframe = exe.outputs['o0']

In [14]:
# Display the fetched output as the cell result.
dataframe

Unnamed: 0,Name,Age,Grade,PassedTest
0,Alice,23,A,True
1,Bob,25,B,False
2,Charlie,22,A,True
3,David,24,C,False
4,Eva,23,B,True


In [15]:
def local_function(dataframe: pd.DataFrame) -> pd.Series:
    """Sum the ``Age`` column within each ``Grade`` group.

    This is a plain Python function evaluated in the notebook kernel; it is
    not decorated as a Flyte task.

    Args:
        dataframe: Frame containing at least ``Grade`` and ``Age`` columns.

    Returns:
        Series named ``Age``, indexed by ``Grade``, holding each group's total.
    """
    ages_by_grade = dataframe.groupby("Grade")["Age"]
    return ages_by_grade.sum()

# Apply the local helper to the DataFrame fetched from the remote execution.
local_function(dataframe)

Grade
A    45
B    48
C    24
Name: Age, dtype: int64

In [16]:
from flytekit.types.file import FlyteFile


# Image for the training task: the data packages plus joblib and scikit-learn,
# which train_model imports inside its body.
training_image = fk.ImageSpec(
    name="jupyter-notebook-workshop-training",
    python_version="{0.major}.{0.minor}".format(sys.version_info),
    apt_packages=["git"],
    packages=[
        "pandas",
        "pyarrow",
        "joblib",
        "scikit-learn",
        "git+https://github.com/flyteorg/flytekit@3e3ec17ea",
    ],
)

# Task decorator pre-bound to the training image.
training_task = fk.task(container_image=training_image)

@training_task
def train_model(dataframe: pd.DataFrame) -> FlyteFile:
    """Fit a logistic regression predicting ``PassedTest`` from ``Age``.

    Args:
        dataframe: Training data with ``Age`` and ``PassedTest`` columns.

    Returns:
        FlyteFile pointing at the joblib-serialized fitted model.
    """
    # joblib and scikit-learn are imported inside the task body: they are
    # declared in training_image's package list rather than installed by the
    # notebook's own %pip cell.
    import os

    import joblib
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression()
    model.fit(dataframe[["Age"]], dataframe["PassedTest"])

    # Write into the task's own working directory instead of the process cwd,
    # so a local run does not drop model.pkl next to the notebook and the task
    # does not depend on the cwd being writable.
    model_path = os.path.join(fk.current_context().working_directory, "model.pkl")
    with open(model_path, "wb") as f:
        joblib.dump(model, f)

    return FlyteFile(path=model_path)


@fk.workflow
def train_wf(dataframe: pd.DataFrame) -> FlyteFile:
    """Single-step workflow: hand ``dataframe`` to ``train_model`` and return its file."""
    return train_model(dataframe=dataframe)

In [17]:
# Submit the training workflow, passing the DataFrame fetched earlier as its
# `dataframe` input.
model_exe = remote.execute(train_wf, inputs={"dataframe": dataframe})
# Echo the execution handle as the cell output.
model_exe

[34mImage cr.union.ai/jupyter-notebook-workshop-training:NZgUU3lWlNGjEPbANb6CUw found. Skip building.[0m


In [18]:
# Wait for the workflow run, then fetch its single output.
model_exe.wait(poll_interval=1)
# NOTE(review): train_wf is annotated to return FlyteFile, so `model` is
# presumably a file reference to model.pkl rather than a fitted estimator --
# it would need to be downloaded and loaded with joblib before use.
model = model_exe.outputs['o0']
model