# Intro to Flyte 2

To run this tutorial you'll need to be either an exsisting Union.ai user with Flyte v2, or [sign up for the free Beta access](https://www.union.ai/beta)!

Flyte 2.0



## Setup

if you're running this notebook locally we suggest creating a virtual environment and installing the packages locally with uv. You can follow the instructions on the README.
```
uv venv .venv --python=python3.11
source .venv/bin/activate
uv pip install flyte>=0.2.0b21 --prerelease=allow
```
If you're running this in google colab run the two setup cells below:

In [2]:

!uv pip install flyte>=0.2.0b21 --prerelease=allow

zsh:1: 0.2.0b21 not found


##### Flyte Config
if you're running this locally remove `--auth-type headless\`

In [None]:
!flyte create config \
    --endpoint demo.hosted.unionai.cloud \
    --auth-type headless\
    --builder remote \
    --domain development \
    --project flytesnacks

## 👋 Hello Flyte Tasks

In [None]:
import flyte

env = flyte.TaskEnvironment(
    name="hello_flyte_v2",
    resources=flyte.Resources(cpu=1, memory="250Mi"),
)


@env.task
def extract_features(user_input: str) -> dict:
    clean = user_input.strip().lower()
    return {
        "text": clean,
        "length": len(clean),
        "word_count": len(clean.split()),
        "has_numbers": any(c.isdigit() for c in clean)
    }

@env.task
def validate_features(features: dict) -> bool:
    return features["length"] > 5 and features["word_count"] >= 2

@env.task
def prepare_training_data(raw_inputs: list[str]) -> dict:
    if len(raw_inputs) < 5:
        raise ValueError(f"Need at least 5 samples, got: {len(raw_inputs)}")

    features = list(flyte.map(extract_features, raw_inputs))
    valid_flags = list(flyte.map(validate_features, features))

    return {
        "total_samples": len(features),
        "valid_samples": sum(valid_flags),
        "avg_length": sum(f["length"] for f in features) / len(features),
        "ready_for_training": sum(valid_flags) >= len(features) * 0.8
    }

# Sample user reviews/comments for sentiment analysis training
sample_inputs = [
    "  This product is amazing! I love it.  ",    # valid: >5 chars, 2+ words
    "Great quality and fast shipping",             # valid: >5 chars, 2+ words
    " Bad ",                                       # invalid: too short
    "okay",                                        # invalid: <2 words
    "The delivery was delayed by 3 days",         # valid: >5 chars, 2+ words, has numbers
    "Excellent customer service team",             # valid: >5 chars, 2+ words
    "Perfect for my home office setup",           # valid: >5 chars, 2+ words
    "meh"                                          # invalid: too short, 1 word
]

flyte.init_from_config(".flyte/config.yaml")
execution = flyte.run(prepare_training_data, raw_inputs=sample_inputs)
print(f"Execution: {execution.name}")
print(f"URL: {execution.url}")
# Click on signin link to auth the first workflow 👇


#### Run tasks locally

In [None]:
flyte.init() #overwrite flyte init from config
execution = flyte.run(prepare_training_data, raw_inputs=sample_inputs)
print(f"Execution: {execution.name}")
print(f"URL: {execution.url}")
#todo: output local info

## Build an ML Pipeline (and see more features)

In [None]:
import flyte
from flyte.io import Dir, File

# Custom environment with scikit-learn installed
env = flyte.TaskEnvironment(
    name="scikit_learn",
    resources=flyte.Resources(cpu=1, memory="500Mi"),
    image=flyte.Image.from_debian_base().with_pip_packages("scikit-learn",
                                                           "unionai-reuse==0.1.5"),
    reusable=flyte.ReusePolicy(
        replicas=3,
        idle_ttl=60,
        concurrency=6,
        scaledown_ttl=60,
    ),
)

@env.task(cache="auto")
async def load_iris_data() -> tuple[File, File]:
    """Load the iris dataset, perform train-test split, and save to separate files."""
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split
    import pickle

    iris = load_iris()

    # Perform train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.3, random_state=42
    )

    # Package training data
    train_data = {
        "features": X_train.tolist(),
        "targets": y_train.tolist(),
        "feature_names": list(iris.feature_names)
    }

    # Package test data
    test_data = {
        "features": X_test.tolist(),
        "targets": y_test.tolist(),
        "feature_names": list(iris.feature_names)
    }

    # Save training data to file
    with open("train_data.pkl", "wb") as f:
        pickle.dump(train_data, f)

    # Save test data to file
    with open("test_data.pkl", "wb") as f:
        pickle.dump(test_data, f)

    train_file = await File.from_local("train_data.pkl")
    test_file = await File.from_local("test_data.pkl")

    return train_file, test_file

@env.task
async def train_model(train_data_file: File) -> File:
    """Train a classifier using the training data."""
    from sklearn.ensemble import RandomForestClassifier
    import pickle

    # Load training data
    async with train_data_file.open() as f:
        if hasattr(f, 'read'):
            content = f.read()
        else:
            content = await f.read()
    train_data = pickle.loads(content)

    X_train = train_data["features"]
    y_train = train_data["targets"]

    # Train model
    clf = RandomForestClassifier(n_estimators=10, random_state=42)
    clf.fit(X_train, y_train)

    # Package model with metadata
    model_package = {
        "model": clf,
        "feature_names": train_data["feature_names"],
        "train_samples": len(X_train)
    }

    # Save model to file
    with open("trained_model.pkl", "wb") as f:
        pickle.dump(model_package, f)

    model_file = await File.from_local("trained_model.pkl")
    return model_file

@env.task
async def validate_model(model_file: File, test_data_file: File) -> dict:
    """Validate the trained model using test data and return performance metrics."""
    from sklearn.metrics import accuracy_score
    import pickle

    # Load trained model
    async with model_file.open() as f:
        if hasattr(f, 'read'):
            model_content = f.read()
        else:
            model_content = await f.read()

    if isinstance(model_content, str):
        model_content = model_content.encode()

    model_package = pickle.loads(model_content)
    clf = model_package["model"]

    # Load test data
    async with test_data_file.open() as f:
        if hasattr(f, 'read'):
            test_content = f.read()
        else:
            test_content = await f.read()

    if isinstance(test_content, str):
        test_content = test_content.encode()

    test_data = pickle.loads(test_content)
    X_test = test_data["features"]
    y_test = test_data["targets"]

    # Evaluate model on test data
    test_predictions = clf.predict(X_test)
    test_acc = accuracy_score(y_test, test_predictions)

    return {
        "model_type": "RandomForest",
        "dataset": "iris",
        "train_samples": model_package["train_samples"],
        "test_samples": len(X_test),
        "test_accuracy": round(test_acc, 3),
        "feature_importance": {
            name: round(importance, 3)
            for name, importance in zip(model_package["feature_names"], clf.feature_importances_)
        }
    }

@env.task
async def ml_pipeline() -> File:
    """Complete ML pipeline: data loading → training → validation."""

    # Step 1: Load data and perform train-test split
    train_file, test_file = await load_iris_data()

    # Step 2: Train model using training data
    model_file = await train_model(train_file)

    # Step 3: Validate model using test data
    validation_results = await validate_model(model_file, test_file)

    # Add pipeline metadata
    validation_results["pipeline_status"] = "completed"

    return model_file

# Main workflow
if __name__ == "__main__":
    flyte.init_from_config(".flyte/config.yaml")
    # flyte.init() # uncomment to run locally

    # Run the complete pipeline
    execution = flyte.run(ml_pipeline)

    print(f"Execution: {execution.name}")
    print(f"URL: {execution.url}")

#### Outputs & Remote

## ⚠️ Error Handling & Dynamic Infrastructure


In [None]:
import asyncio

import flyte
import flyte.errors

env = flyte.TaskEnvironment(
    name="fail",
    resources=flyte.Resources(cpu=1, memory="250Mi"),
)


@env.task
async def oomer(x: int):
    large_list = [0] * 100000000
    print(len(large_list))


@env.task
async def always_succeeds() -> int:
    await asyncio.sleep(1)
    return 42


@env.task
async def failure_recovery() -> int:
    try:
        await oomer(2)
    except flyte.errors.OOMError as e:
        print(f"Failed with oom trying with more resources: {e}, of type {type(e)}, {e.code}")
        try:
            await oomer.override(resources=flyte.Resources(cpu=1, memory="1Gi"))(5)
        except flyte.errors.OOMError as e:
            print(f"Failed with OOM Again giving up: {e}, of type {type(e)}, {e.code}")
            raise e
    finally:
        await always_succeeds()

    return await always_succeeds()


if __name__ == "__main__":
    flyte.init_from_config(".flyte/config.yaml")

    run = flyte.run(failure_recovery)
    print(run.url)
    run.wait(run)

## AI Agents & Agentic Workflows

Flyte 2.0 built in dynamic task and workflows make it easy to build agentic workflows that can call LLMs and other AI models to help make decisions and take actions with out of the box support for most major agent frameworks and LLM providers.

Example Coming soon!