In [1]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

def train_svc_on_digits(kernel='rbf', C=1.0, gamma='scale'):
    """
    Train an SVC model on the sklearn digits dataset.

    Args:
        kernel (str): Kernel type for SVC ('linear', 'rbf', 'poly', etc.).
        C (float): Regularization parameter.
        gamma (str or float): Kernel coefficient.

    Returns:
        model: Trained SVC model.
        accuracy: Accuracy on the test set.
        report: Classification report string.
    """
    # Load the dataset
    digits = load_digits()
    X, y = digits.data, digits.target

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the SVC model
    model = SVC(kernel=kernel, C=C, gamma=gamma)
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Test Accuracy: {acc:.4f}")
    print("Classification Report:\n", report)

    return model, acc, report

In [2]:
model, accuracy, report = train_svc_on_digits(kernel='rbf', C=10.0, gamma=0.001)

Test Accuracy: 0.9889
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       1.00      1.00      1.00        28
           2       1.00      1.00      1.00        33
           3       1.00      0.97      0.99        34
           4       1.00      1.00      1.00        46
           5       0.98      0.98      0.98        47
           6       0.97      1.00      0.99        35
           7       0.97      0.97      0.97        34
           8       1.00      1.00      1.00        30
           9       0.97      0.97      0.97        40

    accuracy                           0.99       360
   macro avg       0.99      0.99      0.99       360
weighted avg       0.99      0.99      0.99       360



In [3]:
from typing_extensions import Annotated
from typing import Tuple
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import numpy as np
from zenml import step

@step
def import_digits_data() -> Tuple[
    Annotated[np.ndarray, "X_train"], 
    Annotated[np.ndarray, "y_train"], 
    Annotated[np.ndarray, "X_test"], 
    Annotated[np.ndarray, "y_test"],
]:
    """
    Import and split the digits dataset into training and testing sets.

    Returns:
        x_train: Training features
        y_train: Training labels
        x_test: Testing features
        y_test: Testing labels
    """
    digits = load_digits()
    X = digits.images.reshape((len(digits.images), -1))
    y = digits.target
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    return x_train, y_train, x_test, y_test

In [10]:
# ⚠️ Temporarily run step like a normal function to see output
x_train, y_train, x_test, y_test = import_digits_data()

# ✅ Now you can use these directly
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)


[1;35mRunning single step pipeline to execute step [0m[1;36mimport_digits_data[1;35m[0m
[1;35mInitiating a new run for the pipeline: [0m[1;36mimport_digits_data[1;35m.[0m
[1;35mCaching is disabled by default for [0m[1;36mimport_digits_data[1;35m.[0m
[1;35mUsing user: [0m[1;36mdefault[1;35m[0m
[1;35mUsing stack: [0m[1;36mdefault[1;35m[0m
[1;35m  orchestrator: [0m[1;36mdefault[1;35m[0m
[1;35m  artifact_store: [0m[1;36mdefault[1;35m[0m
[1;35mYou can visualize your pipeline runs in the [0m[1;36mZenML Dashboard[1;35m. In order to try it locally, please run [0m[1;36mzenml login --local[1;35m.[0m
[1;35mStep [0m[1;36mimport_digits_data[1;35m has started.[0m
[1;35mStep [0m[1;36mimport_digits_data[1;35m has finished in [0m[1;36m2.956s[1;35m.[0m
[1;35mPipeline run has finished in [0m[1;36m3.449s[1;35m.[0m
(1437, 64) (1437,) (360, 64) (360,)


In [4]:
from zenml import pipeline

@pipeline
def data_pipeline(importer):
    return importer()

# Instantiate the pipeline with its steps
# importer_step = import_digits_data()
pipeline_instance = data_pipeline(importer=import_digits_data())
# 
# # Run the pipeline
# pipeline_instance.run()

[1;35mRunning single step pipeline to execute step [0m[1;36mimport_digits_data[1;35m[0m
[1;35mInitiating a new run for the pipeline: [0m[1;36mimport_digits_data[1;35m.[0m


  import pkg_resources


[33mIn a future release, the default Python package installer used by ZenML to build container images for your containerized pipelines will change from 'pip' to 'uv'. To maintain current behavior, you can explicitly set [0m[1;36mpython_package_installer=PythonPackageInstaller.PIP[33m in your DockerSettings.[0m
[1;35mCaching is disabled by default for [0m[1;36mimport_digits_data[1;35m.[0m
[1;35mUsing user: [0m[1;36mdefault[1;35m[0m
[1;35mUsing stack: [0m[1;36mdefault[1;35m[0m
[1;35m  orchestrator: [0m[1;36mdefault[1;35m[0m
[1;35m  artifact_store: [0m[1;36mdefault[1;35m[0m
[1;35mYou can visualize your pipeline runs in the [0m[1;36mZenML Dashboard[1;35m. In order to try it locally, please run [0m[1;36mzenml login --local[1;35m.[0m
[1;35mStep [0m[1;36mimport_digits_data[1;35m has started.[0m
[1;35mStep [0m[1;36mimport_digits_data[1;35m has finished in [0m[1;36m4.816s[1;35m.[0m
[1;35mPipeline run has finished in [0m[1;36m5.287s[1;35m.[

In [13]:
importer_step.shape

In [4]:
print("Train shape:", x_train.shape, y_train.shape)
print("Test shape:", x_test.shape, y_test.shape)

In [6]:
!pip install "setuptools<81"


Defaulting to user installation because normal site-packages is not writeable
