Uploading an existing Zeno project to a Zeno backend using the Zeno API.

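We assume that the existing project was created with an older version of Zeno.
This notebook therefore relies on that version's CSV layout: system outputs live in
columns prefixed with `OUTPUToutput`, and columns prefixed with `PREDISTILL`,
`POSTDISTILL`, or `EMBEDDING` are not part of the dataset itself.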

In [None]:
# Connection settings: both values are handed to ZenoClient below.
API_KEY = ""  # empty placeholder — presumably must be filled in before running
ENDPOINT = "http://localhost:8000"
# Settings for the project created on the backend (name, view, data URL).
PROJECT_NAME = "accent-project"
PROJECT_VIEW = "audio-transcription"
DATA_URL = "https://zenoml.s3.amazonaws.com/accents/"
# Path of the CSV file of the existing project; it is read with pandas.
EXISTING_PROJECT_PATH = "accents.csv"

In [None]:
%load_ext autoreload
%autoreload 2

%env PUBLIC_BACKEND_ENDPOINT=http://localhost:8000

from zeno_api import ZenoClient
import pandas as pd


In [None]:

# Load the CSV of the existing project. Missing values are replaced with empty
# strings so that no NaN cells remain in the frames built from it.
data_frame = pd.read_csv(EXISTING_PROJECT_PATH).fillna("")

# Columns holding system outputs are named "OUTPUToutput<model>".
output_prefix = "OUTPUToutput"
output_cols = [
    col for col in data_frame.columns if str(col).startswith(output_prefix)
]

# Recover each model name by cutting off only the leading prefix: str.replace
# would also delete the marker if it reappeared inside a model name, and the
# f"OUTPUToutput{model}" column lookup done at upload time would then fail.
# dict.fromkeys de-duplicates while keeping CSV column order, so the models are
# always processed in the same order (iterating a set of strings is not stable
# across interpreter runs).
models = list(
    dict.fromkeys(str(col)[len(output_prefix):] for col in output_cols)
)

# The remaining prefixed column groups; they are collected so they can be
# dropped from the dataset frame.
predistill_cols = [
    col for col in data_frame.columns if str(col).startswith("PREDISTILL")
]
postdistill_cols = [
    col for col in data_frame.columns if str(col).startswith("POSTDISTILL")
]
embedding_cols = [
    col for col in data_frame.columns if str(col).startswith("EMBEDDING")
]

In [None]:
# The dataset frame is the full frame minus every system-output, pre-/post-distill
# and embedding column collected above.
non_dataset_cols = [
    *output_cols,
    *predistill_cols,
    *postdistill_cols,
    *embedding_cols,
]
df_dataset = data_frame.drop(columns=non_dataset_cols)

In [None]:
# Preview the first five rows of the dataset frame before uploading it.
df_dataset.head(n=5)

In [None]:
# Connect to the backend with the configured key and endpoint.
zeno_client = ZenoClient(API_KEY, endpoint=ENDPOINT)

# Create the project that the dataset and the system outputs are uploaded to.
project = zeno_client.create_project(
    PROJECT_NAME, view=PROJECT_VIEW, data_url=DATA_URL
)

In [None]:
# Upload the dataset frame, identifying instances by the "id" column.
# NOTE(review): "id.1" looks like the name pandas assigns to a second CSV column
# that is also called "id" — confirm against the CSV header.
project.upload_dataset(
    df_dataset,
    "id",
    label_column="label",
    data_column="id.1",
)

In [None]:
# Upload one system per model. Each upload carries the "id" column together
# with that model's output column, which is also passed by name.
for model in models:
    # Build the column name once; it is used both to select and to identify
    # the output column.
    output_column = f"OUTPUToutput{model}"
    project.upload_system(
        model,
        data_frame[["id", output_column]],
        output_column,
        id_column="id",
    )