# Uploading Legacy Zeno Project

Upload an existing Zeno project to a Zeno backend using the Zeno client.

We assume that the existing project was created with an older version of Zeno.
The script therefore makes assumptions about the column names and data structure of
the project's CSV file.

This notebook uploads the audio transcription example project.

In [None]:
from zeno_client import ZenoClient, ZenoMetric
import pandas as pd
import os

In [None]:
# Zeno API key handed to ZenoClient. It is read from the ZENO_API_KEY
# environment variable when that is set, so the key does not have to be pasted
# into (and saved with) the notebook; otherwise it falls back to an empty
# string that must be replaced by hand before running the upload cells.
API_KEY = os.environ.get("ZENO_API_KEY", "")
# Name given to the project that is created on the backend.
PROJECT_NAME = "Audio Transcription Accents"
# Value passed as `view` when the project is created.
PROJECT_VIEW = "audio-transcription"
# Value passed as `data_url` when the project is created.
DATA_URL = "https://zenoml.s3.amazonaws.com/accents/"
# Path of the CSV file that holds the existing (legacy) project.
EXISTING_PROJECT_PATH = "accents.csv"

In [None]:

# Load the legacy project table; every missing value (in any column) is
# replaced by an empty string.
data_frame = pd.read_csv(EXISTING_PROJECT_PATH).fillna("")

# Legacy column names carry a prefix that tells what kind of value they hold.
# Group the columns by that prefix.
output_prefix = "OUTPUToutput"
output_cols = [
    col for col in data_frame.columns if str(col).startswith(output_prefix)
]
predistill_cols = [
    col for col in data_frame.columns if str(col).startswith("PREDISTILL")
]
postdistill_cols = [
    col for col in data_frame.columns if str(col).startswith("POSTDISTILL")
]
embedding_cols = [
    col for col in data_frame.columns if str(col).startswith("EMBEDDING")
]

# The model name is whatever follows the output prefix. Only the leading prefix
# is stripped (a model name containing the marker stays intact), duplicates are
# removed, and the result is sorted so the upload order is reproducible.
models = sorted({str(col)[len(output_prefix):] for col in output_cols})

In [None]:
# Preview the first rows of the full legacy table (all columns).
data_frame.head()

In [None]:
# The dataset table keeps only the columns that are not model outputs,
# pre-/post-distill values or embeddings.
df_dataset = data_frame.drop(
    columns=output_cols + predistill_cols + postdistill_cols + embedding_cols
)
# Rows whose country is "usa" get the continent "North America"; all other
# rows keep their existing continent. Done as one vectorized column operation
# instead of calling a Python function once per row.
df_dataset["continent"] = df_dataset["continent"].mask(
    df_dataset["country"] == "usa", "North America"
)

In [None]:
# Preview the first rows of the dataset-only table built above.
df_dataset.head()

In [None]:
# Connect to the backend with the configured API key.
zeno_client = ZenoClient(API_KEY)

# Single project metric: a "mean"-type metric named "avg_wer" over the "wer" column.
avg_wer_metric = ZenoMetric(name="avg_wer", type="mean", columns=["wer"])

# Create the project that the dataset and the model outputs are uploaded to.
project = zeno_client.create_project(
    PROJECT_NAME,
    view=PROJECT_VIEW,
    data_url=DATA_URL,
    metrics=[avg_wer_metric],
)

In [None]:
# Upload the dataset table. "id" is passed as the id column, "label" holds the
# labels and "id.1" the data entries.
# NOTE(review): "id.1" looks like pandas' automatic name for a second CSV
# column called "id" - confirm against the CSV header.
project.upload_dataset(
    df_dataset,
    "id",
    label_column="label",
    data_column="id.1",
)

In [None]:
# Upload every model found in the legacy table as its own system.
for model in models:
    # Legacy column names for this model's output and its word error rate.
    output_col = f"OUTPUToutput{model}"
    wer_col = f"POSTDISTILLwer_m{model}"
    # Keep only the id and this model's two columns, renamed to the generic
    # "output" / "wer" names ("wer" is the column the project metric reads).
    df_to_upload = data_frame[["id", output_col, wer_col]].rename(
        columns={output_col: "output", wer_col: "wer"}
    )
    project.upload_system(
        model,
        df_to_upload,
        output_column="output",
        id_column="id",
    )