<a href="https://colab.research.google.com/github/wandb/davis-contest/blob/main/colabs/starter_torch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Instructions and Starter Code for Submitting Results in the DAVIS Contest

In [None]:
%%capture
# Install the W&B client, the contest helper package (directly from GitHub),
# and ptflops / pytorch_lightning. The %%capture magic above must stay on the
# first line of the cell; it captures the cell's output so pip's log is hidden.

!pip install wandb
!pip install --ignore-installed git+https://github.com/wandb/davis-contest.git#egg=contest
!pip install ptflops pytorch_lightning

In [None]:
import os 

import wandb

import contest
from contest.utils import clips, paths

## 0️⃣ Create a Weights & Biases account if you don't have one.

## 1️⃣ Download the training data from Weights & Biases

In [None]:
# Coordinates of the training-data artifact on Weights & Biases.
entity = "charlesfrye"
project = "davis"
mode = "train"
tag = "latest"

# Artifact names have the form "entity/project/name:alias". Build the name with
# an explicit "/" instead of os.path.join, which would insert "\" on Windows.
training_data_artifact_name = f"{entity}/{project}/davis2016-{mode}:{tag}"
training_data_artifact_name

In [None]:
# Fetch the training data inside a dedicated "download" run and report where
# the files were placed.
with wandb.init(project=project, job_type="download") as run:
  training_data_artifact = run.use_artifact(training_data_artifact_name)
  training_data_dir = training_data_artifact.download()
  print(f"\ntraining data downloaded to {training_data_dir}")

### Viewing the Dataset in Weights & Biases

Link to dsviz version, include screenshots.

## 2️⃣ Define and train a model on the data

### Splitting up the data

In [None]:
# Show the docstring of the helper that performs the clip-wise split.
print(clips.split_on_clips.__doc__)

In [None]:
# Show the docstring of the data module used for training below.
print(contest.torch.data.VidSegDataModule.__doc__)

In [None]:
# Show the docstring of the dataset class used for evaluation below.
print(contest.torch.data.VidSegDataset.__doc__)

First, set up the validation split at the clip level: the data is divided by clip rather than by individual frame.

In [None]:
def log_holdout_split(data_artifact, train_split_df, holdout_split_df):
  """Log the train and holdout splits as two separate artifacts.

  Each split is handed to log_datasplit_artifact, first under the name
  "train" and then under the name "holdout".

  Args:
    data_artifact: passed through unchanged to log_datasplit_artifact.
    train_split_df: paths belonging to the training split.
    holdout_split_df: paths belonging to the holdout split.
  """
  named_splits = (("train", train_split_df), ("holdout", holdout_split_df))
  for splitname, split_df in named_splits:
    log_datasplit_artifact(data_artifact, split_df, splitname)


def log_datasplit_artifact(data_artifact, split_df, splitname, folder="wandb"):
  """Write one data split to JSON and log it as a W&B artifact.

  The split is saved to "<folder>/<splitname>.json", attached to a new
  artifact named "davis2016-split-<splitname>" (type "split-data") under the
  file name "paths.json", and logged to the currently active run.

  Must be called while a W&B run is active, because logging goes through
  wandb.run.

  Args:
    data_artifact: the artifact the split was derived from. Currently unused;
      kept so existing callers keep working.
    split_df: object exposing to_json(path) that holds the split's paths.
    splitname: short name of the split, e.g. "train" or "holdout".
    folder: local directory the JSON file is written to.
  """
  # Make sure the target directory exists; to_json does not create it.
  os.makedirs(folder, exist_ok=True)
  path = os.path.join(folder, f"{splitname}.json")
  split_df.to_json(path)

  dataset_artifact = wandb.Artifact(name=f"davis2016-split-{splitname}", type="split-data")
  dataset_artifact.add_file(path, "paths.json")

  wandb.run.log_artifact(dataset_artifact)

In [None]:
# NOTE(review): "training_fraction" is recorded in the run config but is not
# passed to clips.split_on_clips below -- confirm the split really uses 0.8.
config = {"training_fraction": 0.8}

# Split the training data by clip and log both halves from a "split-data" run.
with wandb.init(project=project, job_type="split-data", config=config) as run:
  training_data_artifact = run.use_artifact(training_data_artifact_name)
  paths_df = paths.artifact_paths(training_data_artifact)

  training_paths_df, holdout_paths_df = clips.split_on_clips(paths_df)
  log_holdout_split(
      data_artifact=training_data_artifact,
      train_split_df=training_paths_df,
      holdout_split_df=holdout_paths_df,
  )

### Model Code

In [None]:
import pytorch_lightning as pl
import torch
import torch.nn.functional as F

In [None]:
class DummyModel(pl.LightningModule):
  """Minimal baseline: one 1x1 convolution followed by a sigmoid.

  The model maps 3-channel inputs to a single output channel and is trained
  with binary cross-entropy. The optimizer uses a learning rate of 0.0, so the
  weights never change during training.
  """

  def __init__(self):
    super().__init__()
    self.conv = torch.nn.Conv2d(in_channels=3, out_channels=1, kernel_size=1)

  def forward(self, xs):
    """Apply the convolution and squash the result with a sigmoid."""
    return torch.sigmoid(self.conv(xs))

  def training_step(self, batch, batch_idx):
    """Compute, log, and return the loss for one training batch."""
    loss = self.forward_on_batch(batch)
    # Without this, the loss is computed but never reaches the logger.
    self.log("train_loss", loss)
    return loss

  def validation_step(self, batch, batch_idx):
    """Compute, log, and return the loss for one validation batch."""
    loss = self.forward_on_batch(batch)
    self.log("val_loss", loss)
    return loss

  def forward_on_batch(self, batch):
    """Return the binary cross-entropy loss for an (inputs, targets) batch."""
    xs, ys = batch
    y_hats = self.forward(xs)
    # binary_cross_entropy expects ys to match y_hats in shape.
    loss = F.binary_cross_entropy(y_hats, ys)
    return loss

  def configure_optimizers(self):
    """Return plain SGD over all parameters."""
    # lr=0.0 means no parameter ever moves -- presumably deliberate for a
    # dummy baseline; raise it to actually train.
    return torch.optim.SGD(self.parameters(), lr=0.0)

  def count_params(self):
    """Return the total number of scalar parameters in the model."""
    return sum(p.numel() for p in self.parameters())

For a more realistic model, see _this notebook_.

### Training Code

#### Training the model

In [None]:
# Name under which the trained model is saved as an artifact; the evaluation
# cells below load the model back by this name.
model_artifact_name = "dummy-baseline"

In [None]:
# Hyperparameters recorded with the run; they are read back through
# wandb.config below.
config = {"batch_size": 32,
          "max_epochs": 1,
          "gpus": 1}

with wandb.init(project=project, config=config, job_type="train") as run:

  # Declare the raw training data as an input of this run and fetch it.
  training_data_artifact = run.use_artifact(training_data_artifact_name)
  training_data_artifact.download()

  # The split artifacts are the ones logged by log_datasplit_artifact
  # ("davis2016-split-<splitname>").
  trainsplit_artifact = run.use_artifact("davis2016-split-train:latest")
  trainsplit_paths = paths.get_paths(trainsplit_artifact)

  holdoutsplit_artifact = run.use_artifact("davis2016-split-holdout:latest")
  holdoutsplit_paths = paths.get_paths(holdoutsplit_artifact)

  datamodule = contest.torch.data.VidSegDataModule(
      trainsplit_paths, holdoutsplit_paths,
      batch_size=wandb.config["batch_size"])
  datamodule.setup()

  model = DummyModel()
  # Record model size and compute cost in the run config.
  # NOTE(review): torch.cuda.device(0) is a context-manager object, not a
  # torch.device -- confirm that is what count_flops expects.
  wandb.config["nparams"] = contest.torch.profile.count_params(model)
  wandb.config["nflops"] = contest.torch.profile.count_flops(model, torch.cuda.device(0))
  
  # Reuse the already-open run for Lightning's logging.
  logger = pl.loggers.wandb.WandbLogger(experiment=run)
  logger.watch(model, log_freq=2)

  trainer = pl.Trainer(
    gpus=wandb.config["gpus"], max_epochs=wandb.config["max_epochs"],
    logger=logger, log_every_n_steps=1) 
  
  trainer.fit(model, datamodule)

  # Save the trained model under model_artifact_name.
  model_artifact_id = contest.torch.utils.save_model_to_artifact(
    model, "wandb/final_model", model_artifact_name)

## 3️⃣ Run your model on the evaluation data

Once you've run your model on the evaluation data,
there are two steps to submission:

1. Log an "evaluation run" to W&B, using _this notebook_.
2. Submit the results to _the benchmark_.

Describe format of the results.

In [None]:
# Artifact names have the form "entity/project/name:alias". Use an explicit "/"
# instead of os.path.join, which would insert "\" on Windows.
evaluation_artifact_name = f"{entity}/{project}/davis2016-val:{tag}"

In [None]:
# Alias of the model artifact version to evaluate.
model_tag = "latest"

In [None]:
output_dir = os.path.join("outputs")
!rm -rf output_dir
!mkdir -p {output_dir}

In [None]:
# The result artifact is named after the model, with a "-result" suffix.
result_artifact_name = f"{model_artifact_name}-result"

In [None]:
# Run the saved model over the evaluation data and log the outputs as a
# result artifact from a "run-val" run.
with wandb.init(project=project, job_type="run-val") as run:
  evaluation_data_artifact = run.use_artifact(evaluation_artifact_name)
  evaluation_data_paths = paths.artifact_paths(evaluation_data_artifact)

  # The dataset is built with has_annotations=False.
  evaluation_dataset = contest.torch.data.VidSegDataset(
    evaluation_data_paths, has_annotations=False)
  num_images = len(evaluation_dataset)

  # One image per batch.
  evaluation_dataloader = torch.utils.data.DataLoader(
    evaluation_dataset, batch_size=1)

  # Load the model artifact logged by the training run, at the chosen alias.
  model = contest.torch.utils.load_model_from_artifact(
    model_artifact_name + ":" + model_tag, DummyModel) 

  print("\n")
  # NOTE(review): torch.cuda.device(0) is a context-manager object, not a
  # torch.device -- confirm that is what count_flops expects.
  device = torch.cuda.device(0)
  nparams = contest.torch.profile.count_params(model)
  nflops = contest.torch.profile.count_flops(model, device)

  # Record model size and compute cost alongside the results.
  wandb.log({"nparams": nparams, "nflops": nflops})

  output_paths = contest.torch.evaluate.run(
    model, evaluation_dataloader, num_images, output_dir)

  # Bundle the outputs into an artifact and attach it to this run.
  result_artifact = contest.evaluate.make_result_artifact(
    output_paths, result_artifact_name)
  run.log_artifact(result_artifact)

## 4️⃣ Submit your results to the leaderboard on Weights & Biases

Once you've run an evaluation job like the one above and produced a results Artifact,
you're almost ready to submit to the contest.

Head over to _this notebook_ for the last two steps.