In [1]:
# installation of Weights & Biases
!pip install wandb

Collecting wandb
  Downloading wandb-0.16.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.40-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.39.1-py2.py3-none-any.whl (254 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.1/254.1 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)
Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->wand

In [2]:
# installation of pytest
!pip install pytest pytest-sugar

Collecting pytest-sugar
  Downloading pytest_sugar-0.9.7-py2.py3-none-any.whl (10 kB)
Installing collected packages: pytest-sugar
Successfully installed pytest-sugar-0.9.7


In [3]:
# Importing libraries
import wandb
import numpy as np
import pandas as pd

In [4]:
# Authenticate the wandb CLI. --relogin asks for an API key again even if
# credentials are already stored; the key is typed at the interactive prompt
# (never hard-coded in the notebook) and saved to the netrc file.
!wandb login --relogin

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [5]:
# create test file
%%file test_data.py

import pytest
import wandb
import pandas as pd
import numpy as np
import os
from PIL import Image

# Start the W&B run that the data checks use to fetch their artifacts.
# NOTE: this is a module-level statement, so it executes as soon as the test
# module is imported.
run = wandb.init(
    entity="flamigos",
    project="cnn_animation",
    job_type="data_check",
)

@pytest.fixture(scope="session")
def data():
    """Fetch both cleaned artifacts once per test session.

    Returns:
        tuple: ``(labels, data_img)`` where ``labels`` is the DataFrame read
        from the ``label_data`` artifact file and ``data_img`` is the object
        ``np.load`` returns for the ``img_data`` artifact file.
    """
    # .file() yields a local path to the artifact's file, not the artifact itself.
    labels_path = run.use_artifact(
        'flamigos/cnn_animation/label_data:latest', type='clean_data'
    ).file()
    images_path = run.use_artifact(
        'flamigos/cnn_animation/img_data:latest', type='clean_data'
    ).file()

    return pd.read_csv(labels_path), np.load(images_path)


def test_data_length(data):
    # check if we have a required amount of data and if the two artifects have the same size
    labels, data_img = data
    assert (len(labels) > 8800) and (len(data_img) > 8800) and (len(labels) == len(data_img))


def test_image_data_shape(data):
    # check if all values of the data_img artifact have the same shape
    labels, data_img = data
    for i in data_img:
      assert i.shape == data_img[0].shape


def test_label_data_types(data):
    # check if all values of the labels artifact have the same type
    labels, data_img = data
    for i in range(len(labels["animation Type"])):
      assert type(labels["animation Type"][i]) == type(labels["animation Type"][0])


def test_img_data_types(data):
    # check if all values of the data_img artifact have the same type
    labels, data_img = data
    for i in range(len(data_img)):
      assert type(data_img[i]) == type(data_img[0])


def test_image_data_valid(data):
    # check if the data of img_dat is a valid image
    # with this test we dont need a null check in the img_data!
    labels, data_img = data
    for i in data_img:
        img = i*255
        img = img.astype(np.uint8)
        image = Image.fromarray(img)
        assert (image.size == (128, 128) and image.mode == 'RGBA')


def test_label_range(data):
    # test whether category values are within an expected range of values
    labels, data_img = data
    assert labels["animation Type"].isin([0, 1]).all()


def test_null_values_from_labels(data):
    """Check that the label table has no missing values, then finish the W&B run.

    The run is finished in a ``finally`` clause so that it is closed whether
    or not the assertion passes.
    """
    labels, data_img = data
    try:
        # any() over the columns, then over the per-column results
        assert not labels.isnull().any().any()
    finally:
        # finish the run
        run.finish()

Writing test_data.py


In [6]:
# Run the tests pytest collects from the current directory (this picks up the
# test_data.py written above); -vv requests the most verbose per-test report.
!pytest . -vv

[1mTest session starts (platform: linux, Python 3.10.12, pytest 7.4.3, pytest-sugar 0.9.7)[0m
cachedir: .pytest_cache
rootdir: /content
plugins: sugar-0.9.7, anyio-3.7.1
collected 7 items                                                                                  [0m

 [36mtest_data.py[0m::test_data_length[0m [32m✓[0m                                                     [32m14% [0m[40m[32m█[0m[40m[32m▌        [0m
 [36mtest_data.py[0m::test_image_data_shape[0m [32m✓[0m                                                [32m29% [0m[40m[32m█[0m[40m[32m█[0m[40m[32m▉       [0m
 [36mtest_data.py[0m::test_label_data_types[0m [32m✓[0m                                                [32m43% [0m[40m[32m█[0m[40m[32m█[0m[40m[32m█[0m[40m[32m█▍     [0m
 [36mtest_data.py[0m::test_img_data_types[0m [32m✓[0m                                                  [32m57% [0m[40m[32m█[0m[40m[32m█[0m[40m[32m█[0m[40m[32m█[0m[40m[32m█[0m[40m