In [None]:
# instalação do Weights & Biases
!pip install wandb

Collecting wandb
  Downloading wandb-0.16.3-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.42-py3-none-any.whl (195 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m195.4/195.4 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.40.6-py2.py3-none-any.whl (258 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.5/258.5 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)
Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->wa

In [None]:
# instalação do pytest
!pip install pytest pytest-sugar

Collecting pytest-sugar
  Downloading pytest_sugar-1.0.0-py3-none-any.whl (10 kB)
Installing collected packages: pytest-sugar
Successfully installed pytest-sugar-1.0.0


In [None]:
# importação das bibliotecas
import wandb
import numpy as np
import pandas as pd

In [None]:
# Log the W&B CLI in, passing --relogin; the recorded output shows it asking for
# an API key and appending that key to the netrc file.
!wandb login --relogin

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
# criando um arquivo de test
%%file test_data.py

import pytest
import wandb
import pandas as pd
import numpy as np
import os
from PIL import Image

# Start a W&B run when this module is imported; the `data` fixture and the last
# test in this file use this `run` object.
run = wandb.init(project="Breast Cancer TCC", job_type="check_data")

@pytest.fixture(scope="session")
def data():
    """Load the label table and the image array from their W&B artifacts.

    Session-scoped fixture. Each artifact is requested through the
    module-level ``run``; the value returned by ``.file()`` is handed to the
    matching loader.

    Returns:
        tuple: ``(labels, data_img)`` -- the ``pd.read_csv`` result for the
        label artifact and the ``np.load`` result for the image artifact.
    """
    labels_path = run.use_artifact(
        'valmirfrancisco1/Breast Cancer TCC/label_data_clean:latest',
        type='clean_data',
    ).file()
    images_path = run.use_artifact(
        'valmirfrancisco1/Breast Cancer TCC/img_data_clean:latest',
        type='clean_data',
    ).file()

    return pd.read_csv(labels_path), np.load(images_path)


def test_data_length(data):
    # verificamos se temos a quantidade necessária de dados e se os dois artefatos têm o mesmo tamanho
    labels, data_img = data
    assert (len(labels) > 2000) and (len(data_img) > 2000) and (len(labels) == len(data_img))


def test_image_data_shape(data):
    # verifica se todos os valores do artefato data_img têm o mesmo formato
    labels, data_img = data
    for i in data_img:
        assert i.shape == data_img[0].shape


def test_label_data_types(data):
    # verifica se todos os valores do artefato de rótulos são do mesmo tipo
    labels, data_img = data
    for i in range(0, len(labels.columns)):
        assert type(labels.dtypes[i]) == type(labels.dtypes[0])


def test_img_data_types(data):
    # verifica se todos os valores do artefato data_img têm o mesmo tipo
    labels, data_img = data
    for i in range(len(data_img)):
        assert type(data_img[i]) == type(data_img[0])


def test_image_data_valid(data):
    # verifica se os dados de img_dat são uma imagem válida
    # com este teste não precisamos de uma verificação nula no img_data!
    labels, data_img = data
    for i in data_img:
        img = i*255
        img = img.astype(np.uint8)
        image = Image.fromarray(img)
        assert (image.size == (224, 224) and image.mode == 'RGB')


def test_label_range(data):
    # testa se os valores da categoria estão dentro de um intervalo de valores esperado
    labels, data_img = data
    binary_column = ["site_id", "cancer", "biopsy", "invasive", "implant", "difficult_negative_case"]
    for i in binary_column:
        if not labels[i].isin([0, 1]).all():
            return False
    return True


def test_null_values_from_labels(data):
    # testa se há dados faltantes no quadro de dados
    labels, data_img = data
    assert not labels.isnull().any().any()

def test_number_of_columns(data):
    # verificar quantidade de colunas do dataset
    labels, data_img = data
    assert labels.shape[1] == 12

def test_same_quantity_labels(data):
    # verifica se os valores 0 e 1 de cancer estão na mesma quantidade
    labels, data_img = data
    filter_1 = labels["cancer"] == 1
    filter_0 = labels["cancer"] == 0
    assert (labels[filter_1].shape[0] == labels[filter_0].shape[0])

    # finaliza a run
    run.finish()

Overwriting test_data.py


In [None]:
# Run pytest on the current directory with -vv (very verbose) output.
!pytest . -vv

[1mTest session starts (platform: linux, Python 3.10.12, pytest 7.4.4, pytest-sugar 1.0.0)[0m
cachedir: .pytest_cache
rootdir: /content
plugins: sugar-1.0.0, anyio-3.7.1
collected 9 items                                                                                  [0m

 [36mtest_data.py[0m::test_data_length[0m [32m✓[0m                                                     [32m11% [0m[40m[32m█[0m[40m[32m▎        [0m
 [36mtest_data.py[0m::test_image_data_shape[0m [32m✓[0m                                                [32m22% [0m[40m[32m█[0m[40m[32m█[0m[40m[32m▎       [0m
 [36mtest_data.py[0m::test_label_data_types[0m [32m✓[0m                                                [32m33% [0m[40m[32m█[0m[40m[32m█[0m[40m[32m█[0m[40m[32m▍      [0m
 [36mtest_data.py[0m::test_img_data_types[0m [32m✓[0m                                                  [32m44% [0m[40m[32m█[0m[40m[32m█[0m[40m[32m█[0m[40m[32m█[0m[40m[32m▌     [0m