## Dependency

In [58]:
import sys
sys.path.append('..')
from src.scripts.preparation_image_dataset import filter_images, translate_dir_names
import os
import subprocess

## Constants

In [59]:
# Values of os.name used to tell the supported platforms apart.
LINUX_MACOS = 'posix'  # Linux or macOS
WINDOWS = 'nt'         # Windows

# File extensions handed to filter_images as the accepted image types.
IMAGE_EXTS = ['jpeg', 'jpg', 'png']

# Mapping from the dataset's original sub-directory names to the English
# class names they are renamed to by translate_dir_names.
TRANSLATE = dict([
    ("cane", "dog"),
    ("cavallo", "horse"),
    ("elefante", "elephant"),
    ("farfalla", "butterfly"),
    ("gallina", "chicken"),
    ("gatto", "cat"),
    ("mucca", "cow"),
    ("pecora", "sheep"),
    ("scoiattolo", "squirrel"),
    ("ragno", "spider"),
])

## Loading dataset

In [60]:
# loading dataset from kaggle
!kaggle datasets download --force -d "alessiocorrado99/animals10" --path ../data2 --unzip

Dataset URL: https://www.kaggle.com/datasets/alessiocorrado99/animals10
License(s): GPL-2.0
Downloading animals10.zip to ../data2




  0%|          | 0.00/586M [00:00<?, ?B/s]
  0%|          | 1.00M/586M [00:01<11:08, 917kB/s]
  0%|          | 2.00M/586M [00:01<05:32, 1.84MB/s]
  1%|          | 4.00M/586M [00:01<02:22, 4.29MB/s]
  1%|          | 6.00M/586M [00:01<01:39, 6.13MB/s]
  1%|▏         | 8.00M/586M [00:01<01:19, 7.59MB/s]
  2%|▏         | 10.0M/586M [00:01<01:09, 8.73MB/s]
  2%|▏         | 12.0M/586M [00:02<01:02, 9.60MB/s]
  2%|▏         | 14.0M/586M [00:02<00:58, 10.2MB/s]
  3%|▎         | 16.0M/586M [00:02<00:56, 10.6MB/s]
  3%|▎         | 18.0M/586M [00:02<00:54, 11.0MB/s]
  3%|▎         | 20.0M/586M [00:02<00:53, 11.2MB/s]
  4%|▍         | 22.0M/586M [00:03<00:52, 11.4MB/s]
  4%|▍         | 24.0M/586M [00:03<00:51, 11.4MB/s]
  4%|▍         | 26.0M/586M [00:03<00:50, 11.5MB/s]
  5%|▍         | 28.0M/586M [00:03<00:50, 11.6MB/s]
  5%|▌         | 30.0M/586M [00:03<00:49, 11.7MB/s]
  5%|▌         | 32.0M/586M [00:03<00:49, 11.6MB/s]
  6%|▌         | 34.0M/586M [00:04<00:49, 11.7MB/s]
  6%|▌         | 36.0

In [None]:
# Rename the downloaded dataset folder from "raw-img" to "raw" (the name the
# rest of the notebook uses) and delete animals10.zip and translate.py,
# which are not needed.
#
# os.rename / os.remove work the same way on every OS, so no platform-specific
# shell commands are required, and the existence checks make the cell safe to
# re-run.

dataset_dir = os.path.join("..", "data")
raw_img_dir = os.path.join(dataset_dir, "raw-img")
raw_dir = os.path.join(dataset_dir, "raw")

if os.path.isdir(raw_img_dir):
    if os.path.exists(raw_dir):
        # Do not silently merge or overwrite a previously prepared dataset.
        print(f"{raw_dir} already exists; leaving {raw_img_dir} untouched")
    else:
        os.rename(raw_img_dir, raw_dir)

for leftover_name in ("animals10.zip", "translate.py"):
    leftover_path = os.path.join(dataset_dir, leftover_name)
    # The archive may already have been removed by the download step.
    if os.path.isfile(leftover_path):
        os.remove(leftover_path)


## Path

In [None]:
# Absolute path of the directory one level above the current working directory.
root_path = os.path.abspath('..')
# Location of the prepared dataset: <root>/data/raw
data_path = os.path.join(root_path, "data", "raw")

## Prepare data

In [63]:
def get_count_images(data_path):
    """Count the entries stored in the class sub-directories of ``data_path``.

    Args:
        data_path: Directory whose immediate sub-directories each hold the
            files of one class.

    Returns:
        int: Total number of entries found across all sub-directories.
        Every entry is counted, whatever its file type.
    """
    count = 0
    for image_class in os.listdir(data_path):
        class_dir = os.path.join(data_path, image_class)
        # Plain files sitting next to the class folders (e.g. .DS_Store) are
        # not classes; os.listdir on them would raise NotADirectoryError.
        if not os.path.isdir(class_dir):
            continue
        count += len(os.listdir(class_dir))

    return count

In [64]:
# Baseline: number of files in the class folders before any filtering.
get_count_images(data_path)

26179

In [65]:
# Filter the dataset by file extension (IMAGE_EXTS); files that cannot be
# opened are also deleted.
# NOTE(review): behavior described from the original comment; filter_images is
# defined in src.scripts.preparation_image_dataset and is not shown here.
filter_images(data_path, IMAGE_EXTS)

In [66]:
# Count again after filtering to see how many files were removed.
get_count_images(data_path)

26179

In [67]:
# Class sub-directory names before they are translated.
os.listdir(data_path)

['cane',
 'cavallo',
 'elefante',
 'farfalla',
 'gallina',
 'gatto',
 'mucca',
 'pecora',
 'ragno',
 'scoiattolo']

In [68]:
# Rename the class sub-directories of data_path using the TRANSLATE mapping
# (original name -> English name).
translate_dir_names(data_path, TRANSLATE)

In [69]:
# Check the sub-directory names after translation.
os.listdir(data_path)

['butterfly',
 'cat',
 'chicken',
 'cow',
 'dog',
 'elephant',
 'horse',
 'sheep',
 'spider',
 'squirrel']