In [6]:
import subprocess
import os
import zipfile

# Function to check if the dataset is downloaded
def is_dataset_downloaded():
    """Report whether the extracted training split is present.

    Returns:
        bool: True when ``./data/train`` (relative to the current working
        directory) exists and is a directory, False otherwise.
    """
    train_dir = "./data/train"
    return os.path.isdir(train_dir)

# Function to check if Kaggle CLI is installed
def is_kaggle_installed():
    """Probe for the ``kaggle`` executable by running ``kaggle --version``.

    Returns:
        bool: False when launching the command raises ``FileNotFoundError``
        (no ``kaggle`` executable could be found), True otherwise. The exit
        code and output of the command are not inspected.
    """
    probe_command = ["kaggle", "--version"]
    try:
        subprocess.run(
            probe_command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except FileNotFoundError:
        return False
    else:
        return True

# Function to install Kaggle CLI if not already installed
def install_kaggle():
    """Make sure the Kaggle CLI is available, installing it with pip if needed.

    Returns:
        bool: True when the CLI was already present or pip exited with code 0;
        False when the pip installation failed.
    """
    if is_kaggle_installed():
        return True

    # Imported locally so this function does not depend on another cell's imports.
    import sys

    print("Kaggle CLI is not installed. Installing...")

    # Invoke pip through the interpreter that runs this kernel. A bare "pip"
    # resolved from PATH may belong to a different Python installation, in which
    # case the package would be installed where the notebook cannot see it.
    pip_prefix = [sys.executable, "-m", "pip"] if sys.executable else ["pip"]
    install_kaggle_command = pip_prefix + ["install", "kaggle"]
    result = subprocess.run(install_kaggle_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    if result.returncode != 0:
        print(f"Kaggle CLI installation failed with return code {result.returncode}")
        # pip's stderr was captured above; show it so the failure can be diagnosed.
        if result.stderr:
            print(result.stderr.strip())
        return False

    print("Kaggle CLI installed successfully")
    return True

# Function to download and unzip the dataset
def download_and_unzip_dataset():
    """Download the ``gpiosenka/100-bird-species`` Kaggle dataset and extract it.

    Runs ``kaggle datasets download`` with ``./data`` as the target directory.
    When the command exits with code 0, ``./data/100-bird-species.zip`` is
    extracted into ``./data``. Progress and failures are reported on stdout;
    nothing is returned.
    """
    data_dir = "./data"
    download_command = ["kaggle", "datasets", "download", "-d", "gpiosenka/100-bird-species", "-p", data_dir]

    # Output is captured rather than streamed, so it must be printed explicitly
    # on failure or the reason for the failure is lost.
    result = subprocess.run(download_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    if result.returncode != 0:
        print(f"Download failed with return code {result.returncode}")
        details = (result.stderr or result.stdout or "").strip()
        if details:
            print(details)
        return

    print("Download executed successfully")

    zip_file_path = "./data/100-bird-species.zip"

    # Report a missing archive in the same style as a failed download instead
    # of letting ZipFile raise FileNotFoundError.
    if not os.path.isfile(zip_file_path):
        print(f"Expected archive '{zip_file_path}' was not found after the download.")
        return

    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(data_dir)

    print(f"File '{zip_file_path}' has been successfully unzipped.")

# Fetch the dataset only when ./data/train is missing; the Kaggle CLI is
# installed first if necessary, and a failed install skips the download.
if is_dataset_downloaded():
    print("Dataset is already downloaded.")
elif install_kaggle():
    download_and_unzip_dataset()


Dataset is already downloaded.


In [None]:
import os
from PIL import Image
import numpy as np

# NOTE: `dir` shadows the built-in of the same name. The name is kept because
# other cells may reference it.
dir = './data'
folds = os.listdir(dir)


def _is_valid_image(file_path):
    """Return True if file_path ends with '.jpg' and decodes to a (224, 224, 3) array."""
    if not file_path.endswith('.jpg'):
        return False
    # The context manager closes the underlying file as soon as the pixels are
    # read. Leaving it open leaks one handle per image and can make the
    # following os.remove fail on platforms that refuse to delete open files.
    with Image.open(file_path) as image:
        return np.array(image).shape == (224, 224, 3)


def _remove_invalid_images(root, top_level_entries):
    """Delete invalid files found exactly two directory levels below root."""
    for fold in top_level_entries:
        fold_path = os.path.join(root, fold)
        if not os.path.isdir(fold_path):
            continue
        for subfold in os.listdir(fold_path):
            subfold_path = os.path.join(fold_path, subfold)
            if not os.path.isdir(subfold_path):
                continue
            for file_name in os.listdir(subfold_path):
                file_path = os.path.join(subfold_path, file_name)
                # os.remove cannot delete directories, so nested folders are skipped.
                if not os.path.isfile(file_path):
                    continue
                if not _is_valid_image(file_path):
                    print('Deleting ' + file_path)
                    os.remove(file_path)


def _rename_class_folder(split_dir, old_name, new_name):
    """Rename split_dir/old_name to split_dir/new_name if the former exists."""
    source = os.path.join(split_dir, old_name)
    target = os.path.join(split_dir, new_name)
    # Guard on the path that is actually renamed, and never overwrite an
    # existing target, so re-running the cell is a no-op.
    if os.path.isdir(source) and not os.path.exists(target):
        os.rename(source, target)


# Delete files that are not .jpg or are not 224x224x3. Folders are left in
# place even if they end up empty.
_remove_invalid_images(dir, folds)

# Rename the class folder "PARAKETT  AUKLET" to "PARAKETT  AKULET" in every split.
# NOTE(review): both names (including the double space) are taken verbatim from
# the original os.rename calls. Confirm that this is the intended spelling,
# e.g. against the labels in birds.csv.
for split in ('train', 'valid', 'test'):
    _rename_class_folder(os.path.join(dir, split), 'PARAKETT  AUKLET', 'PARAKETT  AKULET')

In [None]:
import pandas as pd

# Read the dataset's metadata table
birds_data = pd.read_csv('./data/birds.csv')

# Build a dictionary mapping each class label (key) to its integer class ID (value)
labels_dict = {
    label: class_id
    for label, class_id in zip(birds_data['labels'], birds_data['class id'].astype(int))
}

# Show the resulting mapping
print(labels_dict)