<a href="https://colab.research.google.com/github/ttekcor/neurontasksfefu/blob/main/dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
! pip install opendatasets

Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Installing collected packages: opendatasets
Successfully installed opendatasets-0.1.22


In [11]:
import tensorflow as tf
import pandas as pd
from google.colab import files
import opendatasets as od

In [4]:
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"ttekcor","key":"dff966458268ba7e3b25ceba3a132cf2"}'}

In [5]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [6]:
!chmod 600 ~/.kaggle/kaggle.json

In [7]:
! kaggle datasets list

ref                                                         title                                        size  lastUpdated          downloadCount  voteCount  usabilityRating  
----------------------------------------------------------  ------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
hummaamqaasim/jobs-in-data                                  Jobs and Salaries in Data Science            76KB  2023-12-25 20:03:32           4741         86  1.0              
thedrcat/daigt-v2-train-dataset                             DAIGT V2 Train Dataset                       29MB  2023-11-16 01:38:36           3993        312  1.0              
jacksondivakarr/car-crash-dataset                           Car Crash Dataset                             4MB  2024-01-08 13:44:08           1496         36  1.0              
rafsunahmad/plane-price-prediction                          Airplane Price Prediction                    17KB  2023-12-2

In [12]:
od.download(
    "https://www.kaggle.com/datasets/gpiosenka/100-bird-species")

Downloading 100-bird-species.zip to ./100-bird-species


100%|██████████| 1.96G/1.96G [01:07<00:00, 31.3MB/s]





In [16]:

! unzip 100-bird-species -d train

unzip:  cannot find or open 100-bird-species, 100-bird-species.zip or 100-bird-species.ZIP.


In [18]:

def load_dataset(path: str, batch_size: int, image_size: tuple[int, int], shuffle: bool, split: str) -> tuple[tf.data.Dataset, dict[int, str]]:
    # Load the CSV index file
    df = pd.read_csv(path)

    # Filter the dataframe based on the specified split
    df_split = df[df['data set'] == split]

    # Get the filepaths and labels
    filepaths = df_split['filepaths'].values
    labels = df_split['labels'].values

    # Create a dictionary mapping class indices to class names
    class_mapping = {idx: label for idx, label in enumerate(df['labels'].unique())}

    # Create a list of class indices corresponding to the labels
    class_indices = [list(class_mapping.keys())[list(class_mapping.values()).index(label)] for label in labels]

    # Create a dataset from the filepaths and labels
    dataset = tf.data.Dataset.from_tensor_slices((filepaths, class_indices))

    # Function to load and preprocess images
    def load_and_preprocess_image(filepath, label):
      img = tf.io.read_file(filepath)
      img = tf.image.decode_image(img, channels=3)
      img.set_shape([None, None, 3])  # Set the shape of the image Tensor
      img = tf.image.resize(img, image_size)
      img = img / 255.0  # Normalize pixel values to the range [0, 1]
      return img, label

    # Map the load_and_preprocess_image function to the dataset
    dataset = dataset.map(load_and_preprocess_image)

    # Shuffle the dataset if required
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(filepaths))

    # Batch and prefetch the dataset
    dataset = dataset.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

    return dataset, class_mapping

# Example usage:
dataset, class_mapping = load_dataset('/content/100-bird-species/birds.csv', batch_size=32, image_size=(64, 64), shuffle=True, split='train')


In [20]:
class_mapping

{0: 'ABBOTTS BABBLER',
 1: 'ABBOTTS BOOBY',
 2: 'ABYSSINIAN GROUND HORNBILL',
 3: 'AFRICAN CROWNED CRANE',
 4: 'AFRICAN EMERALD CUCKOO',
 5: 'AFRICAN FIREFINCH',
 6: 'AFRICAN OYSTER CATCHER',
 7: 'AFRICAN PIED HORNBILL',
 8: 'AFRICAN PYGMY GOOSE',
 9: 'ALBATROSS',
 10: 'ALBERTS TOWHEE',
 11: 'ALEXANDRINE PARAKEET',
 12: 'ALPINE CHOUGH',
 13: 'ALTAMIRA YELLOWTHROAT',
 14: 'AMERICAN AVOCET',
 15: 'AMERICAN BITTERN',
 16: 'AMERICAN COOT',
 17: 'AMERICAN FLAMINGO',
 18: 'AMERICAN GOLDFINCH',
 19: 'AMERICAN KESTREL',
 20: 'AMERICAN PIPIT',
 21: 'AMERICAN REDSTART',
 22: 'AMERICAN ROBIN',
 23: 'AMERICAN WIGEON',
 24: 'AMETHYST WOODSTAR',
 25: 'ANDEAN GOOSE',
 26: 'ANDEAN LAPWING',
 27: 'ANDEAN SISKIN',
 28: 'ANHINGA',
 29: 'ANIANIAU',
 30: 'ANNAS HUMMINGBIRD',
 31: 'ANTBIRD',
 32: 'ANTILLEAN EUPHONIA',
 33: 'APAPANE',
 34: 'APOSTLEBIRD',
 35: 'ARARIPE MANAKIN',
 36: 'ASHY STORM PETREL',
 37: 'ASHY THRUSHBIRD',
 38: 'ASIAN CRESTED IBIS',
 39: 'ASIAN DOLLARD BIRD',
 40: 'ASIAN GREEN BEE EATER'