In [None]:
from hub import Dataset # Single import from Hub

import time
import os
import numpy as np
import glob
from PIL import Image

# Creating a Classification Dataset

### User Inputs

In [None]:
# Path to the existing dataset: every entry directly under ../Datasets/animals
# (one subfolder per class, e.g. /dogs, /cats).
# os.path.join keeps the pattern portable across operating systems, and sorting
# makes the folder order -- and therefore the integer label each folder gets
# when it is enumerated -- reproducible, since glob returns matches in
# arbitrary order.
folder_paths = sorted(glob.glob(os.path.join("..", "Datasets", "animals", "*")))

dataset_name = "animals_hub"
# Built with os.path.join instead of a backslash literal: "\D" and "\{" are
# invalid escape sequences in a non-raw string and trigger warnings on recent
# Python versions. On Windows the resulting path is unchanged.
hub_dataset_path = os.path.join("..", "Datasets", dataset_name)

# Dataset path can be an S3 bucket or other destination in the cloud
# hub_dataset_path = "s3://hub-2.0-datasets/{}".format(dataset_name)

### Initialize Dataset

In [None]:
# Open the dataset located at hub_dataset_path.
# NOTE(review): mode="w" presumably opens it for writing -- confirm against the Hub API.
ds = Dataset(hub_dataset_path, mode="w")

### Populate Tensors

In [None]:
# Define the dataset tensors: one receives the image arrays, the other the labels
ds.create_tensor("images")
ds.create_tensor("labels")

# Iterate through the subfolders (/dogs, /cats); the position of a folder in
# folder_paths becomes the integer label of every image found inside it
for label, folder_path in enumerate(folder_paths):

    # Sort so samples are appended in a reproducible order (glob order is arbitrary)
    paths = sorted(glob.glob(os.path.join(folder_path, "*")))

    # Iterate through images in the subfolders
    for path in paths:
        # Open through a context manager so the file handle is released as
        # soon as the pixel data has been copied into the array
        with Image.open(path) as img:
            ds.images.append(np.array(img)) # Append to images tensor
        ds.labels.append(np.array([label])) # Append to labels tensor

ds.flush() #This will not be needed in the future

### Access Data

In [None]:
# Load dataset from a local folder or cloud storage.
# Opens the same hub_dataset_path used when creating the dataset, this time
# without a mode argument, so Dataset's default mode applies
# (presumably read access -- TODO confirm against the Hub API).
ds_read = Dataset(hub_dataset_path)

In [None]:
# Print the first entry of the "labels" tensor after converting it with .numpy()
print(ds_read.labels[0].numpy())

In [None]:
# Display an image
# The first entry of the "images" tensor is converted with .numpy() and wrapped
# in a PIL image; as the cell's last expression, the notebook renders it.
Image.fromarray(ds_read.images[0].numpy())

In [None]:
# Display another image (index 3, i.e. the fourth sample)
# Images do not have to be the same size
# As the cell's last expression, the PIL image is rendered by the notebook.
Image.fromarray(ds_read.images[3].numpy())

# Connecting Dataset to PyTorch

In [None]:
import torch

In [None]:
# Create pytorch dataloader
# The object returned by ds_read.pytorch() is passed to DataLoader as its
# dataset; no batch_size or shuffle options are given, so DataLoader's
# defaults apply.
train_loader = torch.utils.data.DataLoader(ds_read.pytorch())

In [None]:
# Iterate through pytorch dataloader
# Only the first batch is inspected: print the shape of its "images" entry,
# then leave the loop immediately.
for data in train_loader:
    print(data["images"].shape)
    break