# Getting started with MillionTrees datasets

In [1]:
import os
import sys

if os.path.basename(os.getcwd()) == 'examples':
    sys.path.append("../")
    
import milliontrees
from torchvision import transforms

# List available datasets
print(milliontrees.benchmark_datasets)

['TreePoints', 'TreeBoxes', 'TreePolygons']


The general workflow is to 
1. Select and optionally download a dataset
2. Load the train and test splits from the dataset
3. Create the dataloader, with optional additional transforms, for how to preprocess images, and optionally augment, input images and metadata
4. Use these dataloaders to train models in native pytorch or pytorch lightning

### Select and optionally download a dataset

In [2]:
# Load the box dataset
from milliontrees import get_dataset
dataset = get_dataset("TreeBoxes", root_dir="/orange/ewhite/DeepForest/MillionTrees/")

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


### Load the train and test splits from the dataset

Datasets are split into groups of images based on task. For example, 'train' versus 'test' or 'zero_shot_train' and 'zero_shot_test'.

In [3]:
from milliontrees.common.data_loaders import get_train_loader

train_dataset = dataset.get_subset("train")

# View the first image in the dataset
image, label, metadata = train_dataset[0]
print(f"Metadata length: {len(metadata)}")
print(f"Image shape: {image.shape}, Image type: {type(image)}")
print(f"Label shape: {label.shape}, Label type: {type(label)}")

Metadata length: 2
Image shape: (81, 2), Image type: <class 'numpy.ndarray'>
Label shape: torch.Size([3, 448, 448]), Label type: <class 'torch.Tensor'>


### Create dataloader

In [4]:
train_loader = get_train_loader("standard", train_dataset, batch_size=2)

# Show one batch of the loader
for metadata, image, targets in train_loader:
    print("Targets is a list of dictionaries with the following keys: ", targets[0].keys())
    print(f"Image shape: {image.shape}, Image type: {type(image)}")
    print(f"Annotation shape of the first image: {targets[0]['boxes'].shape}")
    break  # Just show the first batch

Targets is a list of dictionaries with the following keys:  dict_keys(['boxes', 'labels'])
Image shape: torch.Size([2, 3, 448, 448]), Image type: <class 'torch.Tensor'>
Annotation shape of the first image: torch.Size([4, 4])


In [5]:
from deepforest import main

# Create a DeepForest model
m = main.deepforest()

# Load the pre-trained tree model
m.load_model("Weecology/DeepForest-tree")

# Create a trainer with fast development run enabled
m.create_trainer(fast_dev_run=True)
m.config["train"]["csv_file"] ="<dummy file, existing dataloader>"
m.trainer.fit(m, train_loader)  

Reading config file: /blue/ewhite/b.weinstein/miniconda3/envs/MillionTrees/lib/python3.10/site-packages/deepforest/data/deepforest_config.yml


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Reading config file: /blue/ewhite/b.weinstein/miniconda3/envs/MillionTrees/lib/python3.10/site-packages/deepforest/data/deepforest_config.yml


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [2]

  | Name       | Type                  | Params | Mode 
-------------------------------------------------------------
0 | model      | RetinaNet             | 32.1 M | train
1 | iou_metric | IntersectionOverUnion | 0      | train
2 | mAP_metric | MeanAveragePrecision  | 0      | train
-------------------------------------------------------------
31.9 M    Trainable params
222 K     Non-trainable params
32.1 M    Total params
128.592   Total estimated model params size (MB)
204       Modules in train mode
0         Modules in eval mode


Epoch 0:   0%|          | 0/1 [00:00<?, ?it/s] 



Epoch 0: 100%|██████████| 1/1 [00:33<00:00,  0.03it/s]

`Trainer.fit` stopped: `max_steps=1` reached.


Epoch 0: 100%|██████████| 1/1 [00:33<00:00,  0.03it/s]
