# Getting started with MillionTrees datasets

In [1]:
import os
import sys

if os.path.basename(os.getcwd()) == 'examples':
    sys.path.append("../")
    
import milliontrees
from torchvision import transforms

# List available datasets
print(milliontrees.benchmark_datasets)

['TreePoints', 'TreeBoxes', 'TreePolygons']


The general workflow is to 
1. Select and optionally download a dataset
2. Load the train and test splits from the dataset
3. Create the dataloader, with optional additional transforms, for how to preprocess images, and optionally augment, input images and metadata
4. Use these dataloaders to train models in native pytorch or pytorch lightning

### Select and optionally download a dataset

In [2]:
# Load the box dataset
from milliontrees import get_dataset
dataset = get_dataset("TreeBoxes", root_dir="/orange/ewhite/DeepForest/MillionTrees/")

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


### Load the train and test splits from the dataset

Datasets are split into groups of images based on task. For example, 'train' versus 'test' or 'zero_shot_train' and 'zero_shot_test'.

In [3]:
from milliontrees.common.data_loaders import get_train_loader

train_dataset = dataset.get_subset("train")

# View the first image in the dataset
image, label, metadata = train_dataset[0]
print(f"Metadata length: {len(metadata)}")
print(f"Image shape: {image.shape}, Image type: {type(image)}")
print(f"Label shape: {label.shape}, Label type: {type(label)}")

Metadata length: 2
Image shape: torch.Size([520, 1]), Image type: <class 'torch.Tensor'>
Label shape: torch.Size([3, 448, 448]), Label type: <class 'torch.Tensor'>


### Create dataloader

In [6]:
train_loader = get_train_loader("standard", train_dataset, batch_size=2)

# Show one batch of the loader
for metadata, image, targets in train_loader:
    print("Targets is a list of dictionaries with the following keys: ", targets[0].keys())
    print(f"Image shape: {image.shape}, Image type: {type(image)}")
    print(f"Annotation shape of the first image: {targets[0]['boxes'].shape}")
    break  # Just show the first batch

Targets is a list of dictionaries with the following keys:  dict_keys(['boxes', 'labels'])
Image shape: torch.Size([4, 3, 448, 448]), Image type: <class 'torch.Tensor'>
Annotation shape of the first image: torch.Size([19, 4])


In [7]:
from deepforest import main

# Create a DeepForest model
m = main.deepforest()

# Load the pre-trained tree model
m.load_model("Weecology/DeepForest-tree")

# Create a trainer with fast development run enabled
m.create_trainer(fast_dev_run=True)
m.config["train"]["csv_file"] ="<dummy file, existing dataloader>"
m.trainer.fit(m, train_loader)  

Reading config file: /blue/ewhite/b.weinstein/miniconda3/envs/MillionTrees/lib/python3.10/site-packages/deepforest/data/deepforest_config.yml


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Reading config file: /blue/ewhite/b.weinstein/miniconda3/envs/MillionTrees/lib/python3.10/site-packages/deepforest/data/deepforest_config.yml


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [7]

  | Name       | Type                  | Params | Mode 
-------------------------------------------------------------
0 | model      | RetinaNet             | 32.1 M | train
1 | iou_metric | IntersectionOverUnion | 0      | train
2 | mAP_metric | MeanAveragePrecision  | 0      | train
-------------------------------------------------------------
31.9 M    Trainable params
222 K     Non-trainable params
32.1 M    Total params
128.592   Total estimated model params size (MB)
204       Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 1/1 [00:02<00:00,  0.47it/s]



Epoch 0: 100%|██████████| 1/1 [00:02<00:00,  0.46it/s]

`Trainer.fit` stopped: `max_steps=1` reached.


Epoch 0: 100%|██████████| 1/1 [00:02<00:00,  0.46it/s]


In [8]:
from pytorch_lightning.loggers import CometLogger
# To run this notebook, pip install comet-ml
# See creating a comet workspaces https://www.comet.com/docs/v2/guides/experiment-management/configure-sdk/

# Create a trainer 
comet_logger = CometLogger()

m.create_trainer(logger=comet_logger)
m.trainer.fit(m, train_loader)  

CometLogger will be initialized in online mode
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/bw4sz/general/1663ce53992d41c496e38d3f96797ffd

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [7]

  | Name       | Type                  | Params | Mode 
-------------------------------------------------------------
0 | model      | RetinaNet             | 32.1 M | train
1 | iou_metric | IntersectionOverUnion | 0      | train
2 | mAP_metric | MeanAveragePrecision  | 0      | train
-------------------------------------------------------------
31.9 M    Trainable params
222 K     Non-trainable params
32.1 M    Total params
128.592   Total estimated model params size (MB)
204       Modules in train mode
0         Modules in eval mode


Epoch 0:   0%|          | 1/1063 [00:00<06:05,  2.90it/s, v_num=7ffd]



Epoch 0:   0%|          | 3/1063 [00:02<13:29,  1.31it/s, v_num=7ffd]



Epoch 0:   0%|          | 4/1063 [00:02<11:44,  1.50it/s, v_num=7ffd]



Epoch 0:   0%|          | 5/1063 [00:03<10:49,  1.63it/s, v_num=7ffd]



Epoch 0:   1%|          | 6/1063 [00:03<10:29,  1.68it/s, v_num=7ffd]



Epoch 0:   1%|          | 7/1063 [00:03<10:00,  1.76it/s, v_num=7ffd]



Epoch 0:   1%|          | 8/1063 [00:04<09:41,  1.81it/s, v_num=7ffd]



Epoch 0:   1%|          | 9/1063 [00:04<09:26,  1.86it/s, v_num=7ffd]



Epoch 0:   1%|          | 10/1063 [00:05<09:33,  1.84it/s, v_num=7ffd]



Epoch 0:   1%|          | 11/1063 [00:05<09:26,  1.86it/s, v_num=7ffd]



Epoch 0:   1%|          | 12/1063 [00:06<09:10,  1.91it/s, v_num=7ffd]



Epoch 0:   1%|          | 13/1063 [00:06<09:02,  1.94it/s, v_num=7ffd]



Epoch 0:   2%|▏         | 16/1063 [00:08<09:25,  1.85it/s, v_num=7ffd]



Epoch 0:   2%|▏         | 17/1063 [00:09<10:04,  1.73it/s, v_num=7ffd]



Epoch 0:   2%|▏         | 18/1063 [00:10<10:07,  1.72it/s, v_num=7ffd]



Epoch 0:   2%|▏         | 19/1063 [00:10<09:54,  1.76it/s, v_num=7ffd]



Epoch 0:   2%|▏         | 22/1063 [00:12<09:58,  1.74it/s, v_num=7ffd]



Epoch 0:   2%|▏         | 23/1063 [00:12<09:44,  1.78it/s, v_num=7ffd]



Epoch 0:   2%|▏         | 24/1063 [00:13<09:35,  1.81it/s, v_num=7ffd]



Epoch 0:   2%|▏         | 25/1063 [00:13<09:34,  1.81it/s, v_num=7ffd]



Epoch 0:   2%|▏         | 26/1063 [00:14<09:26,  1.83it/s, v_num=7ffd]



Epoch 0:   3%|▎         | 28/1063 [00:14<09:05,  1.90it/s, v_num=7ffd]



Epoch 0:   3%|▎         | 29/1063 [00:15<09:00,  1.91it/s, v_num=7ffd]



Epoch 0:   3%|▎         | 31/1063 [00:16<09:16,  1.85it/s, v_num=7ffd]



Epoch 0:   3%|▎         | 32/1063 [00:17<09:14,  1.86it/s, v_num=7ffd]



Epoch 0:   3%|▎         | 34/1063 [00:17<08:59,  1.91it/s, v_num=7ffd]



Epoch 0:   3%|▎         | 35/1063 [00:18<09:14,  1.85it/s, v_num=7ffd]



Epoch 0:   3%|▎         | 36/1063 [00:19<09:06,  1.88it/s, v_num=7ffd]



Epoch 0:   4%|▎         | 39/1063 [00:20<09:08,  1.87it/s, v_num=7ffd]



Epoch 0:   4%|▍         | 40/1063 [00:21<09:02,  1.89it/s, v_num=7ffd]



Epoch 0:   4%|▍         | 42/1063 [00:22<09:12,  1.85it/s, v_num=7ffd]



Epoch 0:   4%|▍         | 44/1063 [00:23<09:03,  1.87it/s, v_num=7ffd]



Epoch 0:   4%|▍         | 45/1063 [00:23<09:00,  1.88it/s, v_num=7ffd]



Epoch 0:   4%|▍         | 46/1063 [00:24<08:56,  1.90it/s, v_num=7ffd]



Epoch 0:   5%|▍         | 48/1063 [00:25<08:51,  1.91it/s, v_num=7ffd]



Epoch 0:   5%|▍         | 49/1063 [00:25<08:49,  1.92it/s, v_num=7ffd]



Epoch 0:   5%|▍         | 50/1063 [00:26<08:58,  1.88it/s, v_num=7ffd]



Epoch 0:   5%|▍         | 52/1063 [00:27<08:53,  1.90it/s, v_num=7ffd]



Epoch 0:   5%|▍         | 53/1063 [00:27<08:51,  1.90it/s, v_num=7ffd]



Epoch 0:   5%|▌         | 55/1063 [00:29<09:02,  1.86it/s, v_num=7ffd]



Epoch 0:   5%|▌         | 57/1063 [00:31<09:09,  1.83it/s, v_num=7ffd]



Epoch 0:   5%|▌         | 58/1063 [00:31<09:07,  1.84it/s, v_num=7ffd]



Epoch 0:   6%|▌         | 62/1063 [00:33<09:08,  1.83it/s, v_num=7ffd]



Epoch 0:   6%|▌         | 64/1063 [00:34<09:01,  1.84it/s, v_num=7ffd]



Epoch 0:   6%|▌         | 65/1063 [00:34<08:57,  1.86it/s, v_num=7ffd]



Epoch 0:   6%|▌         | 66/1063 [00:35<08:54,  1.87it/s, v_num=7ffd]



Epoch 0:   6%|▋         | 67/1063 [00:35<08:51,  1.87it/s, v_num=7ffd]



Epoch 0:   6%|▋         | 69/1063 [00:36<08:45,  1.89it/s, v_num=7ffd]



Epoch 0:   7%|▋         | 72/1063 [00:37<08:35,  1.92it/s, v_num=7ffd]



Epoch 0:   7%|▋         | 76/1063 [00:38<08:26,  1.95it/s, v_num=7ffd]



Epoch 0:   7%|▋         | 77/1063 [00:39<08:23,  1.96it/s, v_num=7ffd]



Epoch 0:   8%|▊         | 85/1063 [00:42<08:07,  2.01it/s, v_num=7ffd]



Epoch 0:   8%|▊         | 88/1063 [00:43<08:01,  2.03it/s, v_num=7ffd]



Epoch 0:   9%|▊         | 91/1063 [00:44<07:54,  2.05it/s, v_num=7ffd]



Epoch 0:   9%|▊         | 92/1063 [00:44<07:52,  2.05it/s, v_num=7ffd]



Epoch 0:   9%|▉         | 97/1063 [00:46<07:44,  2.08it/s, v_num=7ffd]



Epoch 0:   9%|▉         | 99/1063 [00:47<07:42,  2.09it/s, v_num=7ffd]



Epoch 0:   9%|▉         | 100/1063 [00:47<07:39,  2.09it/s, v_num=7ffd]



Epoch 0:  10%|▉         | 104/1063 [00:49<07:34,  2.11it/s, v_num=7ffd]



Epoch 0:  10%|▉         | 106/1063 [00:49<07:30,  2.13it/s, v_num=7ffd]



Epoch 0:  10%|█         | 110/1063 [00:51<07:25,  2.14it/s, v_num=7ffd]



Epoch 0:  11%|█         | 114/1063 [00:52<07:20,  2.15it/s, v_num=7ffd]



Epoch 0:  11%|█         | 116/1063 [00:53<07:18,  2.16it/s, v_num=7ffd]



Epoch 0:  11%|█         | 117/1063 [00:54<07:18,  2.16it/s, v_num=7ffd]



Epoch 0:  11%|█         | 118/1063 [00:54<07:17,  2.16it/s, v_num=7ffd]



Epoch 0:  11%|█▏        | 122/1063 [00:56<07:14,  2.17it/s, v_num=7ffd]



Epoch 0:  12%|█▏        | 127/1063 [00:59<07:20,  2.12it/s, v_num=7ffd]



Epoch 0:  12%|█▏        | 130/1063 [01:00<07:16,  2.14it/s, v_num=7ffd]



Epoch 0:  12%|█▏        | 131/1063 [01:01<07:16,  2.14it/s, v_num=7ffd]



Epoch 0:  13%|█▎        | 139/1063 [01:05<07:13,  2.13it/s, v_num=7ffd]



Epoch 0:  13%|█▎        | 140/1063 [01:05<07:12,  2.14it/s, v_num=7ffd]



Epoch 0:  14%|█▎        | 144/1063 [01:07<07:07,  2.15it/s, v_num=7ffd]



Epoch 0:  14%|█▍        | 151/1063 [01:10<07:02,  2.16it/s, v_num=7ffd]



Epoch 0:  14%|█▍        | 153/1063 [01:10<07:00,  2.17it/s, v_num=7ffd]



Epoch 0:  14%|█▍        | 154/1063 [01:10<06:58,  2.17it/s, v_num=7ffd]



Epoch 0:  15%|█▍        | 155/1063 [01:11<06:57,  2.17it/s, v_num=7ffd]



Epoch 0:  15%|█▍        | 157/1063 [01:12<06:56,  2.18it/s, v_num=7ffd]



Epoch 0:  15%|█▌        | 164/1063 [01:14<06:49,  2.20it/s, v_num=7ffd]



Epoch 0:  16%|█▌        | 165/1063 [01:15<06:53,  2.17it/s, v_num=7ffd]



Epoch 0:  16%|█▌        | 167/1063 [01:16<06:51,  2.18it/s, v_num=7ffd]



Epoch 0:  16%|█▌        | 171/1063 [01:17<06:46,  2.19it/s, v_num=7ffd]



Epoch 0:  17%|█▋        | 176/1063 [01:19<06:42,  2.21it/s, v_num=7ffd]



Epoch 0:  17%|█▋        | 177/1063 [01:20<06:42,  2.20it/s, v_num=7ffd]



Epoch 0:  17%|█▋        | 180/1063 [01:22<06:43,  2.19it/s, v_num=7ffd]



Epoch 0:  17%|█▋        | 181/1063 [01:22<06:43,  2.19it/s, v_num=7ffd]



Epoch 0:  17%|█▋        | 182/1063 [01:23<06:42,  2.19it/s, v_num=7ffd]



Epoch 0:  18%|█▊        | 188/1063 [01:25<06:39,  2.19it/s, v_num=7ffd]



Epoch 0:  18%|█▊        | 189/1063 [01:26<06:39,  2.19it/s, v_num=7ffd]



Epoch 0:  18%|█▊        | 194/1063 [01:28<06:34,  2.20it/s, v_num=7ffd]



Epoch 0:  18%|█▊        | 195/1063 [01:28<06:33,  2.21it/s, v_num=7ffd]



Epoch 0:  18%|█▊        | 196/1063 [01:29<06:36,  2.19it/s, v_num=7ffd]



Epoch 0:  19%|█▊        | 197/1063 [01:29<06:35,  2.19it/s, v_num=7ffd]



Epoch 0:  19%|█▊        | 199/1063 [01:30<06:33,  2.19it/s, v_num=7ffd]



Epoch 0:  19%|█▉        | 201/1063 [01:31<06:31,  2.20it/s, v_num=7ffd]



Epoch 0:  19%|█▉        | 204/1063 [01:32<06:28,  2.21it/s, v_num=7ffd]



Epoch 0:  20%|█▉        | 210/1063 [01:36<06:30,  2.18it/s, v_num=7ffd]



Epoch 0:  20%|█▉        | 212/1063 [01:36<06:28,  2.19it/s, v_num=7ffd]



Epoch 0:  20%|██        | 214/1063 [01:38<06:29,  2.18it/s, v_num=7ffd]



Epoch 0:  21%|██        | 218/1063 [01:39<06:27,  2.18it/s, v_num=7ffd]



Epoch 0:  21%|██        | 221/1063 [01:40<06:24,  2.19it/s, v_num=7ffd]



Epoch 0:  21%|██        | 225/1063 [01:43<06:24,  2.18it/s, v_num=7ffd]



Epoch 0:  21%|██▏       | 227/1063 [01:44<06:25,  2.17it/s, v_num=7ffd]



Epoch 0:  22%|██▏       | 232/1063 [01:46<06:21,  2.18it/s, v_num=7ffd]



Epoch 0:  22%|██▏       | 237/1063 [01:48<06:19,  2.17it/s, v_num=7ffd]



Epoch 0:  22%|██▏       | 239/1063 [01:49<06:18,  2.18it/s, v_num=7ffd]



Epoch 0:  24%|██▎       | 252/1063 [01:56<06:13,  2.17it/s, v_num=7ffd]



Epoch 0:  24%|██▍       | 254/1063 [01:57<06:13,  2.16it/s, v_num=7ffd]



Epoch 0:  24%|██▍       | 260/1063 [01:59<06:09,  2.17it/s, v_num=7ffd]



Epoch 0:  25%|██▍       | 265/1063 [02:01<06:05,  2.18it/s, v_num=7ffd]



Epoch 0:  25%|██▌       | 268/1063 [02:02<06:04,  2.18it/s, v_num=7ffd]



Epoch 0:  25%|██▌       | 271/1063 [02:04<06:03,  2.18it/s, v_num=7ffd]



Epoch 0:  26%|██▌       | 277/1063 [02:07<06:00,  2.18it/s, v_num=7ffd]



Epoch 0:  26%|██▌       | 278/1063 [02:07<06:00,  2.18it/s, v_num=7ffd]



Epoch 0:  27%|██▋       | 286/1063 [02:10<05:54,  2.19it/s, v_num=7ffd]



Epoch 0:  29%|██▉       | 306/1063 [02:18<05:42,  2.21it/s, v_num=7ffd]



Epoch 0:  29%|██▉       | 312/1063 [02:20<05:38,  2.22it/s, v_num=7ffd]



Epoch 0:  30%|██▉       | 314/1063 [02:21<05:37,  2.22it/s, v_num=7ffd]



Epoch 0:  30%|███       | 320/1063 [02:25<05:37,  2.20it/s, v_num=7ffd]



Epoch 0:  30%|███       | 324/1063 [02:27<05:35,  2.20it/s, v_num=7ffd]



Epoch 0:  31%|███       | 329/1063 [02:29<05:32,  2.21it/s, v_num=7ffd]



Epoch 0:  31%|███       | 330/1063 [02:29<05:31,  2.21it/s, v_num=7ffd]



Epoch 0:  34%|███▎      | 358/1063 [02:42<05:20,  2.20it/s, v_num=7ffd]



Epoch 0:  34%|███▍      | 359/1063 [02:43<05:20,  2.20it/s, v_num=7ffd]



Epoch 0:  34%|███▍      | 364/1063 [02:45<05:17,  2.20it/s, v_num=7ffd]



Epoch 0:  34%|███▍      | 365/1063 [02:45<05:17,  2.20it/s, v_num=7ffd]



Epoch 0:  35%|███▌      | 375/1063 [02:49<05:10,  2.21it/s, v_num=7ffd]



Epoch 0:  36%|███▌      | 380/1063 [02:51<05:08,  2.22it/s, v_num=7ffd]



Epoch 0:  37%|███▋      | 394/1063 [02:58<05:03,  2.21it/s, v_num=7ffd]



Epoch 0:  38%|███▊      | 406/1063 [03:02<04:56,  2.22it/s, v_num=7ffd]



Epoch 0:  39%|███▉      | 418/1063 [03:08<04:50,  2.22it/s, v_num=7ffd]



Epoch 0:  41%|████      | 435/1063 [03:15<04:42,  2.22it/s, v_num=7ffd]



Epoch 0:  42%|████▏     | 445/1063 [03:20<04:38,  2.22it/s, v_num=7ffd]



Epoch 0:  42%|████▏     | 450/1063 [03:22<04:35,  2.22it/s, v_num=7ffd]



Epoch 0:  43%|████▎     | 453/1063 [03:23<04:34,  2.23it/s, v_num=7ffd]



Epoch 0:  43%|████▎     | 460/1063 [03:27<04:32,  2.21it/s, v_num=7ffd]



Epoch 0:  44%|████▍     | 467/1063 [03:30<04:28,  2.22it/s, v_num=7ffd]



Epoch 0:  45%|████▌     | 479/1063 [03:36<04:23,  2.21it/s, v_num=7ffd]



Epoch 0:  45%|████▌     | 482/1063 [03:38<04:23,  2.21it/s, v_num=7ffd]



Epoch 0:  46%|████▌     | 491/1063 [03:41<04:18,  2.21it/s, v_num=7ffd]



Epoch 0:  48%|████▊     | 514/1063 [03:51<04:07,  2.22it/s, v_num=7ffd]



Epoch 0:  49%|████▉     | 521/1063 [03:53<04:03,  2.23it/s, v_num=7ffd]



Epoch 0:  49%|████▉     | 523/1063 [03:54<04:02,  2.23it/s, v_num=7ffd]



Epoch 0:  49%|████▉     | 526/1063 [03:55<04:00,  2.23it/s, v_num=7ffd]



Epoch 0:  50%|████▉     | 531/1063 [03:57<03:58,  2.23it/s, v_num=7ffd]



Epoch 0:  51%|█████     | 538/1063 [04:01<03:55,  2.23it/s, v_num=7ffd]



Epoch 0:  51%|█████     | 543/1063 [04:03<03:52,  2.23it/s, v_num=7ffd]

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : key_muffin_9148
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/bw4sz/general/1663ce53992d41c496e38d3f96797ffd
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     train_bbox_regression_step [10] : (0.2905547618865967, 0.4660817086696625)
[1;38;5;39mCOMET INFO:[0m     train_classification_step [10]  : (0.14760786294937134, 0.4187661409378052)
[1;38;5;39mCOMET INFO:[0m     train_loss_step [10]            : (0.438162624835968, 0.88484787940979)
[1;38;5;39mCOMET INFO:[0m   Others:
[1;38

OutOfMemoryError: CUDA out of memory. Tried to allocate 3.08 GiB. GPU 0 has a total capacity of 10.75 GiB of which 2.16 GiB is free. Including non-PyTorch memory, this process has 8.58 GiB memory in use. Of the allocated memory 8.27 GiB is allocated by PyTorch, and 134.28 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)