# Train
### YOLACT

---

Reset the GPU's CUDA context before training (only done when a CUDA device is available):

In [1]:
from numba import cuda 

# Reset the current CUDA device before training, but only when Numba reports
# that CUDA is available (so the cell is a no-op on CPU-only machines).
if cuda.is_available():
    device = cuda.get_current_device()
    # Per Numba's device-management docs, reset() deletes the device's context
    # for this process, which also destroys the allocations made within it.
    device.reset()

**Download Pretrained Backbone Weights**

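You can download the backbone weights from [here](https://drive.google.com/file/d/1tvqFPd4bJtakOlmn-uIA492g2qurRChj/view); see also the [YOLACT GitHub repo](https://github.com/dbolya/yolact#training). The configuration further down refers to this file as `resnet101_reducedfc.pth`, and the listing below shows it stored in the `./weights` folder.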

In [2]:
import os

# Make sure the local "./weights" folder exists. When it is already present,
# list its contents so the available checkpoints / backbone weights are visible.
# os.path.isdir() is False for a missing path, so a separate exists() check is
# not needed.
if not os.path.isdir("./weights"):
    print("create weights folder...")
    os.mkdir("./weights")
else:
    print("weights folder already exists")
    print("Found these weights:")
    # A plain loop replaces the list comprehension that was executed only for
    # its print() side effect. sorted() makes the listing deterministic, since
    # os.listdir() returns entries in arbitrary order.
    for weight_file in sorted(os.listdir("./weights")):
        print(f"    - {weight_file}")

weights folder already exists
Found these weights:
    - 2024-06-04_23-15_YOLACT_test_training_with_WISDOM-Sim_0_759_interrupt.pth
    - 2024-09-05_12-57_YOLACT_test_training_3_with_WISDOM-Sim_0_52_interrupt.pth
    - 2024-06-05_7-9_YOLACT_test_training_with_WISDOM-Sim.pth
    - 2024-09-05_13-07_YOLACT_test_training_3_with_WISDOM-Sim_5_12849_interrupt.pth
    - 2024-06-06_23-15_YOLACT_test_training_2_with_WISDOM-Sim_0_10_interrupt.pth
    - resnet101_reducedfc.pth
    - 2024-06-06_22-57_YOLACT_test_training_2_with_WISDOM-Sim_0_28_interrupt.pth
    - 2024-06-04_20-59_YOLACT_test_training_with_WISDOM-Sim_3_8483_interrupt.pth
    - 2024-06-06_23-12_YOLACT_test_training_2_with_WISDOM-Sim_0_31_interrupt.pth
    - 2024-06-05_0-47_YOLACT_test_training_with_WISDOM-Sim.pth
    - 2024-06-05_15-34_YOLACT_test_training_with_WISDOM-Sim_0_25_interrupt.pth
    - 2024-06-05_15-33_YOLACT_test_training_with_WISDOM-Sim_0_15_interrupt.pth
    - 2024-06-06_23-56_YOLACT_test_training_2_with_WISDOM-Sim_399_5

---
### VARIABLES

In [3]:
# Show the kernel's working directory: the relative paths used in this notebook
# ("./weights", "./logs/", "./comfortable-yolact") are resolved against it.
!pwd

/home/local-admin/src/instance-segmentation


In [4]:
from datetime import datetime, timedelta

# Capture the wall-clock time once so every later use refers to the same moment.
NOW = datetime.now()
# Zero-padded "YYYY-MM-DD_HH-MM" stamp, used as the prefix of the run name.
DATE = NOW.strftime("%Y-%m-%d_%H-%M")

print("Date: " + DATE)

Date: 2024-09-06_16-58


In [5]:
# Configuration for one training run. Every constant below except EXPERIMENT_ID
# is forwarded to train() as a keyword argument in the training cell.

# Run name, prefixed with the DATE stamp built in the previous cell.
NAME=f"{DATE}_YOLACT_test_training_4" 
# Passed to train() as MODEL_SAVE_PATH; an earlier cell creates/lists "./weights".
MODEL_SAVE_PATH = "./weights/"

# Data
# Absolute, machine-specific dataset folders (WISDOM-Sim depth images / modal
# segmentation masks, going by the path names).
PATH_TO_IMAGES = "/home/local-admin/data/wisdom/wisdom-sim/depth_ims"
PATH_TO_MASKS = "/home/local-admin/data/wisdom/wisdom-sim/modal_segmasks/"
# NOTE(review): "all" is not among the modes listed in the inline comment. The
# recorded run selected every index (50001 entries), so DATA_AMOUNT, START_IDX
# and END_IDX presumably have no effect in this mode; confirm in yolact_toolkit.
DATA_MODE = "all" # auto, random, range, single -> then use START_IDX and END_IDX
DATA_AMOUNT = 5
START_IDX = 0
END_IDX = 5
TRAIN_DATA_SHUFFLE=True
# presumably the maximum image side length in pixels; TODO confirm
IMG_MAX_SIZE = 550

# Output
SHOULD_PRINT = True
LOG_FOLDER="./logs/"

# Experiment tracking
USING_EXPERIMENT_TRACKING = True
CREATE_NEW_EXPERIMENT = False
EXPERIMENT_NAME = "Instance Segmentation Optonic"
# NOTE(review): EXPERIMENT_ID is defined here but is not passed to train() in
# the training cell; only EXPERIMENT_NAME is.
EXPERIMENT_ID = 778265130982647590

# Learning Parameters
EPOCHS = 20
# NOTE(review): the saved output of the training cell shows repeated
# "CUDA out of memory" errors with these settings; a smaller batch size may be
# required. Confirm against the available GPU memory.
BATCH_SIZE = 20
LEARNING_RATE = 1e-3
# Float literal (100000.0). NOTE(review): confirm yolact_toolkit accepts a
# non-integer interval.
WEIGHT_SAVE_INTERVAL = 1e5
KEEP_ONLY_LATEST_WEIGHTS = True
# presumably the iteration counts at which the learning rate is scaled by
# GAMMA; TODO confirm
LEARNING_RATE_ADJUSTMENT = (280000, 600000, 700000, 750000)
MOMENTUM = 0.9
DECAY = 5e-4
GAMMA = 0.1
WARM_UP_ITER = 500
WARM_UP_INIT_LR = 1e-4
FREEZE_BATCH_NORMALIZATION = False

# Model Details
# No checkpoint name given; presumably training then starts from
# BACKBONE_INIT_WEIGHTS only. Confirm in yolact_toolkit.
WEIGHTS_NAME = None
# This file name appears in the "./weights" listing printed by the earlier cell.
BACKBONE_INIT_WEIGHTS="resnet101_reducedfc.pth"
BACKBONE ="resnet101"
MAX_INSTANCES=100
FPN_FEATURES=256
NMS_TOP_K = 100
NMS_CONF_THRESH = 0.009
NMS_THRESH = 0.5


--- 
### Imports

In [6]:
# Make the bundled YOLACT toolkit importable (add nn-lib).
import sys

# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path. The path is relative to the kernel's working directory.
if "./comfortable-yolact" not in sys.path:
    sys.path.append("./comfortable-yolact")

In [7]:
# yolact toolkit
from yolact_toolkit import train

---
### Train

In [None]:
# Debugging aid: CUDA_LAUNCH_BLOCKING=1 makes CUDA kernel launches synchronous,
# so an error is reported at the call that caused it.
# NOTE(review): this cell has no execution count in the saved notebook, so it
# was presumably not run for the recorded training output. The variable likely
# has to be set before CUDA is initialised to take effect; confirm.
# Relies on `os` imported in the weights-folder cell.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [8]:
# Start YOLACT training with the values from the configuration cell.
# The call stays the last expression of the cell, so its return value is still
# shown as the cell output.
train(
    # --- run identity and checkpoints ---
    NAME=NAME,
    MODEL_SAVE_PATH=MODEL_SAVE_PATH,
    WEIGHTS_NAME=WEIGHTS_NAME,
    BACKBONE_INIT_WEIGHTS=BACKBONE_INIT_WEIGHTS,
    WEIGHT_SAVE_INTERVAL=WEIGHT_SAVE_INTERVAL,
    KEEP_ONLY_LATEST_WEIGHTS=KEEP_ONLY_LATEST_WEIGHTS,
    # --- training data ---
    PATH_TO_TRAIN_IMAGES=PATH_TO_IMAGES,
    PATH_TO_TRAIN_MASKS=PATH_TO_MASKS,
    TRAIN_DATA_MODE=DATA_MODE,
    TRAIN_DATA_AMOUNT=DATA_AMOUNT,
    TRAIN_START_IDX=START_IDX,
    TRAIN_END_IDX=END_IDX,
    TRAIN_DATA_SHUFFLE=TRAIN_DATA_SHUFFLE,
    IMG_MAX_SIZE=IMG_MAX_SIZE,
    # --- console output, logs and experiment tracking ---
    SHOULD_PRINT=SHOULD_PRINT,
    LOG_FOLDER=LOG_FOLDER,
    USING_EXPERIMENT_TRACKING=USING_EXPERIMENT_TRACKING,
    CREATE_NEW_EXPERIMENT=CREATE_NEW_EXPERIMENT,
    EXPERIMENT_NAME=EXPERIMENT_NAME,
    # --- optimisation schedule ---
    EPOCHS=EPOCHS,
    BATCH_SIZE=BATCH_SIZE,
    LEARNING_RATE=LEARNING_RATE,
    LEARNING_RATE_ADJUSTMENT=LEARNING_RATE_ADJUSTMENT,
    MOMENTUM=MOMENTUM,
    DECAY=DECAY,
    GAMMA=GAMMA,
    WARM_UP_ITER=WARM_UP_ITER,
    WARM_UP_INIT_LR=WARM_UP_INIT_LR,
    FREEZE_BATCH_NORMALIZATION=FREEZE_BATCH_NORMALIZATION,
    # --- model architecture and NMS ---
    BACKBONE=BACKBONE,
    MAX_INSTANCES=MAX_INSTANCES,
    FPN_FEATURES=FPN_FEATURES,
    NMS_TOP_K=NMS_TOP_K,
    NMS_CONF_THRESH=NMS_CONF_THRESH,
    NMS_THRESH=NMS_THRESH,
)

Inference Image Indices:
[    0     1     2 ... 49998 49999 50000]
Inference Data Amount: 50001

--------------------------------
Verifying Data...

> > > Images < < <
Found: 100.0% (50000/50001)

 Not Found:
    -> /home/local-admin/data/wisdom/wisdom-sim/depth_ims/cache_folder_0

> > > Masks < < <
Found: 100.0% (50000/50001)

 Not Found:
    -> /home/local-admin/data/wisdom/wisdom-sim/modal_segmasks/cache_folder_0

Updating Images...
From 50001 to 50000 Images
    -> Image amount reduced by 0.0%
--------------------------------

Instance Segmentation with YOLACT (train)

Current Experiment ID: 778265130982647590
Create the model and preparing for training...


  _C._set_default_tensor_type(t)


Training starts now...


  return forward_call(*args, **kwargs)
  return F.conv2d(input, weight, bias, self.stride,


Error Occured: max(): Expected reduction dim 0 to have non-zero size.
Error Occured: max(): Expected reduction dim 0 to have non-zero size.
Error Occured: max(): Expected reduction dim 0 to have non-zero size.
Error Occured: max(): Expected reduction dim 0 to have non-zero size.
Error Occured: max(): Expected reduction dim 0 to have non-zero size.
Error Occured: CUDA out of memory. Tried to allocate 94.00 MiB. GPU 
Error Occured: CUDA out of memory. Tried to allocate 94.00 MiB. GPU 
Error Occured: CUDA out of memory. Tried to allocate 94.00 MiB. GPU 
Error Occured: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 
Error Occured: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 
Error Occured: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 
Error Occured: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 
Error Occured: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 
Error Occured: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 
Error Occured: CUDA out of me

Exception in thread Thread-6 (_pin_memory_loop):
Traceback (most recent call last):
  File "/home/local-admin/.conda/envs/yolact/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/home/local-admin/.conda/envs/yolact/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 761, in run_closure
    _threading_Thread_run(self)
  File "/home/local-admin/.conda/envs/yolact/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/home/local-admin/.conda/envs/yolact/lib/python3.12/site-packages/torch/utils/data/_utils/pin_memory.py", line 54, in _pin_memory_loop
    do_one_step()
  File "/home/local-admin/.conda/envs/yolact/lib/python3.12/site-packages/torch/utils/data/_utils/pin_memory.py", line 31, in do_one_step
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/local-admin/.conda/envs/yolact/lib/python3.12/multiprocessing/queues.py", line

Error Occured: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 
Stopping early. Saving network...


CustomDataParallel(
  (module): NetLoss(
    (net): Yolact(
      (backbone): ResNetBackbone(
        (layers): ModuleList(
          (0): Sequential(
            (0): Bottleneck(
              (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (relu): ReLU(inplace=True)
              (downsample): Sequential(
                (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, tra