Skip to content

Commit

Permalink
Code Refactor for Speed and Readability (#13450)
Browse files Browse the repository at this point in the history
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
  • Loading branch information
pderrenger and glenn-jocher committed Jun 9, 2024
1 parent 1b26838 commit 6367ff4
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 28 deletions.
9 changes: 5 additions & 4 deletions ultralytics/data/split_dota.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def load_yolo_dota(data_root, split="train"):
return annos


def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.01):
def get_windows(im_size, crop_sizes=(1024,), gaps=(200,), im_rate_thr=0.6, eps=0.01):
"""
Get the coordinates of windows.
Expand All @@ -95,6 +95,7 @@ def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.0
        crop_sizes (Tuple[int]): Crop sizes of the windows.
        gaps (Tuple[int]): Gaps between consecutive crops.
        im_rate_thr (float): Threshold on the ratio of window area to image area.
eps (float): Epsilon value for math operations.
"""
h, w = im_size
windows = []
Expand Down Expand Up @@ -187,7 +188,7 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")


def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024], gaps=[200]):
def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=(1024,), gaps=(200,)):
"""
Split both images and labels.
Expand Down Expand Up @@ -217,7 +218,7 @@ def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024
crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))


def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
"""
Split train and val set of DOTA.
Expand Down Expand Up @@ -247,7 +248,7 @@ def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)


def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
"""
Split test set of DOTA, labels are not included within this set.
Expand Down
12 changes: 9 additions & 3 deletions ultralytics/engine/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,18 @@ def __call__(self, source=None, model=None, stream=False, *args, **kwargs):

def predict_cli(self, source=None, model=None):
    """
    Method used for Command Line Interface (CLI) prediction.

    This function is designed to run predictions using the CLI. It sets up the source and model, then processes
    the inputs in a streaming manner. This method ensures that no outputs accumulate in memory by consuming the
    generator without storing results.

    Args:
        source: Inference source, forwarded unchanged to `stream_inference` — presumably a path/URL/array;
            confirm against `setup_source`.
        model: Model reference, forwarded unchanged to `stream_inference`.

    Note:
        Do not modify this function or remove the generator. The generator ensures that no outputs are
        accumulated in memory, which is critical for preventing memory issues during long-running predictions.
    """
    gen = self.stream_inference(source, model)
    for _ in gen:  # sourcery skip: remove-empty-nested-block, noqa
        pass  # intentionally discard every result so nothing accumulates in memory

def setup_source(self, source):
Expand Down
42 changes: 21 additions & 21 deletions ultralytics/models/sam/modules/tiny_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,44 +383,44 @@ def forward(self, x):
"""Applies attention-based transformation or padding to input 'x' before passing it through a local
convolution.
"""
H, W = self.input_resolution
B, L, C = x.shape
assert L == H * W, "input feature has wrong size"
h, w = self.input_resolution
b, l, c = x.shape
assert l == h * w, "input feature has wrong size"
res_x = x
if H == self.window_size and W == self.window_size:
if h == self.window_size and w == self.window_size:
x = self.attn(x)
else:
x = x.view(B, H, W, C)
pad_b = (self.window_size - H % self.window_size) % self.window_size
pad_r = (self.window_size - W % self.window_size) % self.window_size
x = x.view(b, h, w, c)
pad_b = (self.window_size - h % self.window_size) % self.window_size
pad_r = (self.window_size - w % self.window_size) % self.window_size
padding = pad_b > 0 or pad_r > 0

if padding:
x = F.pad(x, (0, 0, 0, pad_r, 0, pad_b))

pH, pW = H + pad_b, W + pad_r
pH, pW = h + pad_b, w + pad_r
nH = pH // self.window_size
nW = pW // self.window_size
# Window partition
x = (
x.view(B, nH, self.window_size, nW, self.window_size, C)
x.view(b, nH, self.window_size, nW, self.window_size, c)
.transpose(2, 3)
.reshape(B * nH * nW, self.window_size * self.window_size, C)
.reshape(b * nH * nW, self.window_size * self.window_size, c)
)
x = self.attn(x)
# Window reverse
x = x.view(B, nH, nW, self.window_size, self.window_size, C).transpose(2, 3).reshape(B, pH, pW, C)
x = x.view(b, nH, nW, self.window_size, self.window_size, c).transpose(2, 3).reshape(b, pH, pW, c)

if padding:
x = x[:, :H, :W].contiguous()
x = x[:, :h, :w].contiguous()

x = x.view(B, L, C)
x = x.view(b, l, c)

x = res_x + self.drop_path(x)

x = x.transpose(1, 2).reshape(B, C, H, W)
x = x.transpose(1, 2).reshape(b, c, h, w)
x = self.local_conv(x)
x = x.view(B, C, L).transpose(1, 2)
x = x.view(b, c, l).transpose(1, 2)

return x + self.drop_path(self.mlp(x))

Expand Down Expand Up @@ -565,10 +565,10 @@ def __init__(
img_size=224,
in_chans=3,
num_classes=1000,
embed_dims=[96, 192, 384, 768],
depths=[2, 2, 6, 2],
num_heads=[3, 6, 12, 24],
window_sizes=[7, 7, 14, 7],
embed_dims=(96, 192, 384, 768),
depths=(2, 2, 6, 2),
num_heads=(3, 6, 12, 24),
window_sizes=(7, 7, 14, 7),
mlp_ratio=4.0,
drop_rate=0.0,
drop_path_rate=0.1,
Expand Down Expand Up @@ -732,8 +732,8 @@ def forward_features(self, x):
for i in range(start_i, len(self.layers)):
layer = self.layers[i]
x = layer(x)
B, _, C = x.shape
x = x.view(B, 64, 64, C)
batch, _, channel = x.shape
x = x.view(batch, 64, 64, channel)
x = x.permute(0, 3, 1, 2)
return self.neck(x)

Expand Down

0 comments on commit 6367ff4

Please sign in to comment.