149 implement robust dpatch attack #155

Merged · 33 commits · Jun 11, 2024
Changes from 30 commits

Commits
a8b7ec2  Merge branch '146-refactor-chains-into-nested-runs' into 149-implemen… (treubig26, Apr 26, 2024)
5785160  Download and extract VisDrone2019 zip files (treubig26, Apr 29, 2024)
19c5a55  Load dataset with HuggingFace and provider Armory wrapper (treubig26, Apr 29, 2024)
6b52a4f  Create benign evaluation for yolov5 with VisDrone dataset (treubig26, Apr 29, 2024)
5f21e80  Record OD rates as metrics (treubig26, Apr 30, 2024)
d81db21  Merge branch 'master' into 149-implement-robust-dpatch-attack (treubig26, Apr 30, 2024)
09e6a8e  Revise YOLOv5 wrapper to work with yolov5 backends with slightly diff… (treubig26, Apr 30, 2024)
9d29436  Remove argmax on YOLOv5 output as the NMS processing already does that (treubig26, May 1, 2024)
835b906  Download VisDrone from GitHub and adjust labels to match what YOLOv5 … (treubig26, May 1, 2024)
78e1182  Ignore all classes with scores of 0 in visdrone dataset (treubig26, May 3, 2024)
9b6f67e  Create initial lightning module to generate robust DPatch against vis… (treubig26, May 3, 2024)
b68eca5  Merge branch 'master' into 149-implement-robust-dpatch-attack (treubig26, May 6, 2024)
1ba82ca  Apply patch to image in training step (treubig26, May 7, 2024)
bc68b4f  Save final patch to image file (treubig26, May 7, 2024)
3aef3d0  Generate patch then use it in Armory evaluation (treubig26, May 7, 2024)
538b634  Apply random augmentations to image during patch generation (treubig26, May 8, 2024)
eb0eff2  Add args for patch generation parameters (treubig26, May 8, 2024)
d124aa2  Log loss to MLFlow during patch generation (treubig26, May 8, 2024)
5121d35  Randomize patch location (treubig26, May 8, 2024)
ba2ea7c  Omit boxes with 0 height or width (treubig26, May 9, 2024)
ff23c18  Initial attempt using lightning automatic optimization (treubig26, May 9, 2024)
0a0d9a8  Merge branch '143-create-model-adapter-for-yolov4' into 149-implement… (treubig26, May 15, 2024)
47110ed  Use adjusted learning rate and momentum (treubig26, May 20, 2024)
e9ce9bd  Fix _apply methods to return for chaining (treubig26, May 20, 2024)
373d2eb  Allow custom loss function in YOLOv5 wrapper (treubig26, May 20, 2024)
10ce3c1  Use randomized locations for patch (treubig26, May 20, 2024)
0056c67  Clean up patch lightning module (treubig26, May 21, 2024)
fb32dfe  Put model in training mode so loss can be calculated (treubig26, May 21, 2024)
dd59429  Remove dependency on order of files in visdrone archive (treubig26, Jun 3, 2024)
509cb8b  Merge branch 'master' into 149-implement-robust-dpatch-attack (treubig26, Jun 3, 2024)
6dc7043  Include test split in visdrone dataset (treubig26, Jun 10, 2024)
7e80202  Remove initial_patch from module (treubig26, Jun 10, 2024)
062e285  Negate the loss so optimizing it increases the loss (treubig26, Jun 11, 2024)
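
The last stretch of commits outlines the core of the patch-generation loop: a trainable patch is pasted onto each image at a randomized location, random augmentations are applied, the detector is kept in training mode so it returns a loss, that loss is logged to MLFlow, and the loss is negated so a standard optimizer performs gradient ascent on it. The sketch below illustrates that idea only; it is not the PR's actual Lightning module, and the model interface, hyperparameters, and names are assumptions.

import torch
import pytorch_lightning as pl


class PatchGenerationSketch(pl.LightningModule):
    """Illustrative robust-DPatch-style patch generator (not the PR's module)."""

    def __init__(self, model, patch_size: int = 50, lr: float = 0.01, momentum: float = 0.9):
        super().__init__()
        self.model = model.train()  # keep the detector in training mode so it returns a loss
        self.lr = lr
        self.momentum = momentum
        # The patch is the only trainable parameter
        self.patch = torch.nn.Parameter(torch.rand(3, patch_size, patch_size))

    def training_step(self, batch, batch_idx):
        images, targets = batch  # assumed NCHW float images in [0, 1] plus detection targets
        _, _, height, width = images.shape
        size = self.patch.shape[-1]
        # Randomize the patch location for every batch
        top = int(torch.randint(0, height - size + 1, (1,)))
        left = int(torch.randint(0, width - size + 1, (1,)))
        patched = images.clone()
        patched[:, :, top : top + size, left : left + size] = self.patch
        loss = self.model(patched, targets)  # assumed to return a scalar detection loss
        self.log("loss", loss)  # surfaced to MLFlow through the attached Lightning logger
        # Negate the loss so the optimizer's descent step *increases* the detection loss
        return -loss

    def on_train_batch_end(self, outputs, batch, batch_idx):
        with torch.no_grad():
            self.patch.clamp_(0.0, 1.0)  # keep the patch a valid image after each step

    def configure_optimizers(self):
        # Only the patch is optimized; the detector's weights are left untouched
        return torch.optim.SGD([self.patch], lr=self.lr, momentum=self.momentum)

Returning the negated value is what the final commit describes: Lightning still runs backward() and the SGD step on -loss, which is equivalent to ascending the detection loss with respect to the patch.
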
219 changes: 219 additions & 0 deletions examples/src/armory/examples/object_detection/datasets/visdrone.py
@@ -0,0 +1,219 @@
"""Utilities to load the VisDrone 2019 dataset."""

import csv
import io
from pathlib import Path
from pprint import pprint
from typing import Any, Dict, Iterator, List, Tuple

import albumentations as A
import albumentations.pytorch
import datasets
import numpy as np

import armory.data
import armory.dataset


def create_dataloader(
dataset: datasets.Dataset, max_size: int, **kwargs
) -> armory.dataset.ObjectDetectionDataLoader:
"""
Create an Armory object detection dataloader for the given VisDrone2019 dataset split.

Args:
dataset: VisDrone2019 dataset split
max_size: Maximum image size to which to resize and pad image samples
**kwargs: Additional keyword arguments to pass to the dataloader constructor

Return:
Armory object detection dataloader
"""
resize = A.Compose(
[
A.LongestMaxSize(max_size=max_size),
A.PadIfNeeded(
Collaborator comment: It may not be necessary to pad all input images to the max size; some models take a list of images as input (e.g. Faster-RCNN). (A sketch of that alternative follows this function.)

min_height=max_size,
min_width=max_size,
border_mode=0,
value=(0, 0, 0),
),
A.ToFloat(max_value=255),
albumentations.pytorch.ToTensorV2(),
],
bbox_params=A.BboxParams(
format="coco",
label_fields=["id", "category", "occlusion", "truncation"],
),
)

def transform(sample):
tmp = dict(**sample)
tmp["image"] = []
tmp["objects"] = []
for image, objects in zip(sample["image"], sample["objects"]):
res = resize(
image=np.asarray(image),
bboxes=objects["bbox"],
id=objects["id"],
category=objects["category"],
occlusion=objects["occlusion"],
truncation=objects["truncation"],
)
tmp["image"].append(res.pop("image"))
tmp["objects"].append(res)
return tmp

dataset.set_transform(transform)

return armory.dataset.ObjectDetectionDataLoader(
dataset,
image_key="image",
dim=armory.data.ImageDimensions.CHW,
scale=armory.data.Scale(
dtype=armory.data.DataType.FLOAT,
max=1.0,
),
objects_key="objects",
boxes_key="bboxes",
format=armory.data.BBoxFormat.XYWH,
labels_key="category",
**kwargs,
)
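
Following the reviewer's note about padding, here is a minimal resize-only alternative, assuming the downstream model and the Armory dataloader can handle variable-size images (e.g. detectors such as Faster-RCNN that accept a list of image tensors). The helper name is hypothetical and reuses the imports at the top of this file.

def create_resize_only_transform(max_size: int) -> A.Compose:
    """Sketch: scale the longest side to max_size but skip the square padding."""
    return A.Compose(
        [
            A.LongestMaxSize(max_size=max_size),
            A.ToFloat(max_value=255),
            albumentations.pytorch.ToTensorV2(),
        ],
        bbox_params=A.BboxParams(
            format="coco",
            label_fields=["id", "category", "occlusion", "truncation"],
        ),
    )

A split returned by load_dataset() below can then be wrapped either way, for example create_dataloader(splits["val"], max_size=640, batch_size=2), where batch_size is assumed to be forwarded through **kwargs to the underlying dataloader.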


TRAIN_URL = "https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip"
VAL_URL = "https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip"


def load_dataset() -> datasets.DatasetDict:
"""
Load the train and validation splits of the VisDrone2019 dataset.

Return:
Dictionary containing the train and validation splits
"""
dl_manager = datasets.DownloadManager(dataset_name="VisDrone2019")
ds_features = features()
paths = dl_manager.download({"train": TRAIN_URL, "val": VAL_URL})
train_files = dl_manager.iter_archive(paths["train"])
val_files = dl_manager.iter_archive(paths["val"])
return datasets.DatasetDict(
{
"train": datasets.Dataset.from_generator(
generate_samples,
gen_kwargs={"files": train_files},
features=ds_features,
),
"val": datasets.Dataset.from_generator(
generate_samples,
gen_kwargs={"files": val_files},
features=ds_features,
),
}
)


CATEGORIES = [
# The YOLOv5 model removed this class and shifted all others down by 1 when
# it trained on the VisDrone data
# "ignored",
Collaborator comment on lines +125 to +127: Let's make this dataset usable for other trained models that might keep the original category labels.

"pedestrian",
"people",
"bicycle",
"car",
"van",
"truck",
"tricycle",
"awning-tricycle",
"bus",
"motor",
# The YOLOv5 model also ignored/removed this class
# "other",
]


def features() -> datasets.Features:
"""Create VisDrone2019 dataset features"""
return datasets.Features(
{
"image_id": datasets.Value("int64"),
"file_name": datasets.Value("string"),
"image": datasets.Image(),
"objects": datasets.Sequence(
{
"id": datasets.Value("int64"),
"bbox": datasets.Sequence(datasets.Value("float32"), length=4),
"category": datasets.ClassLabel(
num_classes=len(CATEGORIES), names=CATEGORIES
),
"truncation": datasets.Value("int32"),
"occlusion": datasets.Value("int32"),
}
),
}
)


ANNOTATION_FIELDS = [
"x",
"y",
"width",
"height",
"score",
"category_id",
"truncation",
"occlusion",
]


def load_annotations(file: io.BufferedReader) -> List[Dict[str, Any]]:
"""Load annotations/objects from the given file"""
reader = csv.DictReader(
io.StringIO(file.read().decode("utf-8")), fieldnames=ANNOTATION_FIELDS
)
annotations = []
for idx, row in enumerate(reader):
score = int(row["score"])
category = int(row["category_id"])
if score != 0: # Drop annotations with score of 0 (class-0 & class-11)
category -= 1 # The model was trained with 0-indexed categories starting at pedestrian
Collaborator comment on lines +186 to +187: Remapping the category labels should be a parameter of load_dataset so that we can use VisDrone with other models. (A sketch of that suggestion follows this function.)

bbox = list(map(float, [row[k] for k in ANNOTATION_FIELDS[:4]]))
if bbox[2] == 0 or bbox[3] == 0:
continue
annotations.append(
{
"id": idx,
"bbox": bbox,
"category": category,
"truncation": row["truncation"],
"occlusion": row["occlusion"],
}
)
return annotations
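
Picking up the reviewer's suggestion above, here is a minimal sketch of making the label remapping optional. The function and parameter names are illustrative; the flag would presumably be threaded down from load_dataset through generate_samples via gen_kwargs, and score-0 rows are dropped here as the inline comment above describes.

def load_annotations_with_optional_remap(
    file: io.BufferedReader, remap_categories: bool = True
) -> List[Dict[str, Any]]:
    """Sketch: same parsing as load_annotations, but the label shift is configurable."""
    reader = csv.DictReader(
        io.StringIO(file.read().decode("utf-8")), fieldnames=ANNOTATION_FIELDS
    )
    annotations = []
    for idx, row in enumerate(reader):
        if int(row["score"]) == 0:
            continue  # score 0 marks ignored regions in VisDrone annotations
        bbox = list(map(float, [row[k] for k in ANNOTATION_FIELDS[:4]]))
        if bbox[2] == 0 or bbox[3] == 0:
            continue  # skip degenerate boxes
        category = int(row["category_id"])
        if remap_categories:
            category -= 1  # shift to the 10-class, pedestrian-first label set
        annotations.append(
            {
                "id": idx,
                "bbox": bbox,
                "category": category,
                "truncation": row["truncation"],
                "occlusion": row["occlusion"],
            }
        )
    return annotations

If remapping is disabled, the CATEGORIES list (and the ClassLabel feature built from it) would also need the original "ignored" and "other" entries restored, as the earlier review comment on the category list points out.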


def generate_samples(
files: Iterator[Tuple[str, io.BufferedReader]], annotation_file_ext: str = ".txt"
) -> Iterator[Dict[str, Any]]:
"""Generate dataset samples from the given files in a VisDrone2019 archive"""
annotations = {}
images = {}
for path, file in files:
file_name = Path(path).stem
if Path(path).suffix == annotation_file_ext:
annotations[file_name] = load_annotations(file)
else:
images[file_name] = {"path": path, "bytes": file.read()}

for idx, (file_name, annotation) in enumerate(annotations.items()):
yield {
"image_id": idx,
"file_name": file_name,
"image": images[file_name],
"objects": annotation,
}


if __name__ == "__main__":
pprint(load_dataset())