# Modify YOLOv8 to Get Embedding of Full Image

In [1]:
from ultralytics import YOLO

In [2]:
# Sample image used by every predict cell below (path is relative to the
# directory the notebook is run from).
image_file = "./ultralytics/assets/bus.jpg"

# To exercise batched inference, pass a list of paths instead:
# image_file = ['./ultralytics/assets/bus.jpg'] * 2

In [3]:
# Load the DETECT model from the YOLOv8m checkpoint. The explicit '.pt'
# suffix keeps this cell consistent with the segment / pose / classification
# cells, which all name their weight files in full.
detect_model = YOLO('yolov8m.pt', 'detect')  # YOLOv8m weights, detect task
# Move the model to the first CUDA device before running inference.
detect_model.to('cuda:0')
# Attach a human-readable label to the model object.
detect_model.name = 'yolov8-detect'

In [4]:
# Run the detect model on the sample image at imgsz=640 on cuda:0, saving the
# prediction output to disk, then display the returned list of Results.
detect_results = detect_model.predict(
    image_file,
    imgsz=640,
    save=True,
    device='cuda:0',
    stream=False,
    verbose=True,
)
detect_results


image 1/1 /mnt/nvm/repos/ultralytics/ultralytics/assets/bus.jpg: 640x480 4 persons, 1 bus, 153.4ms
Speed: 4.5ms preprocess, 153.4ms inference, 872.0ms postprocess per image at shape (1, 3, 640, 480)
Results saved to [1mruns/detect/predict[0m


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 embedding: [-0.007253803312778473, 0.03278650343418121, -0.008058142848312855, -0.03066238760948181, -0.05129880830645561, -0.018687108531594276, 0.0381661131978035, 0.05169808864593506, 0.02025211602449417, -0.04714430868625641, 0.06978167593479156, -0.056685056537389755, 0.05085020884871483, 0.06940268725156784, 0.02409999817609787, -0.0033564192708581686, 0.017835743725299835, 0.003019969677552581, 0.008426106534898281, -0.0019216255750507116, 0.037997450679540634, 0.005508673842996359, 0.013186294585466385, 0.05164135619997978, 0.03896432742476463, 0.007173761259764433, -0.0006729981396347284, -0.032230231910943985, -0.02943842113018036, -0.016554417088627815, -0.05144011974334717, 0.007770944852381945, -0.06853404641151428, 0.022659994661808014, 0.09621923416852951, 0.07748540490865707, -0.01111998688429594, -0.0028790547512471676, 0.020952414721250534, 0.0385087318718433

In [5]:
# Length of the embedding attached to the first detect result.
len(detect_results[0].embedding)

576

## Check Head of various Models

### Segment Model

In [6]:
# SEGMENT model: build it from the YOLOv8m segmentation checkpoint,
# place it on the first CUDA device, and give it a readable label.
segment_model = YOLO("yolov8m-seg.pt")
segment_model.to("cuda:0")
segment_model.name = "yolov8-segment"

In [7]:
# Run the segment model on the sample image at imgsz=640 on cuda:0, saving the
# prediction output to disk, then display the returned list of Results.
segment_results = segment_model.predict(
    image_file,
    imgsz=640,
    save=True,
    device='cuda:0',
    stream=False,
    verbose=True,
)
segment_results


image 1/1 /mnt/nvm/repos/ultralytics/ultralytics/assets/bus.jpg: 640x480 4 persons, 1 bus, 1 tie, 51.1ms
Speed: 2.9ms preprocess, 51.1ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 480)
Results saved to [1mruns/segment/predict[0m


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 embedding: [-0.0020015432965010405, 0.03527667000889778, 0.029174717143177986, 0.062284231185913086, 0.048493895679712296, 0.04428226500749588, -0.002125018974766135, 0.04051334038376808, -0.011471728794276714, 0.07714208215475082, 0.02612615004181862, 0.0285150408744812, 0.03183551877737045, 0.010786465369164944, 0.0168526042252779, -0.006661251652985811, -0.018545418977737427, 0.05219756439328194, -0.01887022890150547, -0.019706256687641144, -0.0039895824156701565, 0.011986053548753262, 0.023268593475222588, 0.04685697332024574, 0.03136671707034111, 0.055081479251384735, 0.01957107149064541, 0.037495676428079605, 0.014806749299168587, 0.08199895173311234, 0.04081471636891365, 0.04318268969655037, 0.010746696032583714, 0.0055008335039019585, 0.07184482365846634, -0.02334192954003811, -0.02801280841231346, -0.006087313871830702, -0.010104493238031864, 0.12396065145730972, -0.0

In [8]:
# Length of the embedding attached to the first segment result.
len(segment_results[0].embedding)

576

### Pose Model

In [9]:
# POSE model: build it from the YOLOv8m pose checkpoint,
# place it on the first CUDA device, and give it a readable label.
pose_model = YOLO("yolov8m-pose.pt")
pose_model.to("cuda:0")
pose_model.name = "yolov8-pose"

In [10]:
# Run the pose model on the sample image at imgsz=640 on cuda:0, saving the
# prediction output to disk, then display the returned list of Results.
pose_results = pose_model.predict(
    image_file,
    imgsz=640,
    save=True,
    device='cuda:0',
    stream=False,
    verbose=True,
)
pose_results


image 1/1 /mnt/nvm/repos/ultralytics/ultralytics/assets/bus.jpg: 640x480 5 persons, 47.8ms
Speed: 2.3ms preprocess, 47.8ms inference, 12.8ms postprocess per image at shape (1, 3, 640, 480)
Results saved to [1mruns/pose/predict[0m


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 embedding: [-0.005805651191622019, 0.01715732365846634, -0.007114632520824671, 0.04298879951238632, 0.03602621704339981, 0.012062792666256428, 0.02608845941722393, 0.009263813495635986, -0.0019956340547651052, -0.006326591596007347, -0.005124674644321203, -0.017519773915410042, 0.04094398766756058, 0.015298505313694477, -0.0003718817897606641, 0.04158257693052292, 0.02083549089729786, 0.0705602616071701, -0.013626880943775177, -0.01781449466943741, -0.00751173309981823, 0.04172346740961075, 0.03974161669611931, -0.018130922690033913, 0.03543777018785477, 0.04478223994374275, -0.009346166625618935, -0.006366593763232231, 0.1028968095779419, 0.027727799490094185, -0.008361210115253925, 0.013919229619204998, -0.0005242515471763909, 0.17386376857757568, -0.0005595200345851481, 0.029884420335292816, 0.02591334655880928, 0.021563054993748665, 0.026542415842413902, -0.011528789065778

In [11]:
# Length of the embedding attached to the first pose result.
len(pose_results[0].embedding)

576

### Classification Model

In [12]:
# CLASSIFICATION model: build it from the YOLOv8m classification checkpoint,
# place it on the first CUDA device, and give it a readable label.
class_model = YOLO("yolov8m-cls.pt")
class_model.to("cuda:0")
class_model.name = "yolov8-class"

In [13]:
# Run the classification model on the sample image with the same arguments as
# the other predict cells (imgsz=640, cuda:0, save to disk), then display the
# returned list of Results.
class_results = class_model.predict(
    image_file,
    imgsz=640,
    save=True,
    device='cuda:0',
    stream=False,
    verbose=True,
)
class_results


image 1/1 /mnt/nvm/repos/ultralytics/ultralytics/assets/bus.jpg: 224x224 minibus 0.94, trolleybus 0.02, streetcar 0.02, police_van 0.01, amphibian 0.00, 62.0ms
Speed: 1.4ms preprocess, 62.0ms inference, 0.1ms postprocess per image at shape (1, 3, 224, 224)
Results saved to [1mruns/classify/predict[0m


[ultralytics.engine.results.Results object with attributes:
 
 boxes: None
 embedding: [-0.04771098494529724, -0.029833238571882248, -0.03018191270530224, -0.042616188526153564, -0.034940607845783234, -0.025568649172782898, -0.022161994129419327, -0.038862571120262146, -0.033802829682826996, -0.018356600776314735, -0.039002880454063416, -0.03227447345852852, -0.03197803720831871, -0.04683852195739746, -0.03767804428935051, -0.04585358127951622, -0.03545435890555382, -0.025057785212993622, -0.02768154814839363, -0.02951294183731079, -0.03254470229148865, -0.03740723803639412, -0.04238687455654144, -0.030395817011594772, -0.03891338035464287, -0.03397292643785477, -0.0292766522616148, -0.043166935443878174, -0.020971590653061867, -0.03695017844438553, -0.03824548050761223, -0.0329970121383667, -0.026089917868375778, -0.02678256295621395, -0.043018925935029984, -0.052108313888311386, -0.03989701718091965, 0.014619620516896248, -0.019492676481604576, -0.04034469649195671, -0.03851865604519

In [14]:
# Length of the embedding attached to the first classification result.
len(class_results[0].embedding)

768

## Check Vectors are the same

- The Detect, Segment, and Pose embeddings are all 576 in length
- The Classification embedding is 768 in length