## Step 1: Data Preprocessing

In [1]:
import os
import json
import spacy
import string
from tqdm import tqdm

# Load the spaCy English pipeline; only its tokenizer is needed below.
nlp = spacy.load("en_core_web_sm")

# Paths
ANNOTATION_FILE = "annotations/captions_train2017.json"
IMAGE_FOLDER = "train2017"
OUTPUT_JSON = "processed_captions.json"

# Load JSON. The encoding is given explicitly so the read does not depend on
# the platform default (JSON text is UTF-8).
with open(ANNOTATION_FILE, 'r', encoding="utf-8") as f:
    data = json.load(f)

# Build the punctuation-stripping table once instead of once per caption.
PUNCT_TABLE = str.maketrans('', '', string.punctuation)

# Mapping: image_id → [captions]
id_to_captions = {}
for annot in tqdm(data['annotations']):
    image_id = annot['image_id']
    caption = annot['caption'].lower().strip()

    # Remove punctuation
    caption = caption.translate(PUNCT_TABLE)

    # Tokenize only: make_doc() runs just the tokenizer, so the tagger, parser
    # and NER components (whose output is never read here) are skipped.
    tokens = [token.text for token in nlp.make_doc(caption) if not token.is_space]

    # Add start and end tokens
    cleaned_caption = "startseq " + " ".join(tokens) + " endseq"

    str_image_id = f"{image_id:012d}"  # Ensure zero-padded 12-digit string
    id_to_captions.setdefault(str_image_id, []).append(cleaned_caption)

# Build mapping: image_id → full path + captions (images missing on disk are dropped)
final_data = []
for image_id, captions in id_to_captions.items():
    filename = f"{image_id}.jpg"
    path = os.path.join(IMAGE_FOLDER, filename)
    if os.path.exists(path):
        final_data.append({
            "image_id": image_id,
            "file_path": path,
            "captions": captions
        })

# Save preprocessed data
with open(OUTPUT_JSON, "w") as f:
    json.dump(final_data, f, indent=2)

print(f"Saved {len(final_data)} image-caption entries to {OUTPUT_JSON}")



100%|██████████| 591753/591753 [40:11<00:00, 245.35it/s]


Saved 118287 image-caption entries to processed_captions.json


In [None]:
# Resizing Images

import os
import json
from PIL import Image
from tqdm import tqdm

# Constants
INPUT_JSON = "processed_captions.json"
RESIZED_DIR = "resized_train2017"
TARGET_SIZE = (224, 224)

# Handle resampling based on Pillow version
try:
    resample_mode = Image.Resampling.LANCZOS
except AttributeError:
    resample_mode = Image.LANCZOS  # Pillow releases that predate Image.Resampling (< 9.1)

# Create output directory if not exists
os.makedirs(RESIZED_DIR, exist_ok=True)

# Load image paths from JSON
with open(INPUT_JSON, "r") as f:
    data = json.load(f)

saved_count = 0

# Resize loop: one failure is reported and skipped so the rest of the run continues
for entry in tqdm(data, desc="Resizing images"):
    original_path = entry["file_path"]
    filename = os.path.basename(original_path)
    resized_path = os.path.join(RESIZED_DIR, filename)

    try:
        if not os.path.exists(original_path):
            print(f"Not found: {original_path}")
            continue

        # The context manager closes the source file handle promptly; convert()
        # returns an independent in-memory image that outlives the `with` block.
        with Image.open(original_path) as source_img:
            img = source_img.convert("RGB")
        img = img.resize(TARGET_SIZE, resample=resample_mode)
        img.save(resized_path)
        saved_count += 1

    except Exception as e:
        print(f"Failed: {original_path} → {e}")

print(f"\nDone: {saved_count} images saved in '{RESIZED_DIR}'")


Resizing images: 100%|██████████| 118287/118287 [12:47<00:00, 154.18it/s]


Done: 118287 images saved in 'resized_train2017'





In [2]:
import os
import json
import spacy
import string
from tqdm import tqdm

# Load the spaCy English pipeline; only its tokenizer is needed below.
nlp = spacy.load("en_core_web_sm")

# Paths
ANNOTATION_FILE = "annotations/captions_val2017.json"
IMAGE_FOLDER = "val2017"
OUTPUT_JSON = "processed_val_captions.json"

# Load annotations. The encoding is given explicitly so the read does not
# depend on the platform default (JSON text is UTF-8).
with open(ANNOTATION_FILE, 'r', encoding="utf-8") as f:
    data = json.load(f)

# Build the punctuation-stripping table once instead of once per caption.
PUNCT_TABLE = str.maketrans('', '', string.punctuation)

# Create mapping: image_id → [captions]
id_to_captions = {}
for annot in tqdm(data['annotations']):
    image_id = annot['image_id']
    caption = annot['caption'].lower().strip()

    # Remove punctuation
    caption = caption.translate(PUNCT_TABLE)

    # Tokenize only: make_doc() runs just the tokenizer, so the tagger, parser
    # and NER components (whose output is never read here) are skipped.
    tokens = [token.text for token in nlp.make_doc(caption) if not token.is_space]

    # Add start and end tokens
    cleaned_caption = "startseq " + " ".join(tokens) + " endseq"

    str_image_id = f"{image_id:012d}"  # Convert to 12-digit string
    id_to_captions.setdefault(str_image_id, []).append(cleaned_caption)

# Build final list (images missing on disk are dropped)
final_data = []
for image_id, captions in id_to_captions.items():
    filename = f"{image_id}.jpg"
    path = os.path.join(IMAGE_FOLDER, filename)
    if os.path.exists(path):
        final_data.append({
            "image_id": image_id,
            "file_path": path,
            "captions": captions
        })

# Save
with open(OUTPUT_JSON, "w") as f:
    json.dump(final_data, f, indent=2)

print(f"Saved {len(final_data)} validation image-caption pairs to {OUTPUT_JSON}")


100%|██████████| 25014/25014 [01:44<00:00, 239.08it/s]


Saved 5000 validation image-caption pairs to processed_val_captions.json


In [7]:
# Processed Val Images

import os
import json
from PIL import Image
from tqdm import tqdm

# Constants

INPUT_JSON = "processed_val_captions.json"
RESIZED_DIR = "resized_val2017"

TARGET_SIZE = (224, 224)

# Handle resampling based on Pillow version
try:
    resample_mode = Image.Resampling.LANCZOS
except AttributeError:
    resample_mode = Image.LANCZOS  # Pillow releases that predate Image.Resampling (< 9.1)

# Create output directory if not exists
os.makedirs(RESIZED_DIR, exist_ok=True)

# Load image paths from JSON
with open(INPUT_JSON, "r") as f:
    data = json.load(f)

saved_count = 0

# Resize loop: one failure is reported and skipped so the rest of the run continues
for entry in tqdm(data, desc="Resizing images"):
    original_path = entry["file_path"]
    filename = os.path.basename(original_path)
    resized_path = os.path.join(RESIZED_DIR, filename)

    try:
        if not os.path.exists(original_path):
            print(f"⚠️  Not found: {original_path}")
            continue

        # The context manager closes the source file handle promptly; convert()
        # returns an independent in-memory image that outlives the `with` block.
        with Image.open(original_path) as source_img:
            img = source_img.convert("RGB")
        img = img.resize(TARGET_SIZE, resample=resample_mode)
        img.save(resized_path)
        saved_count += 1

    except Exception as e:
        print(f"Failed: {original_path} → {e}")

print(f"\nDone: {saved_count} images saved in '{RESIZED_DIR}'")

Resizing images: 100%|██████████| 5000/5000 [00:31<00:00, 157.92it/s]


Done: 5000 images saved in 'resized_val2017'





## Step 2: Tokenizing and Vocabulary Setup

In [35]:
import json
from keras.preprocessing.text import Tokenizer
import pickle

# Upper bound handed to the Keras Tokenizer as `num_words`
MAX_VOCAB_SIZE = 35000  # or 5000 for stricter filtering

# Read both processed caption files (train first, then validation)
with open("processed_captions.json", "r") as f:
    train_data = json.load(f)
with open("processed_val_captions.json", "r") as f:
    val_data = json.load(f)

# Flatten every caption of every image into one list; the captions already
# carry their 'startseq' / 'endseq' markers.
all_captions = [
    caption
    for record in train_data + val_data
    for caption in record["captions"]
]

# filters='' keeps the captions untouched (punctuation was removed upstream);
# unseen words map to the "<unk>" token.
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE, oov_token="<unk>", filters='', lower=True)
tokenizer.fit_on_texts(all_captions)

# Persist the fitted tokenizer for later cells
with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

print(f"Tokenizer created with vocab size limit: {MAX_VOCAB_SIZE}")
print(f"Actual vocab used (word_index length): {len(tokenizer.word_index)}")


Tokenizer created with vocab size limit: 35000
Actual vocab used (word_index length): 29586


In [36]:
# Sanity check of the fitted tokenizer on hand-written sentences.
# The tokenizer was created with oov_token="<unk>", so any word it never saw
# during fitting is encoded as the <unk> index.
texts = [
    "a man is sitting on a bench",            # Everyday caption words, expected to be in the fitted vocabulary
    "flibbertigibbet is dancing joyfully",    # Starts with a word that is expected to be out-of-vocabulary
    "startseq a cat is playing endseq"         # Contains special tokens and common words
]

# Convert texts to sequences
sequences = tokenizer.texts_to_sequences(texts)

# Print each text next to its integer encoding
for text, seq in zip(texts, sequences):
    print(f"Text: {text}")
    print(f"Sequence: {seq}\n")

Text: a man is sitting on a bench
Sequence: [2, 12, 11, 14, 5, 2, 100]

Text: flibbertigibbet is dancing joyfully
Sequence: [1, 11, 2371, 9948]

Text: startseq a cat is playing endseq
Sequence: [3, 2, 49, 11, 55, 4]



In [37]:
import pickle

def save_tokenizer(tokenizer, file_path="tokenizer.pkl"):
    """Pickle ``tokenizer`` to ``file_path`` and print a confirmation line.

    Args:
        tokenizer: Any picklable object (the fitted tokenizer in this notebook).
        file_path: Destination file; overwritten if it already exists.
    """
    with open(file_path, "wb") as sink:
        pickle.dump(tokenizer, sink)
    print(f"Tokenizer saved to {file_path}")


In [38]:
import pickle

def load_tokenizer(file_path="tokenizer.pkl"):
    """Unpickle and return the object stored at ``file_path``.

    Prints a confirmation line once loading succeeded.

    Note:
        Unpickling can execute arbitrary code, so only point this at files
        produced by this notebook (or another trusted source).
    """
    with open(file_path, "rb") as source:
        restored = pickle.load(source)
    print(f"Tokenizer loaded from {file_path}")
    return restored


In [39]:
# After fitting the tokenizer: write it to the default path ("tokenizer.pkl")
save_tokenizer(tokenizer)

# Later (e.g. evaluation or inference): read it back from the same default path.
# This rebinds the notebook-level `tokenizer` name to the unpickled copy.
tokenizer = load_tokenizer()

Tokenizer saved to tokenizer.pkl
Tokenizer loaded from tokenizer.pkl


## Step 3: Image Feature Extraction

In [56]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import os
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tqdm import tqdm
import pickle
import json

# Paths
train_json = "processed_captions.json"
image_dir = "train2017"  # same as used in preprocessing
feature_save_path = "train_image_features_tfhub.pkl"
image_size = (224, 224)

# Frozen MobileNetV2 feature-vector module from TF Hub
mobilenet_v2_url = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4"
feature_extractor = hub.KerasLayer(mobilenet_v2_url, input_shape=(224, 224, 3), trainable=False)

# The processed caption file lists exactly the images that need features
with open(train_json, "r") as f:
    train_data = json.load(f)

def extract_features(image_path):
    """Return the MobileNetV2 feature vector of one image as a NumPy array.

    The image is loaded at ``image_size``, scaled to [0, 1], given a batch
    axis, passed through ``feature_extractor`` and squeezed.
    """
    pixels = img_to_array(load_img(image_path, target_size=image_size)) / 255.0
    embedding = feature_extractor(np.expand_dims(pixels, axis=0))
    return tf.squeeze(embedding).numpy()

# image_id (12-digit string) → feature vector; failures are reported and skipped
image_features = {}
for entry in tqdm(train_data):
    image_id = str(entry["image_id"]).zfill(12)
    image_path = entry["file_path"]
    try:
        image_features[image_id] = extract_features(image_path)
    except Exception as e:
        print(f"Error processing {image_path}: {e}")

# Cache the features on disk for the training cells
with open(feature_save_path, "wb") as f:
    pickle.dump(image_features, f)

print(f"Saved {len(image_features)} training image features to {feature_save_path}")


100%|██████████| 118287/118287 [1:20:23<00:00, 24.52it/s]


Saved 118287 training image features to train_image_features_tfhub.pkl


In [57]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import os
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tqdm import tqdm
import pickle
import json

# Paths
val_json = "processed_val_captions.json"
val_image_features_path = "val_image_features_tfhub.pkl"
image_size = (224, 224)

# Frozen MobileNetV2 feature-vector module from TF Hub
mobilenet_v2_url = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4"
feature_extractor = hub.KerasLayer(mobilenet_v2_url, input_shape=(224, 224, 3), trainable=False)

# The processed caption file lists exactly the images that need features
with open(val_json, "r") as f:
    val_data = json.load(f)

def extract_features(image_path):
    """Return the MobileNetV2 feature vector of one image as a NumPy array.

    The image is loaded at ``image_size``, scaled to [0, 1], given a batch
    axis, passed through ``feature_extractor`` and squeezed.
    """
    pixels = img_to_array(load_img(image_path, target_size=image_size)) / 255.0
    batch = np.expand_dims(pixels, axis=0)
    embedding = feature_extractor(batch)
    return tf.squeeze(embedding).numpy()

# image_id (12-digit string) → feature vector; failures are reported and skipped
val_image_features = {}
for entry in tqdm(val_data):
    image_id = str(entry["image_id"]).zfill(12)
    image_path = entry["file_path"]
    try:
        val_image_features[image_id] = extract_features(image_path)
    except Exception as e:
        print(f"Error processing {image_path}: {e}")

# Cache the features on disk for the evaluation cells
with open(val_image_features_path, "wb") as f:
    pickle.dump(val_image_features, f)

print(f"Saved {len(val_image_features)} validation features to {val_image_features_path}")


100%|██████████| 5000/5000 [03:33<00:00, 23.46it/s]

Saved 5000 validation features to val_image_features_tfhub.pkl





## Step 4: Captioning Training Data

In [None]:
# `train_image_features_tfhub` is not assigned by any visible cell, so on a
# fresh kernel the original one-liner raised NameError. The feature-extraction
# cell writes the features to this pickle, so load them from there.
# NOTE: unpickling can execute arbitrary code; only load files this notebook wrote.
import pickle

with open("train_image_features_tfhub.pkl", "rb") as f:
    train_image_features_tfhub = pickle.load(f)

# Normalise keys to zero-padded 12-digit strings, the format used for image ids
train_image_features = {str(int(k)).zfill(12): v for k, v in train_image_features_tfhub.items()}

In [10]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

class DataGenerator(Sequence):
    """Keras ``Sequence`` producing next-word training batches for captioning.

    Every caption is expanded once, at construction time, into
    ``(image_id, prefix_token_ids, next_token_id)`` samples. Each batch is
    ``([image_features, padded_prefixes], one_hot_next_tokens)``.

    Args:
        image_features: Mapping of image id → feature vector. Keys are
            normalised to zero-padded 12-digit strings.
        captions_dict: Mapping of image id → list of caption strings. Keys are
            normalised the same way.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences`` and
            ``word_index``.
        max_len: Length the prefixes are padded to.
        batch_size: Number of samples requested per batch.
        vocab_size: Width of the one-hot targets; defaults to
            ``len(tokenizer.word_index) + 1``.

    Notes:
        * Prefixes are padded with ``padding='post'`` here, whereas the
          ``create_sequences`` helpers in this notebook use the
          ``pad_sequences`` default. A model must be trained and decoded with
          the same convention.
        * Samples whose image has no feature vector are dropped when the batch
          is assembled, so a batch can be smaller than ``batch_size``.
    """

    def __init__(self, image_features, captions_dict, tokenizer, max_len, batch_size=64, vocab_size=None):
        # Give the base class a chance to set up its own state (newer Keras
        # releases warn when a Sequence subclass skips this call).
        super().__init__()

        self.image_features = {
            str(int(k)).zfill(12): v for k, v in image_features.items()
        }  # Normalize keys
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.batch_size = batch_size
        self.vocab_size = vocab_size or (len(tokenizer.word_index) + 1)

        # Normalize caption image IDs as well
        self.captions_dict = {
            str(int(k)).zfill(12): v for k, v in captions_dict.items()
        }
        self.image_ids = list(self.captions_dict.keys())

        # Expand captions into (image_id, in_seq, out_seq) samples
        self.data = []
        for img_id in self.image_ids:
            for seq in tokenizer.texts_to_sequences(self.captions_dict[img_id]):
                for i in range(1, len(seq)):
                    self.data.append((img_id, seq[:i], seq[i]))

        # One index per SAMPLE. The original indexed image ids and never read
        # the array back, so the per-epoch shuffle had no effect on batches.
        self.indexes = np.arange(len(self.data))
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.data) / self.batch_size))

    def __getitem__(self, idx):
        """Assemble batch ``idx`` following the current shuffled sample order."""
        start = idx * self.batch_size
        batch_indexes = self.indexes[start:start + self.batch_size]

        batch_image_features = []
        batch_in_seqs = []
        batch_out_seqs = []

        for sample_idx in batch_indexes:
            img_id, in_seq, out_seq = self.data[sample_idx]
            features = self.image_features.get(img_id)
            if features is None:
                # No feature vector for this image: drop the sample
                continue
            batch_image_features.append(features)
            batch_in_seqs.append(in_seq)
            batch_out_seqs.append(out_seq)

        # If no data found, return empty arrays with correct shapes
        if len(batch_image_features) == 0:
            print(f"Warning: Empty batch at index {idx}")
            return [np.empty((0, 1280)), np.empty((0, self.max_len))], np.empty((0, self.vocab_size))

        # Pad sequences and one-hot encode output
        batch_in_seqs = pad_sequences(batch_in_seqs, maxlen=self.max_len, padding='post')
        batch_out_seqs = to_categorical(batch_out_seqs, num_classes=self.vocab_size)
        batch_image_features = np.array(batch_image_features)

        return [batch_image_features, batch_in_seqs], batch_out_seqs

    def on_epoch_end(self):
        """Reshuffle the sample order in place; Keras calls this after each epoch."""
        np.random.shuffle(self.indexes)


In [None]:
# Smoke test: build the generator and print the shapes of its first usable batch.
# NOTE(review): `train_captions` is not defined in any cell shown here, and
# `max_len` is only computed in a later cell — confirm both exist before running.
train_generator = DataGenerator(train_image_features, train_captions, tokenizer, max_len)

# Safely get first non-empty batch
# (DataGenerator returns zero-row arrays when no sample of a batch has features)
for i in range(len(train_generator)):
    batch = train_generator[i]
    if batch[0][0].shape[0] > 0:
        print("Input Image Features Shape:", batch[0][0].shape)
        print("Input Sequences Shape:", batch[0][1].shape)
        print("Output Shape:", batch[1].shape)
        break


In [11]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def beam_search_caption(model, tokenizer, image_feature, max_len, beam_width=3, padding='post'):
    """Decode a caption for one image with beam search.

    Args:
        model: Object whose ``predict([image_feature, seq_input], verbose=0)``
            returns next-token probabilities shaped ``(1, vocab)`` or
            ``(1, max_len, vocab)``.
        tokenizer: Fitted tokenizer; ``word_index`` must contain 'startseq'
            and 'endseq'.
        image_feature: Image features including the batch axis.
        max_len: Padded input length, also the maximum number of decode steps.
        beam_width: Number of hypotheses kept after every step.
        padding: Side on which ``pad_sequences`` pads the partial caption.
            It must match how the model was trained. The default keeps the
            previous hard-coded 'post'.

    Returns:
        The best hypothesis as a space-joined string, without the start/end
        markers. Lower score is better (sum of negative log-probabilities).
    """
    start_token = tokenizer.word_index['startseq']
    end_token = tokenizer.word_index['endseq']

    sequences = [([start_token], 0.0)]  # (sequence, score)

    for _ in range(max_len):
        all_candidates = []
        for seq, score in sequences:
            # Finished hypotheses are carried over unchanged
            if seq[-1] == end_token:
                all_candidates.append((seq, score))
                continue

            seq_input = pad_sequences([seq], maxlen=max_len, padding=padding)

            preds = model.predict([image_feature, seq_input], verbose=0)

            # Handle if output is (1, vocab_size) or (1, max_len, vocab_size)
            if preds.ndim == 3:
                # Read the prediction at the last REAL token. With post-padding
                # that is not the final timestep (which is padding) — assumes a
                # per-timestep model aligned with its input; TODO confirm.
                last_step = min(len(seq), max_len) - 1 if padding == 'post' else -1
                preds = preds[0, last_step, :]
            else:
                preds = preds[0]

            top_indices = preds.argsort()[-beam_width:][::-1]
            for idx in top_indices:
                idx = int(idx)  # plain int keeps the hypotheses free of NumPy scalars
                # Anti-repetition: skip if token repeats 3 times
                if len(seq) >= 2 and seq[-1] == seq[-2] == idx:
                    continue
                candidate = seq + [idx]
                candidate_score = score - np.log(preds[idx] + 1e-10)
                all_candidates.append((candidate, candidate_score))

        # Every expansion was vetoed by the anti-repetition rule: keep the
        # current beams. Previously `sequences` became empty here and the
        # lookup of the best hypothesis below raised IndexError.
        if not all_candidates:
            break

        sequences = sorted(all_candidates, key=lambda tup: tup[1])[:beam_width]

        if all(seq[-1] == end_token for seq, _ in sequences):
            break

    best_seq = sequences[0][0]
    if end_token in best_seq:
        best_seq = best_seq[:best_seq.index(end_token)]

    inv_map = {v: k for k, v in tokenizer.word_index.items()}
    caption_words = [inv_map.get(idx, '') for idx in best_seq if idx not in [start_token, end_token]]

    return ' '.join(caption_words)


In [None]:
# Define the image ID (12-digit zero-padded string, matching the feature dict keys)
image_id = '000000000285'

# Extract and reshape image feature to match model input
image_feature = val_image_features.get(image_id)
if image_feature is None:
    raise ValueError(f"No image feature found for ID: {image_id}")
image_feature = np.expand_dims(image_feature, axis=0)  # Shape: (1, feature_dim)

# Generate caption using beam search
# NOTE(review): max_len=49 is hard-coded, while the caption-loading cell computes
# `max_len` from the data (37 in its recorded output) — confirm it matches the
# input length of the `model` in scope. `model` itself is defined in a later cell.
caption = beam_search_caption(model, tokenizer, image_feature, max_len=49, beam_width=5)
print("Generated Caption:", caption)

In [37]:
# Peek at the first ten keys of the validation feature dict to check the id format
print(list(val_image_features.keys())[:10])

['000000000139', '000000000285', '000000000632', '000000000724', '000000000776', '000000000785', '000000000802', '000000000872', '000000000885', '000000001000']


In [81]:
# word_index holds every word seen while fitting the tokenizer
print("Vocabulary size:", len(tokenizer.word_index))
# index_word is the reverse mapping (id → word); 'Not found' if id 20 is unused
print("Example word from index:", tokenizer.index_word.get(20, 'Not found'))

Vocabulary size: 29586
Example word from index: are


In [5]:
from pathlib import Path
import json
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks, preprocessing
from nltk.translate.bleu_score import corpus_bleu
from typing import List, Dict, Tuple

In [6]:
def create_sequences(tokenizer: preprocessing.text.Tokenizer,
                     max_len: int,
                     desc_list: List[str],
                     photo_feature: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Expand one image's captions into next-word training samples.

    For every caption and every split point ``i >= 1`` a sample is emitted:
    the image feature, the first ``i`` token ids post-padded to ``max_len``,
    and the one-hot encoding of token ``i``.

    Note:
        Later cells of this notebook redefine ``create_sequences`` using the
        default padding of ``pad_sequences``; this variant pads with
        ``padding='post'``.

    Returns:
        ``(image_features, padded_prefixes, one_hot_targets)`` as arrays with
        one row per sample.
    """
    n_classes = len(tokenizer.word_index) + 1
    image_rows, prefix_rows, target_rows = [], [], []

    for description in desc_list:
        token_ids = tokenizer.texts_to_sequences([description])[0]
        for split_at in range(1, len(token_ids)):
            padded_prefix = preprocessing.sequence.pad_sequences(
                [token_ids[:split_at]], maxlen=max_len, padding='post'
            )[0]
            one_hot_target = tf.keras.utils.to_categorical(
                [token_ids[split_at]], num_classes=n_classes
            )[0]
            image_rows.append(photo_feature)
            prefix_rows.append(padded_prefix)
            target_rows.append(one_hot_target)

    return np.array(image_rows), np.array(prefix_rows), np.array(target_rows)

In [7]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

def create_sequences(tokenizer, max_len, captions, photo_feat):
    """Expand one image's captions into next-word training samples.

    For every caption and every split point ``i >= 1`` a sample is emitted:
    the float32 image feature, the first ``i`` token ids padded to ``max_len``
    (``pad_sequences`` defaults), and a float32 one-hot vector for token ``i``.

    Returns:
        ``(image_features, padded_prefixes, one_hot_targets)`` as arrays with
        one row per sample.
    """
    n_classes = len(tokenizer.word_index) + 1
    feature_vec = np.array(photo_feat, dtype=np.float32)  # ensure dtype

    image_rows, prefix_rows, target_rows = [], [], []
    for text in captions:
        token_ids = tokenizer.texts_to_sequences([text])[0]
        for split_at in range(1, len(token_ids)):
            one_hot = np.zeros(n_classes, dtype=np.float32)
            one_hot[token_ids[split_at]] = 1.0

            image_rows.append(feature_vec)
            prefix_rows.append(pad_sequences([token_ids[:split_at]], maxlen=max_len)[0])
            target_rows.append(one_hot)

    return np.array(image_rows), np.array(prefix_rows), np.array(target_rows)




In [8]:
import tensorflow as tf

def data_generator(features, descriptions, tokenizer, max_len):
    """Yield ``(image_feature, padded_prefix, one_hot_target)`` samples one by one.

    Images are visited in the iteration order of ``features``; an image that
    has no entry in ``descriptions`` is skipped.
    """
    for img_id, feat in features.items():
        if img_id in descriptions:
            image_rows, prefix_rows, target_rows = create_sequences(
                tokenizer, max_len, descriptions[img_id], feat
            )
            for row in range(len(image_rows)):
                yield image_rows[row], prefix_rows[row], target_rows[row]

def build_dataset(features, descriptions, tokenizer, max_len, batch_size=32):
    """Build a streaming ``tf.data.Dataset`` of ``((image_feat, prefix), target)``.

    Samples come lazily from ``create_sequences`` one image at a time, so the
    full one-hot target matrix is never held in memory at once.

    Args:
        features: Mapping image id → feature vector.
        descriptions: Mapping image id → list of captions; images without an
            entry are skipped.
        tokenizer: Fitted tokenizer forwarded to ``create_sequences``.
        max_len: Padded prefix length forwarded to ``create_sequences``.
        batch_size: Batch size of the returned dataset.

    Returns:
        A shuffled (buffer of 1024 samples), batched and prefetched dataset.

    Raises:
        ValueError: If no image yields any training sample.
    """
    def gen():
        for img_id, feat in features.items():
            if img_id not in descriptions:
                continue
            X1, X2, y = create_sequences(tokenizer, max_len, descriptions[img_id], feat)
            for i in range(len(X1)):
                # Yield inputs tuple and output separately
                yield (X1[i], X2[i]), y[i]

    # Peek at one sample to learn the tensor shapes. A sample is nested as
    # ((feat, seq), target); the original indexed it as a flat triple and
    # failed with AttributeError before any dataset was built.
    try:
        (sample_feat, sample_seq), sample_target = next(gen())
    except StopIteration:
        raise ValueError("No image has both features and descriptions; nothing to train on") from None

    output_signature = (
        (
            tf.TensorSpec(shape=sample_feat.shape, dtype=tf.float32),
            tf.TensorSpec(shape=sample_seq.shape, dtype=tf.int32),
        ),
        tf.TensorSpec(shape=sample_target.shape, dtype=tf.float32),
    )

    # output_signature replaces the deprecated output_types / output_shapes pair
    dataset = tf.data.Dataset.from_generator(gen, output_signature=output_signature)
    dataset = dataset.shuffle(1024).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset


In [9]:
from pathlib import Path
import json, pickle, numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

# ---------- paths ----------
data_dir = Path(".")
train_feat_path = data_dir / "train_image_features_tfhub.pkl"
val_feat_path   = data_dir / "val_image_features_tfhub.pkl"
train_cap_path  = data_dir / "processed_captions.json"
val_cap_path    = data_dir / "processed_val_captions.json"
tokenizer_path  = data_dir / "tokenizer.pkl"

# ---------- load and fix val captions ----------
with open(val_cap_path, encoding="utf-8") as fh:
    val_caps = json.load(fh)
if isinstance(val_caps, list):
    # Only announce the conversion when it actually happens
    print("Converting val_caps from list to dict...")
    val_caps = {item["image_id"]: item["captions"] for item in val_caps}

# ---------- load and fix train captions ----------
with open(train_cap_path, encoding="utf-8") as fh:
    train_caps = json.load(fh)
if isinstance(train_caps, list):
    print("Converting train_caps from list to dict...")
    train_caps = {item["image_id"]: item["captions"] for item in train_caps}

# ---------- merge captions ----------
# Train entries come first in the merged dict, validation entries after them.
# NOTE(review): validation images therefore become training data as soon as
# MAX_IMAGES exceeds the number of training images, while the BLEU cell scores
# on the validation set — keep the splits apart if MAX_IMAGES is raised.
image_captions_dict = {**train_caps, **val_caps}

# ---------- load image features ----------
# pickle can execute arbitrary code: only load files this notebook produced.
with open(train_feat_path, "rb") as fh:
    train_feats = pickle.load(fh)
with open(val_feat_path, "rb") as fh:
    val_feats = pickle.load(fh)

# ---------- merge image features ----------
image_features_dict = {**train_feats, **val_feats}

# ---------- reduce dataset size ----------
MAX_IMAGES = 1000  # adjust based on available RAM
subset_keys = [k for k in image_captions_dict if k in image_features_dict][:MAX_IMAGES]
image_captions_dict = {k: image_captions_dict[k] for k in subset_keys}
image_features_dict = {k: image_features_dict[k] for k in subset_keys}

# ---------- load tokenizer ----------
with open(tokenizer_path, "rb") as fh:
    tokenizer = pickle.load(fh)

# ---------- compute max_len ----------
# Flatten in linear time; sum(lists, []) re-copies the accumulated list on
# every addition and becomes quadratic on the full dataset.
all_caps = [cap for caps in image_captions_dict.values() for cap in caps]
max_len = max(len(c.split()) for c in all_caps)

print(f"Loaded {len(image_features_dict)} images, {len(all_caps)} captions, max_len={max_len}")
print(f"Average captions per image: {len(all_caps)/len(image_captions_dict):.2f}")

# ---------- build_dataset function ----------
# In-memory variant: every sample is materialised before the dataset is built.
def build_dataset(features, descriptions, tokenizer, max_len, batch_size=32):
    """Materialise all training samples and wrap them in a ``tf.data.Dataset``.

    ``create_sequences`` is run for each image in ``descriptions`` (its
    feature is looked up in ``features``), the per-image arrays are
    concatenated, and the result is shuffled (buffer of 1000 samples),
    batched and prefetched.

    Returns:
        Dataset of ``((image_features, padded_prefixes), one_hot_targets)``.
    """
    per_image = [
        create_sequences(tokenizer, max_len, caps, features[img_id])
        for img_id, caps in descriptions.items()
    ]

    # Stack the per-image pieces into one array per model input / output
    image_inputs = np.concatenate([parts[0] for parts in per_image], axis=0)
    text_inputs = np.concatenate([parts[1] for parts in per_image], axis=0)
    targets = np.concatenate([parts[2] for parts in per_image], axis=0)

    slices = tf.data.Dataset.from_tensor_slices(((image_inputs, text_inputs), targets))
    shuffled = slices.shuffle(buffer_size=1000)
    batched = shuffled.batch(batch_size)
    return batched.prefetch(buffer_size=tf.data.AUTOTUNE)

# ---------- create_sequences function ----------
def create_sequences(tokenizer, max_len, captions, photo_feat):
    """Expand one image's captions into next-word training samples.

    For every caption and every split point ``i >= 1`` a sample is emitted:
    the float32 image feature, the first ``i`` token ids padded to ``max_len``
    (``pad_sequences`` defaults), and a float32 one-hot vector for token ``i``.

    Returns:
        ``(image_features, padded_prefixes, one_hot_targets)`` as arrays with
        one row per sample.
    """
    n_classes = len(tokenizer.word_index) + 1
    feature_vec = np.array(photo_feat, dtype=np.float32)

    image_rows, prefix_rows, target_rows = [], [], []
    for text in captions:
        token_ids = tokenizer.texts_to_sequences([text])[0]
        for split_at in range(1, len(token_ids)):
            one_hot = np.zeros(n_classes, dtype=np.float32)
            one_hot[token_ids[split_at]] = 1.0

            image_rows.append(feature_vec)
            prefix_rows.append(pad_sequences([token_ids[:split_at]], maxlen=max_len)[0])
            target_rows.append(one_hot)

    return np.array(image_rows), np.array(prefix_rows), np.array(target_rows)

# ---------- build the dataset ----------
# Uses the build_dataset / create_sequences definitions from this cell (they
# replace the same-named functions defined in earlier cells) on the
# MAX_IMAGES-limited dictionaries prepared above.
dataset = build_dataset(
    features=image_features_dict,
    descriptions=image_captions_dict,
    tokenizer=tokenizer,
    max_len=max_len,
    batch_size=32
)


Converting val_caps from list to dict...
Converting train_caps from list to dict...
Loaded 1000 images, 5007 captions, max_len=37
Average captions per image: 5.01


## Step 5: Image Caption Training Model

In [40]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, add

# Hyperparameters
vocab_size = len(tokenizer.word_index) + 1  # +1 for the padding index 0
embedding_dim = 256
units = 256

# Image branch: 1280-d feature vector → dropout → dense projection.
# Its width must equal the LSTM width because the two branches are summed.
inputs1 = Input(shape=(1280,))  # shape matches your extracted features
fe1 = Dropout(0.5)(inputs1)
fe2 = Dense(units, activation='relu')(fe1)

# Text branch: token ids → embedding (index 0 masked as padding) → dropout → LSTM
inputs2 = Input(shape=(max_len,))
se1 = Embedding(vocab_size, embedding_dim, mask_zero=True)(inputs2)
se2 = Dropout(0.5)(se1)
se3 = LSTM(units)(se2)

# Decoder: element-wise sum of both branches, then a softmax over the vocabulary
decoder1 = add([fe2, se3])
decoder2 = Dense(units, activation='relu')(decoder1)
outputs = Dense(vocab_size, activation='softmax')(decoder2)

# Targets are one-hot vectors, hence categorical cross-entropy
model = Model(inputs=[inputs1, inputs2], outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam')

model.summary()


Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_7 (InputLayer)           [(None, 37)]         0           []                               
                                                                                                  
 input_6 (InputLayer)           [(None, 1280)]       0           []                               
                                                                                                  
 embedding_5 (Embedding)        (None, 37, 256)      7574272     ['input_7[0][0]']                
                                                                                                  
 dropout_10 (Dropout)           (None, 1280)         0           ['input_6[0][0]']                
                                                                                            

In [42]:
# Cap the amount of training data: keep only the first LIMIT // 32 batches.
# The divisor 32 has to match the batch_size passed to build_dataset.
LIMIT = 35000
subset_dataset = dataset.take(LIMIT // 32)  # batch size 32

# Train for 10 epochs; no validation data is passed to fit()
history = model.fit(
    subset_dataset,
    epochs=10,
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [43]:
# Persist the trained captioning model to the "saved_model_dir" directory
# (save_format="tf" selects the TensorFlow SavedModel format)
model.save("saved_model_dir", save_format="tf")



INFO:tensorflow:Assets written to: saved_model_dir\assets


INFO:tensorflow:Assets written to: saved_model_dir\assets


In [44]:
from tensorflow.keras.models import load_model
# Reload the model from the directory written by the save cell; this rebinds
# the notebook-level `model` name to the restored copy.
model = load_model("saved_model_dir")

In [45]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load the trained model (already done)
# Load the tokenizer and max_len (already done)

# Same frozen MobileNetV2 feature-vector module that produced the cached features
mobilenet_v2_url = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4"
feature_extractor = hub.KerasLayer(mobilenet_v2_url, input_shape=(224, 224, 3), trainable=False)

# Feature extraction for a single (test) image
def extract_features(image_path):
    """Return the MobileNetV2 feature vector of one image as a NumPy array.

    The image is loaded at 224x224, scaled to [0, 1], given a batch axis,
    passed through ``feature_extractor`` and squeezed.
    """
    pixels = img_to_array(load_img(image_path, target_size=(224, 224))) / 255.0
    batch = np.expand_dims(pixels, axis=0)
    return tf.squeeze(feature_extractor(batch)).numpy()

# Greedy caption decoding (a later cell redefines this function)
def generate_caption(model, tokenizer, photo_features, max_len):
    """Greedily decode a caption for one image.

    Starting from 'startseq', the most probable next word is appended until
    'endseq' is produced, the predicted id has no word, or ``max_len`` steps
    were taken. Occurrences of the two markers are then removed from the text.
    """
    words = ['startseq']
    for _ in range(max_len):
        encoded = tokenizer.texts_to_sequences([' '.join(words)])[0]
        padded = tf.keras.preprocessing.sequence.pad_sequences([encoded], maxlen=max_len)
        scores = model.predict([np.expand_dims(photo_features, axis=0), padded], verbose=0)
        next_word = tokenizer.index_word.get(np.argmax(scores), None)
        if next_word is None:
            break
        words.append(next_word)
        if next_word == 'endseq':
            break
    return ' '.join(words).replace('startseq', '').replace('endseq', '').strip()


In [52]:
def generate_caption(model, tokenizer, photo_features, max_len):
    """Greedily decode a caption for one image.

    Args:
        model: Object whose ``predict([features, sequence], verbose=0)``
            returns next-word probabilities shaped ``(1, vocab)``.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences`` and
            ``index_word``.
        photo_features: Feature vector of the image, without a batch axis.
        max_len: Padded input length and maximum number of decode steps.

    Returns:
        The generated words joined by single spaces, without the
        'startseq' / 'endseq' markers.
    """
    # The image does not change between steps: add the batch axis once
    photo_batch = np.expand_dims(photo_features, axis=0)

    words = []
    for _ in range(max_len):
        sequence = tokenizer.texts_to_sequences([' '.join(['startseq'] + words)])[0]
        sequence = tf.keras.preprocessing.sequence.pad_sequences([sequence], maxlen=max_len)

        probs = np.asarray(model.predict([photo_batch, sequence], verbose=0))[0]
        token_id = int(np.argmax(probs))
        word = tokenizer.index_word.get(token_id, None)

        if word == '<unk>':
            # Fall back to the best non-<unk> word. Retrying with the unchanged
            # input (the previous `continue`) reproduces the same prediction on
            # every remaining step and ends the caption early.
            masked = probs.astype(float)  # astype returns a copy
            masked[token_id] = -np.inf
            token_id = int(np.argmax(masked))
            word = tokenizer.index_word.get(token_id, None)

        if word is None or word == 'endseq':
            break
        words.append(word)

    # Only generated words are collected, so nothing has to be sliced off. The
    # old split()[1:-1] dropped the last real word whenever decoding stopped
    # without producing 'endseq'.
    return ' '.join(words)


In [53]:
# Caption one validation image end to end (JPEG → features → greedy decoding).
# The path is relative to the notebook's working directory, like the "val2017"
# folder used by the preprocessing cells, instead of an absolute path that
# exists on one machine only.
import os

image_path = os.path.join("val2017", "000000000285.jpg")
photo_features = extract_features(image_path)  # returns shape (1280,)
caption = generate_caption(model, tokenizer, photo_features, max_len)
print("Generated Caption:", caption)

Generated Caption: a man in a kitchen with a huge black floor


## BLEU Evaluation

In [55]:
import pickle
import spacy
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Load spacy tokenizer (English)
nlp = spacy.load("en_core_web_sm")

# Markers added to every caption during preprocessing. generate_caption()
# strips them from its output, so they must be stripped from references too.
SEQUENCE_MARKERS = {"startseq", "endseq"}

def spacy_tokenize(text):
    """Return the lower-cased spaCy tokens of ``text``."""
    return [token.text.lower() for token in nlp(text)]

def reference_tokens(caption):
    """Tokenize a stored reference caption and drop its start/end markers."""
    return [tok for tok in spacy_tokenize(caption) if tok not in SEQUENCE_MARKERS]

# Load pre-extracted validation image features
# (pickle can execute arbitrary code: only load files this notebook produced)
with open("val_image_features_tfhub.pkl", "rb") as f:
    val_image_features = pickle.load(f)

# Validation captions: list of dicts with 'image_id' and 'captions' (the references)
import json
with open("processed_val_captions.json", "r") as f:
    val_data = json.load(f)

smooth_fn = SmoothingFunction().method1

bleu_scores = []

for entry in val_data:
    image_id = str(entry["image_id"]).zfill(12)
    # Leaving the markers in made every reference two tokens longer than its
    # real text, which skews the reference length BLEU's brevity penalty uses.
    references = [reference_tokens(c) for c in entry["captions"]]

    photo_feat = val_image_features[image_id]
    generated_caption = generate_caption(model, tokenizer, photo_feat, max_len)
    tokenized_candidate = spacy_tokenize(generated_caption)

    score = sentence_bleu(
        references, tokenized_candidate, smoothing_function=smooth_fn
    )
    bleu_scores.append(score)

# Mean of the per-image sentence BLEU scores; 0.0 when there is nothing to score
average_bleu = sum(bleu_scores) / len(bleu_scores) if bleu_scores else 0.0
print(f"Average BLEU score on validation set: {average_bleu:.4f}")

Average BLEU score on validation set: 0.0703


## Step 6: Image Segmentation

In [62]:
import os
from ultralytics import YOLO
from PIL import Image
from tqdm import tqdm

# Paths
# NOTE(review): absolute, machine-specific locations; adjust when running
# on another machine.
val_image_dir = r"D:\Image Captioning and Segmentation\val2017"
segmented_output_dir = r"D:\Image Captioning and Segmentation\val2017_segmented"
os.makedirs(segmented_output_dir, exist_ok=True)

# Load segmentation model
# NOTE(review): this rebinds the notebook-global `model`, which the captioning
# cells above pass to generate_caption(). Reload the captioning model before
# re-running those cells.
model = YOLO("yolov8n-seg.pt")  # Ensure this is a segmentation model

# Device: 'cpu' is safer, switch to 'cuda' if it's stable for you
device_type = 'cpu'  # or 'cuda'

# Batch processing: collect the image files once. os.listdir() returns names
# in arbitrary order, so sort them to make runs reproducible.
image_files = sorted(
    f for f in os.listdir(val_image_dir)
    if f.lower().endswith(('.jpg', '.png', '.jpeg'))
)

for img_file in tqdm(image_files, desc="Segmenting Images"):
    image_path = os.path.join(val_image_dir, img_file)

    # Run segmentation
    try:
        # verbose=False suppresses the per-image log lines that would otherwise
        # flood the cell output and bury the tqdm progress bar.
        results = model(image_path, device=device_type, verbose=False)

        # Results.plot() returns the annotated image as a BGR array, while PIL
        # expects RGB. Reverse the channel axis so red and blue are not swapped
        # in the saved file.
        annotated_bgr = results[0].plot()
        segmented_img = Image.fromarray(annotated_bgr[..., ::-1])

        # Save segmented image under the same file name as the input
        output_path = os.path.join(segmented_output_dir, img_file)
        segmented_img.save(output_path)
    except Exception as e:
        # Best effort: report the failing file and continue with the rest.
        print(f"Error processing {img_file}: {e}")


Segmenting Images:   0%|          | 0/5000 [00:00<?, ?it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000000139.jpg: 448x640 1 person, 3 chairs, 2 potted plants, 1 dining table, 2 tvs, 1 refrigerator, 1 clock, 1 vase, 111.3ms
Speed: 2.0ms preprocess, 111.3ms inference, 14.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   0%|          | 1/5000 [00:00<21:29,  3.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000000285.jpg: 640x608 1 bear, 85.6ms
Speed: 1.8ms preprocess, 85.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:   0%|          | 2/5000 [00:00<15:19,  5.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000000632.jpg: 512x640 1 bottle, 3 potted plants, 1 bed, 143.8ms
Speed: 3.0ms preprocess, 143.8ms inference, 6.6ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:   0%|          | 3/5000 [00:00<16:19,  5.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000000724.jpg: 640x480 1 stop sign, 178.2ms
Speed: 4.9ms preprocess, 178.2ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   0%|          | 4/5000 [00:00<17:29,  4.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000000776.jpg: 640x448 4 teddy bears, 130.4ms
Speed: 2.4ms preprocess, 130.4ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   0%|          | 5/5000 [00:01<16:49,  4.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000000785.jpg: 448x640 1 person, 1 skis, 68.5ms
Speed: 2.5ms preprocess, 68.5ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   0%|          | 6/5000 [00:01<14:41,  5.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000000802.jpg: 640x448 1 oven, 1 refrigerator, 68.7ms
Speed: 3.0ms preprocess, 68.7ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   0%|          | 7/5000 [00:01<13:10,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000000872.jpg: 640x640 2 persons, 176.9ms
Speed: 2.4ms preprocess, 176.9ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   0%|          | 8/5000 [00:01<15:05,  5.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000000885.jpg: 448x640 3 persons, 1 sports ball, 1 tennis racket, 70.1ms
Speed: 1.9ms preprocess, 70.1ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   0%|          | 9/5000 [00:01<13:48,  6.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001000.jpg: 480x640 13 persons, 1 handbag, 1 tennis racket, 129.7ms
Speed: 3.6ms preprocess, 129.7ms inference, 14.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   0%|          | 10/5000 [00:01<15:24,  5.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001268.jpg: 448x640 4 persons, 1 bird, 4 handbags, 67.4ms
Speed: 3.1ms preprocess, 67.4ms inference, 8.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   0%|          | 11/5000 [00:02<14:30,  5.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001296.jpg: 640x448 1 person, 1 cell phone, 71.3ms
Speed: 2.8ms preprocess, 71.3ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   0%|          | 12/5000 [00:02<13:13,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001353.jpg: 640x480 7 persons, 92.1ms
Speed: 14.2ms preprocess, 92.1ms inference, 7.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   0%|          | 13/5000 [00:02<13:32,  6.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001425.jpg: 512x640 1 cup, 1 sandwich, 68.0ms
Speed: 2.4ms preprocess, 68.0ms inference, 2.6ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:   0%|          | 14/5000 [00:02<12:21,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001490.jpg: 320x640 1 person, 99.5ms
Speed: 2.7ms preprocess, 99.5ms inference, 1.9ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:   0%|          | 15/5000 [00:02<12:14,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001503.jpg: 480x640 1 person, 1 tv, 1 laptop, 4 mouses, 2 keyboards, 75.8ms
Speed: 4.4ms preprocess, 75.8ms inference, 9.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   0%|          | 16/5000 [00:02<12:25,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001532.jpg: 480x640 7 cars, 2 trucks, 1 traffic light, 63.5ms
Speed: 3.4ms preprocess, 63.5ms inference, 9.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   0%|          | 17/5000 [00:02<12:13,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001584.jpg: 640x640 7 persons, 3 buss, 88.6ms
Speed: 4.3ms preprocess, 88.6ms inference, 12.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   0%|          | 18/5000 [00:03<13:07,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001675.jpg: 480x640 1 cat, 1 laptop, 62.7ms
Speed: 2.4ms preprocess, 62.7ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   0%|          | 19/5000 [00:03<11:55,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001761.jpg: 640x448 2 airplanes, 65.0ms
Speed: 2.5ms preprocess, 65.0ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   0%|          | 20/5000 [00:03<11:12,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001818.jpg: 448x640 1 zebra, 72.4ms
Speed: 2.8ms preprocess, 72.4ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   0%|          | 21/5000 [00:03<10:51,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000001993.jpg: 448x640 1 chair, 1 bed, 2 dining tables, 87.0ms
Speed: 3.0ms preprocess, 87.0ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   0%|          | 22/5000 [00:03<11:13,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002006.jpg: 480x640 3 persons, 1 bus, 79.6ms
Speed: 4.0ms preprocess, 79.6ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   0%|          | 23/5000 [00:03<11:23,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002149.jpg: 448x640 4 apples, 70.9ms
Speed: 3.1ms preprocess, 70.9ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   0%|          | 24/5000 [00:03<11:11,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002153.jpg: 480x640 5 persons, 2 baseball bats, 71.7ms
Speed: 2.3ms preprocess, 71.7ms inference, 6.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   0%|          | 25/5000 [00:03<11:18,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002157.jpg: 448x640 5 wine glasss, 2 cups, 2 knifes, 2 bowls, 1 sandwich, 1 dining table, 66.4ms
Speed: 2.4ms preprocess, 66.4ms inference, 11.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|          | 26/5000 [00:04<11:45,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002261.jpg: 448x640 1 person, 66.0ms
Speed: 3.0ms preprocess, 66.0ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|          | 27/5000 [00:04<11:06,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002299.jpg: 416x640 24 persons, 140.7ms
Speed: 4.2ms preprocess, 140.7ms inference, 19.9ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:   1%|          | 28/5000 [00:04<14:09,  5.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002431.jpg: 640x480 2 persons, 1 wine glass, 2 cups, 1 knife, 1 spoon, 1 bowl, 1 dining table, 71.1ms
Speed: 2.2ms preprocess, 71.1ms inference, 8.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   1%|          | 29/5000 [00:04<13:32,  6.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002473.jpg: 448x640 4 persons, 1 skis, 1 snowboard, 66.4ms
Speed: 3.1ms preprocess, 66.4ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|          | 30/5000 [00:04<12:45,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002532.jpg: 640x480 1 person, 1 backpack, 1 skis, 91.7ms
Speed: 3.2ms preprocess, 91.7ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   1%|          | 31/5000 [00:04<12:39,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002587.jpg: 480x640 2 bananas, 1 donut, 73.5ms
Speed: 4.2ms preprocess, 73.5ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   1%|          | 32/5000 [00:05<12:12,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002592.jpg: 384x640 1 cup, 1 knife, 114.9ms
Speed: 2.4ms preprocess, 114.9ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   1%|          | 33/5000 [00:05<12:36,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002685.jpg: 576x640 7 persons, 153.7ms
Speed: 1.9ms preprocess, 153.7ms inference, 8.3ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:   1%|          | 34/5000 [00:05<14:23,  5.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000002923.jpg: 480x640 1 bird, 67.1ms
Speed: 3.5ms preprocess, 67.1ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   1%|          | 35/5000 [00:05<12:55,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000003156.jpg: 640x448 1 person, 63.3ms
Speed: 2.5ms preprocess, 63.3ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   1%|          | 36/5000 [00:05<11:46,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000003255.jpg: 384x640 5 persons, 57.7ms
Speed: 2.6ms preprocess, 57.7ms inference, 4.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   1%|          | 37/5000 [00:05<10:59,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000003501.jpg: 640x640 3 broccolis, 86.8ms
Speed: 4.9ms preprocess, 86.8ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   1%|          | 38/5000 [00:05<11:24,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000003553.jpg: 448x640 2 persons, 1 bench, 1 skateboard, 59.2ms
Speed: 3.1ms preprocess, 59.2ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|          | 39/5000 [00:06<10:50,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000003661.jpg: 384x640 1 cup, 3 bananas, 70.3ms
Speed: 2.6ms preprocess, 70.3ms inference, 5.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   1%|          | 40/5000 [00:06<10:48,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000003845.jpg: 480x640 1 cup, 3 broccolis, 3 carrots, 1 chair, 1 dining table, 65.4ms
Speed: 3.1ms preprocess, 65.4ms inference, 9.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   1%|          | 41/5000 [00:06<11:05,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000003934.jpg: 640x480 8 persons, 1 wine glass, 1 couch, 1 remote, 1 book, 71.7ms
Speed: 5.2ms preprocess, 71.7ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   1%|          | 42/5000 [00:06<11:44,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000004134.jpg: 448x640 17 persons, 1 tie, 1 wine glass, 1 cup, 1 dining table, 1 laptop, 69.2ms
Speed: 3.2ms preprocess, 69.2ms inference, 20.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|          | 43/5000 [00:06<12:49,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000004395.jpg: 640x448 1 person, 1 tie, 67.0ms
Speed: 2.7ms preprocess, 67.0ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   1%|          | 44/5000 [00:06<11:58,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000004495.jpg: 480x640 1 chair, 1 bed, 1 tv, 74.2ms
Speed: 4.6ms preprocess, 74.2ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   1%|          | 45/5000 [00:06<11:44,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000004765.jpg: 640x640 1 person, 1 surfboard, 96.4ms
Speed: 4.7ms preprocess, 96.4ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   1%|          | 46/5000 [00:07<12:05,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000004795.jpg: 480x640 1 cat, 3 laptops, 1 cell phone, 72.6ms
Speed: 2.9ms preprocess, 72.6ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   1%|          | 47/5000 [00:07<11:41,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000005001.jpg: 480x640 18 persons, 74.5ms
Speed: 2.7ms preprocess, 74.5ms inference, 18.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   1%|          | 48/5000 [00:07<12:51,  6.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000005037.jpg: 448x640 3 persons, 2 cars, 1 bus, 89.9ms
Speed: 2.9ms preprocess, 89.9ms inference, 10.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|          | 49/5000 [00:07<13:04,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000005060.jpg: 640x480 1 person, 78.4ms
Speed: 2.6ms preprocess, 78.4ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   1%|          | 50/5000 [00:07<12:17,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000005193.jpg: 448x640 7 persons, 1 snowboard, 2 surfboards, 1 bottle, 2 cell phones, 70.9ms
Speed: 3.1ms preprocess, 70.9ms inference, 12.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|          | 51/5000 [00:07<12:34,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000005477.jpg: 352x640 1 airplane, 138.6ms
Speed: 2.5ms preprocess, 138.6ms inference, 2.0ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:   1%|          | 52/5000 [00:08<13:21,  6.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000005503.jpg: 640x448 1 person, 2 toilets, 70.5ms
Speed: 3.0ms preprocess, 70.5ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   1%|          | 53/5000 [00:08<12:26,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000005529.jpg: 640x448 1 person, 68.6ms
Speed: 2.2ms preprocess, 68.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   1%|          | 54/5000 [00:08<11:40,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000005586.jpg: 480x640 4 persons, 1 skateboard, 1 tennis racket, 76.4ms
Speed: 3.7ms preprocess, 76.4ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   1%|          | 55/5000 [00:08<11:37,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000005600.jpg: 384x640 1 cup, 2 bowls, 2 oranges, 1 dining table, 64.5ms
Speed: 2.9ms preprocess, 64.5ms inference, 5.3ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   1%|          | 56/5000 [00:08<11:18,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000005992.jpg: 448x640 1 car, 5 sheeps, 71.5ms
Speed: 2.1ms preprocess, 71.5ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|          | 57/5000 [00:08<11:08,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000006012.jpg: 544x640 3 bananas, 1 apple, 203.4ms
Speed: 2.0ms preprocess, 203.4ms inference, 5.3ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:   1%|          | 58/5000 [00:08<14:12,  5.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000006040.jpg: 352x640 1 person, 1 car, 1 bus, 1 train, 49.6ms
Speed: 2.3ms preprocess, 49.6ms inference, 3.2ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:   1%|          | 59/5000 [00:09<12:29,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000006213.jpg: 448x640 (no detections), 64.6ms
Speed: 2.0ms preprocess, 64.6ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|          | 60/5000 [00:09<11:12,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000006460.jpg: 448x640 1 person, 1 surfboard, 59.9ms
Speed: 2.8ms preprocess, 59.9ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|          | 61/5000 [00:09<10:34,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000006471.jpg: 448x640 10 persons, 1 baseball bat, 1 baseball glove, 65.7ms
Speed: 3.6ms preprocess, 65.7ms inference, 9.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|          | 62/5000 [00:09<11:00,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000006614.jpg: 512x640 3 donuts, 136.9ms
Speed: 3.6ms preprocess, 136.9ms inference, 3.6ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:   1%|▏         | 63/5000 [00:09<12:24,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000006723.jpg: 384x640 4 cars, 1 bus, 71.1ms
Speed: 2.5ms preprocess, 71.1ms inference, 4.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   1%|▏         | 64/5000 [00:09<11:54,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000006763.jpg: 640x480 4 persons, 65.0ms
Speed: 3.2ms preprocess, 65.0ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   1%|▏         | 65/5000 [00:09<11:17,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000006771.jpg: 448x640 9 persons, 1 cell phone, 62.2ms
Speed: 2.4ms preprocess, 62.2ms inference, 8.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|▏         | 66/5000 [00:09<11:18,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000006818.jpg: 640x448 1 toilet, 61.7ms
Speed: 3.3ms preprocess, 61.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   1%|▏         | 67/5000 [00:10<10:33,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000006894.jpg: 480x640 1 person, 2 elephants, 69.5ms
Speed: 3.0ms preprocess, 69.5ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   1%|▏         | 68/5000 [00:10<10:25,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000006954.jpg: 480x640 5 persons, 2 frisbees, 61.5ms
Speed: 3.3ms preprocess, 61.5ms inference, 7.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   1%|▏         | 69/5000 [00:10<10:26,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007088.jpg: 640x480 1 person, 1 car, 1 umbrella, 66.1ms
Speed: 3.1ms preprocess, 66.1ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   1%|▏         | 70/5000 [00:10<10:11,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007108.jpg: 448x640 4 elephants, 71.0ms
Speed: 2.9ms preprocess, 71.0ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|▏         | 71/5000 [00:10<10:17,  7.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007278.jpg: 512x640 1 person, 1 surfboard, 79.6ms
Speed: 3.9ms preprocess, 79.6ms inference, 3.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:   1%|▏         | 72/5000 [00:10<10:32,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007281.jpg: 384x640 7 persons, 2 horses, 89.7ms
Speed: 3.8ms preprocess, 89.7ms inference, 6.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   1%|▏         | 73/5000 [00:10<11:27,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007386.jpg: 448x640 (no detections), 76.4ms
Speed: 4.0ms preprocess, 76.4ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   1%|▏         | 74/5000 [00:10<10:48,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007511.jpg: 480x640 3 persons, 1 kite, 72.7ms
Speed: 3.0ms preprocess, 72.7ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 75/5000 [00:11<10:38,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007574.jpg: 480x640 2 bottles, 1 bowl, 1 cake, 1 microwave, 1 refrigerator, 1 clock, 65.8ms
Speed: 3.0ms preprocess, 65.8ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 76/5000 [00:11<10:42,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007784.jpg: 480x640 1 kite, 70.5ms
Speed: 5.5ms preprocess, 70.5ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 77/5000 [00:11<10:26,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007795.jpg: 448x640 1 couch, 4 beds, 64.2ms
Speed: 2.5ms preprocess, 64.2ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 78/5000 [00:11<10:14,  8.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007816.jpg: 448x640 9 persons, 1 motorcycle, 66.0ms
Speed: 2.5ms preprocess, 66.0ms inference, 10.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 79/5000 [00:11<10:41,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007818.jpg: 448x640 1 chair, 2 vases, 63.9ms
Speed: 2.9ms preprocess, 63.9ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 80/5000 [00:11<10:21,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007888.jpg: 640x640 2 clocks, 85.3ms
Speed: 2.2ms preprocess, 85.3ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   2%|▏         | 81/5000 [00:11<10:35,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007977.jpg: 640x448 4 persons, 1 skateboard, 91.3ms
Speed: 5.3ms preprocess, 91.3ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   2%|▏         | 82/5000 [00:12<11:08,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000007991.jpg: 384x640 1 cup, 1 fork, 1 knife, 1 carrot, 1 dining table, 60.0ms
Speed: 2.3ms preprocess, 60.0ms inference, 4.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   2%|▏         | 83/5000 [00:12<10:39,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000008021.jpg: 480x640 3 persons, 1 tie, 77.0ms
Speed: 2.7ms preprocess, 77.0ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 84/5000 [00:12<10:44,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000008211.jpg: 480x640 2 persons, 3 motorcycles, 1 airplane, 70.4ms
Speed: 2.8ms preprocess, 70.4ms inference, 6.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 85/5000 [00:12<10:50,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000008277.jpg: 640x640 1 fork, 1 bowl, 1 dining table, 91.9ms
Speed: 4.4ms preprocess, 91.9ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   2%|▏         | 86/5000 [00:12<11:24,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000008532.jpg: 448x640 1 person, 1 tie, 63.2ms
Speed: 2.7ms preprocess, 63.2ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 87/5000 [00:12<10:43,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000008629.jpg: 640x640 1 fork, 1 knife, 5 pizzas, 90.1ms
Speed: 3.1ms preprocess, 90.1ms inference, 6.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   2%|▏         | 88/5000 [00:12<11:34,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000008690.jpg: 480x640 2 persons, 1 dog, 1 cow, 72.9ms
Speed: 2.7ms preprocess, 72.9ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 89/5000 [00:12<11:15,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000008762.jpg: 448x640 7 traffic lights, 70.3ms
Speed: 2.8ms preprocess, 70.3ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 90/5000 [00:13<11:10,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000008844.jpg: 448x640 3 persons, 2 bananas, 81.0ms
Speed: 3.0ms preprocess, 81.0ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 91/5000 [00:13<11:19,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000008899.jpg: 544x640 1 bicycle, 1 fire hydrant, 73.1ms
Speed: 1.6ms preprocess, 73.1ms inference, 2.7ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:   2%|▏         | 92/5000 [00:13<11:02,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000009378.jpg: 448x640 5 persons, 1 remote, 66.2ms
Speed: 4.4ms preprocess, 66.2ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 93/5000 [00:13<10:54,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000009400.jpg: 480x640 10 persons, 3 cups, 8 laptops, 1 keyboard, 70.4ms
Speed: 2.6ms preprocess, 70.4ms inference, 20.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 94/5000 [00:13<12:05,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000009448.jpg: 640x576 1 person, 2 umbrellas, 156.2ms
Speed: 2.8ms preprocess, 156.2ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:   2%|▏         | 95/5000 [00:13<13:40,  5.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000009483.jpg: 480x640 2 persons, 1 tv, 2 laptops, 1 mouse, 1 keyboard, 66.0ms
Speed: 2.4ms preprocess, 66.0ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 96/5000 [00:14<13:04,  6.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000009590.jpg: 448x640 5 persons, 2 bottles, 5 cups, 3 bowls, 1 dining table, 1 clock, 63.2ms
Speed: 2.3ms preprocess, 63.2ms inference, 14.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 97/5000 [00:14<12:58,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000009769.jpg: 480x640 1 person, 1 car, 1 truck, 1 fire hydrant, 66.5ms
Speed: 2.5ms preprocess, 66.5ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 98/5000 [00:14<12:05,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000009772.jpg: 640x576 1 person, 1 bed, 1 tv, 2 sinks, 85.3ms
Speed: 2.4ms preprocess, 85.3ms inference, 5.8ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:   2%|▏         | 99/5000 [00:14<12:04,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000009891.jpg: 480x640 2 persons, 3 cars, 1 handbag, 1 suitcase, 92.9ms
Speed: 2.5ms preprocess, 92.9ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 100/5000 [00:14<12:20,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000009914.jpg: 480x640 2 bowls, 2 sandwichs, 1 dining table, 67.5ms
Speed: 2.4ms preprocess, 67.5ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 101/5000 [00:14<11:40,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000010092.jpg: 448x640 4 chairs, 1 bed, 2 dining tables, 63.7ms
Speed: 3.0ms preprocess, 63.7ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 102/5000 [00:14<11:17,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000010363.jpg: 384x640 2 cars, 1 cat, 1 bottle, 59.0ms
Speed: 2.9ms preprocess, 59.0ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   2%|▏         | 103/5000 [00:14<10:34,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000010583.jpg: 640x640 1 cup, 1 knife, 1 bowl, 1 cake, 90.4ms
Speed: 4.4ms preprocess, 90.4ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   2%|▏         | 104/5000 [00:15<11:16,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000010707.jpg: 480x640 4 persons, 3 bottles, 2 cups, 1 couch, 1 dining table, 1 laptop, 67.4ms
Speed: 2.4ms preprocess, 67.4ms inference, 10.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 105/5000 [00:15<11:31,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000010764.jpg: 448x640 1 person, 1 baseball glove, 65.6ms
Speed: 3.3ms preprocess, 65.6ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 106/5000 [00:15<10:50,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000010977.jpg: 480x640 2 bottles, 1 toilet, 1 sink, 64.8ms
Speed: 4.4ms preprocess, 64.8ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 107/5000 [00:15<10:31,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000010995.jpg: 480x640 1 bed, 67.1ms
Speed: 2.4ms preprocess, 67.1ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 108/5000 [00:15<10:05,  8.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000011051.jpg: 544x640 2 persons, 1 tie, 103.0ms
Speed: 2.5ms preprocess, 103.0ms inference, 3.7ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:   2%|▏         | 109/5000 [00:15<10:54,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000011122.jpg: 480x640 1 stop sign, 64.2ms
Speed: 2.5ms preprocess, 64.2ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 110/5000 [00:15<10:22,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000011149.jpg: 480x640 2 persons, 2 bicycles, 1 motorcycle, 64.5ms
Speed: 3.9ms preprocess, 64.5ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 111/5000 [00:16<10:20,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000011197.jpg: 448x640 4 persons, 1 bicycle, 5 cars, 3 traffic lights, 1 handbag, 67.3ms
Speed: 2.7ms preprocess, 67.3ms inference, 12.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 112/5000 [00:16<10:58,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000011511.jpg: 480x640 6 persons, 69.8ms
Speed: 2.9ms preprocess, 69.8ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 113/5000 [00:16<10:51,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000011615.jpg: 640x480 1 stop sign, 68.8ms
Speed: 2.3ms preprocess, 68.8ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   2%|▏         | 114/5000 [00:16<10:26,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000011699.jpg: 640x480 3 persons, 1 handbag, 1 suitcase, 66.6ms
Speed: 2.2ms preprocess, 66.6ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   2%|▏         | 115/5000 [00:16<10:28,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000011760.jpg: 448x640 3 zebras, 62.6ms
Speed: 2.9ms preprocess, 62.6ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 116/5000 [00:16<10:09,  8.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000011813.jpg: 640x448 1 chair, 62.9ms
Speed: 3.7ms preprocess, 62.9ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   2%|▏         | 117/5000 [00:16<09:51,  8.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000012062.jpg: 448x640 2 sheeps, 101.7ms
Speed: 2.8ms preprocess, 101.7ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 118/5000 [00:16<10:37,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000012120.jpg: 448x640 11 persons, 1 tennis racket, 61.2ms
Speed: 3.1ms preprocess, 61.2ms inference, 8.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 119/5000 [00:17<10:43,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000012280.jpg: 640x480 1 person, 70.0ms
Speed: 2.9ms preprocess, 70.0ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   2%|▏         | 120/5000 [00:17<10:22,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000012576.jpg: 640x480 1 person, 5 cups, 1 fork, 4 pizzas, 1 chair, 1 dining table, 1 tv, 69.0ms
Speed: 5.0ms preprocess, 69.0ms inference, 13.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   2%|▏         | 121/5000 [00:17<11:17,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000012639.jpg: 640x480 15 persons, 1 baseball glove, 77.7ms
Speed: 3.1ms preprocess, 77.7ms inference, 15.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   2%|▏         | 122/5000 [00:17<12:20,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000012667.jpg: 480x640 1 banana, 1 dining table, 1 remote, 1 book, 70.9ms
Speed: 2.3ms preprocess, 70.9ms inference, 4.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   2%|▏         | 123/5000 [00:17<11:44,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000012670.jpg: 448x640 15 persons, 1 teddy bear, 67.1ms
Speed: 3.1ms preprocess, 67.1ms inference, 16.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   2%|▏         | 124/5000 [00:17<12:23,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000012748.jpg: 640x480 2 persons, 1 horse, 78.0ms
Speed: 3.1ms preprocess, 78.0ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   2%|▎         | 125/5000 [00:17<11:53,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000013004.jpg: 640x480 (no detections), 71.2ms
Speed: 4.0ms preprocess, 71.2ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   3%|▎         | 126/5000 [00:18<10:55,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000013177.jpg: 448x640 1 person, 1 motorcycle, 101.0ms
Speed: 2.6ms preprocess, 101.0ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 127/5000 [00:18<11:15,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000013201.jpg: 640x448 1 person, 1 skateboard, 67.6ms
Speed: 2.6ms preprocess, 67.6ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   3%|▎         | 128/5000 [00:18<10:42,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000013291.jpg: 448x640 6 persons, 4 frisbees, 66.6ms
Speed: 5.0ms preprocess, 66.6ms inference, 9.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 129/5000 [00:18<11:09,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000013348.jpg: 448x640 2 airplanes, 69.0ms
Speed: 2.9ms preprocess, 69.0ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 130/5000 [00:18<10:45,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000013546.jpg: 448x640 2 persons, 1 car, 2 benchs, 2 skateboards, 69.3ms
Speed: 2.8ms preprocess, 69.3ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 131/5000 [00:18<10:42,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000013597.jpg: 448x640 1 cake, 60.8ms
Speed: 2.5ms preprocess, 60.8ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 132/5000 [00:18<10:06,  8.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000013659.jpg: 480x640 6 persons, 2 cups, 9 chairs, 3 dining tables, 2 tvs, 6 laptops, 1 mouse, 1 cell phone, 73.0ms
Speed: 3.1ms preprocess, 73.0ms inference, 27.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 133/5000 [00:19<12:20,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000013729.jpg: 480x640 4 persons, 1 backpack, 2 bottles, 1 dining table, 3 remotes, 65.3ms
Speed: 2.6ms preprocess, 65.3ms inference, 9.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 134/5000 [00:19<12:10,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000013774.jpg: 480x640 2 persons, 1 kite, 67.9ms
Speed: 2.6ms preprocess, 67.9ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 135/5000 [00:19<11:22,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000013923.jpg: 448x640 6 chairs, 1 couch, 1 potted plant, 1 dining table, 1 tv, 1 book, 3 vases, 90.7ms
Speed: 3.3ms preprocess, 90.7ms inference, 11.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 136/5000 [00:19<12:17,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000014007.jpg: 448x640 1 cat, 1 suitcase, 62.9ms
Speed: 2.5ms preprocess, 62.9ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 137/5000 [00:19<11:22,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000014038.jpg: 448x640 1 refrigerator, 2 books, 61.8ms
Speed: 3.6ms preprocess, 61.8ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 138/5000 [00:19<10:45,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000014226.jpg: 480x640 2 persons, 1 laptop, 74.4ms
Speed: 2.9ms preprocess, 74.4ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 139/5000 [00:19<10:35,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000014380.jpg: 448x640 1 train, 63.4ms
Speed: 3.0ms preprocess, 63.4ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 140/5000 [00:19<10:08,  7.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000014439.jpg: 416x640 17 persons, 2 backpacks, 1 kite, 2 chairs, 122.4ms
Speed: 3.1ms preprocess, 122.4ms inference, 15.7ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:   3%|▎         | 141/5000 [00:20<12:35,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000014473.jpg: 448x640 7 persons, 1 train, 65.6ms
Speed: 2.6ms preprocess, 65.6ms inference, 7.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 142/5000 [00:20<12:12,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000014831.jpg: 640x480 1 cat, 66.7ms
Speed: 2.9ms preprocess, 66.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   3%|▎         | 143/5000 [00:20<11:16,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000014888.jpg: 480x640 1 cat, 1 dog, 69.0ms
Speed: 3.0ms preprocess, 69.0ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 144/5000 [00:20<10:43,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015079.jpg: 448x640 1 fork, 1 knife, 2 sandwichs, 1 cake, 93.7ms
Speed: 2.8ms preprocess, 93.7ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 145/5000 [00:20<11:07,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015254.jpg: 448x640 4 bowls, 5 carrots, 2 dining tables, 66.9ms
Speed: 6.8ms preprocess, 66.9ms inference, 9.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 146/5000 [00:20<11:21,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015272.jpg: 640x448 1 traffic light, 60.4ms
Speed: 2.7ms preprocess, 60.4ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   3%|▎         | 147/5000 [00:20<10:30,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015278.jpg: 480x640 1 cup, 5 broccolis, 68.1ms
Speed: 2.7ms preprocess, 68.1ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 148/5000 [00:21<10:29,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015335.jpg: 480x640 4 persons, 1 cup, 67.0ms
Speed: 3.0ms preprocess, 67.0ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 149/5000 [00:21<10:18,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015338.jpg: 448x640 2 cars, 2 buss, 65.3ms
Speed: 3.1ms preprocess, 65.3ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 150/5000 [00:21<10:12,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015440.jpg: 640x416 1 car, 1 stop sign, 142.8ms
Speed: 3.5ms preprocess, 142.8ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:   3%|▎         | 151/5000 [00:21<11:47,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015497.jpg: 480x640 1 cat, 68.0ms
Speed: 2.7ms preprocess, 68.0ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 152/5000 [00:21<11:02,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015517.jpg: 480x640 5 buss, 1 train, 1 potted plant, 71.1ms
Speed: 3.1ms preprocess, 71.1ms inference, 7.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 153/5000 [00:21<11:19,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015597.jpg: 640x448 1 person, 1 skateboard, 75.1ms
Speed: 3.8ms preprocess, 75.1ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   3%|▎         | 154/5000 [00:21<11:04,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015660.jpg: 352x640 1 person, 6 kites, 2 surfboards, 125.7ms
Speed: 2.6ms preprocess, 125.7ms inference, 5.2ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:   3%|▎         | 155/5000 [00:22<12:17,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015746.jpg: 640x448 1 fire hydrant, 61.8ms
Speed: 2.7ms preprocess, 61.8ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   3%|▎         | 156/5000 [00:22<11:10,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015751.jpg: 448x640 1 person, 60.5ms
Speed: 3.5ms preprocess, 60.5ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 157/5000 [00:22<10:27,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000015956.jpg: 480x640 2 persons, 1 horse, 1 chair, 1 clock, 64.2ms
Speed: 2.5ms preprocess, 64.2ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 158/5000 [00:22<10:15,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000016010.jpg: 480x640 3 sheeps, 3 cows, 1 zebra, 65.9ms
Speed: 2.9ms preprocess, 65.9ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 159/5000 [00:22<10:23,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000016228.jpg: 448x640 11 persons, 1 bench, 1 horse, 1 umbrella, 62.1ms
Speed: 2.3ms preprocess, 62.1ms inference, 14.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 160/5000 [00:22<10:57,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000016249.jpg: 480x640 6 persons, 3 benchs, 64.7ms
Speed: 3.9ms preprocess, 64.7ms inference, 8.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 161/5000 [00:22<10:59,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000016439.jpg: 480x640 1 wine glass, 2 tvs, 1 laptop, 1 keyboard, 1 book, 87.4ms
Speed: 2.4ms preprocess, 87.4ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 162/5000 [00:23<11:14,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000016451.jpg: 640x640 1 person, 1 backpack, 1 umbrella, 1 handbag, 1 surfboard, 2 chairs, 89.5ms
Speed: 4.4ms preprocess, 89.5ms inference, 8.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   3%|▎         | 163/5000 [00:23<11:58,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000016502.jpg: 640x480 1 sheep, 70.7ms
Speed: 4.3ms preprocess, 70.7ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   3%|▎         | 164/5000 [00:23<11:18,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000016598.jpg: 640x480 1 person, 1 cell phone, 76.5ms
Speed: 7.6ms preprocess, 76.5ms inference, 6.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   3%|▎         | 165/5000 [00:23<11:08,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000016958.jpg: 448x640 3 chairs, 1 dining table, 1 tv, 3 vases, 66.3ms
Speed: 2.9ms preprocess, 66.3ms inference, 7.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 166/5000 [00:23<11:04,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017029.jpg: 640x640 2 cars, 1 dog, 1 frisbee, 86.2ms
Speed: 2.8ms preprocess, 86.2ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   3%|▎         | 167/5000 [00:23<11:14,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017031.jpg: 448x640 1 person, 1 giraffe, 63.4ms
Speed: 4.7ms preprocess, 63.4ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 168/5000 [00:23<10:38,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017115.jpg: 640x448 2 zebras, 66.2ms
Speed: 2.7ms preprocess, 66.2ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   3%|▎         | 169/5000 [00:23<10:19,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017178.jpg: 448x640 1 car, 3 horses, 62.0ms
Speed: 3.1ms preprocess, 62.0ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 170/5000 [00:24<10:08,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017182.jpg: 448x640 2 chairs, 97.1ms
Speed: 2.4ms preprocess, 97.1ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   3%|▎         | 171/5000 [00:24<10:32,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017207.jpg: 480x640 1 person, 1 car, 1 motorcycle, 1 bus, 1 truck, 69.4ms
Speed: 3.2ms preprocess, 69.4ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   3%|▎         | 172/5000 [00:24<10:27,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017379.jpg: 640x480 1 toilet, 1 tv, 2 sinks, 75.0ms
Speed: 3.1ms preprocess, 75.0ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   3%|▎         | 173/5000 [00:24<10:29,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017436.jpg: 640x512 1 person, 2 benchs, 170.4ms
Speed: 2.4ms preprocess, 170.4ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:   3%|▎         | 174/5000 [00:24<12:48,  6.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017627.jpg: 480x640 2 persons, 10 cars, 74.3ms
Speed: 3.1ms preprocess, 74.3ms inference, 11.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▎         | 175/5000 [00:24<12:53,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017714.jpg: 480x640 2 cups, 1 fork, 1 knife, 1 dining table, 74.4ms
Speed: 2.0ms preprocess, 74.4ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▎         | 176/5000 [00:25<12:13,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017899.jpg: 640x480 1 person, 4 cups, 1 bowl, 1 pizza, 1 chair, 1 couch, 1 dining table, 78.4ms
Speed: 2.8ms preprocess, 78.4ms inference, 8.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   4%|▎         | 177/5000 [00:25<12:23,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017905.jpg: 640x480 1 person, 1 traffic light, 1 fire hydrant, 86.5ms
Speed: 2.1ms preprocess, 86.5ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   4%|▎         | 178/5000 [00:25<12:08,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000017959.jpg: 448x640 3 persons, 8 kites, 67.8ms
Speed: 3.0ms preprocess, 67.8ms inference, 9.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▎         | 179/5000 [00:25<12:21,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000018150.jpg: 480x640 2 persons, 1 bottle, 1 pizza, 1 dining table, 76.9ms
Speed: 3.0ms preprocess, 76.9ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▎         | 180/5000 [00:25<12:04,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000018193.jpg: 480x640 1 person, 2 sandwichs, 2 chairs, 74.0ms
Speed: 2.5ms preprocess, 74.0ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▎         | 181/5000 [00:25<11:35,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000018380.jpg: 448x640 18 persons, 2 wine glasss, 5 cups, 1 fork, 2 knifes, 1 bowl, 1 pizza, 1 cake, 1 dining table, 65.8ms
Speed: 3.1ms preprocess, 65.8ms inference, 25.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▎         | 182/5000 [00:25<13:12,  6.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000018491.jpg: 448x640 9 persons, 2 baseball gloves, 65.4ms
Speed: 2.7ms preprocess, 65.4ms inference, 8.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▎         | 183/5000 [00:26<12:54,  6.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000018519.jpg: 640x544 1 person, 1 sports ball, 1 skateboard, 159.8ms
Speed: 2.3ms preprocess, 159.8ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:   4%|▎         | 184/5000 [00:26<14:10,  5.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000018575.jpg: 480x640 1 wine glass, 2 cups, 1 spoon, 1 orange, 1 dining table, 67.0ms
Speed: 2.4ms preprocess, 67.0ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▎         | 185/5000 [00:26<13:06,  6.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000018737.jpg: 448x640 1 motorcycle, 76.5ms
Speed: 2.4ms preprocess, 76.5ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▎         | 186/5000 [00:26<12:05,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000018770.jpg: 448x640 1 person, 1 tie, 70.4ms
Speed: 2.8ms preprocess, 70.4ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▎         | 187/5000 [00:26<11:15,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000018833.jpg: 448x640 (no detections), 92.8ms
Speed: 2.8ms preprocess, 92.8ms inference, 0.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▍         | 188/5000 [00:26<10:58,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000018837.jpg: 480x640 4 persons, 1 car, 1 truck, 70.3ms
Speed: 2.4ms preprocess, 70.3ms inference, 8.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 189/5000 [00:26<10:50,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000019042.jpg: 384x640 1 person, 119.1ms
Speed: 2.1ms preprocess, 119.1ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   4%|▍         | 190/5000 [00:27<11:30,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000019109.jpg: 448x640 7 persons, 1 car, 13 motorcycles, 71.4ms
Speed: 2.6ms preprocess, 71.4ms inference, 19.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▍         | 191/5000 [00:27<12:27,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000019221.jpg: 480x640 1 person, 1 broccoli, 68.3ms
Speed: 2.1ms preprocess, 68.3ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 192/5000 [00:27<11:34,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000019402.jpg: 480x640 3 persons, 1 bottle, 66.8ms
Speed: 2.5ms preprocess, 66.8ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 193/5000 [00:27<10:57,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000019432.jpg: 480x640 1 person, 1 sports ball, 1 tennis racket, 12 chairs, 66.5ms
Speed: 2.7ms preprocess, 66.5ms inference, 15.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 194/5000 [00:27<11:29,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000019742.jpg: 480x640 1 wine glass, 1 cup, 66.0ms
Speed: 4.9ms preprocess, 66.0ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 195/5000 [00:27<10:56,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000019786.jpg: 480x640 2 persons, 1 sink, 68.8ms
Speed: 5.7ms preprocess, 68.8ms inference, 6.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 196/5000 [00:27<10:42,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000019924.jpg: 640x608 1 person, 1 tie, 166.7ms
Speed: 13.7ms preprocess, 166.7ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:   4%|▍         | 197/5000 [00:28<13:19,  6.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000020059.jpg: 448x640 2 zebras, 63.9ms
Speed: 3.6ms preprocess, 63.9ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▍         | 198/5000 [00:28<12:03,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000020107.jpg: 640x448 1 fire hydrant, 70.3ms
Speed: 4.5ms preprocess, 70.3ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   4%|▍         | 199/5000 [00:28<11:21,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000020247.jpg: 480x640 2 bears, 69.6ms
Speed: 4.0ms preprocess, 69.6ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 200/5000 [00:28<10:50,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000020333.jpg: 640x448 1 person, 2 ties, 1 bowl, 66.1ms
Speed: 2.9ms preprocess, 66.1ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   4%|▍         | 201/5000 [00:28<10:35,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000020553.jpg: 480x640 1 dog, 4 bottles, 67.4ms
Speed: 5.4ms preprocess, 67.4ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 202/5000 [00:28<10:32,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000020571.jpg: 640x544 2 persons, 2 boats, 1 suitcase, 1 refrigerator, 82.7ms
Speed: 2.6ms preprocess, 82.7ms inference, 6.8ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:   4%|▍         | 203/5000 [00:28<11:00,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000020992.jpg: 448x640 1 person, 1 hot dog, 63.3ms
Speed: 4.6ms preprocess, 63.3ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▍         | 204/5000 [00:29<10:26,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000021167.jpg: 640x448 2 persons, 2 ties, 1 wine glass, 63.7ms
Speed: 3.2ms preprocess, 63.7ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   4%|▍         | 205/5000 [00:29<10:13,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000021465.jpg: 384x640 1 person, 1 cup, 1 chair, 1 vase, 1 teddy bear, 82.9ms
Speed: 5.6ms preprocess, 82.9ms inference, 4.3ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   4%|▍         | 206/5000 [00:29<10:32,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000021503.jpg: 480x640 1 knife, 2 sandwichs, 2 pizzas, 1 cake, 1 keyboard, 76.7ms
Speed: 2.9ms preprocess, 76.7ms inference, 6.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 207/5000 [00:29<10:45,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000021604.jpg: 640x512 1 person, 2 ties, 72.8ms
Speed: 2.7ms preprocess, 72.8ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:   4%|▍         | 208/5000 [00:29<10:30,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000021839.jpg: 640x480 8 persons, 1 car, 1 handbag, 73.7ms
Speed: 3.7ms preprocess, 73.7ms inference, 8.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   4%|▍         | 209/5000 [00:29<10:53,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000021879.jpg: 448x640 4 persons, 3 surfboards, 64.6ms
Speed: 2.7ms preprocess, 64.6ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▍         | 210/5000 [00:29<10:47,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000021903.jpg: 480x640 3 persons, 1 elephant, 74.9ms
Speed: 3.0ms preprocess, 74.9ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 211/5000 [00:29<10:42,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000022192.jpg: 448x640 1 dog, 2 beds, 1 book, 71.1ms
Speed: 3.2ms preprocess, 71.1ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▍         | 212/5000 [00:30<10:29,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000022371.jpg: 448x640 1 person, 1 tie, 1 dining table, 1 laptop, 70.1ms
Speed: 4.0ms preprocess, 70.1ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▍         | 213/5000 [00:30<10:25,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000022396.jpg: 512x640 1 airplane, 1 orange, 152.6ms
Speed: 3.0ms preprocess, 152.6ms inference, 2.7ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:   4%|▍         | 214/5000 [00:30<12:14,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000022479.jpg: 608x640 1 person, 1 skateboard, 225.6ms
Speed: 2.6ms preprocess, 225.6ms inference, 3.1ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:   4%|▍         | 215/5000 [00:30<15:11,  5.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000022589.jpg: 480x640 2 sheeps, 70.2ms
Speed: 3.0ms preprocess, 70.2ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 216/5000 [00:30<13:35,  5.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000022623.jpg: 480x640 1 dining table, 76.9ms
Speed: 2.9ms preprocess, 76.9ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 217/5000 [00:30<12:27,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000022705.jpg: 640x512 1 person, 2 bottles, 1 bowl, 1 oven, 2 refrigerators, 82.4ms
Speed: 2.5ms preprocess, 82.4ms inference, 7.7ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:   4%|▍         | 218/5000 [00:31<12:21,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000022755.jpg: 480x640 1 car, 1 chair, 70.1ms
Speed: 3.1ms preprocess, 70.1ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 219/5000 [00:31<11:30,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000022892.jpg: 448x640 1 bird, 1 dog, 72.8ms
Speed: 4.0ms preprocess, 72.8ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▍         | 220/5000 [00:31<11:01,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000022935.jpg: 480x640 2 persons, 1 sports ball, 1 tennis racket, 75.0ms
Speed: 2.1ms preprocess, 75.0ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 221/5000 [00:31<10:44,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000022969.jpg: 480x640 2 giraffes, 79.4ms
Speed: 4.4ms preprocess, 79.4ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   4%|▍         | 222/5000 [00:31<10:20,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000023023.jpg: 640x640 1 suitcase, 1 chair, 192.3ms
Speed: 4.9ms preprocess, 192.3ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   4%|▍         | 223/5000 [00:31<13:26,  5.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000023034.jpg: 448x640 2 persons, 2 horses, 65.6ms
Speed: 2.2ms preprocess, 65.6ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▍         | 224/5000 [00:32<12:20,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000023126.jpg: 448x640 1 person, 2 horses, 65.4ms
Speed: 2.7ms preprocess, 65.4ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   4%|▍         | 225/5000 [00:32<11:23,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000023230.jpg: 480x640 3 birds, 75.9ms
Speed: 2.7ms preprocess, 75.9ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▍         | 226/5000 [00:32<10:59,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000023272.jpg: 480x640 2 cars, 1 cat, 75.8ms
Speed: 5.5ms preprocess, 75.8ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▍         | 227/5000 [00:32<11:01,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000023359.jpg: 448x640 1 person, 1 snowboard, 1 kite, 75.0ms
Speed: 3.7ms preprocess, 75.0ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▍         | 228/5000 [00:32<10:48,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000023666.jpg: 640x480 1 toilet, 96.3ms
Speed: 7.9ms preprocess, 96.3ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   5%|▍         | 229/5000 [00:32<11:36,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000023751.jpg: 640x448 1 person, 1 kite, 83.0ms
Speed: 3.4ms preprocess, 83.0ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   5%|▍         | 230/5000 [00:32<11:28,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000023781.jpg: 448x640 2 bowls, 1 apple, 1 broccoli, 4 carrots, 88.0ms
Speed: 3.2ms preprocess, 88.0ms inference, 10.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▍         | 231/5000 [00:32<11:55,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000023899.jpg: 448x640 3 persons, 1 couch, 73.1ms
Speed: 3.3ms preprocess, 73.1ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▍         | 232/5000 [00:33<11:28,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000023937.jpg: 448x640 7 sheeps, 66.2ms
Speed: 2.6ms preprocess, 66.2ms inference, 11.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▍         | 233/5000 [00:33<11:13,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000024021.jpg: 416x640 15 persons, 1 sports ball, 129.1ms
Speed: 2.7ms preprocess, 129.1ms inference, 13.0ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:   5%|▍         | 234/5000 [00:33<13:06,  6.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000024027.jpg: 480x640 2 persons, 2 kites, 68.7ms
Speed: 4.5ms preprocess, 68.7ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▍         | 235/5000 [00:33<12:07,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000024144.jpg: 480x640 1 pizza, 70.3ms
Speed: 3.6ms preprocess, 70.3ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▍         | 236/5000 [00:33<11:25,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000024243.jpg: 480x640 4 persons, 1 bench, 2 cups, 89.7ms
Speed: 3.5ms preprocess, 89.7ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▍         | 237/5000 [00:33<11:46,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000024567.jpg: 640x480 1 person, 6 hot dogs, 2 chairs, 84.6ms
Speed: 3.1ms preprocess, 84.6ms inference, 9.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   5%|▍         | 238/5000 [00:34<12:37,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000024610.jpg: 480x640 1 backpack, 1 cup, 2 chairs, 1 couch, 1 laptop, 86.6ms
Speed: 3.7ms preprocess, 86.6ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▍         | 239/5000 [00:34<12:27,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000024919.jpg: 448x640 2 elephants, 102.7ms
Speed: 3.8ms preprocess, 102.7ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▍         | 240/5000 [00:34<12:31,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025057.jpg: 448x640 2 persons, 1 frisbee, 67.4ms
Speed: 6.1ms preprocess, 67.4ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▍         | 241/5000 [00:34<11:42,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025096.jpg: 640x480 1 person, 1 knife, 1 dining table, 68.7ms
Speed: 4.7ms preprocess, 68.7ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   5%|▍         | 242/5000 [00:34<11:11,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025139.jpg: 448x640 1 zebra, 66.7ms
Speed: 4.5ms preprocess, 66.7ms inference, 1.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▍         | 243/5000 [00:34<10:38,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025181.jpg: 448x640 1 person, 1 train, 68.2ms
Speed: 3.0ms preprocess, 68.2ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▍         | 244/5000 [00:34<10:14,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025228.jpg: 448x640 1 person, 1 surfboard, 70.2ms
Speed: 3.0ms preprocess, 70.2ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▍         | 245/5000 [00:34<10:06,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025386.jpg: 640x448 2 persons, 67.9ms
Speed: 2.9ms preprocess, 67.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   5%|▍         | 246/5000 [00:35<09:50,  8.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025393.jpg: 480x640 2 persons, 1 car, 2 ties, 74.1ms
Speed: 3.2ms preprocess, 74.1ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▍         | 247/5000 [00:35<10:02,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025394.jpg: 640x480 3 persons, 17 bottles, 2 wine glasss, 1 cup, 2 dining tables, 1 cell phone, 72.7ms
Speed: 3.3ms preprocess, 72.7ms inference, 24.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   5%|▍         | 248/5000 [00:35<12:25,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025424.jpg: 640x448 1 person, 1 sports ball, 1 tennis racket, 71.5ms
Speed: 3.1ms preprocess, 71.5ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   5%|▍         | 249/5000 [00:35<11:37,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025560.jpg: 480x640 1 dog, 1 cup, 1 tv, 75.8ms
Speed: 2.9ms preprocess, 75.8ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▌         | 250/5000 [00:35<11:26,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025593.jpg: 480x640 (no detections), 64.9ms
Speed: 3.4ms preprocess, 64.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▌         | 251/5000 [00:35<10:27,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025603.jpg: 480x640 3 persons, 2 cups, 1 pizza, 6 chairs, 2 dining tables, 1 book, 62.1ms
Speed: 3.2ms preprocess, 62.1ms inference, 14.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▌         | 252/5000 [00:35<10:55,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000025986.jpg: 480x640 1 cup, 4 bowls, 1 dining table, 63.7ms
Speed: 2.8ms preprocess, 63.7ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▌         | 253/5000 [00:36<10:41,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000026204.jpg: 448x640 3 persons, 8 cars, 1 bus, 1 truck, 63.8ms
Speed: 2.8ms preprocess, 63.8ms inference, 11.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▌         | 254/5000 [00:36<10:59,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000026465.jpg: 480x640 1 person, 1 laptop, 1 keyboard, 2 cell phones, 68.1ms
Speed: 4.7ms preprocess, 68.1ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▌         | 255/5000 [00:36<10:47,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000026564.jpg: 448x640 1 tv, 1 laptop, 2 mouses, 3 keyboards, 3 books, 60.4ms
Speed: 2.9ms preprocess, 60.4ms inference, 11.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▌         | 256/5000 [00:36<10:48,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000026690.jpg: 640x448 7 persons, 63.5ms
Speed: 2.9ms preprocess, 63.5ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   5%|▌         | 257/5000 [00:36<10:46,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000026926.jpg: 640x448 4 cars, 1 fire hydrant, 69.9ms
Speed: 2.9ms preprocess, 69.9ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   5%|▌         | 258/5000 [00:36<10:32,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000026941.jpg: 448x640 1 person, 7 suitcases, 68.0ms
Speed: 2.7ms preprocess, 68.0ms inference, 7.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▌         | 259/5000 [00:36<10:35,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000027186.jpg: 448x640 1 person, 1 couch, 63.9ms
Speed: 2.8ms preprocess, 63.9ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▌         | 260/5000 [00:37<10:23,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000027620.jpg: 480x640 1 cup, 1 chair, 1 laptop, 1 keyboard, 90.2ms
Speed: 3.5ms preprocess, 90.2ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▌         | 261/5000 [00:37<10:46,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000027696.jpg: 416x640 1 person, 1 pizza, 1 dining table, 62.6ms
Speed: 2.6ms preprocess, 62.6ms inference, 3.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:   5%|▌         | 262/5000 [00:37<10:15,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000027768.jpg: 640x640 1 person, 1 car, 1 bus, 86.2ms
Speed: 4.3ms preprocess, 86.2ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   5%|▌         | 263/5000 [00:37<10:35,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000027932.jpg: 640x608 1 motorcycle, 144.8ms
Speed: 3.7ms preprocess, 144.8ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:   5%|▌         | 264/5000 [00:37<12:17,  6.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000027972.jpg: 448x640 1 person, 1 surfboard, 63.4ms
Speed: 3.4ms preprocess, 63.4ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▌         | 265/5000 [00:37<11:18,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000027982.jpg: 448x640 1 person, 1 toilet, 1 sink, 58.8ms
Speed: 4.2ms preprocess, 58.8ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▌         | 266/5000 [00:37<10:26,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000028285.jpg: 448x640 1 clock, 67.1ms
Speed: 3.2ms preprocess, 67.1ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▌         | 267/5000 [00:38<10:10,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000028449.jpg: 448x640 5 elephants, 64.7ms
Speed: 3.2ms preprocess, 64.7ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   5%|▌         | 268/5000 [00:38<10:05,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000028452.jpg: 640x480 1 bottle, 1 refrigerator, 67.9ms
Speed: 2.8ms preprocess, 67.9ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   5%|▌         | 269/5000 [00:38<09:53,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000028809.jpg: 640x640 5 bananas, 87.8ms
Speed: 3.9ms preprocess, 87.8ms inference, 6.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   5%|▌         | 270/5000 [00:38<10:40,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000028993.jpg: 640x640 3 persons, 1 fire hydrant, 120.4ms
Speed: 4.7ms preprocess, 120.4ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   5%|▌         | 271/5000 [00:38<11:53,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000029187.jpg: 512x640 1 person, 1 horse, 73.5ms
Speed: 3.2ms preprocess, 73.5ms inference, 2.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:   5%|▌         | 272/5000 [00:38<11:18,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000029393.jpg: 480x640 1 dog, 66.9ms
Speed: 4.9ms preprocess, 66.9ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▌         | 273/5000 [00:38<10:38,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000029397.jpg: 480x640 1 person, 1 bench, 69.5ms
Speed: 2.8ms preprocess, 69.5ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   5%|▌         | 274/5000 [00:38<10:23,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000029596.jpg: 448x640 3 chairs, 1 couch, 1 dining table, 1 tv, 1 vase, 70.0ms
Speed: 3.0ms preprocess, 70.0ms inference, 6.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 275/5000 [00:39<10:28,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000029640.jpg: 448x640 1 knife, 1 spoon, 2 bowls, 13 broccolis, 6 carrots, 65.2ms
Speed: 3.2ms preprocess, 65.2ms inference, 19.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 276/5000 [00:39<11:41,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000029675.jpg: 640x480 2 hot dogs, 67.3ms
Speed: 3.2ms preprocess, 67.3ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   6%|▌         | 277/5000 [00:39<11:02,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000029984.jpg: 512x640 4 persons, 1 bird, 2 umbrellas, 2 chairs, 71.0ms
Speed: 3.5ms preprocess, 71.0ms inference, 8.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:   6%|▌         | 278/5000 [00:39<11:13,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000030213.jpg: 480x640 3 bowls, 1 chair, 1 dining table, 1 oven, 1 sink, 1 refrigerator, 78.2ms
Speed: 2.7ms preprocess, 78.2ms inference, 11.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▌         | 279/5000 [00:39<11:38,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000030494.jpg: 480x640 1 dog, 99.0ms
Speed: 6.3ms preprocess, 99.0ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▌         | 280/5000 [00:39<11:48,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000030504.jpg: 640x480 1 person, 1 backpack, 1 skis, 83.9ms
Speed: 4.1ms preprocess, 83.9ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   6%|▌         | 281/5000 [00:40<11:52,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000030675.jpg: 384x640 1 train, 220.6ms
Speed: 3.8ms preprocess, 220.6ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   6%|▌         | 282/5000 [00:40<14:49,  5.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000030785.jpg: 480x640 1 bowl, 1 sandwich, 3 broccolis, 1 cake, 1 dining table, 73.6ms
Speed: 3.0ms preprocess, 73.6ms inference, 6.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▌         | 283/5000 [00:40<13:50,  5.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000030828.jpg: 448x640 1 bench, 2 sports balls, 77.3ms
Speed: 4.1ms preprocess, 77.3ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 284/5000 [00:40<12:54,  6.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000031050.jpg: 640x448 1 potted plant, 2 vases, 69.0ms
Speed: 3.1ms preprocess, 69.0ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   6%|▌         | 285/5000 [00:40<11:52,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000031093.jpg: 448x640 3 persons, 1 skateboard, 72.5ms
Speed: 2.5ms preprocess, 72.5ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 286/5000 [00:40<11:23,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000031118.jpg: 448x640 9 persons, 2 cars, 2 clocks, 71.5ms
Speed: 3.3ms preprocess, 71.5ms inference, 11.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 287/5000 [00:40<11:44,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000031217.jpg: 448x640 1 person, 1 tennis racket, 70.7ms
Speed: 3.0ms preprocess, 70.7ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 288/5000 [00:41<11:03,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000031248.jpg: 448x640 2 chairs, 2 couchs, 4 potted plants, 1 book, 64.6ms
Speed: 4.5ms preprocess, 64.6ms inference, 9.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 289/5000 [00:41<11:15,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000031269.jpg: 480x640 3 zebras, 71.9ms
Speed: 2.4ms preprocess, 71.9ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▌         | 290/5000 [00:41<10:50,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000031296.jpg: 448x640 22 persons, 2 bottles, 4 cups, 11 chairs, 2 dining tables, 1 clock, 74.2ms
Speed: 3.0ms preprocess, 74.2ms inference, 35.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 291/5000 [00:41<14:14,  5.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000031322.jpg: 448x640 8 boats, 7 birds, 75.5ms
Speed: 3.4ms preprocess, 75.5ms inference, 14.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 292/5000 [00:41<14:11,  5.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000031620.jpg: 640x480 5 persons, 1 suitcase, 126.7ms
Speed: 3.3ms preprocess, 126.7ms inference, 6.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   6%|▌         | 293/5000 [00:42<14:34,  5.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000031735.jpg: 480x640 1 couch, 4 potted plants, 2 dining tables, 1 vase, 82.9ms
Speed: 3.2ms preprocess, 82.9ms inference, 11.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▌         | 294/5000 [00:42<14:19,  5.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000031749.jpg: 640x640 1 clock, 135.4ms
Speed: 5.5ms preprocess, 135.4ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   6%|▌         | 295/5000 [00:42<14:44,  5.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000031817.jpg: 640x352 3 persons, 1 motorcycle, 153.1ms
Speed: 3.1ms preprocess, 153.1ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 352)


Segmenting Images:   6%|▌         | 296/5000 [00:42<15:16,  5.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032038.jpg: 640x448 1 person, 1 pizza, 70.7ms
Speed: 5.0ms preprocess, 70.7ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   6%|▌         | 297/5000 [00:42<13:35,  5.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032081.jpg: 640x480 1 person, 1 tennis racket, 92.7ms
Speed: 4.9ms preprocess, 92.7ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   6%|▌         | 298/5000 [00:42<13:18,  5.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032285.jpg: 448x640 1 toilet, 72.5ms
Speed: 3.1ms preprocess, 72.5ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 299/5000 [00:43<12:16,  6.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032334.jpg: 480x640 6 persons, 3 wine glasss, 1 cup, 1 chair, 1 dining table, 70.1ms
Speed: 3.1ms preprocess, 70.1ms inference, 14.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▌         | 300/5000 [00:43<12:22,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032570.jpg: 448x640 1 person, 2 surfboards, 75.3ms
Speed: 2.9ms preprocess, 75.3ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 301/5000 [00:43<11:44,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032610.jpg: 448x640 1 chair, 2 tvs, 3 laptops, 1 keyboard, 66.6ms
Speed: 2.8ms preprocess, 66.6ms inference, 8.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 302/5000 [00:43<11:24,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032735.jpg: 448x640 2 persons, 2 skiss, 68.7ms
Speed: 3.1ms preprocess, 68.7ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 303/5000 [00:43<11:06,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032811.jpg: 640x480 1 bird, 99.5ms
Speed: 4.4ms preprocess, 99.5ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   6%|▌         | 304/5000 [00:43<11:12,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032817.jpg: 640x480 2 persons, 3 bottles, 1 cup, 2 toilets, 65.2ms
Speed: 2.8ms preprocess, 65.2ms inference, 7.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   6%|▌         | 305/5000 [00:43<11:03,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032861.jpg: 640x448 1 person, 1 umbrella, 58.3ms
Speed: 3.2ms preprocess, 58.3ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   6%|▌         | 306/5000 [00:43<10:15,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032887.jpg: 480x640 2 persons, 65.2ms
Speed: 2.5ms preprocess, 65.2ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▌         | 307/5000 [00:44<09:54,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032901.jpg: 576x640 5 persons, 1 tie, 1 suitcase, 3 bottles, 1 chair, 148.6ms
Speed: 1.8ms preprocess, 148.6ms inference, 11.0ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:   6%|▌         | 308/5000 [00:44<12:27,  6.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000032941.jpg: 640x480 2 persons, 4 cars, 1 bus, 1 traffic light, 1 stop sign, 74.3ms
Speed: 3.3ms preprocess, 74.3ms inference, 7.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   6%|▌         | 309/5000 [00:44<12:08,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000033005.jpg: 448x640 1 person, 1 stop sign, 2 tennis rackets, 70.6ms
Speed: 3.1ms preprocess, 70.6ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▌         | 310/5000 [00:44<11:34,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000033104.jpg: 640x576 8 persons, 1 skis, 192.5ms
Speed: 3.2ms preprocess, 192.5ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:   6%|▌         | 311/5000 [00:44<14:33,  5.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000033109.jpg: 480x640 6 cars, 2 trucks, 69.8ms
Speed: 3.2ms preprocess, 69.8ms inference, 9.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▌         | 312/5000 [00:45<13:45,  5.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000033114.jpg: 480x640 1 airplane, 1 fire hydrant, 78.4ms
Speed: 3.1ms preprocess, 78.4ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▋         | 313/5000 [00:45<12:41,  6.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000033221.jpg: 448x640 6 persons, 3 cars, 63.3ms
Speed: 3.9ms preprocess, 63.3ms inference, 7.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▋         | 314/5000 [00:45<12:06,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000033368.jpg: 640x448 1 person, 1 tennis racket, 95.3ms
Speed: 3.1ms preprocess, 95.3ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   6%|▋         | 315/5000 [00:45<11:49,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000033638.jpg: 640x448 1 person, 1 cup, 2 bowls, 1 potted plant, 68.9ms
Speed: 2.6ms preprocess, 68.9ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   6%|▋         | 316/5000 [00:45<11:13,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000033707.jpg: 640x640 2 giraffes, 92.3ms
Speed: 2.9ms preprocess, 92.3ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   6%|▋         | 317/5000 [00:45<11:23,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000033759.jpg: 480x640 1 person, 1 sports ball, 3 baseball bats, 66.3ms
Speed: 3.1ms preprocess, 66.3ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▋         | 318/5000 [00:45<10:55,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000033854.jpg: 448x640 10 persons, 3 cars, 2 motorcycles, 2 buss, 1 truck, 57.4ms
Speed: 2.8ms preprocess, 57.4ms inference, 15.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▋         | 319/5000 [00:46<11:22,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000034071.jpg: 448x640 1 parking meter, 74.2ms
Speed: 4.6ms preprocess, 74.2ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▋         | 320/5000 [00:46<10:54,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000034139.jpg: 480x640 2 persons, 1 backpack, 1 suitcase, 1 clock, 70.6ms
Speed: 3.1ms preprocess, 70.6ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▋         | 321/5000 [00:46<10:44,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000034205.jpg: 448x640 2 broccolis, 1 pizza, 64.7ms
Speed: 3.6ms preprocess, 64.7ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   6%|▋         | 322/5000 [00:46<10:17,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000034257.jpg: 416x640 1 broccoli, 9 carrots, 122.0ms
Speed: 4.4ms preprocess, 122.0ms inference, 7.2ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:   6%|▋         | 323/5000 [00:46<11:42,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000034417.jpg: 480x640 7 persons, 11 donuts, 67.0ms
Speed: 4.3ms preprocess, 67.0ms inference, 13.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▋         | 324/5000 [00:46<12:24,  6.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000034452.jpg: 480x640 1 person, 1 frisbee, 90.4ms
Speed: 2.8ms preprocess, 90.4ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   6%|▋         | 325/5000 [00:46<11:53,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000034760.jpg: 640x448 1 bottle, 1 toilet, 1 sink, 52.7ms
Speed: 2.7ms preprocess, 52.7ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   7%|▋         | 326/5000 [00:47<10:42,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000034873.jpg: 480x640 1 bottle, 1 bowl, 3 chairs, 1 tv, 1 refrigerator, 63.5ms
Speed: 2.7ms preprocess, 63.5ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 327/5000 [00:47<10:19,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000035062.jpg: 640x448 (no detections), 57.9ms
Speed: 2.7ms preprocess, 57.9ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 448)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000035197.jpg: 640x448 3 persons, 1 skateboard, 56.2ms
Speed: 2.5ms preprocess, 56.2ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   7%|▋         | 329/5000 [00:47<09:08,  8.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000035279.jpg: 480x640 1 person, 1 bottle, 1 laptop, 1 mouse, 1 cell phone, 60.1ms
Speed: 2.6ms preprocess, 60.1ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 330/5000 [00:47<09:01,  8.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000035326.jpg: 480x640 1 potted plant, 1 oven, 64.7ms
Speed: 2.5ms preprocess, 64.7ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 331/5000 [00:47<09:03,  8.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000035682.jpg: 640x480 5 persons, 2 cups, 2 donuts, 1 dining table, 72.5ms
Speed: 2.9ms preprocess, 72.5ms inference, 9.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   7%|▋         | 332/5000 [00:47<09:42,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000035770.jpg: 640x480 1 cake, 2 toilets, 69.8ms
Speed: 2.8ms preprocess, 69.8ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   7%|▋         | 333/5000 [00:47<09:39,  8.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000035963.jpg: 640x480 2 teddy bears, 70.6ms
Speed: 2.7ms preprocess, 70.6ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   7%|▋         | 334/5000 [00:47<09:40,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000036494.jpg: 448x640 18 persons, 1 bowl, 3 chairs, 1 clock, 64.6ms
Speed: 2.9ms preprocess, 64.6ms inference, 20.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 335/5000 [00:48<10:44,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000036539.jpg: 640x448 2 persons, 1 skis, 2 snowboards, 73.2ms
Speed: 2.9ms preprocess, 73.2ms inference, 9.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   7%|▋         | 336/5000 [00:48<11:04,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000036660.jpg: 416x640 1 person, 4 cell phones, 56.9ms
Speed: 4.3ms preprocess, 56.9ms inference, 4.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:   7%|▋         | 337/5000 [00:48<10:22,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000036678.jpg: 384x640 5 boats, 55.4ms
Speed: 2.5ms preprocess, 55.4ms inference, 4.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   7%|▋         | 338/5000 [00:48<09:50,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000036844.jpg: 480x640 1 chair, 3 couchs, 6 potted plants, 2 tvs, 64.9ms
Speed: 2.8ms preprocess, 64.9ms inference, 11.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 339/5000 [00:48<10:16,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000036861.jpg: 640x480 3 parking meters, 63.5ms
Speed: 2.6ms preprocess, 63.5ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   7%|▋         | 340/5000 [00:48<09:45,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000036936.jpg: 416x640 2 persons, 1 handbag, 4 cups, 1 bowl, 1 couch, 1 potted plant, 1 tv, 59.2ms
Speed: 2.6ms preprocess, 59.2ms inference, 8.9ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:   7%|▋         | 341/5000 [00:48<09:56,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000037670.jpg: 480x640 2 persons, 2 toilets, 66.4ms
Speed: 2.5ms preprocess, 66.4ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 342/5000 [00:49<09:40,  8.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000037689.jpg: 448x640 18 persons, 1 skis, 1 snowboard, 63.3ms
Speed: 2.7ms preprocess, 63.3ms inference, 17.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 343/5000 [00:49<10:50,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000037740.jpg: 480x640 1 chair, 1 couch, 1 potted plant, 2 tvs, 1 laptop, 2 keyboards, 1 book, 74.7ms
Speed: 2.8ms preprocess, 74.7ms inference, 9.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 344/5000 [00:49<11:06,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000037751.jpg: 480x640 1 person, 4 motorcycles, 1 backpack, 70.2ms
Speed: 2.9ms preprocess, 70.2ms inference, 6.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 345/5000 [00:49<10:57,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000037777.jpg: 448x640 3 oranges, 1 dining table, 1 oven, 2 refrigerators, 100.9ms
Speed: 3.8ms preprocess, 100.9ms inference, 7.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 346/5000 [00:49<11:34,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000037988.jpg: 480x640 3 persons, 1 sports ball, 1 tennis racket, 70.1ms
Speed: 2.5ms preprocess, 70.1ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 347/5000 [00:49<11:12,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000038048.jpg: 640x384 1 person, 1 fire hydrant, 141.9ms
Speed: 4.1ms preprocess, 141.9ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:   7%|▋         | 348/5000 [00:49<12:18,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000038070.jpg: 480x640 1 toilet, 66.2ms
Speed: 4.9ms preprocess, 66.2ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 349/5000 [00:50<11:20,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000038118.jpg: 448x640 1 person, 68.6ms
Speed: 2.8ms preprocess, 68.6ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 350/5000 [00:50<10:34,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000038210.jpg: 640x448 2 persons, 1 skis, 68.7ms
Speed: 2.8ms preprocess, 68.7ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   7%|▋         | 351/5000 [00:50<10:22,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000038576.jpg: 640x448 1 tv, 2 mouses, 1 keyboard, 65.1ms
Speed: 2.9ms preprocess, 65.1ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   7%|▋         | 352/5000 [00:50<10:01,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000038678.jpg: 640x480 3 persons, 1 cup, 1 donut, 1 dining table, 68.8ms
Speed: 2.8ms preprocess, 68.8ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   7%|▋         | 353/5000 [00:50<10:03,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000038825.jpg: 448x640 3 zebras, 67.8ms
Speed: 2.8ms preprocess, 67.8ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 354/5000 [00:50<09:51,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000038829.jpg: 448x640 5 persons, 1 bicycle, 2 motorcycles, 61.8ms
Speed: 2.6ms preprocess, 61.8ms inference, 10.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 355/5000 [00:50<10:04,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000039405.jpg: 480x640 13 persons, 3 backpacks, 1 sports ball, 1 tennis racket, 2 chairs, 108.5ms
Speed: 2.8ms preprocess, 108.5ms inference, 19.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 356/5000 [00:51<12:09,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000039477.jpg: 448x640 1 bottle, 1 couch, 4 potted plants, 1 tv, 1 laptop, 1 book, 68.2ms
Speed: 2.6ms preprocess, 68.2ms inference, 7.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 357/5000 [00:51<11:50,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000039480.jpg: 448x640 3 persons, 2 tennis rackets, 67.1ms
Speed: 2.9ms preprocess, 67.1ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 358/5000 [00:51<11:09,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000039484.jpg: 448x640 6 persons, 13 cars, 66.9ms
Speed: 2.7ms preprocess, 66.9ms inference, 16.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 359/5000 [00:51<11:54,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000039551.jpg: 448x640 1 person, 1 sports ball, 1 tennis racket, 62.6ms
Speed: 3.0ms preprocess, 62.6ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 360/5000 [00:51<11:00,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000039670.jpg: 640x640 1 train, 90.3ms
Speed: 2.5ms preprocess, 90.3ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   7%|▋         | 361/5000 [00:51<10:59,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000039769.jpg: 480x640 2 cats, 1 couch, 1 remote, 71.4ms
Speed: 2.9ms preprocess, 71.4ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 362/5000 [00:51<10:36,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000039785.jpg: 480x640 1 person, 2 surfboards, 65.4ms
Speed: 4.2ms preprocess, 65.4ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 363/5000 [00:52<10:13,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000039914.jpg: 640x512 5 persons, 1 kite, 147.6ms
Speed: 2.6ms preprocess, 147.6ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:   7%|▋         | 364/5000 [00:52<11:54,  6.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000039951.jpg: 448x640 1 person, 1 sports ball, 1 tennis racket, 61.3ms
Speed: 2.3ms preprocess, 61.3ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 365/5000 [00:52<10:57,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000039956.jpg: 448x640 1 cup, 3 chairs, 1 couch, 1 bed, 1 dining table, 82.3ms
Speed: 2.1ms preprocess, 82.3ms inference, 6.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 366/5000 [00:52<11:03,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000040036.jpg: 448x640 2 persons, 1 horse, 61.1ms
Speed: 2.1ms preprocess, 61.1ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 367/5000 [00:52<10:10,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000040083.jpg: 448x640 3 persons, 1 bicycle, 2 cars, 1 umbrella, 3 chairs, 77.5ms
Speed: 4.8ms preprocess, 77.5ms inference, 9.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 368/5000 [00:52<11:04,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000040471.jpg: 640x448 1 banana, 1 oven, 1 refrigerator, 63.0ms
Speed: 3.0ms preprocess, 63.0ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   7%|▋         | 369/5000 [00:52<10:26,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000040757.jpg: 640x448 2 persons, 1 bed, 1 tv, 66.7ms
Speed: 2.2ms preprocess, 66.7ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   7%|▋         | 370/5000 [00:52<10:07,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000041488.jpg: 384x640 (no detections), 61.6ms
Speed: 3.6ms preprocess, 61.6ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000041633.jpg: 448x640 1 car, 1 truck, 67.8ms
Speed: 2.3ms preprocess, 67.8ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 372/5000 [00:53<09:11,  8.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000041635.jpg: 480x640 1 sheep, 5 cows, 68.9ms
Speed: 2.6ms preprocess, 68.9ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   7%|▋         | 373/5000 [00:53<09:23,  8.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000041872.jpg: 448x640 3 chairs, 1 couch, 1 bed, 2 tvs, 1 clock, 59.7ms
Speed: 2.7ms preprocess, 59.7ms inference, 7.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   7%|▋         | 374/5000 [00:53<09:27,  8.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000041888.jpg: 480x640 2 birds, 2 sheeps, 68.3ms
Speed: 2.8ms preprocess, 68.3ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 375/5000 [00:53<09:26,  8.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000041990.jpg: 480x640 3 persons, 68.3ms
Speed: 2.8ms preprocess, 68.3ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 376/5000 [00:53<09:32,  8.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000042070.jpg: 512x640 1 car, 1 bus, 131.8ms
Speed: 2.6ms preprocess, 131.8ms inference, 2.6ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:   8%|▊         | 377/5000 [00:53<10:47,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000042102.jpg: 640x256 1 person, 1 handbag, 1 tie, 104.1ms
Speed: 2.3ms preprocess, 104.1ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 256)


Segmenting Images:   8%|▊         | 378/5000 [00:54<10:58,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000042178.jpg: 480x640 1 kite, 65.0ms
Speed: 2.3ms preprocess, 65.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 379/5000 [00:54<10:23,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000042276.jpg: 640x480 1 potted plant, 1 toilet, 1 book, 61.8ms
Speed: 4.5ms preprocess, 61.8ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   8%|▊         | 380/5000 [00:54<09:56,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000042296.jpg: 448x640 1 bear, 1 sports ball, 55.4ms
Speed: 2.5ms preprocess, 55.4ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   8%|▊         | 381/5000 [00:54<09:18,  8.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000042528.jpg: 512x640 1 person, 1 suitcase, 1 chair, 1 cell phone, 66.2ms
Speed: 2.4ms preprocess, 66.2ms inference, 4.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:   8%|▊         | 382/5000 [00:54<09:10,  8.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000042563.jpg: 640x640 1 train, 83.3ms
Speed: 2.6ms preprocess, 83.3ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   8%|▊         | 383/5000 [00:54<09:39,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000042628.jpg: 448x640 5 persons, 1 fire hydrant, 64.6ms
Speed: 2.6ms preprocess, 64.6ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   8%|▊         | 384/5000 [00:54<09:32,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000042888.jpg: 480x640 (no detections), 67.5ms
Speed: 3.9ms preprocess, 67.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 385/5000 [00:54<09:05,  8.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000042889.jpg: 640x480 2 mouses, 1 keyboard, 1 cell phone, 1 teddy bear, 87.0ms
Speed: 3.6ms preprocess, 87.0ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   8%|▊         | 386/5000 [00:54<09:43,  7.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000043314.jpg: 512x640 1 person, 4 skiss, 1 snowboard, 67.4ms
Speed: 3.4ms preprocess, 67.4ms inference, 6.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:   8%|▊         | 387/5000 [00:55<09:55,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000043435.jpg: 640x640 2 persons, 2 surfboards, 82.4ms
Speed: 2.5ms preprocess, 82.4ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   8%|▊         | 388/5000 [00:55<10:11,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000043581.jpg: 448x640 1 bottle, 1 fork, 1 pizza, 1 dining table, 63.4ms
Speed: 2.5ms preprocess, 63.4ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   8%|▊         | 389/5000 [00:55<09:51,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000043737.jpg: 640x480 5 cars, 1 clock, 63.8ms
Speed: 2.7ms preprocess, 63.8ms inference, 8.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   8%|▊         | 390/5000 [00:55<09:46,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000043816.jpg: 448x640 4 persons, 1 sports ball, 1 baseball bat, 2 baseball gloves, 60.1ms
Speed: 2.5ms preprocess, 60.1ms inference, 7.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   8%|▊         | 391/5000 [00:55<09:46,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000044068.jpg: 640x480 2 chairs, 1 teddy bear, 62.6ms
Speed: 2.4ms preprocess, 62.6ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   8%|▊         | 392/5000 [00:55<09:25,  8.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000044195.jpg: 640x480 2 persons, 1 snowboard, 61.5ms
Speed: 4.4ms preprocess, 61.5ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   8%|▊         | 393/5000 [00:55<09:17,  8.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000044260.jpg: 448x640 2 apples, 64.9ms
Speed: 3.1ms preprocess, 64.9ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   8%|▊         | 394/5000 [00:55<09:11,  8.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000044279.jpg: 448x640 2 persons, 1 bottle, 64.3ms
Speed: 2.6ms preprocess, 64.3ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   8%|▊         | 395/5000 [00:56<09:05,  8.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000044590.jpg: 256x640 11 persons, 9 motorcycles, 1 umbrella, 143.4ms
Speed: 2.7ms preprocess, 143.4ms inference, 10.8ms postprocess per image at shape (1, 3, 256, 640)


Segmenting Images:   8%|▊         | 396/5000 [00:56<11:29,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000044652.jpg: 448x640 1 airplane, 63.9ms
Speed: 2.3ms preprocess, 63.9ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   8%|▊         | 397/5000 [00:56<10:30,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000044699.jpg: 448x640 7 sheeps, 60.0ms
Speed: 2.8ms preprocess, 60.0ms inference, 6.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   8%|▊         | 398/5000 [00:56<10:15,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000044877.jpg: 448x640 4 persons, 1 tennis racket, 58.5ms
Speed: 2.7ms preprocess, 58.5ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   8%|▊         | 399/5000 [00:56<09:51,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000045070.jpg: 608x640 1 person, 150.9ms
Speed: 2.6ms preprocess, 150.9ms inference, 2.4ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:   8%|▊         | 400/5000 [00:56<11:26,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000045090.jpg: 352x640 4 persons, 1 surfboard, 110.4ms
Speed: 3.8ms preprocess, 110.4ms inference, 3.6ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:   8%|▊         | 401/5000 [00:57<11:46,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000045229.jpg: 480x640 5 bottles, 1 potted plant, 1 tv, 1 book, 63.7ms
Speed: 2.7ms preprocess, 63.7ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 402/5000 [00:57<11:09,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000045472.jpg: 448x640 5 cups, 2 bowls, 5 oranges, 1 potted plant, 60.1ms
Speed: 2.6ms preprocess, 60.1ms inference, 11.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   8%|▊         | 403/5000 [00:57<11:07,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000045550.jpg: 480x640 5 persons, 1 bowl, 1 sandwich, 1 pizza, 1 dining table, 66.1ms
Speed: 3.0ms preprocess, 66.1ms inference, 8.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 404/5000 [00:57<10:46,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000045596.jpg: 640x416 3 persons, 2 bicycles, 129.3ms
Speed: 2.8ms preprocess, 129.3ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:   8%|▊         | 405/5000 [00:57<11:53,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000045728.jpg: 480x640 (no detections), 62.3ms
Speed: 2.9ms preprocess, 62.3ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000046031.jpg: 480x640 1 bed, 1 tv, 2 laptops, 2 mouses, 2 keyboards, 2 cell phones, 72.1ms
Speed: 2.6ms preprocess, 72.1ms inference, 15.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 407/5000 [00:57<10:45,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000046048.jpg: 480x640 1 person, 1 bed, 1 book, 69.1ms
Speed: 2.6ms preprocess, 69.1ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 408/5000 [00:57<10:20,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000046252.jpg: 480x640 3 persons, 1 baseball bat, 2 baseball gloves, 64.4ms
Speed: 3.0ms preprocess, 64.4ms inference, 8.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 409/5000 [00:58<10:19,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000046378.jpg: 384x640 1 cat, 54.9ms
Speed: 2.3ms preprocess, 54.9ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000046463.jpg: 512x640 1 person, 1 sandwich, 66.6ms
Speed: 3.8ms preprocess, 66.6ms inference, 3.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:   8%|▊         | 411/5000 [00:58<09:22,  8.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000046497.jpg: 448x640 4 persons, 1 boat, 65.1ms
Speed: 3.9ms preprocess, 65.1ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   8%|▊         | 412/5000 [00:58<09:23,  8.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000046804.jpg: 480x640 1 sheep, 64.8ms
Speed: 3.2ms preprocess, 64.8ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 413/5000 [00:58<09:10,  8.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000046872.jpg: 416x640 3 persons, 4 cars, 1 truck, 129.2ms
Speed: 2.7ms preprocess, 129.2ms inference, 6.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:   8%|▊         | 414/5000 [00:58<10:41,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000047010.jpg: 480x640 1 bird, 3 giraffes, 65.5ms
Speed: 2.5ms preprocess, 65.5ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 415/5000 [00:58<10:13,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000047112.jpg: 480x640 1 person, 2 wine glasss, 1 bowl, 1 pizza, 5 chairs, 1 dining table, 63.9ms
Speed: 2.5ms preprocess, 63.9ms inference, 13.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 416/5000 [00:59<10:27,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000047121.jpg: 480x640 1 cat, 1 bottle, 1 sink, 63.5ms
Speed: 2.4ms preprocess, 63.5ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 417/5000 [00:59<10:15,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000047571.jpg: 640x640 2 persons, 1 horse, 147.1ms
Speed: 2.8ms preprocess, 147.1ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   8%|▊         | 418/5000 [00:59<11:56,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000047585.jpg: 640x448 4 persons, 2 umbrellas, 2 ties, 122.5ms
Speed: 3.5ms preprocess, 122.5ms inference, 7.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   8%|▊         | 419/5000 [00:59<12:40,  6.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000047740.jpg: 384x640 4 persons, 2 kites, 1 surfboard, 54.4ms
Speed: 3.1ms preprocess, 54.4ms inference, 4.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   8%|▊         | 420/5000 [00:59<11:29,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000047769.jpg: 480x640 2 chairs, 2 couchs, 1 tv, 64.0ms
Speed: 4.1ms preprocess, 64.0ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 421/5000 [00:59<10:57,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000047801.jpg: 640x640 1 person, 1 bench, 85.4ms
Speed: 4.2ms preprocess, 85.4ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   8%|▊         | 422/5000 [00:59<10:54,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000047819.jpg: 480x640 1 person, 1 horse, 63.9ms
Speed: 4.6ms preprocess, 63.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 423/5000 [01:00<10:12,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000047828.jpg: 320x640 6 persons, 3 horses, 104.2ms
Speed: 2.6ms preprocess, 104.2ms inference, 5.2ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:   8%|▊         | 424/5000 [01:00<11:03,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000048153.jpg: 480x640 1 person, 65.1ms
Speed: 3.3ms preprocess, 65.1ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   8%|▊         | 425/5000 [01:00<10:25,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000048396.jpg: 448x640 2 persons, 1 cup, 1 bowl, 1 chair, 1 dining table, 61.7ms
Speed: 2.8ms preprocess, 61.7ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▊         | 426/5000 [01:00<10:06,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000048504.jpg: 448x640 3 persons, 1 horse, 2 elephants, 1 chair, 78.8ms
Speed: 4.0ms preprocess, 78.8ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▊         | 427/5000 [01:00<10:17,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000048555.jpg: 480x640 2 persons, 3 horses, 66.3ms
Speed: 2.2ms preprocess, 66.3ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▊         | 428/5000 [01:00<10:10,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000048564.jpg: 640x448 1 person, 1 chair, 65.5ms
Speed: 2.5ms preprocess, 65.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   9%|▊         | 429/5000 [01:00<09:45,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000048924.jpg: 448x640 1 motorcycle, 1 umbrella, 63.7ms
Speed: 2.4ms preprocess, 63.7ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▊         | 430/5000 [01:00<09:19,  8.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000049060.jpg: 416x640 3 persons, 2 trains, 54.9ms
Speed: 2.3ms preprocess, 54.9ms inference, 4.7ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:   9%|▊         | 431/5000 [01:01<09:07,  8.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000049091.jpg: 448x640 1 book, 60.8ms
Speed: 2.5ms preprocess, 60.8ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▊         | 432/5000 [01:01<08:51,  8.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000049259.jpg: 640x448 1 person, 62.6ms
Speed: 2.9ms preprocess, 62.6ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   9%|▊         | 433/5000 [01:01<08:46,  8.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000049269.jpg: 640x448 1 dog, 1 horse, 59.3ms
Speed: 2.5ms preprocess, 59.3ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   9%|▊         | 434/5000 [01:01<08:38,  8.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000049759.jpg: 480x640 17 persons, 65.6ms
Speed: 2.8ms preprocess, 65.6ms inference, 15.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▊         | 435/5000 [01:01<09:48,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000049761.jpg: 480x640 2 horses, 5 zebras, 65.9ms
Speed: 2.9ms preprocess, 65.9ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▊         | 436/5000 [01:01<09:55,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000049810.jpg: 448x640 1 bench, 2 cats, 81.7ms
Speed: 2.5ms preprocess, 81.7ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▊         | 437/5000 [01:01<10:06,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050006.jpg: 480x640 1 person, 3 boats, 66.2ms
Speed: 2.8ms preprocess, 66.2ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▉         | 438/5000 [01:01<09:45,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050145.jpg: 448x640 2 persons, 2 bicycles, 1 umbrella, 1 handbag, 61.3ms
Speed: 4.2ms preprocess, 61.3ms inference, 5.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 439/5000 [01:02<09:40,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050149.jpg: 512x640 12 bananas, 70.3ms
Speed: 3.9ms preprocess, 70.3ms inference, 12.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:   9%|▉         | 440/5000 [01:02<10:17,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050165.jpg: 448x640 1 train, 60.7ms
Speed: 2.5ms preprocess, 60.7ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 441/5000 [01:02<09:37,  7.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050326.jpg: 480x640 1 person, 62.9ms
Speed: 4.3ms preprocess, 62.9ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▉         | 442/5000 [01:02<09:18,  8.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050331.jpg: 480x640 1 bench, 64.6ms
Speed: 2.7ms preprocess, 64.6ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▉         | 443/5000 [01:02<09:02,  8.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050380.jpg: 448x640 4 persons, 1 horse, 2 chairs, 59.7ms
Speed: 2.7ms preprocess, 59.7ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 444/5000 [01:02<09:08,  8.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050638.jpg: 416x640 1 person, 1 baseball glove, 56.6ms
Speed: 2.9ms preprocess, 56.6ms inference, 2.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:   9%|▉         | 445/5000 [01:02<08:46,  8.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050679.jpg: 448x640 8 cars, 1 truck, 1 orange, 78.6ms
Speed: 2.4ms preprocess, 78.6ms inference, 11.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 446/5000 [01:02<09:35,  7.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050811.jpg: 608x640 1 person, 1 tie, 2 bottles, 1 bowl, 1 dining table, 143.3ms
Speed: 2.7ms preprocess, 143.3ms inference, 9.3ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:   9%|▉         | 447/5000 [01:03<11:36,  6.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050828.jpg: 480x640 1 chair, 1 bed, 62.8ms
Speed: 3.1ms preprocess, 62.8ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▉         | 448/5000 [01:03<10:38,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050844.jpg: 640x448 3 teddy bears, 61.1ms
Speed: 4.0ms preprocess, 61.1ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   9%|▉         | 449/5000 [01:03<10:04,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050896.jpg: 640x640 1 bowl, 13 oranges, 84.4ms
Speed: 3.1ms preprocess, 84.4ms inference, 17.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:   9%|▉         | 450/5000 [01:03<11:28,  6.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000050943.jpg: 320x640 2 persons, 1 surfboard, 46.8ms
Speed: 2.6ms preprocess, 46.8ms inference, 2.6ms postprocess per image at shape (1, 3, 320, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000051008.jpg: 480x640 1 cat, 1 couch, 1 laptop, 64.1ms
Speed: 2.8ms preprocess, 64.1ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▉         | 452/5000 [01:03<09:40,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000051309.jpg: 384x640 5 horses, 50.3ms
Speed: 2.4ms preprocess, 50.3ms inference, 4.1ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   9%|▉         | 453/5000 [01:03<09:17,  8.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000051314.jpg: 480x640 1 person, 1 surfboard, 67.1ms
Speed: 3.1ms preprocess, 67.1ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▉         | 454/5000 [01:04<09:07,  8.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000051326.jpg: 448x640 1 donut, 3 vases, 61.8ms
Speed: 2.8ms preprocess, 61.8ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 455/5000 [01:04<08:58,  8.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000051598.jpg: 640x384 1 sink, 134.3ms
Speed: 2.5ms preprocess, 134.3ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:   9%|▉         | 456/5000 [01:04<10:17,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000051610.jpg: 448x640 1 person, 2 laptops, 62.9ms
Speed: 2.5ms preprocess, 62.9ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 457/5000 [01:04<09:48,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000051712.jpg: 384x640 1 person, 1 skis, 56.1ms
Speed: 2.6ms preprocess, 56.1ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:   9%|▉         | 458/5000 [01:04<09:14,  8.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000051738.jpg: 480x640 1 couch, 1 bed, 68.4ms
Speed: 4.4ms preprocess, 68.4ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▉         | 459/5000 [01:04<09:10,  8.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000051938.jpg: 480x640 6 persons, 64.8ms
Speed: 3.0ms preprocess, 64.8ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▉         | 460/5000 [01:04<09:11,  8.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000051961.jpg: 640x448 1 parking meter, 60.3ms
Speed: 2.8ms preprocess, 60.3ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   9%|▉         | 461/5000 [01:04<08:57,  8.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000051976.jpg: 448x640 1 person, 1 surfboard, 57.4ms
Speed: 2.4ms preprocess, 57.4ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 462/5000 [01:04<08:34,  8.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000052007.jpg: 640x480 3 persons, 1 bus, 122.1ms
Speed: 3.3ms preprocess, 122.1ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:   9%|▉         | 463/5000 [01:05<10:02,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000052017.jpg: 448x640 1 airplane, 59.5ms
Speed: 2.8ms preprocess, 59.5ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 464/5000 [01:05<09:24,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000052412.jpg: 448x640 4 cars, 1 airplane, 65.0ms
Speed: 2.8ms preprocess, 65.0ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 465/5000 [01:05<09:31,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000052413.jpg: 480x640 2 persons, 1 cup, 2 remotes, 69.3ms
Speed: 4.3ms preprocess, 69.3ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▉         | 466/5000 [01:05<09:33,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000052462.jpg: 448x640 3 zebras, 67.9ms
Speed: 2.3ms preprocess, 67.9ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 467/5000 [01:05<09:23,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000052507.jpg: 448x640 2 persons, 1 surfboard, 69.3ms
Speed: 3.3ms preprocess, 69.3ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 468/5000 [01:05<09:23,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000052565.jpg: 480x640 1 cow, 72.9ms
Speed: 2.8ms preprocess, 72.9ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:   9%|▉         | 469/5000 [01:05<09:23,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000052591.jpg: 640x384 1 person, 56.4ms
Speed: 3.0ms preprocess, 56.4ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:   9%|▉         | 470/5000 [01:05<08:56,  8.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000052891.jpg: 448x640 1 dog, 1 frisbee, 63.2ms
Speed: 2.8ms preprocess, 63.2ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 471/5000 [01:06<08:44,  8.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000052996.jpg: 448x640 7 persons, 1 handbag, 1 cup, 4 bowls, 1 chair, 1 refrigerator, 64.8ms
Speed: 3.1ms preprocess, 64.8ms inference, 15.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 472/5000 [01:06<09:43,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000053505.jpg: 640x448 1 toilet, 66.2ms
Speed: 2.9ms preprocess, 66.2ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:   9%|▉         | 473/5000 [01:06<09:21,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000053529.jpg: 448x640 1 person, 1 dog, 85.8ms
Speed: 4.3ms preprocess, 85.8ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:   9%|▉         | 474/5000 [01:06<09:42,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000053624.jpg: 448x640 3 persons, 2 elephants, 66.6ms
Speed: 2.9ms preprocess, 66.6ms inference, 5.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|▉         | 475/5000 [01:06<09:39,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000053626.jpg: 480x640 4 persons, 3 skiss, 71.5ms
Speed: 3.3ms preprocess, 71.5ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  10%|▉         | 476/5000 [01:06<09:52,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000053909.jpg: 640x480 2 persons, 3 cell phones, 67.8ms
Speed: 2.8ms preprocess, 67.8ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  10%|▉         | 477/5000 [01:06<09:46,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000053994.jpg: 640x512 1 car, 1 parking meter, 1 bench, 142.4ms
Speed: 3.1ms preprocess, 142.4ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  10%|▉         | 478/5000 [01:07<11:15,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000054123.jpg: 448x640 4 zebras, 68.8ms
Speed: 3.0ms preprocess, 68.8ms inference, 5.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|▉         | 479/5000 [01:07<10:52,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000054164.jpg: 448x640 1 person, 1 surfboard, 63.8ms
Speed: 2.9ms preprocess, 63.8ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|▉         | 480/5000 [01:07<10:00,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000054592.jpg: 448x640 2 persons, 1 backpack, 2 skiss, 62.2ms
Speed: 2.8ms preprocess, 62.2ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|▉         | 481/5000 [01:07<09:55,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000054593.jpg: 448x640 8 persons, 1 car, 1 sports ball, 1 baseball bat, 1 baseball glove, 58.4ms
Speed: 3.0ms preprocess, 58.4ms inference, 10.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|▉         | 482/5000 [01:07<10:07,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000054605.jpg: 640x640 3 persons, 2 cups, 3 forks, 1 cake, 2 chairs, 1 dining table, 100.6ms
Speed: 4.0ms preprocess, 100.6ms inference, 21.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  10%|▉         | 483/5000 [01:07<11:58,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000054628.jpg: 640x480 1 person, 1 umbrella, 2 cakes, 3 chairs, 1 potted plant, 2 dining tables, 63.3ms
Speed: 4.7ms preprocess, 63.3ms inference, 10.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  10%|▉         | 484/5000 [01:07<11:44,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000054654.jpg: 640x416 1 person, 1 bottle, 126.5ms
Speed: 2.7ms preprocess, 126.5ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  10%|▉         | 485/5000 [01:08<12:10,  6.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000054931.jpg: 640x448 1 person, 1 horse, 60.9ms
Speed: 2.5ms preprocess, 60.9ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  10%|▉         | 486/5000 [01:08<10:53,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000054967.jpg: 640x448 1 person, 13 cars, 1 truck, 2 traffic lights, 64.9ms
Speed: 2.5ms preprocess, 64.9ms inference, 15.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  10%|▉         | 487/5000 [01:08<11:10,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000055002.jpg: 480x640 2 toilets, 65.2ms
Speed: 4.0ms preprocess, 65.2ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  10%|▉         | 488/5000 [01:08<10:33,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000055022.jpg: 640x480 1 person, 5 bicycles, 62.9ms
Speed: 2.2ms preprocess, 62.9ms inference, 6.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  10%|▉         | 489/5000 [01:08<10:10,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000055072.jpg: 448x640 1 giraffe, 61.1ms
Speed: 2.5ms preprocess, 61.1ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|▉         | 490/5000 [01:08<09:41,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000055150.jpg: 448x640 3 persons, 9 cars, 2 trucks, 3 suitcases, 60.6ms
Speed: 2.4ms preprocess, 60.6ms inference, 14.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|▉         | 491/5000 [01:08<10:15,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000055167.jpg: 384x640 6 persons, 1 car, 2 buss, 1 truck, 57.0ms
Speed: 2.2ms preprocess, 57.0ms inference, 8.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  10%|▉         | 492/5000 [01:09<10:10,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000055299.jpg: 448x640 1 boat, 1 bird, 67.1ms
Speed: 2.6ms preprocess, 67.1ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|▉         | 493/5000 [01:09<09:43,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000055528.jpg: 480x640 1 person, 1 couch, 2 remotes, 68.6ms
Speed: 2.1ms preprocess, 68.6ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  10%|▉         | 494/5000 [01:09<09:35,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000055950.jpg: 640x448 1 person, 1 sports ball, 1 tennis racket, 65.4ms
Speed: 2.6ms preprocess, 65.4ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  10%|▉         | 495/5000 [01:09<09:23,  7.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000056127.jpg: 640x608 1 bottle, 1 cup, 1 chair, 1 oven, 153.0ms
Speed: 2.3ms preprocess, 153.0ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  10%|▉         | 496/5000 [01:09<11:19,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000056288.jpg: 480x640 1 sandwich, 1 remote, 61.7ms
Speed: 2.5ms preprocess, 61.7ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  10%|▉         | 497/5000 [01:09<10:28,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000056344.jpg: 480x640 3 tvs, 1 mouse, 1 keyboard, 2 cell phones, 62.6ms
Speed: 2.0ms preprocess, 62.6ms inference, 7.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  10%|▉         | 498/5000 [01:09<10:07,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000056350.jpg: 640x640 5 persons, 1 toilet, 82.5ms
Speed: 4.5ms preprocess, 82.5ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  10%|▉         | 499/5000 [01:10<10:34,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000056545.jpg: 640x512 1 bird, 72.0ms
Speed: 2.7ms preprocess, 72.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  10%|█         | 500/5000 [01:10<10:07,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000057027.jpg: 448x640 4 elephants, 60.2ms
Speed: 2.8ms preprocess, 60.2ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|█         | 501/5000 [01:10<09:37,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000057149.jpg: 448x640 1 car, 1 bus, 1 train, 4 traffic lights, 80.5ms
Speed: 3.3ms preprocess, 80.5ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|█         | 502/5000 [01:10<10:03,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000057150.jpg: 640x480 10 persons, 1 backpack, 1 teddy bear, 67.3ms
Speed: 4.9ms preprocess, 67.3ms inference, 10.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  10%|█         | 503/5000 [01:10<10:23,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000057232.jpg: 640x640 1 hot dog, 82.2ms
Speed: 4.6ms preprocess, 82.2ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  10%|█         | 504/5000 [01:10<10:16,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000057238.jpg: 640x480 3 chairs, 1 refrigerator, 63.0ms
Speed: 5.2ms preprocess, 63.0ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  10%|█         | 505/5000 [01:10<09:58,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000057244.jpg: 448x640 1 boat, 60.7ms
Speed: 2.9ms preprocess, 60.7ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|█         | 506/5000 [01:10<09:23,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000057597.jpg: 448x640 14 persons, 1 car, 1 sports ball, 76.8ms
Speed: 3.0ms preprocess, 76.8ms inference, 14.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|█         | 507/5000 [01:11<10:26,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000057672.jpg: 480x640 16 persons, 3 cars, 4 boats, 65.4ms
Speed: 3.0ms preprocess, 65.4ms inference, 20.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  10%|█         | 508/5000 [01:11<11:13,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000057725.jpg: 480x640 2 clocks, 62.6ms
Speed: 2.3ms preprocess, 62.6ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  10%|█         | 509/5000 [01:11<10:35,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000057760.jpg: 416x640 9 persons, 1 kite, 119.9ms
Speed: 4.0ms preprocess, 119.9ms inference, 8.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  10%|█         | 510/5000 [01:11<11:46,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000058029.jpg: 480x640 2 toilets, 2 sinks, 104.1ms
Speed: 3.7ms preprocess, 104.1ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  10%|█         | 511/5000 [01:11<11:57,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000058111.jpg: 640x640 1 cat, 95.1ms
Speed: 3.3ms preprocess, 95.1ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  10%|█         | 512/5000 [01:11<11:44,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000058350.jpg: 480x640 2 persons, 1 pizza, 1 chair, 1 couch, 86.3ms
Speed: 3.3ms preprocess, 86.3ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  10%|█         | 513/5000 [01:12<11:51,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000058384.jpg: 480x640 3 elephants, 72.8ms
Speed: 3.4ms preprocess, 72.8ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  10%|█         | 514/5000 [01:12<11:13,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000058393.jpg: 512x640 3 persons, 1 bench, 149.8ms
Speed: 2.9ms preprocess, 149.8ms inference, 5.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  10%|█         | 515/5000 [01:12<12:33,  5.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000058539.jpg: 448x640 5 persons, 1 car, 1 tie, 67.5ms
Speed: 2.9ms preprocess, 67.5ms inference, 5.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|█         | 516/5000 [01:12<11:44,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000058636.jpg: 640x640 (no detections), 87.7ms
Speed: 2.4ms preprocess, 87.7ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  10%|█         | 517/5000 [01:12<10:59,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000058655.jpg: 640x448 1 clock, 59.4ms
Speed: 2.9ms preprocess, 59.4ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  10%|█         | 518/5000 [01:12<10:02,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000058705.jpg: 512x640 2 persons, 1 tie, 71.6ms
Speed: 2.8ms preprocess, 71.6ms inference, 3.7ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  10%|█         | 519/5000 [01:12<09:55,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000059044.jpg: 448x640 10 persons, 2 handbags, 1 remote, 69.9ms
Speed: 3.0ms preprocess, 69.9ms inference, 10.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|█         | 520/5000 [01:13<10:19,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000059386.jpg: 640x448 1 zebra, 4 giraffes, 61.6ms
Speed: 3.0ms preprocess, 61.6ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  10%|█         | 521/5000 [01:13<09:53,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000059598.jpg: 480x640 2 persons, 1 cup, 1 banana, 1 chair, 1 dining table, 1 laptop, 1 mouse, 1 keyboard, 1 cell phone, 63.9ms
Speed: 3.2ms preprocess, 63.9ms inference, 8.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  10%|█         | 522/5000 [01:13<09:50,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000059635.jpg: 640x512 30 persons, 1 bicycle, 3 surfboards, 68.9ms
Speed: 2.8ms preprocess, 68.9ms inference, 31.4ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  10%|█         | 523/5000 [01:13<12:10,  6.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000059920.jpg: 640x448 3 toilets, 65.9ms
Speed: 2.9ms preprocess, 65.9ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  10%|█         | 524/5000 [01:13<11:14,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060052.jpg: 448x640 2 persons, 1 baseball bat, 60.4ms
Speed: 3.0ms preprocess, 60.4ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  10%|█         | 525/5000 [01:13<10:17,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060090.jpg: 448x640 3 persons, 1 train, 61.6ms
Speed: 3.0ms preprocess, 61.6ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█         | 526/5000 [01:13<09:46,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060102.jpg: 384x640 2 persons, 1 sports ball, 55.4ms
Speed: 2.6ms preprocess, 55.4ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  11%|█         | 527/5000 [01:13<09:14,  8.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060347.jpg: 640x480 1 person, 1 bench, 69.4ms
Speed: 3.1ms preprocess, 69.4ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  11%|█         | 528/5000 [01:14<09:20,  7.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060363.jpg: 480x640 1 clock, 75.3ms
Speed: 2.7ms preprocess, 75.3ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 529/5000 [01:14<09:17,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060449.jpg: 640x480 1 cup, 1 chair, 1 laptop, 76.4ms
Speed: 2.4ms preprocess, 76.4ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  11%|█         | 530/5000 [01:14<09:23,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060507.jpg: 448x640 15 persons, 1 sports ball, 67.9ms
Speed: 2.8ms preprocess, 67.9ms inference, 15.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█         | 531/5000 [01:14<10:12,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060770.jpg: 480x640 2 zebras, 64.6ms
Speed: 5.1ms preprocess, 64.6ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 532/5000 [01:14<09:48,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060823.jpg: 480x640 2 persons, 20 birds, 4 cows, 1 bowl, 66.5ms
Speed: 2.8ms preprocess, 66.5ms inference, 29.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 533/5000 [01:14<11:18,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060835.jpg: 480x640 2 sheeps, 67.1ms
Speed: 2.7ms preprocess, 67.1ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 534/5000 [01:14<10:39,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060855.jpg: 512x640 1 apple, 6 oranges, 74.3ms
Speed: 5.0ms preprocess, 74.3ms inference, 8.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  11%|█         | 535/5000 [01:15<10:45,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060886.jpg: 448x640 15 persons, 1 sports ball, 1 baseball glove, 68.9ms
Speed: 5.1ms preprocess, 68.9ms inference, 15.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█         | 536/5000 [01:15<11:14,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060899.jpg: 640x384 1 person, 2 chairs, 2 couchs, 201.1ms
Speed: 2.8ms preprocess, 201.1ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  11%|█         | 537/5000 [01:15<13:46,  5.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000060932.jpg: 448x640 9 persons, 2 ties, 69.0ms
Speed: 2.8ms preprocess, 69.0ms inference, 9.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█         | 538/5000 [01:15<12:58,  5.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000061108.jpg: 480x640 1 bicycle, 1 car, 1 dog, 73.1ms
Speed: 3.2ms preprocess, 73.1ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 539/5000 [01:15<11:52,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000061171.jpg: 480x640 1 horse, 5 sheeps, 1 elephant, 68.7ms
Speed: 3.0ms preprocess, 68.7ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 540/5000 [01:15<11:14,  6.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000061268.jpg: 448x640 1 person, 2 trains, 67.3ms
Speed: 2.6ms preprocess, 67.3ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█         | 541/5000 [01:16<10:31,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000061333.jpg: 480x640 1 cat, 2 beds, 3 books, 67.8ms
Speed: 4.7ms preprocess, 67.8ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 542/5000 [01:16<10:14,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000061418.jpg: 480x640 7 persons, 2 boats, 1 baseball glove, 2 chairs, 68.4ms
Speed: 2.9ms preprocess, 68.4ms inference, 10.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 543/5000 [01:16<10:25,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000061471.jpg: 480x640 1 dog, 1 toilet, 63.2ms
Speed: 2.7ms preprocess, 63.2ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 544/5000 [01:16<09:51,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000061584.jpg: 480x640 3 persons, 1 frisbee, 65.3ms
Speed: 3.9ms preprocess, 65.3ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 545/5000 [01:16<09:40,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000061658.jpg: 480x640 8 broccolis, 85.4ms
Speed: 5.3ms preprocess, 85.4ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 546/5000 [01:16<10:18,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000061747.jpg: 576x640 1 person, 1 skis, 146.1ms
Speed: 1.8ms preprocess, 146.1ms inference, 5.2ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  11%|█         | 547/5000 [01:16<11:40,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000061960.jpg: 384x640 1 person, 1 couch, 1 bed, 57.0ms
Speed: 2.4ms preprocess, 57.0ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  11%|█         | 548/5000 [01:17<10:37,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000062025.jpg: 640x480 1 cup, 2 toilets, 2 sinks, 70.2ms
Speed: 3.6ms preprocess, 70.2ms inference, 6.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  11%|█         | 549/5000 [01:17<10:21,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000062353.jpg: 480x640 5 persons, 5 giraffes, 70.5ms
Speed: 2.7ms preprocess, 70.5ms inference, 8.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 550/5000 [01:17<10:29,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000062355.jpg: 448x640 5 persons, 1 baseball bat, 1 skateboard, 61.1ms
Speed: 2.8ms preprocess, 61.1ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█         | 551/5000 [01:17<10:02,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000062554.jpg: 448x640 1 bowl, 3 broccolis, 1 dining table, 59.6ms
Speed: 2.7ms preprocess, 59.6ms inference, 7.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█         | 552/5000 [01:17<09:39,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000062692.jpg: 640x448 1 person, 61.4ms
Speed: 3.1ms preprocess, 61.4ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  11%|█         | 553/5000 [01:17<09:13,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000062808.jpg: 512x640 2 persons, 3 knifes, 1 pizza, 1 dining table, 1 cell phone, 79.1ms
Speed: 3.6ms preprocess, 79.1ms inference, 8.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  11%|█         | 554/5000 [01:17<10:19,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000063047.jpg: 576x640 4 persons, 1 umbrella, 83.1ms
Speed: 3.3ms preprocess, 83.1ms inference, 6.1ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  11%|█         | 555/5000 [01:17<10:32,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000063154.jpg: 448x640 2 persons, 1 surfboard, 64.6ms
Speed: 2.7ms preprocess, 64.6ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█         | 556/5000 [01:18<09:54,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000063552.jpg: 608x640 1 cat, 1 bowl, 147.4ms
Speed: 1.6ms preprocess, 147.4ms inference, 2.8ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  11%|█         | 557/5000 [01:18<11:15,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000063602.jpg: 448x640 1 cup, 1 chair, 1 laptop, 1 keyboard, 59.8ms
Speed: 2.4ms preprocess, 59.8ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█         | 558/5000 [01:18<10:19,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000063740.jpg: 480x640 5 persons, 2 cups, 1 chair, 1 tv, 1 laptop, 1 mouse, 1 keyboard, 58.0ms
Speed: 3.0ms preprocess, 58.0ms inference, 10.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█         | 559/5000 [01:18<10:16,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000063965.jpg: 448x640 3 persons, 1 cake, 65.8ms
Speed: 2.6ms preprocess, 65.8ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█         | 560/5000 [01:18<09:54,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000064084.jpg: 448x640 3 bottles, 1 spoon, 3 sandwichs, 1 dining table, 68.1ms
Speed: 3.0ms preprocess, 68.1ms inference, 8.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█         | 561/5000 [01:18<10:05,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000064359.jpg: 448x640 3 zebras, 4 giraffes, 68.3ms
Speed: 4.5ms preprocess, 68.3ms inference, 6.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█         | 562/5000 [01:18<10:04,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000064462.jpg: 512x640 1 person, 1 skis, 99.7ms
Speed: 5.0ms preprocess, 99.7ms inference, 2.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  11%|█▏        | 563/5000 [01:19<10:27,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000064495.jpg: 480x640 (no detections), 65.1ms
Speed: 2.8ms preprocess, 65.1ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█▏        | 564/5000 [01:19<09:37,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000064499.jpg: 480x640 2 zebras, 64.7ms
Speed: 2.5ms preprocess, 64.7ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█▏        | 565/5000 [01:19<09:12,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000064523.jpg: 448x640 12 persons, 1 tennis racket, 63.1ms
Speed: 2.8ms preprocess, 63.1ms inference, 10.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█▏        | 566/5000 [01:19<09:46,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000064574.jpg: 640x480 2 knifes, 64.9ms
Speed: 2.7ms preprocess, 64.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  11%|█▏        | 567/5000 [01:19<09:22,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000064718.jpg: 480x640 1 person, 1 sports ball, 1 tennis racket, 62.8ms
Speed: 2.7ms preprocess, 62.8ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█▏        | 568/5000 [01:19<09:03,  8.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000064868.jpg: 480x640 1 person, 1 bottle, 1 cup, 1 keyboard, 1 oven, 56.2ms
Speed: 4.1ms preprocess, 56.2ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█▏        | 569/5000 [01:19<08:58,  8.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000064898.jpg: 448x640 2 persons, 1 surfboard, 60.8ms
Speed: 3.7ms preprocess, 60.8ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█▏        | 570/5000 [01:19<08:47,  8.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000065074.jpg: 640x448 1 bed, 59.4ms
Speed: 3.0ms preprocess, 59.4ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  11%|█▏        | 571/5000 [01:20<08:28,  8.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000065288.jpg: 448x640 11 persons, 1 bicycle, 1 cat, 1 dog, 1 chair, 3 potted plants, 78.4ms
Speed: 3.2ms preprocess, 78.4ms inference, 16.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█▏        | 572/5000 [01:20<09:50,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000065350.jpg: 448x640 3 persons, 1 bicycle, 1 skateboard, 52.2ms
Speed: 3.6ms preprocess, 52.2ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  11%|█▏        | 573/5000 [01:20<09:20,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000065455.jpg: 480x640 5 persons, 3 giraffes, 61.0ms
Speed: 4.7ms preprocess, 61.0ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  11%|█▏        | 574/5000 [01:20<09:24,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000065485.jpg: 384x640 1 truck, 49.5ms
Speed: 2.5ms preprocess, 49.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000065736.jpg: 448x640 1 person, 1 sports ball, 1 tennis racket, 50.0ms
Speed: 2.6ms preprocess, 50.0ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  12%|█▏        | 576/5000 [01:20<08:16,  8.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000065798.jpg: 480x640 6 persons, 59.0ms
Speed: 2.7ms preprocess, 59.0ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 577/5000 [01:20<08:25,  8.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000066038.jpg: 480x640 4 persons, 1 bird, 9 umbrellas, 4 chairs, 57.8ms
Speed: 2.4ms preprocess, 57.8ms inference, 17.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 578/5000 [01:20<09:16,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000066135.jpg: 512x640 1 train, 66.4ms
Speed: 5.3ms preprocess, 66.4ms inference, 1.8ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  12%|█▏        | 579/5000 [01:21<09:08,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000066231.jpg: 448x640 7 persons, 2 bottles, 2 cups, 3 bowls, 4 bananas, 60.0ms
Speed: 3.2ms preprocess, 60.0ms inference, 15.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  12%|█▏        | 580/5000 [01:21<09:55,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000066523.jpg: 640x480 1 bed, 96.3ms
Speed: 3.5ms preprocess, 96.3ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  12%|█▏        | 581/5000 [01:21<10:10,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000066561.jpg: 448x640 2 sheeps, 59.4ms
Speed: 2.8ms preprocess, 59.4ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000066635.jpg: 480x640 1 tv, 1 mouse, 1 keyboard, 59.0ms
Speed: 2.4ms preprocess, 59.0ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 583/5000 [01:21<09:05,  8.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000066706.jpg: 640x640 1 cup, 1 bowl, 5 bananas, 1 pizza, 1 dining table, 162.2ms
Speed: 3.2ms preprocess, 162.2ms inference, 10.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  12%|█▏        | 584/5000 [01:21<11:19,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000066771.jpg: 480x640 2 persons, 4 cups, 1 spoon, 2 bowls, 2 dining tables, 1 cell phone, 67.0ms
Speed: 2.5ms preprocess, 67.0ms inference, 10.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 585/5000 [01:21<11:14,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000066817.jpg: 640x640 1 cup, 2 bowls, 1 dining table, 84.4ms
Speed: 4.1ms preprocess, 84.4ms inference, 5.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  12%|█▏        | 586/5000 [01:22<11:07,  6.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000066841.jpg: 640x448 (no detections), 62.5ms
Speed: 4.4ms preprocess, 62.5ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 448)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000066886.jpg: 448x640 1 person, 1 remote, 56.1ms
Speed: 2.8ms preprocess, 56.1ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  12%|█▏        | 588/5000 [01:22<09:27,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000066926.jpg: 480x640 10 donuts, 62.8ms
Speed: 2.5ms preprocess, 62.8ms inference, 8.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 589/5000 [01:22<09:32,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000067180.jpg: 640x480 1 person, 2 benchs, 63.5ms
Speed: 2.2ms preprocess, 63.5ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  12%|█▏        | 590/5000 [01:22<09:17,  7.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000067213.jpg: 512x640 4 persons, 4 cars, 1 dog, 101.8ms
Speed: 2.6ms preprocess, 101.8ms inference, 8.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  12%|█▏        | 591/5000 [01:22<10:15,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000067310.jpg: 640x512 5 persons, 1 skateboard, 122.8ms
Speed: 3.4ms preprocess, 122.8ms inference, 6.3ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  12%|█▏        | 592/5000 [01:22<11:08,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000067315.jpg: 448x640 1 person, 3 ties, 58.7ms
Speed: 2.7ms preprocess, 58.7ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  12%|█▏        | 593/5000 [01:23<10:20,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000067406.jpg: 480x640 3 persons, 1 kite, 62.1ms
Speed: 4.8ms preprocess, 62.1ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 594/5000 [01:23<09:51,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000067534.jpg: 640x320 1 person, 133.3ms
Speed: 2.3ms preprocess, 133.3ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 320)


Segmenting Images:  12%|█▏        | 595/5000 [01:23<10:44,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000067616.jpg: 480x640 4 persons, 1 bicycle, 6 cars, 1 fire hydrant, 11 chairs, 64.5ms
Speed: 2.9ms preprocess, 64.5ms inference, 20.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 596/5000 [01:23<11:34,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000067896.jpg: 480x640 1 clock, 60.2ms
Speed: 2.3ms preprocess, 60.2ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 597/5000 [01:23<10:28,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000068078.jpg: 640x384 1 bottle, 1 toilet, 2 sinks, 107.0ms
Speed: 2.9ms preprocess, 107.0ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  12%|█▏        | 598/5000 [01:23<10:48,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000068093.jpg: 480x640 2 persons, 1 car, 1 motorcycle, 62.1ms
Speed: 3.2ms preprocess, 62.1ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 599/5000 [01:23<10:21,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000068286.jpg: 384x640 1 bed, 55.7ms
Speed: 2.6ms preprocess, 55.7ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  12%|█▏        | 600/5000 [01:23<09:33,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000068387.jpg: 480x640 4 persons, 1 baseball bat, 1 baseball glove, 64.4ms
Speed: 2.8ms preprocess, 64.4ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 601/5000 [01:24<09:20,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000068409.jpg: 288x640 7 persons, 5 ties, 1 cake, 101.2ms
Speed: 2.7ms preprocess, 101.2ms inference, 5.2ms postprocess per image at shape (1, 3, 288, 640)


Segmenting Images:  12%|█▏        | 602/5000 [01:24<10:07,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000068628.jpg: 448x640 1 person, 1 skateboard, 62.1ms
Speed: 2.8ms preprocess, 62.1ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  12%|█▏        | 603/5000 [01:24<09:29,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000068765.jpg: 480x640 1 mouse, 1 keyboard, 1 cell phone, 66.4ms
Speed: 3.2ms preprocess, 66.4ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 604/5000 [01:24<09:13,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000068833.jpg: 480x640 1 umbrella, 1 bowl, 1 chair, 1 dining table, 2 microwaves, 1 refrigerator, 1 clock, 63.5ms
Speed: 3.0ms preprocess, 63.5ms inference, 7.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 605/5000 [01:24<09:14,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000068933.jpg: 416x640 3 zebras, 108.0ms
Speed: 3.7ms preprocess, 108.0ms inference, 2.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  12%|█▏        | 606/5000 [01:24<09:59,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000069106.jpg: 448x640 4 zebras, 79.0ms
Speed: 4.0ms preprocess, 79.0ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  12%|█▏        | 607/5000 [01:24<09:56,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000069138.jpg: 640x384 1 truck, 1 traffic light, 54.4ms
Speed: 2.5ms preprocess, 54.4ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 384)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000069213.jpg: 448x640 8 persons, 1 tie, 1 sports ball, 48.4ms
Speed: 2.6ms preprocess, 48.4ms inference, 7.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  12%|█▏        | 609/5000 [01:25<08:49,  8.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000069224.jpg: 480x640 2 persons, 1 bottle, 1 knife, 1 donut, 2 chairs, 1 dining table, 51.7ms
Speed: 4.2ms preprocess, 51.7ms inference, 8.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 610/5000 [01:25<08:56,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000069356.jpg: 512x640 5 persons, 1 car, 1 banana, 2 apples, 1 orange, 65.9ms
Speed: 2.8ms preprocess, 65.9ms inference, 7.8ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  12%|█▏        | 611/5000 [01:25<09:18,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000069795.jpg: 480x640 1 potted plant, 1 dining table, 1 vase, 65.5ms
Speed: 2.8ms preprocess, 65.5ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 612/5000 [01:25<09:02,  8.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000070048.jpg: 480x640 1 cup, 2 bowls, 1 sandwich, 1 dining table, 58.5ms
Speed: 3.1ms preprocess, 58.5ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 613/5000 [01:25<08:54,  8.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000070158.jpg: 640x448 4 zebras, 59.3ms
Speed: 2.7ms preprocess, 59.3ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  12%|█▏        | 614/5000 [01:25<08:42,  8.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000070229.jpg: 640x512 2 persons, 1 car, 63.9ms
Speed: 2.6ms preprocess, 63.9ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  12%|█▏        | 615/5000 [01:25<08:42,  8.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000070254.jpg: 384x640 9 persons, 1 train, 2 handbags, 63.9ms
Speed: 2.6ms preprocess, 63.9ms inference, 9.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  12%|█▏        | 616/5000 [01:25<09:03,  8.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000070739.jpg: 480x640 4 persons, 2 wine glasss, 62.7ms
Speed: 3.0ms preprocess, 62.7ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 617/5000 [01:26<09:00,  8.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000070774.jpg: 448x640 2 motorcycles, 57.7ms
Speed: 2.8ms preprocess, 57.7ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  12%|█▏        | 618/5000 [01:26<08:38,  8.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000071226.jpg: 448x640 2 cats, 3 dogs, 1 bed, 2 books, 59.5ms
Speed: 2.8ms preprocess, 59.5ms inference, 8.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  12%|█▏        | 619/5000 [01:26<08:54,  8.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000071451.jpg: 640x384 1 bed, 62.1ms
Speed: 2.8ms preprocess, 62.1ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  12%|█▏        | 620/5000 [01:26<08:41,  8.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000071711.jpg: 448x640 7 persons, 1 airplane, 68.8ms
Speed: 2.8ms preprocess, 68.8ms inference, 7.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  12%|█▏        | 621/5000 [01:26<09:09,  7.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000071756.jpg: 512x640 1 bear, 76.9ms
Speed: 2.7ms preprocess, 76.9ms inference, 2.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  12%|█▏        | 622/5000 [01:26<09:10,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000071877.jpg: 640x448 2 persons, 1 skateboard, 71.2ms
Speed: 2.9ms preprocess, 71.2ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  12%|█▏        | 623/5000 [01:26<09:09,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000071938.jpg: 480x640 1 person, 2 beds, 1 cell phone, 96.2ms
Speed: 3.1ms preprocess, 96.2ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  12%|█▏        | 624/5000 [01:27<09:45,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000072281.jpg: 640x448 1 person, 1 skateboard, 56.4ms
Speed: 3.4ms preprocess, 56.4ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  12%|█▎        | 625/5000 [01:27<09:10,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000072795.jpg: 480x640 14 persons, 63.6ms
Speed: 3.8ms preprocess, 63.6ms inference, 11.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 626/5000 [01:27<09:47,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000072813.jpg: 480x640 1 dog, 1 bed, 1 tv, 64.4ms
Speed: 2.9ms preprocess, 64.4ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 627/5000 [01:27<09:27,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000072852.jpg: 640x448 2 persons, 1 bicycle, 65.1ms
Speed: 2.9ms preprocess, 65.1ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  13%|█▎        | 628/5000 [01:27<09:12,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000073118.jpg: 640x640 3 teddy bears, 181.3ms
Speed: 4.0ms preprocess, 181.3ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  13%|█▎        | 629/5000 [01:27<11:45,  6.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000073153.jpg: 640x448 1 person, 1 sports ball, 65.0ms
Speed: 2.7ms preprocess, 65.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  13%|█▎        | 630/5000 [01:27<10:48,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000073326.jpg: 480x640 11 bottles, 1 refrigerator, 71.5ms
Speed: 2.7ms preprocess, 71.5ms inference, 12.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 631/5000 [01:28<10:59,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000073533.jpg: 480x640 2 persons, 1 couch, 1 cell phone, 100.7ms
Speed: 2.7ms preprocess, 100.7ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 632/5000 [01:28<11:05,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000073702.jpg: 640x480 2 persons, 1 backpack, 1 umbrella, 167.4ms
Speed: 2.6ms preprocess, 167.4ms inference, 6.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  13%|█▎        | 633/5000 [01:28<12:46,  5.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000073946.jpg: 448x640 2 persons, 1 couch, 63.4ms
Speed: 3.0ms preprocess, 63.4ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  13%|█▎        | 634/5000 [01:28<11:33,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000074058.jpg: 448x640 2 persons, 1 bicycle, 3 cars, 3 umbrellas, 1 chair, 68.8ms
Speed: 3.0ms preprocess, 68.8ms inference, 13.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  13%|█▎        | 635/5000 [01:28<11:22,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000074092.jpg: 640x448 6 persons, 1 sports ball, 1 tennis racket, 67.6ms
Speed: 4.0ms preprocess, 67.6ms inference, 9.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  13%|█▎        | 636/5000 [01:28<11:02,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000074200.jpg: 480x640 1 person, 2 surfboards, 68.3ms
Speed: 3.1ms preprocess, 68.3ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 637/5000 [01:28<10:26,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000074209.jpg: 480x640 3 bottles, 1 bowl, 4 sinks, 1 clock, 69.7ms
Speed: 2.9ms preprocess, 69.7ms inference, 8.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 638/5000 [01:29<10:26,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000074256.jpg: 480x640 4 persons, 1 backpack, 2 cell phones, 67.8ms
Speed: 3.0ms preprocess, 67.8ms inference, 7.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 639/5000 [01:29<10:18,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000074457.jpg: 384x640 1 person, 1 surfboard, 82.6ms
Speed: 2.5ms preprocess, 82.6ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  13%|█▎        | 640/5000 [01:29<10:11,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000074646.jpg: 640x480 4 persons, 1 kite, 1 cup, 79.0ms
Speed: 3.3ms preprocess, 79.0ms inference, 6.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  13%|█▎        | 641/5000 [01:29<10:14,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000074733.jpg: 640x640 2 persons, 10 cups, 2 pizzas, 3 chairs, 2 dining tables, 92.8ms
Speed: 3.4ms preprocess, 92.8ms inference, 22.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  13%|█▎        | 642/5000 [01:29<12:12,  5.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000074860.jpg: 448x640 1 person, 1 snowboard, 63.2ms
Speed: 2.8ms preprocess, 63.2ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  13%|█▎        | 643/5000 [01:29<11:04,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000075393.jpg: 448x640 1 sandwich, 1 cake, 63.1ms
Speed: 2.8ms preprocess, 63.1ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  13%|█▎        | 644/5000 [01:29<10:13,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000075456.jpg: 448x640 1 bowl, 1 pizza, 1 dining table, 65.4ms
Speed: 3.1ms preprocess, 65.4ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  13%|█▎        | 645/5000 [01:30<09:44,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000075612.jpg: 480x640 14 persons, 2 chairs, 1 laptop, 69.5ms
Speed: 2.9ms preprocess, 69.5ms inference, 17.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 646/5000 [01:30<10:41,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000076211.jpg: 512x640 2 clocks, 74.8ms
Speed: 2.9ms preprocess, 74.8ms inference, 3.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  13%|█▎        | 647/5000 [01:30<10:27,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000076261.jpg: 448x640 2 kites, 99.3ms
Speed: 2.8ms preprocess, 99.3ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  13%|█▎        | 648/5000 [01:30<10:35,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000076416.jpg: 480x640 1 person, 1 bus, 1 traffic light, 1 bench, 72.5ms
Speed: 2.7ms preprocess, 72.5ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 649/5000 [01:30<10:19,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000076417.jpg: 480x640 1 car, 2 traffic lights, 1 dog, 73.3ms
Speed: 3.5ms preprocess, 73.3ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 650/5000 [01:30<10:05,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000076468.jpg: 480x640 7 persons, 14 chairs, 70.6ms
Speed: 3.1ms preprocess, 70.6ms inference, 23.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 651/5000 [01:31<11:12,  6.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000076547.jpg: 480x640 1 person, 3 bicycles, 1 train, 1 bench, 1 backpack, 2 laptops, 1 cell phone, 70.5ms
Speed: 3.0ms preprocess, 70.5ms inference, 10.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 652/5000 [01:31<11:06,  6.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000076625.jpg: 480x640 7 persons, 2 trains, 70.3ms
Speed: 2.7ms preprocess, 70.3ms inference, 8.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 653/5000 [01:31<10:54,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000076731.jpg: 416x640 1 toothbrush, 62.6ms
Speed: 2.5ms preprocess, 62.6ms inference, 1.9ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  13%|█▎        | 654/5000 [01:31<10:00,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000077396.jpg: 480x640 2 cats, 1 dining table, 1 tv, 1 book, 74.9ms
Speed: 2.7ms preprocess, 74.9ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 655/5000 [01:31<09:53,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000077460.jpg: 640x448 6 persons, 7 kites, 93.1ms
Speed: 5.5ms preprocess, 93.1ms inference, 14.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  13%|█▎        | 656/5000 [01:31<11:00,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000077595.jpg: 448x640 1 cat, 1 bed, 1 laptop, 62.0ms
Speed: 2.8ms preprocess, 62.0ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  13%|█▎        | 657/5000 [01:31<10:10,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000078032.jpg: 384x640 2 benchs, 58.3ms
Speed: 3.0ms preprocess, 58.3ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  13%|█▎        | 658/5000 [01:31<09:31,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000078170.jpg: 640x640 2 persons, 1 bottle, 87.8ms
Speed: 2.3ms preprocess, 87.8ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  13%|█▎        | 659/5000 [01:32<09:48,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000078266.jpg: 448x640 1 sink, 1 refrigerator, 68.5ms
Speed: 2.4ms preprocess, 68.5ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  13%|█▎        | 660/5000 [01:32<09:24,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000078404.jpg: 448x640 5 persons, 2 benchs, 1 chair, 63.4ms
Speed: 4.3ms preprocess, 63.4ms inference, 7.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  13%|█▎        | 661/5000 [01:32<09:35,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000078420.jpg: 480x640 1 cat, 1 chair, 1 tv, 1 laptop, 69.8ms
Speed: 2.6ms preprocess, 69.8ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 662/5000 [01:32<09:28,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000078426.jpg: 448x640 1 cat, 1 cup, 1 remote, 67.2ms
Speed: 3.6ms preprocess, 67.2ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  13%|█▎        | 663/5000 [01:32<09:15,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000078565.jpg: 512x640 9 persons, 3 boats, 106.1ms
Speed: 3.3ms preprocess, 106.1ms inference, 13.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  13%|█▎        | 664/5000 [01:32<10:48,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000078748.jpg: 480x640 15 persons, 3 motorcycles, 3 backpacks, 66.3ms
Speed: 3.0ms preprocess, 66.3ms inference, 20.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 665/5000 [01:32<11:32,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000078823.jpg: 480x640 3 cars, 1 truck, 1 dog, 67.2ms
Speed: 3.4ms preprocess, 67.2ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 666/5000 [01:33<10:52,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000078843.jpg: 480x640 2 persons, 72.6ms
Speed: 2.8ms preprocess, 72.6ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 667/5000 [01:33<10:16,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000078915.jpg: 640x512 11 persons, 1 sports ball, 1 tennis racket, 6 chairs, 78.9ms
Speed: 3.3ms preprocess, 78.9ms inference, 19.2ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  13%|█▎        | 668/5000 [01:33<11:28,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000078959.jpg: 640x384 (no detections), 58.8ms
Speed: 3.1ms preprocess, 58.8ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 384)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000079014.jpg: 640x416 1 person, 136.8ms
Speed: 4.5ms preprocess, 136.8ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  13%|█▎        | 670/5000 [01:33<10:48,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000079031.jpg: 448x640 1 person, 1 surfboard, 66.1ms
Speed: 2.8ms preprocess, 66.1ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  13%|█▎        | 671/5000 [01:33<10:17,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000079034.jpg: 480x640 3 cars, 1 fire hydrant, 72.8ms
Speed: 3.0ms preprocess, 72.8ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 672/5000 [01:33<10:08,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000079144.jpg: 480x640 3 bears, 85.5ms
Speed: 4.3ms preprocess, 85.5ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  13%|█▎        | 673/5000 [01:34<10:20,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000079188.jpg: 640x480 2 giraffes, 65.9ms
Speed: 2.8ms preprocess, 65.9ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  13%|█▎        | 674/5000 [01:34<09:52,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000079229.jpg: 448x640 1 person, 1 dog, 1 horse, 63.9ms
Speed: 3.2ms preprocess, 63.9ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▎        | 675/5000 [01:34<09:29,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000079408.jpg: 448x640 1 stop sign, 60.2ms
Speed: 2.6ms preprocess, 60.2ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▎        | 676/5000 [01:34<09:01,  7.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000079565.jpg: 480x640 2 giraffes, 62.7ms
Speed: 3.9ms preprocess, 62.7ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▎        | 677/5000 [01:34<08:45,  8.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000079588.jpg: 480x640 4 persons, 2 clocks, 51.7ms
Speed: 2.7ms preprocess, 51.7ms inference, 6.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▎        | 678/5000 [01:34<08:28,  8.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000079651.jpg: 480x640 3 bottles, 2 bananas, 2 dining tables, 60.2ms
Speed: 2.5ms preprocess, 60.2ms inference, 8.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▎        | 679/5000 [01:34<08:34,  8.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000079837.jpg: 640x448 7 boats, 1 sports ball, 59.3ms
Speed: 2.5ms preprocess, 59.3ms inference, 7.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  14%|█▎        | 680/5000 [01:34<08:40,  8.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000079969.jpg: 640x448 8 persons, 84.6ms
Speed: 2.6ms preprocess, 84.6ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  14%|█▎        | 681/5000 [01:35<09:22,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000080022.jpg: 448x640 1 person, 1 sports ball, 1 tennis racket, 62.2ms
Speed: 2.7ms preprocess, 62.2ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▎        | 682/5000 [01:35<08:59,  8.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000080057.jpg: 640x448 2 cups, 1 teddy bear, 61.2ms
Speed: 2.6ms preprocess, 61.2ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  14%|█▎        | 683/5000 [01:35<08:42,  8.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000080153.jpg: 640x448 1 person, 1 dog, 64.3ms
Speed: 2.5ms preprocess, 64.3ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  14%|█▎        | 684/5000 [01:35<08:33,  8.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000080273.jpg: 448x640 1 person, 1 snowboard, 59.2ms
Speed: 2.4ms preprocess, 59.2ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▎        | 685/5000 [01:35<08:13,  8.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000080274.jpg: 480x640 4 elephants, 64.9ms
Speed: 2.7ms preprocess, 64.9ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▎        | 686/5000 [01:35<08:19,  8.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000080340.jpg: 448x640 8 persons, 1 handbag, 1 tie, 2 dining tables, 58.3ms
Speed: 2.7ms preprocess, 58.3ms inference, 8.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▎        | 687/5000 [01:35<08:47,  8.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000080413.jpg: 480x640 1 car, 1 dog, 1 clock, 61.3ms
Speed: 2.7ms preprocess, 61.3ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▍        | 688/5000 [01:35<08:35,  8.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000080659.jpg: 448x640 5 persons, 2 horses, 82.6ms
Speed: 2.9ms preprocess, 82.6ms inference, 6.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 689/5000 [01:36<09:05,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000080666.jpg: 640x640 1 bench, 1 cat, 82.5ms
Speed: 2.5ms preprocess, 82.5ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  14%|█▍        | 690/5000 [01:36<09:19,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000080671.jpg: 448x640 1 person, 2 snowboards, 59.5ms
Speed: 2.3ms preprocess, 59.5ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 691/5000 [01:36<08:52,  8.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000080932.jpg: 640x480 3 persons, 1 cup, 1 fork, 1 pizza, 1 chair, 1 dining table, 75.5ms
Speed: 3.4ms preprocess, 75.5ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  14%|█▍        | 692/5000 [01:36<09:20,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000080949.jpg: 448x640 1 person, 1 cat, 2 laptops, 68.8ms
Speed: 2.3ms preprocess, 68.8ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 693/5000 [01:36<09:15,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000081061.jpg: 448x640 1 chair, 1 couch, 65.3ms
Speed: 2.4ms preprocess, 65.3ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 694/5000 [01:36<08:54,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000081394.jpg: 480x640 1 truck, 70.1ms
Speed: 2.7ms preprocess, 70.1ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▍        | 695/5000 [01:36<08:50,  8.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000081594.jpg: 640x480 1 person, 1 umbrella, 3 handbags, 67.8ms
Speed: 2.8ms preprocess, 67.8ms inference, 5.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  14%|█▍        | 696/5000 [01:36<09:02,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000081738.jpg: 480x640 2 persons, 1 cake, 1 dining table, 1 vase, 79.3ms
Speed: 4.9ms preprocess, 79.3ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▍        | 697/5000 [01:37<09:25,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000081766.jpg: 480x640 1 bench, 2 dogs, 1 bear, 69.1ms
Speed: 3.0ms preprocess, 69.1ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▍        | 698/5000 [01:37<09:34,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000081988.jpg: 448x640 5 persons, 5 surfboards, 60.5ms
Speed: 3.2ms preprocess, 60.5ms inference, 9.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 699/5000 [01:37<09:34,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000082085.jpg: 448x640 4 persons, 1 train, 57.4ms
Speed: 2.7ms preprocess, 57.4ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 700/5000 [01:37<09:14,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000082180.jpg: 448x640 1 bench, 2 teddy bears, 58.6ms
Speed: 2.8ms preprocess, 58.6ms inference, 7.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 701/5000 [01:37<09:00,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000082688.jpg: 448x640 3 persons, 1 remote, 62.6ms
Speed: 2.6ms preprocess, 62.6ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 702/5000 [01:37<08:46,  8.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000082696.jpg: 640x448 1 person, 1 bird, 1 dog, 7 chairs, 1 dining table, 64.3ms
Speed: 2.6ms preprocess, 64.3ms inference, 13.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  14%|█▍        | 703/5000 [01:37<09:15,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000082715.jpg: 448x640 1 person, 1 surfboard, 59.7ms
Speed: 2.7ms preprocess, 59.7ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 704/5000 [01:37<08:51,  8.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000082765.jpg: 640x448 1 bed, 1 laptop, 112.5ms
Speed: 3.2ms preprocess, 112.5ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  14%|█▍        | 705/5000 [01:38<09:41,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000082807.jpg: 608x640 1 dog, 1 cake, 1 dining table, 161.2ms
Speed: 2.0ms preprocess, 161.2ms inference, 3.9ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  14%|█▍        | 706/5000 [01:38<11:25,  6.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000082812.jpg: 384x640 12 persons, 1 train, 1 traffic light, 1 backpack, 1 handbag, 2 suitcases, 98.7ms
Speed: 2.4ms preprocess, 98.7ms inference, 11.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  14%|█▍        | 707/5000 [01:38<12:03,  5.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000082821.jpg: 448x640 3 persons, 6 boats, 62.6ms
Speed: 2.8ms preprocess, 62.6ms inference, 7.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 708/5000 [01:38<11:18,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000082846.jpg: 480x640 1 bus, 1 stop sign, 73.8ms
Speed: 3.7ms preprocess, 73.8ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▍        | 709/5000 [01:38<10:39,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000082986.jpg: 448x640 3 persons, 1 traffic light, 1 frisbee, 60.3ms
Speed: 2.7ms preprocess, 60.3ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 710/5000 [01:38<09:59,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000083113.jpg: 480x640 1 elephant, 66.8ms
Speed: 3.1ms preprocess, 66.8ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▍        | 711/5000 [01:39<09:22,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000083172.jpg: 448x640 3 persons, 1 sports ball, 2 tennis rackets, 65.3ms
Speed: 3.2ms preprocess, 65.3ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 712/5000 [01:39<09:15,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000083531.jpg: 352x640 1 person, 1 bottle, 1 cup, 15 hot dogs, 1 remote, 144.3ms
Speed: 3.3ms preprocess, 144.3ms inference, 12.8ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  14%|█▍        | 713/5000 [01:39<11:30,  6.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000083540.jpg: 448x640 1 person, 64.4ms
Speed: 3.3ms preprocess, 64.4ms inference, 1.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 714/5000 [01:39<10:26,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000084031.jpg: 448x640 8 persons, 2 baseball gloves, 62.2ms
Speed: 3.2ms preprocess, 62.2ms inference, 9.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 715/5000 [01:39<10:16,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000084170.jpg: 480x640 1 person, 4 cars, 1 bus, 66.9ms
Speed: 2.8ms preprocess, 66.9ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▍        | 716/5000 [01:39<09:54,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000084241.jpg: 416x640 3 persons, 5 bowls, 2 refrigerators, 110.5ms
Speed: 2.6ms preprocess, 110.5ms inference, 8.0ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  14%|█▍        | 717/5000 [01:39<10:45,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000084270.jpg: 480x640 12 persons, 3 backpacks, 5 suitcases, 61.7ms
Speed: 2.9ms preprocess, 61.7ms inference, 19.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▍        | 718/5000 [01:40<11:13,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000084362.jpg: 480x640 1 person, 1 cat, 1 dog, 1 chair, 2 couchs, 1 tv, 59.6ms
Speed: 2.6ms preprocess, 59.6ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▍        | 719/5000 [01:40<10:28,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000084431.jpg: 640x384 1 person, 1 pizza, 1 dining table, 1 tv, 107.5ms
Speed: 2.2ms preprocess, 107.5ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  14%|█▍        | 720/5000 [01:40<10:41,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000084477.jpg: 448x640 1 person, 1 umbrella, 1 chair, 82.8ms
Speed: 2.9ms preprocess, 82.8ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  14%|█▍        | 721/5000 [01:40<10:25,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000084492.jpg: 640x448 2 persons, 1 bicycle, 2 cars, 1 skateboard, 63.2ms
Speed: 2.9ms preprocess, 63.2ms inference, 7.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  14%|█▍        | 722/5000 [01:40<10:02,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000084650.jpg: 480x640 1 cat, 2 suitcases, 59.3ms
Speed: 2.4ms preprocess, 59.3ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  14%|█▍        | 723/5000 [01:40<09:25,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000084664.jpg: 640x640 1 bowl, 2 sandwichs, 1 cake, 1 dining table, 84.5ms
Speed: 3.1ms preprocess, 84.5ms inference, 6.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  14%|█▍        | 724/5000 [01:40<09:47,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000084674.jpg: 640x480 3 persons, 1 tv, 65.3ms
Speed: 2.7ms preprocess, 65.3ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  14%|█▍        | 725/5000 [01:41<09:26,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000084752.jpg: 448x640 8 cars, 1 airplane, 53.8ms
Speed: 2.8ms preprocess, 53.8ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▍        | 726/5000 [01:41<09:16,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000085089.jpg: 448x640 4 persons, 6 wine glasss, 1 dining table, 62.4ms
Speed: 2.8ms preprocess, 62.4ms inference, 10.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▍        | 727/5000 [01:41<09:27,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000085157.jpg: 480x640 6 persons, 3 cups, 1 bowl, 2 chairs, 64.8ms
Speed: 2.1ms preprocess, 64.8ms inference, 11.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  15%|█▍        | 728/5000 [01:41<09:41,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000085195.jpg: 480x640 1 fork, 1 cake, 1 dining table, 84.4ms
Speed: 2.4ms preprocess, 84.4ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  15%|█▍        | 729/5000 [01:41<09:47,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000085329.jpg: 480x640 1 person, 1 tie, 71.0ms
Speed: 2.7ms preprocess, 71.0ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  15%|█▍        | 730/5000 [01:41<09:18,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000085376.jpg: 640x480 1 person, 8 cars, 1 motorcycle, 1 truck, 66.4ms
Speed: 2.6ms preprocess, 66.4ms inference, 10.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  15%|█▍        | 731/5000 [01:41<09:40,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000085478.jpg: 416x640 2 bears, 57.6ms
Speed: 2.7ms preprocess, 57.6ms inference, 2.9ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  15%|█▍        | 732/5000 [01:41<09:04,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000085576.jpg: 512x640 1 toilet, 140.5ms
Speed: 4.2ms preprocess, 140.5ms inference, 2.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  15%|█▍        | 733/5000 [01:42<10:25,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000085665.jpg: 640x512 2 kites, 143.0ms
Speed: 3.0ms preprocess, 143.0ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  15%|█▍        | 734/5000 [01:42<11:21,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000085682.jpg: 448x640 11 persons, 57.5ms
Speed: 3.0ms preprocess, 57.5ms inference, 9.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▍        | 735/5000 [01:42<10:52,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000085772.jpg: 448x640 1 person, 2 sports balls, 1 tennis racket, 64.0ms
Speed: 2.9ms preprocess, 64.0ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▍        | 736/5000 [01:42<10:18,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000085823.jpg: 448x640 4 zebras, 59.2ms
Speed: 4.9ms preprocess, 59.2ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▍        | 737/5000 [01:42<09:46,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000085911.jpg: 416x640 1 kite, 58.1ms
Speed: 2.7ms preprocess, 58.1ms inference, 2.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  15%|█▍        | 738/5000 [01:42<09:06,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000086220.jpg: 448x640 2 cars, 1 bus, 59.2ms
Speed: 2.5ms preprocess, 59.2ms inference, 6.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▍        | 739/5000 [01:42<08:48,  8.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000086483.jpg: 480x640 1 bench, 76.3ms
Speed: 3.8ms preprocess, 76.3ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  15%|█▍        | 740/5000 [01:43<08:31,  8.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000086582.jpg: 448x640 1 zebra, 3 potted plants, 2 laptops, 2 mouses, 62.0ms
Speed: 2.5ms preprocess, 62.0ms inference, 9.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▍        | 741/5000 [01:43<09:02,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000086755.jpg: 480x640 1 person, 2 backpacks, 1 skis, 66.8ms
Speed: 2.4ms preprocess, 66.8ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  15%|█▍        | 742/5000 [01:43<08:50,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000086956.jpg: 640x480 3 persons, 1 tie, 2 baseball bats, 1 couch, 64.1ms
Speed: 2.3ms preprocess, 64.1ms inference, 7.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  15%|█▍        | 743/5000 [01:43<08:59,  7.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000087038.jpg: 480x640 12 persons, 1 bicycle, 1 skateboard, 94.3ms
Speed: 2.7ms preprocess, 94.3ms inference, 12.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  15%|█▍        | 744/5000 [01:43<10:06,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000087144.jpg: 480x640 3 persons, 1 car, 2 benchs, 67.3ms
Speed: 2.5ms preprocess, 67.3ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  15%|█▍        | 745/5000 [01:43<09:50,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000087244.jpg: 640x480 1 fire hydrant, 66.1ms
Speed: 3.7ms preprocess, 66.1ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  15%|█▍        | 746/5000 [01:43<09:19,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000087470.jpg: 480x640 2 persons, 12 cows, 73.9ms
Speed: 2.4ms preprocess, 73.9ms inference, 17.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  15%|█▍        | 747/5000 [01:44<10:07,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000087476.jpg: 640x448 12 persons, 1 skateboard, 66.4ms
Speed: 2.8ms preprocess, 66.4ms inference, 12.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  15%|█▍        | 748/5000 [01:44<10:19,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000087742.jpg: 640x512 1 potted plant, 1 dining table, 1 vase, 71.7ms
Speed: 4.2ms preprocess, 71.7ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  15%|█▍        | 749/5000 [01:44<10:01,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000087875.jpg: 512x640 1 fire hydrant, 71.8ms
Speed: 3.0ms preprocess, 71.8ms inference, 2.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  15%|█▌        | 750/5000 [01:44<09:36,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000088040.jpg: 640x448 1 cup, 3 spoons, 5 bowls, 1 dining table, 70.9ms
Speed: 2.8ms preprocess, 70.9ms inference, 12.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  15%|█▌        | 751/5000 [01:44<09:59,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000088218.jpg: 640x480 2 traffic lights, 71.9ms
Speed: 2.6ms preprocess, 71.9ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  15%|█▌        | 752/5000 [01:44<09:38,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000088250.jpg: 448x640 2 elephants, 61.5ms
Speed: 2.7ms preprocess, 61.5ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▌        | 753/5000 [01:44<09:05,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000088265.jpg: 640x448 1 horse, 60.3ms
Speed: 3.0ms preprocess, 60.3ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  15%|█▌        | 754/5000 [01:44<08:38,  8.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000088269.jpg: 480x640 1 knife, 1 bowl, 1 dining table, 62.8ms
Speed: 2.6ms preprocess, 62.8ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  15%|█▌        | 755/5000 [01:45<08:34,  8.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000088345.jpg: 448x640 2 persons, 2 clocks, 65.4ms
Speed: 2.9ms preprocess, 65.4ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▌        | 756/5000 [01:45<08:30,  8.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000088432.jpg: 640x480 2 trucks, 1 traffic light, 1 fire hydrant, 65.3ms
Speed: 2.3ms preprocess, 65.3ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  15%|█▌        | 757/5000 [01:45<08:27,  8.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000088462.jpg: 384x640 2 persons, 11 cars, 1 bus, 55.1ms
Speed: 2.2ms preprocess, 55.1ms inference, 14.0ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  15%|█▌        | 758/5000 [01:45<09:04,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000088485.jpg: 448x640 1 person, 1 frisbee, 60.8ms
Speed: 2.5ms preprocess, 60.8ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▌        | 759/5000 [01:45<08:40,  8.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000088848.jpg: 640x640 6 persons, 2 fire hydrants, 82.8ms
Speed: 2.5ms preprocess, 82.8ms inference, 9.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  15%|█▌        | 760/5000 [01:45<09:26,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000088951.jpg: 480x640 1 person, 1 bench, 1 dog, 64.9ms
Speed: 4.4ms preprocess, 64.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  15%|█▌        | 761/5000 [01:45<09:13,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000088970.jpg: 640x448 1 person, 1 sports ball, 1 tennis racket, 62.5ms
Speed: 4.4ms preprocess, 62.5ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  15%|█▌        | 762/5000 [01:45<08:56,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000089045.jpg: 448x640 1 chair, 2 couchs, 63.7ms
Speed: 4.0ms preprocess, 63.7ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▌        | 763/5000 [01:46<08:43,  8.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000089078.jpg: 640x480 1 person, 66.0ms
Speed: 2.1ms preprocess, 66.0ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  15%|█▌        | 764/5000 [01:46<08:28,  8.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000089271.jpg: 544x640 3 cats, 175.7ms
Speed: 1.9ms preprocess, 175.7ms inference, 5.2ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  15%|█▌        | 765/5000 [01:46<10:47,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000089296.jpg: 448x640 3 persons, 3 sports balls, 2 baseball bats, 67.7ms
Speed: 2.9ms preprocess, 67.7ms inference, 8.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▌        | 766/5000 [01:46<10:24,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000089556.jpg: 480x640 1 person, 12 cars, 1 fire hydrant, 71.9ms
Speed: 3.3ms preprocess, 71.9ms inference, 13.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  15%|█▌        | 767/5000 [01:46<11:26,  6.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000089648.jpg: 448x640 15 persons, 2 tvs, 6 laptops, 62.9ms
Speed: 3.0ms preprocess, 62.9ms inference, 20.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▌        | 768/5000 [01:46<11:52,  5.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000089670.jpg: 448x640 1 spoon, 1 sandwich, 60.7ms
Speed: 2.5ms preprocess, 60.7ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▌        | 769/5000 [01:47<10:41,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000089697.jpg: 448x640 2 persons, 2 benchs, 58.3ms
Speed: 3.2ms preprocess, 58.3ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▌        | 770/5000 [01:47<09:47,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000089761.jpg: 640x480 1 toilet, 65.0ms
Speed: 2.6ms preprocess, 65.0ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  15%|█▌        | 771/5000 [01:47<09:17,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000089880.jpg: 384x640 2 dogs, 68.7ms
Speed: 4.1ms preprocess, 68.7ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  15%|█▌        | 772/5000 [01:47<09:03,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000090003.jpg: 480x640 2 dogs, 1 frisbee, 67.3ms
Speed: 2.7ms preprocess, 67.3ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  15%|█▌        | 773/5000 [01:47<08:51,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000090062.jpg: 448x640 1 cow, 61.7ms
Speed: 2.9ms preprocess, 61.7ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  15%|█▌        | 774/5000 [01:47<08:29,  8.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000090108.jpg: 480x640 1 bottle, 2 toilets, 2 sinks, 66.5ms
Speed: 3.5ms preprocess, 66.5ms inference, 7.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▌        | 775/5000 [01:47<08:32,  8.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000090155.jpg: 448x640 1 train, 58.7ms
Speed: 2.7ms preprocess, 58.7ms inference, 1.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 776/5000 [01:47<08:17,  8.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000090208.jpg: 448x640 3 buss, 59.8ms
Speed: 2.7ms preprocess, 59.8ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 777/5000 [01:47<08:12,  8.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000090284.jpg: 640x640 1 person, 1 tennis racket, 84.1ms
Speed: 2.0ms preprocess, 84.1ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  16%|█▌        | 778/5000 [01:48<08:27,  8.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000090631.jpg: 416x640 1 airplane, 78.2ms
Speed: 2.6ms preprocess, 78.2ms inference, 2.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  16%|█▌        | 779/5000 [01:48<08:45,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000090891.jpg: 448x640 5 persons, 1 banana, 57.8ms
Speed: 2.9ms preprocess, 57.8ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 780/5000 [01:48<08:38,  8.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000090956.jpg: 448x640 1 person, 60.8ms
Speed: 2.8ms preprocess, 60.8ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 781/5000 [01:48<08:15,  8.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000091406.jpg: 448x640 4 persons, 1 knife, 1 spoon, 6 pizzas, 5 chairs, 2 dining tables, 1 tv, 1 laptop, 1 book, 59.1ms
Speed: 3.2ms preprocess, 59.1ms inference, 21.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 782/5000 [01:48<09:13,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000091495.jpg: 448x640 3 persons, 2 baseball gloves, 61.7ms
Speed: 2.6ms preprocess, 61.7ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 783/5000 [01:48<09:12,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000091500.jpg: 480x640 2 persons, 3 bottles, 2 chairs, 1 dining table, 2 remotes, 63.0ms
Speed: 3.0ms preprocess, 63.0ms inference, 8.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▌        | 784/5000 [01:48<09:18,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000091615.jpg: 448x640 1 bowl, 1 potted plant, 1 microwave, 2 ovens, 1 sink, 1 vase, 60.9ms
Speed: 3.0ms preprocess, 60.9ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 785/5000 [01:48<09:09,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000091619.jpg: 640x448 1 stop sign, 80.4ms
Speed: 2.9ms preprocess, 80.4ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  16%|█▌        | 786/5000 [01:49<08:58,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000091654.jpg: 480x640 1 spoon, 1 bowl, 1 carrot, 66.1ms
Speed: 2.6ms preprocess, 66.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▌        | 787/5000 [01:49<08:48,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000091779.jpg: 480x640 1 cup, 2 bowls, 3 hot dogs, 2 chairs, 1 dining table, 64.4ms
Speed: 2.9ms preprocess, 64.4ms inference, 7.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▌        | 788/5000 [01:49<08:54,  7.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000091921.jpg: 480x640 2 zebras, 66.8ms
Speed: 3.3ms preprocess, 66.8ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▌        | 789/5000 [01:49<08:43,  8.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000092053.jpg: 448x640 3 cups, 2 forks, 1 knife, 2 sandwichs, 1 dining table, 60.4ms
Speed: 3.1ms preprocess, 60.4ms inference, 7.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 790/5000 [01:49<08:53,  7.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000092091.jpg: 480x640 1 person, 2 chairs, 1 couch, 1 bed, 2 tvs, 1 remote, 1 teddy bear, 65.7ms
Speed: 2.9ms preprocess, 65.7ms inference, 7.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▌        | 791/5000 [01:49<09:04,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000092124.jpg: 640x384 1 bottle, 1 toilet, 2 sinks, 108.0ms
Speed: 2.6ms preprocess, 108.0ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  16%|█▌        | 792/5000 [01:49<09:42,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000092177.jpg: 640x640 1 cake, 1 dining table, 94.8ms
Speed: 1.7ms preprocess, 94.8ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  16%|█▌        | 793/5000 [01:50<10:03,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000092416.jpg: 640x480 4 persons, 1 boat, 1 tie, 1 bottle, 69.9ms
Speed: 3.4ms preprocess, 69.9ms inference, 6.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  16%|█▌        | 794/5000 [01:50<09:50,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000092660.jpg: 448x640 1 bottle, 1 wine glass, 2 cups, 3 broccolis, 1 pizza, 1 dining table, 61.7ms
Speed: 3.1ms preprocess, 61.7ms inference, 7.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 795/5000 [01:50<09:41,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000092839.jpg: 544x640 1 bear, 70.4ms
Speed: 1.8ms preprocess, 70.4ms inference, 2.0ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  16%|█▌        | 796/5000 [01:50<09:20,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000092939.jpg: 640x448 2 persons, 2 cups, 2 cakes, 1 chair, 1 potted plant, 1 dining table, 65.3ms
Speed: 2.6ms preprocess, 65.3ms inference, 7.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  16%|█▌        | 797/5000 [01:50<09:20,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000093154.jpg: 640x448 10 cars, 61.1ms
Speed: 3.0ms preprocess, 61.1ms inference, 8.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  16%|█▌        | 798/5000 [01:50<09:23,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000093261.jpg: 480x640 2 persons, 1 kite, 73.9ms
Speed: 4.1ms preprocess, 73.9ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▌        | 799/5000 [01:50<09:14,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000093353.jpg: 480x640 2 persons, 1 sandwich, 1 hot dog, 1 book, 62.5ms
Speed: 2.9ms preprocess, 62.5ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▌        | 800/5000 [01:50<09:06,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000093437.jpg: 384x640 1 person, 1 bottle, 6 chairs, 1 potted plant, 58.3ms
Speed: 3.0ms preprocess, 58.3ms inference, 6.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  16%|█▌        | 801/5000 [01:51<09:00,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000093717.jpg: 448x640 13 persons, 1 skateboard, 61.6ms
Speed: 3.0ms preprocess, 61.6ms inference, 11.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 802/5000 [01:51<09:26,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000093965.jpg: 480x640 2 zebras, 66.4ms
Speed: 3.1ms preprocess, 66.4ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▌        | 803/5000 [01:51<09:06,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000094157.jpg: 640x384 7 persons, 1 clock, 54.0ms
Speed: 2.9ms preprocess, 54.0ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  16%|█▌        | 804/5000 [01:51<08:44,  8.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000094185.jpg: 480x640 1 person, 2 teddy bears, 69.6ms
Speed: 3.0ms preprocess, 69.6ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▌        | 805/5000 [01:51<08:45,  7.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000094326.jpg: 640x480 1 person, 2 benchs, 70.6ms
Speed: 4.8ms preprocess, 70.6ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  16%|█▌        | 806/5000 [01:51<08:47,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000094336.jpg: 448x640 1 cat, 1 cup, 2 sinks, 68.3ms
Speed: 4.0ms preprocess, 68.3ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 807/5000 [01:51<08:47,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000094614.jpg: 448x640 1 person, 83.5ms
Speed: 4.1ms preprocess, 83.5ms inference, 1.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 808/5000 [01:52<08:57,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000094751.jpg: 640x480 1 car, 4 traffic lights, 72.5ms
Speed: 5.3ms preprocess, 72.5ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  16%|█▌        | 809/5000 [01:52<09:08,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000094852.jpg: 512x640 1 elephant, 70.4ms
Speed: 3.2ms preprocess, 70.4ms inference, 2.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  16%|█▌        | 810/5000 [01:52<09:00,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000094871.jpg: 640x480 4 persons, 3 sheeps, 1 bear, 65.1ms
Speed: 2.8ms preprocess, 65.1ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  16%|█▌        | 811/5000 [01:52<09:07,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000094944.jpg: 448x640 3 persons, 2 skiss, 4 snowboards, 60.6ms
Speed: 3.1ms preprocess, 60.6ms inference, 8.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▌        | 812/5000 [01:52<08:52,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000095069.jpg: 480x640 1 clock, 87.4ms
Speed: 4.6ms preprocess, 87.4ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▋        | 813/5000 [01:52<09:35,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000095155.jpg: 448x640 5 persons, 2 snowboards, 61.9ms
Speed: 2.8ms preprocess, 61.9ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▋        | 814/5000 [01:52<09:17,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000095707.jpg: 384x640 1 sandwich, 1 cake, 81.6ms
Speed: 2.7ms preprocess, 81.6ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  16%|█▋        | 815/5000 [01:52<09:13,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000095786.jpg: 448x640 3 cups, 63.2ms
Speed: 4.0ms preprocess, 63.2ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▋        | 816/5000 [01:53<08:46,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000095843.jpg: 448x640 1 bus, 58.0ms
Speed: 2.9ms preprocess, 58.0ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▋        | 817/5000 [01:53<08:26,  8.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000095862.jpg: 480x640 16 persons, 3 baseball bats, 64.1ms
Speed: 4.9ms preprocess, 64.1ms inference, 18.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▋        | 818/5000 [01:53<09:29,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000095899.jpg: 480x640 1 stop sign, 66.1ms
Speed: 3.1ms preprocess, 66.1ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▋        | 819/5000 [01:53<09:02,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000096001.jpg: 448x640 1 dog, 1 cup, 1 bowl, 62.0ms
Speed: 3.2ms preprocess, 62.0ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▋        | 820/5000 [01:53<08:43,  7.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000096427.jpg: 448x640 3 persons, 1 tennis racket, 68.9ms
Speed: 3.0ms preprocess, 68.9ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  16%|█▋        | 821/5000 [01:53<08:43,  7.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000096493.jpg: 640x448 1 person, 1 couch, 1 bed, 1 cell phone, 95.3ms
Speed: 3.2ms preprocess, 95.3ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  16%|█▋        | 822/5000 [01:53<09:21,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000096549.jpg: 224x640 1 airplane, 109.6ms
Speed: 2.3ms preprocess, 109.6ms inference, 1.4ms postprocess per image at shape (1, 3, 224, 640)


Segmenting Images:  16%|█▋        | 823/5000 [01:53<09:34,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000096825.jpg: 512x640 1 person, 78.1ms
Speed: 2.8ms preprocess, 78.1ms inference, 2.5ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  16%|█▋        | 824/5000 [01:54<09:31,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000096960.jpg: 480x640 1 bear, 73.1ms
Speed: 2.6ms preprocess, 73.1ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  16%|█▋        | 825/5000 [01:54<09:12,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000097022.jpg: 448x640 1 chair, 1 microwave, 2 ovens, 1 sink, 62.9ms
Speed: 3.2ms preprocess, 62.9ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 826/5000 [01:54<09:03,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000097230.jpg: 480x640 3 elephants, 68.5ms
Speed: 3.8ms preprocess, 68.5ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 827/5000 [01:54<08:51,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000097278.jpg: 640x480 1 person, 1 skis, 1 snowboard, 78.9ms
Speed: 2.6ms preprocess, 78.9ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  17%|█▋        | 828/5000 [01:54<08:58,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000097337.jpg: 480x640 1 chair, 1 toilet, 1 tv, 1 vase, 64.8ms
Speed: 4.4ms preprocess, 64.8ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 829/5000 [01:54<08:53,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000097585.jpg: 640x480 8 vases, 65.4ms
Speed: 3.1ms preprocess, 65.4ms inference, 8.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  17%|█▋        | 830/5000 [01:54<08:57,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000097679.jpg: 480x640 2 cars, 68.5ms
Speed: 2.8ms preprocess, 68.5ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 831/5000 [01:54<08:50,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000097924.jpg: 448x640 2 persons, 1 bus, 3 trucks, 1 horse, 66.7ms
Speed: 4.8ms preprocess, 66.7ms inference, 7.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 832/5000 [01:55<08:59,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000097988.jpg: 640x640 22 persons, 2 bicycles, 1 bench, 84.3ms
Speed: 2.9ms preprocess, 84.3ms inference, 30.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  17%|█▋        | 833/5000 [01:55<11:19,  6.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000097994.jpg: 448x640 1 bottle, 1 cup, 2 tvs, 3 laptops, 2 keyboards, 2 books, 61.5ms
Speed: 3.5ms preprocess, 61.5ms inference, 8.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 834/5000 [01:55<10:56,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000098018.jpg: 448x640 1 vase, 68.5ms
Speed: 3.4ms preprocess, 68.5ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 835/5000 [01:55<10:04,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000098261.jpg: 448x640 3 birds, 62.7ms
Speed: 3.9ms preprocess, 62.7ms inference, 5.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 836/5000 [01:55<09:24,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000098287.jpg: 640x416 8 persons, 1 skateboard, 111.4ms
Speed: 2.5ms preprocess, 111.4ms inference, 7.1ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  17%|█▋        | 837/5000 [01:55<10:17,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000098392.jpg: 640x480 3 persons, 4 cars, 1 bus, 1 clock, 71.2ms
Speed: 3.0ms preprocess, 71.2ms inference, 8.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  17%|█▋        | 838/5000 [01:56<10:09,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000098497.jpg: 480x640 1 airplane, 1 traffic light, 65.3ms
Speed: 3.2ms preprocess, 65.3ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 839/5000 [01:56<09:22,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000098520.jpg: 480x640 1 airplane, 3 trucks, 61.5ms
Speed: 3.2ms preprocess, 61.5ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 840/5000 [01:56<08:56,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000098633.jpg: 640x448 1 knife, 1 dining table, 63.4ms
Speed: 3.2ms preprocess, 63.4ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  17%|█▋        | 841/5000 [01:56<08:39,  8.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000098716.jpg: 384x640 3 persons, 3 benchs, 1 bird, 55.8ms
Speed: 2.4ms preprocess, 55.8ms inference, 5.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  17%|█▋        | 842/5000 [01:56<08:33,  8.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000098839.jpg: 480x640 1 cat, 2 tvs, 77.3ms
Speed: 3.7ms preprocess, 77.3ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 843/5000 [01:56<08:48,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000098853.jpg: 640x448 15 persons, 1 sports ball, 5 chairs, 67.3ms
Speed: 3.2ms preprocess, 67.3ms inference, 17.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  17%|█▋        | 844/5000 [01:56<10:03,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000099024.jpg: 512x640 1 person, 1 surfboard, 80.1ms
Speed: 9.0ms preprocess, 80.1ms inference, 3.5ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  17%|█▋        | 845/5000 [01:56<10:04,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000099039.jpg: 480x640 2 persons, 2 wine glasss, 1 knife, 2 bowls, 2 dining tables, 1 cell phone, 71.4ms
Speed: 3.1ms preprocess, 71.4ms inference, 8.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 846/5000 [01:57<10:08,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000099053.jpg: 576x640 1 spoon, 1 bowl, 1 broccoli, 1 carrot, 1 dining table, 149.7ms
Speed: 2.0ms preprocess, 149.7ms inference, 7.8ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  17%|█▋        | 847/5000 [01:57<11:30,  6.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000099054.jpg: 640x448 1 person, 1 airplane, 59.3ms
Speed: 2.7ms preprocess, 59.3ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  17%|█▋        | 848/5000 [01:57<10:14,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000099114.jpg: 448x640 13 persons, 2 traffic lights, 57.9ms
Speed: 4.1ms preprocess, 57.9ms inference, 13.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 849/5000 [01:57<10:20,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000099182.jpg: 448x640 2 persons, 1 bed, 1 laptop, 1 book, 80.1ms
Speed: 4.4ms preprocess, 80.1ms inference, 9.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 850/5000 [01:57<10:15,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000099242.jpg: 448x640 1 person, 1 skis, 69.4ms
Speed: 2.7ms preprocess, 69.4ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 851/5000 [01:57<09:36,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000099428.jpg: 480x640 1 suitcase, 1 mouse, 2 cell phones, 76.2ms
Speed: 4.1ms preprocess, 76.2ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 852/5000 [01:58<09:56,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000099810.jpg: 448x640 1 person, 5 donuts, 3 chairs, 2 dining tables, 67.2ms
Speed: 5.1ms preprocess, 67.2ms inference, 9.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 853/5000 [01:58<10:05,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000100238.jpg: 480x640 4 persons, 1 handbag, 3 frisbees, 82.7ms
Speed: 3.4ms preprocess, 82.7ms inference, 9.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 854/5000 [01:58<10:37,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000100274.jpg: 448x640 1 car, 1 train, 1 truck, 71.9ms
Speed: 3.5ms preprocess, 71.9ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 855/5000 [01:58<10:09,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000100283.jpg: 480x640 2 cars, 1 stop sign, 82.3ms
Speed: 3.4ms preprocess, 82.3ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 856/5000 [01:58<10:00,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000100428.jpg: 512x640 3 persons, 101.3ms
Speed: 3.4ms preprocess, 101.3ms inference, 6.5ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  17%|█▋        | 857/5000 [01:58<10:26,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000100489.jpg: 640x480 1 bird, 75.4ms
Speed: 2.3ms preprocess, 75.4ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  17%|█▋        | 858/5000 [01:58<09:53,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000100510.jpg: 448x640 4 persons, 1 horse, 72.3ms
Speed: 5.1ms preprocess, 72.3ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 859/5000 [01:59<09:43,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000100582.jpg: 448x640 1 pizza, 67.6ms
Speed: 2.5ms preprocess, 67.6ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 860/5000 [01:59<09:19,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000100624.jpg: 448x640 4 persons, 4 cell phones, 70.0ms
Speed: 2.7ms preprocess, 70.0ms inference, 7.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 861/5000 [01:59<09:26,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000100723.jpg: 448x640 19 persons, 1 stop sign, 1 sports ball, 68.3ms
Speed: 4.5ms preprocess, 68.3ms inference, 18.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 862/5000 [01:59<10:28,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000101022.jpg: 640x480 (no detections), 69.2ms
Speed: 3.5ms preprocess, 69.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  17%|█▋        | 863/5000 [01:59<09:35,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000101068.jpg: 448x640 2 persons, 2 baseball bats, 69.6ms
Speed: 3.8ms preprocess, 69.6ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 864/5000 [01:59<09:25,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000101420.jpg: 480x640 1 cat, 1 couch, 1 bed, 102.0ms
Speed: 6.4ms preprocess, 102.0ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 865/5000 [01:59<09:54,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000101762.jpg: 480x640 1 bicycle, 1 cat, 74.0ms
Speed: 2.6ms preprocess, 74.0ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 866/5000 [02:00<09:31,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000101780.jpg: 640x480 4 giraffes, 74.2ms
Speed: 4.1ms preprocess, 74.2ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  17%|█▋        | 867/5000 [02:00<09:27,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000101787.jpg: 640x480 1 airplane, 74.6ms
Speed: 2.9ms preprocess, 74.6ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  17%|█▋        | 868/5000 [02:00<09:11,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000101884.jpg: 448x640 1 car, 1 clock, 64.0ms
Speed: 3.2ms preprocess, 64.0ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 869/5000 [02:00<08:52,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000102331.jpg: 480x640 1 person, 1 motorcycle, 74.2ms
Speed: 5.1ms preprocess, 74.2ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  17%|█▋        | 870/5000 [02:00<08:52,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000102356.jpg: 640x448 1 person, 1 motorcycle, 67.4ms
Speed: 3.4ms preprocess, 67.4ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  17%|█▋        | 871/5000 [02:00<08:41,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000102411.jpg: 448x640 11 persons, 1 motorcycle, 2 suitcases, 64.5ms
Speed: 3.1ms preprocess, 64.5ms inference, 14.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  17%|█▋        | 872/5000 [02:00<09:16,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000102644.jpg: 640x480 1 toilet, 89.2ms
Speed: 5.4ms preprocess, 89.2ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  17%|█▋        | 873/5000 [02:00<09:27,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000102707.jpg: 640x640 1 cup, 88.0ms
Speed: 4.5ms preprocess, 88.0ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  17%|█▋        | 874/5000 [02:01<09:34,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000102805.jpg: 448x640 1 person, 2 cars, 1 frisbee, 1 chair, 67.3ms
Speed: 2.5ms preprocess, 67.3ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 875/5000 [02:01<09:17,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000102820.jpg: 640x448 1 person, 1 cake, 1 dining table, 67.2ms
Speed: 3.0ms preprocess, 67.2ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  18%|█▊        | 876/5000 [02:01<09:02,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000103548.jpg: 480x640 1 person, 9 sheeps, 73.7ms
Speed: 3.2ms preprocess, 73.7ms inference, 12.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 877/5000 [02:01<09:33,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000103585.jpg: 640x448 1 toilet, 2 sinks, 1 vase, 68.6ms
Speed: 3.2ms preprocess, 68.6ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  18%|█▊        | 878/5000 [02:01<09:13,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000103723.jpg: 640x480 6 persons, 1 elephant, 68.6ms
Speed: 3.8ms preprocess, 68.6ms inference, 7.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  18%|█▊        | 879/5000 [02:01<09:17,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000104119.jpg: 480x640 1 bird, 95.8ms
Speed: 2.5ms preprocess, 95.8ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 880/5000 [02:01<09:31,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000104198.jpg: 480x640 1 stop sign, 72.6ms
Speed: 3.7ms preprocess, 72.6ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 881/5000 [02:02<09:18,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000104424.jpg: 640x448 1 person, 2 tennis rackets, 65.9ms
Speed: 3.4ms preprocess, 65.9ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  18%|█▊        | 882/5000 [02:02<09:00,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000104455.jpg: 480x640 1 zebra, 72.2ms
Speed: 4.2ms preprocess, 72.2ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 883/5000 [02:02<08:50,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000104572.jpg: 448x640 2 sinks, 69.5ms
Speed: 3.5ms preprocess, 69.5ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 884/5000 [02:02<08:45,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000104603.jpg: 448x640 2 bears, 70.1ms
Speed: 2.8ms preprocess, 70.1ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 885/5000 [02:02<08:42,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000104612.jpg: 480x640 6 broccolis, 76.3ms
Speed: 3.3ms preprocess, 76.3ms inference, 6.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 886/5000 [02:02<09:03,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000104619.jpg: 480x640 1 giraffe, 70.0ms
Speed: 3.3ms preprocess, 70.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 887/5000 [02:02<08:32,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000104666.jpg: 448x640 2 chairs, 1 tv, 95.4ms
Speed: 3.5ms preprocess, 95.4ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 888/5000 [02:02<09:16,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000104669.jpg: 480x640 3 persons, 2 knifes, 4 broccolis, 3 carrots, 1 dining table, 75.6ms
Speed: 3.4ms preprocess, 75.6ms inference, 13.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 889/5000 [02:03<09:59,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000104782.jpg: 480x640 (no detections), 75.8ms
Speed: 5.0ms preprocess, 75.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 890/5000 [02:03<09:18,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000104803.jpg: 640x480 1 toilet, 75.7ms
Speed: 4.6ms preprocess, 75.7ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  18%|█▊        | 891/5000 [02:03<09:07,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000105014.jpg: 640x448 1 fork, 1 spoon, 1 bowl, 1 broccoli, 1 carrot, 1 dining table, 1 toothbrush, 70.4ms
Speed: 2.8ms preprocess, 70.4ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  18%|█▊        | 892/5000 [02:03<09:19,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000105249.jpg: 448x640 2 toilets, 1 sink, 71.5ms
Speed: 4.7ms preprocess, 71.5ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 893/5000 [02:03<09:09,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000105264.jpg: 448x640 7 persons, 1 cow, 1 backpack, 72.2ms
Speed: 3.0ms preprocess, 72.2ms inference, 8.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 894/5000 [02:03<09:24,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000105335.jpg: 640x640 1 person, 1 bowl, 4 carrots, 105.5ms
Speed: 4.0ms preprocess, 105.5ms inference, 9.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  18%|█▊        | 895/5000 [02:03<10:26,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000105455.jpg: 640x448 5 clocks, 67.4ms
Speed: 2.9ms preprocess, 67.4ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  18%|█▊        | 896/5000 [02:04<09:54,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000105912.jpg: 480x640 12 persons, 5 cars, 1 fire hydrant, 1 kite, 75.9ms
Speed: 3.8ms preprocess, 75.9ms inference, 17.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 897/5000 [02:04<10:51,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000105923.jpg: 480x640 1 car, 72.2ms
Speed: 3.4ms preprocess, 72.2ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 898/5000 [02:04<10:09,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000106048.jpg: 448x640 1 bus, 67.8ms
Speed: 2.7ms preprocess, 67.8ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 899/5000 [02:04<09:28,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000106235.jpg: 480x640 1 cup, 3 couchs, 70.5ms
Speed: 3.2ms preprocess, 70.5ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 900/5000 [02:04<09:22,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000106266.jpg: 480x640 2 sandwichs, 74.3ms
Speed: 3.3ms preprocess, 74.3ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 901/5000 [02:04<09:08,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000106281.jpg: 448x640 2 trains, 68.4ms
Speed: 4.0ms preprocess, 68.4ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 902/5000 [02:04<08:53,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000106330.jpg: 448x640 1 teddy bear, 97.4ms
Speed: 2.6ms preprocess, 97.4ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 903/5000 [02:05<08:59,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000106389.jpg: 480x640 1 cat, 2 chairs, 7 books, 72.9ms
Speed: 3.5ms preprocess, 72.9ms inference, 9.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 904/5000 [02:05<09:40,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000106563.jpg: 640x640 1 clock, 93.2ms
Speed: 3.9ms preprocess, 93.2ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  18%|█▊        | 905/5000 [02:05<09:56,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000106757.jpg: 640x448 1 person, 1 elephant, 72.1ms
Speed: 3.5ms preprocess, 72.1ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  18%|█▊        | 906/5000 [02:05<09:33,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000106881.jpg: 544x640 2 birds, 165.6ms
Speed: 2.3ms preprocess, 165.6ms inference, 3.7ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  18%|█▊        | 907/5000 [02:05<11:06,  6.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000106912.jpg: 640x640 1 person, 1 sports ball, 2 skateboards, 90.3ms
Speed: 4.0ms preprocess, 90.3ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  18%|█▊        | 908/5000 [02:05<11:01,  6.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000107087.jpg: 448x640 (no detections), 67.9ms
Speed: 3.0ms preprocess, 67.9ms inference, 0.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 909/5000 [02:05<09:51,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000107094.jpg: 448x640 1 person, 2 skiss, 95.7ms
Speed: 3.7ms preprocess, 95.7ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 910/5000 [02:06<10:05,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000107226.jpg: 640x480 7 persons, 1 horse, 2 sheeps, 4 umbrellas, 72.4ms
Speed: 2.9ms preprocess, 72.4ms inference, 13.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  18%|█▊        | 911/5000 [02:06<10:24,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000107339.jpg: 480x640 2 persons, 1 couch, 75.2ms
Speed: 3.8ms preprocess, 75.2ms inference, 4.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 912/5000 [02:06<09:49,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000107554.jpg: 480x640 (no detections), 72.4ms
Speed: 2.7ms preprocess, 72.4ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 913/5000 [02:06<09:02,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000107851.jpg: 480x640 4 persons, 2 cows, 3 elephants, 77.2ms
Speed: 3.0ms preprocess, 77.2ms inference, 7.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 914/5000 [02:06<09:35,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000108026.jpg: 448x640 2 persons, 1 keyboard, 69.3ms
Speed: 2.8ms preprocess, 69.3ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 915/5000 [02:06<09:15,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000108244.jpg: 480x640 1 cat, 84.4ms
Speed: 2.9ms preprocess, 84.4ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 916/5000 [02:06<09:14,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000108253.jpg: 640x480 1 person, 2 bottles, 1 cup, 1 knife, 1 hot dog, 1 dining table, 73.1ms
Speed: 4.2ms preprocess, 73.1ms inference, 6.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  18%|█▊        | 917/5000 [02:07<09:45,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000108440.jpg: 448x640 1 train, 1 fire hydrant, 85.2ms
Speed: 3.4ms preprocess, 85.2ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 918/5000 [02:07<09:37,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000108495.jpg: 640x448 1 person, 1 skateboard, 74.0ms
Speed: 4.1ms preprocess, 74.0ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  18%|█▊        | 919/5000 [02:07<09:17,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000108503.jpg: 448x640 2 persons, 4 surfboards, 67.4ms
Speed: 3.0ms preprocess, 67.4ms inference, 8.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  18%|█▊        | 920/5000 [02:07<09:18,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000108864.jpg: 480x640 2 elephants, 74.9ms
Speed: 3.1ms preprocess, 74.9ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 921/5000 [02:07<09:05,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000109055.jpg: 480x640 1 bicycle, 1 dog, 71.9ms
Speed: 3.4ms preprocess, 71.9ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 922/5000 [02:07<08:57,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000109118.jpg: 640x512 3 persons, 177.9ms
Speed: 3.6ms preprocess, 177.9ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  18%|█▊        | 923/5000 [02:07<11:00,  6.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000109313.jpg: 640x480 1 person, 1 cup, 2 remotes, 1 book, 74.0ms
Speed: 2.7ms preprocess, 74.0ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  18%|█▊        | 924/5000 [02:08<10:27,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000109441.jpg: 480x640 3 cars, 4 traffic lights, 75.4ms
Speed: 2.6ms preprocess, 75.4ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  18%|█▊        | 925/5000 [02:08<10:09,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000109798.jpg: 448x640 3 donuts, 69.6ms
Speed: 4.5ms preprocess, 69.6ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▊        | 926/5000 [02:08<09:40,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000109827.jpg: 448x640 1 teddy bear, 70.6ms
Speed: 3.8ms preprocess, 70.6ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▊        | 927/5000 [02:08<09:22,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000109900.jpg: 480x640 7 persons, 4 airplanes, 1 truck, 73.9ms
Speed: 2.6ms preprocess, 73.9ms inference, 12.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▊        | 928/5000 [02:08<09:45,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000109916.jpg: 480x640 1 cup, 1 oven, 73.9ms
Speed: 3.2ms preprocess, 73.9ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▊        | 929/5000 [02:08<09:30,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000109976.jpg: 480x640 2 microwaves, 1 oven, 86.1ms
Speed: 3.0ms preprocess, 86.1ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▊        | 930/5000 [02:08<09:33,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000109992.jpg: 448x640 1 person, 1 car, 1 skis, 1 snowboard, 69.6ms
Speed: 3.5ms preprocess, 69.6ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▊        | 931/5000 [02:09<09:19,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000110042.jpg: 640x448 5 persons, 1 chair, 1 potted plant, 1 toilet, 72.7ms
Speed: 3.0ms preprocess, 72.7ms inference, 8.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  19%|█▊        | 932/5000 [02:09<09:32,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000110211.jpg: 352x640 1 cow, 5 zebras, 130.1ms
Speed: 3.3ms preprocess, 130.1ms inference, 4.8ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  19%|█▊        | 933/5000 [02:09<10:32,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000110282.jpg: 448x640 1 traffic light, 66.0ms
Speed: 3.3ms preprocess, 66.0ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▊        | 934/5000 [02:09<09:41,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000110359.jpg: 448x640 1 airplane, 68.4ms
Speed: 3.1ms preprocess, 68.4ms inference, 1.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▊        | 935/5000 [02:09<09:11,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000110449.jpg: 480x640 2 persons, 1 tie, 73.7ms
Speed: 4.3ms preprocess, 73.7ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▊        | 936/5000 [02:09<09:05,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000110638.jpg: 448x640 3 elephants, 86.8ms
Speed: 3.1ms preprocess, 86.8ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▊        | 937/5000 [02:09<09:22,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000110721.jpg: 448x640 1 airplane, 2 trucks, 73.1ms
Speed: 3.0ms preprocess, 73.1ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▉        | 938/5000 [02:10<09:10,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000110784.jpg: 640x448 2 persons, 1 bus, 1 truck, 71.0ms
Speed: 3.0ms preprocess, 71.0ms inference, 7.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  19%|█▉        | 939/5000 [02:10<09:00,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000110884.jpg: 640x640 1 bowl, 1 toilet, 2 sinks, 92.6ms
Speed: 2.4ms preprocess, 92.6ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  19%|█▉        | 940/5000 [02:10<09:30,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000110972.jpg: 512x640 1 bear, 78.0ms
Speed: 2.8ms preprocess, 78.0ms inference, 2.6ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  19%|█▉        | 941/5000 [02:10<09:20,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000110999.jpg: 640x384 1 person, 1 cup, 2 bowls, 4 oranges, 139.9ms
Speed: 3.2ms preprocess, 139.9ms inference, 6.5ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  19%|█▉        | 942/5000 [02:10<10:36,  6.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000111036.jpg: 640x448 1 car, 1 oven, 68.3ms
Speed: 3.0ms preprocess, 68.3ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  19%|█▉        | 943/5000 [02:10<09:53,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000111086.jpg: 448x640 2 cars, 1 airplane, 104.5ms
Speed: 5.3ms preprocess, 104.5ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▉        | 944/5000 [02:10<10:17,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000111179.jpg: 640x480 2 clocks, 96.4ms
Speed: 18.6ms preprocess, 96.4ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  19%|█▉        | 945/5000 [02:11<10:32,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000111207.jpg: 640x480 2 persons, 1 bench, 1 umbrella, 2 skateboards, 80.9ms
Speed: 2.9ms preprocess, 80.9ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  19%|█▉        | 946/5000 [02:11<10:25,  6.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000111609.jpg: 448x640 1 person, 2 beds, 1 laptop, 78.0ms
Speed: 3.3ms preprocess, 78.0ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▉        | 947/5000 [02:11<10:06,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000111951.jpg: 448x640 1 cup, 1 toilet, 2 sinks, 77.2ms
Speed: 4.3ms preprocess, 77.2ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▉        | 948/5000 [02:11<09:50,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000112110.jpg: 480x640 1 person, 84.8ms
Speed: 3.9ms preprocess, 84.8ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▉        | 949/5000 [02:11<09:43,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000112298.jpg: 480x640 1 person, 2 bottles, 80.5ms
Speed: 2.7ms preprocess, 80.5ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▉        | 950/5000 [02:11<09:49,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000112378.jpg: 480x640 1 bench, 3 bananas, 1 apple, 79.1ms
Speed: 3.4ms preprocess, 79.1ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▉        | 951/5000 [02:11<10:10,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000112626.jpg: 640x448 1 suitcase, 1 bottle, 1 couch, 69.3ms
Speed: 3.5ms preprocess, 69.3ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  19%|█▉        | 952/5000 [02:12<09:42,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000112634.jpg: 448x640 2 giraffes, 73.2ms
Speed: 3.2ms preprocess, 73.2ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▉        | 953/5000 [02:12<09:23,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000112798.jpg: 384x640 1 cat, 1 chair, 1 tv, 1 laptop, 4 books, 138.4ms
Speed: 2.8ms preprocess, 138.4ms inference, 5.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  19%|█▉        | 954/5000 [02:12<10:41,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000112997.jpg: 640x480 2 persons, 1 cell phone, 68.4ms
Speed: 3.3ms preprocess, 68.4ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  19%|█▉        | 955/5000 [02:12<09:54,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000113051.jpg: 480x640 1 boat, 74.5ms
Speed: 3.0ms preprocess, 74.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▉        | 956/5000 [02:12<09:28,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000113235.jpg: 480x640 2 persons, 1 dog, 1 cow, 75.6ms
Speed: 3.0ms preprocess, 75.6ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▉        | 957/5000 [02:12<09:20,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000113354.jpg: 480x640 2 zebras, 91.9ms
Speed: 3.8ms preprocess, 91.9ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▉        | 958/5000 [02:12<09:45,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000113403.jpg: 480x640 1 couch, 1 bed, 3 teddy bears, 95.1ms
Speed: 3.0ms preprocess, 95.1ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▉        | 959/5000 [02:13<10:00,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000113589.jpg: 480x640 1 bowl, 1 banana, 1 apple, 1 sandwich, 1 dining table, 78.1ms
Speed: 3.0ms preprocess, 78.1ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▉        | 960/5000 [02:13<09:47,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000113720.jpg: 448x640 5 persons, 1 bottle, 5 cups, 2 forks, 4 pizzas, 1 chair, 2 dining tables, 68.2ms
Speed: 3.3ms preprocess, 68.2ms inference, 17.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▉        | 961/5000 [02:13<10:39,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000113867.jpg: 640x640 2 clocks, 90.2ms
Speed: 1.8ms preprocess, 90.2ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  19%|█▉        | 962/5000 [02:13<10:24,  6.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000114049.jpg: 640x448 3 persons, 5 suitcases, 70.9ms
Speed: 2.9ms preprocess, 70.9ms inference, 6.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  19%|█▉        | 963/5000 [02:13<10:13,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000114770.jpg: 416x640 1 bicycle, 1 motorcycle, 132.8ms
Speed: 3.2ms preprocess, 132.8ms inference, 12.5ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  19%|█▉        | 964/5000 [02:13<11:20,  5.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000114871.jpg: 448x640 1 bowl, 1 banana, 2 oranges, 1 dining table, 73.1ms
Speed: 4.2ms preprocess, 73.1ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▉        | 965/5000 [02:14<10:42,  6.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000114884.jpg: 448x640 14 persons, 1 motorcycle, 4 buss, 1 truck, 1 backpack, 1 handbag, 71.2ms
Speed: 4.3ms preprocess, 71.2ms inference, 19.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▉        | 966/5000 [02:14<11:41,  5.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000114907.jpg: 448x640 21 persons, 1 cow, 67.5ms
Speed: 2.7ms preprocess, 67.5ms inference, 19.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▉        | 967/5000 [02:14<11:52,  5.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000115118.jpg: 448x640 1 train, 63.7ms
Speed: 2.9ms preprocess, 63.7ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▉        | 968/5000 [02:14<10:36,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000115245.jpg: 640x640 4 persons, 1 handbag, 3 suitcases, 2 chairs, 100.4ms
Speed: 4.2ms preprocess, 100.4ms inference, 12.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  19%|█▉        | 969/5000 [02:14<11:23,  5.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000115870.jpg: 448x640 8 persons, 2 couchs, 1 potted plant, 2 remotes, 1 book, 67.1ms
Speed: 3.0ms preprocess, 67.1ms inference, 12.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▉        | 970/5000 [02:14<11:22,  5.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000115885.jpg: 480x640 1 person, 1 cat, 1 laptop, 69.4ms
Speed: 4.7ms preprocess, 69.4ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▉        | 971/5000 [02:15<10:29,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000115898.jpg: 448x640 1 person, 2 surfboards, 69.1ms
Speed: 2.8ms preprocess, 69.1ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  19%|█▉        | 972/5000 [02:15<10:13,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000115946.jpg: 640x480 6 cars, 4 traffic lights, 104.6ms
Speed: 2.9ms preprocess, 104.6ms inference, 12.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  19%|█▉        | 973/5000 [02:15<11:01,  6.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000116068.jpg: 480x640 4 persons, 2 kites, 71.0ms
Speed: 4.3ms preprocess, 71.0ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  19%|█▉        | 974/5000 [02:15<10:33,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000116206.jpg: 480x640 1 cup, 1 knife, 2 carrots, 1 dining table, 77.4ms
Speed: 3.2ms preprocess, 77.4ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|█▉        | 975/5000 [02:15<10:08,  6.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000116208.jpg: 480x640 1 person, 5 bottles, 3 wine glasss, 3 cups, 3 pizzas, 1 chair, 1 dining table, 69.3ms
Speed: 2.6ms preprocess, 69.3ms inference, 16.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|█▉        | 976/5000 [02:15<10:36,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000116362.jpg: 640x640 2 bowls, 1 dining table, 95.4ms
Speed: 7.4ms preprocess, 95.4ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  20%|█▉        | 977/5000 [02:16<10:42,  6.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000116439.jpg: 640x448 2 persons, 1 boat, 68.8ms
Speed: 2.8ms preprocess, 68.8ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  20%|█▉        | 978/5000 [02:16<09:53,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000116479.jpg: 640x416 1 bed, 189.4ms
Speed: 2.8ms preprocess, 189.4ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  20%|█▉        | 979/5000 [02:16<11:47,  5.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000116589.jpg: 480x640 3 zebras, 95.8ms
Speed: 4.4ms preprocess, 95.8ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|█▉        | 980/5000 [02:16<11:29,  5.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000116825.jpg: 384x640 1 cat, 1 bed, 1 dining table, 60.8ms
Speed: 2.7ms preprocess, 60.8ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  20%|█▉        | 981/5000 [02:16<10:18,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000117197.jpg: 480x640 1 person, 1 fire hydrant, 2 cell phones, 70.8ms
Speed: 3.5ms preprocess, 70.8ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|█▉        | 982/5000 [02:16<09:51,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000117374.jpg: 448x640 3 birds, 1 cat, 72.0ms
Speed: 3.2ms preprocess, 72.0ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|█▉        | 983/5000 [02:16<09:31,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000117425.jpg: 448x640 3 persons, 1 cup, 1 cake, 1 dining table, 68.6ms
Speed: 3.0ms preprocess, 68.6ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|█▉        | 984/5000 [02:17<09:14,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000117492.jpg: 448x640 23 persons, 118.4ms
Speed: 2.7ms preprocess, 118.4ms inference, 20.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|█▉        | 985/5000 [02:17<11:13,  5.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000117525.jpg: 640x640 1 person, 1 dog, 91.6ms
Speed: 4.1ms preprocess, 91.6ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  20%|█▉        | 986/5000 [02:17<11:01,  6.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000117645.jpg: 480x640 2 persons, 69.9ms
Speed: 3.5ms preprocess, 69.9ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|█▉        | 987/5000 [02:17<10:13,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000117719.jpg: 448x640 2 persons, 17 bottles, 72.6ms
Speed: 2.7ms preprocess, 72.6ms inference, 16.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|█▉        | 988/5000 [02:17<11:28,  5.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000117744.jpg: 640x384 1 person, 1 tennis racket, 60.0ms
Speed: 2.4ms preprocess, 60.0ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  20%|█▉        | 989/5000 [02:17<10:10,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000117908.jpg: 416x640 1 person, 1 cat, 3 bottles, 60.9ms
Speed: 4.2ms preprocess, 60.9ms inference, 4.7ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  20%|█▉        | 990/5000 [02:18<09:30,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000117914.jpg: 640x448 1 toilet, 1 sink, 68.5ms
Speed: 4.1ms preprocess, 68.5ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  20%|█▉        | 991/5000 [02:18<09:03,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000118209.jpg: 480x640 2 horses, 99.7ms
Speed: 6.1ms preprocess, 99.7ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|█▉        | 992/5000 [02:18<09:24,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000118367.jpg: 448x640 1 person, 1 sandwich, 67.7ms
Speed: 3.0ms preprocess, 67.7ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|█▉        | 993/5000 [02:18<08:53,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000118405.jpg: 480x640 1 person, 77.4ms
Speed: 4.4ms preprocess, 77.4ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|█▉        | 994/5000 [02:18<08:57,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000118515.jpg: 320x640 1 bench, 1 cat, 128.6ms
Speed: 2.5ms preprocess, 128.6ms inference, 2.8ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  20%|█▉        | 995/5000 [02:18<09:51,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000118594.jpg: 480x640 3 cows, 71.5ms
Speed: 3.0ms preprocess, 71.5ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|█▉        | 996/5000 [02:18<09:26,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000118921.jpg: 448x640 1 person, 1 skis, 71.3ms
Speed: 3.0ms preprocess, 71.3ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|█▉        | 997/5000 [02:19<09:09,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000119038.jpg: 480x640 3 cows, 100.9ms
Speed: 2.9ms preprocess, 100.9ms inference, 17.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|█▉        | 998/5000 [02:19<10:03,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000119088.jpg: 480x640 3 persons, 1 frisbee, 74.2ms
Speed: 4.6ms preprocess, 74.2ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|█▉        | 999/5000 [02:19<09:46,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000119233.jpg: 448x640 2 cats, 1 laptop, 1 cell phone, 68.3ms
Speed: 4.3ms preprocess, 68.3ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|██        | 1000/5000 [02:19<09:24,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000119365.jpg: 640x480 4 persons, 3 tvs, 1 laptop, 85.6ms
Speed: 2.8ms preprocess, 85.6ms inference, 8.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  20%|██        | 1001/5000 [02:19<09:50,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000119445.jpg: 448x640 3 persons, 1 sports ball, 1 baseball glove, 75.8ms
Speed: 2.9ms preprocess, 75.8ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|██        | 1002/5000 [02:19<09:52,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000119452.jpg: 480x640 2 bananas, 1 apple, 2 oranges, 70.8ms
Speed: 2.9ms preprocess, 70.8ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|██        | 1003/5000 [02:19<09:35,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000119516.jpg: 448x640 1 person, 4 bicycles, 88.9ms
Speed: 2.9ms preprocess, 88.9ms inference, 6.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|██        | 1004/5000 [02:20<09:50,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000119641.jpg: 480x640 13 persons, 7 elephants, 71.4ms
Speed: 3.2ms preprocess, 71.4ms inference, 18.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|██        | 1005/5000 [02:20<10:32,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000119677.jpg: 480x640 1 fork, 1 knife, 1 cake, 75.8ms
Speed: 3.3ms preprocess, 75.8ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|██        | 1006/5000 [02:20<09:59,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000119828.jpg: 480x640 1 cat, 1 laptop, 1 mouse, 1 keyboard, 71.1ms
Speed: 4.0ms preprocess, 71.1ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|██        | 1007/5000 [02:20<09:34,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000119911.jpg: 448x640 1 person, 3 cars, 112.9ms
Speed: 4.2ms preprocess, 112.9ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|██        | 1008/5000 [02:20<10:07,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000119995.jpg: 640x608 6 persons, 2 tennis rackets, 176.2ms
Speed: 4.3ms preprocess, 176.2ms inference, 9.8ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  20%|██        | 1009/5000 [02:20<12:17,  5.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000120420.jpg: 480x640 6 persons, 83.5ms
Speed: 3.4ms preprocess, 83.5ms inference, 9.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|██        | 1010/5000 [02:21<11:39,  5.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000120572.jpg: 448x640 1 clock, 67.8ms
Speed: 2.9ms preprocess, 67.8ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|██        | 1011/5000 [02:21<10:30,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000120584.jpg: 640x480 1 tv, 1 clock, 69.6ms
Speed: 3.2ms preprocess, 69.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  20%|██        | 1012/5000 [02:21<09:47,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000120777.jpg: 480x640 3 cups, 1 dining table, 72.5ms
Speed: 4.6ms preprocess, 72.5ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|██        | 1013/5000 [02:21<09:31,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000120853.jpg: 448x640 1 bottle, 1 cup, 1 spoon, 1 bowl, 3 sandwichs, 1 dining table, 70.9ms
Speed: 6.1ms preprocess, 70.9ms inference, 9.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|██        | 1014/5000 [02:21<09:36,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000121031.jpg: 480x640 5 persons, 3 horses, 74.8ms
Speed: 2.4ms preprocess, 74.8ms inference, 9.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|██        | 1015/5000 [02:21<09:44,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000121153.jpg: 480x640 (no detections), 114.6ms
Speed: 5.9ms preprocess, 114.6ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|██        | 1016/5000 [02:21<10:16,  6.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000121242.jpg: 448x640 4 persons, 6 cars, 1 horse, 69.5ms
Speed: 3.1ms preprocess, 69.5ms inference, 9.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|██        | 1017/5000 [02:22<10:13,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000121417.jpg: 640x480 8 persons, 1 umbrella, 74.1ms
Speed: 3.3ms preprocess, 74.1ms inference, 9.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  20%|██        | 1018/5000 [02:22<10:11,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000121497.jpg: 480x640 6 persons, 1 bottle, 72.4ms
Speed: 3.1ms preprocess, 72.4ms inference, 7.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|██        | 1019/5000 [02:22<09:55,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000121506.jpg: 640x640 3 benchs, 2 umbrellas, 1 cup, 1 chair, 101.7ms
Speed: 4.9ms preprocess, 101.7ms inference, 9.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  20%|██        | 1020/5000 [02:22<10:33,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000121586.jpg: 480x640 1 tv, 3 books, 70.5ms
Speed: 2.6ms preprocess, 70.5ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|██        | 1021/5000 [02:22<09:54,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000121591.jpg: 512x640 1 person, 169.1ms
Speed: 4.6ms preprocess, 169.1ms inference, 2.6ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  20%|██        | 1022/5000 [02:22<11:38,  5.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000121673.jpg: 480x640 1 person, 1 surfboard, 70.6ms
Speed: 4.7ms preprocess, 70.6ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  20%|██        | 1023/5000 [02:23<11:00,  6.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000121744.jpg: 448x640 8 persons, 2 tennis rackets, 1 chair, 78.3ms
Speed: 5.1ms preprocess, 78.3ms inference, 9.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  20%|██        | 1024/5000 [02:23<10:59,  6.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000122046.jpg: 640x448 1 person, 1 umbrella, 69.7ms
Speed: 3.6ms preprocess, 69.7ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  20%|██        | 1025/5000 [02:23<10:06,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000122166.jpg: 480x640 4 persons, 1 bicycle, 6 cars, 1 motorcycle, 1 traffic light, 78.4ms
Speed: 3.0ms preprocess, 78.4ms inference, 12.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██        | 1026/5000 [02:23<10:25,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000122217.jpg: 640x448 2 persons, 2 skateboards, 70.0ms
Speed: 2.7ms preprocess, 70.0ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  21%|██        | 1027/5000 [02:23<09:44,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000122606.jpg: 480x640 1 person, 1 bus, 84.0ms
Speed: 3.2ms preprocess, 84.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██        | 1028/5000 [02:23<09:35,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000122672.jpg: 480x640 1 person, 3 surfboards, 103.2ms
Speed: 3.0ms preprocess, 103.2ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██        | 1029/5000 [02:23<09:56,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000122745.jpg: 640x480 1 stop sign, 80.2ms
Speed: 3.0ms preprocess, 80.2ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  21%|██        | 1030/5000 [02:24<09:28,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000122927.jpg: 384x640 2 birds, 126.4ms
Speed: 2.7ms preprocess, 126.4ms inference, 2.8ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  21%|██        | 1031/5000 [02:24<10:27,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000122962.jpg: 448x640 26 persons, 1 handbag, 2 cups, 1 chair, 1 dining table, 76.9ms
Speed: 3.5ms preprocess, 76.9ms inference, 25.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██        | 1032/5000 [02:24<11:48,  5.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000122969.jpg: 448x640 3 zebras, 71.8ms
Speed: 4.2ms preprocess, 71.8ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██        | 1033/5000 [02:24<10:48,  6.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000123131.jpg: 448x640 1 car, 1 bus, 2 trucks, 71.0ms
Speed: 3.3ms preprocess, 71.0ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██        | 1034/5000 [02:24<10:06,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000123213.jpg: 416x640 9 persons, 2 baseball bats, 1 baseball glove, 2 chairs, 66.4ms
Speed: 2.8ms preprocess, 66.4ms inference, 11.2ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  21%|██        | 1035/5000 [02:24<10:23,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000123321.jpg: 640x640 1 bowl, 9 broccolis, 85.2ms
Speed: 4.7ms preprocess, 85.2ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  21%|██        | 1036/5000 [02:25<10:55,  6.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000123480.jpg: 480x640 1 person, 1 toothbrush, 70.2ms
Speed: 3.4ms preprocess, 70.2ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██        | 1037/5000 [02:25<10:14,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000123585.jpg: 640x480 4 birds, 1 teddy bear, 69.6ms
Speed: 2.9ms preprocess, 69.6ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  21%|██        | 1038/5000 [02:25<09:43,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000123633.jpg: 480x640 5 persons, 1 bowl, 1 chair, 1 dining table, 1 book, 71.7ms
Speed: 3.4ms preprocess, 71.7ms inference, 8.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██        | 1039/5000 [02:25<09:48,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000124277.jpg: 512x640 1 train, 102.2ms
Speed: 3.3ms preprocess, 102.2ms inference, 4.8ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  21%|██        | 1040/5000 [02:25<10:05,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000124442.jpg: 640x480 1 person, 1 kite, 78.5ms
Speed: 5.3ms preprocess, 78.5ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  21%|██        | 1041/5000 [02:25<10:01,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000124636.jpg: 480x640 2 cows, 1 giraffe, 74.0ms
Speed: 3.6ms preprocess, 74.0ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██        | 1042/5000 [02:25<09:35,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000124659.jpg: 480x640 2 chairs, 1 couch, 3 tvs, 1 laptop, 1 book, 73.6ms
Speed: 2.7ms preprocess, 73.6ms inference, 8.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██        | 1043/5000 [02:26<09:39,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000124798.jpg: 448x640 9 cars, 1 bus, 71.6ms
Speed: 2.9ms preprocess, 71.6ms inference, 9.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██        | 1044/5000 [02:26<09:45,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000124975.jpg: 448x640 1 zebra, 66.8ms
Speed: 4.2ms preprocess, 66.8ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██        | 1045/5000 [02:26<09:09,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125062.jpg: 640x448 5 teddy bears, 68.6ms
Speed: 2.8ms preprocess, 68.6ms inference, 10.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  21%|██        | 1046/5000 [02:26<09:04,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125072.jpg: 320x640 4 cows, 63.1ms
Speed: 2.8ms preprocess, 63.1ms inference, 3.1ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  21%|██        | 1047/5000 [02:26<08:39,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125129.jpg: 448x640 3 persons, 4 chairs, 1 dining table, 1 book, 91.6ms
Speed: 6.0ms preprocess, 91.6ms inference, 8.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██        | 1048/5000 [02:26<09:27,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125211.jpg: 480x640 1 person, 1 zebra, 74.3ms
Speed: 3.3ms preprocess, 74.3ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██        | 1049/5000 [02:26<09:12,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125245.jpg: 448x640 1 broccoli, 89.6ms
Speed: 7.5ms preprocess, 89.6ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██        | 1050/5000 [02:27<09:38,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125257.jpg: 480x640 3 persons, 1 snowboard, 75.0ms
Speed: 2.9ms preprocess, 75.0ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██        | 1051/5000 [02:27<09:26,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125405.jpg: 448x640 2 dogs, 1 cow, 1 frisbee, 73.1ms
Speed: 3.1ms preprocess, 73.1ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██        | 1052/5000 [02:27<09:14,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125472.jpg: 640x448 1 person, 1 traffic light, 1 skateboard, 74.7ms
Speed: 2.7ms preprocess, 74.7ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  21%|██        | 1053/5000 [02:27<08:59,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125572.jpg: 448x640 12 persons, 1 bicycle, 1 motorcycle, 3 buss, 2 trucks, 1 clock, 72.1ms
Speed: 3.1ms preprocess, 72.1ms inference, 16.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██        | 1054/5000 [02:27<10:13,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125778.jpg: 480x640 1 chair, 2 couchs, 1 potted plant, 1 tv, 2 vases, 72.3ms
Speed: 3.9ms preprocess, 72.3ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██        | 1055/5000 [02:27<09:57,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125806.jpg: 448x640 2 cows, 1 bear, 70.8ms
Speed: 3.4ms preprocess, 70.8ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██        | 1056/5000 [02:27<09:36,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125850.jpg: 512x640 1 cat, 1 bowl, 82.2ms
Speed: 2.8ms preprocess, 82.2ms inference, 3.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  21%|██        | 1057/5000 [02:28<09:24,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125936.jpg: 448x640 5 persons, 2 bananas, 71.1ms
Speed: 4.6ms preprocess, 71.1ms inference, 7.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██        | 1058/5000 [02:28<09:50,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000125952.jpg: 640x384 2 backpacks, 2 suitcases, 133.1ms
Speed: 3.5ms preprocess, 133.1ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  21%|██        | 1059/5000 [02:28<10:44,  6.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000126107.jpg: 480x640 6 persons, 1 airplane, 2 boats, 1 bird, 71.7ms
Speed: 3.2ms preprocess, 71.7ms inference, 9.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██        | 1060/5000 [02:28<10:32,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000126110.jpg: 480x640 1 bus, 84.5ms
Speed: 5.4ms preprocess, 84.5ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██        | 1061/5000 [02:28<10:01,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000126137.jpg: 448x640 11 persons, 1 tennis racket, 1 chair, 69.2ms
Speed: 2.9ms preprocess, 69.2ms inference, 11.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██        | 1062/5000 [02:28<10:09,  6.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000126216.jpg: 448x640 5 elephants, 73.1ms
Speed: 3.4ms preprocess, 73.1ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██▏       | 1063/5000 [02:29<09:53,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000126226.jpg: 480x640 4 persons, 1 motorcycle, 76.9ms
Speed: 3.2ms preprocess, 76.9ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██▏       | 1064/5000 [02:29<09:39,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000126592.jpg: 448x640 9 cars, 1 stop sign, 78.4ms
Speed: 2.8ms preprocess, 78.4ms inference, 9.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██▏       | 1065/5000 [02:29<09:47,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127092.jpg: 480x640 1 person, 3 cars, 1 potted plant, 71.8ms
Speed: 3.2ms preprocess, 71.8ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██▏       | 1066/5000 [02:29<09:30,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127135.jpg: 640x448 1 dog, 70.7ms
Speed: 3.1ms preprocess, 70.7ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  21%|██▏       | 1067/5000 [02:29<09:04,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127182.jpg: 640x448 1 knife, 1 potted plant, 1 microwave, 2 ovens, 1 refrigerator, 1 vase, 114.9ms
Speed: 24.3ms preprocess, 114.9ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  21%|██▏       | 1068/5000 [02:29<10:20,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127263.jpg: 448x640 2 persons, 1 car, 1 frisbee, 68.7ms
Speed: 3.4ms preprocess, 68.7ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██▏       | 1069/5000 [02:29<09:44,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127270.jpg: 640x448 5 persons, 70.3ms
Speed: 4.7ms preprocess, 70.3ms inference, 9.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  21%|██▏       | 1070/5000 [02:30<09:35,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127394.jpg: 640x416 6 persons, 4 bottles, 1 wine glass, 6 cups, 1 knife, 1 spoon, 1 bowl, 1 chair, 1 dining table, 148.6ms
Speed: 2.6ms preprocess, 148.6ms inference, 17.4ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  21%|██▏       | 1071/5000 [02:30<11:55,  5.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127476.jpg: 640x480 1 person, 2 pizzas, 1 dining table, 1 cell phone, 70.7ms
Speed: 3.4ms preprocess, 70.7ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  21%|██▏       | 1072/5000 [02:30<11:09,  5.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127494.jpg: 448x640 1 dining table, 72.9ms
Speed: 5.9ms preprocess, 72.9ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  21%|██▏       | 1073/5000 [02:30<10:17,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127517.jpg: 480x640 11 surfboards, 88.9ms
Speed: 2.8ms preprocess, 88.9ms inference, 14.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  21%|██▏       | 1074/5000 [02:30<10:44,  6.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127530.jpg: 448x640 1 person, 2 sports balls, 1 tennis racket, 102.0ms
Speed: 4.8ms preprocess, 102.0ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1075/5000 [02:30<10:38,  6.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127624.jpg: 640x640 (no detections), 103.8ms
Speed: 8.0ms preprocess, 103.8ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  22%|██▏       | 1076/5000 [02:31<10:25,  6.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127660.jpg: 544x640 2 persons, 1 chair, 178.5ms
Speed: 3.1ms preprocess, 178.5ms inference, 4.3ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  22%|██▏       | 1077/5000 [02:31<12:00,  5.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127955.jpg: 448x640 2 bears, 68.8ms
Speed: 2.9ms preprocess, 68.8ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1078/5000 [02:31<11:02,  5.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000127987.jpg: 448x640 1 person, 2 tvs, 84.2ms
Speed: 2.9ms preprocess, 84.2ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1079/5000 [02:31<10:29,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000128051.jpg: 384x640 1 car, 2 buss, 2 trucks, 166.6ms
Speed: 2.6ms preprocess, 166.6ms inference, 10.0ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  22%|██▏       | 1080/5000 [02:31<11:53,  5.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000128112.jpg: 640x448 1 person, 67.1ms
Speed: 2.6ms preprocess, 67.1ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  22%|██▏       | 1081/5000 [02:31<10:30,  6.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000128148.jpg: 448x640 2 couchs, 1 tv, 71.0ms
Speed: 3.1ms preprocess, 71.0ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1082/5000 [02:32<09:46,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000128372.jpg: 448x640 9 persons, 1 bus, 65.8ms
Speed: 2.7ms preprocess, 65.8ms inference, 10.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1083/5000 [02:32<09:36,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000128476.jpg: 448x640 2 persons, 1 cup, 5 cakes, 69.4ms
Speed: 2.7ms preprocess, 69.4ms inference, 8.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1084/5000 [02:32<09:31,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000128598.jpg: 640x480 3 apples, 8 oranges, 69.5ms
Speed: 3.2ms preprocess, 69.5ms inference, 10.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  22%|██▏       | 1085/5000 [02:32<09:39,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000128654.jpg: 512x640 1 person, 1 frisbee, 103.7ms
Speed: 4.6ms preprocess, 103.7ms inference, 3.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  22%|██▏       | 1086/5000 [02:32<09:55,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000128658.jpg: 448x640 2 carrots, 1 dining table, 105.2ms
Speed: 3.7ms preprocess, 105.2ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1087/5000 [02:32<10:10,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000128675.jpg: 480x640 4 persons, 1 kite, 2 surfboards, 76.2ms
Speed: 3.3ms preprocess, 76.2ms inference, 8.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1088/5000 [02:32<10:03,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000128699.jpg: 640x480 12 persons, 1 skateboard, 75.4ms
Speed: 4.5ms preprocess, 75.4ms inference, 13.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  22%|██▏       | 1089/5000 [02:33<10:17,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000128748.jpg: 640x512 3 persons, 1 baseball bat, 176.9ms
Speed: 3.1ms preprocess, 176.9ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  22%|██▏       | 1090/5000 [02:33<11:55,  5.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000129054.jpg: 480x640 16 zebras, 72.7ms
Speed: 4.3ms preprocess, 72.7ms inference, 15.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1091/5000 [02:33<11:47,  5.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000129062.jpg: 480x640 1 parking meter, 2 umbrellas, 65.6ms
Speed: 2.7ms preprocess, 65.6ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1092/5000 [02:33<10:47,  6.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000129113.jpg: 480x640 2 benchs, 96.2ms
Speed: 3.3ms preprocess, 96.2ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1093/5000 [02:33<10:27,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000129135.jpg: 480x640 1 train, 1 bench, 71.3ms
Speed: 2.8ms preprocess, 71.3ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1094/5000 [02:33<09:41,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000129322.jpg: 640x480 1 toilet, 71.5ms
Speed: 2.9ms preprocess, 71.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  22%|██▏       | 1095/5000 [02:34<09:04,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000129416.jpg: 480x640 5 horses, 4 cows, 70.5ms
Speed: 4.5ms preprocess, 70.5ms inference, 8.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1096/5000 [02:34<09:13,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000129492.jpg: 480x640 3 persons, 1 handbag, 1 chair, 1 dining table, 1 remote, 1 cell phone, 87.3ms
Speed: 2.9ms preprocess, 87.3ms inference, 9.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1097/5000 [02:34<09:38,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000129756.jpg: 480x640 1 person, 8 sheeps, 1 cow, 73.7ms
Speed: 3.5ms preprocess, 73.7ms inference, 10.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1098/5000 [02:34<09:56,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000129812.jpg: 448x640 1 person, 1 skis, 66.1ms
Speed: 2.8ms preprocess, 66.1ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1099/5000 [02:34<09:07,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000129945.jpg: 320x640 7 persons, 1 baseball bat, 1 baseball glove, 173.6ms
Speed: 3.5ms preprocess, 173.6ms inference, 6.3ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  22%|██▏       | 1100/5000 [02:34<11:03,  5.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000130386.jpg: 640x480 4 persons, 3 bicycles, 1 car, 1 suitcase, 74.7ms
Speed: 4.3ms preprocess, 74.7ms inference, 8.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  22%|██▏       | 1101/5000 [02:35<10:45,  6.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000130465.jpg: 448x640 2 clocks, 94.3ms
Speed: 2.9ms preprocess, 94.3ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1102/5000 [02:35<10:37,  6.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000130566.jpg: 448x640 1 train, 74.0ms
Speed: 2.9ms preprocess, 74.0ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1103/5000 [02:35<09:56,  6.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000130579.jpg: 640x416 1 person, 1 baseball glove, 154.2ms
Speed: 2.4ms preprocess, 154.2ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  22%|██▏       | 1104/5000 [02:35<11:14,  5.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000130586.jpg: 640x448 1 person, 1 kite, 69.3ms
Speed: 3.0ms preprocess, 69.3ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  22%|██▏       | 1105/5000 [02:35<10:18,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000130599.jpg: 480x640 1 person, 1 cow, 1 giraffe, 69.9ms
Speed: 3.3ms preprocess, 69.9ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1106/5000 [02:35<09:39,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000130613.jpg: 448x640 3 carrots, 99.7ms
Speed: 2.8ms preprocess, 99.7ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1107/5000 [02:35<09:43,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000130699.jpg: 448x640 6 persons, 3 cars, 65.2ms
Speed: 2.7ms preprocess, 65.2ms inference, 7.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1108/5000 [02:36<09:26,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000130826.jpg: 480x640 6 persons, 3 benchs, 1 handbag, 1 chair, 87.0ms
Speed: 3.4ms preprocess, 87.0ms inference, 13.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1109/5000 [02:36<10:07,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000131131.jpg: 480x640 2 cats, 2 tvs, 2 keyboards, 73.6ms
Speed: 2.7ms preprocess, 73.6ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1110/5000 [02:36<10:04,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000131138.jpg: 480x640 3 cups, 1 spoon, 1 chair, 1 tv, 1 laptop, 1 mouse, 2 keyboards, 74.8ms
Speed: 3.0ms preprocess, 74.8ms inference, 9.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1111/5000 [02:36<10:21,  6.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000131273.jpg: 640x480 1 cat, 1 dog, 88.2ms
Speed: 3.5ms preprocess, 88.2ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  22%|██▏       | 1112/5000 [02:36<10:05,  6.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000131379.jpg: 480x640 1 pizza, 69.0ms
Speed: 2.9ms preprocess, 69.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1113/5000 [02:36<09:23,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000131386.jpg: 448x640 2 airplanes, 82.8ms
Speed: 3.0ms preprocess, 82.8ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1114/5000 [02:37<09:18,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000131431.jpg: 640x448 1 clock, 71.5ms
Speed: 3.0ms preprocess, 71.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  22%|██▏       | 1115/5000 [02:37<08:50,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000131444.jpg: 640x480 1 person, 1 tie, 73.8ms
Speed: 2.7ms preprocess, 73.8ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  22%|██▏       | 1116/5000 [02:37<08:39,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000131556.jpg: 448x640 2 persons, 2 skiss, 115.9ms
Speed: 3.7ms preprocess, 115.9ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1117/5000 [02:37<09:31,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000131938.jpg: 640x480 1 cat, 1 tie, 72.1ms
Speed: 3.0ms preprocess, 72.1ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  22%|██▏       | 1118/5000 [02:37<09:04,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000132116.jpg: 640x640 1 knife, 2 bowls, 4 broccolis, 1 dining table, 184.1ms
Speed: 4.0ms preprocess, 184.1ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  22%|██▏       | 1119/5000 [02:37<11:27,  5.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000132329.jpg: 640x448 4 bottles, 1 sink, 2 refrigerators, 62.8ms
Speed: 2.9ms preprocess, 62.8ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  22%|██▏       | 1120/5000 [02:37<10:36,  6.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000132375.jpg: 480x640 1 potted plant, 1 dining table, 2 vases, 84.4ms
Speed: 17.8ms preprocess, 84.4ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  22%|██▏       | 1121/5000 [02:38<10:22,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000132408.jpg: 640x448 1 person, 3 horses, 65.2ms
Speed: 2.7ms preprocess, 65.2ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  22%|██▏       | 1122/5000 [02:38<09:37,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000132544.jpg: 448x640 6 persons, 1 sports ball, 1 baseball glove, 81.3ms
Speed: 2.7ms preprocess, 81.3ms inference, 12.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1123/5000 [02:38<10:00,  6.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000132587.jpg: 448x640 4 persons, 2 skateboards, 1 bottle, 72.1ms
Speed: 3.1ms preprocess, 72.1ms inference, 6.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  22%|██▏       | 1124/5000 [02:38<09:45,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000132622.jpg: 512x640 1 bear, 73.2ms
Speed: 3.1ms preprocess, 73.2ms inference, 2.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  22%|██▎       | 1125/5000 [02:38<09:13,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000132703.jpg: 640x448 1 person, 1 skateboard, 92.1ms
Speed: 3.1ms preprocess, 92.1ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  23%|██▎       | 1126/5000 [02:38<09:14,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000132796.jpg: 448x640 3 persons, 1 elephant, 66.4ms
Speed: 3.0ms preprocess, 66.4ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1127/5000 [02:38<08:51,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000132931.jpg: 448x640 2 persons, 1 baseball glove, 66.3ms
Speed: 2.7ms preprocess, 66.3ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1128/5000 [02:39<08:33,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000133000.jpg: 448x640 1 train, 81.2ms
Speed: 2.9ms preprocess, 81.2ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1129/5000 [02:39<08:34,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000133087.jpg: 448x640 1 person, 2 traffic lights, 93.2ms
Speed: 2.8ms preprocess, 93.2ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1130/5000 [02:39<08:47,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000133233.jpg: 384x640 2 boats, 57.4ms
Speed: 2.7ms preprocess, 57.4ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  23%|██▎       | 1131/5000 [02:39<08:23,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000133244.jpg: 384x640 18 persons, 1 backpack, 2 tennis rackets, 69.2ms
Speed: 3.2ms preprocess, 69.2ms inference, 16.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  23%|██▎       | 1132/5000 [02:39<09:15,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000133343.jpg: 640x416 1 person, 2 ties, 66.6ms
Speed: 4.1ms preprocess, 66.6ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  23%|██▎       | 1133/5000 [02:39<08:54,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000133418.jpg: 448x640 1 person, 2 tennis rackets, 95.5ms
Speed: 4.4ms preprocess, 95.5ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1134/5000 [02:39<09:13,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000133567.jpg: 448x640 1 person, 1 train, 72.2ms
Speed: 3.8ms preprocess, 72.2ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1135/5000 [02:40<09:07,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000133631.jpg: 640x448 1 elephant, 105.5ms
Speed: 4.0ms preprocess, 105.5ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  23%|██▎       | 1136/5000 [02:40<09:29,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000133645.jpg: 448x640 2 boats, 1 bench, 69.6ms
Speed: 2.7ms preprocess, 69.6ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1137/5000 [02:40<09:00,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000133778.jpg: 512x640 2 cows, 87.4ms
Speed: 3.0ms preprocess, 87.4ms inference, 3.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  23%|██▎       | 1138/5000 [02:40<09:03,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000133819.jpg: 448x640 8 persons, 2 buss, 3 traffic lights, 73.2ms
Speed: 2.7ms preprocess, 73.2ms inference, 11.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1139/5000 [02:40<09:20,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000133969.jpg: 448x640 14 persons, 2 baseball gloves, 86.9ms
Speed: 3.3ms preprocess, 86.9ms inference, 16.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1140/5000 [02:40<10:23,  6.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000134034.jpg: 640x608 1 bench, 2 cows, 164.6ms
Speed: 3.9ms preprocess, 164.6ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  23%|██▎       | 1141/5000 [02:41<11:45,  5.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000134096.jpg: 480x640 1 cat, 1 sink, 70.5ms
Speed: 3.1ms preprocess, 70.5ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  23%|██▎       | 1142/5000 [02:41<10:34,  6.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000134112.jpg: 480x640 2 dogs, 1 bed, 1 laptop, 89.3ms
Speed: 16.5ms preprocess, 89.3ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  23%|██▎       | 1143/5000 [02:41<10:35,  6.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000134322.jpg: 480x640 9 persons, 1 kite, 72.0ms
Speed: 2.9ms preprocess, 72.0ms inference, 9.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  23%|██▎       | 1144/5000 [02:41<10:16,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000134689.jpg: 640x448 1 giraffe, 66.3ms
Speed: 3.2ms preprocess, 66.3ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  23%|██▎       | 1145/5000 [02:41<09:26,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000134722.jpg: 480x640 1 airplane, 1 train, 100.9ms
Speed: 3.5ms preprocess, 100.9ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  23%|██▎       | 1146/5000 [02:41<09:38,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000134856.jpg: 480x640 5 persons, 73.9ms
Speed: 3.0ms preprocess, 73.9ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  23%|██▎       | 1147/5000 [02:41<09:21,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000134882.jpg: 640x640 1 cat, 1 dog, 1 bed, 2 books, 92.8ms
Speed: 3.0ms preprocess, 92.8ms inference, 6.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  23%|██▎       | 1148/5000 [02:42<09:35,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000134886.jpg: 480x640 2 persons, 1 car, 1 airplane, 1 truck, 120.6ms
Speed: 2.5ms preprocess, 120.6ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  23%|██▎       | 1149/5000 [02:42<10:11,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000135410.jpg: 640x480 1 person, 7 cars, 1 motorcycle, 1 parking meter, 69.8ms
Speed: 2.7ms preprocess, 69.8ms inference, 9.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  23%|██▎       | 1150/5000 [02:42<10:05,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000135561.jpg: 480x640 1 bowl, 1 toaster, 1 sink, 1 refrigerator, 73.2ms
Speed: 3.4ms preprocess, 73.2ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  23%|██▎       | 1151/5000 [02:42<09:35,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000135604.jpg: 448x640 3 persons, 1 sports ball, 73.7ms
Speed: 2.7ms preprocess, 73.7ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1152/5000 [02:42<09:14,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000135670.jpg: 640x480 2 bicycles, 75.7ms
Speed: 3.6ms preprocess, 75.7ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  23%|██▎       | 1153/5000 [02:42<09:14,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000135673.jpg: 448x640 2 airplanes, 79.9ms
Speed: 3.4ms preprocess, 79.9ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1154/5000 [02:42<09:01,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000135872.jpg: 448x640 1 tie, 1 chair, 1 tv, 1 laptop, 2 keyboards, 1 book, 97.5ms
Speed: 2.7ms preprocess, 97.5ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1155/5000 [02:43<09:26,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000135890.jpg: 640x448 2 persons, 2 clocks, 76.2ms
Speed: 3.0ms preprocess, 76.2ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  23%|██▎       | 1156/5000 [02:43<09:11,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000135902.jpg: 448x640 1 person, 2 trains, 74.1ms
Speed: 3.6ms preprocess, 74.1ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1157/5000 [02:43<08:56,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000136033.jpg: 448x640 1 sink, 70.6ms
Speed: 2.8ms preprocess, 70.6ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1158/5000 [02:43<08:32,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000136334.jpg: 480x640 2 persons, 82.7ms
Speed: 3.0ms preprocess, 82.7ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  23%|██▎       | 1159/5000 [02:43<08:32,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000136355.jpg: 448x640 1 bowl, 3 chairs, 3 potted plants, 1 dining table, 1 refrigerator, 1 vase, 69.0ms
Speed: 2.7ms preprocess, 69.0ms inference, 8.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1160/5000 [02:43<09:12,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000136466.jpg: 640x448 1 oven, 92.4ms
Speed: 5.2ms preprocess, 92.4ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  23%|██▎       | 1161/5000 [02:43<09:24,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000136600.jpg: 480x640 1 bottle, 2 cups, 1 laptop, 2 vases, 75.7ms
Speed: 3.6ms preprocess, 75.7ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  23%|██▎       | 1162/5000 [02:44<09:16,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000136633.jpg: 640x448 6 persons, 2 umbrellas, 1 bowl, 74.5ms
Speed: 3.1ms preprocess, 74.5ms inference, 8.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  23%|██▎       | 1163/5000 [02:44<09:23,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000136715.jpg: 448x640 14 persons, 3 motorcycles, 1 truck, 69.7ms
Speed: 2.9ms preprocess, 69.7ms inference, 15.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1164/5000 [02:44<10:06,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000136772.jpg: 448x640 1 bowl, 1 banana, 1 apple, 1 orange, 68.7ms
Speed: 4.6ms preprocess, 68.7ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1165/5000 [02:44<09:30,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000136915.jpg: 448x640 3 persons, 1 tie, 1 chair, 75.4ms
Speed: 2.9ms preprocess, 75.4ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1166/5000 [02:44<09:16,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000137106.jpg: 448x640 1 person, 102.5ms
Speed: 2.7ms preprocess, 102.5ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1167/5000 [02:44<09:44,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000137246.jpg: 448x640 2 persons, 2 umbrellas, 1 sandwich, 1 dining table, 75.0ms
Speed: 4.5ms preprocess, 75.0ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1168/5000 [02:45<09:32,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000137294.jpg: 448x640 2 persons, 67.7ms
Speed: 2.6ms preprocess, 67.7ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  23%|██▎       | 1169/5000 [02:45<08:58,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000137576.jpg: 576x640 1 truck, 5 cows, 184.5ms
Speed: 2.4ms preprocess, 184.5ms inference, 10.4ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  23%|██▎       | 1170/5000 [02:45<11:13,  5.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000137727.jpg: 384x640 1 car, 1 train, 1 truck, 1 backpack, 4 suitcases, 62.2ms
Speed: 3.2ms preprocess, 62.2ms inference, 7.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  23%|██▎       | 1171/5000 [02:45<10:18,  6.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000137950.jpg: 416x640 2 airplanes, 135.6ms
Speed: 2.8ms preprocess, 135.6ms inference, 2.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  23%|██▎       | 1172/5000 [02:45<10:43,  5.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000138115.jpg: 640x480 1 person, 1 orange, 1 donut, 91.3ms
Speed: 4.1ms preprocess, 91.3ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  23%|██▎       | 1173/5000 [02:45<10:47,  5.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000138241.jpg: 640x480 1 bowl, 1 pizza, 1 dining table, 82.2ms
Speed: 3.8ms preprocess, 82.2ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  23%|██▎       | 1174/5000 [02:46<10:10,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000138492.jpg: 640x448 1 dog, 1 frisbee, 67.9ms
Speed: 2.8ms preprocess, 67.9ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  24%|██▎       | 1175/5000 [02:46<09:26,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000138550.jpg: 448x640 1 fire hydrant, 73.2ms
Speed: 3.2ms preprocess, 73.2ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▎       | 1176/5000 [02:46<08:57,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000138639.jpg: 480x640 8 persons, 1 bicycle, 3 cars, 2 backpacks, 2 potted plants, 71.5ms
Speed: 3.1ms preprocess, 71.5ms inference, 15.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▎       | 1177/5000 [02:46<09:41,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000138819.jpg: 480x640 1 toilet, 1 sink, 71.8ms
Speed: 3.1ms preprocess, 71.8ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▎       | 1178/5000 [02:46<09:07,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000138856.jpg: 480x640 1 cup, 1 bowl, 1 cake, 1 dining table, 1 oven, 107.4ms
Speed: 3.1ms preprocess, 107.4ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▎       | 1179/5000 [02:46<09:41,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000138954.jpg: 640x640 1 banana, 3 oranges, 2 carrots, 1 dining table, 91.0ms
Speed: 3.4ms preprocess, 91.0ms inference, 9.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  24%|██▎       | 1180/5000 [02:46<10:17,  6.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000138979.jpg: 448x640 5 persons, 1 car, 1 bus, 78.4ms
Speed: 3.4ms preprocess, 78.4ms inference, 6.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▎       | 1181/5000 [02:47<10:05,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000139077.jpg: 480x640 1 person, 1 tv, 1 remote, 73.7ms
Speed: 4.9ms preprocess, 73.7ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▎       | 1182/5000 [02:47<09:34,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000139099.jpg: 480x640 13 persons, 2 handbags, 1 banana, 77.4ms
Speed: 3.3ms preprocess, 77.4ms inference, 15.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▎       | 1183/5000 [02:47<10:08,  6.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000139260.jpg: 416x640 1 banana, 1 cake, 74.0ms
Speed: 2.2ms preprocess, 74.0ms inference, 2.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  24%|██▎       | 1184/5000 [02:47<09:27,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000139684.jpg: 448x640 2 chairs, 2 couchs, 1 tv, 3 books, 1 clock, 94.0ms
Speed: 4.3ms preprocess, 94.0ms inference, 7.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▎       | 1185/5000 [02:47<09:59,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000139871.jpg: 448x640 1 airplane, 68.7ms
Speed: 3.0ms preprocess, 68.7ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▎       | 1186/5000 [02:47<09:16,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000139872.jpg: 480x640 1 person, 1 dog, 1 frisbee, 77.0ms
Speed: 2.8ms preprocess, 77.0ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▎       | 1187/5000 [02:47<09:01,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000139883.jpg: 448x640 (no detections), 86.8ms
Speed: 21.9ms preprocess, 86.8ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▍       | 1188/5000 [02:48<09:10,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140076.jpg: 480x640 5 bottles, 5 chairs, 1 potted plant, 1 dining table, 1 vase, 75.0ms
Speed: 4.4ms preprocess, 75.0ms inference, 12.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1189/5000 [02:48<09:33,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140203.jpg: 480x640 4 persons, 1 truck, 78.5ms
Speed: 3.1ms preprocess, 78.5ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1190/5000 [02:48<09:20,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140270.jpg: 448x640 1 person, 1 horse, 90.2ms
Speed: 4.5ms preprocess, 90.2ms inference, 8.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▍       | 1191/5000 [02:48<09:37,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140286.jpg: 544x640 1 person, 1 horse, 164.9ms
Speed: 2.6ms preprocess, 164.9ms inference, 3.6ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  24%|██▍       | 1192/5000 [02:48<10:52,  5.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140420.jpg: 448x640 1 motorcycle, 70.2ms
Speed: 2.8ms preprocess, 70.2ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▍       | 1193/5000 [02:48<09:54,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140439.jpg: 640x448 2 potted plants, 3 vases, 72.2ms
Speed: 3.0ms preprocess, 72.2ms inference, 6.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  24%|██▍       | 1194/5000 [02:49<09:43,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140556.jpg: 480x640 4 persons, 1 fire hydrant, 77.2ms
Speed: 2.8ms preprocess, 77.2ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1195/5000 [02:49<09:32,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140583.jpg: 480x640 1 person, 1 car, 8 sheeps, 70.5ms
Speed: 3.2ms preprocess, 70.5ms inference, 9.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1196/5000 [02:49<09:39,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140640.jpg: 448x640 3 persons, 2 cakes, 2 potted plants, 95.7ms
Speed: 3.3ms preprocess, 95.7ms inference, 8.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▍       | 1197/5000 [02:49<09:52,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140658.jpg: 640x480 1 clock, 82.6ms
Speed: 3.2ms preprocess, 82.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  24%|██▍       | 1198/5000 [02:49<09:26,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140840.jpg: 384x640 2 persons, 1 truck, 6 kites, 59.4ms
Speed: 3.7ms preprocess, 59.4ms inference, 10.3ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  24%|██▍       | 1199/5000 [02:49<09:05,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140929.jpg: 416x640 1 clock, 65.1ms
Speed: 2.8ms preprocess, 65.1ms inference, 2.2ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  24%|██▍       | 1200/5000 [02:49<08:34,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000140987.jpg: 640x448 6 persons, 3 cups, 1 pizza, 1 potted plant, 2 dining tables, 71.4ms
Speed: 2.8ms preprocess, 71.4ms inference, 15.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  24%|██▍       | 1201/5000 [02:50<09:02,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000141328.jpg: 480x640 10 broccolis, 1 carrot, 106.4ms
Speed: 3.4ms preprocess, 106.4ms inference, 10.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1202/5000 [02:50<09:53,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000141597.jpg: 448x640 2 cars, 1 truck, 1 sports ball, 99.5ms
Speed: 2.9ms preprocess, 99.5ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▍       | 1203/5000 [02:50<09:50,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000141671.jpg: 352x640 7 persons, 1 bicycle, 5 motorcycles, 1 bench, 1 umbrella, 130.2ms
Speed: 2.2ms preprocess, 130.2ms inference, 11.8ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  24%|██▍       | 1204/5000 [02:50<10:53,  5.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000141821.jpg: 480x640 3 persons, 2 bottles, 1 cup, 1 bowl, 1 pizza, 1 dining table, 69.2ms
Speed: 3.2ms preprocess, 69.2ms inference, 8.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1205/5000 [02:50<10:26,  6.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000142092.jpg: 480x640 1 pizza, 79.4ms
Speed: 3.6ms preprocess, 79.4ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1206/5000 [02:50<09:46,  6.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000142238.jpg: 448x640 20 persons, 68.8ms
Speed: 3.0ms preprocess, 68.8ms inference, 17.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▍       | 1207/5000 [02:51<10:18,  6.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000142324.jpg: 480x640 13 persons, 3 bicycles, 1 motorcycle, 1 suitcase, 69.0ms
Speed: 3.8ms preprocess, 69.0ms inference, 17.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1208/5000 [02:51<10:32,  5.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000142472.jpg: 480x640 2 persons, 6 cars, 1 bus, 108.6ms
Speed: 42.2ms preprocess, 108.6ms inference, 8.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1209/5000 [02:51<11:43,  5.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000142585.jpg: 640x480 2 persons, 6 cars, 2 motorcycles, 1 bus, 1 traffic light, 70.8ms
Speed: 4.5ms preprocess, 70.8ms inference, 11.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  24%|██▍       | 1210/5000 [02:51<11:13,  5.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000142620.jpg: 448x640 1 person, 1 bench, 2 bottles, 3 cups, 2 bowls, 1 dining table, 68.9ms
Speed: 3.7ms preprocess, 68.9ms inference, 9.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▍       | 1211/5000 [02:51<10:46,  5.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000142790.jpg: 480x640 2 persons, 2 skiss, 2 snowboards, 73.0ms
Speed: 4.9ms preprocess, 73.0ms inference, 7.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1212/5000 [02:51<10:11,  6.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000142971.jpg: 448x640 4 persons, 3 surfboards, 73.2ms
Speed: 3.2ms preprocess, 73.2ms inference, 6.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▍       | 1213/5000 [02:52<09:47,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000143068.jpg: 224x640 4 persons, 5 kites, 107.6ms
Speed: 2.3ms preprocess, 107.6ms inference, 5.9ms postprocess per image at shape (1, 3, 224, 640)


Segmenting Images:  24%|██▍       | 1214/5000 [02:52<09:58,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000143556.jpg: 640x480 3 persons, 2 motorcycles, 76.5ms
Speed: 3.0ms preprocess, 76.5ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  24%|██▍       | 1215/5000 [02:52<09:46,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000143572.jpg: 448x640 14 persons, 3 tennis rackets, 3 chairs, 79.2ms
Speed: 4.4ms preprocess, 79.2ms inference, 17.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▍       | 1216/5000 [02:52<10:42,  5.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000143931.jpg: 480x640 1 person, 1 bus, 101.4ms
Speed: 18.2ms preprocess, 101.4ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1217/5000 [02:52<10:41,  5.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000143961.jpg: 384x640 17 persons, 2 umbrellas, 63.6ms
Speed: 2.6ms preprocess, 63.6ms inference, 14.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  24%|██▍       | 1218/5000 [02:52<10:30,  6.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000143998.jpg: 640x640 1 person, 1 carrot, 100.3ms
Speed: 4.5ms preprocess, 100.3ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  24%|██▍       | 1219/5000 [02:53<10:23,  6.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000144003.jpg: 480x640 4 persons, 1 cup, 1 cake, 1 dining table, 73.3ms
Speed: 5.0ms preprocess, 73.3ms inference, 6.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1220/5000 [02:53<09:59,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000144114.jpg: 448x640 1 person, 2 airplanes, 74.8ms
Speed: 4.9ms preprocess, 74.8ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▍       | 1221/5000 [02:53<09:30,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000144300.jpg: 448x640 3 persons, 1 car, 1 motorcycle, 1 bus, 1 truck, 98.2ms
Speed: 3.3ms preprocess, 98.2ms inference, 6.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  24%|██▍       | 1222/5000 [02:53<09:58,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000144333.jpg: 640x448 1 person, 1 bicycle, 4 cars, 73.2ms
Speed: 3.3ms preprocess, 73.2ms inference, 7.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  24%|██▍       | 1223/5000 [02:53<09:37,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000144706.jpg: 480x640 3 cars, 3 traffic lights, 78.7ms
Speed: 3.4ms preprocess, 78.7ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  24%|██▍       | 1224/5000 [02:53<09:29,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000144784.jpg: 640x480 1 person, 1 toilet, 92.4ms
Speed: 20.4ms preprocess, 92.4ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  24%|██▍       | 1225/5000 [02:53<09:43,  6.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000144798.jpg: 640x448 1 cup, 1 toilet, 68.3ms
Speed: 3.4ms preprocess, 68.3ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  25%|██▍       | 1226/5000 [02:54<09:01,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000144932.jpg: 448x640 1 boat, 69.6ms
Speed: 2.9ms preprocess, 69.6ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  25%|██▍       | 1227/5000 [02:54<08:34,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000144984.jpg: 640x448 1 person, 1 wine glass, 2 couchs, 1 remote, 95.1ms
Speed: 3.9ms preprocess, 95.1ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  25%|██▍       | 1228/5000 [02:54<09:02,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000145020.jpg: 384x640 6 persons, 1 clock, 5 teddy bears, 60.0ms
Speed: 2.8ms preprocess, 60.0ms inference, 8.1ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  25%|██▍       | 1229/5000 [02:54<08:54,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000145591.jpg: 480x640 1 clock, 73.5ms
Speed: 4.0ms preprocess, 73.5ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▍       | 1230/5000 [02:54<08:33,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000145597.jpg: 480x640 6 persons, 1 backpack, 82.8ms
Speed: 3.0ms preprocess, 82.8ms inference, 7.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▍       | 1231/5000 [02:54<08:48,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000145620.jpg: 448x640 3 persons, 1 motorcycle, 70.2ms
Speed: 2.8ms preprocess, 70.2ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  25%|██▍       | 1232/5000 [02:54<08:33,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000145665.jpg: 640x448 1 person, 1 skateboard, 87.2ms
Speed: 3.0ms preprocess, 87.2ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  25%|██▍       | 1233/5000 [02:55<08:44,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000145781.jpg: 480x640 1 dog, 1 bottle, 1 mouse, 1 remote, 97.6ms
Speed: 3.4ms preprocess, 97.6ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▍       | 1234/5000 [02:55<09:07,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000146155.jpg: 512x640 4 persons, 12 bottles, 3 wine glasss, 154.6ms
Speed: 3.7ms preprocess, 154.6ms inference, 19.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  25%|██▍       | 1235/5000 [02:55<11:36,  5.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000146358.jpg: 640x416 4 persons, 1 cup, 3 cakes, 1 dining table, 136.8ms
Speed: 3.0ms preprocess, 136.8ms inference, 8.0ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  25%|██▍       | 1236/5000 [02:55<12:12,  5.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000146363.jpg: 480x640 1 boat, 1 umbrella, 1 chair, 73.1ms
Speed: 4.4ms preprocess, 73.1ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▍       | 1237/5000 [02:55<10:56,  5.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000146457.jpg: 480x640 2 persons, 5 bottles, 1 bowl, 2 pizzas, 1 chair, 1 dining table, 1 sink, 85.5ms
Speed: 3.7ms preprocess, 85.5ms inference, 12.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▍       | 1238/5000 [02:55<10:58,  5.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000146489.jpg: 480x640 1 bottle, 1 wine glass, 1 pizza, 1 dining table, 72.1ms
Speed: 2.6ms preprocess, 72.1ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▍       | 1239/5000 [02:56<10:03,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000146498.jpg: 480x640 (no detections), 70.9ms
Speed: 2.5ms preprocess, 70.9ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▍       | 1240/5000 [02:56<09:01,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000146667.jpg: 544x640 14 persons, 2 cars, 1 motorcycle, 2 handbags, 259.4ms
Speed: 2.5ms preprocess, 259.4ms inference, 19.9ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  25%|██▍       | 1241/5000 [02:56<13:33,  4.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000146825.jpg: 320x640 5 persons, 1 train, 127.4ms
Speed: 2.5ms preprocess, 127.4ms inference, 4.2ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  25%|██▍       | 1242/5000 [02:56<12:57,  4.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000146831.jpg: 448x640 2 persons, 1 car, 1 skateboard, 65.8ms
Speed: 2.8ms preprocess, 65.8ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  25%|██▍       | 1243/5000 [02:56<11:26,  5.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000147205.jpg: 448x640 2 cakes, 1 dining table, 77.9ms
Speed: 3.1ms preprocess, 77.9ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  25%|██▍       | 1244/5000 [02:57<10:33,  5.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000147223.jpg: 384x640 4 cars, 1 bus, 61.0ms
Speed: 2.2ms preprocess, 61.0ms inference, 4.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  25%|██▍       | 1245/5000 [02:57<09:36,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000147338.jpg: 640x480 3 cars, 2 trucks, 1 fire hydrant, 81.6ms
Speed: 2.9ms preprocess, 81.6ms inference, 6.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  25%|██▍       | 1246/5000 [02:57<09:30,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000147415.jpg: 480x640 1 person, 1 cup, 1 laptop, 1 keyboard, 1 book, 69.8ms
Speed: 2.3ms preprocess, 69.8ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▍       | 1247/5000 [02:57<09:31,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000147498.jpg: 544x640 4 birds, 100.3ms
Speed: 2.8ms preprocess, 100.3ms inference, 5.0ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  25%|██▍       | 1248/5000 [02:57<10:01,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000147518.jpg: 640x480 1 cup, 1 toilet, 1 sink, 68.4ms
Speed: 2.5ms preprocess, 68.4ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  25%|██▍       | 1249/5000 [02:57<09:15,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000147725.jpg: 416x640 1 person, 4 cars, 1 bus, 136.8ms
Speed: 2.5ms preprocess, 136.8ms inference, 5.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  25%|██▌       | 1250/5000 [02:57<10:07,  6.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000147729.jpg: 480x640 8 persons, 1 cell phone, 69.7ms
Speed: 3.8ms preprocess, 69.7ms inference, 10.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▌       | 1251/5000 [02:58<09:50,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000147740.jpg: 480x640 6 persons, 1 car, 2 motorcycles, 2 bananas, 85.6ms
Speed: 3.1ms preprocess, 85.6ms inference, 9.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▌       | 1252/5000 [02:58<10:04,  6.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000147745.jpg: 480x640 2 trucks, 72.2ms
Speed: 2.4ms preprocess, 72.2ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▌       | 1253/5000 [02:58<09:20,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000148508.jpg: 448x640 13 persons, 1 car, 1 bus, 99.8ms
Speed: 6.7ms preprocess, 99.8ms inference, 14.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  25%|██▌       | 1254/5000 [02:58<10:18,  6.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000148620.jpg: 480x640 3 tvs, 1 laptop, 1 mouse, 1 keyboard, 67.1ms
Speed: 4.3ms preprocess, 67.1ms inference, 6.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▌       | 1255/5000 [02:58<09:44,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000148662.jpg: 480x640 3 persons, 1 hot dog, 71.0ms
Speed: 3.0ms preprocess, 71.0ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▌       | 1256/5000 [02:58<09:16,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000148707.jpg: 480x640 1 apple, 1 orange, 84.8ms
Speed: 26.2ms preprocess, 84.8ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▌       | 1257/5000 [02:59<09:35,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000148719.jpg: 480x640 2 persons, 1 car, 3 trucks, 69.4ms
Speed: 2.6ms preprocess, 69.4ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▌       | 1258/5000 [02:59<09:12,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000148730.jpg: 448x640 (no detections), 90.7ms
Speed: 3.2ms preprocess, 90.7ms inference, 0.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  25%|██▌       | 1259/5000 [02:59<08:51,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000148739.jpg: 448x640 1 person, 1 surfboard, 72.0ms
Speed: 3.3ms preprocess, 72.0ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  25%|██▌       | 1260/5000 [02:59<08:30,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000148783.jpg: 640x448 3 zebras, 68.6ms
Speed: 2.7ms preprocess, 68.6ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  25%|██▌       | 1261/5000 [02:59<08:11,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000148957.jpg: 480x640 6 donuts, 66.7ms
Speed: 2.7ms preprocess, 66.7ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▌       | 1262/5000 [02:59<08:13,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000148999.jpg: 448x640 4 persons, 69.8ms
Speed: 3.1ms preprocess, 69.8ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  25%|██▌       | 1263/5000 [02:59<08:10,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000149222.jpg: 512x640 2 bottles, 1 cup, 2 tvs, 2 mouses, 1 keyboard, 1 cell phone, 1 book, 78.3ms
Speed: 4.0ms preprocess, 78.3ms inference, 14.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  25%|██▌       | 1264/5000 [03:00<09:17,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000149375.jpg: 512x640 4 persons, 1 frisbee, 2 skateboards, 72.4ms
Speed: 3.2ms preprocess, 72.4ms inference, 8.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  25%|██▌       | 1265/5000 [03:00<09:13,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000149406.jpg: 640x640 1 motorcycle, 217.7ms
Speed: 2.7ms preprocess, 217.7ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  25%|██▌       | 1266/5000 [03:00<11:30,  5.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000149568.jpg: 576x640 1 person, 1 dog, 1 sheep, 1 frisbee, 167.9ms
Speed: 3.3ms preprocess, 167.9ms inference, 5.0ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  25%|██▌       | 1267/5000 [03:00<12:31,  4.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000149622.jpg: 480x640 3 birds, 70.7ms
Speed: 2.9ms preprocess, 70.7ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▌       | 1268/5000 [03:00<11:15,  5.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000149770.jpg: 480x640 1 person, 72.9ms
Speed: 4.0ms preprocess, 72.9ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▌       | 1269/5000 [03:00<10:08,  6.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000150224.jpg: 448x640 9 persons, 1 backpack, 2 umbrellas, 1 chair, 115.5ms
Speed: 2.8ms preprocess, 115.5ms inference, 11.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  25%|██▌       | 1270/5000 [03:01<10:53,  5.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000150265.jpg: 640x480 4 persons, 1 fire hydrant, 69.3ms
Speed: 2.9ms preprocess, 69.3ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  25%|██▌       | 1271/5000 [03:01<10:04,  6.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000150417.jpg: 480x640 3 persons, 2 cups, 1 bowl, 1 cake, 1 chair, 2 dining tables, 1 refrigerator, 68.8ms
Speed: 3.3ms preprocess, 68.8ms inference, 10.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▌       | 1272/5000 [03:01<09:54,  6.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000150638.jpg: 480x640 3 persons, 1 suitcase, 4 bottles, 3 cups, 1 dining table, 70.8ms
Speed: 3.0ms preprocess, 70.8ms inference, 11.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  25%|██▌       | 1273/5000 [03:01<09:57,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000150649.jpg: 416x640 5 persons, 1 car, 5 skateboards, 84.6ms
Speed: 4.4ms preprocess, 84.6ms inference, 16.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  25%|██▌       | 1274/5000 [03:01<10:10,  6.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000150726.jpg: 640x480 2 giraffes, 70.9ms
Speed: 3.0ms preprocess, 70.9ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  26%|██▌       | 1275/5000 [03:01<09:27,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000150930.jpg: 640x416 2 clocks, 173.0ms
Speed: 3.1ms preprocess, 173.0ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  26%|██▌       | 1276/5000 [03:02<10:47,  5.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000151000.jpg: 448x640 4 persons, 1 tie, 63.7ms
Speed: 3.0ms preprocess, 63.7ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  26%|██▌       | 1277/5000 [03:02<09:49,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000151051.jpg: 480x640 1 person, 1 snowboard, 72.3ms
Speed: 3.2ms preprocess, 72.3ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1278/5000 [03:02<09:09,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000151480.jpg: 480x640 14 persons, 69.2ms
Speed: 2.7ms preprocess, 69.2ms inference, 14.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1279/5000 [03:02<09:25,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000151516.jpg: 448x640 1 person, 1 bird, 69.6ms
Speed: 9.0ms preprocess, 69.6ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  26%|██▌       | 1280/5000 [03:02<08:57,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000151629.jpg: 640x448 3 persons, 66.4ms
Speed: 2.9ms preprocess, 66.4ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  26%|██▌       | 1281/5000 [03:02<08:43,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000151657.jpg: 640x416 2 persons, 1 tie, 109.1ms
Speed: 2.8ms preprocess, 109.1ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  26%|██▌       | 1282/5000 [03:02<09:13,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000151662.jpg: 480x640 2 giraffes, 69.5ms
Speed: 3.0ms preprocess, 69.5ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1283/5000 [03:03<08:40,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000151820.jpg: 448x640 7 persons, 1 wine glass, 4 cups, 6 chairs, 1 dining table, 68.5ms
Speed: 2.6ms preprocess, 68.5ms inference, 18.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  26%|██▌       | 1284/5000 [03:03<09:24,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000151857.jpg: 480x640 1 train, 70.9ms
Speed: 2.7ms preprocess, 70.9ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1285/5000 [03:03<08:53,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000151938.jpg: 640x480 5 persons, 1 umbrella, 2 cups, 80.0ms
Speed: 2.7ms preprocess, 80.0ms inference, 8.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  26%|██▌       | 1286/5000 [03:03<09:02,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000151962.jpg: 480x640 1 car, 1 dog, 104.4ms
Speed: 2.6ms preprocess, 104.4ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1287/5000 [03:03<09:18,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000152120.jpg: 448x640 1 person, 1 motorcycle, 72.4ms
Speed: 2.9ms preprocess, 72.4ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  26%|██▌       | 1288/5000 [03:03<08:52,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000152214.jpg: 640x480 1 person, 1 tie, 75.2ms
Speed: 3.2ms preprocess, 75.2ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  26%|██▌       | 1289/5000 [03:03<08:36,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000152465.jpg: 544x640 1 boat, 1 fire hydrant, 1 bird, 88.6ms
Speed: 2.2ms preprocess, 88.6ms inference, 4.0ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  26%|██▌       | 1290/5000 [03:04<08:45,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000152686.jpg: 416x640 2 persons, 1 sports ball, 1 tennis racket, 64.1ms
Speed: 2.5ms preprocess, 64.1ms inference, 4.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  26%|██▌       | 1291/5000 [03:04<08:39,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000152740.jpg: 352x640 19 cows, 127.3ms
Speed: 4.7ms preprocess, 127.3ms inference, 15.2ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  26%|██▌       | 1292/5000 [03:04<10:44,  5.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000152771.jpg: 480x640 1 bicycle, 2 cars, 71.5ms
Speed: 3.2ms preprocess, 71.5ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1293/5000 [03:04<10:01,  6.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000152870.jpg: 640x512 2 giraffes, 150.7ms
Speed: 3.2ms preprocess, 150.7ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  26%|██▌       | 1294/5000 [03:04<10:50,  5.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153011.jpg: 480x640 6 persons, 3 cars, 2 buss, 3 chairs, 68.9ms
Speed: 2.9ms preprocess, 68.9ms inference, 14.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1295/5000 [03:04<10:40,  5.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153217.jpg: 640x480 1 motorcycle, 73.4ms
Speed: 4.2ms preprocess, 73.4ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  26%|██▌       | 1296/5000 [03:05<09:44,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153229.jpg: 480x640 7 persons, 2 frisbees, 1 sports ball, 69.1ms
Speed: 3.0ms preprocess, 69.1ms inference, 9.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1297/5000 [03:05<09:41,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153299.jpg: 640x608 2 giraffes, 254.0ms
Speed: 4.8ms preprocess, 254.0ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  26%|██▌       | 1298/5000 [03:05<12:32,  4.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153343.jpg: 480x640 2 teddy bears, 71.1ms
Speed: 4.3ms preprocess, 71.1ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1299/5000 [03:05<11:05,  5.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153510.jpg: 448x640 7 broccolis, 70.0ms
Speed: 3.5ms preprocess, 70.0ms inference, 6.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  26%|██▌       | 1300/5000 [03:05<10:21,  5.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153527.jpg: 480x640 1 person, 3 benchs, 2 chairs, 76.2ms
Speed: 4.5ms preprocess, 76.2ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1301/5000 [03:05<09:55,  6.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153529.jpg: 640x480 4 persons, 4 backpacks, 1 handbag, 2 suitcases, 69.4ms
Speed: 2.8ms preprocess, 69.4ms inference, 11.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  26%|██▌       | 1302/5000 [03:06<09:44,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153568.jpg: 512x640 1 stop sign, 185.8ms
Speed: 4.5ms preprocess, 185.8ms inference, 2.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  26%|██▌       | 1303/5000 [03:06<11:16,  5.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153632.jpg: 480x640 1 fork, 1 knife, 2 bowls, 1 dining table, 73.0ms
Speed: 3.0ms preprocess, 73.0ms inference, 7.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1304/5000 [03:06<10:22,  5.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153669.jpg: 448x640 20 persons, 1 baseball bat, 1 baseball glove, 1 chair, 67.8ms
Speed: 2.8ms preprocess, 67.8ms inference, 20.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  26%|██▌       | 1305/5000 [03:06<10:55,  5.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153782.jpg: 640x416 2 clocks, 78.3ms
Speed: 14.5ms preprocess, 78.3ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  26%|██▌       | 1306/5000 [03:06<10:25,  5.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000153797.jpg: 512x640 1 person, 1 sports ball, 74.6ms
Speed: 4.6ms preprocess, 74.6ms inference, 3.5ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  26%|██▌       | 1307/5000 [03:06<09:45,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000154000.jpg: 480x640 1 car, 1 truck, 71.9ms
Speed: 2.8ms preprocess, 71.9ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1308/5000 [03:07<09:11,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000154004.jpg: 448x640 8 persons, 83.8ms
Speed: 4.5ms preprocess, 83.8ms inference, 7.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  26%|██▌       | 1309/5000 [03:07<09:22,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000154087.jpg: 480x640 4 persons, 1 kite, 76.3ms
Speed: 2.8ms preprocess, 76.3ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1310/5000 [03:07<09:02,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000154213.jpg: 480x640 1 bowl, 1 cake, 1 scissors, 71.3ms
Speed: 4.8ms preprocess, 71.3ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▌       | 1311/5000 [03:07<08:43,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000154339.jpg: 448x640 1 bottle, 2 cups, 1 bowl, 1 scissors, 68.6ms
Speed: 2.6ms preprocess, 68.6ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  26%|██▌       | 1312/5000 [03:07<08:27,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000154358.jpg: 640x448 2 persons, 1 cup, 2 chairs, 2 potted plants, 1 book, 2 vases, 126.2ms
Speed: 3.6ms preprocess, 126.2ms inference, 8.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  26%|██▋       | 1313/5000 [03:07<09:42,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000154425.jpg: 448x640 1 person, 2 kites, 98.7ms
Speed: 3.1ms preprocess, 98.7ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  26%|██▋       | 1314/5000 [03:08<09:39,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000154431.jpg: 448x640 1 tv, 3 vases, 66.7ms
Speed: 2.8ms preprocess, 66.7ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  26%|██▋       | 1315/5000 [03:08<09:05,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000154644.jpg: 640x640 1 person, 191.6ms
Speed: 4.7ms preprocess, 191.6ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  26%|██▋       | 1316/5000 [03:08<10:53,  5.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000154705.jpg: 480x640 2 tvs, 2 laptops, 2 mouses, 2 keyboards, 68.9ms
Speed: 2.6ms preprocess, 68.9ms inference, 9.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▋       | 1317/5000 [03:08<10:14,  5.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000154718.jpg: 640x480 1 person, 1 toilet, 79.3ms
Speed: 26.9ms preprocess, 79.3ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  26%|██▋       | 1318/5000 [03:08<10:10,  6.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000154947.jpg: 640x640 1 sheep, 141.6ms
Speed: 2.6ms preprocess, 141.6ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  26%|██▋       | 1319/5000 [03:08<10:42,  5.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000155051.jpg: 480x640 1 clock, 69.3ms
Speed: 2.7ms preprocess, 69.3ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▋       | 1320/5000 [03:08<09:39,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000155145.jpg: 480x640 4 persons, 4 boats, 69.9ms
Speed: 4.6ms preprocess, 69.9ms inference, 10.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▋       | 1321/5000 [03:09<09:24,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000155154.jpg: 640x480 1 sink, 68.7ms
Speed: 4.6ms preprocess, 68.7ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  26%|██▋       | 1322/5000 [03:09<08:49,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000155179.jpg: 480x640 1 cake, 74.3ms
Speed: 2.9ms preprocess, 74.3ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▋       | 1323/5000 [03:09<08:26,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000155291.jpg: 448x640 1 cat, 82.0ms
Speed: 2.8ms preprocess, 82.0ms inference, 5.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  26%|██▋       | 1324/5000 [03:09<08:39,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000155341.jpg: 480x640 5 persons, 1 car, 2 trucks, 71.5ms
Speed: 3.1ms preprocess, 71.5ms inference, 7.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  26%|██▋       | 1325/5000 [03:09<08:39,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000155443.jpg: 448x640 2 trains, 76.2ms
Speed: 3.0ms preprocess, 76.2ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1326/5000 [03:09<08:27,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000155451.jpg: 480x640 1 person, 1 traffic light, 73.6ms
Speed: 2.9ms preprocess, 73.6ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1327/5000 [03:09<08:11,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000155571.jpg: 448x640 1 elephant, 65.7ms
Speed: 3.0ms preprocess, 65.7ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1328/5000 [03:10<08:11,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000156071.jpg: 480x640 11 persons, 80.5ms
Speed: 3.6ms preprocess, 80.5ms inference, 10.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1329/5000 [03:10<08:57,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000156076.jpg: 480x640 4 persons, 2 bottles, 4 cups, 7 pizzas, 1 dining table, 72.6ms
Speed: 4.4ms preprocess, 72.6ms inference, 18.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1330/5000 [03:10<09:47,  6.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000156278.jpg: 448x640 6 bowls, 1 chair, 1 dining table, 1 oven, 2 sinks, 1 clock, 71.1ms
Speed: 3.0ms preprocess, 71.1ms inference, 10.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1331/5000 [03:10<09:41,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000156292.jpg: 640x448 2 persons, 1 handbag, 6 clocks, 67.4ms
Speed: 3.1ms preprocess, 67.4ms inference, 7.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  27%|██▋       | 1332/5000 [03:10<09:23,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000156372.jpg: 640x448 3 persons, 1 backpack, 1 chair, 66.4ms
Speed: 4.1ms preprocess, 66.4ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  27%|██▋       | 1333/5000 [03:10<08:52,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000156643.jpg: 480x640 8 persons, 1 bottle, 1 cup, 1 cake, 2 dining tables, 1 teddy bear, 79.4ms
Speed: 2.8ms preprocess, 79.4ms inference, 14.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1334/5000 [03:11<09:20,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000156924.jpg: 448x640 3 persons, 1 bottle, 1 chair, 1 couch, 106.1ms
Speed: 4.1ms preprocess, 106.1ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1335/5000 [03:11<09:47,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157046.jpg: 480x640 (no detections), 70.7ms
Speed: 4.5ms preprocess, 70.7ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1336/5000 [03:11<08:48,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157098.jpg: 448x640 3 giraffes, 69.5ms
Speed: 3.9ms preprocess, 69.5ms inference, 7.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1337/5000 [03:11<08:45,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157124.jpg: 448x640 1 oven, 1 sink, 104.5ms
Speed: 2.9ms preprocess, 104.5ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1338/5000 [03:11<09:01,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157138.jpg: 480x640 2 forks, 1 cake, 72.1ms
Speed: 4.8ms preprocess, 72.1ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1339/5000 [03:11<08:45,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157213.jpg: 384x640 1 elephant, 151.3ms
Speed: 2.7ms preprocess, 151.3ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  27%|██▋       | 1340/5000 [03:11<09:54,  6.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157365.jpg: 640x448 1 person, 5 cars, 1 skateboard, 71.3ms
Speed: 3.2ms preprocess, 71.3ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  27%|██▋       | 1341/5000 [03:12<09:29,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157390.jpg: 480x640 1 bowl, 6 broccolis, 13 carrots, 82.3ms
Speed: 3.2ms preprocess, 82.3ms inference, 19.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1342/5000 [03:12<10:21,  5.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157418.jpg: 640x480 1 bottle, 2 cups, 1 bowl, 4 sandwichs, 1 dining table, 71.4ms
Speed: 3.2ms preprocess, 71.4ms inference, 8.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  27%|██▋       | 1343/5000 [03:12<09:56,  6.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157601.jpg: 640x640 1 person, 1 cup, 92.0ms
Speed: 4.9ms preprocess, 92.0ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  27%|██▋       | 1344/5000 [03:12<09:50,  6.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157756.jpg: 640x480 7 persons, 2 cars, 3 traffic lights, 1 clock, 68.1ms
Speed: 3.3ms preprocess, 68.1ms inference, 14.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  27%|██▋       | 1345/5000 [03:12<09:46,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157767.jpg: 448x640 11 persons, 2 handbags, 1 bowl, 1 dining table, 122.4ms
Speed: 2.9ms preprocess, 122.4ms inference, 15.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1346/5000 [03:12<10:58,  5.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157807.jpg: 448x640 1 cat, 2 sinks, 63.4ms
Speed: 3.1ms preprocess, 63.4ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1347/5000 [03:13<09:51,  6.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157847.jpg: 416x640 2 boats, 1 bird, 1 kite, 142.4ms
Speed: 2.7ms preprocess, 142.4ms inference, 4.2ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  27%|██▋       | 1348/5000 [03:13<10:32,  5.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000157928.jpg: 480x640 2 persons, 2 cars, 1 suitcase, 72.5ms
Speed: 4.0ms preprocess, 72.5ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1349/5000 [03:13<09:51,  6.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000158227.jpg: 480x640 1 person, 1 sports ball, 1 baseball bat, 75.3ms
Speed: 4.0ms preprocess, 75.3ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1350/5000 [03:13<09:18,  6.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000158548.jpg: 512x640 3 persons, 1 car, 1 dog, 106.5ms
Speed: 3.4ms preprocess, 106.5ms inference, 8.6ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  27%|██▋       | 1351/5000 [03:13<09:45,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000158660.jpg: 640x640 3 persons, 3 bowls, 1 banana, 3 apples, 1 donut, 1 dining table, 88.1ms
Speed: 4.3ms preprocess, 88.1ms inference, 16.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  27%|██▋       | 1352/5000 [03:13<10:31,  5.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000158744.jpg: 448x640 1 person, 1 car, 1 suitcase, 1 potted plant, 63.3ms
Speed: 2.9ms preprocess, 63.3ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1353/5000 [03:14<09:34,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000158945.jpg: 640x480 2 persons, 1 elephant, 71.7ms
Speed: 2.8ms preprocess, 71.7ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  27%|██▋       | 1354/5000 [03:14<09:07,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000158956.jpg: 448x640 1 person, 1 cake, 66.7ms
Speed: 3.0ms preprocess, 66.7ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1355/5000 [03:14<08:35,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000159112.jpg: 480x640 1 banana, 1 pizza, 105.1ms
Speed: 3.4ms preprocess, 105.1ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1356/5000 [03:14<09:13,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000159282.jpg: 448x640 1 vase, 74.0ms
Speed: 3.3ms preprocess, 74.0ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1357/5000 [03:14<08:48,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000159311.jpg: 448x640 2 zebras, 72.0ms
Speed: 4.9ms preprocess, 72.0ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1358/5000 [03:14<08:29,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000159399.jpg: 480x640 4 persons, 3 kites, 76.7ms
Speed: 3.0ms preprocess, 76.7ms inference, 6.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1359/5000 [03:14<08:36,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000159458.jpg: 480x640 1 person, 1 dog, 2 chairs, 1 bed, 79.1ms
Speed: 2.8ms preprocess, 79.1ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1360/5000 [03:15<08:36,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000159684.jpg: 416x640 1 train, 1 clock, 65.8ms
Speed: 2.9ms preprocess, 65.8ms inference, 3.2ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  27%|██▋       | 1361/5000 [03:15<08:19,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000159791.jpg: 480x640 1 chair, 1 microwave, 1 oven, 1 refrigerator, 105.2ms
Speed: 4.5ms preprocess, 105.2ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1362/5000 [03:15<08:55,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000159977.jpg: 448x640 1 zebra, 1 giraffe, 65.4ms
Speed: 2.9ms preprocess, 65.4ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1363/5000 [03:15<08:23,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000160012.jpg: 480x640 1 person, 2 cars, 1 bottle, 2 pizzas, 71.1ms
Speed: 2.8ms preprocess, 71.1ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1364/5000 [03:15<08:17,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000160556.jpg: 448x640 1 person, 2 tvs, 1 mouse, 1 keyboard, 1 cell phone, 83.1ms
Speed: 2.9ms preprocess, 83.1ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1365/5000 [03:15<08:23,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000160666.jpg: 480x640 (no detections), 73.8ms
Speed: 4.1ms preprocess, 73.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1366/5000 [03:15<08:02,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000160728.jpg: 448x640 3 persons, 11 boats, 2 birds, 92.7ms
Speed: 3.1ms preprocess, 92.7ms inference, 17.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  27%|██▋       | 1367/5000 [03:16<09:13,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000160772.jpg: 512x640 1 boat, 79.3ms
Speed: 3.2ms preprocess, 79.3ms inference, 2.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  27%|██▋       | 1368/5000 [03:16<08:52,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000160864.jpg: 416x640 7 persons, 4 baseball bats, 3 baseball gloves, 1 chair, 99.6ms
Speed: 3.2ms preprocess, 99.6ms inference, 11.9ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  27%|██▋       | 1369/5000 [03:16<09:47,  6.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161008.jpg: 480x640 1 knife, 67.7ms
Speed: 2.8ms preprocess, 67.7ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1370/5000 [03:16<08:59,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161032.jpg: 480x640 2 persons, 2 umbrellas, 67.3ms
Speed: 3.0ms preprocess, 67.3ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1371/5000 [03:16<08:36,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161044.jpg: 256x640 2 airplanes, 106.8ms
Speed: 2.4ms preprocess, 106.8ms inference, 2.4ms postprocess per image at shape (1, 3, 256, 640)


Segmenting Images:  27%|██▋       | 1372/5000 [03:16<08:46,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161128.jpg: 640x480 2 persons, 4 cars, 1 truck, 87.1ms
Speed: 4.5ms preprocess, 87.1ms inference, 7.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  27%|██▋       | 1373/5000 [03:16<09:01,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161397.jpg: 480x640 1 vase, 69.3ms
Speed: 2.9ms preprocess, 69.3ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  27%|██▋       | 1374/5000 [03:17<08:28,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161609.jpg: 480x640 3 persons, 2 handbags, 67.9ms
Speed: 3.0ms preprocess, 67.9ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1375/5000 [03:17<08:16,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161642.jpg: 640x480 1 clock, 71.1ms
Speed: 3.2ms preprocess, 71.1ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  28%|██▊       | 1376/5000 [03:17<07:59,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161781.jpg: 448x640 1 person, 1 skis, 67.6ms
Speed: 2.9ms preprocess, 67.6ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1377/5000 [03:17<07:42,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161799.jpg: 448x640 2 persons, 2 skateboards, 104.7ms
Speed: 3.1ms preprocess, 104.7ms inference, 5.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1378/5000 [03:17<08:21,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161820.jpg: 640x448 1 banana, 71.8ms
Speed: 2.7ms preprocess, 71.8ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  28%|██▊       | 1379/5000 [03:17<08:08,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161861.jpg: 480x640 1 person, 1 tennis racket, 76.1ms
Speed: 4.4ms preprocess, 76.1ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1380/5000 [03:17<08:03,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161875.jpg: 448x640 1 person, 1 tie, 64.2ms
Speed: 2.9ms preprocess, 64.2ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1381/5000 [03:17<07:43,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161879.jpg: 576x640 1 person, 1 bench, 173.5ms
Speed: 2.1ms preprocess, 173.5ms inference, 3.4ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  28%|██▊       | 1382/5000 [03:18<09:29,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161925.jpg: 640x480 2 persons, 2 pizzas, 73.4ms
Speed: 2.7ms preprocess, 73.4ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  28%|██▊       | 1383/5000 [03:18<09:21,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000161978.jpg: 448x640 17 persons, 13 skateboards, 69.5ms
Speed: 3.2ms preprocess, 69.5ms inference, 25.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1384/5000 [03:18<10:27,  5.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000162035.jpg: 480x640 5 persons, 3 teddy bears, 72.4ms
Speed: 4.3ms preprocess, 72.4ms inference, 8.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1385/5000 [03:18<09:49,  6.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000162092.jpg: 640x640 12 persons, 81.7ms
Speed: 2.8ms preprocess, 81.7ms inference, 15.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  28%|██▊       | 1386/5000 [03:18<10:16,  5.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000162130.jpg: 480x640 1 bench, 61.6ms
Speed: 2.9ms preprocess, 61.6ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1387/5000 [03:18<09:05,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000162366.jpg: 480x640 (no detections), 62.8ms
Speed: 2.6ms preprocess, 62.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1388/5000 [03:19<08:10,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000162415.jpg: 640x544 1 person, 1 baseball glove, 181.1ms
Speed: 2.7ms preprocess, 181.1ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  28%|██▊       | 1389/5000 [03:19<09:56,  6.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000162543.jpg: 384x640 6 elephants, 124.2ms
Speed: 2.6ms preprocess, 124.2ms inference, 4.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  28%|██▊       | 1390/5000 [03:19<10:12,  5.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000162581.jpg: 640x480 1 person, 65.0ms
Speed: 2.8ms preprocess, 65.0ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  28%|██▊       | 1391/5000 [03:19<09:07,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000162732.jpg: 448x640 6 persons, 1 sports ball, 64.4ms
Speed: 2.7ms preprocess, 64.4ms inference, 8.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1392/5000 [03:19<08:50,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000162858.jpg: 640x448 1 person, 9 cars, 2 traffic lights, 74.6ms
Speed: 3.0ms preprocess, 74.6ms inference, 11.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  28%|██▊       | 1393/5000 [03:19<09:06,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163057.jpg: 480x640 10 persons, 5 kites, 89.9ms
Speed: 2.8ms preprocess, 89.9ms inference, 15.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1394/5000 [03:20<09:49,  6.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163117.jpg: 640x512 6 persons, 11 kites, 146.5ms
Speed: 4.3ms preprocess, 146.5ms inference, 17.2ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  28%|██▊       | 1395/5000 [03:20<11:31,  5.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163118.jpg: 448x640 2 persons, 1 frisbee, 66.0ms
Speed: 2.8ms preprocess, 66.0ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1396/5000 [03:20<10:12,  5.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163155.jpg: 448x640 2 cats, 69.1ms
Speed: 3.2ms preprocess, 69.1ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1397/5000 [03:20<09:19,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163257.jpg: 640x448 3 persons, 62.0ms
Speed: 2.8ms preprocess, 62.0ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  28%|██▊       | 1398/5000 [03:20<08:38,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163258.jpg: 480x640 1 toilet, 1 sink, 69.3ms
Speed: 2.9ms preprocess, 69.3ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1399/5000 [03:20<08:19,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163290.jpg: 640x480 1 zebra, 1 giraffe, 74.4ms
Speed: 3.8ms preprocess, 74.4ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  28%|██▊       | 1400/5000 [03:20<08:04,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163314.jpg: 448x640 9 persons, 61.3ms
Speed: 4.4ms preprocess, 61.3ms inference, 7.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1401/5000 [03:21<07:59,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163562.jpg: 448x640 1 person, 1 frisbee, 61.9ms
Speed: 4.1ms preprocess, 61.9ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1402/5000 [03:21<07:35,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163611.jpg: 480x640 1 cup, 2 forks, 3 pizzas, 1 dining table, 62.5ms
Speed: 2.9ms preprocess, 62.5ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1403/5000 [03:21<07:40,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163640.jpg: 480x640 4 persons, 1 bowl, 62.3ms
Speed: 3.2ms preprocess, 62.3ms inference, 7.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1404/5000 [03:21<07:33,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163682.jpg: 640x448 1 person, 1 tie, 62.7ms
Speed: 2.7ms preprocess, 62.7ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  28%|██▊       | 1405/5000 [03:21<07:22,  8.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163746.jpg: 512x640 8 persons, 2 airplanes, 147.6ms
Speed: 4.2ms preprocess, 147.6ms inference, 8.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  28%|██▊       | 1406/5000 [03:21<09:19,  6.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000163951.jpg: 640x448 1 chair, 60.0ms
Speed: 2.7ms preprocess, 60.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  28%|██▊       | 1407/5000 [03:21<08:27,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000164115.jpg: 640x640 (no detections), 81.8ms
Speed: 4.3ms preprocess, 81.8ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  28%|██▊       | 1408/5000 [03:22<08:01,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000164363.jpg: 608x640 2 clocks, 151.9ms
Speed: 2.7ms preprocess, 151.9ms inference, 3.3ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  28%|██▊       | 1409/5000 [03:22<09:15,  6.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000164602.jpg: 480x640 2 persons, 1 handbag, 1 bottle, 1 sink, 63.8ms
Speed: 2.8ms preprocess, 63.8ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1410/5000 [03:22<08:39,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000164637.jpg: 640x480 2 microwaves, 83.7ms
Speed: 2.9ms preprocess, 83.7ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  28%|██▊       | 1411/5000 [03:22<08:34,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000164883.jpg: 480x640 2 persons, 2 tvs, 1 remote, 1 cell phone, 65.2ms
Speed: 4.2ms preprocess, 65.2ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1412/5000 [03:22<08:15,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000164885.jpg: 448x640 5 persons, 1 skis, 60.2ms
Speed: 2.8ms preprocess, 60.2ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1413/5000 [03:22<07:58,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000164969.jpg: 448x640 1 pizza, 63.8ms
Speed: 2.7ms preprocess, 63.8ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1414/5000 [03:22<07:23,  8.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000165039.jpg: 448x640 3 persons, 6 cars, 1 bus, 6 traffic lights, 62.1ms
Speed: 2.8ms preprocess, 62.1ms inference, 13.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1415/5000 [03:23<08:02,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000165257.jpg: 384x640 1 oven, 1 sink, 53.9ms
Speed: 2.2ms preprocess, 53.9ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  28%|██▊       | 1416/5000 [03:23<07:33,  7.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000165336.jpg: 480x640 2 giraffes, 73.1ms
Speed: 4.0ms preprocess, 73.1ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1417/5000 [03:23<07:29,  7.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000165351.jpg: 640x640 1 banana, 87.6ms
Speed: 4.1ms preprocess, 87.6ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  28%|██▊       | 1418/5000 [03:23<07:47,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000165500.jpg: 480x640 2 sheeps, 1 bear, 70.5ms
Speed: 2.9ms preprocess, 70.5ms inference, 4.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1419/5000 [03:23<07:54,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000165518.jpg: 640x640 1 person, 1 motorcycle, 114.6ms
Speed: 4.6ms preprocess, 114.6ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  28%|██▊       | 1420/5000 [03:23<08:49,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000165681.jpg: 448x640 2 persons, 1 bicycle, 8 motorcycles, 71.7ms
Speed: 2.9ms preprocess, 71.7ms inference, 9.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  28%|██▊       | 1421/5000 [03:23<08:54,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000165713.jpg: 640x448 1 fire hydrant, 87.5ms
Speed: 2.7ms preprocess, 87.5ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  28%|██▊       | 1422/5000 [03:23<08:46,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000165831.jpg: 480x640 1 cup, 1 bowl, 1 orange, 4 broccolis, 6 carrots, 70.5ms
Speed: 3.4ms preprocess, 70.5ms inference, 12.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1423/5000 [03:24<09:05,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166165.jpg: 640x576 2 persons, 1 tennis racket, 209.5ms
Speed: 2.2ms preprocess, 209.5ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:  28%|██▊       | 1424/5000 [03:24<11:10,  5.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166166.jpg: 480x640 1 person, 1 bottle, 1 potted plant, 1 dining table, 1 tv, 1 remote, 1 cell phone, 1 book, 1 vase, 67.0ms
Speed: 4.1ms preprocess, 67.0ms inference, 8.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  28%|██▊       | 1425/5000 [03:24<10:24,  5.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166259.jpg: 448x640 4 birds, 62.8ms
Speed: 2.7ms preprocess, 62.8ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  29%|██▊       | 1426/5000 [03:24<09:14,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166277.jpg: 640x480 1 cat, 101.8ms
Speed: 2.8ms preprocess, 101.8ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  29%|██▊       | 1427/5000 [03:24<09:23,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166287.jpg: 480x640 12 birds, 3 cows, 76.7ms
Speed: 3.0ms preprocess, 76.7ms inference, 15.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▊       | 1428/5000 [03:25<09:47,  6.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166391.jpg: 448x640 14 cars, 2 trucks, 14 traffic lights, 69.9ms
Speed: 3.2ms preprocess, 69.9ms inference, 26.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  29%|██▊       | 1429/5000 [03:25<10:37,  5.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166426.jpg: 640x608 3 bottles, 2 cups, 2 forks, 1 knife, 1 bowl, 1 dining table, 160.7ms
Speed: 2.3ms preprocess, 160.7ms inference, 12.3ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  29%|██▊       | 1430/5000 [03:25<11:54,  4.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166478.jpg: 448x640 1 person, 5 bowls, 2 couchs, 1 laptop, 1 remote, 2 books, 71.4ms
Speed: 4.0ms preprocess, 71.4ms inference, 12.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  29%|██▊       | 1431/5000 [03:25<11:15,  5.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166509.jpg: 448x640 2 traffic lights, 65.5ms
Speed: 2.7ms preprocess, 65.5ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  29%|██▊       | 1432/5000 [03:25<09:55,  6.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166521.jpg: 448x640 2 tvs, 88.2ms
Speed: 3.7ms preprocess, 88.2ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  29%|██▊       | 1433/5000 [03:25<09:30,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166563.jpg: 416x640 1 horse, 157.3ms
Speed: 2.9ms preprocess, 157.3ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  29%|██▊       | 1434/5000 [03:26<10:21,  5.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166642.jpg: 480x640 2 elephants, 67.0ms
Speed: 3.1ms preprocess, 67.0ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▊       | 1435/5000 [03:26<09:19,  6.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166664.jpg: 448x640 1 car, 1 elephant, 62.1ms
Speed: 2.4ms preprocess, 62.1ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  29%|██▊       | 1436/5000 [03:26<08:28,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166747.jpg: 640x480 8 persons, 1 horse, 1 backpack, 1 handbag, 69.5ms
Speed: 4.0ms preprocess, 69.5ms inference, 11.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  29%|██▊       | 1437/5000 [03:26<08:44,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166768.jpg: 640x448 3 persons, 1 tie, 71.2ms
Speed: 2.7ms preprocess, 71.2ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  29%|██▉       | 1438/5000 [03:26<08:27,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000166918.jpg: 640x480 12 persons, 3 wine glasss, 8 cups, 2 dining tables, 93.8ms
Speed: 4.0ms preprocess, 93.8ms inference, 22.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  29%|██▉       | 1439/5000 [03:26<09:56,  5.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000167067.jpg: 640x480 1 person, 1 tie, 64.5ms
Speed: 4.3ms preprocess, 64.5ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  29%|██▉       | 1440/5000 [03:26<08:58,  6.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000167122.jpg: 480x640 (no detections), 67.5ms
Speed: 2.8ms preprocess, 67.5ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1441/5000 [03:27<08:09,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000167128.jpg: 448x640 2 elephants, 67.1ms
Speed: 3.2ms preprocess, 67.1ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  29%|██▉       | 1442/5000 [03:27<07:54,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000167159.jpg: 480x640 1 person, 1 bed, 1 book, 72.2ms
Speed: 5.3ms preprocess, 72.2ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1443/5000 [03:27<07:53,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000167240.jpg: 448x640 1 vase, 61.0ms
Speed: 2.6ms preprocess, 61.0ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  29%|██▉       | 1444/5000 [03:27<07:27,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000167353.jpg: 640x480 1 clock, 87.0ms
Speed: 3.6ms preprocess, 87.0ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  29%|██▉       | 1445/5000 [03:27<07:38,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000167486.jpg: 480x640 5 persons, 1 bicycle, 65.3ms
Speed: 2.8ms preprocess, 65.3ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1446/5000 [03:27<07:34,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000167540.jpg: 448x640 1 airplane, 60.9ms
Speed: 2.6ms preprocess, 60.9ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  29%|██▉       | 1447/5000 [03:27<07:09,  8.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000167572.jpg: 448x640 1 teddy bear, 56.9ms
Speed: 2.6ms preprocess, 56.9ms inference, 1.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  29%|██▉       | 1448/5000 [03:27<06:50,  8.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000167898.jpg: 480x640 1 toilet, 1 sink, 67.8ms
Speed: 2.8ms preprocess, 67.8ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1449/5000 [03:28<06:54,  8.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000167902.jpg: 480x640 1 bench, 1 bird, 73.2ms
Speed: 4.7ms preprocess, 73.2ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1450/5000 [03:28<07:11,  8.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000168330.jpg: 416x640 4 cars, 2 traffic lights, 1 fire hydrant, 2 clocks, 72.6ms
Speed: 2.7ms preprocess, 72.6ms inference, 8.6ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  29%|██▉       | 1451/5000 [03:28<07:38,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000168337.jpg: 480x640 2 persons, 1 fire hydrant, 1 suitcase, 80.4ms
Speed: 3.4ms preprocess, 80.4ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1452/5000 [03:28<07:52,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000168458.jpg: 640x448 1 potted plant, 2 vases, 67.5ms
Speed: 2.8ms preprocess, 67.5ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  29%|██▉       | 1453/5000 [03:28<07:29,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000168593.jpg: 480x640 1 bench, 1 chair, 2 refrigerators, 63.7ms
Speed: 2.7ms preprocess, 63.7ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1454/5000 [03:28<07:30,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000168619.jpg: 480x640 1 bench, 1 chair, 63.9ms
Speed: 2.8ms preprocess, 63.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1455/5000 [03:28<07:22,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000168883.jpg: 640x480 4 persons, 1 handbag, 3 cell phones, 89.6ms
Speed: 9.3ms preprocess, 89.6ms inference, 8.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  29%|██▉       | 1456/5000 [03:28<08:21,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000168974.jpg: 640x480 2 persons, 66.4ms
Speed: 4.1ms preprocess, 66.4ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  29%|██▉       | 1457/5000 [03:29<07:53,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000169076.jpg: 640x640 3 persons, 1 cat, 1 tv, 83.5ms
Speed: 3.9ms preprocess, 83.5ms inference, 6.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  29%|██▉       | 1458/5000 [03:29<08:14,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000169169.jpg: 480x640 9 persons, 1 car, 3 potted plants, 64.5ms
Speed: 2.8ms preprocess, 64.5ms inference, 12.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1459/5000 [03:29<08:22,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000169356.jpg: 480x640 1 person, 1 skateboard, 1 surfboard, 65.0ms
Speed: 2.9ms preprocess, 65.0ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1460/5000 [03:29<07:57,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000169996.jpg: 480x640 2 persons, 2 bicycles, 3 cars, 1 truck, 4 traffic lights, 83.7ms
Speed: 2.7ms preprocess, 83.7ms inference, 14.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1461/5000 [03:29<08:44,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000170099.jpg: 480x640 1 person, 1 tie, 1 couch, 134.3ms
Speed: 2.8ms preprocess, 134.3ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1462/5000 [03:29<09:45,  6.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000170116.jpg: 480x640 2 refrigerators, 65.3ms
Speed: 2.9ms preprocess, 65.3ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1463/5000 [03:30<08:54,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000170191.jpg: 544x640 1 person, 1 backpack, 1 suitcase, 2 beds, 136.4ms
Speed: 2.3ms preprocess, 136.4ms inference, 6.2ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  29%|██▉       | 1464/5000 [03:30<09:36,  6.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000170278.jpg: 512x640 1 person, 1 dog, 1 bed, 138.2ms
Speed: 2.7ms preprocess, 138.2ms inference, 3.5ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  29%|██▉       | 1465/5000 [03:30<10:10,  5.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000170474.jpg: 480x640 1 person, 1 sports ball, 2 tennis rackets, 74.7ms
Speed: 2.7ms preprocess, 74.7ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1466/5000 [03:30<09:32,  6.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000170545.jpg: 480x640 1 car, 64.5ms
Speed: 2.9ms preprocess, 64.5ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1467/5000 [03:30<08:39,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000170595.jpg: 640x640 1 bowl, 1 dining table, 85.9ms
Speed: 3.8ms preprocess, 85.9ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  29%|██▉       | 1468/5000 [03:30<08:32,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000170613.jpg: 640x448 3 persons, 3 surfboards, 61.3ms
Speed: 2.6ms preprocess, 61.3ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  29%|██▉       | 1469/5000 [03:30<08:07,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000170670.jpg: 448x640 1 bench, 4 bottles, 6 cups, 1 fork, 5 bowls, 1 dining table, 62.6ms
Speed: 2.8ms preprocess, 62.6ms inference, 18.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  29%|██▉       | 1470/5000 [03:31<08:40,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000170739.jpg: 544x640 4 elephants, 98.1ms
Speed: 1.9ms preprocess, 98.1ms inference, 5.2ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  29%|██▉       | 1471/5000 [03:31<08:49,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000170893.jpg: 480x640 1 cat, 1 dog, 2 toilets, 66.0ms
Speed: 2.6ms preprocess, 66.0ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  29%|██▉       | 1472/5000 [03:31<08:14,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000170955.jpg: 448x640 1 person, 59.9ms
Speed: 2.6ms preprocess, 59.9ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  29%|██▉       | 1473/5000 [03:31<07:38,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000171050.jpg: 640x480 1 fire hydrant, 66.0ms
Speed: 2.9ms preprocess, 66.0ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  29%|██▉       | 1474/5000 [03:31<07:17,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000171190.jpg: 480x640 12 persons, 7 bottles, 5 wine glasss, 3 chairs, 1 dining table, 63.7ms
Speed: 2.8ms preprocess, 63.7ms inference, 26.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|██▉       | 1475/5000 [03:31<08:39,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000171298.jpg: 480x640 4 buss, 67.1ms
Speed: 2.7ms preprocess, 67.1ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|██▉       | 1476/5000 [03:31<08:16,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000171382.jpg: 448x640 12 persons, 5 cars, 1 truck, 1 backpack, 1 handbag, 1 skateboard, 58.4ms
Speed: 2.7ms preprocess, 58.4ms inference, 17.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  30%|██▉       | 1477/5000 [03:32<08:46,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000171611.jpg: 448x640 1 person, 5 boats, 1 cow, 59.3ms
Speed: 2.8ms preprocess, 59.3ms inference, 6.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  30%|██▉       | 1478/5000 [03:32<08:21,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000171740.jpg: 480x640 1 cup, 1 bowl, 1 couch, 1 clock, 62.4ms
Speed: 2.6ms preprocess, 62.4ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|██▉       | 1479/5000 [03:32<07:52,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000171757.jpg: 480x640 4 persons, 1 chair, 64.6ms
Speed: 2.8ms preprocess, 64.6ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|██▉       | 1480/5000 [03:32<07:39,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000171788.jpg: 640x448 1 person, 1 sports ball, 1 tennis racket, 59.9ms
Speed: 2.7ms preprocess, 59.9ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  30%|██▉       | 1481/5000 [03:32<07:12,  8.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172083.jpg: 384x640 (no detections), 141.1ms
Speed: 2.4ms preprocess, 141.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  30%|██▉       | 1482/5000 [03:32<08:04,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172330.jpg: 480x640 3 cars, 63.1ms
Speed: 3.0ms preprocess, 63.1ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|██▉       | 1483/5000 [03:32<07:44,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172396.jpg: 352x640 1 pizza, 1 oven, 114.7ms
Speed: 2.2ms preprocess, 114.7ms inference, 2.9ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  30%|██▉       | 1484/5000 [03:33<08:16,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172547.jpg: 480x640 2 cows, 66.1ms
Speed: 2.7ms preprocess, 66.1ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|██▉       | 1485/5000 [03:33<07:47,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172571.jpg: 384x640 1 person, 1 wine glass, 1 cup, 1 knife, 2 pizzas, 1 dining table, 55.0ms
Speed: 2.2ms preprocess, 55.0ms inference, 5.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  30%|██▉       | 1486/5000 [03:33<07:27,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172595.jpg: 384x640 1 cup, 3 chairs, 4 tvs, 1 laptop, 1 mouse, 1 remote, 2 keyboards, 58.6ms
Speed: 2.4ms preprocess, 58.6ms inference, 13.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  30%|██▉       | 1487/5000 [03:33<07:52,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172617.jpg: 640x480 1 person, 2 books, 64.4ms
Speed: 4.2ms preprocess, 64.4ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  30%|██▉       | 1488/5000 [03:33<07:36,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172648.jpg: 480x640 2 persons, 1 car, 3 trucks, 1 fire hydrant, 65.4ms
Speed: 2.6ms preprocess, 65.4ms inference, 10.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|██▉       | 1489/5000 [03:33<07:36,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172649.jpg: 448x640 1 person, 1 skateboard, 62.5ms
Speed: 2.6ms preprocess, 62.5ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  30%|██▉       | 1490/5000 [03:33<07:20,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172856.jpg: 480x640 1 stop sign, 63.2ms
Speed: 2.7ms preprocess, 63.2ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|██▉       | 1491/5000 [03:33<07:03,  8.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172877.jpg: 480x640 1 person, 3 ties, 1 bottle, 1 bowl, 2 chairs, 1 couch, 62.2ms
Speed: 2.9ms preprocess, 62.2ms inference, 9.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|██▉       | 1492/5000 [03:34<07:30,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172935.jpg: 480x640 (no detections), 63.4ms
Speed: 4.1ms preprocess, 63.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000172946.jpg: 640x448 10 persons, 59.3ms
Speed: 2.6ms preprocess, 59.3ms inference, 8.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  30%|██▉       | 1494/5000 [03:34<07:05,  8.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000172977.jpg: 512x640 1 zebra, 70.1ms
Speed: 2.9ms preprocess, 70.1ms inference, 2.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  30%|██▉       | 1495/5000 [03:34<07:03,  8.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000173004.jpg: 480x640 1 person, 3 bottles, 1 cup, 5 pizzas, 2 chairs, 1 dining table, 66.9ms
Speed: 2.8ms preprocess, 66.9ms inference, 12.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|██▉       | 1496/5000 [03:34<07:32,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000173008.jpg: 480x640 1 person, 1 bowl, 1 chair, 65.9ms
Speed: 2.4ms preprocess, 65.9ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|██▉       | 1497/5000 [03:34<07:17,  8.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000173033.jpg: 640x480 1 bear, 65.7ms
Speed: 2.6ms preprocess, 65.7ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  30%|██▉       | 1498/5000 [03:34<07:03,  8.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000173044.jpg: 640x384 1 clock, 136.3ms
Speed: 2.5ms preprocess, 136.3ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  30%|██▉       | 1499/5000 [03:34<08:03,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000173057.jpg: 640x480 12 persons, 1 car, 1 clock, 66.4ms
Speed: 3.0ms preprocess, 66.4ms inference, 13.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  30%|███       | 1500/5000 [03:35<08:17,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000173091.jpg: 640x640 (no detections), 85.0ms
Speed: 4.1ms preprocess, 85.0ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  30%|███       | 1501/5000 [03:35<07:58,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000173183.jpg: 480x640 (no detections), 67.3ms
Speed: 3.2ms preprocess, 67.3ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000173302.jpg: 448x640 2 persons, 5 chairs, 2 dining tables, 2 tvs, 1 microwave, 57.7ms
Speed: 2.4ms preprocess, 57.7ms inference, 10.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  30%|███       | 1503/5000 [03:35<07:31,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000173371.jpg: 640x640 1 cup, 4 forks, 1 knife, 2 spoons, 1 broccoli, 3 pizzas, 1 dining table, 87.7ms
Speed: 5.0ms preprocess, 87.7ms inference, 16.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  30%|███       | 1504/5000 [03:35<08:29,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000173383.jpg: 448x640 2 cups, 1 mouse, 1 remote, 1 scissors, 59.4ms
Speed: 3.1ms preprocess, 59.4ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  30%|███       | 1505/5000 [03:35<08:04,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000173799.jpg: 544x640 2 persons, 22 elephants, 70.8ms
Speed: 2.2ms preprocess, 70.8ms inference, 23.6ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  30%|███       | 1506/5000 [03:35<09:06,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000173830.jpg: 640x608 1 person, 1 skis, 150.1ms
Speed: 2.3ms preprocess, 150.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  30%|███       | 1507/5000 [03:36<09:47,  5.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000174004.jpg: 480x640 1 truck, 63.8ms
Speed: 2.8ms preprocess, 63.8ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|███       | 1508/5000 [03:36<08:52,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000174018.jpg: 480x640 2 teddy bears, 72.5ms
Speed: 4.2ms preprocess, 72.5ms inference, 4.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|███       | 1509/5000 [03:36<08:41,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000174123.jpg: 512x640 2 persons, 1 fork, 2 pizzas, 86.2ms
Speed: 4.9ms preprocess, 86.2ms inference, 5.6ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  30%|███       | 1510/5000 [03:36<08:52,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000174231.jpg: 640x480 1 cat, 2 chairs, 1 microwave, 67.4ms
Speed: 4.5ms preprocess, 67.4ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  30%|███       | 1511/5000 [03:36<08:25,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000174371.jpg: 512x640 5 persons, 1 bottle, 3 cell phones, 66.0ms
Speed: 2.8ms preprocess, 66.0ms inference, 8.7ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  30%|███       | 1512/5000 [03:36<08:18,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000174482.jpg: 416x640 1 bicycle, 5 cars, 116.4ms
Speed: 2.7ms preprocess, 116.4ms inference, 5.2ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  30%|███       | 1513/5000 [03:36<08:47,  6.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000175251.jpg: 640x640 2 persons, 2 toothbrushs, 82.3ms
Speed: 4.5ms preprocess, 82.3ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  30%|███       | 1514/5000 [03:37<08:47,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000175364.jpg: 512x640 1 dining table, 3 ovens, 1 sink, 105.9ms
Speed: 3.7ms preprocess, 105.9ms inference, 6.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  30%|███       | 1515/5000 [03:37<09:09,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000175387.jpg: 480x640 2 toilets, 77.3ms
Speed: 5.3ms preprocess, 77.3ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|███       | 1516/5000 [03:37<08:51,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000175438.jpg: 480x640 14 cars, 3 traffic lights, 1 clock, 65.7ms
Speed: 2.9ms preprocess, 65.7ms inference, 17.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|███       | 1517/5000 [03:37<09:07,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000175443.jpg: 640x480 1 teddy bear, 65.3ms
Speed: 2.5ms preprocess, 65.3ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  30%|███       | 1518/5000 [03:37<08:18,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000175535.jpg: 640x480 3 bowls, 1 sandwich, 1 dining table, 63.3ms
Speed: 2.7ms preprocess, 63.3ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  30%|███       | 1519/5000 [03:37<07:56,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000176037.jpg: 448x640 2 persons, 1 bus, 71.0ms
Speed: 2.8ms preprocess, 71.0ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  30%|███       | 1520/5000 [03:38<08:00,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000176232.jpg: 640x480 1 chair, 1 potted plant, 1 dining table, 1 vase, 86.9ms
Speed: 5.2ms preprocess, 86.9ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  30%|███       | 1521/5000 [03:38<08:15,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000176446.jpg: 480x640 1 suitcase, 1 laptop, 68.3ms
Speed: 2.9ms preprocess, 68.3ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|███       | 1522/5000 [03:38<07:51,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000176606.jpg: 480x640 1 person, 1 car, 1 sheep, 67.6ms
Speed: 2.7ms preprocess, 67.6ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|███       | 1523/5000 [03:38<07:37,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000176634.jpg: 480x640 1 zebra, 81.1ms
Speed: 2.9ms preprocess, 81.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|███       | 1524/5000 [03:38<07:40,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000176701.jpg: 480x640 (no detections), 71.0ms
Speed: 2.8ms preprocess, 71.0ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  30%|███       | 1525/5000 [03:38<07:14,  7.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000176778.jpg: 640x448 1 toilet, 1 sink, 62.5ms
Speed: 3.3ms preprocess, 62.5ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  31%|███       | 1526/5000 [03:38<07:00,  8.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000176799.jpg: 448x640 3 persons, 3 skateboards, 73.2ms
Speed: 3.1ms preprocess, 73.2ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  31%|███       | 1527/5000 [03:38<07:16,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000176847.jpg: 480x640 1 bird, 65.5ms
Speed: 2.6ms preprocess, 65.5ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███       | 1528/5000 [03:38<06:58,  8.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000176857.jpg: 480x640 10 persons, 1 dog, 64.3ms
Speed: 3.9ms preprocess, 64.3ms inference, 13.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███       | 1529/5000 [03:39<07:32,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000176901.jpg: 384x640 3 persons, 1 skis, 141.8ms
Speed: 3.1ms preprocess, 141.8ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  31%|███       | 1530/5000 [03:39<08:42,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000177015.jpg: 480x640 1 person, 1 cat, 1 chair, 1 laptop, 69.0ms
Speed: 3.0ms preprocess, 69.0ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███       | 1531/5000 [03:39<08:16,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000177065.jpg: 576x640 3 persons, 1 backpack, 1 sports ball, 3 tennis rackets, 172.9ms
Speed: 2.3ms preprocess, 172.9ms inference, 14.8ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  31%|███       | 1532/5000 [03:39<10:17,  5.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000177213.jpg: 384x640 1 bottle, 2 cups, 1 fork, 1 pizza, 1 dining table, 58.8ms
Speed: 2.5ms preprocess, 58.8ms inference, 4.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  31%|███       | 1533/5000 [03:39<09:07,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000177357.jpg: 416x640 1 person, 1 surfboard, 67.3ms
Speed: 2.8ms preprocess, 67.3ms inference, 4.9ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  31%|███       | 1534/5000 [03:39<08:37,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000177383.jpg: 640x448 1 train, 71.1ms
Speed: 3.4ms preprocess, 71.1ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  31%|███       | 1535/5000 [03:40<08:04,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000177489.jpg: 480x640 2 persons, 1 bottle, 1 wine glass, 1 spoon, 4 bowls, 4 chairs, 64.9ms
Speed: 3.1ms preprocess, 64.9ms inference, 11.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███       | 1536/5000 [03:40<08:13,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000177539.jpg: 448x640 1 person, 62.9ms
Speed: 2.9ms preprocess, 62.9ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  31%|███       | 1537/5000 [03:40<07:51,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000177714.jpg: 416x640 5 carrots, 76.1ms
Speed: 3.1ms preprocess, 76.1ms inference, 4.6ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  31%|███       | 1538/5000 [03:40<07:55,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000177861.jpg: 640x480 5 persons, 3 cars, 4 umbrellas, 1 handbag, 72.4ms
Speed: 2.8ms preprocess, 72.4ms inference, 14.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  31%|███       | 1539/5000 [03:40<08:19,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000177893.jpg: 448x640 1 car, 1 bus, 60.2ms
Speed: 2.6ms preprocess, 60.2ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  31%|███       | 1540/5000 [03:40<07:35,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000177934.jpg: 480x640 5 persons, 8 cars, 1 motorcycle, 2 trucks, 1 backpack, 73.0ms
Speed: 2.7ms preprocess, 73.0ms inference, 19.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███       | 1541/5000 [03:40<08:35,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000177935.jpg: 640x480 1 spoon, 1 oven, 68.5ms
Speed: 2.7ms preprocess, 68.5ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  31%|███       | 1542/5000 [03:41<08:02,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000178028.jpg: 640x480 2 bottles, 2 wine glasss, 1 sink, 62.6ms
Speed: 4.0ms preprocess, 62.6ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  31%|███       | 1543/5000 [03:41<07:45,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000178469.jpg: 448x640 1 cup, 1 chair, 1 laptop, 2 books, 61.6ms
Speed: 2.7ms preprocess, 61.6ms inference, 9.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  31%|███       | 1544/5000 [03:41<07:41,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000178618.jpg: 640x448 1 elephant, 62.9ms
Speed: 2.8ms preprocess, 62.9ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  31%|███       | 1545/5000 [03:41<07:17,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000178744.jpg: 448x640 3 boats, 59.2ms
Speed: 2.7ms preprocess, 59.2ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  31%|███       | 1546/5000 [03:41<06:58,  8.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000178982.jpg: 448x640 2 persons, 1 bicycle, 1 motorcycle, 2 traffic lights, 59.6ms
Speed: 2.8ms preprocess, 59.6ms inference, 7.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  31%|███       | 1547/5000 [03:41<06:57,  8.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000179112.jpg: 480x640 1 person, 2 cell phones, 63.9ms
Speed: 4.3ms preprocess, 63.9ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███       | 1548/5000 [03:41<06:52,  8.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000179141.jpg: 480x640 4 persons, 1 bottle, 1 cup, 1 bowl, 1 pizza, 85.9ms
Speed: 3.0ms preprocess, 85.9ms inference, 7.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███       | 1549/5000 [03:41<07:25,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000179174.jpg: 640x448 3 persons, 1 traffic light, 1 umbrella, 24 suitcases, 62.5ms
Speed: 2.4ms preprocess, 62.5ms inference, 24.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  31%|███       | 1550/5000 [03:42<08:36,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000179214.jpg: 640x448 1 pizza, 1 dining table, 61.2ms
Speed: 2.5ms preprocess, 61.2ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  31%|███       | 1551/5000 [03:42<07:55,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000179265.jpg: 448x640 2 cars, 1 bus, 1 truck, 1 potted plant, 58.4ms
Speed: 2.6ms preprocess, 58.4ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  31%|███       | 1552/5000 [03:42<07:24,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000179285.jpg: 640x448 1 sports ball, 59.9ms
Speed: 2.4ms preprocess, 59.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  31%|███       | 1553/5000 [03:42<07:00,  8.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000179392.jpg: 640x480 1 cat, 64.9ms
Speed: 2.8ms preprocess, 64.9ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  31%|███       | 1554/5000 [03:42<06:55,  8.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000179487.jpg: 640x512 11 persons, 2 sports balls, 1 tennis racket, 206.3ms
Speed: 3.5ms preprocess, 206.3ms inference, 14.6ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  31%|███       | 1555/5000 [03:42<10:10,  5.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000179642.jpg: 448x640 10 persons, 2 tennis rackets, 2 bottles, 3 chairs, 66.1ms
Speed: 3.0ms preprocess, 66.1ms inference, 16.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  31%|███       | 1556/5000 [03:43<10:08,  5.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000179653.jpg: 480x640 1 clock, 68.8ms
Speed: 2.8ms preprocess, 68.8ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███       | 1557/5000 [03:43<09:08,  6.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000179765.jpg: 480x640 1 motorcycle, 73.9ms
Speed: 2.8ms preprocess, 73.9ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███       | 1558/5000 [03:43<08:32,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000179898.jpg: 480x640 11 persons, 2 hot dogs, 4 chairs, 1 dining table, 75.4ms
Speed: 2.9ms preprocess, 75.4ms inference, 18.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███       | 1559/5000 [03:43<09:19,  6.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000180011.jpg: 640x480 1 person, 1 zebra, 5 giraffes, 83.7ms
Speed: 3.0ms preprocess, 83.7ms inference, 13.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  31%|███       | 1560/5000 [03:43<09:16,  6.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000180101.jpg: 480x640 4 persons, 1 cake, 2 chairs, 1 dining table, 66.4ms
Speed: 2.7ms preprocess, 66.4ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███       | 1561/5000 [03:43<08:47,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000180135.jpg: 640x448 1 person, 1 skateboard, 1 tennis racket, 60.8ms
Speed: 4.2ms preprocess, 60.8ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  31%|███       | 1562/5000 [03:43<08:07,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000180188.jpg: 480x640 1 person, 1 train, 63.7ms
Speed: 2.5ms preprocess, 63.7ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███▏      | 1563/5000 [03:44<07:37,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000180296.jpg: 384x640 1 person, 1 motorcycle, 2 backpacks, 1 suitcase, 55.9ms
Speed: 2.7ms preprocess, 55.9ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  31%|███▏      | 1564/5000 [03:44<07:12,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000180383.jpg: 480x640 1 person, 1 dog, 1 tv, 1 laptop, 1 clock, 86.2ms
Speed: 3.5ms preprocess, 86.2ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███▏      | 1565/5000 [03:44<07:34,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000180487.jpg: 448x640 2 persons, 2 umbrellas, 1 dining table, 62.6ms
Speed: 2.8ms preprocess, 62.6ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  31%|███▏      | 1566/5000 [03:44<07:21,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000180560.jpg: 448x640 3 persons, 1 donut, 60.5ms
Speed: 2.6ms preprocess, 60.5ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  31%|███▏      | 1567/5000 [03:44<06:59,  8.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000180751.jpg: 480x640 2 persons, 10 bottles, 2 wine glasss, 1 dining table, 1 tv, 63.2ms
Speed: 2.6ms preprocess, 63.2ms inference, 16.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  31%|███▏      | 1568/5000 [03:44<07:36,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000180792.jpg: 640x480 1 toilet, 66.2ms
Speed: 4.7ms preprocess, 66.2ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  31%|███▏      | 1569/5000 [03:44<07:23,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000180798.jpg: 640x480 1 person, 1 cell phone, 66.3ms
Speed: 4.1ms preprocess, 66.3ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  31%|███▏      | 1570/5000 [03:44<07:12,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000180878.jpg: 448x640 1 cell phone, 83.9ms
Speed: 2.6ms preprocess, 83.9ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  31%|███▏      | 1571/5000 [03:45<07:16,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000181303.jpg: 384x640 1 person, 1 bench, 1 umbrella, 1 kite, 57.1ms
Speed: 2.4ms preprocess, 57.1ms inference, 6.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  31%|███▏      | 1572/5000 [03:45<06:56,  8.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000181421.jpg: 448x640 3 persons, 2 boats, 2 umbrellas, 62.4ms
Speed: 2.9ms preprocess, 62.4ms inference, 6.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  31%|███▏      | 1573/5000 [03:45<06:58,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000181499.jpg: 544x640 1 person, 1 tv, 2 laptops, 2 keyboards, 142.6ms
Speed: 2.2ms preprocess, 142.6ms inference, 6.1ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  31%|███▏      | 1574/5000 [03:45<08:28,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000181542.jpg: 608x640 11 persons, 1 bicycle, 3 cars, 4 motorcycles, 1 bus, 1 truck, 146.7ms
Speed: 2.3ms preprocess, 146.7ms inference, 25.2ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  32%|███▏      | 1575/5000 [03:45<10:40,  5.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000181666.jpg: 448x640 4 persons, 8 sheeps, 6 cows, 60.0ms
Speed: 3.0ms preprocess, 60.0ms inference, 15.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  32%|███▏      | 1576/5000 [03:45<10:26,  5.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000181753.jpg: 448x640 2 chairs, 1 couch, 1 potted plant, 1 dining table, 1 vase, 59.6ms
Speed: 2.6ms preprocess, 59.6ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  32%|███▏      | 1577/5000 [03:46<09:19,  6.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000181796.jpg: 384x640 1 wine glass, 4 cups, 2 forks, 1 spoon, 1 bowl, 1 dining table, 52.0ms
Speed: 2.5ms preprocess, 52.0ms inference, 6.8ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  32%|███▏      | 1578/5000 [03:46<08:31,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000181816.jpg: 480x640 2 persons, 1 car, 1 bench, 4 potted plants, 66.2ms
Speed: 3.0ms preprocess, 66.2ms inference, 8.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  32%|███▏      | 1579/5000 [03:46<08:16,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000181859.jpg: 640x448 1 cat, 1 sink, 61.1ms
Speed: 2.5ms preprocess, 61.1ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  32%|███▏      | 1580/5000 [03:46<07:38,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000181969.jpg: 640x576 1 dog, 160.5ms
Speed: 2.3ms preprocess, 160.5ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:  32%|███▏      | 1581/5000 [03:46<09:02,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000182021.jpg: 384x640 2 persons, 55.7ms
Speed: 2.3ms preprocess, 55.7ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  32%|███▏      | 1582/5000 [03:46<08:09,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000182155.jpg: 448x640 2 persons, 1 couch, 3 remotes, 60.0ms
Speed: 2.7ms preprocess, 60.0ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  32%|███▏      | 1583/5000 [03:46<07:45,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000182162.jpg: 448x640 1 backpack, 1 chair, 1 couch, 61.3ms
Speed: 2.6ms preprocess, 61.3ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  32%|███▏      | 1584/5000 [03:46<07:19,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000182202.jpg: 416x640 1 laptop, 1 cell phone, 57.0ms
Speed: 2.5ms preprocess, 57.0ms inference, 3.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  32%|███▏      | 1585/5000 [03:47<06:55,  8.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000182417.jpg: 480x640 2 cups, 2 forks, 1 spoon, 2 bowls, 2 cakes, 1 dining table, 64.4ms
Speed: 2.9ms preprocess, 64.4ms inference, 9.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  32%|███▏      | 1586/5000 [03:47<07:19,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000182441.jpg: 416x640 1 person, 14 birds, 64.9ms
Speed: 3.9ms preprocess, 64.9ms inference, 12.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  32%|███▏      | 1587/5000 [03:47<07:39,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000182611.jpg: 640x480 6 persons, 1 bottle, 2 cups, 4 bowls, 64.3ms
Speed: 2.9ms preprocess, 64.3ms inference, 13.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  32%|███▏      | 1588/5000 [03:47<07:57,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000182805.jpg: 384x640 3 persons, 1 umbrella, 54.1ms
Speed: 2.5ms preprocess, 54.1ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  32%|███▏      | 1589/5000 [03:47<07:20,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000182923.jpg: 640x480 15 persons, 64.3ms
Speed: 4.1ms preprocess, 64.3ms inference, 13.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  32%|███▏      | 1590/5000 [03:47<07:48,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000183049.jpg: 640x448 20 books, 61.1ms
Speed: 2.7ms preprocess, 61.1ms inference, 18.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  32%|███▏      | 1591/5000 [03:47<08:15,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000183104.jpg: 480x640 1 giraffe, 64.0ms
Speed: 2.6ms preprocess, 64.0ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  32%|███▏      | 1592/5000 [03:48<07:48,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000183127.jpg: 448x640 1 person, 1 surfboard, 72.7ms
Speed: 3.7ms preprocess, 72.7ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  32%|███▏      | 1593/5000 [03:48<07:37,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000183246.jpg: 512x640 2 cars, 1 truck, 10 traffic lights, 131.5ms
Speed: 4.0ms preprocess, 131.5ms inference, 12.8ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  32%|███▏      | 1594/5000 [03:48<09:03,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000183391.jpg: 640x640 8 persons, 1 tennis racket, 1 chair, 158.3ms
Speed: 2.4ms preprocess, 158.3ms inference, 11.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  32%|███▏      | 1595/5000 [03:48<10:32,  5.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000183437.jpg: 640x448 4 persons, 2 elephants, 58.8ms
Speed: 2.7ms preprocess, 58.8ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  32%|███▏      | 1596/5000 [03:48<09:24,  6.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000183500.jpg: 448x640 1 person, 1 airplane, 70.4ms
Speed: 3.0ms preprocess, 70.4ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  32%|███▏      | 1597/5000 [03:48<08:41,  6.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000183648.jpg: 448x640 3 persons, 1 elephant, 127.6ms
Speed: 3.0ms preprocess, 127.6ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  32%|███▏      | 1598/5000 [03:49<09:16,  6.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000183675.jpg: 416x640 1 person, 1 horse, 61.6ms
Speed: 3.0ms preprocess, 61.6ms inference, 2.7ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  32%|███▏      | 1599/5000 [03:49<08:20,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000183709.jpg: 640x480 14 persons, 5 cars, 3 traffic lights, 67.1ms
Speed: 3.0ms preprocess, 67.1ms inference, 20.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  32%|███▏      | 1600/5000 [03:49<08:54,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000183716.jpg: 640x480 2 persons, 2 ties, 61.6ms
Speed: 4.1ms preprocess, 61.6ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  32%|███▏      | 1601/5000 [03:49<08:16,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000183965.jpg: 480x640 1 bowl, 1 sandwich, 1 cake, 1 dining table, 64.8ms
Speed: 2.8ms preprocess, 64.8ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  32%|███▏      | 1602/5000 [03:49<07:46,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000184321.jpg: 480x640 1 train, 1 traffic light, 63.8ms
Speed: 2.9ms preprocess, 63.8ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  32%|███▏      | 1603/5000 [03:49<07:21,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000184324.jpg: 448x640 17 persons, 1 bicycle, 8 cars, 1 bus, 1 truck, 80.9ms
Speed: 2.9ms preprocess, 80.9ms inference, 30.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  32%|███▏      | 1604/5000 [03:49<08:57,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000184338.jpg: 448x640 1 truck, 62.9ms
Speed: 2.8ms preprocess, 62.9ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  32%|███▏      | 1605/5000 [03:50<08:09,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000184384.jpg: 480x640 1 bottle, 3 cups, 1 sandwich, 1 cake, 1 dining table, 62.9ms
Speed: 2.9ms preprocess, 62.9ms inference, 10.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  32%|███▏      | 1606/5000 [03:50<07:58,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000184400.jpg: 480x640 1 train, 67.9ms
Speed: 2.7ms preprocess, 67.9ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  32%|███▏      | 1607/5000 [03:50<07:31,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000184611.jpg: 480x640 14 persons, 1 bicycle, 3 cars, 74.2ms
Speed: 3.0ms preprocess, 74.2ms inference, 26.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  32%|███▏      | 1608/5000 [03:50<08:33,  6.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000184762.jpg: 480x640 1 fork, 1 clock, 62.2ms
Speed: 4.0ms preprocess, 62.2ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  32%|███▏      | 1609/5000 [03:50<07:53,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000184791.jpg: 512x640 1 bowl, 1 apple, 1 orange, 1 vase, 72.2ms
Speed: 2.8ms preprocess, 72.2ms inference, 4.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  32%|███▏      | 1610/5000 [03:50<07:49,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000184978.jpg: 640x480 1 person, 64.4ms
Speed: 2.9ms preprocess, 64.4ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  32%|███▏      | 1611/5000 [03:50<07:24,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000185157.jpg: 640x448 2 bicycles, 1 clock, 61.6ms
Speed: 2.6ms preprocess, 61.6ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  32%|███▏      | 1612/5000 [03:50<06:57,  8.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000185250.jpg: 640x416 1 person, 1 dog, 1 sports ball, 142.9ms
Speed: 2.9ms preprocess, 142.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  32%|███▏      | 1613/5000 [03:51<08:15,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000185292.jpg: 640x448 1 person, 1 surfboard, 62.9ms
Speed: 2.8ms preprocess, 62.9ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  32%|███▏      | 1614/5000 [03:51<07:39,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000185409.jpg: 448x640 9 zebras, 61.9ms
Speed: 3.0ms preprocess, 61.9ms inference, 7.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  32%|███▏      | 1615/5000 [03:51<07:35,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000185472.jpg: 480x640 3 persons, 1 bicycle, 1 train, 65.7ms
Speed: 2.8ms preprocess, 65.7ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  32%|███▏      | 1616/5000 [03:51<07:19,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000185473.jpg: 224x640 4 persons, 12 horses, 87.3ms
Speed: 2.2ms preprocess, 87.3ms inference, 5.9ms postprocess per image at shape (1, 3, 224, 640)


Segmenting Images:  32%|███▏      | 1617/5000 [03:51<07:43,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000185599.jpg: 480x640 3 apples, 5 oranges, 66.6ms
Speed: 2.9ms preprocess, 66.6ms inference, 8.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  32%|███▏      | 1618/5000 [03:51<07:39,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000185802.jpg: 640x480 1 banana, 1 chair, 96.0ms
Speed: 2.8ms preprocess, 96.0ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  32%|███▏      | 1619/5000 [03:51<07:48,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000185890.jpg: 640x512 4 persons, 138.6ms
Speed: 2.9ms preprocess, 138.6ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  32%|███▏      | 1620/5000 [03:52<08:47,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000185950.jpg: 640x448 8 persons, 1 skateboard, 58.4ms
Speed: 4.1ms preprocess, 58.4ms inference, 8.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  32%|███▏      | 1621/5000 [03:52<08:20,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186042.jpg: 384x640 1 person, 1 skis, 56.2ms
Speed: 2.7ms preprocess, 56.2ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  32%|███▏      | 1622/5000 [03:52<07:34,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186282.jpg: 448x640 1 tv, 1 mouse, 1 keyboard, 61.0ms
Speed: 2.7ms preprocess, 61.0ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  32%|███▏      | 1623/5000 [03:52<07:13,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186296.jpg: 480x640 1 cat, 63.8ms
Speed: 2.9ms preprocess, 63.8ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  32%|███▏      | 1624/5000 [03:52<07:05,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186345.jpg: 448x640 7 kites, 60.4ms
Speed: 3.1ms preprocess, 60.4ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  32%|███▎      | 1625/5000 [03:52<07:05,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186422.jpg: 448x640 1 bird, 1 bear, 61.1ms
Speed: 2.8ms preprocess, 61.1ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  33%|███▎      | 1626/5000 [03:52<06:45,  8.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186449.jpg: 640x448 5 persons, 1 bench, 1 elephant, 1 suitcase, 61.6ms
Speed: 2.7ms preprocess, 61.6ms inference, 6.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  33%|███▎      | 1627/5000 [03:52<06:54,  8.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186624.jpg: 576x640 1 person, 1 train, 151.3ms
Speed: 2.2ms preprocess, 151.3ms inference, 5.2ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  33%|███▎      | 1628/5000 [03:53<08:15,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186632.jpg: 640x480 1 cup, 1 bowl, 1 chair, 1 potted plant, 1 dining table, 1 tv, 1 sink, 63.2ms
Speed: 3.1ms preprocess, 63.2ms inference, 6.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  33%|███▎      | 1629/5000 [03:53<07:58,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186637.jpg: 448x640 1 giraffe, 73.9ms
Speed: 3.1ms preprocess, 73.9ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  33%|███▎      | 1630/5000 [03:53<07:41,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186873.jpg: 640x640 6 persons, 2 boats, 161.4ms
Speed: 4.1ms preprocess, 161.4ms inference, 9.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  33%|███▎      | 1631/5000 [03:53<09:26,  5.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186929.jpg: 640x448 4 oranges, 1 carrot, 60.8ms
Speed: 3.5ms preprocess, 60.8ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  33%|███▎      | 1632/5000 [03:53<08:32,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186938.jpg: 448x640 2 birds, 59.7ms
Speed: 2.5ms preprocess, 59.7ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  33%|███▎      | 1633/5000 [03:53<07:53,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000186980.jpg: 640x480 2 chairs, 1 dining table, 1 microwave, 1 refrigerator, 64.9ms
Speed: 2.3ms preprocess, 64.9ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  33%|███▎      | 1634/5000 [03:54<07:35,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000187055.jpg: 640x448 2 persons, 2 sports balls, 3 tennis rackets, 1 bottle, 98.7ms
Speed: 2.7ms preprocess, 98.7ms inference, 7.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  33%|███▎      | 1635/5000 [03:54<08:11,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000187144.jpg: 480x640 8 persons, 2 buss, 1 backpack, 64.5ms
Speed: 3.1ms preprocess, 64.5ms inference, 9.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1636/5000 [03:54<07:59,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000187236.jpg: 480x640 1 person, 2 cats, 1 chair, 63.9ms
Speed: 2.8ms preprocess, 63.9ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1637/5000 [03:54<07:42,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000187243.jpg: 640x480 1 person, 1 toilet, 1 sink, 64.9ms
Speed: 2.7ms preprocess, 64.9ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  33%|███▎      | 1638/5000 [03:54<07:14,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000187249.jpg: 480x640 1 person, 2 tvs, 62.6ms
Speed: 2.6ms preprocess, 62.6ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1639/5000 [03:54<06:50,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000187271.jpg: 640x480 1 oven, 1 refrigerator, 68.5ms
Speed: 2.6ms preprocess, 68.5ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  33%|███▎      | 1640/5000 [03:54<07:00,  7.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000187362.jpg: 448x640 1 person, 1 surfboard, 60.5ms
Speed: 2.7ms preprocess, 60.5ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  33%|███▎      | 1641/5000 [03:54<06:49,  8.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000187513.jpg: 640x448 2 toilets, 2 sinks, 61.7ms
Speed: 2.6ms preprocess, 61.7ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  33%|███▎      | 1642/5000 [03:55<06:40,  8.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000187585.jpg: 448x640 1 person, 1 skateboard, 61.0ms
Speed: 2.7ms preprocess, 61.0ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  33%|███▎      | 1643/5000 [03:55<06:26,  8.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000187734.jpg: 480x640 11 persons, 1 frisbee, 1 tennis racket, 64.0ms
Speed: 2.8ms preprocess, 64.0ms inference, 12.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1644/5000 [03:55<07:02,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000187745.jpg: 480x640 1 airplane, 64.1ms
Speed: 2.5ms preprocess, 64.1ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1645/5000 [03:55<06:46,  8.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000187990.jpg: 480x640 1 person, 2 skateboards, 79.0ms
Speed: 4.5ms preprocess, 79.0ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1646/5000 [03:55<06:57,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000188296.jpg: 640x640 6 persons, 1 car, 84.7ms
Speed: 2.4ms preprocess, 84.7ms inference, 8.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  33%|███▎      | 1647/5000 [03:55<07:33,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000188439.jpg: 416x640 1 person, 1 truck, 112.5ms
Speed: 2.7ms preprocess, 112.5ms inference, 2.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  33%|███▎      | 1648/5000 [03:55<07:58,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000188465.jpg: 480x640 11 persons, 2 baseball bats, 66.1ms
Speed: 2.5ms preprocess, 66.1ms inference, 13.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1649/5000 [03:56<08:06,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000188592.jpg: 640x480 1 person, 1 hot dog, 1 donut, 63.7ms
Speed: 2.5ms preprocess, 63.7ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  33%|███▎      | 1650/5000 [03:56<07:38,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000188689.jpg: 640x448 2 boats, 1 bird, 80.7ms
Speed: 4.1ms preprocess, 80.7ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  33%|███▎      | 1651/5000 [03:56<07:42,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000188906.jpg: 480x640 1 sandwich, 1 carrot, 66.8ms
Speed: 4.2ms preprocess, 66.8ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1652/5000 [03:56<07:20,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189078.jpg: 448x640 4 bananas, 4 apples, 3 oranges, 59.4ms
Speed: 4.0ms preprocess, 59.4ms inference, 11.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  33%|███▎      | 1653/5000 [03:56<07:25,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189213.jpg: 480x640 1 chair, 65.2ms
Speed: 3.7ms preprocess, 65.2ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1654/5000 [03:56<07:05,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189226.jpg: 448x640 1 fire hydrant, 61.3ms
Speed: 2.5ms preprocess, 61.3ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  33%|███▎      | 1655/5000 [03:56<06:48,  8.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189310.jpg: 480x640 2 chairs, 1 bed, 71.4ms
Speed: 3.8ms preprocess, 71.4ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1656/5000 [03:56<06:49,  8.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189436.jpg: 640x480 1 teddy bear, 69.6ms
Speed: 2.9ms preprocess, 69.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  33%|███▎      | 1657/5000 [03:56<06:44,  8.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189451.jpg: 448x640 1 knife, 1 pizza, 2 cakes, 1 dining table, 60.8ms
Speed: 2.8ms preprocess, 60.8ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  33%|███▎      | 1658/5000 [03:57<06:42,  8.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189475.jpg: 480x640 12 persons, 5 bottles, 8 cups, 4 chairs, 1 dining table, 1 vase, 63.5ms
Speed: 3.9ms preprocess, 63.5ms inference, 27.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1659/5000 [03:57<08:07,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189698.jpg: 448x640 1 car, 62.1ms
Speed: 2.6ms preprocess, 62.1ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  33%|███▎      | 1660/5000 [03:57<07:32,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189752.jpg: 480x640 2 persons, 2 cups, 1 knife, 2 pizzas, 1 dining table, 61.6ms
Speed: 2.8ms preprocess, 61.6ms inference, 11.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1661/5000 [03:57<07:30,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189775.jpg: 480x640 11 persons, 3 chairs, 82.6ms
Speed: 3.6ms preprocess, 82.6ms inference, 13.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1662/5000 [03:57<08:12,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189806.jpg: 512x640 1 cat, 1 dog, 1 suitcase, 1 chair, 1 potted plant, 1 mouse, 1 remote, 131.7ms
Speed: 4.5ms preprocess, 131.7ms inference, 7.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  33%|███▎      | 1663/5000 [03:57<09:06,  6.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189820.jpg: 448x640 1 cup, 1 chair, 3 tvs, 1 laptop, 2 mouses, 2 keyboards, 59.8ms
Speed: 2.7ms preprocess, 59.8ms inference, 7.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  33%|███▎      | 1664/5000 [03:58<08:30,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000189828.jpg: 640x640 1 airplane, 87.3ms
Speed: 4.0ms preprocess, 87.3ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  33%|███▎      | 1665/5000 [03:58<08:20,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000190007.jpg: 384x640 1 person, 1 surfboard, 56.6ms
Speed: 2.4ms preprocess, 56.6ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  33%|███▎      | 1666/5000 [03:58<07:34,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000190140.jpg: 448x640 1 person, 2 boats, 1 dog, 62.1ms
Speed: 2.8ms preprocess, 62.1ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  33%|███▎      | 1667/5000 [03:58<07:22,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000190236.jpg: 416x640 1 bottle, 1 cup, 2 chairs, 4 tvs, 1 laptop, 1 mouse, 1 remote, 2 keyboards, 64.7ms
Speed: 3.3ms preprocess, 64.7ms inference, 9.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  33%|███▎      | 1668/5000 [03:58<07:37,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000190307.jpg: 640x544 1 person, 157.3ms
Speed: 2.2ms preprocess, 157.3ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  33%|███▎      | 1669/5000 [03:58<08:46,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000190637.jpg: 640x448 1 person, 1 couch, 1 bed, 1 remote, 65.6ms
Speed: 2.6ms preprocess, 65.6ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  33%|███▎      | 1670/5000 [03:58<08:04,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000190648.jpg: 480x640 1 chair, 1 bed, 70.4ms
Speed: 3.0ms preprocess, 70.4ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1671/5000 [03:59<07:42,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000190676.jpg: 288x640 20 persons, 1 airplane, 103.1ms
Speed: 2.7ms preprocess, 103.1ms inference, 11.4ms postprocess per image at shape (1, 3, 288, 640)


Segmenting Images:  33%|███▎      | 1672/5000 [03:59<08:26,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000190753.jpg: 480x640 8 persons, 4 benchs, 1 suitcase, 63.4ms
Speed: 3.0ms preprocess, 63.4ms inference, 11.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1673/5000 [03:59<08:25,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000190756.jpg: 480x640 1 person, 1 motorcycle, 87.2ms
Speed: 2.7ms preprocess, 87.2ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  33%|███▎      | 1674/5000 [03:59<08:08,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000190841.jpg: 448x640 1 person, 1 bench, 1 skateboard, 1 chair, 62.1ms
Speed: 2.6ms preprocess, 62.1ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▎      | 1675/5000 [03:59<07:40,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000190853.jpg: 640x384 1 person, 131.0ms
Speed: 2.3ms preprocess, 131.0ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  34%|███▎      | 1676/5000 [03:59<08:20,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000190923.jpg: 640x480 9 persons, 2 traffic lights, 1 handbag, 122.3ms
Speed: 4.4ms preprocess, 122.3ms inference, 11.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  34%|███▎      | 1677/5000 [03:59<09:17,  5.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000191013.jpg: 640x480 3 cars, 3 trucks, 1 clock, 64.0ms
Speed: 2.8ms preprocess, 64.0ms inference, 7.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  34%|███▎      | 1678/5000 [04:00<08:37,  6.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000191288.jpg: 448x640 1 person, 1 horse, 69.2ms
Speed: 3.9ms preprocess, 69.2ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▎      | 1679/5000 [04:00<08:07,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000191471.jpg: 480x640 1 stop sign, 68.6ms
Speed: 2.9ms preprocess, 68.6ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▎      | 1680/5000 [04:00<07:37,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000191580.jpg: 640x480 1 cup, 2 forks, 1 knife, 1 broccoli, 1 dining table, 72.7ms
Speed: 2.7ms preprocess, 72.7ms inference, 6.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  34%|███▎      | 1681/5000 [04:00<07:38,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000191614.jpg: 640x640 2 persons, 2 horses, 96.0ms
Speed: 4.1ms preprocess, 96.0ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  34%|███▎      | 1682/5000 [04:00<08:03,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000191672.jpg: 448x640 1 person, 2 surfboards, 69.9ms
Speed: 3.4ms preprocess, 69.9ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▎      | 1683/5000 [04:00<07:47,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000191761.jpg: 480x640 2 oranges, 1 carrot, 75.3ms
Speed: 4.4ms preprocess, 75.3ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▎      | 1684/5000 [04:00<07:50,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000191845.jpg: 384x640 16 persons, 17 umbrellas, 58.0ms
Speed: 3.1ms preprocess, 58.0ms inference, 22.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  34%|███▎      | 1685/5000 [04:01<08:42,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000192047.jpg: 640x480 2 sinks, 64.3ms
Speed: 3.2ms preprocess, 64.3ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  34%|███▎      | 1686/5000 [04:01<07:59,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000192191.jpg: 640x480 1 pizza, 1 oven, 62.2ms
Speed: 2.9ms preprocess, 62.2ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000192607.jpg: 448x640 1 person, 1 car, 60.2ms
Speed: 2.7ms preprocess, 60.2ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1688/5000 [04:01<07:00,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000192670.jpg: 448x640 6 persons, 3 cars, 3 baseball gloves, 62.0ms
Speed: 2.8ms preprocess, 62.0ms inference, 10.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1689/5000 [04:01<07:16,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000192699.jpg: 480x640 2 persons, 2 motorcycles, 104.4ms
Speed: 3.8ms preprocess, 104.4ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1690/5000 [04:01<07:51,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000192716.jpg: 640x544 1 person, 4 cars, 1 stop sign, 76.0ms
Speed: 2.1ms preprocess, 76.0ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  34%|███▍      | 1691/5000 [04:01<07:56,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000192871.jpg: 448x640 1 bowl, 1 apple, 3 oranges, 61.4ms
Speed: 2.7ms preprocess, 61.4ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1692/5000 [04:02<07:33,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000192904.jpg: 448x640 3 cups, 1 dining table, 76.8ms
Speed: 3.1ms preprocess, 76.8ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1693/5000 [04:02<07:36,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000192964.jpg: 640x448 1 person, 1 skateboard, 67.7ms
Speed: 2.8ms preprocess, 67.7ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  34%|███▍      | 1694/5000 [04:02<07:15,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000193162.jpg: 448x640 2 persons, 2 sheeps, 1 cow, 107.8ms
Speed: 2.7ms preprocess, 107.8ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1695/5000 [04:02<07:53,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000193181.jpg: 640x448 10 persons, 1 baseball bat, 2 baseball gloves, 63.6ms
Speed: 2.7ms preprocess, 63.6ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  34%|███▍      | 1696/5000 [04:02<07:57,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000193245.jpg: 480x640 11 persons, 10 kites, 64.2ms
Speed: 4.2ms preprocess, 64.2ms inference, 19.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1697/5000 [04:02<08:28,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000193348.jpg: 640x448 1 person, 1 toothbrush, 61.8ms
Speed: 2.8ms preprocess, 61.8ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  34%|███▍      | 1698/5000 [04:02<07:44,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000193429.jpg: 448x640 1 person, 1 snowboard, 61.1ms
Speed: 2.8ms preprocess, 61.1ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1699/5000 [04:03<07:14,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000193494.jpg: 640x448 1 bed, 68.0ms
Speed: 2.7ms preprocess, 68.0ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  34%|███▍      | 1700/5000 [04:03<07:03,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000193674.jpg: 480x640 2 persons, 4 surfboards, 65.1ms
Speed: 3.2ms preprocess, 65.1ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1701/5000 [04:03<07:03,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000193717.jpg: 640x544 2 persons, 7 cars, 1 traffic light, 1 fire hydrant, 74.9ms
Speed: 1.9ms preprocess, 74.9ms inference, 11.2ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  34%|███▍      | 1702/5000 [04:03<07:32,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000193743.jpg: 640x448 6 persons, 1 tennis racket, 59.3ms
Speed: 2.8ms preprocess, 59.3ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  34%|███▍      | 1703/5000 [04:03<07:17,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000193884.jpg: 480x640 1 stop sign, 65.0ms
Speed: 3.0ms preprocess, 65.0ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1704/5000 [04:03<06:57,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000193926.jpg: 480x640 1 person, 1 bicycle, 1 bowl, 3 bananas, 4 apples, 1 orange, 97.4ms
Speed: 3.0ms preprocess, 97.4ms inference, 15.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1705/5000 [04:03<07:52,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000194216.jpg: 320x640 2 birds, 3 bears, 117.5ms
Speed: 2.6ms preprocess, 117.5ms inference, 4.3ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  34%|███▍      | 1706/5000 [04:04<08:19,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000194471.jpg: 640x448 1 person, 1 skateboard, 62.6ms
Speed: 4.1ms preprocess, 62.6ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  34%|███▍      | 1707/5000 [04:04<07:40,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000194506.jpg: 512x640 2 bears, 135.0ms
Speed: 3.1ms preprocess, 135.0ms inference, 2.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  34%|███▍      | 1708/5000 [04:04<08:22,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000194716.jpg: 448x640 2 persons, 2 cell phones, 59.9ms
Speed: 2.7ms preprocess, 59.9ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1709/5000 [04:04<07:47,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000194724.jpg: 480x640 10 bottles, 1 cup, 3 pizzas, 1 dining table, 66.2ms
Speed: 2.7ms preprocess, 66.2ms inference, 14.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1710/5000 [04:04<08:05,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000194746.jpg: 480x640 1 pizza, 1 oven, 65.0ms
Speed: 4.3ms preprocess, 65.0ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1711/5000 [04:04<07:41,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000194832.jpg: 448x640 2 chairs, 1 tv, 60.1ms
Speed: 2.6ms preprocess, 60.1ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1712/5000 [04:04<07:13,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000194875.jpg: 576x640 9 persons, 3 motorcycles, 10 bottles, 154.8ms
Speed: 1.8ms preprocess, 154.8ms inference, 24.0ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  34%|███▍      | 1713/5000 [04:05<09:42,  5.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000194940.jpg: 480x640 1 bottle, 1 cup, 3 bowls, 7 carrots, 1 dining table, 61.4ms
Speed: 4.2ms preprocess, 61.4ms inference, 12.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1714/5000 [04:05<09:12,  5.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000195045.jpg: 480x640 1 chair, 88.0ms
Speed: 2.9ms preprocess, 88.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1715/5000 [04:05<08:42,  6.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000195165.jpg: 480x640 8 bottles, 2 bowls, 1 toilet, 1 sink, 1 refrigerator, 63.8ms
Speed: 2.9ms preprocess, 63.8ms inference, 11.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1716/5000 [04:05<08:28,  6.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000195754.jpg: 448x640 1 person, 1 couch, 1 dining table, 2 tvs, 1 clock, 62.2ms
Speed: 3.0ms preprocess, 62.2ms inference, 5.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1717/5000 [04:05<07:59,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000195842.jpg: 480x640 1 person, 1 bowl, 1 tv, 1 clock, 1 teddy bear, 63.0ms
Speed: 2.9ms preprocess, 63.0ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1718/5000 [04:05<07:31,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000195918.jpg: 448x640 1 chair, 2 tvs, 1 laptop, 1 mouse, 2 keyboards, 61.5ms
Speed: 3.0ms preprocess, 61.5ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1719/5000 [04:05<07:18,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000196009.jpg: 480x640 1 carrot, 1 scissors, 65.3ms
Speed: 2.8ms preprocess, 65.3ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1720/5000 [04:06<07:00,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000196141.jpg: 448x640 5 persons, 2 cars, 2 baseball bats, 1 baseball glove, 80.2ms
Speed: 2.9ms preprocess, 80.2ms inference, 11.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1721/5000 [04:06<07:27,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000196185.jpg: 448x640 2 airplanes, 64.0ms
Speed: 2.8ms preprocess, 64.0ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1722/5000 [04:06<07:09,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000196442.jpg: 448x640 3 persons, 5 kites, 78.9ms
Speed: 3.0ms preprocess, 78.9ms inference, 7.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  34%|███▍      | 1723/5000 [04:06<07:24,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000196754.jpg: 640x480 1 stop sign, 66.0ms
Speed: 4.1ms preprocess, 66.0ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  34%|███▍      | 1724/5000 [04:06<07:06,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000196759.jpg: 480x640 5 cars, 1 truck, 68.3ms
Speed: 2.5ms preprocess, 68.3ms inference, 8.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  34%|███▍      | 1725/5000 [04:06<07:07,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000196843.jpg: 480x640 1 person, 2 bicycles, 1 motorcycle, 1 bus, 86.5ms
Speed: 2.5ms preprocess, 86.5ms inference, 10.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▍      | 1726/5000 [04:06<07:33,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000197004.jpg: 448x640 1 sandwich, 1 dining table, 67.3ms
Speed: 3.1ms preprocess, 67.3ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  35%|███▍      | 1727/5000 [04:06<07:16,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000197022.jpg: 480x640 2 pizzas, 69.5ms
Speed: 2.9ms preprocess, 69.5ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▍      | 1728/5000 [04:07<07:04,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000197388.jpg: 416x640 5 persons, 4 skiss, 1 snowboard, 122.7ms
Speed: 2.5ms preprocess, 122.7ms inference, 8.0ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  35%|███▍      | 1729/5000 [04:07<08:06,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000197528.jpg: 640x480 1 cat, 60.3ms
Speed: 2.8ms preprocess, 60.3ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  35%|███▍      | 1730/5000 [04:07<07:21,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000197658.jpg: 640x608 1 person, 1 baseball glove, 159.6ms
Speed: 2.0ms preprocess, 159.6ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  35%|███▍      | 1731/5000 [04:07<08:40,  6.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000197796.jpg: 480x640 4 bottles, 1 sink, 66.5ms
Speed: 3.0ms preprocess, 66.5ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▍      | 1732/5000 [04:07<08:15,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000197870.jpg: 448x640 1 person, 1 bench, 1 bird, 68.8ms
Speed: 3.1ms preprocess, 68.8ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  35%|███▍      | 1733/5000 [04:07<07:45,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000198489.jpg: 640x448 1 person, 1 car, 1 kite, 1 potted plant, 64.4ms
Speed: 3.1ms preprocess, 64.4ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  35%|███▍      | 1734/5000 [04:07<07:23,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000198510.jpg: 640x640 1 person, 2 trains, 155.9ms
Speed: 2.5ms preprocess, 155.9ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  35%|███▍      | 1735/5000 [04:08<08:37,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000198641.jpg: 480x640 1 cat, 2 tvs, 1 laptop, 1 mouse, 1 keyboard, 65.3ms
Speed: 3.0ms preprocess, 65.3ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▍      | 1736/5000 [04:08<08:15,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000198805.jpg: 416x640 2 cars, 1 truck, 60.3ms
Speed: 3.0ms preprocess, 60.3ms inference, 2.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  35%|███▍      | 1737/5000 [04:08<07:35,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000198915.jpg: 416x640 (no detections), 57.9ms
Speed: 2.9ms preprocess, 57.9ms inference, 0.5ms postprocess per image at shape (1, 3, 416, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000198928.jpg: 640x480 1 person, 1 bus, 2 traffic lights, 57.7ms
Speed: 2.7ms preprocess, 57.7ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  35%|███▍      | 1739/5000 [04:08<06:34,  8.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000198960.jpg: 448x640 4 sheeps, 64.1ms
Speed: 2.9ms preprocess, 64.1ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  35%|███▍      | 1740/5000 [04:08<06:35,  8.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000199055.jpg: 640x480 1 person, 1 skateboard, 77.6ms
Speed: 3.1ms preprocess, 77.6ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  35%|███▍      | 1741/5000 [04:08<06:48,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000199236.jpg: 448x640 22 persons, 1 horse, 96.6ms
Speed: 3.1ms preprocess, 96.6ms inference, 27.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  35%|███▍      | 1742/5000 [04:09<08:21,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000199310.jpg: 640x448 1 person, 1 tennis racket, 86.8ms
Speed: 3.7ms preprocess, 86.8ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  35%|███▍      | 1743/5000 [04:09<08:11,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000199395.jpg: 640x448 5 persons, 1 kite, 68.0ms
Speed: 3.1ms preprocess, 68.0ms inference, 6.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  35%|███▍      | 1744/5000 [04:09<07:54,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000199442.jpg: 448x640 1 person, 1 surfboard, 69.5ms
Speed: 3.2ms preprocess, 69.5ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  35%|███▍      | 1745/5000 [04:09<07:34,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000199551.jpg: 448x640 1 person, 2 beds, 69.1ms
Speed: 2.8ms preprocess, 69.1ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  35%|███▍      | 1746/5000 [04:09<07:13,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000199681.jpg: 480x640 1 cat, 1 keyboard, 1 cell phone, 1 oven, 97.5ms
Speed: 4.1ms preprocess, 97.5ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▍      | 1747/5000 [04:09<07:41,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000199771.jpg: 448x640 8 persons, 2 bottles, 2 cups, 1 knife, 1 bowl, 1 pizza, 66.8ms
Speed: 3.0ms preprocess, 66.8ms inference, 14.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  35%|███▍      | 1748/5000 [04:09<07:59,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000199977.jpg: 640x480 1 airplane, 72.3ms
Speed: 2.9ms preprocess, 72.3ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  35%|███▍      | 1749/5000 [04:10<07:33,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000200152.jpg: 448x640 1 tie, 63.0ms
Speed: 2.9ms preprocess, 63.0ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  35%|███▌      | 1750/5000 [04:10<07:08,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000200162.jpg: 448x640 1 train, 6 traffic lights, 65.4ms
Speed: 3.2ms preprocess, 65.4ms inference, 8.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  35%|███▌      | 1751/5000 [04:10<07:12,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000200252.jpg: 480x640 1 bed, 99.0ms
Speed: 8.5ms preprocess, 99.0ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▌      | 1752/5000 [04:10<07:27,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000200421.jpg: 480x640 1 person, 1 suitcase, 1 tennis racket, 68.3ms
Speed: 3.4ms preprocess, 68.3ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▌      | 1753/5000 [04:10<07:19,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000200667.jpg: 448x640 1 dog, 73.8ms
Speed: 2.8ms preprocess, 73.8ms inference, 1.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  35%|███▌      | 1754/5000 [04:10<07:08,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000200839.jpg: 480x640 2 persons, 1 bus, 1 truck, 80.6ms
Speed: 3.2ms preprocess, 80.6ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▌      | 1755/5000 [04:10<07:16,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000200961.jpg: 448x640 1 person, 5 cars, 1 frisbee, 70.8ms
Speed: 3.0ms preprocess, 70.8ms inference, 7.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  35%|███▌      | 1756/5000 [04:11<07:18,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000201025.jpg: 640x448 2 elephants, 91.3ms
Speed: 7.3ms preprocess, 91.3ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  35%|███▌      | 1757/5000 [04:11<07:39,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000201072.jpg: 640x416 6 persons, 1 tennis racket, 145.7ms
Speed: 3.4ms preprocess, 145.7ms inference, 5.8ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  35%|███▌      | 1758/5000 [04:11<08:45,  6.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000201148.jpg: 384x640 1 train, 1 traffic light, 139.7ms
Speed: 2.6ms preprocess, 139.7ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  35%|███▌      | 1759/5000 [04:11<09:10,  5.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000201418.jpg: 480x640 10 persons, 6 suitcases, 69.5ms
Speed: 4.6ms preprocess, 69.5ms inference, 15.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▌      | 1760/5000 [04:11<09:17,  5.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000201426.jpg: 512x640 2 persons, 3 cups, 1 bowl, 1 pizza, 1 chair, 1 dining table, 172.7ms
Speed: 3.3ms preprocess, 172.7ms inference, 9.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  35%|███▌      | 1761/5000 [04:12<10:40,  5.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000201646.jpg: 448x640 1 person, 1 couch, 64.0ms
Speed: 3.8ms preprocess, 64.0ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  35%|███▌      | 1762/5000 [04:12<09:27,  5.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000201676.jpg: 480x640 6 zebras, 72.0ms
Speed: 3.1ms preprocess, 72.0ms inference, 6.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▌      | 1763/5000 [04:12<08:51,  6.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000201775.jpg: 512x640 2 toilets, 1 sink, 75.5ms
Speed: 4.2ms preprocess, 75.5ms inference, 4.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  35%|███▌      | 1764/5000 [04:12<08:23,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000201934.jpg: 480x640 8 cars, 1 bus, 70.1ms
Speed: 4.3ms preprocess, 70.1ms inference, 8.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▌      | 1765/5000 [04:12<08:18,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000202001.jpg: 608x640 1 person, 1 chair, 1 remote, 1 cell phone, 187.3ms
Speed: 3.0ms preprocess, 187.3ms inference, 4.5ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  35%|███▌      | 1766/5000 [04:12<09:44,  5.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000202228.jpg: 640x480 1 person, 1 cup, 1 sink, 60.6ms
Speed: 2.8ms preprocess, 60.6ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  35%|███▌      | 1767/5000 [04:12<08:39,  6.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000202339.jpg: 640x480 1 person, 1 car, 2 buss, 1 handbag, 1 tie, 61.2ms
Speed: 2.7ms preprocess, 61.2ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  35%|███▌      | 1768/5000 [04:13<08:01,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000202445.jpg: 640x640 1 cat, 1 bed, 84.4ms
Speed: 3.9ms preprocess, 84.4ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  35%|███▌      | 1769/5000 [04:13<07:53,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000203095.jpg: 480x640 1 apple, 4 carrots, 1 dining table, 64.7ms
Speed: 2.9ms preprocess, 64.7ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▌      | 1770/5000 [04:13<07:31,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000203294.jpg: 480x640 2 persons, 1 car, 1 bus, 1 backpack, 1 handbag, 94.9ms
Speed: 2.9ms preprocess, 94.9ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▌      | 1771/5000 [04:13<07:50,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000203317.jpg: 544x640 1 bicycle, 146.2ms
Speed: 2.5ms preprocess, 146.2ms inference, 2.3ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  35%|███▌      | 1772/5000 [04:13<08:37,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000203389.jpg: 480x640 8 persons, 1 motorcycle, 63.4ms
Speed: 2.9ms preprocess, 63.4ms inference, 8.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▌      | 1773/5000 [04:13<08:09,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000203488.jpg: 480x640 1 car, 1 parking meter, 61.2ms
Speed: 2.8ms preprocess, 61.2ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  35%|███▌      | 1774/5000 [04:13<07:32,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000203546.jpg: 480x640 1 bear, 62.4ms
Speed: 2.8ms preprocess, 62.4ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1775/5000 [04:14<07:10,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000203580.jpg: 448x640 1 bench, 2 umbrellas, 68.8ms
Speed: 4.4ms preprocess, 68.8ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▌      | 1776/5000 [04:14<07:03,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000203629.jpg: 448x640 13 persons, 2 wine glasss, 56.9ms
Speed: 2.6ms preprocess, 56.9ms inference, 13.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▌      | 1777/5000 [04:14<07:17,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000203639.jpg: 640x512 1 person, 130.7ms
Speed: 2.6ms preprocess, 130.7ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  36%|███▌      | 1778/5000 [04:14<08:02,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000203864.jpg: 512x640 2 persons, 3 tennis rackets, 71.6ms
Speed: 2.7ms preprocess, 71.6ms inference, 6.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  36%|███▌      | 1779/5000 [04:14<07:53,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000203931.jpg: 640x448 4 persons, 3 baseball bats, 172.9ms
Speed: 3.0ms preprocess, 172.9ms inference, 10.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  36%|███▌      | 1780/5000 [04:14<09:25,  5.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000204186.jpg: 544x640 20 persons, 1 motorcycle, 73.4ms
Speed: 2.2ms preprocess, 73.4ms inference, 23.9ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  36%|███▌      | 1781/5000 [04:15<09:59,  5.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000204329.jpg: 640x448 6 persons, 1 surfboard, 63.0ms
Speed: 2.6ms preprocess, 63.0ms inference, 6.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  36%|███▌      | 1782/5000 [04:15<09:09,  5.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000204871.jpg: 640x640 2 persons, 3 cars, 1 truck, 1 fire hydrant, 85.1ms
Speed: 4.3ms preprocess, 85.1ms inference, 10.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  36%|███▌      | 1783/5000 [04:15<09:06,  5.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000205105.jpg: 448x640 1 person, 2 sports balls, 1 tennis racket, 1 chair, 68.2ms
Speed: 3.2ms preprocess, 68.2ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▌      | 1784/5000 [04:15<08:37,  6.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000205282.jpg: 480x640 7 giraffes, 74.4ms
Speed: 3.4ms preprocess, 74.4ms inference, 6.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1785/5000 [04:15<08:21,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000205289.jpg: 480x640 1 truck, 67.8ms
Speed: 2.8ms preprocess, 67.8ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1786/5000 [04:15<07:47,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000205324.jpg: 448x640 1 person, 1 frisbee, 64.6ms
Speed: 2.8ms preprocess, 64.6ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▌      | 1787/5000 [04:15<07:19,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000205333.jpg: 192x640 2 persons, 4 skiss, 103.7ms
Speed: 2.4ms preprocess, 103.7ms inference, 2.7ms postprocess per image at shape (1, 3, 192, 640)


Segmenting Images:  36%|███▌      | 1788/5000 [04:16<07:27,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000205401.jpg: 416x640 2 boats, 165.0ms
Speed: 2.4ms preprocess, 165.0ms inference, 4.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  36%|███▌      | 1789/5000 [04:16<08:42,  6.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000205514.jpg: 448x640 3 chairs, 1 couch, 1 potted plant, 1 tv, 2 books, 2 vases, 60.9ms
Speed: 2.7ms preprocess, 60.9ms inference, 10.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▌      | 1790/5000 [04:16<08:15,  6.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000205542.jpg: 640x480 1 teddy bear, 66.5ms
Speed: 2.8ms preprocess, 66.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  36%|███▌      | 1791/5000 [04:16<07:34,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000205647.jpg: 448x640 1 truck, 63.8ms
Speed: 2.6ms preprocess, 63.8ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▌      | 1792/5000 [04:16<07:12,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000205776.jpg: 448x640 (no detections), 91.4ms
Speed: 3.0ms preprocess, 91.4ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▌      | 1793/5000 [04:16<07:15,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000205834.jpg: 448x640 1 cat, 1 dog, 1 bowl, 60.8ms
Speed: 2.4ms preprocess, 60.8ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▌      | 1794/5000 [04:16<06:50,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000206025.jpg: 640x448 1 person, 1 tie, 61.1ms
Speed: 2.6ms preprocess, 61.1ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  36%|███▌      | 1795/5000 [04:16<06:38,  8.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000206027.jpg: 480x640 3 bottles, 2 cups, 1 pizza, 1 dining table, 61.3ms
Speed: 2.7ms preprocess, 61.3ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1796/5000 [04:17<06:38,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000206135.jpg: 640x448 4 cows, 1 clock, 60.6ms
Speed: 3.7ms preprocess, 60.6ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  36%|███▌      | 1797/5000 [04:17<06:33,  8.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000206218.jpg: 480x640 1 laptop, 1 keyboard, 83.4ms
Speed: 3.3ms preprocess, 83.4ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1798/5000 [04:17<06:40,  8.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000206271.jpg: 640x352 1 toilet, 119.1ms
Speed: 2.9ms preprocess, 119.1ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 352)


Segmenting Images:  36%|███▌      | 1799/5000 [04:17<07:18,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000206411.jpg: 480x640 1 person, 1 toilet, 62.9ms
Speed: 2.7ms preprocess, 62.9ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1800/5000 [04:17<06:54,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000206487.jpg: 480x640 1 person, 1 car, 1 motorcycle, 1 bus, 58.6ms
Speed: 2.8ms preprocess, 58.6ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1801/5000 [04:17<06:38,  8.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000206579.jpg: 480x640 2 persons, 1 cake, 1 dining table, 92.4ms
Speed: 2.5ms preprocess, 92.4ms inference, 6.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1802/5000 [04:17<07:01,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000206831.jpg: 448x640 2 dogs, 61.0ms
Speed: 2.7ms preprocess, 61.0ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▌      | 1803/5000 [04:18<06:44,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000206838.jpg: 480x640 2 persons, 2 horses, 62.8ms
Speed: 2.7ms preprocess, 62.8ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1804/5000 [04:18<06:36,  8.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000206994.jpg: 448x640 1 train, 2 benchs, 58.8ms
Speed: 2.8ms preprocess, 58.8ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▌      | 1805/5000 [04:18<06:26,  8.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000207306.jpg: 640x480 2 persons, 1 traffic light, 1 teddy bear, 63.1ms
Speed: 3.2ms preprocess, 63.1ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  36%|███▌      | 1806/5000 [04:18<06:22,  8.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000207538.jpg: 480x640 1 microwave, 91.4ms
Speed: 3.4ms preprocess, 91.4ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1807/5000 [04:18<06:41,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000207585.jpg: 480x640 3 teddy bears, 66.9ms
Speed: 2.7ms preprocess, 66.9ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1808/5000 [04:18<06:34,  8.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000207728.jpg: 384x640 2 sheeps, 111.2ms
Speed: 2.2ms preprocess, 111.2ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  36%|███▌      | 1809/5000 [04:18<07:06,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000207844.jpg: 480x640 4 persons, 6 umbrellas, 18 chairs, 63.4ms
Speed: 3.0ms preprocess, 63.4ms inference, 25.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1810/5000 [04:18<08:09,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000208208.jpg: 480x640 1 airplane, 80.3ms
Speed: 2.5ms preprocess, 80.3ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1811/5000 [04:19<07:43,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000208363.jpg: 480x640 20 persons, 2 pizzas, 2 chairs, 1 laptop, 60.5ms
Speed: 2.6ms preprocess, 60.5ms inference, 20.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▌      | 1812/5000 [04:19<08:18,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000208423.jpg: 480x640 5 cars, 7 kites, 57.3ms
Speed: 2.5ms preprocess, 57.3ms inference, 12.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▋      | 1813/5000 [04:19<08:05,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000208901.jpg: 448x640 1 airplane, 56.7ms
Speed: 2.4ms preprocess, 56.7ms inference, 1.8ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000209142.jpg: 448x640 1 fork, 1 spoon, 1 bowl, 6 broccolis, 1 carrot, 57.5ms
Speed: 2.5ms preprocess, 57.5ms inference, 7.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▋      | 1815/5000 [04:19<07:04,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000209222.jpg: 448x640 6 persons, 1 truck, 1 bench, 79.3ms
Speed: 2.7ms preprocess, 79.3ms inference, 8.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▋      | 1816/5000 [04:19<07:16,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000209530.jpg: 448x640 7 persons, 2 bottles, 1 wine glass, 1 cup, 3 pizzas, 1 dining table, 57.0ms
Speed: 2.7ms preprocess, 57.0ms inference, 13.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▋      | 1817/5000 [04:19<07:22,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000209613.jpg: 448x640 1 dog, 9 sheeps, 1 bear, 56.3ms
Speed: 2.7ms preprocess, 56.3ms inference, 12.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▋      | 1818/5000 [04:20<07:21,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000209747.jpg: 448x640 1 cat, 1 cake, 1 sink, 56.4ms
Speed: 3.5ms preprocess, 56.4ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  36%|███▋      | 1819/5000 [04:20<06:55,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000209753.jpg: 640x480 1 person, 61.8ms
Speed: 2.3ms preprocess, 61.8ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  36%|███▋      | 1820/5000 [04:20<06:34,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000209757.jpg: 640x448 6 persons, 1 bench, 1 skateboard, 76.6ms
Speed: 3.4ms preprocess, 76.6ms inference, 7.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  36%|███▋      | 1821/5000 [04:20<06:59,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000209829.jpg: 480x640 3 persons, 1 surfboard, 60.2ms
Speed: 2.5ms preprocess, 60.2ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  36%|███▋      | 1822/5000 [04:20<06:41,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000209972.jpg: 320x640 1 boat, 101.5ms
Speed: 1.8ms preprocess, 101.5ms inference, 1.5ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  36%|███▋      | 1823/5000 [04:20<06:55,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210030.jpg: 512x640 1 wine glass, 1 fork, 1 cake, 1 dining table, 134.9ms
Speed: 3.7ms preprocess, 134.9ms inference, 3.8ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  36%|███▋      | 1824/5000 [04:20<07:51,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210032.jpg: 416x640 2 persons, 1 bird, 2 sandwichs, 53.8ms
Speed: 2.3ms preprocess, 53.8ms inference, 4.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  36%|███▋      | 1825/5000 [04:20<07:09,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210099.jpg: 544x640 1 cat, 3 chairs, 179.5ms
Speed: 2.4ms preprocess, 179.5ms inference, 3.9ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  37%|███▋      | 1826/5000 [04:21<08:43,  6.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210230.jpg: 480x640 8 persons, 1 umbrella, 1 pizza, 2 dining tables, 62.3ms
Speed: 2.6ms preprocess, 62.3ms inference, 11.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  37%|███▋      | 1827/5000 [04:21<08:26,  6.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210273.jpg: 448x640 5 persons, 15 cars, 10 buss, 59.3ms
Speed: 2.6ms preprocess, 59.3ms inference, 20.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1828/5000 [04:21<08:43,  6.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210299.jpg: 480x640 1 person, 1 bicycle, 1 motorcycle, 64.2ms
Speed: 2.8ms preprocess, 64.2ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  37%|███▋      | 1829/5000 [04:21<07:57,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210388.jpg: 448x640 3 persons, 82.5ms
Speed: 2.7ms preprocess, 82.5ms inference, 6.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1830/5000 [04:21<07:44,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210394.jpg: 480x640 13 persons, 2 bicycles, 1 car, 1 motorcycle, 1 bus, 1 truck, 1 traffic light, 66.3ms
Speed: 3.2ms preprocess, 66.3ms inference, 18.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  37%|███▋      | 1831/5000 [04:21<08:08,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210502.jpg: 480x640 1 train, 62.5ms
Speed: 2.5ms preprocess, 62.5ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  37%|███▋      | 1832/5000 [04:22<07:21,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210520.jpg: 640x640 3 wine glasss, 3 cups, 2 forks, 3 spoons, 2 bowls, 1 broccoli, 1 dining table, 151.1ms
Speed: 3.4ms preprocess, 151.1ms inference, 21.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  37%|███▋      | 1833/5000 [04:22<09:20,  5.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210708.jpg: 448x640 2 elephants, 57.4ms
Speed: 4.1ms preprocess, 57.4ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1834/5000 [04:22<08:17,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210789.jpg: 640x480 2 persons, 1 car, 2 umbrellas, 80.2ms
Speed: 4.9ms preprocess, 80.2ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  37%|███▋      | 1835/5000 [04:22<07:59,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210855.jpg: 448x640 1 toilet, 1 sink, 62.2ms
Speed: 2.6ms preprocess, 62.2ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1836/5000 [04:22<07:20,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000210915.jpg: 480x640 1 person, 3 surfboards, 61.7ms
Speed: 2.5ms preprocess, 61.7ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  37%|███▋      | 1837/5000 [04:22<06:59,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000211042.jpg: 640x480 1 cat, 1 toilet, 65.7ms
Speed: 2.7ms preprocess, 65.7ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  37%|███▋      | 1838/5000 [04:22<06:46,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000211069.jpg: 480x640 3 giraffes, 98.2ms
Speed: 3.3ms preprocess, 98.2ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  37%|███▋      | 1839/5000 [04:23<07:16,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000211120.jpg: 384x640 4 teddy bears, 61.3ms
Speed: 2.5ms preprocess, 61.3ms inference, 5.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  37%|███▋      | 1840/5000 [04:23<06:59,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000211674.jpg: 416x640 6 persons, 1 bus, 63.3ms
Speed: 2.7ms preprocess, 63.3ms inference, 6.2ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  37%|███▋      | 1841/5000 [04:23<06:56,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000211825.jpg: 480x640 2 persons, 2 cakes, 1 dining table, 1 oven, 76.0ms
Speed: 3.1ms preprocess, 76.0ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  37%|███▋      | 1842/5000 [04:23<07:05,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000212072.jpg: 480x640 1 stop sign, 70.7ms
Speed: 4.0ms preprocess, 70.7ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  37%|███▋      | 1843/5000 [04:23<07:09,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000212166.jpg: 640x640 1 person, 1 pizza, 1 dining table, 93.5ms
Speed: 6.0ms preprocess, 93.5ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  37%|███▋      | 1844/5000 [04:23<07:28,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000212226.jpg: 448x640 1 person, 62.4ms
Speed: 3.8ms preprocess, 62.4ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1845/5000 [04:23<07:03,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000212453.jpg: 640x480 1 tie, 1 toilet, 67.4ms
Speed: 2.9ms preprocess, 67.4ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  37%|███▋      | 1846/5000 [04:24<06:48,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000212559.jpg: 544x640 9 sheeps, 76.2ms
Speed: 1.7ms preprocess, 76.2ms inference, 8.2ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  37%|███▋      | 1847/5000 [04:24<07:07,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000212573.jpg: 576x640 2 persons, 4 cars, 2 traffic lights, 1 umbrella, 156.4ms
Speed: 1.6ms preprocess, 156.4ms inference, 9.8ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  37%|███▋      | 1848/5000 [04:24<08:47,  5.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000212800.jpg: 448x640 12 persons, 1 boat, 14 umbrellas, 59.7ms
Speed: 3.0ms preprocess, 59.7ms inference, 22.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1849/5000 [04:24<09:05,  5.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000212895.jpg: 640x480 1 giraffe, 63.7ms
Speed: 2.7ms preprocess, 63.7ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  37%|███▋      | 1850/5000 [04:24<08:09,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213033.jpg: 640x480 2 persons, 1 umbrella, 59.5ms
Speed: 2.6ms preprocess, 59.5ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  37%|███▋      | 1851/5000 [04:24<07:26,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213035.jpg: 448x640 2 persons, 1 cell phone, 54.9ms
Speed: 2.5ms preprocess, 54.9ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1852/5000 [04:24<06:53,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213086.jpg: 640x480 2 persons, 1 cat, 2 ovens, 93.9ms
Speed: 4.2ms preprocess, 93.9ms inference, 8.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  37%|███▋      | 1853/5000 [04:25<07:13,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213171.jpg: 640x480 1 person, 1 baseball glove, 68.2ms
Speed: 4.1ms preprocess, 68.2ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  37%|███▋      | 1854/5000 [04:25<06:57,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213224.jpg: 640x448 1 potted plant, 1 dining table, 3 vases, 156.1ms
Speed: 3.0ms preprocess, 156.1ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  37%|███▋      | 1855/5000 [04:25<08:19,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213255.jpg: 640x480 1 person, 4 cars, 3 bottles, 67.5ms
Speed: 2.8ms preprocess, 67.5ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  37%|███▋      | 1856/5000 [04:25<08:02,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213422.jpg: 448x640 2 chairs, 2 beds, 89.3ms
Speed: 2.6ms preprocess, 89.3ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1857/5000 [04:25<07:55,  6.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213445.jpg: 640x544 1 cat, 1 bowl, 1 chair, 2 couchs, 3 books, 139.1ms
Speed: 2.7ms preprocess, 139.1ms inference, 9.0ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  37%|███▋      | 1858/5000 [04:25<08:48,  5.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213547.jpg: 640x480 2 persons, 1 bottle, 1 refrigerator, 57.3ms
Speed: 2.4ms preprocess, 57.3ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  37%|███▋      | 1859/5000 [04:26<07:56,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213593.jpg: 448x640 2 persons, 8 cars, 1 stop sign, 59.3ms
Speed: 2.7ms preprocess, 59.3ms inference, 11.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1860/5000 [04:26<07:43,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213605.jpg: 640x480 1 car, 1 motorcycle, 1 bus, 66.3ms
Speed: 3.1ms preprocess, 66.3ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  37%|███▋      | 1861/5000 [04:26<07:36,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213816.jpg: 480x640 1 train, 73.5ms
Speed: 3.2ms preprocess, 73.5ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  37%|███▋      | 1862/5000 [04:26<07:20,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213830.jpg: 448x640 6 persons, 1 sports ball, 2 tennis rackets, 69.2ms
Speed: 3.1ms preprocess, 69.2ms inference, 8.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1863/5000 [04:26<07:25,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000213935.jpg: 448x640 7 apples, 3 oranges, 63.4ms
Speed: 3.4ms preprocess, 63.4ms inference, 7.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1864/5000 [04:26<07:20,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000214192.jpg: 448x640 2 persons, 2 motorcycles, 58.5ms
Speed: 3.1ms preprocess, 58.5ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1865/5000 [04:26<06:54,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000214200.jpg: 640x448 4 cars, 1 stop sign, 82.7ms
Speed: 2.6ms preprocess, 82.7ms inference, 13.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  37%|███▋      | 1866/5000 [04:26<07:09,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000214205.jpg: 448x640 1 airplane, 1 frisbee, 58.1ms
Speed: 3.0ms preprocess, 58.1ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1867/5000 [04:27<06:42,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000214224.jpg: 448x640 4 bottles, 1 cup, 59.3ms
Speed: 2.7ms preprocess, 59.3ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1868/5000 [04:27<06:26,  8.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000214539.jpg: 448x640 8 persons, 1 umbrella, 1 sports ball, 62.8ms
Speed: 2.7ms preprocess, 62.8ms inference, 8.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1869/5000 [04:27<06:44,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000214703.jpg: 640x448 3 persons, 71.1ms
Speed: 3.3ms preprocess, 71.1ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  37%|███▋      | 1870/5000 [04:27<06:49,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000214720.jpg: 640x480 1 person, 1 cup, 1 bowl, 1 cake, 2 chairs, 1 couch, 3 potted plants, 1 dining table, 79.7ms
Speed: 5.4ms preprocess, 79.7ms inference, 11.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  37%|███▋      | 1871/5000 [04:27<07:21,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000214753.jpg: 448x640 2 persons, 2 horses, 58.2ms
Speed: 2.6ms preprocess, 58.2ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  37%|███▋      | 1872/5000 [04:27<06:54,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000214869.jpg: 480x640 1 person, 1 suitcase, 1 couch, 62.0ms
Speed: 2.6ms preprocess, 62.0ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  37%|███▋      | 1873/5000 [04:27<06:36,  7.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000215072.jpg: 480x640 4 persons, 1 bicycle, 1 umbrella, 1 chair, 63.1ms
Speed: 2.6ms preprocess, 63.1ms inference, 9.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  37%|███▋      | 1874/5000 [04:27<06:37,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000215114.jpg: 640x576 1 person, 5 bottles, 2 refrigerators, 167.4ms
Speed: 1.8ms preprocess, 167.4ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:  38%|███▊      | 1875/5000 [04:28<08:20,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000215245.jpg: 480x640 8 zebras, 61.2ms
Speed: 2.9ms preprocess, 61.2ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1876/5000 [04:28<07:52,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000215259.jpg: 640x448 2 persons, 1 couch, 2 remotes, 61.1ms
Speed: 4.0ms preprocess, 61.1ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  38%|███▊      | 1877/5000 [04:28<07:23,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000215644.jpg: 480x640 1 suitcase, 1 bottle, 1 microwave, 65.6ms
Speed: 2.5ms preprocess, 65.6ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1878/5000 [04:28<06:59,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000215723.jpg: 480x640 2 persons, 2 cars, 1 umbrella, 78.2ms
Speed: 2.5ms preprocess, 78.2ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1879/5000 [04:28<07:04,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000215778.jpg: 448x640 1 cup, 1 laptop, 2 mouses, 10 books, 57.6ms
Speed: 2.5ms preprocess, 57.6ms inference, 11.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1880/5000 [04:28<07:10,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000216277.jpg: 480x640 1 person, 1 apple, 1 orange, 1 donut, 1 cell phone, 60.5ms
Speed: 2.9ms preprocess, 60.5ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1881/5000 [04:28<06:43,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000216296.jpg: 448x640 3 persons, 1 tennis racket, 59.2ms
Speed: 2.8ms preprocess, 59.2ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1882/5000 [04:29<06:26,  8.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000216419.jpg: 480x640 1 clock, 67.6ms
Speed: 2.5ms preprocess, 67.6ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1883/5000 [04:29<06:23,  8.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000216497.jpg: 480x640 3 chairs, 1 couch, 1 oven, 62.2ms
Speed: 2.7ms preprocess, 62.2ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1884/5000 [04:29<06:22,  8.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000216516.jpg: 640x480 1 person, 1 skis, 63.7ms
Speed: 3.0ms preprocess, 63.7ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  38%|███▊      | 1885/5000 [04:29<06:13,  8.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000216636.jpg: 448x640 1 cup, 1 cake, 55.1ms
Speed: 2.6ms preprocess, 55.1ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1886/5000 [04:29<05:56,  8.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000216739.jpg: 448x640 1 giraffe, 57.7ms
Speed: 2.4ms preprocess, 57.7ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1887/5000 [04:29<05:46,  8.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000217060.jpg: 448x640 1 airplane, 53.6ms
Speed: 2.4ms preprocess, 53.6ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1888/5000 [04:29<05:43,  9.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000217219.jpg: 384x640 1 bed, 114.4ms
Speed: 2.7ms preprocess, 114.4ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  38%|███▊      | 1889/5000 [04:29<06:30,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000217285.jpg: 448x640 11 persons, 3 baseball bats, 1 baseball glove, 2 chairs, 52.9ms
Speed: 2.9ms preprocess, 52.9ms inference, 16.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1890/5000 [04:30<06:51,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000217400.jpg: 480x640 1 train, 60.2ms
Speed: 2.6ms preprocess, 60.2ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1891/5000 [04:30<06:31,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000217425.jpg: 480x640 1 clock, 84.9ms
Speed: 2.5ms preprocess, 84.9ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1892/5000 [04:30<06:34,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000217614.jpg: 640x480 1 giraffe, 61.0ms
Speed: 2.8ms preprocess, 61.0ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  38%|███▊      | 1893/5000 [04:30<06:20,  8.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000217753.jpg: 480x640 1 apple, 2 oranges, 69.3ms
Speed: 2.8ms preprocess, 69.3ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1894/5000 [04:30<06:24,  8.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000217872.jpg: 640x448 1 person, 2 snowboards, 60.9ms
Speed: 3.9ms preprocess, 60.9ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  38%|███▊      | 1895/5000 [04:30<06:14,  8.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000217948.jpg: 448x640 4 bears, 78.5ms
Speed: 2.6ms preprocess, 78.5ms inference, 7.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1896/5000 [04:30<06:31,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000217957.jpg: 640x480 1 clock, 65.0ms
Speed: 2.6ms preprocess, 65.0ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  38%|███▊      | 1897/5000 [04:30<06:19,  8.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000218091.jpg: 448x640 2 chairs, 2 couchs, 1 bed, 1 tv, 1 book, 56.8ms
Speed: 2.7ms preprocess, 56.8ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1898/5000 [04:31<06:15,  8.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000218249.jpg: 448x640 1 sandwich, 1 dining table, 62.0ms
Speed: 2.8ms preprocess, 62.0ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1899/5000 [04:31<06:11,  8.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000218362.jpg: 448x640 1 tv, 1 mouse, 1 book, 2 clocks, 95.8ms
Speed: 2.8ms preprocess, 95.8ms inference, 8.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1900/5000 [04:31<06:49,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000218424.jpg: 480x640 3 zebras, 71.9ms
Speed: 3.0ms preprocess, 71.9ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1901/5000 [04:31<06:46,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000218439.jpg: 640x480 1 person, 1 tv, 1 cell phone, 1 sink, 69.2ms
Speed: 3.1ms preprocess, 69.2ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  38%|███▊      | 1902/5000 [04:31<06:41,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000218997.jpg: 640x640 5 persons, 1 backpack, 1 sports ball, 1 baseball glove, 80.8ms
Speed: 2.2ms preprocess, 80.8ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  38%|███▊      | 1903/5000 [04:31<07:08,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000219271.jpg: 640x448 12 persons, 1 truck, 94.5ms
Speed: 3.6ms preprocess, 94.5ms inference, 13.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  38%|███▊      | 1904/5000 [04:31<07:46,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000219283.jpg: 640x640 1 teddy bear, 80.4ms
Speed: 3.2ms preprocess, 80.4ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  38%|███▊      | 1905/5000 [04:32<07:32,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000219440.jpg: 448x640 3 cows, 63.3ms
Speed: 2.5ms preprocess, 63.3ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1906/5000 [04:32<07:00,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000219485.jpg: 640x448 1 cat, 60.5ms
Speed: 2.5ms preprocess, 60.5ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  38%|███▊      | 1907/5000 [04:32<06:31,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000219578.jpg: 448x640 1 cat, 1 dog, 1 couch, 52.6ms
Speed: 2.7ms preprocess, 52.6ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1908/5000 [04:32<06:14,  8.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000220310.jpg: 640x448 2 cars, 2 teddy bears, 82.6ms
Speed: 3.8ms preprocess, 82.6ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  38%|███▊      | 1909/5000 [04:32<06:35,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000220584.jpg: 448x640 1 person, 1 surfboard, 65.9ms
Speed: 2.7ms preprocess, 65.9ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1910/5000 [04:32<06:21,  8.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000220732.jpg: 480x640 2 persons, 2 cars, 1 bus, 1 truck, 68.5ms
Speed: 2.5ms preprocess, 68.5ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1911/5000 [04:32<06:27,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000220764.jpg: 448x640 1 person, 66.5ms
Speed: 3.1ms preprocess, 66.5ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1912/5000 [04:32<06:16,  8.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000220858.jpg: 480x640 2 persons, 1 boat, 108.5ms
Speed: 4.4ms preprocess, 108.5ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1913/5000 [04:33<06:54,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000221017.jpg: 480x640 1 cow, 69.4ms
Speed: 2.9ms preprocess, 69.4ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1914/5000 [04:33<06:44,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000221155.jpg: 512x640 1 person, 2 cows, 1 clock, 145.3ms
Speed: 3.1ms preprocess, 145.3ms inference, 3.5ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  38%|███▊      | 1915/5000 [04:33<07:51,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000221213.jpg: 480x640 2 trains, 61.1ms
Speed: 2.8ms preprocess, 61.1ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1916/5000 [04:33<07:11,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000221281.jpg: 640x384 3 giraffes, 152.8ms
Speed: 2.0ms preprocess, 152.8ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  38%|███▊      | 1917/5000 [04:33<08:05,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000221291.jpg: 640x448 1 person, 1 kite, 57.2ms
Speed: 4.0ms preprocess, 57.2ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  38%|███▊      | 1918/5000 [04:33<07:15,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000221502.jpg: 320x640 1 bench, 2 potted plants, 105.0ms
Speed: 2.5ms preprocess, 105.0ms inference, 2.3ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  38%|███▊      | 1919/5000 [04:33<07:24,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000221693.jpg: 448x640 1 dog, 1 frisbee, 58.7ms
Speed: 2.5ms preprocess, 58.7ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1920/5000 [04:34<06:47,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000221708.jpg: 640x480 3 chairs, 1 refrigerator, 82.9ms
Speed: 3.2ms preprocess, 82.9ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  38%|███▊      | 1921/5000 [04:34<06:59,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000221754.jpg: 448x640 4 persons, 4 cars, 3 traffic lights, 62.6ms
Speed: 2.6ms preprocess, 62.6ms inference, 9.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1922/5000 [04:34<07:01,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000221872.jpg: 448x640 1 fork, 5 bowls, 2 carrots, 63.4ms
Speed: 2.8ms preprocess, 63.4ms inference, 7.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1923/5000 [04:34<06:56,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000222094.jpg: 480x640 9 cars, 1 stop sign, 66.9ms
Speed: 4.2ms preprocess, 66.9ms inference, 9.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  38%|███▊      | 1924/5000 [04:34<07:03,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000222118.jpg: 448x640 1 person, 1 backpack, 1 handbag, 87.4ms
Speed: 2.7ms preprocess, 87.4ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  38%|███▊      | 1925/5000 [04:34<07:00,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000222235.jpg: 448x640 1 cat, 60.6ms
Speed: 2.8ms preprocess, 60.6ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▊      | 1926/5000 [04:34<06:33,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000222299.jpg: 480x640 1 laptop, 1 cell phone, 5 books, 64.1ms
Speed: 4.6ms preprocess, 64.1ms inference, 7.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▊      | 1927/5000 [04:34<06:38,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000222317.jpg: 480x640 1 dog, 2 couchs, 72.0ms
Speed: 2.8ms preprocess, 72.0ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▊      | 1928/5000 [04:35<06:49,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000222455.jpg: 640x640 (no detections), 98.4ms
Speed: 4.4ms preprocess, 98.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  39%|███▊      | 1929/5000 [04:35<06:57,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000222458.jpg: 448x640 5 persons, 6 benchs, 63.2ms
Speed: 3.0ms preprocess, 63.2ms inference, 10.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▊      | 1930/5000 [04:35<07:03,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000222559.jpg: 448x640 7 persons, 7 boats, 1 bird, 58.7ms
Speed: 3.0ms preprocess, 58.7ms inference, 13.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▊      | 1931/5000 [04:35<07:01,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000222735.jpg: 640x480 2 persons, 1 tv, 1 remote, 62.7ms
Speed: 2.6ms preprocess, 62.7ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  39%|███▊      | 1932/5000 [04:35<06:54,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000222825.jpg: 640x448 1 toilet, 1 oven, 1 refrigerator, 90.6ms
Speed: 2.7ms preprocess, 90.6ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  39%|███▊      | 1933/5000 [04:35<06:59,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000222863.jpg: 448x640 1 cow, 62.4ms
Speed: 2.6ms preprocess, 62.4ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▊      | 1934/5000 [04:35<06:32,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000222991.jpg: 640x480 (no detections), 62.0ms
Speed: 3.4ms preprocess, 62.0ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000223090.jpg: 480x640 1 knife, 5 carrots, 1 dining table, 58.4ms
Speed: 2.5ms preprocess, 58.4ms inference, 6.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▊      | 1936/5000 [04:36<06:02,  8.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000223130.jpg: 640x448 1 bird, 1 giraffe, 69.9ms
Speed: 2.7ms preprocess, 69.9ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  39%|███▊      | 1937/5000 [04:36<06:14,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000223182.jpg: 544x640 6 persons, 2 tennis rackets, 2 bottles, 154.1ms
Speed: 2.2ms preprocess, 154.1ms inference, 10.3ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  39%|███▉      | 1938/5000 [04:36<07:43,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000223188.jpg: 640x448 1 person, 57.5ms
Speed: 2.8ms preprocess, 57.5ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  39%|███▉      | 1939/5000 [04:36<07:05,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000223738.jpg: 640x640 4 persons, 1 sports ball, 1 baseball bat, 82.6ms
Speed: 2.4ms preprocess, 82.6ms inference, 7.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  39%|███▉      | 1940/5000 [04:36<07:16,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000223747.jpg: 480x640 2 persons, 65.4ms
Speed: 4.0ms preprocess, 65.4ms inference, 7.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1941/5000 [04:36<07:09,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000223789.jpg: 640x448 2 bottles, 1 sink, 63.2ms
Speed: 4.5ms preprocess, 63.2ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  39%|███▉      | 1942/5000 [04:37<06:49,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000223955.jpg: 448x640 1 person, 1 frisbee, 60.1ms
Speed: 3.7ms preprocess, 60.1ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▉      | 1943/5000 [04:37<06:26,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000223959.jpg: 640x480 1 person, 1 tennis racket, 65.5ms
Speed: 2.8ms preprocess, 65.5ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  39%|███▉      | 1944/5000 [04:37<06:17,  8.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000224051.jpg: 448x640 1 bicycle, 2 cars, 67.6ms
Speed: 2.8ms preprocess, 67.6ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▉      | 1945/5000 [04:37<06:24,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000224093.jpg: 320x640 3 horses, 4 cows, 48.3ms
Speed: 2.1ms preprocess, 48.3ms inference, 4.8ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  39%|███▉      | 1946/5000 [04:37<06:07,  8.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000224119.jpg: 448x640 1 person, 59.2ms
Speed: 2.4ms preprocess, 59.2ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▉      | 1947/5000 [04:37<05:54,  8.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000224200.jpg: 640x640 1 fire hydrant, 1 dog, 80.5ms
Speed: 2.3ms preprocess, 80.5ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  39%|███▉      | 1948/5000 [04:37<06:11,  8.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000224222.jpg: 448x640 3 persons, 1 surfboard, 90.3ms
Speed: 2.7ms preprocess, 90.3ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▉      | 1949/5000 [04:37<06:34,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000224337.jpg: 448x640 4 persons, 59.6ms
Speed: 2.7ms preprocess, 59.6ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▉      | 1950/5000 [04:37<06:22,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000224664.jpg: 480x640 4 kites, 64.1ms
Speed: 2.8ms preprocess, 64.1ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1951/5000 [04:38<06:15,  8.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000224675.jpg: 480x640 2 persons, 1 umbrella, 65.0ms
Speed: 3.8ms preprocess, 65.0ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1952/5000 [04:38<06:12,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000224724.jpg: 640x480 3 persons, 1 bicycle, 1 car, 1 traffic light, 1 handbag, 63.7ms
Speed: 2.9ms preprocess, 63.7ms inference, 12.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  39%|███▉      | 1953/5000 [04:38<06:31,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000224807.jpg: 448x640 21 persons, 2 handbags, 2 cups, 1 chair, 1 dining table, 61.7ms
Speed: 3.1ms preprocess, 61.7ms inference, 24.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▉      | 1954/5000 [04:38<07:33,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000225184.jpg: 640x480 4 sheeps, 66.7ms
Speed: 3.0ms preprocess, 66.7ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  39%|███▉      | 1955/5000 [04:38<07:07,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000225405.jpg: 480x640 3 persons, 1 sports ball, 64.6ms
Speed: 3.1ms preprocess, 64.6ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1956/5000 [04:38<06:56,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000225532.jpg: 480x640 3 cars, 99.5ms
Speed: 4.4ms preprocess, 99.5ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1957/5000 [04:38<07:20,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000225670.jpg: 448x640 2 persons, 1 frisbee, 1 skateboard, 69.7ms
Speed: 2.7ms preprocess, 69.7ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▉      | 1958/5000 [04:39<07:02,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000225757.jpg: 480x640 3 tvs, 1 laptop, 4 mouses, 1 keyboard, 3 cell phones, 79.7ms
Speed: 4.5ms preprocess, 79.7ms inference, 11.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1959/5000 [04:39<07:28,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000225946.jpg: 480x640 1 train, 75.4ms
Speed: 2.7ms preprocess, 75.4ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1960/5000 [04:39<07:09,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226058.jpg: 480x640 1 couch, 1 bed, 2 teddy bears, 120.4ms
Speed: 3.0ms preprocess, 120.4ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1961/5000 [04:39<07:47,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226111.jpg: 640x480 (no detections), 88.0ms
Speed: 3.0ms preprocess, 88.0ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  39%|███▉      | 1962/5000 [04:39<07:21,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226130.jpg: 448x640 1 broccoli, 1 hot dog, 1 dining table, 90.0ms
Speed: 4.0ms preprocess, 90.0ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▉      | 1963/5000 [04:39<07:28,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226147.jpg: 640x480 2 persons, 2 handbags, 1 cup, 1 fork, 1 sandwich, 1 potted plant, 1 dining table, 1 vase, 76.9ms
Speed: 3.5ms preprocess, 76.9ms inference, 11.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  39%|███▉      | 1964/5000 [04:40<08:27,  5.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226154.jpg: 512x640 1 person, 1 motorcycle, 1 bus, 71.5ms
Speed: 4.1ms preprocess, 71.5ms inference, 4.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  39%|███▉      | 1965/5000 [04:40<07:59,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226171.jpg: 480x640 1 bottle, 2 cups, 1 chair, 1 tv, 1 laptop, 2 keyboards, 70.7ms
Speed: 3.3ms preprocess, 70.7ms inference, 8.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1966/5000 [04:40<07:43,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226408.jpg: 480x640 3 persons, 1 teddy bear, 72.6ms
Speed: 3.1ms preprocess, 72.6ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1967/5000 [04:40<07:28,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226417.jpg: 448x640 10 persons, 3 bicycles, 1 car, 2 motorcycles, 3 traffic lights, 87.3ms
Speed: 5.9ms preprocess, 87.3ms inference, 18.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▉      | 1968/5000 [04:40<08:58,  5.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226592.jpg: 480x640 1 person, 1 bed, 1 book, 72.8ms
Speed: 3.5ms preprocess, 72.8ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1969/5000 [04:40<08:16,  6.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226662.jpg: 480x640 1 person, 2 motorcycles, 67.6ms
Speed: 2.9ms preprocess, 67.6ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1970/5000 [04:40<07:40,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226802.jpg: 384x640 12 persons, 6 cars, 3 buss, 62.0ms
Speed: 3.3ms preprocess, 62.0ms inference, 17.1ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  39%|███▉      | 1971/5000 [04:41<07:57,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226883.jpg: 480x640 1 horse, 1 umbrella, 92.3ms
Speed: 3.1ms preprocess, 92.3ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1972/5000 [04:41<08:01,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226903.jpg: 480x640 1 umbrella, 1 cake, 1 potted plant, 1 tv, 75.3ms
Speed: 3.5ms preprocess, 75.3ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  39%|███▉      | 1973/5000 [04:41<07:38,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000226984.jpg: 448x640 1 bottle, 1 cup, 1 knife, 1 bowl, 1 chair, 2 potted plants, 1 microwave, 1 oven, 1 refrigerator, 62.9ms
Speed: 3.3ms preprocess, 62.9ms inference, 10.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  39%|███▉      | 1974/5000 [04:41<07:35,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000227044.jpg: 480x640 1 cat, 3 bottles, 3 cups, 3 sinks, 71.2ms
Speed: 3.1ms preprocess, 71.2ms inference, 10.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|███▉      | 1975/5000 [04:41<07:38,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000227187.jpg: 480x640 2 birds, 3 bananas, 65.1ms
Speed: 2.8ms preprocess, 65.1ms inference, 6.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|███▉      | 1976/5000 [04:41<07:35,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000227399.jpg: 512x640 18 persons, 1 bicycle, 1 motorcycle, 1 bus, 1 train, 1 truck, 1 traffic light, 1 backpack, 1 handbag, 75.4ms
Speed: 4.9ms preprocess, 75.4ms inference, 27.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  40%|███▉      | 1977/5000 [04:42<08:33,  5.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000227478.jpg: 480x640 2 persons, 4 benchs, 61.4ms
Speed: 2.6ms preprocess, 61.4ms inference, 6.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|███▉      | 1978/5000 [04:42<07:52,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000227482.jpg: 480x640 1 person, 1 frisbee, 71.0ms
Speed: 2.9ms preprocess, 71.0ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|███▉      | 1979/5000 [04:42<07:16,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000227491.jpg: 480x640 1 person, 1 fire hydrant, 2 benchs, 63.0ms
Speed: 3.0ms preprocess, 63.0ms inference, 4.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|███▉      | 1980/5000 [04:42<06:49,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000227511.jpg: 480x640 6 cars, 81.7ms
Speed: 4.2ms preprocess, 81.7ms inference, 9.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|███▉      | 1981/5000 [04:42<07:10,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000227686.jpg: 640x448 1 car, 1 horse, 62.4ms
Speed: 2.4ms preprocess, 62.4ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  40%|███▉      | 1982/5000 [04:42<06:45,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000227765.jpg: 480x640 1 spoon, 1 bowl, 1 carrot, 64.4ms
Speed: 2.6ms preprocess, 64.4ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|███▉      | 1983/5000 [04:42<06:30,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000227898.jpg: 448x640 5 persons, 1 car, 1 horse, 62.3ms
Speed: 2.6ms preprocess, 62.3ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  40%|███▉      | 1984/5000 [04:42<06:26,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000227985.jpg: 480x640 1 knife, 2 sandwichs, 1 dining table, 64.0ms
Speed: 2.7ms preprocess, 64.0ms inference, 8.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|███▉      | 1985/5000 [04:43<06:37,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000228144.jpg: 448x640 3 chairs, 1 couch, 1 potted plant, 1 bed, 73.1ms
Speed: 3.3ms preprocess, 73.1ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  40%|███▉      | 1986/5000 [04:43<06:39,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000228214.jpg: 640x448 1 person, 1 cell phone, 70.0ms
Speed: 2.5ms preprocess, 70.0ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  40%|███▉      | 1987/5000 [04:43<06:30,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000228436.jpg: 512x640 1 bicycle, 5 cars, 6 boats, 78.8ms
Speed: 3.2ms preprocess, 78.8ms inference, 12.7ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  40%|███▉      | 1988/5000 [04:43<07:07,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000228771.jpg: 448x640 (no detections), 66.1ms
Speed: 2.7ms preprocess, 66.1ms inference, 0.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  40%|███▉      | 1989/5000 [04:43<06:29,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000228942.jpg: 320x640 7 persons, 2 cars, 1 bus, 2 traffic lights, 45.3ms
Speed: 2.1ms preprocess, 45.3ms inference, 11.7ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  40%|███▉      | 1990/5000 [04:43<06:23,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000228981.jpg: 480x640 3 cell phones, 77.5ms
Speed: 3.5ms preprocess, 77.5ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|███▉      | 1991/5000 [04:43<06:31,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229111.jpg: 640x480 1 person, 2 remotes, 66.1ms
Speed: 4.0ms preprocess, 66.1ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  40%|███▉      | 1992/5000 [04:44<06:22,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229216.jpg: 448x640 2 persons, 61.6ms
Speed: 3.2ms preprocess, 61.6ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  40%|███▉      | 1993/5000 [04:44<06:09,  8.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229221.jpg: 448x640 2 cows, 68.5ms
Speed: 2.8ms preprocess, 68.5ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  40%|███▉      | 1994/5000 [04:44<06:03,  8.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229311.jpg: 480x640 1 bowl, 1 oven, 2 sinks, 1 vase, 94.4ms
Speed: 4.1ms preprocess, 94.4ms inference, 7.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|███▉      | 1995/5000 [04:44<06:36,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229358.jpg: 448x640 2 persons, 4 bottles, 1 cell phone, 1 sink, 72.6ms
Speed: 2.8ms preprocess, 72.6ms inference, 9.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  40%|███▉      | 1996/5000 [04:44<06:40,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229553.jpg: 512x640 1 person, 1 skateboard, 77.9ms
Speed: 2.9ms preprocess, 77.9ms inference, 3.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  40%|███▉      | 1997/5000 [04:44<06:44,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229601.jpg: 448x640 8 persons, 2 baseball gloves, 61.7ms
Speed: 2.6ms preprocess, 61.7ms inference, 9.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  40%|███▉      | 1998/5000 [04:44<06:46,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229659.jpg: 480x640 3 persons, 6 chairs, 2 dining tables, 1 laptop, 1 clock, 67.0ms
Speed: 2.7ms preprocess, 67.0ms inference, 13.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|███▉      | 1999/5000 [04:44<07:12,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229747.jpg: 544x640 1 airplane, 78.7ms
Speed: 2.1ms preprocess, 78.7ms inference, 2.6ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  40%|████      | 2000/5000 [04:45<07:04,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229753.jpg: 448x640 2 giraffes, 61.4ms
Speed: 3.0ms preprocess, 61.4ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  40%|████      | 2001/5000 [04:45<06:37,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229849.jpg: 640x448 9 persons, 7 chairs, 62.9ms
Speed: 2.5ms preprocess, 62.9ms inference, 14.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  40%|████      | 2002/5000 [04:45<07:08,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229858.jpg: 576x640 2 giraffes, 186.5ms
Speed: 2.7ms preprocess, 186.5ms inference, 3.7ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  40%|████      | 2003/5000 [04:45<08:35,  5.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229948.jpg: 448x640 7 persons, 1 truck, 3 horses, 63.4ms
Speed: 2.6ms preprocess, 63.4ms inference, 9.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  40%|████      | 2004/5000 [04:45<08:03,  6.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000229997.jpg: 448x640 5 birds, 1 bear, 61.5ms
Speed: 2.4ms preprocess, 61.5ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  40%|████      | 2005/5000 [04:45<07:27,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000230008.jpg: 384x640 1 person, 1 car, 1 motorcycle, 62.6ms
Speed: 2.6ms preprocess, 62.6ms inference, 3.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  40%|████      | 2006/5000 [04:46<06:50,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000230166.jpg: 448x640 14 sheeps, 82.9ms
Speed: 2.4ms preprocess, 82.9ms inference, 16.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  40%|████      | 2007/5000 [04:46<07:27,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000230362.jpg: 480x640 7 boats, 72.0ms
Speed: 2.8ms preprocess, 72.0ms inference, 7.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|████      | 2008/5000 [04:46<07:17,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000230450.jpg: 480x640 1 person, 1 traffic light, 4 parking meters, 73.5ms
Speed: 2.8ms preprocess, 73.5ms inference, 7.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|████      | 2009/5000 [04:46<07:12,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000230819.jpg: 448x640 1 person, 2 surfboards, 69.8ms
Speed: 2.9ms preprocess, 69.8ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  40%|████      | 2010/5000 [04:46<06:55,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000230983.jpg: 640x448 1 person, 3 cars, 3 skateboards, 88.5ms
Speed: 2.5ms preprocess, 88.5ms inference, 17.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  40%|████      | 2011/5000 [04:46<07:27,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000230993.jpg: 416x640 2 persons, 1 backpack, 2 umbrellas, 3 handbags, 168.0ms
Speed: 4.4ms preprocess, 168.0ms inference, 7.6ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  40%|████      | 2012/5000 [04:47<08:51,  5.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231088.jpg: 640x448 6 umbrellas, 70.9ms
Speed: 2.6ms preprocess, 70.9ms inference, 6.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  40%|████      | 2013/5000 [04:47<08:15,  6.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231097.jpg: 480x640 1 fork, 2 bowls, 2 broccolis, 2 carrots, 1 dining table, 78.9ms
Speed: 3.2ms preprocess, 78.9ms inference, 9.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|████      | 2014/5000 [04:47<08:06,  6.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231125.jpg: 640x480 1 person, 69.2ms
Speed: 4.2ms preprocess, 69.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  40%|████      | 2015/5000 [04:47<07:25,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231169.jpg: 480x640 1 train, 1 truck, 92.8ms
Speed: 2.8ms preprocess, 92.8ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|████      | 2016/5000 [04:47<07:23,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231237.jpg: 640x448 1 potted plant, 1 dining table, 1 vase, 72.4ms
Speed: 4.5ms preprocess, 72.4ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  40%|████      | 2017/5000 [04:47<06:59,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231339.jpg: 640x480 1 person, 1 refrigerator, 65.7ms
Speed: 2.9ms preprocess, 65.7ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  40%|████      | 2018/5000 [04:47<06:42,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231508.jpg: 640x448 10 persons, 2 cars, 1 baseball glove, 63.1ms
Speed: 4.1ms preprocess, 63.1ms inference, 11.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  40%|████      | 2019/5000 [04:47<06:52,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231527.jpg: 640x448 2 cups, 1 fork, 1 bowl, 6 oranges, 1 dining table, 59.3ms
Speed: 2.4ms preprocess, 59.3ms inference, 9.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  40%|████      | 2020/5000 [04:48<07:04,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231549.jpg: 480x640 1 couch, 1 bed, 1 laptop, 76.9ms
Speed: 3.1ms preprocess, 76.9ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|████      | 2021/5000 [04:48<06:54,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231580.jpg: 480x640 7 persons, 65.6ms
Speed: 2.8ms preprocess, 65.6ms inference, 6.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|████      | 2022/5000 [04:48<06:47,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231747.jpg: 416x640 2 persons, 1 refrigerator, 59.2ms
Speed: 2.6ms preprocess, 59.2ms inference, 3.6ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  40%|████      | 2023/5000 [04:48<06:23,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231822.jpg: 480x640 1 bottle, 2 cups, 1 fork, 3 knifes, 1 bowl, 3 sandwichs, 1 dining table, 66.4ms
Speed: 4.1ms preprocess, 66.4ms inference, 13.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  40%|████      | 2024/5000 [04:48<07:05,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231831.jpg: 640x512 1 cat, 1 mouse, 146.0ms
Speed: 3.4ms preprocess, 146.0ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  40%|████      | 2025/5000 [04:48<07:57,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000231879.jpg: 448x640 5 persons, 1 cake, 2 chairs, 1 dining table, 58.5ms
Speed: 2.6ms preprocess, 58.5ms inference, 7.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  41%|████      | 2026/5000 [04:48<07:25,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000232088.jpg: 480x640 3 couchs, 1 tv, 68.2ms
Speed: 2.8ms preprocess, 68.2ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████      | 2027/5000 [04:49<07:08,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000232244.jpg: 480x640 2 elephants, 108.4ms
Speed: 4.2ms preprocess, 108.4ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████      | 2028/5000 [04:49<07:27,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000232348.jpg: 512x640 1 tv, 80.0ms
Speed: 3.4ms preprocess, 80.0ms inference, 2.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  41%|████      | 2029/5000 [04:49<07:08,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000232489.jpg: 640x640 2 cups, 1 pizza, 1 dining table, 175.7ms
Speed: 3.0ms preprocess, 175.7ms inference, 5.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  41%|████      | 2030/5000 [04:49<08:32,  5.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000232538.jpg: 480x640 2 cars, 1 train, 66.6ms
Speed: 3.0ms preprocess, 66.6ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████      | 2031/5000 [04:49<07:46,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000232563.jpg: 640x480 2 persons, 2 umbrellas, 2 cell phones, 67.7ms
Speed: 2.9ms preprocess, 67.7ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  41%|████      | 2032/5000 [04:49<07:31,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000232646.jpg: 512x640 1 stop sign, 74.7ms
Speed: 3.9ms preprocess, 74.7ms inference, 2.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  41%|████      | 2033/5000 [04:50<07:02,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000232649.jpg: 640x480 1 person, 1 toilet, 1 sink, 71.2ms
Speed: 4.7ms preprocess, 71.2ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  41%|████      | 2034/5000 [04:50<06:56,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000232684.jpg: 448x640 1 person, 1 laptop, 1 keyboard, 70.9ms
Speed: 3.4ms preprocess, 70.9ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  41%|████      | 2035/5000 [04:50<06:42,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000232692.jpg: 448x640 3 persons, 1 boat, 85.5ms
Speed: 3.7ms preprocess, 85.5ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  41%|████      | 2036/5000 [04:50<06:49,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000233033.jpg: 480x640 2 umbrellas, 4 chairs, 69.0ms
Speed: 2.9ms preprocess, 69.0ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████      | 2037/5000 [04:50<06:41,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000233139.jpg: 448x640 1 cup, 2 clocks, 75.4ms
Speed: 2.8ms preprocess, 75.4ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  41%|████      | 2038/5000 [04:50<06:36,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000233238.jpg: 640x480 9 persons, 1 car, 6 bottles, 1 dining table, 77.0ms
Speed: 2.9ms preprocess, 77.0ms inference, 17.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  41%|████      | 2039/5000 [04:50<07:24,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000233370.jpg: 640x448 1 person, 1 suitcase, 61.3ms
Speed: 2.9ms preprocess, 61.3ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  41%|████      | 2040/5000 [04:51<06:52,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000233567.jpg: 640x480 2 sheeps, 2 cows, 84.8ms
Speed: 3.2ms preprocess, 84.8ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  41%|████      | 2041/5000 [04:51<06:58,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000233727.jpg: 448x640 4 persons, 1 car, 1 bus, 70.6ms
Speed: 3.0ms preprocess, 70.6ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  41%|████      | 2042/5000 [04:51<06:49,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000233771.jpg: 640x640 8 persons, 2 buss, 2 umbrellas, 1 handbag, 86.0ms
Speed: 2.6ms preprocess, 86.0ms inference, 16.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  41%|████      | 2043/5000 [04:51<07:32,  6.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000233825.jpg: 480x640 2 couchs, 1 potted plant, 1 dining table, 1 tv, 1 vase, 64.1ms
Speed: 2.5ms preprocess, 64.1ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████      | 2044/5000 [04:51<07:07,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000234366.jpg: 640x480 2 clocks, 112.9ms
Speed: 3.0ms preprocess, 112.9ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  41%|████      | 2045/5000 [04:51<07:25,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000234413.jpg: 480x640 1 toilet, 63.6ms
Speed: 2.9ms preprocess, 63.6ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████      | 2046/5000 [04:51<06:50,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000234526.jpg: 640x640 3 persons, 1 giraffe, 84.9ms
Speed: 4.3ms preprocess, 84.9ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  41%|████      | 2047/5000 [04:52<07:03,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000234607.jpg: 640x480 2 persons, 2 remotes, 64.8ms
Speed: 4.3ms preprocess, 64.8ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  41%|████      | 2048/5000 [04:52<06:40,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000234660.jpg: 480x640 2 airplanes, 1 train, 118.9ms
Speed: 3.1ms preprocess, 118.9ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████      | 2049/5000 [04:52<07:17,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000234757.jpg: 480x640 (no detections), 67.1ms
Speed: 2.9ms preprocess, 67.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████      | 2050/5000 [04:52<06:35,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000234779.jpg: 448x640 2 sandwichs, 1 chair, 1 dining table, 64.4ms
Speed: 2.8ms preprocess, 64.4ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  41%|████      | 2051/5000 [04:52<06:23,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000234807.jpg: 480x640 1 horse, 2 cows, 70.2ms
Speed: 2.5ms preprocess, 70.2ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████      | 2052/5000 [04:52<06:18,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000235057.jpg: 640x480 1 giraffe, 113.6ms
Speed: 3.1ms preprocess, 113.6ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  41%|████      | 2053/5000 [04:52<06:52,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000235064.jpg: 480x640 1 bear, 84.8ms
Speed: 3.3ms preprocess, 84.8ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████      | 2054/5000 [04:52<06:54,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000235241.jpg: 448x640 6 persons, 1 frisbee, 88.4ms
Speed: 3.4ms preprocess, 88.4ms inference, 7.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  41%|████      | 2055/5000 [04:53<07:26,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000235252.jpg: 448x640 3 giraffes, 75.5ms
Speed: 4.4ms preprocess, 75.5ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  41%|████      | 2056/5000 [04:53<07:25,  6.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000235399.jpg: 480x640 1 dog, 145.4ms
Speed: 4.1ms preprocess, 145.4ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████      | 2057/5000 [04:53<08:29,  5.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000235778.jpg: 352x640 2 persons, 2 kites, 374.5ms
Speed: 3.6ms preprocess, 374.5ms inference, 5.9ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  41%|████      | 2058/5000 [04:53<12:37,  3.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000235784.jpg: 480x640 1 person, 2 backpacks, 108.1ms
Speed: 7.2ms preprocess, 108.1ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████      | 2059/5000 [04:54<11:50,  4.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000235836.jpg: 640x448 1 person, 3 sports balls, 2 chairs, 91.4ms
Speed: 3.5ms preprocess, 91.4ms inference, 6.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  41%|████      | 2060/5000 [04:54<10:45,  4.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000235857.jpg: 448x640 3 cows, 85.1ms
Speed: 3.3ms preprocess, 85.1ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  41%|████      | 2061/5000 [04:54<10:12,  4.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000236166.jpg: 640x448 1 person, 80.0ms
Speed: 4.3ms preprocess, 80.0ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  41%|████      | 2062/5000 [04:54<09:19,  5.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000236308.jpg: 384x640 3 persons, 2 backpacks, 2 skiss, 74.8ms
Speed: 2.4ms preprocess, 74.8ms inference, 7.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  41%|████▏     | 2063/5000 [04:54<08:40,  5.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000236412.jpg: 480x640 1 pizza, 1 dining table, 72.6ms
Speed: 2.9ms preprocess, 72.6ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████▏     | 2064/5000 [04:54<07:53,  6.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000236426.jpg: 448x640 13 persons, 3 sports balls, 1 tennis racket, 2 chairs, 99.5ms
Speed: 3.0ms preprocess, 99.5ms inference, 16.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  41%|████▏     | 2065/5000 [04:55<08:56,  5.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000236592.jpg: 480x640 1 dog, 2 ovens, 82.4ms
Speed: 4.0ms preprocess, 82.4ms inference, 4.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████▏     | 2066/5000 [04:55<08:22,  5.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000236599.jpg: 448x640 6 persons, 1 umbrella, 11 kites, 68.8ms
Speed: 5.4ms preprocess, 68.8ms inference, 16.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  41%|████▏     | 2067/5000 [04:55<08:21,  5.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000236690.jpg: 416x640 1 bird, 60.4ms
Speed: 2.9ms preprocess, 60.4ms inference, 2.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  41%|████▏     | 2068/5000 [04:55<07:26,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000236721.jpg: 480x640 3 cups, 3 bananas, 1 dining table, 65.3ms
Speed: 2.7ms preprocess, 65.3ms inference, 10.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████▏     | 2069/5000 [04:55<07:12,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000236730.jpg: 640x480 1 zebra, 168.7ms
Speed: 4.7ms preprocess, 168.7ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  41%|████▏     | 2070/5000 [04:55<08:18,  5.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000236784.jpg: 512x640 1 cat, 2 dogs, 1 couch, 79.5ms
Speed: 4.8ms preprocess, 79.5ms inference, 5.6ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  41%|████▏     | 2071/5000 [04:56<08:00,  6.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000236845.jpg: 640x512 1 car, 1 truck, 76.3ms
Speed: 3.3ms preprocess, 76.3ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  41%|████▏     | 2072/5000 [04:56<07:30,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000236914.jpg: 480x640 1 bottle, 1 pizza, 1 chair, 2 teddy bears, 78.8ms
Speed: 3.6ms preprocess, 78.8ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  41%|████▏     | 2073/5000 [04:56<07:23,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000237071.jpg: 448x640 1 person, 1 sports ball, 1 tennis racket, 96.2ms
Speed: 3.1ms preprocess, 96.2ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  41%|████▏     | 2074/5000 [04:56<07:30,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000237118.jpg: 640x608 1 person, 1 tennis racket, 1 microwave, 182.9ms
Speed: 2.6ms preprocess, 182.9ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  42%|████▏     | 2075/5000 [04:56<08:43,  5.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000237316.jpg: 640x480 1 toilet, 1 sink, 67.4ms
Speed: 4.2ms preprocess, 67.4ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  42%|████▏     | 2076/5000 [04:56<07:51,  6.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000237517.jpg: 640x480 1 bench, 1 fork, 2 knifes, 3 bowls, 1 cake, 1 potted plant, 1 dining table, 72.9ms
Speed: 4.3ms preprocess, 72.9ms inference, 8.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  42%|████▏     | 2077/5000 [04:57<07:48,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000237864.jpg: 448x640 3 elephants, 61.7ms
Speed: 2.8ms preprocess, 61.7ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2078/5000 [04:57<07:06,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000237928.jpg: 640x480 1 microwave, 1 refrigerator, 65.7ms
Speed: 3.0ms preprocess, 65.7ms inference, 7.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  42%|████▏     | 2079/5000 [04:57<06:50,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000237984.jpg: 448x640 2 benchs, 1 chair, 67.8ms
Speed: 2.7ms preprocess, 67.8ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2080/5000 [04:57<06:47,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000238013.jpg: 448x640 1 person, 2 sports balls, 1 tennis racket, 77.8ms
Speed: 4.7ms preprocess, 77.8ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2081/5000 [04:57<06:45,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000238039.jpg: 640x480 1 person, 1 sheep, 73.0ms
Speed: 3.3ms preprocess, 73.0ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  42%|████▏     | 2082/5000 [04:57<06:34,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000238410.jpg: 480x640 12 persons, 3 bottles, 2 wine glasss, 1 cup, 1 chair, 1 dining table, 115.9ms
Speed: 4.9ms preprocess, 115.9ms inference, 25.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  42%|████▏     | 2083/5000 [04:57<08:10,  5.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000238866.jpg: 448x640 30 donuts, 70.2ms
Speed: 3.1ms preprocess, 70.2ms inference, 26.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2084/5000 [04:58<08:47,  5.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000239041.jpg: 448x640 1 bottle, 2 toilets, 1 tv, 2 sinks, 1 vase, 57.2ms
Speed: 2.7ms preprocess, 57.2ms inference, 6.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2085/5000 [04:58<07:51,  6.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000239274.jpg: 640x608 6 persons, 1 boat, 78.6ms
Speed: 2.6ms preprocess, 78.6ms inference, 8.1ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  42%|████▏     | 2086/5000 [04:58<07:40,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000239318.jpg: 640x480 1 laptop, 72.1ms
Speed: 4.3ms preprocess, 72.1ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  42%|████▏     | 2087/5000 [04:58<07:16,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000239347.jpg: 480x640 3 persons, 1 bed, 92.3ms
Speed: 3.7ms preprocess, 92.3ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  42%|████▏     | 2088/5000 [04:58<07:22,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000239537.jpg: 448x640 1 person, 1 sports ball, 1 tennis racket, 63.3ms
Speed: 2.6ms preprocess, 63.3ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2089/5000 [04:58<06:49,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000239627.jpg: 448x640 2 bottles, 2 cups, 1 scissors, 64.5ms
Speed: 3.1ms preprocess, 64.5ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2090/5000 [04:58<06:32,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000239717.jpg: 640x576 5 persons, 170.6ms
Speed: 2.1ms preprocess, 170.6ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:  42%|████▏     | 2091/5000 [04:59<07:59,  6.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000239773.jpg: 448x640 2 persons, 1 sports ball, 2 baseball bats, 2 baseball gloves, 91.7ms
Speed: 4.2ms preprocess, 91.7ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2092/5000 [04:59<07:53,  6.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000239843.jpg: 480x640 1 stop sign, 65.7ms
Speed: 4.6ms preprocess, 65.7ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  42%|████▏     | 2093/5000 [04:59<07:07,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000239857.jpg: 448x640 6 persons, 1 frisbee, 9 chairs, 61.1ms
Speed: 4.4ms preprocess, 61.1ms inference, 15.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2094/5000 [04:59<07:20,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000240023.jpg: 640x480 9 persons, 1 sports ball, 1 tennis racket, 1 chair, 82.2ms
Speed: 3.3ms preprocess, 82.2ms inference, 13.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  42%|████▏     | 2095/5000 [04:59<07:39,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000240049.jpg: 640x448 3 persons, 3 giraffes, 90.4ms
Speed: 3.0ms preprocess, 90.4ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  42%|████▏     | 2096/5000 [04:59<07:33,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000240250.jpg: 640x640 11 persons, 1 cup, 1 knife, 4 pizzas, 1 chair, 1 dining table, 161.9ms
Speed: 4.0ms preprocess, 161.9ms inference, 26.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  42%|████▏     | 2097/5000 [05:00<09:43,  4.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000240754.jpg: 448x640 2 cows, 63.4ms
Speed: 2.8ms preprocess, 63.4ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2098/5000 [05:00<08:25,  5.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000240767.jpg: 480x640 (no detections), 64.9ms
Speed: 4.3ms preprocess, 64.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  42%|████▏     | 2099/5000 [05:00<07:26,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000240940.jpg: 640x480 1 cat, 1 tv, 1 refrigerator, 100.7ms
Speed: 9.5ms preprocess, 100.7ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  42%|████▏     | 2100/5000 [05:00<07:30,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000241297.jpg: 448x640 1 train, 72.0ms
Speed: 3.2ms preprocess, 72.0ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2101/5000 [05:00<07:09,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000241319.jpg: 480x640 (no detections), 69.2ms
Speed: 3.1ms preprocess, 69.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  42%|████▏     | 2102/5000 [05:00<06:28,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000241326.jpg: 480x640 1 cat, 1 dog, 1 couch, 1 dining table, 66.6ms
Speed: 4.5ms preprocess, 66.6ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  42%|████▏     | 2103/5000 [05:01<06:37,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000241602.jpg: 448x640 1 potted plant, 1 toilet, 1 clock, 1 vase, 71.7ms
Speed: 3.6ms preprocess, 71.7ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2104/5000 [05:01<06:34,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000241668.jpg: 640x480 3 persons, 1 cake, 1 chair, 73.6ms
Speed: 2.8ms preprocess, 73.6ms inference, 6.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  42%|████▏     | 2105/5000 [05:01<06:37,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000241677.jpg: 480x640 4 persons, 9 horses, 2 cows, 70.2ms
Speed: 2.8ms preprocess, 70.2ms inference, 15.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  42%|████▏     | 2106/5000 [05:01<07:04,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000242060.jpg: 480x640 1 bottle, 2 cups, 1 spoon, 8 cakes, 1 dining table, 114.6ms
Speed: 2.9ms preprocess, 114.6ms inference, 14.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  42%|████▏     | 2107/5000 [05:01<08:01,  6.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000242287.jpg: 640x448 1 bicycle, 68.3ms
Speed: 3.1ms preprocess, 68.3ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  42%|████▏     | 2108/5000 [05:01<07:20,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000242411.jpg: 640x448 2 persons, 2 cars, 2 trucks, 1 traffic light, 1 clock, 70.7ms
Speed: 3.1ms preprocess, 70.7ms inference, 11.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  42%|████▏     | 2109/5000 [05:01<07:13,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000242678.jpg: 512x640 2 persons, 1 bench, 1 horse, 73.5ms
Speed: 3.3ms preprocess, 73.5ms inference, 4.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  42%|████▏     | 2110/5000 [05:02<06:55,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000242724.jpg: 480x640 2 persons, 2 horses, 95.4ms
Speed: 3.1ms preprocess, 95.4ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  42%|████▏     | 2111/5000 [05:02<07:03,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000242934.jpg: 320x640 2 chairs, 4 couchs, 2 clocks, 150.5ms
Speed: 3.6ms preprocess, 150.5ms inference, 5.6ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  42%|████▏     | 2112/5000 [05:02<07:57,  6.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000242946.jpg: 480x640 6 persons, 6 cups, 6 bowls, 1 carrot, 4 chairs, 1 potted plant, 2 dining tables, 67.3ms
Speed: 2.5ms preprocess, 67.3ms inference, 22.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  42%|████▏     | 2113/5000 [05:02<08:26,  5.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000243034.jpg: 640x448 3 persons, 1 clock, 65.7ms
Speed: 2.8ms preprocess, 65.7ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  42%|████▏     | 2114/5000 [05:02<07:39,  6.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000243075.jpg: 576x640 1 bear, 211.4ms
Speed: 2.3ms preprocess, 211.4ms inference, 2.3ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  42%|████▏     | 2115/5000 [05:02<09:05,  5.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000243148.jpg: 384x640 2 persons, 127.7ms
Speed: 2.5ms preprocess, 127.7ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  42%|████▏     | 2116/5000 [05:03<08:53,  5.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000243199.jpg: 448x640 1 bed, 1 laptop, 57.4ms
Speed: 4.3ms preprocess, 57.4ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2117/5000 [05:03<07:46,  6.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000243204.jpg: 640x480 2 persons, 70.9ms
Speed: 3.0ms preprocess, 70.9ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  42%|████▏     | 2118/5000 [05:03<07:36,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000243344.jpg: 640x480 1 cat, 1 refrigerator, 84.5ms
Speed: 5.8ms preprocess, 84.5ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  42%|████▏     | 2119/5000 [05:03<07:37,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000243495.jpg: 640x480 1 toilet, 72.2ms
Speed: 4.1ms preprocess, 72.2ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  42%|████▏     | 2120/5000 [05:03<07:06,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000243626.jpg: 480x640 1 cup, 1 fork, 1 sandwich, 1 dining table, 64.5ms
Speed: 4.3ms preprocess, 64.5ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  42%|████▏     | 2121/5000 [05:03<06:42,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000243867.jpg: 448x640 1 person, 2 cars, 1 bus, 68.8ms
Speed: 2.7ms preprocess, 68.8ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2122/5000 [05:03<06:43,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000243989.jpg: 576x640 6 persons, 112.1ms
Speed: 4.7ms preprocess, 112.1ms inference, 8.5ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  42%|████▏     | 2123/5000 [05:04<07:23,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000244019.jpg: 448x640 1 bicycle, 2 fire hydrants, 59.8ms
Speed: 3.1ms preprocess, 59.8ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▏     | 2124/5000 [05:04<06:48,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000244099.jpg: 448x640 1 person, 1 horse, 59.3ms
Speed: 2.6ms preprocess, 59.3ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  42%|████▎     | 2125/5000 [05:04<06:18,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000244181.jpg: 480x640 1 bottle, 1 cup, 3 sandwichs, 1 dining table, 68.4ms
Speed: 2.5ms preprocess, 68.4ms inference, 6.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2126/5000 [05:04<06:17,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000244379.jpg: 480x640 2 traffic lights, 114.5ms
Speed: 3.4ms preprocess, 114.5ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2127/5000 [05:04<06:50,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000244411.jpg: 480x640 5 cows, 66.3ms
Speed: 2.7ms preprocess, 66.3ms inference, 6.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2128/5000 [05:04<06:35,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000244496.jpg: 640x384 1 person, 1 tie, 114.0ms
Speed: 3.9ms preprocess, 114.0ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  43%|████▎     | 2129/5000 [05:04<06:58,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000244592.jpg: 480x640 2 zebras, 71.4ms
Speed: 5.3ms preprocess, 71.4ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2130/5000 [05:05<06:41,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000244750.jpg: 640x512 6 persons, 3 wine glasss, 2 cups, 2 dining tables, 241.7ms
Speed: 4.7ms preprocess, 241.7ms inference, 14.3ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  43%|████▎     | 2131/5000 [05:05<09:36,  4.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000244833.jpg: 448x640 2 persons, 2 handbags, 84.2ms
Speed: 3.6ms preprocess, 84.2ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2132/5000 [05:05<08:52,  5.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000245026.jpg: 448x640 1 person, 1 cup, 1 spoon, 1 cake, 1 chair, 2 dining tables, 76.7ms
Speed: 2.9ms preprocess, 76.7ms inference, 9.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2133/5000 [05:05<08:30,  5.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000245102.jpg: 448x640 (no detections), 67.6ms
Speed: 5.8ms preprocess, 67.6ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2134/5000 [05:05<07:29,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000245173.jpg: 640x480 1 clock, 80.0ms
Speed: 3.1ms preprocess, 80.0ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  43%|████▎     | 2135/5000 [05:06<07:16,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000245311.jpg: 640x640 1 person, 1 bowl, 6 donuts, 160.0ms
Speed: 4.0ms preprocess, 160.0ms inference, 13.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  43%|████▎     | 2136/5000 [05:06<08:40,  5.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000245320.jpg: 640x448 1 person, 1 skateboard, 63.2ms
Speed: 2.9ms preprocess, 63.2ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  43%|████▎     | 2137/5000 [05:06<07:31,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000245448.jpg: 480x640 5 persons, 1 car, 3 motorcycles, 1 truck, 70.1ms
Speed: 3.0ms preprocess, 70.1ms inference, 15.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2138/5000 [05:06<07:50,  6.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000245513.jpg: 512x640 1 bird, 1 giraffe, 141.5ms
Speed: 4.4ms preprocess, 141.5ms inference, 3.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  43%|████▎     | 2139/5000 [05:06<08:20,  5.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000245576.jpg: 480x640 1 cat, 1 laptop, 1 keyboard, 1 cell phone, 74.7ms
Speed: 3.1ms preprocess, 74.7ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2140/5000 [05:06<07:44,  6.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000245651.jpg: 640x480 1 cake, 1 dining table, 73.2ms
Speed: 3.8ms preprocess, 73.2ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  43%|████▎     | 2141/5000 [05:06<07:13,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000245764.jpg: 480x640 1 cat, 1 toilet, 90.6ms
Speed: 2.5ms preprocess, 90.6ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2142/5000 [05:07<07:04,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000245915.jpg: 448x640 16 elephants, 65.4ms
Speed: 3.9ms preprocess, 65.4ms inference, 15.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2143/5000 [05:07<07:20,  6.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000246308.jpg: 640x480 5 persons, 1 bicycle, 2 chairs, 1 tv, 1 laptop, 1 mouse, 1 keyboard, 77.4ms
Speed: 3.7ms preprocess, 77.4ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  43%|████▎     | 2144/5000 [05:07<07:26,  6.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000246436.jpg: 640x480 1 person, 3 bottles, 1 cup, 1 bowl, 1 pizza, 68.4ms
Speed: 3.2ms preprocess, 68.4ms inference, 6.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  43%|████▎     | 2145/5000 [05:07<07:13,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000246454.jpg: 640x640 2 persons, 1 horse, 2 cows, 84.8ms
Speed: 2.8ms preprocess, 84.8ms inference, 7.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  43%|████▎     | 2146/5000 [05:07<07:51,  6.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000246522.jpg: 640x448 2 clocks, 88.0ms
Speed: 3.2ms preprocess, 88.0ms inference, 10.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  43%|████▎     | 2147/5000 [05:07<07:42,  6.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000246883.jpg: 480x640 2 persons, 2 surfboards, 83.1ms
Speed: 3.3ms preprocess, 83.1ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2148/5000 [05:08<07:24,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000246963.jpg: 448x640 6 persons, 7 motorcycles, 1 stop sign, 70.6ms
Speed: 2.9ms preprocess, 70.6ms inference, 13.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2149/5000 [05:08<07:27,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000246968.jpg: 448x640 1 person, 4 bottles, 1 wine glass, 1 cup, 1 bowl, 1 chair, 1 potted plant, 1 dining table, 1 oven, 65.8ms
Speed: 2.8ms preprocess, 65.8ms inference, 12.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2150/5000 [05:08<07:42,  6.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000247806.jpg: 480x640 1 clock, 72.7ms
Speed: 3.4ms preprocess, 72.7ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2151/5000 [05:08<07:10,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000247838.jpg: 448x640 10 sheeps, 1 cow, 64.3ms
Speed: 3.2ms preprocess, 64.3ms inference, 9.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2152/5000 [05:08<06:58,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000247917.jpg: 640x640 10 persons, 1 car, 6 baseball gloves, 82.7ms
Speed: 4.1ms preprocess, 82.7ms inference, 21.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  43%|████▎     | 2153/5000 [05:08<07:53,  6.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000248111.jpg: 448x640 1 bottle, 2 cups, 1 chair, 1 oven, 2 sinks, 2 refrigerators, 69.8ms
Speed: 3.5ms preprocess, 69.8ms inference, 8.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2154/5000 [05:09<07:40,  6.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000248112.jpg: 480x640 3 persons, 5 tennis rackets, 76.5ms
Speed: 3.5ms preprocess, 76.5ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2155/5000 [05:09<07:30,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000248284.jpg: 640x448 2 persons, 2 umbrellas, 66.7ms
Speed: 2.8ms preprocess, 66.7ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  43%|████▎     | 2156/5000 [05:09<07:00,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000248314.jpg: 480x640 1 person, 1 bottle, 2 chairs, 1 dining table, 2 laptops, 2 mouses, 1 keyboard, 1 cell phone, 76.5ms
Speed: 3.1ms preprocess, 76.5ms inference, 10.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2157/5000 [05:09<07:08,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000248334.jpg: 480x640 4 persons, 2 boats, 73.5ms
Speed: 4.5ms preprocess, 73.5ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2158/5000 [05:09<06:58,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000248400.jpg: 448x640 1 person, 1 bowl, 1 pizza, 2 potted plants, 86.6ms
Speed: 3.0ms preprocess, 86.6ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2159/5000 [05:09<06:58,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000248616.jpg: 448x640 3 persons, 1 sports ball, 3 tennis rackets, 64.3ms
Speed: 2.8ms preprocess, 64.3ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2160/5000 [05:09<06:43,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000248631.jpg: 480x640 1 laptop, 1 mouse, 74.8ms
Speed: 2.8ms preprocess, 74.8ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2161/5000 [05:10<06:33,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000248752.jpg: 480x640 1 person, 1 baseball glove, 84.0ms
Speed: 5.1ms preprocess, 84.0ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2162/5000 [05:10<06:36,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000248810.jpg: 640x480 1 giraffe, 66.7ms
Speed: 4.1ms preprocess, 66.7ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  43%|████▎     | 2163/5000 [05:10<06:22,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000248980.jpg: 480x640 1 bowl, 6 broccolis, 73.4ms
Speed: 4.7ms preprocess, 73.4ms inference, 7.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2164/5000 [05:10<06:32,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000249025.jpg: 640x512 1 vase, 72.7ms
Speed: 4.9ms preprocess, 72.7ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  43%|████▎     | 2165/5000 [05:10<06:23,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000249129.jpg: 448x640 5 chairs, 1 dining table, 8 teddy bears, 72.7ms
Speed: 3.1ms preprocess, 72.7ms inference, 14.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2166/5000 [05:10<06:51,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000249180.jpg: 448x640 2 persons, 66.2ms
Speed: 3.1ms preprocess, 66.2ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2167/5000 [05:10<06:31,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000249219.jpg: 480x640 1 person, 1 bus, 69.7ms
Speed: 2.8ms preprocess, 69.7ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2168/5000 [05:10<06:22,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000249550.jpg: 640x448 1 couch, 1 potted plant, 1 bed, 1 dining table, 98.1ms
Speed: 5.2ms preprocess, 98.1ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  43%|████▎     | 2169/5000 [05:11<06:53,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000249643.jpg: 480x640 1 train, 1 zebra, 90.5ms
Speed: 4.2ms preprocess, 90.5ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2170/5000 [05:11<06:59,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000249786.jpg: 448x640 2 persons, 1 skis, 1 snowboard, 71.2ms
Speed: 3.0ms preprocess, 71.2ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  43%|████▎     | 2171/5000 [05:11<06:43,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000250127.jpg: 640x640 1 person, 1 bench, 1 umbrella, 1 handbag, 95.5ms
Speed: 3.0ms preprocess, 95.5ms inference, 6.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  43%|████▎     | 2172/5000 [05:11<07:02,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000250137.jpg: 640x480 8 persons, 2 umbrellas, 100.5ms
Speed: 3.0ms preprocess, 100.5ms inference, 10.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  43%|████▎     | 2173/5000 [05:11<07:33,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000250205.jpg: 480x640 2 birds, 67.5ms
Speed: 3.2ms preprocess, 67.5ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  43%|████▎     | 2174/5000 [05:11<06:59,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000250282.jpg: 448x640 26 persons, 3 ties, 1 sports ball, 66.5ms
Speed: 2.9ms preprocess, 66.5ms inference, 25.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▎     | 2175/5000 [05:12<07:43,  6.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000250619.jpg: 512x640 1 person, 2 umbrellas, 71.4ms
Speed: 3.0ms preprocess, 71.4ms inference, 4.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  44%|████▎     | 2176/5000 [05:12<07:13,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000250758.jpg: 480x640 2 zebras, 112.9ms
Speed: 4.0ms preprocess, 112.9ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▎     | 2177/5000 [05:12<07:20,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000250766.jpg: 544x640 4 forks, 1 broccoli, 1 carrot, 1 dining table, 145.9ms
Speed: 3.5ms preprocess, 145.9ms inference, 12.7ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  44%|████▎     | 2178/5000 [05:12<08:15,  5.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000250901.jpg: 448x640 3 persons, 1 sandwich, 59.9ms
Speed: 2.7ms preprocess, 59.9ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▎     | 2179/5000 [05:12<07:24,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000251065.jpg: 640x480 1 toilet, 83.8ms
Speed: 3.0ms preprocess, 83.8ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  44%|████▎     | 2180/5000 [05:12<07:06,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000251119.jpg: 480x640 1 cup, 1 sandwich, 1 dining table, 100.8ms
Speed: 4.6ms preprocess, 100.8ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▎     | 2181/5000 [05:13<07:12,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000251140.jpg: 640x480 2 persons, 1 bicycle, 1 tennis racket, 70.4ms
Speed: 4.4ms preprocess, 70.4ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  44%|████▎     | 2182/5000 [05:13<06:54,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000251537.jpg: 640x640 1 carrot, 88.9ms
Speed: 2.7ms preprocess, 88.9ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  44%|████▎     | 2183/5000 [05:13<06:45,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000251572.jpg: 448x640 3 persons, 1 dog, 71.7ms
Speed: 2.8ms preprocess, 71.7ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▎     | 2184/5000 [05:13<06:37,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000251824.jpg: 480x640 1 person, 2 wine glasss, 1 cup, 1 dining table, 1 scissors, 70.4ms
Speed: 4.0ms preprocess, 70.4ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▎     | 2185/5000 [05:13<06:44,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000252216.jpg: 480x640 2 horses, 1 sheep, 67.2ms
Speed: 3.0ms preprocess, 67.2ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▎     | 2186/5000 [05:13<06:28,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000252219.jpg: 448x640 3 persons, 2 traffic lights, 1 backpack, 1 umbrella, 67.1ms
Speed: 2.9ms preprocess, 67.1ms inference, 6.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▎     | 2187/5000 [05:13<06:22,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000252294.jpg: 640x480 3 persons, 78.5ms
Speed: 4.5ms preprocess, 78.5ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  44%|████▍     | 2188/5000 [05:13<06:21,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000252332.jpg: 480x640 1 stop sign, 77.0ms
Speed: 3.1ms preprocess, 77.0ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▍     | 2189/5000 [05:14<06:18,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000252507.jpg: 640x480 1 person, 1 skis, 107.5ms
Speed: 4.2ms preprocess, 107.5ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  44%|████▍     | 2190/5000 [05:14<06:41,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000252559.jpg: 640x480 10 persons, 1 airplane, 66.6ms
Speed: 2.6ms preprocess, 66.6ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  44%|████▍     | 2191/5000 [05:14<06:47,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000252701.jpg: 384x640 2 persons, 1 surfboard, 129.7ms
Speed: 2.6ms preprocess, 129.7ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  44%|████▍     | 2192/5000 [05:14<07:05,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000252716.jpg: 640x448 1 person, 1 frisbee, 60.6ms
Speed: 2.7ms preprocess, 60.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  44%|████▍     | 2193/5000 [05:14<06:38,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000252776.jpg: 640x448 5 persons, 84.5ms
Speed: 2.8ms preprocess, 84.5ms inference, 12.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  44%|████▍     | 2194/5000 [05:14<06:50,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000253002.jpg: 448x640 2 buss, 1 train, 79.7ms
Speed: 3.2ms preprocess, 79.7ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▍     | 2195/5000 [05:15<06:43,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000253386.jpg: 448x640 2 persons, 1 dog, 76.6ms
Speed: 5.9ms preprocess, 76.6ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▍     | 2196/5000 [05:15<06:39,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000253433.jpg: 480x640 1 bed, 1 teddy bear, 65.4ms
Speed: 2.7ms preprocess, 65.4ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▍     | 2197/5000 [05:15<06:23,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000253452.jpg: 448x640 2 cups, 1 fork, 1 spoon, 1 bowl, 1 banana, 1 orange, 3 donuts, 1 dining table, 72.3ms
Speed: 3.3ms preprocess, 72.3ms inference, 12.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▍     | 2198/5000 [05:15<06:40,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000253695.jpg: 640x448 1 person, 1 baseball glove, 67.9ms
Speed: 2.4ms preprocess, 67.9ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  44%|████▍     | 2199/5000 [05:15<06:21,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000253742.jpg: 480x640 10 persons, 4 sheeps, 17 umbrellas, 85.8ms
Speed: 3.0ms preprocess, 85.8ms inference, 27.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▍     | 2200/5000 [05:15<07:46,  6.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000253819.jpg: 448x640 1 person, 1 skateboard, 57.6ms
Speed: 2.7ms preprocess, 57.6ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▍     | 2201/5000 [05:15<06:55,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000253835.jpg: 480x640 2 persons, 1 train, 2 traffic lights, 1 backpack, 70.6ms
Speed: 2.9ms preprocess, 70.6ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▍     | 2202/5000 [05:16<07:09,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000254016.jpg: 640x480 6 donuts, 73.2ms
Speed: 3.0ms preprocess, 73.2ms inference, 7.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  44%|████▍     | 2203/5000 [05:16<07:10,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000254368.jpg: 640x416 2 persons, 123.6ms
Speed: 2.8ms preprocess, 123.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  44%|████▍     | 2204/5000 [05:16<07:27,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000254516.jpg: 448x640 7 persons, 1 sports ball, 1 baseball bat, 82.7ms
Speed: 2.7ms preprocess, 82.7ms inference, 8.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▍     | 2205/5000 [05:16<07:18,  6.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000254814.jpg: 480x640 3 persons, 2 cars, 11 motorcycles, 3 buss, 1 truck, 70.5ms
Speed: 3.1ms preprocess, 70.5ms inference, 21.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▍     | 2206/5000 [05:16<07:44,  6.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000255165.jpg: 480x640 1 cup, 3 tvs, 2 laptops, 2 keyboards, 1 cell phone, 2 books, 72.6ms
Speed: 3.2ms preprocess, 72.6ms inference, 11.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▍     | 2207/5000 [05:16<07:44,  6.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000255401.jpg: 640x544 2 toilets, 2 sinks, 159.0ms
Speed: 2.9ms preprocess, 159.0ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  44%|████▍     | 2208/5000 [05:17<08:24,  5.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000255483.jpg: 640x608 1 person, 2 books, 219.7ms
Speed: 3.3ms preprocess, 219.7ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  44%|████▍     | 2209/5000 [05:17<09:44,  4.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000255536.jpg: 640x640 2 persons, 1 kite, 2 chairs, 84.7ms
Speed: 3.9ms preprocess, 84.7ms inference, 7.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  44%|████▍     | 2210/5000 [05:17<08:56,  5.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000255664.jpg: 480x640 1 dog, 1 frisbee, 66.8ms
Speed: 4.2ms preprocess, 66.8ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▍     | 2211/5000 [05:17<07:55,  5.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000255718.jpg: 640x480 1 fire hydrant, 78.4ms
Speed: 2.8ms preprocess, 78.4ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  44%|████▍     | 2212/5000 [05:17<07:24,  6.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000255747.jpg: 480x640 2 sandwichs, 1 dining table, 75.0ms
Speed: 3.4ms preprocess, 75.0ms inference, 4.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▍     | 2213/5000 [05:17<07:17,  6.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000255749.jpg: 448x640 4 persons, 1 bus, 69.9ms
Speed: 4.2ms preprocess, 69.9ms inference, 5.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▍     | 2214/5000 [05:18<06:58,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000255824.jpg: 480x640 1 cup, 1 sandwich, 1 dining table, 66.7ms
Speed: 3.1ms preprocess, 66.7ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▍     | 2215/5000 [05:18<06:32,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000255912.jpg: 384x640 1 bottle, 2 sandwichs, 1 dining table, 65.6ms
Speed: 2.8ms preprocess, 65.6ms inference, 4.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  44%|████▍     | 2216/5000 [05:18<06:18,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000255917.jpg: 448x640 16 cars, 93.9ms
Speed: 4.2ms preprocess, 93.9ms inference, 20.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▍     | 2217/5000 [05:18<07:20,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000255965.jpg: 448x640 1 cat, 70.0ms
Speed: 2.8ms preprocess, 70.0ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▍     | 2218/5000 [05:18<06:50,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000256192.jpg: 480x640 10 persons, 1 motorcycle, 78.6ms
Speed: 3.0ms preprocess, 78.6ms inference, 12.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▍     | 2219/5000 [05:18<07:07,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000256195.jpg: 448x640 1 person, 1 train, 83.5ms
Speed: 3.3ms preprocess, 83.5ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▍     | 2220/5000 [05:18<07:05,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000256407.jpg: 640x480 4 cups, 2 cakes, 108.3ms
Speed: 3.2ms preprocess, 108.3ms inference, 6.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  44%|████▍     | 2221/5000 [05:19<07:24,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000256518.jpg: 448x640 2 cups, 3 knifes, 1 bowl, 1 sandwich, 1 dining table, 70.1ms
Speed: 3.1ms preprocess, 70.1ms inference, 8.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▍     | 2222/5000 [05:19<07:11,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000256775.jpg: 480x640 9 persons, 1 kite, 75.1ms
Speed: 3.4ms preprocess, 75.1ms inference, 10.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▍     | 2223/5000 [05:19<07:15,  6.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000256868.jpg: 448x640 5 persons, 1 car, 1 bench, 3 skateboards, 71.8ms
Speed: 3.0ms preprocess, 71.8ms inference, 10.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  44%|████▍     | 2224/5000 [05:19<07:10,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000256916.jpg: 480x640 1 knife, 4 pizzas, 1 chair, 1 dining table, 93.1ms
Speed: 2.8ms preprocess, 93.1ms inference, 9.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  44%|████▍     | 2225/5000 [05:19<07:17,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000256941.jpg: 640x448 1 umbrella, 64.3ms
Speed: 3.8ms preprocess, 64.3ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  45%|████▍     | 2226/5000 [05:19<06:38,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000257084.jpg: 448x640 5 persons, 3 skateboards, 81.5ms
Speed: 4.5ms preprocess, 81.5ms inference, 9.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  45%|████▍     | 2227/5000 [05:20<06:57,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000257169.jpg: 640x480 1 toilet, 83.9ms
Speed: 2.9ms preprocess, 83.9ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  45%|████▍     | 2228/5000 [05:20<06:47,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000257370.jpg: 480x640 4 persons, 2 potted plants, 2 tvs, 1 vase, 106.1ms
Speed: 2.7ms preprocess, 106.1ms inference, 9.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  45%|████▍     | 2229/5000 [05:20<07:16,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000257478.jpg: 512x640 1 person, 1 baseball glove, 193.5ms
Speed: 5.0ms preprocess, 193.5ms inference, 3.5ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  45%|████▍     | 2230/5000 [05:20<08:38,  5.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000257566.jpg: 480x640 8 persons, 1 boat, 72.3ms
Speed: 4.0ms preprocess, 72.3ms inference, 9.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  45%|████▍     | 2231/5000 [05:20<08:11,  5.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000257624.jpg: 640x448 1 person, 2 sports balls, 69.4ms
Speed: 2.6ms preprocess, 69.4ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  45%|████▍     | 2232/5000 [05:20<07:26,  6.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000257865.jpg: 448x640 1 person, 66.8ms
Speed: 2.8ms preprocess, 66.8ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  45%|████▍     | 2233/5000 [05:21<06:57,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000257896.jpg: 640x480 1 person, 1 car, 1 tie, 89.5ms
Speed: 5.1ms preprocess, 89.5ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  45%|████▍     | 2234/5000 [05:21<06:56,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000258388.jpg: 448x640 3 persons, 1 skateboard, 68.1ms
Speed: 3.1ms preprocess, 68.1ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  45%|████▍     | 2235/5000 [05:21<06:40,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000258541.jpg: 640x480 1 person, 1 tie, 67.7ms
Speed: 3.8ms preprocess, 67.7ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  45%|████▍     | 2236/5000 [05:21<06:19,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000258793.jpg: 480x640 2 cars, 1 traffic light, 63.1ms
Speed: 2.3ms preprocess, 63.1ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  45%|████▍     | 2237/5000 [05:21<06:00,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000258883.jpg: 640x448 1 person, 1 sandwich, 1 dining table, 66.2ms
Speed: 2.8ms preprocess, 66.2ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  45%|████▍     | 2238/5000 [05:21<06:08,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000258911.jpg: 448x640 3 persons, 2 cows, 64.8ms
Speed: 3.1ms preprocess, 64.8ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  45%|████▍     | 2239/5000 [05:21<06:05,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000259097.jpg: 448x640 1 person, 1 frisbee, 60.4ms
Speed: 3.9ms preprocess, 60.4ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  45%|████▍     | 2240/5000 [05:21<05:48,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000259382.jpg: 480x640 1 giraffe, 64.5ms
Speed: 2.8ms preprocess, 64.5ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  45%|████▍     | 2241/5000 [05:22<05:35,  8.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000259571.jpg: 384x640 10 persons, 1 truck, 59.4ms
Speed: 4.0ms preprocess, 59.4ms inference, 8.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  45%|████▍     | 2242/5000 [05:22<05:44,  8.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000259597.jpg: 320x640 21 persons, 1 chair, 1 dining table, 2 cell phones, 152.7ms
Speed: 2.3ms preprocess, 152.7ms inference, 18.8ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  45%|████▍     | 2243/5000 [05:22<07:43,  5.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000259625.jpg: 448x640 1 elephant, 67.2ms
Speed: 3.4ms preprocess, 67.2ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  45%|████▍     | 2244/5000 [05:22<07:07,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000259640.jpg: 448x640 9 persons, 3 bicycles, 1 handbag, 68.6ms
Speed: 2.9ms preprocess, 68.6ms inference, 12.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  45%|████▍     | 2245/5000 [05:22<07:13,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000259690.jpg: 480x640 9 persons, 2 bicycles, 1 car, 1 baseball bat, 79.2ms
Speed: 3.3ms preprocess, 79.2ms inference, 14.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  45%|████▍     | 2246/5000 [05:22<07:25,  6.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000259830.jpg: 640x448 1 person, 4 bicycles, 1 motorcycle, 64.1ms
Speed: 2.9ms preprocess, 64.1ms inference, 6.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  45%|████▍     | 2247/5000 [05:23<06:59,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000259854.jpg: 480x640 (no detections), 71.1ms
Speed: 3.0ms preprocess, 71.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  45%|████▍     | 2248/5000 [05:23<06:21,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000260105.jpg: 480x640 1 cup, 2 forks, 1 bowl, 3 pizzas, 1 chair, 1 dining table, 110.6ms
Speed: 3.6ms preprocess, 110.6ms inference, 12.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  45%|████▍     | 2249/5000 [05:23<07:03,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000260106.jpg: 640x448 5 persons, 1 horse, 62.3ms
Speed: 4.2ms preprocess, 62.3ms inference, 6.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  45%|████▌     | 2250/5000 [05:23<06:40,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000260261.jpg: 640x448 1 person, 1 car, 1 truck, 1 apple, 62.3ms
Speed: 2.7ms preprocess, 62.3ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  45%|████▌     | 2251/5000 [05:23<06:18,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000260266.jpg: 640x480 4 cars, 5 traffic lights, 69.3ms
Speed: 2.7ms preprocess, 69.3ms inference, 9.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  45%|████▌     | 2252/5000 [05:23<06:21,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000260470.jpg: 448x640 3 persons, 1 car, 9 cakes, 92.1ms
Speed: 3.0ms preprocess, 92.1ms inference, 14.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  45%|████▌     | 2253/5000 [05:23<07:00,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000260657.jpg: 352x640 (no detections), 124.1ms
Speed: 3.9ms preprocess, 124.1ms inference, 0.4ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  45%|████▌     | 2254/5000 [05:24<07:06,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000260925.jpg: 480x640 1 cat, 71.1ms
Speed: 3.2ms preprocess, 71.1ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  45%|████▌     | 2255/5000 [05:24<06:40,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261036.jpg: 640x448 4 persons, 1 frisbee, 2 sports balls, 70.3ms
Speed: 4.1ms preprocess, 70.3ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  45%|████▌     | 2256/5000 [05:24<06:42,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261061.jpg: 448x640 1 person, 1 frisbee, 1 surfboard, 86.3ms
Speed: 4.3ms preprocess, 86.3ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  45%|████▌     | 2257/5000 [05:24<06:40,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261097.jpg: 640x448 12 persons, 1 tennis racket, 68.1ms
Speed: 4.3ms preprocess, 68.1ms inference, 13.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  45%|████▌     | 2258/5000 [05:24<06:48,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261116.jpg: 480x640 2 persons, 1 cup, 2 forks, 2 bowls, 2 cakes, 64.4ms
Speed: 4.5ms preprocess, 64.4ms inference, 10.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  45%|████▌     | 2259/5000 [05:24<06:48,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261161.jpg: 448x640 1 bench, 1 dog, 73.5ms
Speed: 3.3ms preprocess, 73.5ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  45%|████▌     | 2260/5000 [05:24<06:28,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261318.jpg: 640x448 2 persons, 1 suitcase, 1 chair, 98.8ms
Speed: 3.2ms preprocess, 98.8ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  45%|████▌     | 2261/5000 [05:25<06:44,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261535.jpg: 640x448 2 persons, 5 sports balls, 2 tennis rackets, 69.8ms
Speed: 2.9ms preprocess, 69.8ms inference, 9.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  45%|████▌     | 2262/5000 [05:25<06:45,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261706.jpg: 640x480 1 cat, 1 couch, 1 remote, 76.1ms
Speed: 3.2ms preprocess, 76.1ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  45%|████▌     | 2263/5000 [05:25<06:33,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261712.jpg: 480x640 2 giraffes, 69.7ms
Speed: 3.0ms preprocess, 69.7ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  45%|████▌     | 2264/5000 [05:25<06:15,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261732.jpg: 448x640 1 person, 2 tennis rackets, 64.1ms
Speed: 2.8ms preprocess, 64.1ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  45%|████▌     | 2265/5000 [05:25<06:07,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261796.jpg: 640x544 (no detections), 138.7ms
Speed: 1.8ms preprocess, 138.7ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  45%|████▌     | 2266/5000 [05:25<06:42,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261888.jpg: 448x640 1 person, 1 bicycle, 2 cows, 1 backpack, 66.8ms
Speed: 3.5ms preprocess, 66.8ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  45%|████▌     | 2267/5000 [05:25<06:23,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000261982.jpg: 640x480 1 person, 5 cars, 1 skateboard, 107.5ms
Speed: 3.1ms preprocess, 107.5ms inference, 7.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  45%|████▌     | 2268/5000 [05:26<06:52,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000262048.jpg: 640x640 6 persons, 1 umbrella, 91.5ms
Speed: 4.5ms preprocess, 91.5ms inference, 9.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  45%|████▌     | 2269/5000 [05:26<07:11,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000262227.jpg: 640x480 1 chair, 1 sink, 69.0ms
Speed: 3.0ms preprocess, 69.0ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  45%|████▌     | 2270/5000 [05:26<06:45,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000262440.jpg: 640x416 2 toilets, 2 sinks, 122.7ms
Speed: 2.6ms preprocess, 122.7ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  45%|████▌     | 2271/5000 [05:26<07:07,  6.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000262487.jpg: 480x640 2 persons, 2 baseball bats, 76.5ms
Speed: 2.7ms preprocess, 76.5ms inference, 12.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  45%|████▌     | 2272/5000 [05:26<06:59,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000262587.jpg: 480x640 1 person, 1 surfboard, 66.9ms
Speed: 2.8ms preprocess, 66.9ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  45%|████▌     | 2273/5000 [05:26<06:29,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000262631.jpg: 640x480 3 vases, 134.8ms
Speed: 4.5ms preprocess, 134.8ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  45%|████▌     | 2274/5000 [05:27<07:16,  6.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000262682.jpg: 640x448 1 oven, 3 sinks, 68.3ms
Speed: 2.9ms preprocess, 68.3ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  46%|████▌     | 2275/5000 [05:27<06:50,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000262895.jpg: 640x448 1 person, 1 tie, 70.7ms
Speed: 4.3ms preprocess, 70.7ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  46%|████▌     | 2276/5000 [05:27<06:28,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000262938.jpg: 480x640 1 book, 2 teddy bears, 108.2ms
Speed: 3.3ms preprocess, 108.2ms inference, 9.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  46%|████▌     | 2277/5000 [05:27<06:59,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263068.jpg: 640x448 1 person, 2 skateboards, 66.8ms
Speed: 4.4ms preprocess, 66.8ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  46%|████▌     | 2278/5000 [05:27<06:34,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263299.jpg: 448x640 1 train, 64.8ms
Speed: 4.0ms preprocess, 64.8ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▌     | 2279/5000 [05:27<06:12,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263403.jpg: 416x640 1 person, 1 surfboard, 125.4ms
Speed: 3.9ms preprocess, 125.4ms inference, 2.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  46%|████▌     | 2280/5000 [05:27<06:48,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263425.jpg: 448x640 2 persons, 1 truck, 1 boat, 1 dog, 103.3ms
Speed: 3.1ms preprocess, 103.3ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▌     | 2281/5000 [05:28<07:04,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263463.jpg: 448x640 2 cars, 1 dog, 59.5ms
Speed: 2.8ms preprocess, 59.5ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▌     | 2282/5000 [05:28<06:28,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263474.jpg: 640x480 2 toilets, 66.0ms
Speed: 2.9ms preprocess, 66.0ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  46%|████▌     | 2283/5000 [05:28<06:04,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263594.jpg: 512x640 8 cars, 158.9ms
Speed: 4.1ms preprocess, 158.9ms inference, 10.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  46%|████▌     | 2284/5000 [05:28<07:20,  6.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263644.jpg: 640x480 1 umbrella, 1 bed, 87.6ms
Speed: 3.2ms preprocess, 87.6ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  46%|████▌     | 2285/5000 [05:28<07:10,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263679.jpg: 448x640 4 persons, 2 kites, 68.9ms
Speed: 3.1ms preprocess, 68.9ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▌     | 2286/5000 [05:28<06:45,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263796.jpg: 640x480 2 toilets, 67.0ms
Speed: 2.7ms preprocess, 67.0ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  46%|████▌     | 2287/5000 [05:28<06:19,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263860.jpg: 448x640 2 elephants, 75.5ms
Speed: 2.9ms preprocess, 75.5ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▌     | 2288/5000 [05:29<06:10,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263966.jpg: 448x640 1 horse, 68.6ms
Speed: 2.7ms preprocess, 68.6ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▌     | 2289/5000 [05:29<05:54,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000263969.jpg: 640x640 3 persons, 1 tie, 1 knife, 1 cake, 117.3ms
Speed: 2.2ms preprocess, 117.3ms inference, 8.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  46%|████▌     | 2290/5000 [05:29<06:42,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000264335.jpg: 640x480 1 bird, 69.1ms
Speed: 2.9ms preprocess, 69.1ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  46%|████▌     | 2291/5000 [05:29<06:18,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000264441.jpg: 640x480 1 cat, 78.3ms
Speed: 4.8ms preprocess, 78.3ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  46%|████▌     | 2292/5000 [05:29<06:12,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000264535.jpg: 640x640 2 persons, 1 bench, 83.6ms
Speed: 4.6ms preprocess, 83.6ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  46%|████▌     | 2293/5000 [05:29<06:21,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000264968.jpg: 640x448 6 persons, 3 sports balls, 2 baseball bats, 68.4ms
Speed: 4.2ms preprocess, 68.4ms inference, 10.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  46%|████▌     | 2294/5000 [05:29<06:44,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000265108.jpg: 640x448 2 suitcases, 1 potted plant, 70.0ms
Speed: 3.7ms preprocess, 70.0ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  46%|████▌     | 2295/5000 [05:30<06:29,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000265518.jpg: 640x640 1 cup, 1 fork, 1 pizza, 1 dining table, 92.4ms
Speed: 4.0ms preprocess, 92.4ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  46%|████▌     | 2296/5000 [05:30<06:39,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000265777.jpg: 640x640 1 person, 2 wine glasss, 1 pizza, 1 dining table, 88.1ms
Speed: 4.1ms preprocess, 88.1ms inference, 7.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  46%|████▌     | 2297/5000 [05:30<06:50,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000265816.jpg: 416x640 1 person, 1 horse, 1 potted plant, 64.2ms
Speed: 2.6ms preprocess, 64.2ms inference, 3.9ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  46%|████▌     | 2298/5000 [05:30<06:43,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000266082.jpg: 640x480 1 fire hydrant, 75.8ms
Speed: 5.7ms preprocess, 75.8ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  46%|████▌     | 2299/5000 [05:30<06:28,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000266206.jpg: 640x448 (no detections), 68.1ms
Speed: 2.6ms preprocess, 68.1ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  46%|████▌     | 2300/5000 [05:30<05:56,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000266400.jpg: 416x640 2 persons, 1 bicycle, 3 motorcycles, 71.4ms
Speed: 3.7ms preprocess, 71.4ms inference, 5.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  46%|████▌     | 2301/5000 [05:30<05:59,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000266409.jpg: 480x640 1 person, 1 skis, 67.4ms
Speed: 2.9ms preprocess, 67.4ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  46%|████▌     | 2302/5000 [05:30<05:54,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000266768.jpg: 640x640 13 persons, 1 bowl, 149.8ms
Speed: 4.9ms preprocess, 149.8ms inference, 18.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  46%|████▌     | 2303/5000 [05:31<07:38,  5.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000266892.jpg: 448x640 15 persons, 4 tennis rackets, 62.7ms
Speed: 2.8ms preprocess, 62.7ms inference, 18.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▌     | 2304/5000 [05:31<07:53,  5.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000266981.jpg: 640x448 11 persons, 5 cars, 2 suitcases, 78.4ms
Speed: 3.1ms preprocess, 78.4ms inference, 19.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  46%|████▌     | 2305/5000 [05:31<08:05,  5.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000267169.jpg: 448x640 3 persons, 2 elephants, 68.7ms
Speed: 3.0ms preprocess, 68.7ms inference, 6.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▌     | 2306/5000 [05:31<07:29,  6.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000267191.jpg: 480x640 2 persons, 3 elephants, 89.2ms
Speed: 2.8ms preprocess, 89.2ms inference, 8.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  46%|████▌     | 2307/5000 [05:31<07:15,  6.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000267300.jpg: 480x640 1 dog, 1 banana, 1 donut, 1 potted plant, 1 dining table, 72.1ms
Speed: 2.8ms preprocess, 72.1ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  46%|████▌     | 2308/5000 [05:32<06:54,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000267351.jpg: 448x640 3 clocks, 71.3ms
Speed: 2.8ms preprocess, 71.3ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▌     | 2309/5000 [05:32<06:30,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000267434.jpg: 480x640 9 cows, 69.5ms
Speed: 2.6ms preprocess, 69.5ms inference, 10.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  46%|████▌     | 2310/5000 [05:32<06:35,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000267537.jpg: 480x640 7 persons, 1 umbrella, 66.4ms
Speed: 3.0ms preprocess, 66.4ms inference, 8.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  46%|████▌     | 2311/5000 [05:32<06:51,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000267670.jpg: 640x480 1 bottle, 1 toilet, 1 sink, 90.5ms
Speed: 15.0ms preprocess, 90.5ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  46%|████▌     | 2312/5000 [05:32<07:11,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000267903.jpg: 448x640 1 vase, 66.8ms
Speed: 3.4ms preprocess, 66.8ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▋     | 2313/5000 [05:32<06:39,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000267933.jpg: 448x640 1 horse, 6 sheeps, 4 cows, 76.8ms
Speed: 4.2ms preprocess, 76.8ms inference, 11.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▋     | 2314/5000 [05:32<06:53,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000267940.jpg: 480x640 1 boat, 114.7ms
Speed: 3.5ms preprocess, 114.7ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  46%|████▋     | 2315/5000 [05:33<07:03,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000267946.jpg: 448x640 (no detections), 78.8ms
Speed: 4.8ms preprocess, 78.8ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▋     | 2316/5000 [05:33<06:30,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000268000.jpg: 384x640 1 person, 1 train, 122.8ms
Speed: 2.7ms preprocess, 122.8ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  46%|████▋     | 2317/5000 [05:33<06:49,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000268375.jpg: 448x640 5 elephants, 63.4ms
Speed: 2.6ms preprocess, 63.4ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▋     | 2318/5000 [05:33<06:25,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000268378.jpg: 384x640 10 persons, 4 wine glasss, 2 cups, 4 chairs, 1 potted plant, 1 dining table, 59.3ms
Speed: 2.4ms preprocess, 59.3ms inference, 34.0ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  46%|████▋     | 2319/5000 [05:33<06:59,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000268729.jpg: 480x640 2 cows, 4 zebras, 1 giraffe, 70.5ms
Speed: 2.9ms preprocess, 70.5ms inference, 6.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  46%|████▋     | 2320/5000 [05:33<06:52,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000268831.jpg: 512x640 1 toilet, 1 sink, 80.3ms
Speed: 6.0ms preprocess, 80.3ms inference, 4.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  46%|████▋     | 2321/5000 [05:34<06:37,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000268996.jpg: 480x640 1 traffic light, 80.2ms
Speed: 4.9ms preprocess, 80.2ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  46%|████▋     | 2322/5000 [05:34<06:30,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000269113.jpg: 480x640 3 dogs, 80.9ms
Speed: 4.7ms preprocess, 80.9ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  46%|████▋     | 2323/5000 [05:34<06:43,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000269121.jpg: 640x480 1 person, 2 tennis rackets, 85.8ms
Speed: 3.5ms preprocess, 85.8ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  46%|████▋     | 2324/5000 [05:34<06:37,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000269196.jpg: 448x640 10 sheeps, 70.0ms
Speed: 2.9ms preprocess, 70.0ms inference, 9.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  46%|████▋     | 2325/5000 [05:34<06:39,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000269314.jpg: 480x640 (no detections), 76.1ms
Speed: 3.5ms preprocess, 76.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2326/5000 [05:34<06:14,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000269316.jpg: 448x640 5 persons, 1 frisbee, 69.1ms
Speed: 3.3ms preprocess, 69.1ms inference, 13.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2327/5000 [05:34<06:22,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000269632.jpg: 448x640 2 persons, 3 cars, 1 bus, 63.6ms
Speed: 2.7ms preprocess, 63.6ms inference, 7.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2328/5000 [05:35<06:13,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000269682.jpg: 480x640 11 traffic lights, 70.6ms
Speed: 3.1ms preprocess, 70.6ms inference, 9.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2329/5000 [05:35<06:21,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000269866.jpg: 448x640 1 microwave, 1 oven, 59.8ms
Speed: 2.6ms preprocess, 59.8ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2330/5000 [05:35<05:54,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000269932.jpg: 640x480 1 person, 66.6ms
Speed: 4.4ms preprocess, 66.6ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  47%|████▋     | 2331/5000 [05:35<06:09,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000269942.jpg: 640x480 1 person, 1 car, 6 traffic lights, 83.9ms
Speed: 2.9ms preprocess, 83.9ms inference, 10.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  47%|████▋     | 2332/5000 [05:35<06:39,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000270066.jpg: 480x640 1 motorcycle, 1 truck, 74.4ms
Speed: 4.6ms preprocess, 74.4ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2333/5000 [05:35<06:22,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000270122.jpg: 448x640 1 person, 99.8ms
Speed: 2.5ms preprocess, 99.8ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2334/5000 [05:35<06:25,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000270244.jpg: 448x640 1 zebra, 67.8ms
Speed: 3.1ms preprocess, 67.8ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2335/5000 [05:35<06:05,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000270297.jpg: 448x640 2 trains, 1 traffic light, 66.4ms
Speed: 2.5ms preprocess, 66.4ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2336/5000 [05:36<05:52,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000270386.jpg: 480x640 (no detections), 67.7ms
Speed: 4.6ms preprocess, 67.7ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2337/5000 [05:36<05:28,  8.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000270402.jpg: 448x640 1 bird, 3 elephants, 67.6ms
Speed: 2.9ms preprocess, 67.6ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2338/5000 [05:36<05:35,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000270474.jpg: 480x640 1 person, 1 baseball bat, 68.3ms
Speed: 4.7ms preprocess, 68.3ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2339/5000 [05:36<05:34,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000270677.jpg: 640x640 1 person, 93.8ms
Speed: 2.1ms preprocess, 93.8ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  47%|████▋     | 2340/5000 [05:36<05:56,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000270705.jpg: 640x480 1 bird, 74.8ms
Speed: 3.1ms preprocess, 74.8ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  47%|████▋     | 2341/5000 [05:36<05:49,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000270883.jpg: 416x640 2 persons, 1 potted plant, 1 bed, 66.8ms
Speed: 2.8ms preprocess, 66.8ms inference, 4.6ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  47%|████▋     | 2342/5000 [05:36<05:42,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000270908.jpg: 384x640 1 person, 1 tennis racket, 87.9ms
Speed: 5.5ms preprocess, 87.9ms inference, 5.1ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  47%|████▋     | 2343/5000 [05:37<05:55,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000271116.jpg: 448x640 11 persons, 2 ties, 3 cups, 2 chairs, 1 dining table, 2 cell phones, 72.0ms
Speed: 3.3ms preprocess, 72.0ms inference, 19.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2344/5000 [05:37<06:41,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000271402.jpg: 640x448 2 persons, 2 tennis rackets, 72.0ms
Speed: 3.1ms preprocess, 72.0ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  47%|████▋     | 2345/5000 [05:37<06:33,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000271457.jpg: 448x640 1 bench, 109.2ms
Speed: 6.1ms preprocess, 109.2ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2346/5000 [05:37<06:51,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000271471.jpg: 448x640 3 persons, 5 bananas, 66.8ms
Speed: 3.1ms preprocess, 66.8ms inference, 8.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2347/5000 [05:37<06:41,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000271728.jpg: 448x640 1 cat, 1 cup, 1 couch, 3 beds, 1 dining table, 1 laptop, 2 remotes, 1 book, 72.3ms
Speed: 3.6ms preprocess, 72.3ms inference, 11.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2348/5000 [05:37<06:44,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000271997.jpg: 640x512 1 person, 1 tie, 135.6ms
Speed: 2.9ms preprocess, 135.6ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  47%|████▋     | 2349/5000 [05:38<07:11,  6.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000272049.jpg: 448x640 1 person, 1 truck, 59.2ms
Speed: 2.6ms preprocess, 59.2ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2350/5000 [05:38<06:30,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000272136.jpg: 448x640 1 train, 1 truck, 66.9ms
Speed: 3.1ms preprocess, 66.9ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2351/5000 [05:38<06:06,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000272148.jpg: 384x640 4 persons, 57.8ms
Speed: 2.4ms preprocess, 57.8ms inference, 4.3ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  47%|████▋     | 2352/5000 [05:38<05:44,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000272212.jpg: 480x640 5 cows, 70.1ms
Speed: 2.7ms preprocess, 70.1ms inference, 8.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2353/5000 [05:38<05:45,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000272364.jpg: 640x448 4 giraffes, 108.3ms
Speed: 3.0ms preprocess, 108.3ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  47%|████▋     | 2354/5000 [05:38<06:05,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000272416.jpg: 480x640 1 toilet, 68.3ms
Speed: 2.7ms preprocess, 68.3ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2355/5000 [05:38<05:52,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000272566.jpg: 480x640 3 persons, 1 sports ball, 2 bananas, 1 apple, 63.2ms
Speed: 2.8ms preprocess, 63.2ms inference, 6.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2356/5000 [05:38<05:46,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000273132.jpg: 640x640 1 person, 1 bench, 84.1ms
Speed: 2.0ms preprocess, 84.1ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  47%|████▋     | 2357/5000 [05:39<05:48,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000273198.jpg: 480x640 1 person, 4 cars, 1 fire hydrant, 1 cat, 86.1ms
Speed: 3.8ms preprocess, 86.1ms inference, 7.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2358/5000 [05:39<06:09,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000273232.jpg: 448x640 1 person, 1 boat, 1 dog, 4 kites, 61.3ms
Speed: 2.4ms preprocess, 61.3ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2359/5000 [05:39<05:56,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000273420.jpg: 512x640 1 person, 1 cup, 73.7ms
Speed: 4.1ms preprocess, 73.7ms inference, 2.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  47%|████▋     | 2360/5000 [05:39<05:48,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000273493.jpg: 448x640 2 persons, 61.1ms
Speed: 4.0ms preprocess, 61.1ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2361/5000 [05:39<05:32,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000273551.jpg: 480x640 3 motorcycles, 1 bench, 75.2ms
Speed: 2.9ms preprocess, 75.2ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2362/5000 [05:39<05:40,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000273617.jpg: 448x640 1 stop sign, 61.8ms
Speed: 2.6ms preprocess, 61.8ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2363/5000 [05:39<05:23,  8.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000273642.jpg: 640x480 1 cat, 1 remote, 65.3ms
Speed: 4.2ms preprocess, 65.3ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  47%|████▋     | 2364/5000 [05:39<05:19,  8.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000273711.jpg: 480x640 4 persons, 6 bowls, 1 cake, 3 chairs, 1 dining table, 67.5ms
Speed: 2.9ms preprocess, 67.5ms inference, 15.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2365/5000 [05:40<05:51,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000273712.jpg: 640x480 1 refrigerator, 65.9ms
Speed: 2.9ms preprocess, 65.9ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  47%|████▋     | 2366/5000 [05:40<05:34,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000273715.jpg: 480x640 4 persons, 1 skis, 67.2ms
Speed: 4.5ms preprocess, 67.2ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2367/5000 [05:40<05:43,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000273760.jpg: 640x480 1 person, 1 tennis racket, 68.6ms
Speed: 2.9ms preprocess, 68.6ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  47%|████▋     | 2368/5000 [05:40<05:40,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000274066.jpg: 448x640 4 persons, 2 surfboards, 61.8ms
Speed: 2.7ms preprocess, 61.8ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  47%|████▋     | 2369/5000 [05:40<05:33,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000274219.jpg: 640x480 1 person, 2 ties, 63.3ms
Speed: 2.6ms preprocess, 63.3ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  47%|████▋     | 2370/5000 [05:40<05:21,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000274272.jpg: 480x640 2 cars, 1 bus, 2 traffic lights, 1 cow, 89.6ms
Speed: 2.5ms preprocess, 89.6ms inference, 6.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2371/5000 [05:40<05:44,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000274411.jpg: 640x448 1 person, 1 sports ball, 2 tennis rackets, 57.9ms
Speed: 2.6ms preprocess, 57.9ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  47%|████▋     | 2372/5000 [05:40<05:30,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000274460.jpg: 480x640 15 persons, 1 backpack, 3 surfboards, 60.3ms
Speed: 2.6ms preprocess, 60.3ms inference, 19.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2373/5000 [05:41<06:01,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000274687.jpg: 480x640 2 bicycles, 65.6ms
Speed: 2.6ms preprocess, 65.6ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  47%|████▋     | 2374/5000 [05:41<05:41,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000274708.jpg: 640x640 7 persons, 2 skiss, 80.2ms
Speed: 3.0ms preprocess, 80.2ms inference, 15.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  48%|████▊     | 2375/5000 [05:41<06:20,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000275058.jpg: 640x448 1 person, 1 surfboard, 57.0ms
Speed: 2.6ms preprocess, 57.0ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  48%|████▊     | 2376/5000 [05:41<05:51,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000275198.jpg: 480x640 5 persons, 4 umbrellas, 3 bottles, 3 cups, 1 spoon, 1 bowl, 1 dining table, 59.6ms
Speed: 2.4ms preprocess, 59.6ms inference, 17.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2377/5000 [05:41<06:11,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000275392.jpg: 640x480 1 person, 1 horse, 64.2ms
Speed: 2.6ms preprocess, 64.2ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  48%|████▊     | 2378/5000 [05:41<05:49,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000275727.jpg: 480x640 1 train, 1 traffic light, 60.9ms
Speed: 2.7ms preprocess, 60.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2379/5000 [05:41<05:44,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000275749.jpg: 480x640 4 persons, 2 bottles, 1 cup, 1 tv, 1 laptop, 76.8ms
Speed: 5.8ms preprocess, 76.8ms inference, 8.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2380/5000 [05:42<06:05,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000275791.jpg: 448x640 2 trains, 2 boats, 55.9ms
Speed: 3.0ms preprocess, 55.9ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2381/5000 [05:42<05:42,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000276018.jpg: 640x416 12 persons, 2 teddy bears, 57.4ms
Speed: 2.5ms preprocess, 57.4ms inference, 10.6ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  48%|████▊     | 2382/5000 [05:42<05:47,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000276024.jpg: 480x640 3 persons, 4 horses, 2 cows, 65.9ms
Speed: 2.7ms preprocess, 65.9ms inference, 12.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2383/5000 [05:42<06:03,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000276055.jpg: 480x640 1 person, 2 umbrellas, 1 bottle, 1 cup, 64.7ms
Speed: 4.1ms preprocess, 64.7ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2384/5000 [05:42<05:53,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000276284.jpg: 640x512 4 cars, 1 motorcycle, 65.8ms
Speed: 2.5ms preprocess, 65.8ms inference, 6.7ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  48%|████▊     | 2385/5000 [05:42<05:45,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000276285.jpg: 640x448 1 person, 4 cups, 1 fork, 1 pizza, 1 chair, 1 dining table, 62.8ms
Speed: 2.8ms preprocess, 62.8ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  48%|████▊     | 2386/5000 [05:42<05:43,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000276434.jpg: 448x640 2 persons, 1 knife, 3 cakes, 62.9ms
Speed: 2.8ms preprocess, 62.9ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2387/5000 [05:42<05:38,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000276707.jpg: 480x640 1 person, 1 car, 114.3ms
Speed: 2.7ms preprocess, 114.3ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2388/5000 [05:43<06:05,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000276720.jpg: 480x640 1 person, 1 car, 1 truck, 1 stop sign, 1 kite, 62.6ms
Speed: 2.6ms preprocess, 62.6ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2389/5000 [05:43<05:50,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000276804.jpg: 640x512 5 persons, 2 baseball bats, 69.9ms
Speed: 2.8ms preprocess, 69.9ms inference, 9.8ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  48%|████▊     | 2390/5000 [05:43<05:52,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000276921.jpg: 640x480 2 teddy bears, 64.4ms
Speed: 2.7ms preprocess, 64.4ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  48%|████▊     | 2391/5000 [05:43<05:31,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000277005.jpg: 448x640 11 persons, 1 bicycle, 5 cars, 1 train, 83.0ms
Speed: 2.5ms preprocess, 83.0ms inference, 16.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2392/5000 [05:43<06:19,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000277020.jpg: 384x640 4 persons, 2 ties, 2 cups, 52.1ms
Speed: 2.5ms preprocess, 52.1ms inference, 6.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  48%|████▊     | 2393/5000 [05:43<05:56,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000277051.jpg: 448x640 2 birds, 1 bottle, 1 chair, 58.6ms
Speed: 2.5ms preprocess, 58.6ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2394/5000 [05:43<05:36,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000277197.jpg: 448x640 2 chairs, 3 couchs, 1 potted plant, 1 vase, 59.7ms
Speed: 3.8ms preprocess, 59.7ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2395/5000 [05:44<05:32,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000277584.jpg: 480x640 2 benchs, 1 cat, 93.6ms
Speed: 2.7ms preprocess, 93.6ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2396/5000 [05:44<05:45,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000277689.jpg: 448x640 2 wine glasss, 2 cakes, 60.5ms
Speed: 2.7ms preprocess, 60.5ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2397/5000 [05:44<05:29,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000278006.jpg: 448x640 (no detections), 60.7ms
Speed: 2.6ms preprocess, 60.7ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000278353.jpg: 640x608 1 sheep, 141.8ms
Speed: 1.9ms preprocess, 141.8ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  48%|████▊     | 2399/5000 [05:44<05:44,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000278463.jpg: 480x640 1 cat, 1 bottle, 1 potted plant, 1 laptop, 1 keyboard, 3 books, 60.1ms
Speed: 2.6ms preprocess, 60.1ms inference, 7.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2400/5000 [05:44<05:39,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000278705.jpg: 480x640 2 persons, 4 cars, 2 skateboards, 88.8ms
Speed: 2.8ms preprocess, 88.8ms inference, 8.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2401/5000 [05:44<05:59,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000278749.jpg: 448x640 7 persons, 2 cars, 1 truck, 1 handbag, 63.2ms
Speed: 4.0ms preprocess, 63.2ms inference, 8.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2402/5000 [05:44<05:58,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000278848.jpg: 640x480 4 persons, 3 cars, 1 bus, 4 umbrellas, 62.0ms
Speed: 2.8ms preprocess, 62.0ms inference, 11.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  48%|████▊     | 2403/5000 [05:45<06:00,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000278973.jpg: 448x640 1 person, 1 surfboard, 62.0ms
Speed: 2.9ms preprocess, 62.0ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2404/5000 [05:45<05:39,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000279145.jpg: 480x640 1 bench, 5 potted plants, 63.3ms
Speed: 2.9ms preprocess, 63.3ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2405/5000 [05:45<05:38,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000279278.jpg: 448x640 10 persons, 6 bicycles, 1 motorcycle, 1 dog, 1 skateboard, 83.0ms
Speed: 2.8ms preprocess, 83.0ms inference, 19.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2406/5000 [05:45<06:24,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000279541.jpg: 640x480 1 person, 10 pizzas, 1 dining table, 62.4ms
Speed: 3.1ms preprocess, 62.4ms inference, 10.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  48%|████▊     | 2407/5000 [05:45<06:16,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000279714.jpg: 640x448 2 cars, 56.5ms
Speed: 2.4ms preprocess, 56.5ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  48%|████▊     | 2408/5000 [05:45<05:45,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000279730.jpg: 448x640 1 person, 2 cups, 1 donut, 60.0ms
Speed: 4.4ms preprocess, 60.0ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2409/5000 [05:45<05:31,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000279769.jpg: 480x640 1 banana, 1 dining table, 1 book, 96.3ms
Speed: 3.0ms preprocess, 96.3ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2410/5000 [05:46<05:48,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000279774.jpg: 640x544 6 persons, 1 baseball bat, 138.3ms
Speed: 2.9ms preprocess, 138.3ms inference, 7.2ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  48%|████▊     | 2411/5000 [05:46<06:42,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000279887.jpg: 448x640 1 car, 2 motorcycles, 57.3ms
Speed: 2.6ms preprocess, 57.3ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2412/5000 [05:46<06:02,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000279927.jpg: 448x640 5 persons, 1 bench, 4 umbrellas, 59.3ms
Speed: 2.7ms preprocess, 59.3ms inference, 8.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2413/5000 [05:46<05:57,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000280325.jpg: 512x640 1 parking meter, 153.4ms
Speed: 4.0ms preprocess, 153.4ms inference, 1.7ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  48%|████▊     | 2414/5000 [05:46<06:46,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000280710.jpg: 480x640 8 persons, 2 buss, 1 handbag, 60.9ms
Speed: 2.9ms preprocess, 60.9ms inference, 10.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2415/5000 [05:46<06:28,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000280779.jpg: 512x640 1 person, 1 skis, 1 sports ball, 68.9ms
Speed: 2.8ms preprocess, 68.9ms inference, 3.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  48%|████▊     | 2416/5000 [05:46<06:05,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000280891.jpg: 640x448 3 persons, 13 oranges, 57.6ms
Speed: 2.6ms preprocess, 57.6ms inference, 13.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  48%|████▊     | 2417/5000 [05:47<06:09,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000280918.jpg: 480x640 2 persons, 1 bottle, 1 oven, 64.4ms
Speed: 2.8ms preprocess, 64.4ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2418/5000 [05:47<05:49,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000280930.jpg: 448x640 1 person, 3 ovens, 59.8ms
Speed: 2.6ms preprocess, 59.8ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2419/5000 [05:47<05:39,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000281032.jpg: 448x640 1 person, 1 pizza, 1 oven, 69.1ms
Speed: 2.8ms preprocess, 69.1ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2420/5000 [05:47<05:29,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000281179.jpg: 480x640 1 traffic light, 63.9ms
Speed: 2.6ms preprocess, 63.9ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2421/5000 [05:47<05:17,  8.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000281409.jpg: 448x640 15 persons, 1 baseball bat, 1 baseball glove, 61.0ms
Speed: 2.4ms preprocess, 61.0ms inference, 14.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2422/5000 [05:47<05:42,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000281414.jpg: 640x448 1 person, 1 cell phone, 58.1ms
Speed: 2.8ms preprocess, 58.1ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  48%|████▊     | 2423/5000 [05:47<05:31,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000281447.jpg: 448x640 2 horses, 1 cow, 63.7ms
Speed: 3.3ms preprocess, 63.7ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  48%|████▊     | 2424/5000 [05:47<05:28,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000281687.jpg: 480x640 10 persons, 1 handbag, 1 suitcase, 63.9ms
Speed: 4.0ms preprocess, 63.9ms inference, 12.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  48%|████▊     | 2425/5000 [05:48<05:44,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000281693.jpg: 512x640 4 airplanes, 68.7ms
Speed: 2.8ms preprocess, 68.7ms inference, 5.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  49%|████▊     | 2426/5000 [05:48<05:38,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000281754.jpg: 640x448 2 persons, 1 umbrella, 1 handbag, 61.2ms
Speed: 2.7ms preprocess, 61.2ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  49%|████▊     | 2427/5000 [05:48<05:26,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000281759.jpg: 448x640 7 persons, 5 umbrellas, 1 kite, 60.2ms
Speed: 2.5ms preprocess, 60.2ms inference, 19.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▊     | 2428/5000 [05:48<05:52,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000281929.jpg: 512x640 1 person, 1 bicycle, 1 tie, 69.4ms
Speed: 2.5ms preprocess, 69.4ms inference, 3.6ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  49%|████▊     | 2429/5000 [05:48<05:37,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000282037.jpg: 480x640 6 persons, 1 sports ball, 1 baseball glove, 59.7ms
Speed: 2.6ms preprocess, 59.7ms inference, 8.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  49%|████▊     | 2430/5000 [05:48<05:39,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000282046.jpg: 384x640 1 bench, 55.1ms
Speed: 3.5ms preprocess, 55.1ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  49%|████▊     | 2431/5000 [05:48<05:19,  8.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000282296.jpg: 640x480 1 toilet, 1 refrigerator, 63.5ms
Speed: 2.8ms preprocess, 63.5ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  49%|████▊     | 2432/5000 [05:49<05:10,  8.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000282298.jpg: 480x640 15 persons, 1 handbag, 73.2ms
Speed: 2.8ms preprocess, 73.2ms inference, 16.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  49%|████▊     | 2433/5000 [05:49<05:51,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000282912.jpg: 640x480 1 giraffe, 58.5ms
Speed: 2.7ms preprocess, 58.5ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  49%|████▊     | 2434/5000 [05:49<05:31,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000283037.jpg: 640x448 1 car, 1 traffic light, 59.3ms
Speed: 2.5ms preprocess, 59.3ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  49%|████▊     | 2435/5000 [05:49<05:14,  8.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000283038.jpg: 384x640 2 cars, 1 truck, 1 stop sign, 48.6ms
Speed: 3.6ms preprocess, 48.6ms inference, 4.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  49%|████▊     | 2436/5000 [05:49<05:00,  8.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000283070.jpg: 448x640 5 oranges, 1 dining table, 59.4ms
Speed: 2.5ms preprocess, 59.4ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▊     | 2437/5000 [05:49<05:09,  8.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000283113.jpg: 640x480 1 cup, 2 hot dogs, 66.7ms
Speed: 2.8ms preprocess, 66.7ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  49%|████▉     | 2438/5000 [05:49<05:07,  8.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000283268.jpg: 448x640 4 persons, 1 cup, 1 hot dog, 56.7ms
Speed: 2.4ms preprocess, 56.7ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2439/5000 [05:49<05:05,  8.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000283318.jpg: 448x640 2 persons, 3 cars, 1 fire hydrant, 58.3ms
Speed: 2.5ms preprocess, 58.3ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2440/5000 [05:49<05:04,  8.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000283412.jpg: 448x640 1 cat, 1 dog, 1 cup, 1 book, 89.1ms
Speed: 2.6ms preprocess, 89.1ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2441/5000 [05:50<05:22,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000283520.jpg: 448x640 2 persons, 3 frisbees, 56.1ms
Speed: 2.7ms preprocess, 56.1ms inference, 5.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2442/5000 [05:50<05:11,  8.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000283717.jpg: 480x640 1 chair, 1 microwave, 63.4ms
Speed: 2.9ms preprocess, 63.4ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  49%|████▉     | 2443/5000 [05:50<05:03,  8.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000283785.jpg: 448x640 6 persons, 61.7ms
Speed: 4.3ms preprocess, 61.7ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2444/5000 [05:50<05:03,  8.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000284106.jpg: 448x640 1 bottle, 3 bowls, 1 carrot, 2 dining tables, 59.9ms
Speed: 2.7ms preprocess, 59.9ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2445/5000 [05:50<05:05,  8.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000284279.jpg: 512x640 1 bird, 89.6ms
Speed: 3.2ms preprocess, 89.6ms inference, 2.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  49%|████▉     | 2446/5000 [05:50<05:24,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000284282.jpg: 448x640 1 microwave, 57.6ms
Speed: 4.0ms preprocess, 57.6ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2447/5000 [05:50<05:07,  8.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000284296.jpg: 448x640 3 giraffes, 53.7ms
Speed: 2.8ms preprocess, 53.7ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2448/5000 [05:50<04:54,  8.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000284445.jpg: 608x640 13 persons, 1 bus, 1 truck, 4 traffic lights, 138.0ms
Speed: 1.6ms preprocess, 138.0ms inference, 22.2ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  49%|████▉     | 2449/5000 [05:51<06:41,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000284623.jpg: 480x640 1 cat, 1 bottle, 2 vases, 68.6ms
Speed: 2.8ms preprocess, 68.6ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  49%|████▉     | 2450/5000 [05:51<06:22,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000284698.jpg: 640x480 1 person, 1 skateboard, 1 cell phone, 63.1ms
Speed: 3.8ms preprocess, 63.1ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  49%|████▉     | 2451/5000 [05:51<06:02,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000284725.jpg: 448x640 3 persons, 3 cars, 1 bus, 60.0ms
Speed: 2.5ms preprocess, 60.0ms inference, 7.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2452/5000 [05:51<05:49,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000284743.jpg: 448x640 4 persons, 1 baseball bat, 57.9ms
Speed: 2.7ms preprocess, 57.9ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2453/5000 [05:51<05:32,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000284762.jpg: 448x640 1 traffic light, 59.0ms
Speed: 2.6ms preprocess, 59.0ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2454/5000 [05:51<05:10,  8.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000284764.jpg: 448x640 1 person, 1 pizza, 81.2ms
Speed: 5.5ms preprocess, 81.2ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2455/5000 [05:51<05:32,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000284991.jpg: 448x640 3 persons, 60.6ms
Speed: 2.7ms preprocess, 60.6ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2456/5000 [05:52<05:18,  8.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000285047.jpg: 640x448 1 person, 1 car, 1 cup, 1 dining table, 1 cell phone, 56.1ms
Speed: 2.5ms preprocess, 56.1ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  49%|████▉     | 2457/5000 [05:52<05:07,  8.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000285349.jpg: 480x640 1 bowl, 63.4ms
Speed: 3.9ms preprocess, 63.4ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  49%|████▉     | 2458/5000 [05:52<05:03,  8.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000285788.jpg: 512x640 1 bird, 2 giraffes, 65.0ms
Speed: 2.6ms preprocess, 65.0ms inference, 3.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  49%|████▉     | 2459/5000 [05:52<05:00,  8.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000285894.jpg: 448x640 3 persons, 2 giraffes, 89.9ms
Speed: 3.6ms preprocess, 89.9ms inference, 6.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2460/5000 [05:52<05:25,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286182.jpg: 640x480 1 person, 1 bottle, 1 fork, 1 spoon, 5 bowls, 1 pizza, 1 chair, 1 dining table, 61.9ms
Speed: 2.6ms preprocess, 61.9ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  49%|████▉     | 2461/5000 [05:52<05:38,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286422.jpg: 448x640 1 person, 2 boats, 1 dog, 61.0ms
Speed: 2.7ms preprocess, 61.0ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2462/5000 [05:52<05:22,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286458.jpg: 448x640 1 backpack, 1 suitcase, 57.7ms
Speed: 3.6ms preprocess, 57.7ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2463/5000 [05:52<05:07,  8.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286503.jpg: 448x640 2 persons, 1 elephant, 77.7ms
Speed: 2.6ms preprocess, 77.7ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2464/5000 [05:53<05:22,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286507.jpg: 448x640 1 train, 60.1ms
Speed: 2.8ms preprocess, 60.1ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  49%|████▉     | 2465/5000 [05:53<05:10,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286523.jpg: 544x640 3 persons, 137.4ms
Speed: 1.7ms preprocess, 137.4ms inference, 2.8ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  49%|████▉     | 2466/5000 [05:53<05:59,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286553.jpg: 480x640 2 persons, 1 bottle, 2 cups, 1 knife, 2 pizzas, 1 chair, 1 dining table, 63.1ms
Speed: 2.7ms preprocess, 63.1ms inference, 9.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  49%|████▉     | 2467/5000 [05:53<05:51,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286660.jpg: 640x480 1 person, 1 teddy bear, 72.7ms
Speed: 4.3ms preprocess, 72.7ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  49%|████▉     | 2468/5000 [05:53<05:45,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286708.jpg: 512x640 2 cats, 73.8ms
Speed: 3.2ms preprocess, 73.8ms inference, 3.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  49%|████▉     | 2469/5000 [05:53<05:37,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286849.jpg: 640x448 5 giraffes, 61.2ms
Speed: 3.6ms preprocess, 61.2ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  49%|████▉     | 2470/5000 [05:53<05:24,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286907.jpg: 640x480 1 bottle, 1 toilet, 65.5ms
Speed: 2.5ms preprocess, 65.5ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  49%|████▉     | 2471/5000 [05:53<05:14,  8.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286908.jpg: 640x640 16 bowls, 5 dining tables, 182.1ms
Speed: 2.5ms preprocess, 182.1ms inference, 22.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  49%|████▉     | 2472/5000 [05:54<07:30,  5.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000286994.jpg: 480x640 8 elephants, 63.0ms
Speed: 3.0ms preprocess, 63.0ms inference, 7.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  49%|████▉     | 2473/5000 [05:54<06:57,  6.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000287291.jpg: 480x640 8 persons, 6 cars, 68.5ms
Speed: 3.8ms preprocess, 68.5ms inference, 14.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  49%|████▉     | 2474/5000 [05:54<06:54,  6.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000287347.jpg: 640x480 2 persons, 2 bottles, 2 cups, 1 pizza, 1 dining table, 1 mouse, 64.6ms
Speed: 2.6ms preprocess, 64.6ms inference, 8.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  50%|████▉     | 2475/5000 [05:54<06:28,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000287527.jpg: 640x640 3 persons, 1 tie, 78.6ms
Speed: 2.7ms preprocess, 78.6ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  50%|████▉     | 2476/5000 [05:54<06:13,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000287545.jpg: 480x640 2 giraffes, 95.7ms
Speed: 4.7ms preprocess, 95.7ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|████▉     | 2477/5000 [05:54<06:12,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000287649.jpg: 480x640 1 cat, 1 laptop, 1 keyboard, 63.1ms
Speed: 2.7ms preprocess, 63.1ms inference, 4.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|████▉     | 2478/5000 [05:55<05:52,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000287667.jpg: 480x640 1 knife, 2 carrots, 1 dining table, 1 cell phone, 61.6ms
Speed: 2.6ms preprocess, 61.6ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|████▉     | 2479/5000 [05:55<05:37,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000287714.jpg: 480x640 2 bottles, 2 cups, 1 sink, 61.2ms
Speed: 2.5ms preprocess, 61.2ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|████▉     | 2480/5000 [05:55<05:25,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000287874.jpg: 448x640 6 persons, 1 train, 89.4ms
Speed: 2.7ms preprocess, 89.4ms inference, 6.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|████▉     | 2481/5000 [05:55<05:39,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000287959.jpg: 480x640 1 sandwich, 64.1ms
Speed: 2.7ms preprocess, 64.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|████▉     | 2482/5000 [05:55<05:21,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000288042.jpg: 480x640 1 person, 1 car, 1 umbrella, 62.7ms
Speed: 2.6ms preprocess, 62.7ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|████▉     | 2483/5000 [05:55<05:13,  8.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000288062.jpg: 640x544 1 bird, 2 toilets, 68.8ms
Speed: 1.9ms preprocess, 68.8ms inference, 6.4ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  50%|████▉     | 2484/5000 [05:55<05:13,  8.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000288391.jpg: 640x448 2 remotes, 1 cell phone, 61.8ms
Speed: 2.7ms preprocess, 61.8ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  50%|████▉     | 2485/5000 [05:55<05:03,  8.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000288430.jpg: 480x640 5 persons, 1 kite, 62.5ms
Speed: 3.7ms preprocess, 62.5ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|████▉     | 2486/5000 [05:56<05:07,  8.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000288584.jpg: 448x640 9 persons, 1 giraffe, 1 backpack, 1 handbag, 1 chair, 91.6ms
Speed: 2.6ms preprocess, 91.6ms inference, 13.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|████▉     | 2487/5000 [05:56<05:47,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000288685.jpg: 448x640 21 persons, 1 dog, 3 sheeps, 55.6ms
Speed: 2.9ms preprocess, 55.6ms inference, 21.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|████▉     | 2488/5000 [05:56<06:15,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000288762.jpg: 448x640 5 carrots, 57.1ms
Speed: 2.6ms preprocess, 57.1ms inference, 6.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|████▉     | 2489/5000 [05:56<05:48,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000288862.jpg: 640x448 2 persons, 55.6ms
Speed: 2.5ms preprocess, 55.6ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  50%|████▉     | 2490/5000 [05:56<05:20,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000288882.jpg: 384x640 2 giraffes, 52.1ms
Speed: 2.0ms preprocess, 52.1ms inference, 5.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  50%|████▉     | 2491/5000 [05:56<05:06,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289059.jpg: 480x640 3 persons, 1 tennis racket, 69.9ms
Speed: 3.1ms preprocess, 69.9ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|████▉     | 2492/5000 [05:56<05:08,  8.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289222.jpg: 480x640 1 person, 1 bus, 3 traffic lights, 1 potted plant, 59.4ms
Speed: 2.8ms preprocess, 59.4ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|████▉     | 2493/5000 [05:57<05:02,  8.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289229.jpg: 640x480 1 person, 1 tie, 59.8ms
Speed: 4.1ms preprocess, 59.8ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  50%|████▉     | 2494/5000 [05:57<04:58,  8.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289343.jpg: 640x544 1 person, 1 bicycle, 1 dog, 103.1ms
Speed: 1.5ms preprocess, 103.1ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  50%|████▉     | 2495/5000 [05:57<05:24,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289393.jpg: 480x640 1 bird, 2 dogs, 1 giraffe, 1 banana, 65.3ms
Speed: 2.8ms preprocess, 65.3ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|████▉     | 2496/5000 [05:57<05:17,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289415.jpg: 448x640 1 person, 1 knife, 9 pizzas, 1 dining table, 60.4ms
Speed: 2.7ms preprocess, 60.4ms inference, 10.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|████▉     | 2497/5000 [05:57<05:24,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289417.jpg: 480x640 6 persons, 2 skiss, 63.1ms
Speed: 4.0ms preprocess, 63.1ms inference, 8.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|████▉     | 2498/5000 [05:57<05:26,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289516.jpg: 640x320 2 clocks, 109.9ms
Speed: 2.4ms preprocess, 109.9ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 320)


Segmenting Images:  50%|████▉     | 2499/5000 [05:57<05:53,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289586.jpg: 640x448 1 giraffe, 65.0ms
Speed: 4.4ms preprocess, 65.0ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  50%|█████     | 2500/5000 [05:57<05:36,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289594.jpg: 640x544 1 person, 4 cars, 70.8ms
Speed: 1.6ms preprocess, 70.8ms inference, 9.2ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  50%|█████     | 2501/5000 [05:58<05:33,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289659.jpg: 448x640 1 giraffe, 56.9ms
Speed: 2.5ms preprocess, 56.9ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|█████     | 2502/5000 [05:58<05:11,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289702.jpg: 448x640 2 dogs, 1 bottle, 1 bowl, 80.6ms
Speed: 2.6ms preprocess, 80.6ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|█████     | 2503/5000 [05:58<05:21,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289741.jpg: 480x640 7 persons, 2 boats, 1 umbrella, 4 chairs, 68.1ms
Speed: 2.5ms preprocess, 68.1ms inference, 13.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|█████     | 2504/5000 [05:58<05:38,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289938.jpg: 448x640 1 person, 1 car, 1 umbrella, 61.3ms
Speed: 2.6ms preprocess, 61.3ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|█████     | 2505/5000 [05:58<05:22,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289960.jpg: 480x640 1 person, 64.0ms
Speed: 2.8ms preprocess, 64.0ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|█████     | 2506/5000 [05:58<05:09,  8.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000289992.jpg: 448x640 1 sink, 91.9ms
Speed: 2.6ms preprocess, 91.9ms inference, 1.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|█████     | 2507/5000 [05:58<05:18,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000290081.jpg: 640x640 2 bowls, 6 oranges, 80.3ms
Speed: 3.1ms preprocess, 80.3ms inference, 8.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  50%|█████     | 2508/5000 [05:58<05:39,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000290163.jpg: 640x512 4 persons, 1 bowl, 1 pizza, 1 cake, 3 chairs, 1 dining table, 123.6ms
Speed: 2.9ms preprocess, 123.6ms inference, 10.2ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  50%|█████     | 2509/5000 [05:59<06:30,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000290179.jpg: 448x640 3 boats, 55.5ms
Speed: 2.5ms preprocess, 55.5ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|█████     | 2510/5000 [05:59<05:54,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000290248.jpg: 480x640 13 persons, 1 car, 1 suitcase, 1 clock, 57.0ms
Speed: 2.7ms preprocess, 57.0ms inference, 13.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|█████     | 2511/5000 [05:59<05:59,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000290293.jpg: 352x640 5 persons, 1 train, 130.9ms
Speed: 4.2ms preprocess, 130.9ms inference, 4.0ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  50%|█████     | 2512/5000 [05:59<06:31,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000290592.jpg: 448x640 14 sheeps, 60.4ms
Speed: 2.8ms preprocess, 60.4ms inference, 11.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|█████     | 2513/5000 [05:59<06:20,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000290619.jpg: 480x640 1 cup, 1 banana, 64.5ms
Speed: 2.8ms preprocess, 64.5ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|█████     | 2514/5000 [05:59<05:50,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000290768.jpg: 640x640 2 bottles, 2 cups, 2 knifes, 84.8ms
Speed: 3.6ms preprocess, 84.8ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  50%|█████     | 2515/5000 [06:00<05:59,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000290771.jpg: 480x640 2 chairs, 1 couch, 1 remote, 1 book, 116.1ms
Speed: 2.6ms preprocess, 116.1ms inference, 6.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  50%|█████     | 2516/5000 [06:00<06:21,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000290833.jpg: 448x640 2 zebras, 58.4ms
Speed: 2.7ms preprocess, 58.4ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|█████     | 2517/5000 [06:00<05:50,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000290843.jpg: 640x448 1 person, 1 cat, 1 bed, 60.7ms
Speed: 2.6ms preprocess, 60.7ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  50%|█████     | 2518/5000 [06:00<05:31,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000291490.jpg: 416x640 1 cat, 2 beds, 118.8ms
Speed: 2.6ms preprocess, 118.8ms inference, 3.0ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  50%|█████     | 2519/5000 [06:00<05:55,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000291551.jpg: 640x448 3 persons, 1 skateboard, 53.7ms
Speed: 2.6ms preprocess, 53.7ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  50%|█████     | 2520/5000 [06:00<05:35,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000291619.jpg: 448x640 2 persons, 1 frisbee, 66.3ms
Speed: 3.0ms preprocess, 66.3ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|█████     | 2521/5000 [06:00<05:24,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000291634.jpg: 640x448 8 persons, 1 bicycle, 4 motorcycles, 61.2ms
Speed: 2.5ms preprocess, 61.2ms inference, 15.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  50%|█████     | 2522/5000 [06:01<05:30,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000291664.jpg: 640x608 1 fire hydrant, 1 dog, 142.2ms
Speed: 2.4ms preprocess, 142.2ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  50%|█████     | 2523/5000 [06:01<06:21,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000291791.jpg: 512x640 1 person, 1 motorcycle, 127.0ms
Speed: 3.0ms preprocess, 127.0ms inference, 2.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  50%|█████     | 2524/5000 [06:01<06:40,  6.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000291861.jpg: 448x640 1 zebra, 84.3ms
Speed: 2.5ms preprocess, 84.3ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  50%|█████     | 2525/5000 [06:01<06:23,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292005.jpg: 640x480 3 bottles, 1 cup, 132.5ms
Speed: 2.7ms preprocess, 132.5ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  51%|█████     | 2526/5000 [06:01<06:48,  6.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292024.jpg: 640x640 1 person, 1 knife, 2 pizzas, 78.6ms
Speed: 3.3ms preprocess, 78.6ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  51%|█████     | 2527/5000 [06:01<06:27,  6.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292060.jpg: 640x448 1 bottle, 2 knifes, 1 oven, 1 sink, 60.5ms
Speed: 2.7ms preprocess, 60.5ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  51%|█████     | 2528/5000 [06:01<06:02,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292082.jpg: 640x640 3 persons, 3 ties, 1 chair, 114.2ms
Speed: 1.9ms preprocess, 114.2ms inference, 9.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  51%|█████     | 2529/5000 [06:02<06:30,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292155.jpg: 480x640 1 person, 1 umbrella, 63.3ms
Speed: 2.7ms preprocess, 63.3ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████     | 2530/5000 [06:02<05:58,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292225.jpg: 480x640 2 persons, 60.7ms
Speed: 4.1ms preprocess, 60.7ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████     | 2531/5000 [06:02<05:28,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292236.jpg: 640x448 5 persons, 61.3ms
Speed: 4.1ms preprocess, 61.3ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  51%|█████     | 2532/5000 [06:02<05:16,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292330.jpg: 512x640 1 dog, 96.5ms
Speed: 2.7ms preprocess, 96.5ms inference, 2.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  51%|█████     | 2533/5000 [06:02<05:29,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292415.jpg: 640x448 1 person, 59.2ms
Speed: 3.6ms preprocess, 59.2ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  51%|█████     | 2534/5000 [06:02<05:09,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292446.jpg: 640x640 4 persons, 2 pizzas, 1 dining table, 2 laptops, 2 books, 76.2ms
Speed: 3.6ms preprocess, 76.2ms inference, 16.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  51%|█████     | 2535/5000 [06:02<05:44,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292456.jpg: 480x640 2 persons, 1 motorcycle, 1 potted plant, 61.0ms
Speed: 4.4ms preprocess, 61.0ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████     | 2536/5000 [06:03<05:27,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292488.jpg: 448x640 1 person, 2 cell phones, 90.1ms
Speed: 2.5ms preprocess, 90.1ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  51%|█████     | 2537/5000 [06:03<05:35,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292908.jpg: 640x384 1 person, 1 surfboard, 122.3ms
Speed: 2.4ms preprocess, 122.3ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  51%|█████     | 2538/5000 [06:03<05:57,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000292997.jpg: 480x640 2 persons, 3 cars, 3 trucks, 2 traffic lights, 63.1ms
Speed: 2.7ms preprocess, 63.1ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████     | 2539/5000 [06:03<05:48,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000293044.jpg: 384x640 3 bowls, 1 dining table, 113.1ms
Speed: 3.1ms preprocess, 113.1ms inference, 3.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  51%|█████     | 2540/5000 [06:03<06:03,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000293071.jpg: 640x448 3 cars, 1 fire hydrant, 79.1ms
Speed: 3.5ms preprocess, 79.1ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  51%|█████     | 2541/5000 [06:03<05:55,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000293200.jpg: 448x640 16 persons, 2 kites, 59.5ms
Speed: 2.6ms preprocess, 59.5ms inference, 15.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  51%|█████     | 2542/5000 [06:03<06:06,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000293245.jpg: 480x640 1 motorcycle, 1 potted plant, 66.5ms
Speed: 2.7ms preprocess, 66.5ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████     | 2543/5000 [06:04<05:42,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000293300.jpg: 448x640 1 person, 2 elephants, 57.1ms
Speed: 2.8ms preprocess, 57.1ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  51%|█████     | 2544/5000 [06:04<05:18,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000293324.jpg: 416x640 3 airplanes, 1 truck, 56.5ms
Speed: 2.5ms preprocess, 56.5ms inference, 4.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  51%|█████     | 2545/5000 [06:04<05:03,  8.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000293390.jpg: 480x640 1 bottle, 1 sink, 1 vase, 2 toothbrushs, 85.8ms
Speed: 3.4ms preprocess, 85.8ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████     | 2546/5000 [06:04<05:22,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000293474.jpg: 448x640 1 person, 1 fire hydrant, 61.9ms
Speed: 4.2ms preprocess, 61.9ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  51%|█████     | 2547/5000 [06:04<05:10,  7.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000293625.jpg: 480x640 2 persons, 1 vase, 61.6ms
Speed: 2.6ms preprocess, 61.6ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████     | 2548/5000 [06:04<04:57,  8.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000293794.jpg: 640x448 4 persons, 1 tie, 10 bananas, 57.7ms
Speed: 2.4ms preprocess, 57.7ms inference, 17.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  51%|█████     | 2549/5000 [06:04<05:17,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000293804.jpg: 448x640 2 couchs, 1 potted plant, 81.5ms
Speed: 3.8ms preprocess, 81.5ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  51%|█████     | 2550/5000 [06:04<05:22,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000293858.jpg: 608x640 1 person, 1 bottle, 1 cup, 4 sandwichs, 3 chairs, 2 dining tables, 148.2ms
Speed: 2.7ms preprocess, 148.2ms inference, 16.7ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  51%|█████     | 2551/5000 [06:05<06:44,  6.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000294162.jpg: 640x480 1 laptop, 1 mouse, 58.3ms
Speed: 2.6ms preprocess, 58.3ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  51%|█████     | 2552/5000 [06:05<05:58,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000294163.jpg: 640x448 4 clocks, 60.9ms
Speed: 2.5ms preprocess, 60.9ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  51%|█████     | 2553/5000 [06:05<05:34,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000294350.jpg: 448x640 4 persons, 1 bottle, 1 wine glass, 87.5ms
Speed: 2.8ms preprocess, 87.5ms inference, 7.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  51%|█████     | 2554/5000 [06:05<05:47,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000294695.jpg: 448x640 4 persons, 4 kites, 61.3ms
Speed: 2.7ms preprocess, 61.3ms inference, 8.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  51%|█████     | 2555/5000 [06:05<05:36,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000294783.jpg: 448x640 1 bed, 62.3ms
Speed: 2.8ms preprocess, 62.3ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  51%|█████     | 2556/5000 [06:05<05:14,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000294831.jpg: 384x640 1 person, 2 bottles, 1 pizza, 1 dining table, 56.3ms
Speed: 2.3ms preprocess, 56.3ms inference, 4.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  51%|█████     | 2557/5000 [06:05<05:04,  8.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000294855.jpg: 480x640 1 suitcase, 86.4ms
Speed: 4.3ms preprocess, 86.4ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████     | 2558/5000 [06:06<05:15,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000295138.jpg: 448x640 2 persons, 1 skateboard, 62.2ms
Speed: 2.8ms preprocess, 62.2ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  51%|█████     | 2559/5000 [06:06<05:05,  7.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000295231.jpg: 640x480 7 sheeps, 65.4ms
Speed: 2.8ms preprocess, 65.4ms inference, 6.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  51%|█████     | 2560/5000 [06:06<05:08,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000295316.jpg: 480x640 4 persons, 1 surfboard, 65.6ms
Speed: 2.6ms preprocess, 65.6ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████     | 2561/5000 [06:06<05:00,  8.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000295420.jpg: 480x640 1 person, 1 car, 2 trucks, 86.0ms
Speed: 2.6ms preprocess, 86.0ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████     | 2562/5000 [06:06<05:17,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000295478.jpg: 640x512 1 person, 1 dog, 2 handbags, 136.0ms
Speed: 2.7ms preprocess, 136.0ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  51%|█████▏    | 2563/5000 [06:06<06:04,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000295713.jpg: 448x640 8 persons, 3 benchs, 57.3ms
Speed: 2.7ms preprocess, 57.3ms inference, 10.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  51%|█████▏    | 2564/5000 [06:06<05:51,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000295797.jpg: 640x448 1 airplane, 2 clocks, 60.1ms
Speed: 2.8ms preprocess, 60.1ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  51%|█████▏    | 2565/5000 [06:06<05:27,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000295809.jpg: 512x640 1 person, 3 cars, 1 truck, 4 traffic lights, 115.1ms
Speed: 2.9ms preprocess, 115.1ms inference, 8.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  51%|█████▏    | 2566/5000 [06:07<06:16,  6.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000296222.jpg: 480x640 1 person, 1 cup, 60.8ms
Speed: 2.9ms preprocess, 60.8ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████▏    | 2567/5000 [06:07<05:45,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000296224.jpg: 480x640 2 cars, 1 bus, 59.1ms
Speed: 3.0ms preprocess, 59.1ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████▏    | 2568/5000 [06:07<05:21,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000296231.jpg: 480x640 1 tv, 1 clock, 62.6ms
Speed: 2.5ms preprocess, 62.6ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████▏    | 2569/5000 [06:07<05:06,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000296284.jpg: 640x480 3 donuts, 1 refrigerator, 64.7ms
Speed: 3.8ms preprocess, 64.7ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  51%|█████▏    | 2570/5000 [06:07<05:00,  8.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000296317.jpg: 480x640 2 giraffes, 95.2ms
Speed: 2.5ms preprocess, 95.2ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████▏    | 2571/5000 [06:07<05:16,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000296634.jpg: 480x640 2 bowls, 1 carrot, 1 dining table, 62.3ms
Speed: 2.7ms preprocess, 62.3ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  51%|█████▏    | 2572/5000 [06:07<05:05,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000296649.jpg: 448x640 11 persons, 2 cars, 2 motorcycles, 60.8ms
Speed: 2.6ms preprocess, 60.8ms inference, 17.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  51%|█████▏    | 2573/5000 [06:08<05:24,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000296657.jpg: 448x640 6 persons, 1 frisbee, 60.0ms
Speed: 2.6ms preprocess, 60.0ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  51%|█████▏    | 2574/5000 [06:08<05:14,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000296969.jpg: 448x640 7 giraffes, 92.0ms
Speed: 2.8ms preprocess, 92.0ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2575/5000 [06:08<05:32,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297022.jpg: 512x640 3 persons, 1 truck, 68.1ms
Speed: 2.7ms preprocess, 68.1ms inference, 4.8ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  52%|█████▏    | 2576/5000 [06:08<05:22,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297084.jpg: 640x640 3 persons, 2 bottles, 4 wine glasss, 4 cups, 1 spoon, 2 bowls, 1 donut, 1 chair, 1 dining table, 150.5ms
Speed: 3.9ms preprocess, 150.5ms inference, 22.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  52%|█████▏    | 2577/5000 [06:08<07:08,  5.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297085.jpg: 480x640 1 cat, 3 tvs, 1 clock, 60.6ms
Speed: 4.0ms preprocess, 60.6ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2578/5000 [06:08<06:24,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297147.jpg: 480x640 2 persons, 1 motorcycle, 60.3ms
Speed: 2.7ms preprocess, 60.3ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2579/5000 [06:08<05:55,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297343.jpg: 448x640 1 stop sign, 68.7ms
Speed: 2.7ms preprocess, 68.7ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2580/5000 [06:09<05:33,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297353.jpg: 640x640 1 person, 2 bottles, 1 bed, 80.7ms
Speed: 2.3ms preprocess, 80.7ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  52%|█████▏    | 2581/5000 [06:09<05:34,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297396.jpg: 480x640 1 person, 2 donuts, 1 cake, 1 dining table, 64.6ms
Speed: 3.7ms preprocess, 64.6ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2582/5000 [06:09<05:23,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297427.jpg: 640x608 2 persons, 1 sandwich, 1 donut, 188.5ms
Speed: 1.6ms preprocess, 188.5ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  52%|█████▏    | 2583/5000 [06:09<06:44,  5.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297562.jpg: 480x640 12 persons, 2 cell phones, 59.0ms
Speed: 2.6ms preprocess, 59.0ms inference, 13.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2584/5000 [06:09<06:27,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297578.jpg: 384x640 3 persons, 1 handbag, 1 tie, 50.7ms
Speed: 3.4ms preprocess, 50.7ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  52%|█████▏    | 2585/5000 [06:09<05:47,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297595.jpg: 448x640 1 person, 4 surfboards, 58.1ms
Speed: 2.7ms preprocess, 58.1ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2586/5000 [06:09<05:26,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297681.jpg: 448x640 2 cars, 76.3ms
Speed: 2.7ms preprocess, 76.3ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2587/5000 [06:10<05:20,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297698.jpg: 448x640 1 airplane, 60.8ms
Speed: 2.8ms preprocess, 60.8ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2588/5000 [06:10<05:02,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000297830.jpg: 640x512 1 dog, 1 bottle, 65.5ms
Speed: 2.9ms preprocess, 65.5ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  52%|█████▏    | 2589/5000 [06:10<04:53,  8.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000298251.jpg: 160x640 1 bird, 5 zebras, 84.8ms
Speed: 2.0ms preprocess, 84.8ms inference, 2.4ms postprocess per image at shape (1, 3, 160, 640)


Segmenting Images:  52%|█████▏    | 2590/5000 [06:10<04:55,  8.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000298396.jpg: 480x640 2 bowls, 1 chair, 1 dining table, 2 ovens, 1 sink, 2 clocks, 94.1ms
Speed: 2.5ms preprocess, 94.1ms inference, 7.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2591/5000 [06:10<05:25,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000298697.jpg: 480x640 2 cows, 60.8ms
Speed: 2.9ms preprocess, 60.8ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2592/5000 [06:10<05:06,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000298738.jpg: 640x448 1 bear, 62.4ms
Speed: 2.8ms preprocess, 62.4ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  52%|█████▏    | 2593/5000 [06:10<04:54,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000298904.jpg: 640x480 2 vases, 64.4ms
Speed: 2.9ms preprocess, 64.4ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  52%|█████▏    | 2594/5000 [06:10<04:48,  8.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000298994.jpg: 448x640 1 bottle, 2 apples, 1 vase, 59.6ms
Speed: 2.6ms preprocess, 59.6ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2595/5000 [06:11<04:44,  8.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000299355.jpg: 480x640 2 broccolis, 87.6ms
Speed: 3.9ms preprocess, 87.6ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2596/5000 [06:11<05:03,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000299553.jpg: 640x480 7 persons, 3 kites, 63.1ms
Speed: 3.5ms preprocess, 63.1ms inference, 10.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  52%|█████▏    | 2597/5000 [06:11<05:08,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000299609.jpg: 480x640 1 horse, 66.2ms
Speed: 2.8ms preprocess, 66.2ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2598/5000 [06:11<05:02,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000299720.jpg: 448x640 2 giraffes, 60.6ms
Speed: 2.8ms preprocess, 60.6ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2599/5000 [06:11<04:53,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000299887.jpg: 480x640 2 persons, 2 motorcycles, 68.4ms
Speed: 2.9ms preprocess, 68.4ms inference, 8.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2600/5000 [06:11<05:02,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000300039.jpg: 480x640 2 persons, 2 bottles, 4 cups, 1 book, 66.9ms
Speed: 3.6ms preprocess, 66.9ms inference, 7.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2601/5000 [06:11<05:16,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000300155.jpg: 448x640 1 bird, 58.8ms
Speed: 2.7ms preprocess, 58.8ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2602/5000 [06:11<04:58,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000300233.jpg: 512x640 2 cups, 3 bowls, 1 cake, 1 dining table, 1 vase, 65.8ms
Speed: 2.6ms preprocess, 65.8ms inference, 11.7ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  52%|█████▏    | 2603/5000 [06:12<05:09,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000300276.jpg: 384x640 3 persons, 2 bowls, 3 cakes, 2 chairs, 1 dining table, 54.9ms
Speed: 2.1ms preprocess, 54.9ms inference, 8.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  52%|█████▏    | 2604/5000 [06:12<05:01,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000300341.jpg: 448x640 6 persons, 1 sports ball, 1 remote, 91.6ms
Speed: 2.5ms preprocess, 91.6ms inference, 8.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2605/5000 [06:12<05:28,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000300659.jpg: 384x640 7 airplanes, 1 bird, 54.1ms
Speed: 2.1ms preprocess, 54.1ms inference, 6.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  52%|█████▏    | 2606/5000 [06:12<05:12,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000300842.jpg: 448x640 2 airplanes, 57.3ms
Speed: 2.7ms preprocess, 57.3ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2607/5000 [06:12<04:55,  8.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000300913.jpg: 480x640 1 cat, 1 bed, 63.4ms
Speed: 3.9ms preprocess, 63.4ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2608/5000 [06:12<04:47,  8.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000301061.jpg: 640x480 1 person, 72.6ms
Speed: 2.6ms preprocess, 72.6ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  52%|█████▏    | 2609/5000 [06:12<04:50,  8.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000301135.jpg: 640x448 12 persons, 5 cars, 1 bench, 2 backpacks, 1 handbag, 56.4ms
Speed: 2.8ms preprocess, 56.4ms inference, 18.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  52%|█████▏    | 2610/5000 [06:12<05:21,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000301376.jpg: 640x448 4 persons, 2 bicycles, 5 cars, 4 traffic lights, 59.4ms
Speed: 4.0ms preprocess, 59.4ms inference, 13.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  52%|█████▏    | 2611/5000 [06:13<05:31,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000301421.jpg: 448x640 3 chairs, 1 tv, 1 mouse, 1 keyboard, 2 cell phones, 1 vase, 55.4ms
Speed: 2.4ms preprocess, 55.4ms inference, 6.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2612/5000 [06:13<05:19,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000301563.jpg: 448x640 3 persons, 1 skis, 2 snowboards, 74.6ms
Speed: 2.8ms preprocess, 74.6ms inference, 11.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2613/5000 [06:13<05:23,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000301718.jpg: 640x480 2 persons, 3 bottles, 1 cup, 1 knife, 2 bowls, 1 pizza, 1 dining table, 63.4ms
Speed: 2.7ms preprocess, 63.4ms inference, 10.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  52%|█████▏    | 2614/5000 [06:13<05:27,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000301867.jpg: 480x640 3 persons, 1 backpack, 1 umbrella, 3 handbags, 63.4ms
Speed: 3.0ms preprocess, 63.4ms inference, 7.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2615/5000 [06:13<05:24,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000301981.jpg: 480x640 1 giraffe, 62.7ms
Speed: 2.7ms preprocess, 62.7ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2616/5000 [06:13<05:07,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000302030.jpg: 384x640 1 person, 1 tv, 1 laptop, 2 mouses, 3 keyboards, 2 books, 75.9ms
Speed: 2.0ms preprocess, 75.9ms inference, 13.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  52%|█████▏    | 2617/5000 [06:13<05:22,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000302107.jpg: 640x448 3 persons, 1 baseball bat, 1 baseball glove, 61.2ms
Speed: 2.6ms preprocess, 61.2ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  52%|█████▏    | 2618/5000 [06:14<05:06,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000302165.jpg: 480x640 6 cows, 62.5ms
Speed: 2.8ms preprocess, 62.5ms inference, 4.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2619/5000 [06:14<05:00,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000302452.jpg: 640x640 4 persons, 1 giraffe, 78.7ms
Speed: 3.2ms preprocess, 78.7ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  52%|█████▏    | 2620/5000 [06:14<05:10,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000302536.jpg: 480x640 3 persons, 1 cup, 1 couch, 1 dining table, 1 remote, 88.4ms
Speed: 3.9ms preprocess, 88.4ms inference, 8.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2621/5000 [06:14<05:28,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000302760.jpg: 480x640 1 person, 2 bottles, 1 chair, 1 sink, 63.0ms
Speed: 3.7ms preprocess, 63.0ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▏    | 2622/5000 [06:14<05:18,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000302882.jpg: 512x640 1 person, 65.7ms
Speed: 3.9ms preprocess, 65.7ms inference, 2.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  52%|█████▏    | 2623/5000 [06:14<05:05,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000302990.jpg: 448x640 5 persons, 2 surfboards, 61.3ms
Speed: 2.5ms preprocess, 61.3ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  52%|█████▏    | 2624/5000 [06:14<05:00,  7.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000303305.jpg: 480x640 2 cars, 1 bus, 1 truck, 91.0ms
Speed: 2.8ms preprocess, 91.0ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  52%|█████▎    | 2625/5000 [06:14<05:16,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000303499.jpg: 640x448 2 persons, 4 horses, 58.8ms
Speed: 4.1ms preprocess, 58.8ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  53%|█████▎    | 2626/5000 [06:15<05:06,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000303566.jpg: 448x640 15 persons, 1 car, 1 backpack, 4 apples, 59.4ms
Speed: 2.4ms preprocess, 59.4ms inference, 17.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2627/5000 [06:15<05:31,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000303653.jpg: 448x640 3 persons, 3 horses, 1 cow, 60.2ms
Speed: 2.7ms preprocess, 60.2ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2628/5000 [06:15<05:20,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000303713.jpg: 640x448 5 persons, 1 sports ball, 5 chairs, 59.5ms
Speed: 2.7ms preprocess, 59.5ms inference, 11.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  53%|█████▎    | 2629/5000 [06:15<05:20,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000303818.jpg: 640x480 10 persons, 1 car, 1 bus, 1 traffic light, 91.2ms
Speed: 3.3ms preprocess, 91.2ms inference, 9.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  53%|█████▎    | 2630/5000 [06:15<05:51,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000303863.jpg: 448x640 3 persons, 1 train, 57.4ms
Speed: 2.5ms preprocess, 57.4ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2631/5000 [06:15<05:25,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000303893.jpg: 448x640 2 persons, 2 sheeps, 59.8ms
Speed: 2.8ms preprocess, 59.8ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2632/5000 [06:15<05:07,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000303908.jpg: 448x640 1 boat, 1 bench, 58.2ms
Speed: 2.8ms preprocess, 58.2ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2633/5000 [06:16<04:51,  8.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000304180.jpg: 448x640 4 persons, 1 skateboard, 1 tennis racket, 91.8ms
Speed: 2.7ms preprocess, 91.8ms inference, 5.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2634/5000 [06:16<05:11,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000304291.jpg: 448x640 1 horse, 58.3ms
Speed: 2.6ms preprocess, 58.3ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2635/5000 [06:16<04:53,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000304365.jpg: 448x640 1 person, 1 train, 61.5ms
Speed: 2.6ms preprocess, 61.5ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2636/5000 [06:16<04:44,  8.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000304396.jpg: 640x448 4 clocks, 62.1ms
Speed: 2.5ms preprocess, 62.1ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  53%|█████▎    | 2637/5000 [06:16<04:43,  8.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000304404.jpg: 448x640 22 persons, 1 baseball glove, 62.0ms
Speed: 2.9ms preprocess, 62.0ms inference, 27.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2638/5000 [06:16<05:35,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000304545.jpg: 544x640 1 suitcase, 136.4ms
Speed: 1.6ms preprocess, 136.4ms inference, 2.2ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  53%|█████▎    | 2639/5000 [06:16<06:04,  6.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000304560.jpg: 448x640 1 cat, 58.2ms
Speed: 2.7ms preprocess, 58.2ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2640/5000 [06:16<05:30,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000304812.jpg: 640x448 1 person, 2 surfboards, 62.7ms
Speed: 2.7ms preprocess, 62.7ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  53%|█████▎    | 2641/5000 [06:17<05:12,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000304817.jpg: 480x640 1 surfboard, 63.5ms
Speed: 2.9ms preprocess, 63.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  53%|█████▎    | 2642/5000 [06:17<05:01,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000304984.jpg: 480x640 1 wine glass, 1 cup, 1 fork, 2 bowls, 2 sandwichs, 1 dining table, 70.9ms
Speed: 5.2ms preprocess, 70.9ms inference, 7.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  53%|█████▎    | 2643/5000 [06:17<05:11,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000305309.jpg: 416x640 2 persons, 1 baseball bat, 122.1ms
Speed: 2.6ms preprocess, 122.1ms inference, 2.9ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  53%|█████▎    | 2644/5000 [06:17<05:37,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000305317.jpg: 640x448 8 persons, 3 kites, 56.2ms
Speed: 3.2ms preprocess, 56.2ms inference, 7.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  53%|█████▎    | 2645/5000 [06:17<05:25,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000305343.jpg: 448x640 2 vases, 61.8ms
Speed: 2.7ms preprocess, 61.8ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2646/5000 [06:17<05:05,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000305609.jpg: 640x640 1 bottle, 1 fork, 1 knife, 2 bowls, 1 sandwich, 1 pizza, 1 dining table, 103.1ms
Speed: 3.2ms preprocess, 103.1ms inference, 13.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  53%|█████▎    | 2647/5000 [06:17<05:47,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000305695.jpg: 480x640 1 car, 1 truck, 8 zebras, 59.4ms
Speed: 3.0ms preprocess, 59.4ms inference, 10.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  53%|█████▎    | 2648/5000 [06:18<05:39,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000306136.jpg: 640x448 2 persons, 1 bicycle, 1 bus, 58.2ms
Speed: 2.8ms preprocess, 58.2ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  53%|█████▎    | 2649/5000 [06:18<05:16,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000306139.jpg: 448x640 9 persons, 1 bottle, 2 chairs, 2 potted plants, 3 laptops, 58.3ms
Speed: 3.0ms preprocess, 58.3ms inference, 16.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2650/5000 [06:18<05:30,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000306437.jpg: 640x448 1 person, 1 bicycle, 1 skateboard, 59.7ms
Speed: 2.9ms preprocess, 59.7ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  53%|█████▎    | 2651/5000 [06:18<05:13,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000306582.jpg: 480x640 3 cows, 69.6ms
Speed: 3.3ms preprocess, 69.6ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  53%|█████▎    | 2652/5000 [06:18<05:07,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000306700.jpg: 640x480 1 person, 1 fork, 1 pizza, 1 chair, 60.7ms
Speed: 2.9ms preprocess, 60.7ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  53%|█████▎    | 2653/5000 [06:18<05:00,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000306733.jpg: 448x640 1 bottle, 1 potted plant, 1 sink, 57.6ms
Speed: 2.7ms preprocess, 57.6ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2654/5000 [06:18<04:46,  8.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000306893.jpg: 480x640 5 persons, 1 bicycle, 1 motorcycle, 2 trains, 6 traffic lights, 91.4ms
Speed: 3.9ms preprocess, 91.4ms inference, 14.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  53%|█████▎    | 2655/5000 [06:19<05:31,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000307074.jpg: 448x640 8 cars, 5 traffic lights, 53.5ms
Speed: 3.8ms preprocess, 53.5ms inference, 11.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2656/5000 [06:19<05:30,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000307145.jpg: 480x640 1 microwave, 2 ovens, 64.0ms
Speed: 4.1ms preprocess, 64.0ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  53%|█████▎    | 2657/5000 [06:19<05:13,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000307172.jpg: 448x640 1 person, 1 bowl, 2 pizzas, 1 dining table, 67.2ms
Speed: 2.8ms preprocess, 67.2ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2658/5000 [06:19<05:11,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000307598.jpg: 448x640 1 train, 1 traffic light, 101.5ms
Speed: 2.6ms preprocess, 101.5ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2659/5000 [06:19<05:24,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000307658.jpg: 640x608 1 bear, 161.5ms
Speed: 1.9ms preprocess, 161.5ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  53%|█████▎    | 2660/5000 [06:19<06:16,  6.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308165.jpg: 640x448 1 person, 1 skateboard, 60.0ms
Speed: 3.0ms preprocess, 60.0ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  53%|█████▎    | 2661/5000 [06:19<05:40,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308193.jpg: 640x480 1 clock, 66.2ms
Speed: 3.4ms preprocess, 66.2ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  53%|█████▎    | 2662/5000 [06:20<05:19,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308328.jpg: 480x640 4 persons, 1 truck, 2 suitcases, 1 skateboard, 68.0ms
Speed: 2.5ms preprocess, 68.0ms inference, 9.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  53%|█████▎    | 2663/5000 [06:20<05:26,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308391.jpg: 448x640 (no detections), 59.5ms
Speed: 3.6ms preprocess, 59.5ms inference, 0.4ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000308394.jpg: 448x640 1 person, 1 train, 1 handbag, 2 couchs, 60.3ms
Speed: 2.7ms preprocess, 60.3ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2665/5000 [06:20<04:48,  8.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308430.jpg: 480x640 2 bowls, 3 carrots, 1 dining table, 67.6ms
Speed: 2.9ms preprocess, 67.6ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  53%|█████▎    | 2666/5000 [06:20<04:49,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308466.jpg: 480x640 1 toilet, 1 sink, 62.3ms
Speed: 2.8ms preprocess, 62.3ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  53%|█████▎    | 2667/5000 [06:20<04:42,  8.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308476.jpg: 480x640 1 bear, 65.4ms
Speed: 3.1ms preprocess, 65.4ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  53%|█████▎    | 2668/5000 [06:20<04:42,  8.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308531.jpg: 640x384 1 clock, 119.7ms
Speed: 5.0ms preprocess, 119.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  53%|█████▎    | 2669/5000 [06:20<05:13,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308545.jpg: 448x640 3 persons, 3 horses, 58.8ms
Speed: 2.1ms preprocess, 58.8ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2670/5000 [06:21<05:02,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308587.jpg: 640x480 3 persons, 3 kites, 1 surfboard, 54.7ms
Speed: 2.2ms preprocess, 54.7ms inference, 8.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  53%|█████▎    | 2671/5000 [06:21<04:54,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308631.jpg: 448x640 14 persons, 1 motorcycle, 91.5ms
Speed: 2.7ms preprocess, 91.5ms inference, 20.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  53%|█████▎    | 2672/5000 [06:21<05:35,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308753.jpg: 480x640 1 bear, 60.8ms
Speed: 2.2ms preprocess, 60.8ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  53%|█████▎    | 2673/5000 [06:21<05:07,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308793.jpg: 480x640 2 persons, 2 skiss, 1 snowboard, 61.9ms
Speed: 2.2ms preprocess, 61.9ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  53%|█████▎    | 2674/5000 [06:21<04:58,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000308799.jpg: 640x480 2 bottles, 1 oven, 59.0ms
Speed: 3.9ms preprocess, 59.0ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  54%|█████▎    | 2675/5000 [06:21<04:49,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000309173.jpg: 640x448 1 stop sign, 118.3ms
Speed: 3.9ms preprocess, 118.3ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  54%|█████▎    | 2676/5000 [06:21<05:15,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000309391.jpg: 384x640 4 persons, 3 cars, 1 bus, 8 traffic lights, 105.2ms
Speed: 2.6ms preprocess, 105.2ms inference, 9.3ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  54%|█████▎    | 2677/5000 [06:22<05:52,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000309452.jpg: 480x640 1 bird, 61.9ms
Speed: 2.5ms preprocess, 61.9ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  54%|█████▎    | 2678/5000 [06:22<05:23,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000309467.jpg: 352x640 2 persons, 4 backpacks, 2 skiss, 108.8ms
Speed: 1.9ms preprocess, 108.8ms inference, 5.5ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  54%|█████▎    | 2679/5000 [06:22<05:41,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000309484.jpg: 640x544 1 dog, 1 toothbrush, 180.1ms
Speed: 1.5ms preprocess, 180.1ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  54%|█████▎    | 2680/5000 [06:22<06:39,  5.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000309495.jpg: 640x448 1 toilet, 60.0ms
Speed: 4.1ms preprocess, 60.0ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  54%|█████▎    | 2681/5000 [06:22<05:54,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000309655.jpg: 480x640 13 persons, 65.3ms
Speed: 2.6ms preprocess, 65.3ms inference, 10.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  54%|█████▎    | 2682/5000 [06:22<05:49,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000309678.jpg: 416x640 1 person, 1 carrot, 1 pizza, 116.8ms
Speed: 2.7ms preprocess, 116.8ms inference, 2.6ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  54%|█████▎    | 2683/5000 [06:22<05:57,  6.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000309938.jpg: 480x640 1 laptop, 2 books, 1 teddy bear, 81.7ms
Speed: 3.9ms preprocess, 81.7ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  54%|█████▎    | 2684/5000 [06:23<05:45,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000309964.jpg: 448x640 2 persons, 2 cars, 1 umbrella, 59.5ms
Speed: 2.6ms preprocess, 59.5ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▎    | 2685/5000 [06:23<05:23,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000310072.jpg: 384x640 3 cars, 1 bench, 50.5ms
Speed: 2.4ms preprocess, 50.5ms inference, 7.0ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  54%|█████▎    | 2686/5000 [06:23<04:58,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000310200.jpg: 480x640 2 cups, 1 bowl, 3 cakes, 1 dining table, 62.7ms
Speed: 2.2ms preprocess, 62.7ms inference, 6.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  54%|█████▎    | 2687/5000 [06:23<04:56,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000310622.jpg: 448x640 (no detections), 59.4ms
Speed: 2.8ms preprocess, 59.4ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000310862.jpg: 640x640 1 person, 1 tie, 133.6ms
Speed: 5.3ms preprocess, 133.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  54%|█████▍    | 2689/5000 [06:23<05:10,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000310980.jpg: 448x640 1 person, 1 tv, 2 laptops, 1 keyboard, 52.6ms
Speed: 2.5ms preprocess, 52.6ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2690/5000 [06:23<04:58,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311002.jpg: 448x640 5 persons, 1 train, 1 sports ball, 2 baseball bats, 3 baseball gloves, 58.5ms
Speed: 2.5ms preprocess, 58.5ms inference, 10.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2691/5000 [06:23<04:59,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311081.jpg: 640x480 2 toilets, 65.1ms
Speed: 2.8ms preprocess, 65.1ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  54%|█████▍    | 2692/5000 [06:24<04:48,  8.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311180.jpg: 640x480 1 person, 1 scissors, 67.9ms
Speed: 3.0ms preprocess, 67.9ms inference, 7.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  54%|█████▍    | 2693/5000 [06:24<04:50,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311190.jpg: 640x480 2 persons, 1 dog, 1 chair, 1 couch, 62.5ms
Speed: 4.0ms preprocess, 62.5ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  54%|█████▍    | 2694/5000 [06:24<04:51,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311295.jpg: 448x640 7 zebras, 1 giraffe, 64.0ms
Speed: 2.4ms preprocess, 64.0ms inference, 10.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2695/5000 [06:24<04:54,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311303.jpg: 448x640 2 persons, 1 cup, 1 spoon, 1 sandwich, 1 chair, 2 dining tables, 60.9ms
Speed: 2.7ms preprocess, 60.9ms inference, 7.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2696/5000 [06:24<04:54,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311392.jpg: 448x640 3 persons, 2 cows, 89.7ms
Speed: 2.6ms preprocess, 89.7ms inference, 8.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2697/5000 [06:24<05:09,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311394.jpg: 640x480 1 person, 1 toothbrush, 63.8ms
Speed: 2.7ms preprocess, 63.8ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  54%|█████▍    | 2698/5000 [06:24<04:56,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311518.jpg: 448x640 1 boat, 62.4ms
Speed: 2.7ms preprocess, 62.4ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2699/5000 [06:24<04:43,  8.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311789.jpg: 448x640 1 cat, 1 keyboard, 56.1ms
Speed: 2.7ms preprocess, 56.1ms inference, 6.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2700/5000 [06:25<04:32,  8.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311883.jpg: 320x640 1 train, 132.0ms
Speed: 2.2ms preprocess, 132.0ms inference, 1.8ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  54%|█████▍    | 2701/5000 [06:25<05:09,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311909.jpg: 512x640 1 bus, 132.6ms
Speed: 2.7ms preprocess, 132.6ms inference, 1.8ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  54%|█████▍    | 2702/5000 [06:25<05:39,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311928.jpg: 416x640 1 person, 1 sports ball, 1 baseball bat, 53.8ms
Speed: 2.4ms preprocess, 53.8ms inference, 3.3ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  54%|█████▍    | 2703/5000 [06:25<05:11,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000311950.jpg: 640x448 2 kites, 2 hot dogs, 61.4ms
Speed: 2.3ms preprocess, 61.4ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  54%|█████▍    | 2704/5000 [06:25<04:56,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000312192.jpg: 480x640 1 person, 2 wine glasss, 1 dining table, 59.9ms
Speed: 2.2ms preprocess, 59.9ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  54%|█████▍    | 2705/5000 [06:25<04:46,  8.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000312213.jpg: 480x640 1 cat, 1 laptop, 89.2ms
Speed: 2.3ms preprocess, 89.2ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  54%|█████▍    | 2706/5000 [06:25<05:01,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000312237.jpg: 448x640 3 persons, 1 umbrella, 59.9ms
Speed: 3.5ms preprocess, 59.9ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2707/5000 [06:25<04:51,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000312263.jpg: 640x544 2 clocks, 72.1ms
Speed: 1.8ms preprocess, 72.1ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  54%|█████▍    | 2708/5000 [06:26<04:46,  8.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000312278.jpg: 320x640 (no detections), 48.2ms
Speed: 2.2ms preprocess, 48.2ms inference, 0.3ms postprocess per image at shape (1, 3, 320, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000312340.jpg: 448x640 1 cat, 51.9ms
Speed: 2.6ms preprocess, 51.9ms inference, 1.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2710/5000 [06:26<04:08,  9.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000312406.jpg: 480x640 2 cups, 1 banana, 60.3ms
Speed: 2.3ms preprocess, 60.3ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  54%|█████▍    | 2711/5000 [06:26<04:08,  9.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000312421.jpg: 448x640 5 persons, 1 motorcycle, 1 umbrella, 84.7ms
Speed: 2.3ms preprocess, 84.7ms inference, 8.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2712/5000 [06:26<04:35,  8.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000312489.jpg: 448x640 2 persons, 2 surfboards, 55.8ms
Speed: 4.3ms preprocess, 55.8ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2713/5000 [06:26<04:30,  8.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000312549.jpg: 480x640 (no detections), 62.9ms
Speed: 2.9ms preprocess, 62.9ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000312552.jpg: 480x640 1 person, 1 pizza, 55.6ms
Speed: 3.5ms preprocess, 55.6ms inference, 6.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  54%|█████▍    | 2715/5000 [06:26<04:12,  9.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000312586.jpg: 448x640 3 bears, 69.9ms
Speed: 2.6ms preprocess, 69.9ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2716/5000 [06:27<04:20,  8.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000312720.jpg: 448x640 1 person, 1 skis, 63.9ms
Speed: 2.4ms preprocess, 63.9ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2717/5000 [06:27<04:22,  8.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000313034.jpg: 480x640 2 persons, 1 bottle, 1 chair, 63.3ms
Speed: 2.2ms preprocess, 63.3ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  54%|█████▍    | 2718/5000 [06:27<04:25,  8.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000313130.jpg: 480x640 2 chairs, 1 microwave, 3 refrigerators, 62.1ms
Speed: 2.5ms preprocess, 62.1ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  54%|█████▍    | 2719/5000 [06:27<04:30,  8.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000313182.jpg: 448x640 2 persons, 2 cars, 1 bus, 58.3ms
Speed: 2.2ms preprocess, 58.3ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  54%|█████▍    | 2720/5000 [06:27<04:31,  8.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000313454.jpg: 416x640 1 person, 1 tie, 2 bottles, 1 wine glass, 1 potted plant, 1 tv, 63.4ms
Speed: 2.7ms preprocess, 63.4ms inference, 5.5ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  54%|█████▍    | 2721/5000 [06:27<04:37,  8.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000313562.jpg: 480x640 2 persons, 2 bananas, 1 hot dog, 65.7ms
Speed: 2.2ms preprocess, 65.7ms inference, 4.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  54%|█████▍    | 2722/5000 [06:27<04:37,  8.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000313588.jpg: 640x480 1 person, 2 cars, 1 truck, 1 umbrella, 1 handbag, 68.2ms
Speed: 2.5ms preprocess, 68.2ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  54%|█████▍    | 2723/5000 [06:27<04:38,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000313783.jpg: 480x640 1 fork, 1 sandwich, 6 carrots, 1 dining table, 87.7ms
Speed: 3.8ms preprocess, 87.7ms inference, 10.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  54%|█████▍    | 2724/5000 [06:28<05:07,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000314034.jpg: 480x640 2 birds, 16 cows, 64.5ms
Speed: 2.4ms preprocess, 64.5ms inference, 17.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▍    | 2725/5000 [06:28<05:31,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000314177.jpg: 640x512 1 person, 1 cell phone, 127.3ms
Speed: 2.6ms preprocess, 127.3ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  55%|█████▍    | 2726/5000 [06:28<05:52,  6.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000314182.jpg: 640x480 4 bowls, 1 broccoli, 8 carrots, 1 dining table, 64.7ms
Speed: 4.3ms preprocess, 64.7ms inference, 13.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  55%|█████▍    | 2727/5000 [06:28<05:49,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000314251.jpg: 480x640 11 persons, 10 motorcycles, 98.1ms
Speed: 2.1ms preprocess, 98.1ms inference, 19.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▍    | 2728/5000 [06:28<06:24,  5.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000314264.jpg: 640x480 1 person, 1 potted plant, 59.7ms
Speed: 2.6ms preprocess, 59.7ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  55%|█████▍    | 2729/5000 [06:28<05:44,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000314294.jpg: 384x640 1 elephant, 108.2ms
Speed: 2.5ms preprocess, 108.2ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  55%|█████▍    | 2730/5000 [06:28<05:43,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000314541.jpg: 448x640 1 bench, 58.8ms
Speed: 2.3ms preprocess, 58.8ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▍    | 2731/5000 [06:29<05:12,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000314709.jpg: 448x640 1 person, 1 skis, 64.1ms
Speed: 2.4ms preprocess, 64.1ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▍    | 2732/5000 [06:29<04:52,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000314914.jpg: 640x480 1 car, 89.5ms
Speed: 2.4ms preprocess, 89.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  55%|█████▍    | 2733/5000 [06:29<05:00,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000315001.jpg: 640x640 11 birds, 171.3ms
Speed: 2.4ms preprocess, 171.3ms inference, 15.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  55%|█████▍    | 2734/5000 [06:29<06:30,  5.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000315187.jpg: 448x640 2 cars, 1 bus, 1 truck, 61.2ms
Speed: 2.8ms preprocess, 61.2ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▍    | 2735/5000 [06:29<05:52,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000315219.jpg: 640x448 (no detections), 59.6ms
Speed: 3.6ms preprocess, 59.6ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 448)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000315257.jpg: 544x640 1 bird, 1 bowl, 157.0ms
Speed: 1.4ms preprocess, 157.0ms inference, 2.6ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  55%|█████▍    | 2737/5000 [06:30<05:42,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000315450.jpg: 448x640 4 cars, 2 buss, 1 traffic light, 56.6ms
Speed: 2.7ms preprocess, 56.6ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▍    | 2738/5000 [06:30<05:22,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000315492.jpg: 640x640 1 dog, 1 bowl, 1 toilet, 82.5ms
Speed: 3.4ms preprocess, 82.5ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  55%|█████▍    | 2739/5000 [06:30<05:21,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000316015.jpg: 512x640 1 cat, 1 tv, 1 laptop, 1 remote, 1 keyboard, 160.0ms
Speed: 4.3ms preprocess, 160.0ms inference, 6.8ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  55%|█████▍    | 2740/5000 [06:30<06:11,  6.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000316054.jpg: 448x640 1 person, 3 trains, 55.9ms
Speed: 2.0ms preprocess, 55.9ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▍    | 2741/5000 [06:30<05:37,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000316404.jpg: 448x640 2 persons, 1 sports ball, 3 tennis rackets, 59.8ms
Speed: 2.7ms preprocess, 59.8ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▍    | 2742/5000 [06:30<05:19,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000316666.jpg: 480x640 1 bottle, 2 potted plants, 2 keyboards, 2 clocks, 2 vases, 65.0ms
Speed: 2.3ms preprocess, 65.0ms inference, 8.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▍    | 2743/5000 [06:30<05:15,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000317024.jpg: 480x640 1 zebra, 93.3ms
Speed: 2.4ms preprocess, 93.3ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▍    | 2744/5000 [06:31<05:16,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000317433.jpg: 448x640 1 person, 3 horses, 61.4ms
Speed: 2.8ms preprocess, 61.4ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▍    | 2745/5000 [06:31<05:04,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000317999.jpg: 640x640 3 persons, 81.1ms
Speed: 3.5ms preprocess, 81.1ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  55%|█████▍    | 2746/5000 [06:31<05:08,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000318080.jpg: 480x640 2 bears, 65.3ms
Speed: 3.2ms preprocess, 65.3ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▍    | 2747/5000 [06:31<04:53,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000318114.jpg: 512x640 1 cup, 2 forks, 1 banana, 1 cake, 1 dining table, 103.2ms
Speed: 4.6ms preprocess, 103.2ms inference, 5.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  55%|█████▍    | 2748/5000 [06:31<05:17,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000318138.jpg: 640x480 3 persons, 4 remotes, 62.3ms
Speed: 2.7ms preprocess, 62.3ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  55%|█████▍    | 2749/5000 [06:31<05:08,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000318238.jpg: 640x480 2 dogs, 1 bear, 1 bed, 62.7ms
Speed: 2.2ms preprocess, 62.7ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  55%|█████▌    | 2750/5000 [06:31<04:55,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000318455.jpg: 480x640 21 cakes, 1 book, 65.6ms
Speed: 2.2ms preprocess, 65.6ms inference, 19.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▌    | 2751/5000 [06:31<05:25,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000318908.jpg: 448x640 1 dog, 1 couch, 1 remote, 71.4ms
Speed: 4.3ms preprocess, 71.4ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▌    | 2752/5000 [06:32<05:22,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000319100.jpg: 448x640 2 persons, 1 chair, 2 couchs, 3 remotes, 1 book, 60.0ms
Speed: 4.4ms preprocess, 60.0ms inference, 8.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▌    | 2753/5000 [06:32<05:15,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000319184.jpg: 384x640 9 persons, 1 car, 1 frisbee, 50.6ms
Speed: 3.7ms preprocess, 50.6ms inference, 10.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  55%|█████▌    | 2754/5000 [06:32<05:02,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000319369.jpg: 448x640 4 persons, 3 cars, 5 umbrellas, 65.1ms
Speed: 2.9ms preprocess, 65.1ms inference, 10.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▌    | 2755/5000 [06:32<05:06,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000319534.jpg: 480x640 2 persons, 1 bus, 1 train, 67.6ms
Speed: 2.3ms preprocess, 67.6ms inference, 4.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▌    | 2756/5000 [06:32<04:59,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000319607.jpg: 640x640 2 persons, 2 traffic lights, 106.2ms
Speed: 1.7ms preprocess, 106.2ms inference, 7.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  55%|█████▌    | 2757/5000 [06:32<05:21,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000319617.jpg: 480x640 2 cups, 1 hot dog, 1 dining table, 63.4ms
Speed: 4.3ms preprocess, 63.4ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▌    | 2758/5000 [06:32<05:05,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000319696.jpg: 448x640 1 bowl, 1 microwave, 2 vases, 59.9ms
Speed: 3.8ms preprocess, 59.9ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▌    | 2759/5000 [06:33<04:53,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000319721.jpg: 640x480 3 persons, 1 horse, 101.5ms
Speed: 2.6ms preprocess, 101.5ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  55%|█████▌    | 2760/5000 [06:33<05:08,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000319935.jpg: 416x640 2 chairs, 2 couchs, 1 potted plant, 1 dining table, 1 tv, 1 book, 59.1ms
Speed: 2.7ms preprocess, 59.1ms inference, 6.9ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  55%|█████▌    | 2761/5000 [06:33<04:59,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000320232.jpg: 640x480 3 persons, 1 car, 1 traffic light, 64.8ms
Speed: 4.2ms preprocess, 64.8ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  55%|█████▌    | 2762/5000 [06:33<04:51,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000320425.jpg: 480x640 8 giraffes, 67.1ms
Speed: 2.9ms preprocess, 67.1ms inference, 8.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▌    | 2763/5000 [06:33<04:56,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000320490.jpg: 448x640 3 persons, 1 sports ball, 1 baseball bat, 87.5ms
Speed: 2.3ms preprocess, 87.5ms inference, 7.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▌    | 2764/5000 [06:33<05:07,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000320554.jpg: 480x640 2 benchs, 2 cats, 62.3ms
Speed: 3.9ms preprocess, 62.3ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▌    | 2765/5000 [06:33<04:54,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000320632.jpg: 480x640 7 zebras, 62.8ms
Speed: 2.6ms preprocess, 62.8ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▌    | 2766/5000 [06:33<04:51,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000320642.jpg: 416x640 4 persons, 2 remotes, 57.6ms
Speed: 3.9ms preprocess, 57.6ms inference, 5.0ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  55%|█████▌    | 2767/5000 [06:34<04:42,  7.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000320664.jpg: 480x640 1 fork, 1 knife, 1 carrot, 3 hot dogs, 1 dining table, 95.8ms
Speed: 2.5ms preprocess, 95.8ms inference, 6.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▌    | 2768/5000 [06:34<05:03,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000320696.jpg: 448x640 1 person, 1 surfboard, 60.3ms
Speed: 2.2ms preprocess, 60.3ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▌    | 2769/5000 [06:34<04:42,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000320706.jpg: 640x480 1 cup, 1 donut, 1 toilet, 65.6ms
Speed: 2.4ms preprocess, 65.6ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  55%|█████▌    | 2770/5000 [06:34<04:38,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000320743.jpg: 384x640 12 persons, 13 elephants, 51.9ms
Speed: 2.5ms preprocess, 51.9ms inference, 18.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  55%|█████▌    | 2771/5000 [06:34<05:01,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000321118.jpg: 448x640 1 cat, 1 laptop, 1 mouse, 60.6ms
Speed: 2.0ms preprocess, 60.6ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  55%|█████▌    | 2772/5000 [06:34<04:43,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000321214.jpg: 480x640 2 persons, 1 cake, 107.2ms
Speed: 2.6ms preprocess, 107.2ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  55%|█████▌    | 2773/5000 [06:34<05:04,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000321333.jpg: 416x640 3 persons, 1 teddy bear, 58.1ms
Speed: 2.7ms preprocess, 58.1ms inference, 4.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  55%|█████▌    | 2774/5000 [06:35<04:47,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000321557.jpg: 640x576 1 motorcycle, 147.7ms
Speed: 1.5ms preprocess, 147.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:  56%|█████▌    | 2775/5000 [06:35<05:30,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000321790.jpg: 448x640 1 person, 1 tennis racket, 92.8ms
Speed: 2.7ms preprocess, 92.8ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▌    | 2776/5000 [06:35<05:25,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000321887.jpg: 448x640 1 person, 1 sheep, 1 teddy bear, 63.9ms
Speed: 2.5ms preprocess, 63.9ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▌    | 2777/5000 [06:35<05:02,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322163.jpg: 480x640 8 persons, 2 bottles, 2 bowls, 2 potted plants, 1 dining table, 61.4ms
Speed: 2.5ms preprocess, 61.4ms inference, 15.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▌    | 2778/5000 [06:35<05:12,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322211.jpg: 480x640 8 donuts, 88.6ms
Speed: 2.6ms preprocess, 88.6ms inference, 11.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▌    | 2779/5000 [06:35<05:25,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322352.jpg: 480x640 7 chairs, 2 potted plants, 2 dining tables, 2 vases, 61.8ms
Speed: 4.1ms preprocess, 61.8ms inference, 12.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▌    | 2780/5000 [06:35<05:29,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322429.jpg: 640x480 1 cup, 6 vases, 65.3ms
Speed: 2.8ms preprocess, 65.3ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  56%|█████▌    | 2781/5000 [06:36<05:16,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322574.jpg: 480x640 1 sandwich, 1 dining table, 63.2ms
Speed: 2.6ms preprocess, 63.2ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▌    | 2782/5000 [06:36<04:53,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322610.jpg: 480x640 11 persons, 4 umbrellas, 3 handbags, 1 suitcase, 64.0ms
Speed: 2.6ms preprocess, 64.0ms inference, 23.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▌    | 2783/5000 [06:36<05:30,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322724.jpg: 448x640 3 persons, 1 skis, 53.8ms
Speed: 2.7ms preprocess, 53.8ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▌    | 2784/5000 [06:36<05:03,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322829.jpg: 448x640 1 bench, 59.1ms
Speed: 2.9ms preprocess, 59.1ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▌    | 2785/5000 [06:36<04:45,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322844.jpg: 640x480 2 teddy bears, 61.2ms
Speed: 2.5ms preprocess, 61.2ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  56%|█████▌    | 2786/5000 [06:36<04:30,  8.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322864.jpg: 640x448 1 person, 9 cars, 1 parking meter, 61.0ms
Speed: 2.5ms preprocess, 61.0ms inference, 9.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  56%|█████▌    | 2787/5000 [06:36<04:43,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322895.jpg: 480x640 1 bowl, 2 chairs, 2 couchs, 1 potted plant, 2 tvs, 1 vase, 77.3ms
Speed: 3.4ms preprocess, 77.3ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▌    | 2788/5000 [06:36<04:58,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322944.jpg: 640x480 1 person, 2 teddy bears, 64.9ms
Speed: 2.6ms preprocess, 64.9ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  56%|█████▌    | 2789/5000 [06:37<04:45,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322959.jpg: 640x640 2 bowls, 1 banana, 1 sandwich, 1 dining table, 81.3ms
Speed: 3.3ms preprocess, 81.3ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  56%|█████▌    | 2790/5000 [06:37<04:56,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000322968.jpg: 640x480 1 person, 1 potted plant, 1 clock, 88.3ms
Speed: 2.7ms preprocess, 88.3ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  56%|█████▌    | 2791/5000 [06:37<05:00,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000323151.jpg: 640x480 2 cups, 1 bowl, 1 pizza, 1 dining table, 1 cell phone, 64.7ms
Speed: 2.7ms preprocess, 64.7ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  56%|█████▌    | 2792/5000 [06:37<04:52,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000323202.jpg: 640x480 1 toilet, 64.1ms
Speed: 2.6ms preprocess, 64.1ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  56%|█████▌    | 2793/5000 [06:37<04:38,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000323263.jpg: 448x640 5 persons, 1 umbrella, 1 handbag, 58.9ms
Speed: 4.3ms preprocess, 58.9ms inference, 6.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▌    | 2794/5000 [06:37<04:38,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000323355.jpg: 640x480 2 persons, 1 pizza, 3 books, 65.5ms
Speed: 2.3ms preprocess, 65.5ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  56%|█████▌    | 2795/5000 [06:37<04:36,  7.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000323496.jpg: 512x640 6 persons, 1 sports ball, 4 tennis rackets, 96.1ms
Speed: 4.2ms preprocess, 96.1ms inference, 11.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  56%|█████▌    | 2796/5000 [06:38<05:09,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000323571.jpg: 640x480 3 cars, 1 traffic light, 61.5ms
Speed: 2.7ms preprocess, 61.5ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  56%|█████▌    | 2797/5000 [06:38<04:57,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000323709.jpg: 320x640 1 airplane, 1 clock, 104.7ms
Speed: 2.7ms preprocess, 104.7ms inference, 2.5ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  56%|█████▌    | 2798/5000 [06:38<05:05,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000323751.jpg: 448x640 2 persons, 1 train, 59.6ms
Speed: 2.6ms preprocess, 59.6ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▌    | 2799/5000 [06:38<04:46,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000323799.jpg: 480x640 10 persons, 2 frisbees, 92.0ms
Speed: 2.9ms preprocess, 92.0ms inference, 11.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▌    | 2800/5000 [06:38<05:15,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000323828.jpg: 640x448 2 trains, 59.2ms
Speed: 2.6ms preprocess, 59.2ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  56%|█████▌    | 2801/5000 [06:38<04:55,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000323895.jpg: 480x640 2 persons, 2 tennis rackets, 63.2ms
Speed: 2.7ms preprocess, 63.2ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▌    | 2802/5000 [06:38<04:44,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000324158.jpg: 448x640 1 person, 4 cars, 1 dog, 2 skateboards, 98.0ms
Speed: 3.8ms preprocess, 98.0ms inference, 7.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▌    | 2803/5000 [06:38<05:08,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000324258.jpg: 480x640 2 persons, 1 bottle, 1 cup, 3 couchs, 1 remote, 64.3ms
Speed: 2.7ms preprocess, 64.3ms inference, 8.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▌    | 2804/5000 [06:39<05:01,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000324614.jpg: 448x640 1 person, 1 toothbrush, 64.1ms
Speed: 2.5ms preprocess, 64.1ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▌    | 2805/5000 [06:39<04:45,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000324715.jpg: 448x640 1 laptop, 1 book, 1 scissors, 57.9ms
Speed: 3.6ms preprocess, 57.9ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▌    | 2806/5000 [06:39<04:32,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000324818.jpg: 448x640 2 birds, 57.3ms
Speed: 2.5ms preprocess, 57.3ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▌    | 2807/5000 [06:39<04:20,  8.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000324927.jpg: 640x480 1 knife, 2 pizzas, 92.8ms
Speed: 2.8ms preprocess, 92.8ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  56%|█████▌    | 2808/5000 [06:39<04:37,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000325031.jpg: 608x640 1 person, 1 fire hydrant, 1 elephant, 150.8ms
Speed: 1.8ms preprocess, 150.8ms inference, 5.0ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  56%|█████▌    | 2809/5000 [06:39<05:28,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000325114.jpg: 480x640 1 dining table, 1 toilet, 56.4ms
Speed: 2.8ms preprocess, 56.4ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▌    | 2810/5000 [06:39<04:59,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000325306.jpg: 480x640 1 zebra, 66.5ms
Speed: 3.4ms preprocess, 66.5ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▌    | 2811/5000 [06:40<04:45,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000325347.jpg: 384x640 3 persons, 1 tennis racket, 4 chairs, 1 potted plant, 140.5ms
Speed: 2.7ms preprocess, 140.5ms inference, 6.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  56%|█████▌    | 2812/5000 [06:40<05:33,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000325483.jpg: 480x640 3 persons, 2 bottles, 3 couchs, 1 dining table, 2 remotes, 61.7ms
Speed: 2.7ms preprocess, 61.7ms inference, 9.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▋    | 2813/5000 [06:40<05:19,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000325527.jpg: 480x640 1 teddy bear, 62.1ms
Speed: 3.3ms preprocess, 62.1ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▋    | 2814/5000 [06:40<04:57,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000325838.jpg: 480x640 1 person, 5 chairs, 1 dining table, 2 laptops, 1 keyboard, 96.4ms
Speed: 2.6ms preprocess, 96.4ms inference, 9.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▋    | 2815/5000 [06:40<05:16,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000325991.jpg: 448x640 1 person, 1 frisbee, 60.4ms
Speed: 3.9ms preprocess, 60.4ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▋    | 2816/5000 [06:40<04:55,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000326082.jpg: 448x640 6 chairs, 1 couch, 1 dining table, 1 tv, 1 clock, 59.0ms
Speed: 2.6ms preprocess, 59.0ms inference, 8.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▋    | 2817/5000 [06:40<04:52,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000326128.jpg: 640x480 1 person, 63.6ms
Speed: 2.8ms preprocess, 63.6ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  56%|█████▋    | 2818/5000 [06:40<04:39,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000326174.jpg: 480x640 8 persons, 1 frisbee, 4 surfboards, 65.9ms
Speed: 4.4ms preprocess, 65.9ms inference, 13.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▋    | 2819/5000 [06:41<04:54,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000326248.jpg: 448x640 10 persons, 2 chairs, 59.4ms
Speed: 2.6ms preprocess, 59.4ms inference, 10.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  56%|█████▋    | 2820/5000 [06:41<04:57,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000326462.jpg: 640x640 1 donut, 81.1ms
Speed: 4.0ms preprocess, 81.1ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  56%|█████▋    | 2821/5000 [06:41<04:54,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000326541.jpg: 384x640 3 persons, 3 cell phones, 52.0ms
Speed: 2.7ms preprocess, 52.0ms inference, 8.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  56%|█████▋    | 2822/5000 [06:41<04:49,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000326542.jpg: 480x640 3 persons, 4 skiss, 63.9ms
Speed: 3.0ms preprocess, 63.9ms inference, 7.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▋    | 2823/5000 [06:41<04:45,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000326627.jpg: 480x640 1 person, 4 cars, 66.4ms
Speed: 2.8ms preprocess, 66.4ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▋    | 2824/5000 [06:41<04:40,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000326970.jpg: 480x640 3 persons, 1 remote, 1 book, 63.5ms
Speed: 2.5ms preprocess, 63.5ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  56%|█████▋    | 2825/5000 [06:41<04:33,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000327306.jpg: 416x640 1 person, 1 skateboard, 140.8ms
Speed: 2.0ms preprocess, 140.8ms inference, 2.6ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  57%|█████▋    | 2826/5000 [06:42<05:13,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000327592.jpg: 384x640 1 person, 1 donut, 49.4ms
Speed: 2.6ms preprocess, 49.4ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000327601.jpg: 640x448 1 person, 2 chairs, 52.1ms
Speed: 3.7ms preprocess, 52.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  57%|█████▋    | 2828/5000 [06:42<04:28,  8.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000327605.jpg: 480x640 1 person, 2 skiss, 62.6ms
Speed: 2.8ms preprocess, 62.6ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  57%|█████▋    | 2829/5000 [06:42<04:25,  8.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000327617.jpg: 640x384 1 person, 2 tennis rackets, 138.5ms
Speed: 2.4ms preprocess, 138.5ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  57%|█████▋    | 2830/5000 [06:42<05:02,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000327701.jpg: 448x640 3 persons, 1 sports ball, 1 baseball bat, 58.1ms
Speed: 2.4ms preprocess, 58.1ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2831/5000 [06:42<04:50,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000327769.jpg: 448x640 1 cat, 1 bottle, 2 cups, 1 toilet, 1 sink, 59.7ms
Speed: 2.6ms preprocess, 59.7ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2832/5000 [06:42<04:41,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000327780.jpg: 480x640 1 fork, 3 cakes, 94.0ms
Speed: 2.8ms preprocess, 94.0ms inference, 6.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  57%|█████▋    | 2833/5000 [06:42<04:55,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000327890.jpg: 480x640 7 cars, 2 benchs, 1 chair, 1 laptop, 64.4ms
Speed: 2.6ms preprocess, 64.4ms inference, 9.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  57%|█████▋    | 2834/5000 [06:43<04:58,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000328030.jpg: 640x448 5 teddy bears, 62.2ms
Speed: 2.8ms preprocess, 62.2ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  57%|█████▋    | 2835/5000 [06:43<04:44,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000328117.jpg: 640x480 11 donuts, 1 oven, 100.8ms
Speed: 2.7ms preprocess, 100.8ms inference, 11.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  57%|█████▋    | 2836/5000 [06:43<05:18,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000328238.jpg: 448x640 10 persons, 1 backpack, 1 frisbee, 1 chair, 61.0ms
Speed: 2.7ms preprocess, 61.0ms inference, 11.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2837/5000 [06:43<05:16,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000328286.jpg: 448x640 4 boats, 58.6ms
Speed: 2.7ms preprocess, 58.6ms inference, 7.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2838/5000 [06:43<04:54,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000328337.jpg: 448x640 1 person, 83.9ms
Speed: 2.6ms preprocess, 83.9ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2839/5000 [06:43<04:53,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000328430.jpg: 640x448 5 persons, 1 frisbee, 1 sports ball, 56.4ms
Speed: 2.6ms preprocess, 56.4ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  57%|█████▋    | 2840/5000 [06:43<04:42,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000328601.jpg: 640x416 1 person, 1 sports ball, 2 tennis rackets, 121.8ms
Speed: 2.4ms preprocess, 121.8ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  57%|█████▋    | 2841/5000 [06:44<05:09,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000328683.jpg: 640x480 1 parking meter, 1 bench, 89.4ms
Speed: 2.2ms preprocess, 89.4ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  57%|█████▋    | 2842/5000 [06:44<05:07,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000328959.jpg: 640x416 1 toilet, 54.5ms
Speed: 3.0ms preprocess, 54.5ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  57%|█████▋    | 2843/5000 [06:44<04:41,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000329041.jpg: 640x480 3 persons, 8 suitcases, 63.0ms
Speed: 2.9ms preprocess, 63.0ms inference, 10.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  57%|█████▋    | 2844/5000 [06:44<04:48,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000329080.jpg: 480x640 3 beds, 1 laptop, 61.2ms
Speed: 2.8ms preprocess, 61.2ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  57%|█████▋    | 2845/5000 [06:44<04:41,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000329219.jpg: 448x640 1 person, 1 dog, 1 chair, 1 clock, 58.8ms
Speed: 2.8ms preprocess, 58.8ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2846/5000 [06:44<04:31,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000329319.jpg: 640x448 1 cat, 56.0ms
Speed: 2.6ms preprocess, 56.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  57%|█████▋    | 2847/5000 [06:44<04:17,  8.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000329323.jpg: 640x448 17 persons, 1 tie, 57.0ms
Speed: 2.7ms preprocess, 57.0ms inference, 17.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  57%|█████▋    | 2848/5000 [06:45<04:44,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000329447.jpg: 512x640 1 dog, 4 cows, 145.4ms
Speed: 2.9ms preprocess, 145.4ms inference, 5.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  57%|█████▋    | 2849/5000 [06:45<05:34,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000329455.jpg: 480x640 3 persons, 5 cups, 2 bowls, 1 pizza, 2 chairs, 3 dining tables, 60.1ms
Speed: 2.7ms preprocess, 60.1ms inference, 16.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  57%|█████▋    | 2850/5000 [06:45<05:36,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000329456.jpg: 448x640 7 persons, 1 frisbee, 55.8ms
Speed: 2.5ms preprocess, 55.8ms inference, 7.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2851/5000 [06:45<05:11,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000329542.jpg: 640x640 1 apple, 1 cake, 106.9ms
Speed: 3.6ms preprocess, 106.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  57%|█████▋    | 2852/5000 [06:45<05:22,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000329614.jpg: 288x640 2 persons, 11 cars, 1 bus, 106.1ms
Speed: 3.6ms preprocess, 106.1ms inference, 6.8ms postprocess per image at shape (1, 3, 288, 640)


Segmenting Images:  57%|█████▋    | 2853/5000 [06:45<05:35,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000329827.jpg: 448x640 1 person, 1 skis, 57.4ms
Speed: 3.2ms preprocess, 57.4ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2854/5000 [06:45<05:04,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000330369.jpg: 448x640 1 boat, 85.9ms
Speed: 4.2ms preprocess, 85.9ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2855/5000 [06:46<05:01,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000330396.jpg: 480x640 4 persons, 1 bird, 1 frisbee, 65.2ms
Speed: 4.4ms preprocess, 65.2ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  57%|█████▋    | 2856/5000 [06:46<04:52,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000330554.jpg: 480x640 (no detections), 62.7ms
Speed: 4.1ms preprocess, 62.7ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  57%|█████▋    | 2857/5000 [06:46<04:29,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000330790.jpg: 448x640 3 elephants, 92.6ms
Speed: 2.9ms preprocess, 92.6ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2858/5000 [06:46<04:46,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000330818.jpg: 640x448 1 person, 4 bottles, 1 bowl, 1 oven, 1 clock, 60.0ms
Speed: 3.0ms preprocess, 60.0ms inference, 8.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  57%|█████▋    | 2859/5000 [06:46<04:42,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000331075.jpg: 608x640 1 dog, 1 horse, 147.3ms
Speed: 1.6ms preprocess, 147.3ms inference, 2.6ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  57%|█████▋    | 2860/5000 [06:46<05:24,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000331280.jpg: 480x640 15 persons, 1 horse, 88.2ms
Speed: 3.9ms preprocess, 88.2ms inference, 18.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  57%|█████▋    | 2861/5000 [06:46<05:50,  6.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000331317.jpg: 512x640 1 clock, 62.2ms
Speed: 3.2ms preprocess, 62.2ms inference, 2.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  57%|█████▋    | 2862/5000 [06:47<05:16,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000331352.jpg: 640x480 2 toilets, 1 sink, 62.8ms
Speed: 3.8ms preprocess, 62.8ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  57%|█████▋    | 2863/5000 [06:47<04:56,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000331569.jpg: 640x640 1 cup, 1 dining table, 81.6ms
Speed: 2.9ms preprocess, 81.6ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  57%|█████▋    | 2864/5000 [06:47<04:57,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000331604.jpg: 448x640 1 person, 1 kite, 1 surfboard, 94.1ms
Speed: 4.1ms preprocess, 94.1ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2865/5000 [06:47<05:05,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000331799.jpg: 480x640 4 persons, 1 banana, 64.7ms
Speed: 3.5ms preprocess, 64.7ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  57%|█████▋    | 2866/5000 [06:47<04:54,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000331817.jpg: 480x640 2 persons, 1 wine glass, 4 bowls, 1 dining table, 1 tv, 68.9ms
Speed: 2.9ms preprocess, 68.9ms inference, 16.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  57%|█████▋    | 2867/5000 [06:47<05:11,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000332318.jpg: 448x640 8 cows, 62.0ms
Speed: 3.3ms preprocess, 62.0ms inference, 7.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2868/5000 [06:47<04:59,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000332351.jpg: 448x640 14 persons, 1 kite, 2 surfboards, 55.5ms
Speed: 2.9ms preprocess, 55.5ms inference, 17.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  57%|█████▋    | 2869/5000 [06:48<05:07,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000332455.jpg: 640x448 1 toilet, 78.0ms
Speed: 3.5ms preprocess, 78.0ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  57%|█████▋    | 2870/5000 [06:48<04:57,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000332570.jpg: 640x512 1 person, 1 cell phone, 138.0ms
Speed: 4.1ms preprocess, 138.0ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  57%|█████▋    | 2871/5000 [06:48<05:28,  6.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000332845.jpg: 480x640 2 persons, 1 chair, 1 bed, 2 laptops, 60.8ms
Speed: 2.6ms preprocess, 60.8ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  57%|█████▋    | 2872/5000 [06:48<05:07,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000332901.jpg: 416x640 3 persons, 1 cat, 1 dog, 2 cows, 155.0ms
Speed: 2.6ms preprocess, 155.0ms inference, 4.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  57%|█████▋    | 2873/5000 [06:48<05:52,  6.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000333069.jpg: 640x448 2 giraffes, 58.6ms
Speed: 2.6ms preprocess, 58.6ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  57%|█████▋    | 2874/5000 [06:48<05:16,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000333237.jpg: 416x640 1 chair, 1 bed, 59.6ms
Speed: 2.4ms preprocess, 59.6ms inference, 3.0ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  57%|█████▊    | 2875/5000 [06:48<04:50,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000333402.jpg: 448x640 1 person, 2 buss, 1 skateboard, 76.6ms
Speed: 2.8ms preprocess, 76.6ms inference, 5.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2876/5000 [06:49<04:49,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000333697.jpg: 640x448 1 stop sign, 61.2ms
Speed: 3.0ms preprocess, 61.2ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  58%|█████▊    | 2877/5000 [06:49<04:31,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000333745.jpg: 640x448 2 persons, 1 car, 2 umbrellas, 59.2ms
Speed: 2.6ms preprocess, 59.2ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  58%|█████▊    | 2878/5000 [06:49<04:24,  8.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000333772.jpg: 480x640 1 cat, 1 tv, 1 mouse, 1 remote, 1 keyboard, 80.5ms
Speed: 3.8ms preprocess, 80.5ms inference, 11.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2879/5000 [06:49<04:42,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000333956.jpg: 640x448 1 parking meter, 62.6ms
Speed: 2.7ms preprocess, 62.6ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  58%|█████▊    | 2880/5000 [06:49<04:26,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000334006.jpg: 448x640 1 person, 1 car, 2 trucks, 60.5ms
Speed: 2.7ms preprocess, 60.5ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2881/5000 [06:49<04:16,  8.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000334309.jpg: 448x640 2 persons, 4 cars, 59.4ms
Speed: 2.5ms preprocess, 59.4ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2882/5000 [06:49<04:22,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000334371.jpg: 448x640 1 person, 2 cars, 4 buss, 1 traffic light, 65.3ms
Speed: 3.6ms preprocess, 65.3ms inference, 8.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2883/5000 [06:49<04:29,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000334399.jpg: 480x640 4 persons, 1 clock, 66.4ms
Speed: 2.6ms preprocess, 66.4ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2884/5000 [06:50<04:26,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000334417.jpg: 448x640 1 person, 2 ties, 59.9ms
Speed: 2.9ms preprocess, 59.9ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2885/5000 [06:50<04:17,  8.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000334483.jpg: 640x480 7 persons, 1 cup, 1 bowl, 2 cakes, 1 dining table, 89.4ms
Speed: 2.9ms preprocess, 89.4ms inference, 13.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  58%|█████▊    | 2886/5000 [06:50<04:48,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000334521.jpg: 480x640 2 giraffes, 66.0ms
Speed: 4.2ms preprocess, 66.0ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2887/5000 [06:50<04:35,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000334530.jpg: 480x640 1 person, 2 surfboards, 62.9ms
Speed: 3.1ms preprocess, 62.9ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2888/5000 [06:50<04:25,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000334555.jpg: 448x640 11 persons, 3 cars, 2 trucks, 1 cow, 83.3ms
Speed: 2.6ms preprocess, 83.3ms inference, 18.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2889/5000 [06:50<05:02,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000334719.jpg: 416x640 6 persons, 55.6ms
Speed: 2.1ms preprocess, 55.6ms inference, 5.5ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  58%|█████▊    | 2890/5000 [06:50<04:46,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000334767.jpg: 448x640 5 persons, 5 skiss, 64.2ms
Speed: 4.1ms preprocess, 64.2ms inference, 10.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2891/5000 [06:51<04:48,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000334977.jpg: 320x640 3 books, 138.3ms
Speed: 2.4ms preprocess, 138.3ms inference, 2.2ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  58%|█████▊    | 2892/5000 [06:51<05:17,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000335081.jpg: 448x640 1 person, 5 teddy bears, 59.2ms
Speed: 2.7ms preprocess, 59.2ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2893/5000 [06:51<05:00,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000335177.jpg: 480x640 2 cars, 1 truck, 1 stop sign, 65.7ms
Speed: 3.2ms preprocess, 65.7ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2894/5000 [06:51<04:45,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000335328.jpg: 640x512 2 persons, 1 surfboard, 96.5ms
Speed: 2.7ms preprocess, 96.5ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  58%|█████▊    | 2895/5000 [06:51<04:58,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000335427.jpg: 448x640 1 spoon, 1 bowl, 4 broccolis, 1 dining table, 62.6ms
Speed: 2.9ms preprocess, 62.6ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2896/5000 [06:51<04:47,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000335450.jpg: 448x640 3 horses, 1 cow, 59.5ms
Speed: 2.7ms preprocess, 59.5ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2897/5000 [06:51<04:34,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000335529.jpg: 480x640 3 boats, 1 bench, 105.6ms
Speed: 2.9ms preprocess, 105.6ms inference, 9.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2898/5000 [06:52<04:59,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000335658.jpg: 480x640 1 mouse, 1 keyboard, 61.0ms
Speed: 2.7ms preprocess, 61.0ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2899/5000 [06:52<04:39,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000335800.jpg: 448x640 1 kite, 58.2ms
Speed: 2.4ms preprocess, 58.2ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2900/5000 [06:52<04:21,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000335954.jpg: 640x640 2 bowls, 82.3ms
Speed: 3.2ms preprocess, 82.3ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  58%|█████▊    | 2901/5000 [06:52<04:37,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000336053.jpg: 480x640 6 persons, 1 bottle, 3 wine glasss, 1 cup, 1 fork, 1 bowl, 5 chairs, 2 dining tables, 62.9ms
Speed: 2.8ms preprocess, 62.9ms inference, 19.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2902/5000 [06:52<05:04,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000336209.jpg: 448x640 1 person, 57.9ms
Speed: 2.5ms preprocess, 57.9ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2903/5000 [06:52<04:38,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000336232.jpg: 448x640 2 persons, 26 cars, 1 motorcycle, 2 buss, 1 truck, 60.7ms
Speed: 2.9ms preprocess, 60.7ms inference, 38.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2904/5000 [06:52<05:36,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000336265.jpg: 640x480 1 person, 1 frisbee, 62.3ms
Speed: 2.6ms preprocess, 62.3ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  58%|█████▊    | 2905/5000 [06:52<05:06,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000336309.jpg: 448x640 1 airplane, 55.0ms
Speed: 2.7ms preprocess, 55.0ms inference, 1.8ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000336356.jpg: 640x448 4 persons, 1 bottle, 2 wine glasss, 1 fork, 2 knifes, 2 pizzas, 1 chair, 1 dining table, 51.1ms
Speed: 2.6ms preprocess, 51.1ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  58%|█████▊    | 2907/5000 [06:53<04:35,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000336587.jpg: 480x640 1 traffic light, 1 stop sign, 88.5ms
Speed: 2.7ms preprocess, 88.5ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2908/5000 [06:53<04:40,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000336628.jpg: 640x448 6 persons, 59.7ms
Speed: 2.6ms preprocess, 59.7ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  58%|█████▊    | 2909/5000 [06:53<04:32,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000336658.jpg: 448x640 1 car, 1 bus, 1 bench, 59.0ms
Speed: 2.6ms preprocess, 59.0ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2910/5000 [06:53<04:21,  8.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000337055.jpg: 448x640 1 person, 1 suitcase, 86.3ms
Speed: 2.6ms preprocess, 86.3ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2911/5000 [06:53<04:25,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000337498.jpg: 480x640 2 cups, 1 pizza, 1 dining table, 69.0ms
Speed: 2.9ms preprocess, 69.0ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2912/5000 [06:53<04:25,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000337987.jpg: 480x640 2 birds, 63.7ms
Speed: 2.9ms preprocess, 63.7ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2913/5000 [06:53<04:16,  8.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338191.jpg: 640x640 9 fire hydrants, 110.7ms
Speed: 1.7ms preprocess, 110.7ms inference, 10.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  58%|█████▊    | 2914/5000 [06:54<04:57,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338219.jpg: 576x640 8 persons, 2 motorcycles, 1 umbrella, 138.0ms
Speed: 1.5ms preprocess, 138.0ms inference, 14.4ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  58%|█████▊    | 2915/5000 [06:54<05:49,  5.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338304.jpg: 640x448 10 persons, 2 sheeps, 2 handbags, 57.3ms
Speed: 2.5ms preprocess, 57.3ms inference, 13.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  58%|█████▊    | 2916/5000 [06:54<05:31,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338325.jpg: 448x640 1 airplane, 94.5ms
Speed: 2.9ms preprocess, 94.5ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  58%|█████▊    | 2917/5000 [06:54<05:19,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338428.jpg: 384x640 1 car, 1 bus, 1 truck, 118.2ms
Speed: 2.5ms preprocess, 118.2ms inference, 2.8ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  58%|█████▊    | 2918/5000 [06:54<05:27,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338532.jpg: 640x480 1 giraffe, 60.4ms
Speed: 3.6ms preprocess, 60.4ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  58%|█████▊    | 2919/5000 [06:54<04:56,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338560.jpg: 640x480 5 cars, 1 traffic light, 1 fire hydrant, 62.1ms
Speed: 2.7ms preprocess, 62.1ms inference, 11.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  58%|█████▊    | 2920/5000 [06:55<04:53,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338624.jpg: 384x640 3 persons, 2 cars, 1 dog, 9 chairs, 57.3ms
Speed: 2.6ms preprocess, 57.3ms inference, 11.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  58%|█████▊    | 2921/5000 [06:55<04:53,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338625.jpg: 480x640 4 persons, 3 cars, 1 bus, 64.2ms
Speed: 2.8ms preprocess, 64.2ms inference, 8.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2922/5000 [06:55<04:48,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338718.jpg: 576x640 2 cars, 73.5ms
Speed: 3.1ms preprocess, 73.5ms inference, 3.4ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  58%|█████▊    | 2923/5000 [06:55<04:41,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338901.jpg: 480x640 1 dog, 1 couch, 1 bed, 1 laptop, 1 remote, 92.5ms
Speed: 2.8ms preprocess, 92.5ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2924/5000 [06:55<04:53,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338905.jpg: 480x640 10 persons, 1 wine glass, 1 cup, 7 chairs, 1 dining table, 63.6ms
Speed: 4.1ms preprocess, 63.6ms inference, 18.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  58%|█████▊    | 2925/5000 [06:55<05:10,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000338986.jpg: 640x416 3 cars, 1 airplane, 1 train, 1 truck, 1 traffic light, 115.8ms
Speed: 2.8ms preprocess, 115.8ms inference, 8.6ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  59%|█████▊    | 2926/5000 [06:55<05:31,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000339442.jpg: 640x544 1 apple, 1 sandwich, 171.4ms
Speed: 1.6ms preprocess, 171.4ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  59%|█████▊    | 2927/5000 [06:56<06:09,  5.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000339823.jpg: 640x512 1 person, 66.5ms
Speed: 2.8ms preprocess, 66.5ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  59%|█████▊    | 2928/5000 [06:56<05:30,  6.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000339870.jpg: 448x640 4 traffic lights, 56.6ms
Speed: 2.6ms preprocess, 56.6ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▊    | 2929/5000 [06:56<04:58,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000340015.jpg: 480x640 1 pizza, 64.0ms
Speed: 2.8ms preprocess, 64.0ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▊    | 2930/5000 [06:56<04:38,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000340175.jpg: 416x640 2 chairs, 1 couch, 1 potted plant, 1 dining table, 131.5ms
Speed: 2.1ms preprocess, 131.5ms inference, 4.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  59%|█████▊    | 2931/5000 [06:56<05:08,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000340272.jpg: 384x640 3 sheeps, 50.8ms
Speed: 2.4ms preprocess, 50.8ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000340451.jpg: 448x640 1 person, 2 benchs, 53.6ms
Speed: 2.8ms preprocess, 53.6ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▊    | 2933/5000 [06:56<04:19,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000340697.jpg: 448x640 12 bottles, 1 cup, 1 cake, 1 vase, 53.7ms
Speed: 4.2ms preprocess, 53.7ms inference, 12.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▊    | 2934/5000 [06:57<04:29,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000340894.jpg: 480x640 2 persons, 1 cup, 1 tv, 1 laptop, 1 mouse, 3 keyboards, 111.5ms
Speed: 2.4ms preprocess, 111.5ms inference, 9.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▊    | 2935/5000 [06:57<04:59,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000340930.jpg: 480x640 1 cell phone, 60.9ms
Speed: 2.4ms preprocess, 60.9ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▊    | 2936/5000 [06:57<04:36,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000341058.jpg: 640x384 1 clock, 115.5ms
Speed: 2.1ms preprocess, 115.5ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  59%|█████▊    | 2937/5000 [06:57<04:51,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000341094.jpg: 480x640 1 elephant, 100.5ms
Speed: 4.0ms preprocess, 100.5ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▉    | 2938/5000 [06:57<04:55,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000341196.jpg: 480x640 2 persons, 1 surfboard, 63.9ms
Speed: 2.5ms preprocess, 63.9ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▉    | 2939/5000 [06:57<04:39,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000341469.jpg: 640x480 2 persons, 1 bicycle, 2 suitcases, 60.7ms
Speed: 2.5ms preprocess, 60.7ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  59%|█████▉    | 2940/5000 [06:57<04:27,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000341681.jpg: 448x640 1 person, 1 sports ball, 1 tennis racket, 55.6ms
Speed: 2.4ms preprocess, 55.6ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▉    | 2941/5000 [06:58<04:19,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000341719.jpg: 480x640 6 persons, 1 skis, 2 snowboards, 67.1ms
Speed: 4.8ms preprocess, 67.1ms inference, 11.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▉    | 2942/5000 [06:58<04:30,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000341828.jpg: 640x640 1 person, 154.2ms
Speed: 3.2ms preprocess, 154.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  59%|█████▉    | 2943/5000 [06:58<05:14,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000341921.jpg: 480x640 1 person, 1 traffic light, 1 bench, 61.4ms
Speed: 2.7ms preprocess, 61.4ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▉    | 2944/5000 [06:58<04:51,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000341973.jpg: 480x640 2 persons, 1 bench, 2 donuts, 82.1ms
Speed: 3.8ms preprocess, 82.1ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▉    | 2945/5000 [06:58<04:56,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000342006.jpg: 640x480 1 person, 1 car, 3 boats, 1 clock, 63.9ms
Speed: 2.4ms preprocess, 63.9ms inference, 6.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  59%|█████▉    | 2946/5000 [06:58<04:44,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000342128.jpg: 640x512 9 persons, 1 tennis racket, 4 chairs, 69.3ms
Speed: 2.9ms preprocess, 69.3ms inference, 15.7ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  59%|█████▉    | 2947/5000 [06:58<04:57,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000342186.jpg: 640x480 1 pizza, 97.2ms
Speed: 2.4ms preprocess, 97.2ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  59%|█████▉    | 2948/5000 [06:59<04:57,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000342295.jpg: 640x448 1 person, 1 bottle, 1 toilet, 129.9ms
Speed: 4.0ms preprocess, 129.9ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  59%|█████▉    | 2949/5000 [06:59<05:23,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000342367.jpg: 480x640 3 persons, 2 ties, 1 laptop, 68.6ms
Speed: 3.6ms preprocess, 68.6ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▉    | 2950/5000 [06:59<05:08,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000342397.jpg: 448x640 7 persons, 1 skis, 56.9ms
Speed: 2.7ms preprocess, 56.9ms inference, 8.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▉    | 2951/5000 [06:59<04:55,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000342971.jpg: 640x288 1 person, 1 frisbee, 137.3ms
Speed: 2.7ms preprocess, 137.3ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 288)


Segmenting Images:  59%|█████▉    | 2952/5000 [06:59<05:18,  6.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343076.jpg: 640x480 1 cat, 60.8ms
Speed: 2.4ms preprocess, 60.8ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  59%|█████▉    | 2953/5000 [06:59<04:48,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343149.jpg: 640x480 4 persons, 1 handbag, 1 clock, 61.0ms
Speed: 2.9ms preprocess, 61.0ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  59%|█████▉    | 2954/5000 [06:59<04:36,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343218.jpg: 448x640 2 persons, 2 cars, 1 truck, 1 sports ball, 1 tennis racket, 86.5ms
Speed: 2.9ms preprocess, 86.5ms inference, 6.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▉    | 2955/5000 [07:00<04:48,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343315.jpg: 480x640 1 fire hydrant, 63.3ms
Speed: 2.5ms preprocess, 63.3ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▉    | 2956/5000 [07:00<04:28,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343453.jpg: 480x640 1 person, 1 skis, 65.1ms
Speed: 3.2ms preprocess, 65.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▉    | 2957/5000 [07:00<04:19,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343466.jpg: 480x640 1 oven, 1 sink, 104.7ms
Speed: 4.2ms preprocess, 104.7ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▉    | 2958/5000 [07:00<04:34,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343496.jpg: 416x640 1 person, 1 car, 1 traffic light, 1 fire hydrant, 1 stop sign, 2 handbags, 57.1ms
Speed: 2.2ms preprocess, 57.1ms inference, 6.0ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  59%|█████▉    | 2959/5000 [07:00<04:24,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343524.jpg: 448x640 1 person, 1 tennis racket, 60.6ms
Speed: 2.6ms preprocess, 60.6ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▉    | 2960/5000 [07:00<04:10,  8.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343561.jpg: 448x640 13 persons, 7 bicycles, 78.2ms
Speed: 2.4ms preprocess, 78.2ms inference, 20.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▉    | 2961/5000 [07:00<04:49,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343706.jpg: 512x640 2 persons, 1 hot dog, 1 cell phone, 143.3ms
Speed: 3.1ms preprocess, 143.3ms inference, 4.7ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  59%|█████▉    | 2962/5000 [07:01<05:25,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343803.jpg: 640x448 1 person, 56.4ms
Speed: 2.7ms preprocess, 56.4ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  59%|█████▉    | 2963/5000 [07:01<04:52,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343934.jpg: 480x640 1 motorcycle, 89.4ms
Speed: 2.8ms preprocess, 89.4ms inference, 11.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▉    | 2964/5000 [07:01<04:54,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343937.jpg: 448x640 1 person, 58.7ms
Speed: 2.8ms preprocess, 58.7ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▉    | 2965/5000 [07:01<04:31,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000343976.jpg: 480x640 (no detections), 63.0ms
Speed: 4.1ms preprocess, 63.0ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000344029.jpg: 480x640 1 person, 3 cars, 2 buss, 62.4ms
Speed: 2.5ms preprocess, 62.4ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  59%|█████▉    | 2967/5000 [07:01<04:04,  8.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000344059.jpg: 448x640 2 giraffes, 87.6ms
Speed: 2.8ms preprocess, 87.6ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▉    | 2968/5000 [07:01<04:15,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000344100.jpg: 448x640 1 handbag, 1 bottle, 1 cup, 1 sandwich, 1 dining table, 59.7ms
Speed: 2.9ms preprocess, 59.7ms inference, 7.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▉    | 2969/5000 [07:01<04:13,  8.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000344268.jpg: 512x640 1 person, 2 ties, 1 baseball bat, 68.7ms
Speed: 2.8ms preprocess, 68.7ms inference, 6.7ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  59%|█████▉    | 2970/5000 [07:02<04:12,  8.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000344611.jpg: 448x640 2 stop signs, 99.1ms
Speed: 2.7ms preprocess, 99.1ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▉    | 2971/5000 [07:02<04:26,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000344614.jpg: 640x480 2 clocks, 60.8ms
Speed: 2.7ms preprocess, 60.8ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  59%|█████▉    | 2972/5000 [07:02<04:16,  7.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000344621.jpg: 448x640 1 chair, 1 couch, 3 potted plants, 1 book, 62.0ms
Speed: 3.7ms preprocess, 62.0ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  59%|█████▉    | 2973/5000 [07:02<04:14,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000344795.jpg: 384x640 1 bowl, 1 oven, 65.7ms
Speed: 2.5ms preprocess, 65.7ms inference, 4.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  59%|█████▉    | 2974/5000 [07:02<04:13,  8.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000344816.jpg: 448x640 1 train, 57.4ms
Speed: 2.3ms preprocess, 57.4ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  60%|█████▉    | 2975/5000 [07:02<04:03,  8.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000344888.jpg: 416x640 2 cars, 2 horses, 59.9ms
Speed: 2.5ms preprocess, 59.9ms inference, 4.3ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  60%|█████▉    | 2976/5000 [07:02<03:59,  8.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000344909.jpg: 640x480 4 cars, 1 fire hydrant, 105.2ms
Speed: 3.0ms preprocess, 105.2ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  60%|█████▉    | 2977/5000 [07:02<04:26,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000345027.jpg: 416x640 3 persons, 1 motorcycle, 2 boats, 2 benchs, 1 kite, 55.5ms
Speed: 2.4ms preprocess, 55.5ms inference, 8.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  60%|█████▉    | 2978/5000 [07:03<04:18,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000345252.jpg: 480x640 1 person, 1 chair, 1 tv, 66.0ms
Speed: 2.9ms preprocess, 66.0ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|█████▉    | 2979/5000 [07:03<04:13,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000345261.jpg: 640x608 2 giraffes, 151.0ms
Speed: 1.8ms preprocess, 151.0ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  60%|█████▉    | 2980/5000 [07:03<05:02,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000345356.jpg: 480x640 2 persons, 3 cups, 1 pizza, 1 chair, 1 potted plant, 1 dining table, 1 cell phone, 67.9ms
Speed: 3.0ms preprocess, 67.9ms inference, 11.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|█████▉    | 2981/5000 [07:03<04:59,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000345361.jpg: 480x640 5 persons, 2 cups, 1 dining table, 67.8ms
Speed: 4.1ms preprocess, 67.8ms inference, 7.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|█████▉    | 2982/5000 [07:03<04:49,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000345385.jpg: 480x640 2 persons, 1 bed, 58.1ms
Speed: 2.4ms preprocess, 58.1ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|█████▉    | 2983/5000 [07:03<04:30,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000345397.jpg: 480x640 4 persons, 1 tie, 1 cell phone, 96.4ms
Speed: 4.4ms preprocess, 96.4ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|█████▉    | 2984/5000 [07:03<04:47,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000345466.jpg: 480x640 6 persons, 1 baseball glove, 61.7ms
Speed: 3.3ms preprocess, 61.7ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|█████▉    | 2985/5000 [07:04<04:41,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000345469.jpg: 640x480 11 donuts, 63.5ms
Speed: 2.3ms preprocess, 63.5ms inference, 9.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  60%|█████▉    | 2986/5000 [07:04<04:39,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000345941.jpg: 640x640 2 persons, 1 fork, 1 sandwich, 166.3ms
Speed: 2.2ms preprocess, 166.3ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  60%|█████▉    | 2987/5000 [07:04<05:31,  6.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000346232.jpg: 640x448 7 persons, 1 car, 1 truck, 1 backpack, 1 umbrella, 55.0ms
Speed: 3.0ms preprocess, 55.0ms inference, 9.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  60%|█████▉    | 2988/5000 [07:04<05:12,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000346638.jpg: 480x640 1 bottle, 1 tv, 1 mouse, 1 keyboard, 62.6ms
Speed: 2.8ms preprocess, 62.6ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|█████▉    | 2989/5000 [07:04<04:50,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000346703.jpg: 640x576 8 persons, 2 bottles, 2 wine glasss, 3 cups, 3 cakes, 3 dining tables, 157.1ms
Speed: 1.8ms preprocess, 157.1ms inference, 27.9ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:  60%|█████▉    | 2990/5000 [07:04<06:14,  5.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000346707.jpg: 640x448 1 banana, 58.9ms
Speed: 4.3ms preprocess, 58.9ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  60%|█████▉    | 2991/5000 [07:05<05:25,  6.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000346905.jpg: 448x640 3 elephants, 56.8ms
Speed: 2.5ms preprocess, 56.8ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  60%|█████▉    | 2992/5000 [07:05<04:55,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000346968.jpg: 448x640 1 person, 1 tie, 2 cell phones, 7 books, 79.8ms
Speed: 2.8ms preprocess, 79.8ms inference, 13.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  60%|█████▉    | 2993/5000 [07:05<05:01,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000347163.jpg: 448x640 1 stop sign, 60.2ms
Speed: 2.8ms preprocess, 60.2ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  60%|█████▉    | 2994/5000 [07:05<04:34,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000347174.jpg: 640x448 1 person, 2 beds, 56.3ms
Speed: 2.6ms preprocess, 56.3ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  60%|█████▉    | 2995/5000 [07:05<04:18,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000347254.jpg: 640x480 4 persons, 1 cup, 97.9ms
Speed: 4.9ms preprocess, 97.9ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  60%|█████▉    | 2996/5000 [07:05<04:36,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000347265.jpg: 640x448 1 person, 2 skiss, 65.6ms
Speed: 2.8ms preprocess, 65.6ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  60%|█████▉    | 2997/5000 [07:05<04:27,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000347335.jpg: 640x480 1 cup, 1 fork, 2 bananas, 1 sandwich, 1 dining table, 79.8ms
Speed: 2.5ms preprocess, 79.8ms inference, 11.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  60%|█████▉    | 2998/5000 [07:05<04:35,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000347370.jpg: 480x640 1 apple, 1 carrot, 61.6ms
Speed: 4.2ms preprocess, 61.6ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|█████▉    | 2999/5000 [07:06<04:21,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000347456.jpg: 320x640 2 elephants, 106.8ms
Speed: 2.1ms preprocess, 106.8ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  60%|██████    | 3000/5000 [07:06<04:33,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000347544.jpg: 480x640 1 bus, 102.2ms
Speed: 1.7ms preprocess, 102.2ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|██████    | 3001/5000 [07:06<04:39,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000347664.jpg: 448x640 2 horses, 1 sheep, 7 cows, 58.7ms
Speed: 2.3ms preprocess, 58.7ms inference, 9.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  60%|██████    | 3002/5000 [07:06<04:37,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000347693.jpg: 448x640 1 bed, 58.0ms
Speed: 2.4ms preprocess, 58.0ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  60%|██████    | 3003/5000 [07:06<04:18,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000347930.jpg: 480x640 1 cat, 1 dog, 1 sports ball, 1 couch, 1 bed, 97.2ms
Speed: 2.6ms preprocess, 97.2ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|██████    | 3004/5000 [07:06<04:34,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000348012.jpg: 448x640 (no detections), 58.2ms
Speed: 3.0ms preprocess, 58.2ms inference, 0.4ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000348045.jpg: 640x608 3 toilets, 65.9ms
Speed: 2.2ms preprocess, 65.9ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  60%|██████    | 3006/5000 [07:06<04:01,  8.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000348216.jpg: 640x480 1 toilet, 58.6ms
Speed: 2.6ms preprocess, 58.6ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  60%|██████    | 3007/5000 [07:07<03:52,  8.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000348243.jpg: 640x448 1 horse, 101.5ms
Speed: 3.0ms preprocess, 101.5ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  60%|██████    | 3008/5000 [07:07<04:10,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000348481.jpg: 480x640 1 laptop, 2 mouses, 3 cell phones, 63.9ms
Speed: 2.3ms preprocess, 63.9ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|██████    | 3009/5000 [07:07<04:09,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000348488.jpg: 480x640 1 bicycle, 63.1ms
Speed: 2.5ms preprocess, 63.1ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|██████    | 3010/5000 [07:07<04:02,  8.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000348708.jpg: 640x384 1 bowl, 2 bananas, 1 dining table, 146.0ms
Speed: 1.9ms preprocess, 146.0ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  60%|██████    | 3011/5000 [07:07<04:43,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000348881.jpg: 480x640 2 persons, 2 airplanes, 61.9ms
Speed: 2.4ms preprocess, 61.9ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|██████    | 3012/5000 [07:07<04:27,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000349152.jpg: 640x480 1 cup, 1 bowl, 4 hot dogs, 5 donuts, 1 dining table, 62.4ms
Speed: 2.6ms preprocess, 62.4ms inference, 11.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  60%|██████    | 3013/5000 [07:07<04:32,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000349184.jpg: 640x448 5 persons, 1 bench, 84.0ms
Speed: 2.4ms preprocess, 84.0ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  60%|██████    | 3014/5000 [07:08<04:36,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000349302.jpg: 448x640 2 giraffes, 61.6ms
Speed: 2.1ms preprocess, 61.6ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  60%|██████    | 3015/5000 [07:08<04:20,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000349480.jpg: 480x640 1 person, 2 bowls, 1 banana, 2 teddy bears, 59.3ms
Speed: 2.7ms preprocess, 59.3ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|██████    | 3016/5000 [07:08<04:15,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000349594.jpg: 640x544 1 person, 1 donut, 174.8ms
Speed: 2.9ms preprocess, 174.8ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  60%|██████    | 3017/5000 [07:08<05:11,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000349678.jpg: 640x448 1 clock, 58.9ms
Speed: 2.5ms preprocess, 58.9ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  60%|██████    | 3018/5000 [07:08<04:42,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000349837.jpg: 448x640 5 refrigerators, 58.4ms
Speed: 3.3ms preprocess, 58.4ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  60%|██████    | 3019/5000 [07:08<04:27,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000349860.jpg: 448x640 3 persons, 2 cars, 1 bench, 1 skateboard, 95.2ms
Speed: 2.9ms preprocess, 95.2ms inference, 7.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  60%|██████    | 3020/5000 [07:08<04:42,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350002.jpg: 640x448 1 person, 61.3ms
Speed: 2.7ms preprocess, 61.3ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  60%|██████    | 3021/5000 [07:09<04:24,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350003.jpg: 480x640 3 persons, 1 car, 1 bus, 1 truck, 67.2ms
Speed: 3.4ms preprocess, 67.2ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|██████    | 3022/5000 [07:09<04:20,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350019.jpg: 640x448 1 person, 1 suitcase, 73.1ms
Speed: 2.2ms preprocess, 73.1ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  60%|██████    | 3023/5000 [07:09<04:14,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350023.jpg: 480x640 1 person, 12 cars, 2 motorcycles, 1 bus, 1 traffic light, 62.7ms
Speed: 3.0ms preprocess, 62.7ms inference, 15.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|██████    | 3024/5000 [07:09<04:36,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350054.jpg: 480x640 1 chair, 2 tvs, 1 oven, 1 clock, 62.6ms
Speed: 3.9ms preprocess, 62.6ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  60%|██████    | 3025/5000 [07:09<04:25,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350122.jpg: 480x640 12 persons, 2 trains, 1 backpack, 69.1ms
Speed: 3.1ms preprocess, 69.1ms inference, 15.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████    | 3026/5000 [07:09<04:47,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350148.jpg: 640x480 4 persons, 1 car, 1 truck, 1 cup, 2 sandwichs, 2 chairs, 1 dining table, 63.2ms
Speed: 2.5ms preprocess, 63.2ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  61%|██████    | 3027/5000 [07:09<04:48,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350388.jpg: 640x480 1 broccoli, 1 dining table, 62.2ms
Speed: 2.9ms preprocess, 62.2ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  61%|██████    | 3028/5000 [07:09<04:25,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350405.jpg: 448x640 2 persons, 1 snowboard, 72.0ms
Speed: 3.6ms preprocess, 72.0ms inference, 5.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3029/5000 [07:10<04:27,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350488.jpg: 448x640 1 bird, 1 zebra, 60.7ms
Speed: 2.9ms preprocess, 60.7ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3030/5000 [07:10<04:11,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350607.jpg: 448x640 1 bench, 62.6ms
Speed: 3.0ms preprocess, 62.6ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3031/5000 [07:10<04:01,  8.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350679.jpg: 448x640 10 persons, 1 bottle, 2 cakes, 78.3ms
Speed: 3.0ms preprocess, 78.3ms inference, 16.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3032/5000 [07:10<04:31,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000350833.jpg: 480x640 3 toilets, 1 sink, 64.1ms
Speed: 2.8ms preprocess, 64.1ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████    | 3033/5000 [07:10<04:19,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000351096.jpg: 480x640 1 person, 1 scissors, 64.3ms
Speed: 4.5ms preprocess, 64.3ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████    | 3034/5000 [07:10<04:09,  7.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000351331.jpg: 480x640 1 fork, 1 pizza, 1 dining table, 85.9ms
Speed: 3.0ms preprocess, 85.9ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████    | 3035/5000 [07:10<04:16,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000351362.jpg: 640x448 1 toilet, 1 sink, 61.7ms
Speed: 2.1ms preprocess, 61.7ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  61%|██████    | 3036/5000 [07:11<04:06,  7.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000351530.jpg: 384x640 2 persons, 1 bicycle, 110.3ms
Speed: 2.6ms preprocess, 110.3ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  61%|██████    | 3037/5000 [07:11<04:26,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000351559.jpg: 352x640 1 traffic light, 143.9ms
Speed: 2.5ms preprocess, 143.9ms inference, 1.8ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  61%|██████    | 3038/5000 [07:11<04:58,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000351589.jpg: 448x640 1 bench, 1 potted plant, 56.5ms
Speed: 2.7ms preprocess, 56.5ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3039/5000 [07:11<04:31,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000351609.jpg: 448x640 1 spoon, 2 oranges, 2 donuts, 1 chair, 58.4ms
Speed: 3.6ms preprocess, 58.4ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3040/5000 [07:11<04:18,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000351810.jpg: 480x640 4 persons, 1 backpack, 3 suitcases, 1 chair, 84.7ms
Speed: 5.0ms preprocess, 84.7ms inference, 13.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████    | 3041/5000 [07:11<04:43,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000351823.jpg: 640x512 1 person, 1 tennis racket, 137.3ms
Speed: 3.5ms preprocess, 137.3ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  61%|██████    | 3042/5000 [07:11<05:07,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000352491.jpg: 480x640 1 person, 1 airplane, 1 train, 57.0ms
Speed: 2.4ms preprocess, 57.0ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████    | 3043/5000 [07:12<04:40,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000352582.jpg: 640x448 2 persons, 1 car, 1 frisbee, 59.7ms
Speed: 2.7ms preprocess, 59.7ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  61%|██████    | 3044/5000 [07:12<04:23,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000352584.jpg: 640x480 (no detections), 88.1ms
Speed: 5.6ms preprocess, 88.1ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  61%|██████    | 3045/5000 [07:12<04:19,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000352618.jpg: 448x640 1 banana, 1 apple, 1 orange, 56.3ms
Speed: 2.3ms preprocess, 56.3ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3046/5000 [07:12<04:02,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000352684.jpg: 640x448 7 persons, 1 tie, 59.4ms
Speed: 2.6ms preprocess, 59.4ms inference, 7.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  61%|██████    | 3047/5000 [07:12<04:04,  7.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000352760.jpg: 640x544 1 person, 2 skiss, 96.3ms
Speed: 1.7ms preprocess, 96.3ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  61%|██████    | 3048/5000 [07:12<04:20,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000352900.jpg: 448x640 2 forks, 1 knife, 4 broccolis, 1 dining table, 59.9ms
Speed: 2.6ms preprocess, 59.9ms inference, 7.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3049/5000 [07:12<04:15,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000353027.jpg: 448x640 1 person, 1 sandwich, 1 pizza, 58.1ms
Speed: 1.9ms preprocess, 58.1ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3050/5000 [07:12<04:01,  8.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000353051.jpg: 448x640 1 person, 1 horse, 83.7ms
Speed: 2.4ms preprocess, 83.7ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3051/5000 [07:13<04:11,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000353096.jpg: 448x640 1 tv, 1 mouse, 1 keyboard, 59.5ms
Speed: 4.8ms preprocess, 59.5ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3052/5000 [07:13<04:01,  8.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000353180.jpg: 448x640 9 persons, 1 bus, 57.0ms
Speed: 2.8ms preprocess, 57.0ms inference, 8.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3053/5000 [07:13<04:01,  8.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000353518.jpg: 416x640 1 person, 3 boats, 147.6ms
Speed: 2.5ms preprocess, 147.6ms inference, 3.3ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  61%|██████    | 3054/5000 [07:13<04:47,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000353970.jpg: 448x640 1 cat, 1 chair, 1 sink, 56.5ms
Speed: 2.7ms preprocess, 56.5ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████    | 3055/5000 [07:13<04:24,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000354072.jpg: 640x384 1 bottle, 1 bowl, 1 potted plant, 1 sink, 56.0ms
Speed: 2.5ms preprocess, 56.0ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  61%|██████    | 3056/5000 [07:13<04:09,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000354307.jpg: 480x640 1 dog, 1 bed, 63.0ms
Speed: 4.2ms preprocess, 63.0ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████    | 3057/5000 [07:13<04:00,  8.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000354547.jpg: 640x448 1 person, 1 tie, 86.6ms
Speed: 3.1ms preprocess, 86.6ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  61%|██████    | 3058/5000 [07:13<04:09,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000354753.jpg: 480x640 (no detections), 66.5ms
Speed: 2.8ms preprocess, 66.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████    | 3059/5000 [07:14<03:54,  8.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000354829.jpg: 480x640 1 airplane, 68.0ms
Speed: 4.5ms preprocess, 68.0ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████    | 3060/5000 [07:14<04:01,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000355169.jpg: 480x640 1 clock, 64.5ms
Speed: 3.2ms preprocess, 64.5ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████    | 3061/5000 [07:14<04:00,  8.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000355240.jpg: 384x640 1 car, 2 dogs, 55.7ms
Speed: 2.4ms preprocess, 55.7ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  61%|██████    | 3062/5000 [07:14<03:50,  8.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000355257.jpg: 448x640 1 dog, 1 bed, 2 tvs, 1 microwave, 89.5ms
Speed: 2.6ms preprocess, 89.5ms inference, 8.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████▏   | 3063/5000 [07:14<04:12,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000355325.jpg: 640x480 1 wine glass, 2 cups, 1 spoon, 1 bowl, 1 pizza, 1 dining table, 75.8ms
Speed: 3.2ms preprocess, 75.8ms inference, 7.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  61%|██████▏   | 3064/5000 [07:14<04:25,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000355610.jpg: 448x640 3 teddy bears, 65.8ms
Speed: 3.3ms preprocess, 65.8ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  61%|██████▏   | 3065/5000 [07:14<04:15,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000355677.jpg: 480x640 1 boat, 1 bird, 1 surfboard, 67.2ms
Speed: 2.9ms preprocess, 67.2ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████▏   | 3066/5000 [07:14<04:10,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000355817.jpg: 480x640 1 person, 1 bus, 70.8ms
Speed: 3.4ms preprocess, 70.8ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████▏   | 3067/5000 [07:15<04:07,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000355905.jpg: 480x640 1 dog, 64.2ms
Speed: 3.0ms preprocess, 64.2ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████▏   | 3068/5000 [07:15<03:58,  8.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356094.jpg: 416x640 2 persons, 1 baseball bat, 1 baseball glove, 57.7ms
Speed: 2.2ms preprocess, 57.7ms inference, 4.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  61%|██████▏   | 3069/5000 [07:15<03:51,  8.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356125.jpg: 480x640 2 persons, 1 elephant, 61.7ms
Speed: 3.1ms preprocess, 61.7ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████▏   | 3070/5000 [07:15<03:45,  8.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356169.jpg: 480x640 4 cars, 1 fire hydrant, 60.5ms
Speed: 3.1ms preprocess, 60.5ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████▏   | 3071/5000 [07:15<03:47,  8.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356248.jpg: 640x480 5 persons, 1 bottle, 4 cups, 2 chairs, 1 potted plant, 95.4ms
Speed: 2.5ms preprocess, 95.4ms inference, 13.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  61%|██████▏   | 3072/5000 [07:15<04:25,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356261.jpg: 480x640 1 horse, 63.3ms
Speed: 2.8ms preprocess, 63.3ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████▏   | 3073/5000 [07:15<04:10,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356347.jpg: 480x640 1 cup, 1 spoon, 2 bowls, 59.9ms
Speed: 2.9ms preprocess, 59.9ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  61%|██████▏   | 3074/5000 [07:15<04:03,  7.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356387.jpg: 448x640 7 persons, 2 bicycles, 1 clock, 83.0ms
Speed: 3.4ms preprocess, 83.0ms inference, 9.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3075/5000 [07:16<04:20,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356424.jpg: 640x480 4 persons, 1 car, 1 bottle, 3 cups, 2 sandwichs, 1 chair, 1 dining table, 64.9ms
Speed: 2.8ms preprocess, 64.9ms inference, 12.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  62%|██████▏   | 3076/5000 [07:16<04:29,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356427.jpg: 448x640 1 person, 4 backpacks, 56.7ms
Speed: 2.4ms preprocess, 56.7ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3077/5000 [07:16<04:12,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356428.jpg: 384x640 5 persons, 1 handbag, 88.1ms
Speed: 3.4ms preprocess, 88.1ms inference, 5.8ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  62%|██████▏   | 3078/5000 [07:16<04:23,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356432.jpg: 416x640 1 chair, 4 couchs, 56.8ms
Speed: 2.3ms preprocess, 56.8ms inference, 4.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  62%|██████▏   | 3079/5000 [07:16<04:08,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356498.jpg: 448x640 1 broccoli, 1 pizza, 59.0ms
Speed: 3.6ms preprocess, 59.0ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3080/5000 [07:16<03:57,  8.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356505.jpg: 448x640 1 person, 1 surfboard, 57.5ms
Speed: 2.8ms preprocess, 57.5ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3081/5000 [07:16<03:48,  8.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356531.jpg: 480x640 1 person, 1 bottle, 1 cup, 1 fork, 1 pizza, 1 dining table, 82.4ms
Speed: 4.6ms preprocess, 82.4ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▏   | 3082/5000 [07:17<04:06,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356612.jpg: 416x640 2 persons, 1 bicycle, 2 cars, 1 bus, 6 cows, 53.5ms
Speed: 2.0ms preprocess, 53.5ms inference, 8.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  62%|██████▏   | 3083/5000 [07:17<04:04,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000356968.jpg: 480x640 1 horse, 67.6ms
Speed: 2.4ms preprocess, 67.6ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▏   | 3084/5000 [07:17<03:55,  8.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357060.jpg: 448x640 1 bird, 56.2ms
Speed: 2.8ms preprocess, 56.2ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3085/5000 [07:17<03:44,  8.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357081.jpg: 448x640 1 dog, 2 cows, 74.7ms
Speed: 2.5ms preprocess, 74.7ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3086/5000 [07:17<03:55,  8.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357238.jpg: 640x448 1 person, 1 kite, 63.0ms
Speed: 2.7ms preprocess, 63.0ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  62%|██████▏   | 3087/5000 [07:17<03:48,  8.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357430.jpg: 448x640 1 pizza, 91.2ms
Speed: 3.1ms preprocess, 91.2ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3088/5000 [07:17<03:59,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357459.jpg: 448x640 1 person, 1 dog, 1 frisbee, 58.9ms
Speed: 2.8ms preprocess, 58.9ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3089/5000 [07:17<03:51,  8.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357501.jpg: 480x640 1 scissors, 64.2ms
Speed: 3.1ms preprocess, 64.2ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▏   | 3090/5000 [07:17<03:49,  8.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357567.jpg: 640x480 2 toilets, 60.8ms
Speed: 3.1ms preprocess, 60.8ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  62%|██████▏   | 3091/5000 [07:18<03:45,  8.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357737.jpg: 480x640 2 persons, 2 bicycles, 2 cars, 1 motorcycle, 78.6ms
Speed: 4.3ms preprocess, 78.6ms inference, 6.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▏   | 3092/5000 [07:18<04:02,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357742.jpg: 640x448 1 person, 1 backpack, 1 skis, 60.5ms
Speed: 2.9ms preprocess, 60.5ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  62%|██████▏   | 3093/5000 [07:18<03:54,  8.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357748.jpg: 480x640 3 persons, 1 bus, 64.8ms
Speed: 3.0ms preprocess, 64.8ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▏   | 3094/5000 [07:18<03:51,  8.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357816.jpg: 608x640 5 persons, 1 car, 1 umbrella, 1 sports ball, 2 baseball bats, 1 baseball glove, 178.0ms
Speed: 2.5ms preprocess, 178.0ms inference, 12.3ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  62%|██████▏   | 3095/5000 [07:18<05:15,  6.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357888.jpg: 480x640 1 person, 1 umbrella, 1 potted plant, 1 sink, 1 vase, 61.4ms
Speed: 2.5ms preprocess, 61.4ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▏   | 3096/5000 [07:18<04:48,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357903.jpg: 640x480 2 pizzas, 64.5ms
Speed: 2.2ms preprocess, 64.5ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  62%|██████▏   | 3097/5000 [07:18<04:27,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357941.jpg: 480x640 1 cat, 1 bottle, 2 tvs, 96.7ms
Speed: 4.3ms preprocess, 96.7ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▏   | 3098/5000 [07:19<04:40,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000357978.jpg: 480x640 4 persons, 1 baseball bat, 1 couch, 64.4ms
Speed: 4.2ms preprocess, 64.4ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▏   | 3099/5000 [07:19<04:28,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000358195.jpg: 640x512 1 person, 1 skateboard, 70.7ms
Speed: 2.7ms preprocess, 70.7ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  62%|██████▏   | 3100/5000 [07:19<04:17,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000358427.jpg: 448x640 1 broccoli, 80.2ms
Speed: 2.6ms preprocess, 80.2ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3101/5000 [07:19<04:14,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000358525.jpg: 480x640 1 person, 2 beds, 1 laptop, 69.0ms
Speed: 2.5ms preprocess, 69.0ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▏   | 3102/5000 [07:19<04:08,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000358923.jpg: 448x640 6 persons, 3 umbrellas, 3 cell phones, 61.5ms
Speed: 2.7ms preprocess, 61.5ms inference, 12.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3103/5000 [07:19<04:15,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000359135.jpg: 448x640 3 elephants, 81.8ms
Speed: 3.9ms preprocess, 81.8ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3104/5000 [07:19<04:18,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000359219.jpg: 480x640 1 fork, 1 knife, 1 pizza, 1 dining table, 62.4ms
Speed: 3.3ms preprocess, 62.4ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▏   | 3105/5000 [07:20<04:05,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000359540.jpg: 448x640 5 persons, 1 bird, 1 sports ball, 1 baseball bat, 2 baseball gloves, 61.3ms
Speed: 2.6ms preprocess, 61.3ms inference, 9.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3106/5000 [07:20<04:09,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000359677.jpg: 640x448 1 person, 1 surfboard, 86.6ms
Speed: 3.0ms preprocess, 86.6ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  62%|██████▏   | 3107/5000 [07:20<04:15,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000359781.jpg: 512x640 1 giraffe, 131.4ms
Speed: 3.0ms preprocess, 131.4ms inference, 1.8ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  62%|██████▏   | 3108/5000 [07:20<04:40,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000359833.jpg: 640x480 1 frisbee, 4 apples, 61.8ms
Speed: 2.5ms preprocess, 61.8ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  62%|██████▏   | 3109/5000 [07:20<04:24,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000359855.jpg: 512x640 1 airplane, 89.9ms
Speed: 2.5ms preprocess, 89.9ms inference, 3.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  62%|██████▏   | 3110/5000 [07:20<04:28,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000359937.jpg: 480x640 2 buss, 66.2ms
Speed: 2.4ms preprocess, 66.2ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▏   | 3111/5000 [07:20<04:13,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000360097.jpg: 448x640 1 backpack, 4 suitcases, 58.2ms
Speed: 3.3ms preprocess, 58.2ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3112/5000 [07:20<04:04,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000360137.jpg: 640x640 1 person, 1 umbrella, 1 handbag, 172.4ms
Speed: 1.9ms preprocess, 172.4ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  62%|██████▏   | 3113/5000 [07:21<04:59,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000360325.jpg: 448x640 12 sheeps, 57.3ms
Speed: 2.5ms preprocess, 57.3ms inference, 11.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3114/5000 [07:21<04:46,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000360393.jpg: 448x640 1 pizza, 1 dining table, 61.6ms
Speed: 2.1ms preprocess, 61.6ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3115/5000 [07:21<04:23,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000360564.jpg: 448x640 1 bottle, 2 toilets, 2 sinks, 75.1ms
Speed: 2.1ms preprocess, 75.1ms inference, 7.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3116/5000 [07:21<04:23,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000360661.jpg: 480x640 4 persons, 3 horses, 60.5ms
Speed: 3.0ms preprocess, 60.5ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▏   | 3117/5000 [07:21<04:15,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000360943.jpg: 640x512 1 cat, 67.5ms
Speed: 3.8ms preprocess, 67.5ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  62%|██████▏   | 3118/5000 [07:21<04:04,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000360951.jpg: 448x640 1 tv, 3 laptops, 3 mouses, 3 keyboards, 59.7ms
Speed: 2.6ms preprocess, 59.7ms inference, 10.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3119/5000 [07:21<04:08,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000360960.jpg: 640x448 8 persons, 1 umbrella, 83.9ms
Speed: 2.7ms preprocess, 83.9ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  62%|██████▏   | 3120/5000 [07:22<04:27,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361103.jpg: 448x640 11 persons, 1 bicycle, 4 traffic lights, 1 handbag, 1 potted plant, 59.6ms
Speed: 2.5ms preprocess, 59.6ms inference, 16.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3121/5000 [07:22<04:34,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361142.jpg: 640x480 1 person, 9 cars, 1 traffic light, 1 clock, 63.9ms
Speed: 2.8ms preprocess, 63.9ms inference, 11.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  62%|██████▏   | 3122/5000 [07:22<04:33,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361147.jpg: 640x448 7 persons, 2 tennis rackets, 1 chair, 86.7ms
Speed: 2.7ms preprocess, 86.7ms inference, 13.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  62%|██████▏   | 3123/5000 [07:22<04:44,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361180.jpg: 448x640 1 bear, 59.4ms
Speed: 2.9ms preprocess, 59.4ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  62%|██████▏   | 3124/5000 [07:22<04:19,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361238.jpg: 480x640 4 persons, 1 bottle, 1 cup, 1 pizza, 1 chair, 1 potted plant, 1 vase, 62.6ms
Speed: 2.3ms preprocess, 62.6ms inference, 9.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  62%|██████▎   | 3125/5000 [07:22<04:16,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361268.jpg: 480x640 1 cow, 89.2ms
Speed: 2.6ms preprocess, 89.2ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3126/5000 [07:23<04:19,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361506.jpg: 480x640 1 person, 60.4ms
Speed: 2.4ms preprocess, 60.4ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3127/5000 [07:23<04:02,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361551.jpg: 640x480 5 persons, 1 airplane, 2 trucks, 5 backpacks, 1 handbag, 1 suitcase, 64.7ms
Speed: 2.6ms preprocess, 64.7ms inference, 14.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  63%|██████▎   | 3128/5000 [07:23<04:16,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361571.jpg: 448x640 1 dog, 1 cow, 83.2ms
Speed: 2.3ms preprocess, 83.2ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3129/5000 [07:23<04:14,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361586.jpg: 384x640 11 persons, 1 handbag, 3 suitcases, 1 tv, 54.6ms
Speed: 2.6ms preprocess, 54.6ms inference, 12.8ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  63%|██████▎   | 3130/5000 [07:23<04:21,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361621.jpg: 448x640 1 cat, 1 sink, 58.0ms
Speed: 3.0ms preprocess, 58.0ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3131/5000 [07:23<04:03,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361730.jpg: 480x640 10 persons, 10 kites, 98.7ms
Speed: 2.7ms preprocess, 98.7ms inference, 23.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3132/5000 [07:23<04:49,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000361919.jpg: 448x640 14 persons, 1 skis, 57.9ms
Speed: 2.8ms preprocess, 57.9ms inference, 14.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3133/5000 [07:24<04:44,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000362434.jpg: 480x640 1 teddy bear, 63.4ms
Speed: 4.7ms preprocess, 63.4ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3134/5000 [07:24<04:23,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000362520.jpg: 640x448 1 person, 1 skateboard, 82.8ms
Speed: 3.2ms preprocess, 82.8ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  63%|██████▎   | 3135/5000 [07:24<04:19,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000362682.jpg: 448x640 1 person, 1 bus, 2 trucks, 60.1ms
Speed: 2.3ms preprocess, 60.1ms inference, 7.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3136/5000 [07:24<04:07,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000362716.jpg: 448x640 2 persons, 1 sports ball, 1 tennis racket, 59.1ms
Speed: 3.1ms preprocess, 59.1ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3137/5000 [07:24<03:56,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000363072.jpg: 640x640 2 trains, 116.7ms
Speed: 2.1ms preprocess, 116.7ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  63%|██████▎   | 3138/5000 [07:24<04:19,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000363188.jpg: 448x640 9 persons, 5 cell phones, 56.8ms
Speed: 2.3ms preprocess, 56.8ms inference, 12.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3139/5000 [07:24<04:20,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000363207.jpg: 480x640 7 persons, 1 bowl, 2 cakes, 62.6ms
Speed: 3.0ms preprocess, 62.6ms inference, 10.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3140/5000 [07:24<04:19,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000363461.jpg: 480x640 1 chair, 1 tv, 2 laptops, 2 keyboards, 64.6ms
Speed: 2.4ms preprocess, 64.6ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3141/5000 [07:25<04:11,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000363666.jpg: 448x640 6 persons, 5 cups, 2 knifes, 2 bowls, 2 pizzas, 1 donut, 1 cake, 5 chairs, 4 dining tables, 83.5ms
Speed: 2.5ms preprocess, 83.5ms inference, 27.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3142/5000 [07:25<04:55,  6.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000363784.jpg: 480x640 3 toilets, 1 sink, 60.3ms
Speed: 2.6ms preprocess, 60.3ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3143/5000 [07:25<04:30,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000363840.jpg: 480x640 1 tv, 2 laptops, 2 mouses, 3 keyboards, 1 cell phone, 59.3ms
Speed: 2.3ms preprocess, 59.3ms inference, 8.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3144/5000 [07:25<04:21,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000363875.jpg: 640x576 1 person, 178.7ms
Speed: 1.7ms preprocess, 178.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:  63%|██████▎   | 3145/5000 [07:25<05:07,  6.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000364102.jpg: 416x640 1 person, 1 frisbee, 112.2ms
Speed: 2.5ms preprocess, 112.2ms inference, 2.5ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  63%|██████▎   | 3146/5000 [07:25<05:03,  6.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000364126.jpg: 448x640 1 person, 1 surfboard, 56.8ms
Speed: 2.4ms preprocess, 56.8ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3147/5000 [07:26<04:30,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000364166.jpg: 480x640 2 zebras, 72.2ms
Speed: 3.6ms preprocess, 72.2ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3148/5000 [07:26<04:22,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000364297.jpg: 448x640 1 cat, 1 tv, 1 keyboard, 62.7ms
Speed: 3.2ms preprocess, 62.7ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3149/5000 [07:26<04:08,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000364322.jpg: 480x640 16 sheeps, 1 bear, 61.8ms
Speed: 2.3ms preprocess, 61.8ms inference, 15.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3150/5000 [07:26<04:23,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000364557.jpg: 448x640 2 persons, 1 surfboard, 58.5ms
Speed: 2.3ms preprocess, 58.5ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3151/5000 [07:26<04:05,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000364587.jpg: 448x640 1 train, 60.1ms
Speed: 2.5ms preprocess, 60.1ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3152/5000 [07:26<03:51,  8.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000364636.jpg: 544x640 1 dog, 181.2ms
Speed: 1.8ms preprocess, 181.2ms inference, 2.1ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  63%|██████▎   | 3153/5000 [07:26<04:49,  6.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000364884.jpg: 480x640 8 persons, 57.0ms
Speed: 4.3ms preprocess, 57.0ms inference, 7.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3154/5000 [07:27<04:32,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000365095.jpg: 448x640 1 person, 2 sports balls, 1 baseball glove, 59.1ms
Speed: 2.6ms preprocess, 59.1ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3155/5000 [07:27<04:12,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000365098.jpg: 448x640 (no detections), 77.6ms
Speed: 2.7ms preprocess, 77.6ms inference, 0.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3156/5000 [07:27<03:58,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000365207.jpg: 608x640 1 car, 1 dog, 151.1ms
Speed: 1.7ms preprocess, 151.1ms inference, 2.8ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  63%|██████▎   | 3157/5000 [07:27<04:41,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000365208.jpg: 640x608 1 person, 2 umbrellas, 1 surfboard, 1 chair, 140.2ms
Speed: 2.0ms preprocess, 140.2ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  63%|██████▎   | 3158/5000 [07:27<05:06,  6.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000365385.jpg: 448x640 1 sink, 1 clock, 1 toothbrush, 60.1ms
Speed: 2.4ms preprocess, 60.1ms inference, 7.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3159/5000 [07:27<04:42,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000365387.jpg: 448x640 1 bed, 1 toilet, 68.2ms
Speed: 2.8ms preprocess, 68.2ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3160/5000 [07:27<04:25,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000365521.jpg: 640x448 10 persons, 1 skateboard, 59.5ms
Speed: 2.5ms preprocess, 59.5ms inference, 9.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  63%|██████▎   | 3161/5000 [07:28<04:21,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000365642.jpg: 640x448 1 person, 4 cars, 1 truck, 1 toilet, 88.6ms
Speed: 2.7ms preprocess, 88.6ms inference, 6.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  63%|██████▎   | 3162/5000 [07:28<04:27,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000365655.jpg: 448x640 1 person, 1 bus, 58.4ms
Speed: 2.6ms preprocess, 58.4ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3163/5000 [07:28<04:09,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000365745.jpg: 480x640 2 persons, 2 trucks, 1 traffic light, 90.5ms
Speed: 3.0ms preprocess, 90.5ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3164/5000 [07:28<04:19,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000365766.jpg: 480x640 1 microwave, 1 oven, 1 sink, 68.5ms
Speed: 2.8ms preprocess, 68.5ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3165/5000 [07:28<04:10,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000365886.jpg: 480x640 1 person, 1 cell phone, 87.8ms
Speed: 4.1ms preprocess, 87.8ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3166/5000 [07:28<04:14,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000366141.jpg: 480x640 2 couchs, 1 tv, 62.0ms
Speed: 3.1ms preprocess, 62.0ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3167/5000 [07:28<03:59,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000366178.jpg: 640x480 4 cars, 1 fire hydrant, 114.9ms
Speed: 3.3ms preprocess, 114.9ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  63%|██████▎   | 3168/5000 [07:28<04:21,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000366199.jpg: 448x640 1 cat, 1 couch, 78.5ms
Speed: 2.9ms preprocess, 78.5ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  63%|██████▎   | 3169/5000 [07:29<04:14,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000366225.jpg: 640x544 1 tv, 1 mouse, 2 keyboards, 159.8ms
Speed: 2.2ms preprocess, 159.8ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  63%|██████▎   | 3170/5000 [07:29<04:57,  6.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000366611.jpg: 480x640 2 dogs, 1 frisbee, 91.0ms
Speed: 2.8ms preprocess, 91.0ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3171/5000 [07:29<04:45,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000366711.jpg: 640x448 1 person, 10 oranges, 59.1ms
Speed: 3.1ms preprocess, 59.1ms inference, 11.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  63%|██████▎   | 3172/5000 [07:29<04:36,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000366884.jpg: 480x640 1 person, 1 dog, 3 chairs, 1 bed, 1 keyboard, 67.6ms
Speed: 2.9ms preprocess, 67.6ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  63%|██████▎   | 3173/5000 [07:29<04:24,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000367082.jpg: 640x640 1 dog, 1 couch, 179.1ms
Speed: 3.3ms preprocess, 179.1ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  63%|██████▎   | 3174/5000 [07:29<05:15,  5.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000367095.jpg: 448x640 3 persons, 2 cups, 3 chairs, 1 potted plant, 1 dining table, 2 tvs, 3 laptops, 1 mouse, 2 keyboards, 57.3ms
Speed: 2.9ms preprocess, 57.3ms inference, 16.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  64%|██████▎   | 3175/5000 [07:30<05:07,  5.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000367195.jpg: 416x640 1 person, 1 dog, 1 chair, 1 couch, 1 bed, 56.8ms
Speed: 4.1ms preprocess, 56.8ms inference, 4.7ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  64%|██████▎   | 3176/5000 [07:30<04:38,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000367228.jpg: 640x448 1 person, 1 kite, 80.9ms
Speed: 4.2ms preprocess, 80.9ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  64%|██████▎   | 3177/5000 [07:30<04:30,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000367386.jpg: 480x640 2 couchs, 2 potted plants, 6 books, 65.1ms
Speed: 3.1ms preprocess, 65.1ms inference, 8.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▎   | 3178/5000 [07:30<04:24,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000367569.jpg: 640x480 1 couch, 1 dining table, 65.0ms
Speed: 2.6ms preprocess, 65.0ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  64%|██████▎   | 3179/5000 [07:30<04:07,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000367680.jpg: 512x640 5 persons, 2 cars, 1 bus, 2 trucks, 161.2ms
Speed: 4.4ms preprocess, 161.2ms inference, 7.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  64%|██████▎   | 3180/5000 [07:30<05:01,  6.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000367818.jpg: 448x640 1 person, 1 horse, 57.6ms
Speed: 2.7ms preprocess, 57.6ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  64%|██████▎   | 3181/5000 [07:30<04:29,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000368038.jpg: 320x640 2 trains, 113.6ms
Speed: 2.6ms preprocess, 113.6ms inference, 2.1ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  64%|██████▎   | 3182/5000 [07:31<04:35,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000368212.jpg: 640x480 1 person, 1 toilet, 80.1ms
Speed: 4.1ms preprocess, 80.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  64%|██████▎   | 3183/5000 [07:31<04:29,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000368294.jpg: 448x640 1 person, 58.6ms
Speed: 2.8ms preprocess, 58.6ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  64%|██████▎   | 3184/5000 [07:31<04:07,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000368335.jpg: 640x416 4 cars, 1 truck, 1 horse, 120.9ms
Speed: 3.1ms preprocess, 120.9ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  64%|██████▎   | 3185/5000 [07:31<04:30,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000368456.jpg: 480x640 1 train, 60.6ms
Speed: 2.8ms preprocess, 60.6ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▎   | 3186/5000 [07:31<04:12,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000368684.jpg: 480x640 1 chair, 1 tv, 1 laptop, 2 books, 77.9ms
Speed: 3.9ms preprocess, 77.9ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▎   | 3187/5000 [07:31<04:13,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000368752.jpg: 640x480 2 persons, 1 cup, 2 bowls, 1 cake, 1 dining table, 66.7ms
Speed: 3.2ms preprocess, 66.7ms inference, 8.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  64%|██████▍   | 3188/5000 [07:31<04:13,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000368900.jpg: 480x640 3 chairs, 1 tv, 63.3ms
Speed: 2.7ms preprocess, 63.3ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3189/5000 [07:32<04:03,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000368940.jpg: 480x640 1 sink, 58.4ms
Speed: 3.1ms preprocess, 58.4ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3190/5000 [07:32<03:52,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000368961.jpg: 480x640 2 persons, 2 elephants, 74.1ms
Speed: 4.0ms preprocess, 74.1ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3191/5000 [07:32<03:55,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000368982.jpg: 480x640 1 scissors, 63.9ms
Speed: 3.0ms preprocess, 63.9ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3192/5000 [07:32<03:45,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369037.jpg: 640x448 3 persons, 1 elephant, 96.9ms
Speed: 3.1ms preprocess, 96.9ms inference, 7.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  64%|██████▍   | 3193/5000 [07:32<04:00,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369081.jpg: 448x640 3 horses, 55.7ms
Speed: 3.1ms preprocess, 55.7ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  64%|██████▍   | 3194/5000 [07:32<03:50,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369310.jpg: 640x416 1 person, 1 cell phone, 57.1ms
Speed: 2.8ms preprocess, 57.1ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  64%|██████▍   | 3195/5000 [07:32<03:39,  8.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369323.jpg: 640x512 4 persons, 2 tennis rackets, 146.6ms
Speed: 4.7ms preprocess, 146.6ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  64%|██████▍   | 3196/5000 [07:33<04:27,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369370.jpg: 480x640 3 sandwichs, 1 orange, 1 broccoli, 60.0ms
Speed: 2.5ms preprocess, 60.0ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3197/5000 [07:33<04:10,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369442.jpg: 480x640 16 persons, 1 sheep, 4 cows, 62.3ms
Speed: 2.8ms preprocess, 62.3ms inference, 25.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3198/5000 [07:33<04:40,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369503.jpg: 480x640 1 person, 2 sinks, 1 refrigerator, 60.3ms
Speed: 3.9ms preprocess, 60.3ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3199/5000 [07:33<04:18,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369541.jpg: 640x480 5 persons, 3 dogs, 1 frisbee, 60.5ms
Speed: 4.1ms preprocess, 60.5ms inference, 10.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  64%|██████▍   | 3200/5000 [07:33<04:15,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369675.jpg: 448x640 3 persons, 1 train, 85.5ms
Speed: 2.9ms preprocess, 85.5ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  64%|██████▍   | 3201/5000 [07:33<04:16,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369751.jpg: 640x448 2 persons, 4 cars, 1 stop sign, 5 parking meters, 60.3ms
Speed: 2.5ms preprocess, 60.3ms inference, 9.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  64%|██████▍   | 3202/5000 [07:33<04:14,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369757.jpg: 480x640 8 pizzas, 66.0ms
Speed: 2.7ms preprocess, 66.0ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3203/5000 [07:34<04:10,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369771.jpg: 480x640 1 cup, 4 bowls, 1 broccoli, 1 dining table, 95.8ms
Speed: 2.6ms preprocess, 95.8ms inference, 6.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3204/5000 [07:34<04:22,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000369812.jpg: 640x640 2 persons, 5 cars, 1 truck, 79.6ms
Speed: 2.3ms preprocess, 79.6ms inference, 12.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  64%|██████▍   | 3205/5000 [07:34<04:28,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000370042.jpg: 384x640 1 umbrella, 1 potted plant, 5 vases, 114.5ms
Speed: 2.7ms preprocess, 114.5ms inference, 5.3ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  64%|██████▍   | 3206/5000 [07:34<04:39,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000370208.jpg: 480x640 1 person, 1 bicycle, 86.3ms
Speed: 4.7ms preprocess, 86.3ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3207/5000 [07:34<04:32,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000370270.jpg: 640x480 3 persons, 1 cell phone, 64.6ms
Speed: 2.8ms preprocess, 64.6ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  64%|██████▍   | 3208/5000 [07:34<04:15,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000370375.jpg: 640x448 1 person, 3 baseball bats, 59.5ms
Speed: 2.8ms preprocess, 59.5ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  64%|██████▍   | 3209/5000 [07:34<04:02,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000370478.jpg: 480x640 1 backpack, 1 suitcase, 1 laptop, 1 mouse, 1 keyboard, 94.0ms
Speed: 3.2ms preprocess, 94.0ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3210/5000 [07:35<04:12,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000370486.jpg: 640x448 9 persons, 3 umbrellas, 1 handbag, 1 sports ball, 59.0ms
Speed: 2.7ms preprocess, 59.0ms inference, 12.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  64%|██████▍   | 3211/5000 [07:35<04:13,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000370677.jpg: 448x640 3 persons, 14 donuts, 56.5ms
Speed: 4.0ms preprocess, 56.5ms inference, 16.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  64%|██████▍   | 3212/5000 [07:35<04:19,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000370711.jpg: 480x640 1 stop sign, 1 bird, 63.4ms
Speed: 2.7ms preprocess, 63.4ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3213/5000 [07:35<04:05,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000370813.jpg: 640x544 3 persons, 1 skateboard, 158.6ms
Speed: 2.3ms preprocess, 158.6ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  64%|██████▍   | 3214/5000 [07:35<04:46,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000370818.jpg: 640x480 1 toilet, 3 sinks, 60.0ms
Speed: 2.2ms preprocess, 60.0ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  64%|██████▍   | 3215/5000 [07:35<04:20,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000370900.jpg: 480x640 4 teddy bears, 62.4ms
Speed: 2.2ms preprocess, 62.4ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3216/5000 [07:35<04:05,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000370999.jpg: 480x640 5 donuts, 1 clock, 101.3ms
Speed: 2.7ms preprocess, 101.3ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3217/5000 [07:36<04:16,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000371042.jpg: 448x640 2 persons, 1 car, 1 backpack, 1 sports ball, 1 tennis racket, 62.2ms
Speed: 2.6ms preprocess, 62.2ms inference, 6.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  64%|██████▍   | 3218/5000 [07:36<04:08,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000371472.jpg: 640x480 3 bananas, 1 pizza, 65.3ms
Speed: 2.9ms preprocess, 65.3ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  64%|██████▍   | 3219/5000 [07:36<04:00,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000371529.jpg: 640x512 1 person, 2 potted plants, 2 toilets, 90.3ms
Speed: 2.7ms preprocess, 90.3ms inference, 9.5ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  64%|██████▍   | 3220/5000 [07:36<04:11,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000371552.jpg: 448x640 5 persons, 5 sports balls, 59.8ms
Speed: 2.6ms preprocess, 59.8ms inference, 9.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  64%|██████▍   | 3221/5000 [07:36<04:06,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000371677.jpg: 480x640 1 bottle, 3 tvs, 2 mouses, 1 remote, 3 keyboards, 62.3ms
Speed: 2.5ms preprocess, 62.3ms inference, 9.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3222/5000 [07:36<04:03,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000371699.jpg: 640x480 3 persons, 2 chairs, 3 dining tables, 1 laptop, 87.1ms
Speed: 2.7ms preprocess, 87.1ms inference, 10.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  64%|██████▍   | 3223/5000 [07:36<04:19,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000371749.jpg: 480x640 3 persons, 1 dog, 1 couch, 3 remotes, 60.3ms
Speed: 3.8ms preprocess, 60.3ms inference, 7.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  64%|██████▍   | 3224/5000 [07:37<04:10,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000372203.jpg: 384x640 1 person, 1 bench, 55.9ms
Speed: 2.4ms preprocess, 55.9ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  64%|██████▍   | 3225/5000 [07:37<03:50,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000372260.jpg: 640x448 1 car, 2 potted plants, 1 clock, 96.4ms
Speed: 2.7ms preprocess, 96.4ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  65%|██████▍   | 3226/5000 [07:37<04:03,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000372307.jpg: 448x640 1 horse, 59.1ms
Speed: 2.9ms preprocess, 59.1ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▍   | 3227/5000 [07:37<03:46,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000372317.jpg: 448x640 1 person, 1 bus, 56.1ms
Speed: 2.6ms preprocess, 56.1ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▍   | 3228/5000 [07:37<03:37,  8.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000372349.jpg: 448x640 2 persons, 1 bird, 83.9ms
Speed: 3.5ms preprocess, 83.9ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▍   | 3229/5000 [07:37<03:45,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000372466.jpg: 384x640 2 persons, 3 cups, 1 chair, 1 mouse, 52.9ms
Speed: 2.5ms preprocess, 52.9ms inference, 8.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  65%|██████▍   | 3230/5000 [07:37<03:39,  8.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000372577.jpg: 640x480 2 persons, 1 tennis racket, 64.7ms
Speed: 3.0ms preprocess, 64.7ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  65%|██████▍   | 3231/5000 [07:37<03:33,  8.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000372718.jpg: 640x640 1 potted plant, 1 vase, 109.4ms
Speed: 1.7ms preprocess, 109.4ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  65%|██████▍   | 3232/5000 [07:38<03:55,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000372819.jpg: 448x640 2 persons, 1 bench, 4 dogs, 1 chair, 61.6ms
Speed: 2.6ms preprocess, 61.6ms inference, 7.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▍   | 3233/5000 [07:38<03:51,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000373315.jpg: 640x480 1 person, 1 skateboard, 58.5ms
Speed: 2.5ms preprocess, 58.5ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  65%|██████▍   | 3234/5000 [07:38<03:39,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000373353.jpg: 448x640 6 persons, 17 cars, 1 bus, 1 truck, 90.1ms
Speed: 3.2ms preprocess, 90.1ms inference, 20.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▍   | 3235/5000 [07:38<04:25,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000373382.jpg: 640x480 9 persons, 1 tennis racket, 62.9ms
Speed: 3.0ms preprocess, 62.9ms inference, 9.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  65%|██████▍   | 3236/5000 [07:38<04:19,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000373705.jpg: 448x640 5 persons, 1 fire hydrant, 57.8ms
Speed: 2.6ms preprocess, 57.8ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▍   | 3237/5000 [07:38<04:00,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000374052.jpg: 448x640 3 persons, 1 parking meter, 15 birds, 96.6ms
Speed: 2.8ms preprocess, 96.6ms inference, 16.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▍   | 3238/5000 [07:38<04:32,  6.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000374083.jpg: 640x448 2 persons, 1 bowl, 3 cakes, 1 dining table, 57.4ms
Speed: 2.5ms preprocess, 57.4ms inference, 6.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  65%|██████▍   | 3239/5000 [07:39<04:14,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000374369.jpg: 480x640 1 person, 2 skiss, 60.5ms
Speed: 2.9ms preprocess, 60.5ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  65%|██████▍   | 3240/5000 [07:39<03:57,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000374545.jpg: 640x576 3 persons, 2 kites, 177.4ms
Speed: 2.0ms preprocess, 177.4ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:  65%|██████▍   | 3241/5000 [07:39<04:51,  6.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000374551.jpg: 384x640 2 birds, 52.1ms
Speed: 3.4ms preprocess, 52.1ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  65%|██████▍   | 3242/5000 [07:39<04:17,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000374727.jpg: 448x640 (no detections), 59.9ms
Speed: 3.0ms preprocess, 59.9ms inference, 0.6ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000374982.jpg: 640x480 2 persons, 1 pizza, 2 chairs, 3 dining tables, 52.7ms
Speed: 3.6ms preprocess, 52.7ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  65%|██████▍   | 3244/5000 [07:39<03:45,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000375015.jpg: 416x640 1 bear, 126.3ms
Speed: 2.6ms preprocess, 126.3ms inference, 2.6ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  65%|██████▍   | 3245/5000 [07:39<04:06,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000375078.jpg: 640x480 (no detections), 61.8ms
Speed: 2.6ms preprocess, 61.8ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000375278.jpg: 640x448 2 persons, 2 cats, 1 book, 52.1ms
Speed: 3.7ms preprocess, 52.1ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  65%|██████▍   | 3247/5000 [07:40<03:35,  8.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000375430.jpg: 448x640 1 sandwich, 2 chairs, 1 dining table, 58.5ms
Speed: 2.9ms preprocess, 58.5ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▍   | 3248/5000 [07:40<03:32,  8.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000375469.jpg: 192x640 3 persons, 2 surfboards, 121.5ms
Speed: 2.2ms preprocess, 121.5ms inference, 2.5ms postprocess per image at shape (1, 3, 192, 640)


Segmenting Images:  65%|██████▍   | 3249/5000 [07:40<03:51,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000375493.jpg: 480x640 1 person, 2 dogs, 1 sheep, 64.9ms
Speed: 2.7ms preprocess, 64.9ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  65%|██████▌   | 3250/5000 [07:40<03:45,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000375763.jpg: 480x640 11 sheeps, 70.1ms
Speed: 2.9ms preprocess, 70.1ms inference, 11.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  65%|██████▌   | 3251/5000 [07:40<03:57,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376093.jpg: 480x640 6 persons, 2 cups, 1 bowl, 2 pizzas, 2 dining tables, 99.7ms
Speed: 2.6ms preprocess, 99.7ms inference, 12.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  65%|██████▌   | 3252/5000 [07:40<04:21,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376112.jpg: 640x448 5 persons, 2 backpacks, 4 skiss, 58.0ms
Speed: 2.7ms preprocess, 58.0ms inference, 9.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  65%|██████▌   | 3253/5000 [07:40<04:12,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376206.jpg: 448x640 1 person, 2 surfboards, 59.3ms
Speed: 2.5ms preprocess, 59.3ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▌   | 3254/5000 [07:41<03:55,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376264.jpg: 512x640 1 cup, 1 dining table, 1 laptop, 1 remote, 1 book, 165.7ms
Speed: 2.7ms preprocess, 165.7ms inference, 4.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  65%|██████▌   | 3255/5000 [07:41<04:40,  6.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376278.jpg: 448x640 3 zebras, 57.9ms
Speed: 2.7ms preprocess, 57.9ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▌   | 3256/5000 [07:41<04:13,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376284.jpg: 448x640 6 cars, 1 truck, 2 fire hydrants, 59.4ms
Speed: 4.0ms preprocess, 59.4ms inference, 8.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▌   | 3257/5000 [07:41<04:05,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376307.jpg: 640x480 3 persons, 1 cup, 3 bananas, 2 apples, 1 chair, 88.5ms
Speed: 4.4ms preprocess, 88.5ms inference, 10.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  65%|██████▌   | 3258/5000 [07:41<04:20,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376310.jpg: 480x640 1 wine glass, 1 bowl, 2 sinks, 63.5ms
Speed: 2.7ms preprocess, 63.5ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  65%|██████▌   | 3259/5000 [07:41<04:02,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376322.jpg: 640x480 9 persons, 1 bottle, 5 cups, 2 forks, 2 knifes, 1 spoon, 1 sandwich, 1 cake, 1 dining table, 60.2ms
Speed: 2.7ms preprocess, 60.2ms inference, 23.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  65%|██████▌   | 3260/5000 [07:42<04:25,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376365.jpg: 480x640 1 person, 1 car, 1 cup, 65.5ms
Speed: 4.1ms preprocess, 65.5ms inference, 4.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  65%|██████▌   | 3261/5000 [07:42<04:15,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376442.jpg: 640x480 1 cat, 1 toilet, 62.5ms
Speed: 3.4ms preprocess, 62.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  65%|██████▌   | 3262/5000 [07:42<03:56,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376478.jpg: 640x480 1 dining table, 1 vase, 62.3ms
Speed: 2.5ms preprocess, 62.3ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  65%|██████▌   | 3263/5000 [07:42<03:51,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376625.jpg: 448x640 6 persons, 3 cars, 1 bus, 1 train, 69.8ms
Speed: 3.3ms preprocess, 69.8ms inference, 10.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▌   | 3264/5000 [07:42<04:00,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376856.jpg: 640x480 1 clock, 65.4ms
Speed: 2.7ms preprocess, 65.4ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  65%|██████▌   | 3265/5000 [07:42<03:58,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000376900.jpg: 640x480 9 persons, 1 tennis racket, 63.2ms
Speed: 2.8ms preprocess, 63.2ms inference, 9.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  65%|██████▌   | 3266/5000 [07:42<04:04,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377000.jpg: 640x544 1 cat, 180.7ms
Speed: 2.0ms preprocess, 180.7ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  65%|██████▌   | 3267/5000 [07:43<04:50,  5.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377113.jpg: 448x640 5 persons, 1 skis, 58.7ms
Speed: 2.8ms preprocess, 58.7ms inference, 6.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▌   | 3268/5000 [07:43<04:24,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377239.jpg: 448x640 4 persons, 1 umbrella, 1 bottle, 55.5ms
Speed: 2.7ms preprocess, 55.5ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▌   | 3269/5000 [07:43<04:05,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377368.jpg: 512x640 1 person, 1 cup, 1 cake, 1 dining table, 100.6ms
Speed: 2.7ms preprocess, 100.6ms inference, 4.5ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  65%|██████▌   | 3270/5000 [07:43<04:11,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377393.jpg: 480x640 1 person, 67.1ms
Speed: 2.8ms preprocess, 67.1ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  65%|██████▌   | 3271/5000 [07:43<03:57,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377486.jpg: 448x640 3 persons, 2 horses, 53.9ms
Speed: 2.5ms preprocess, 53.9ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  65%|██████▌   | 3272/5000 [07:43<03:42,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377497.jpg: 640x448 1 zebra, 77.5ms
Speed: 2.7ms preprocess, 77.5ms inference, 10.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  65%|██████▌   | 3273/5000 [07:43<03:47,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377575.jpg: 640x448 1 fire hydrant, 63.8ms
Speed: 2.7ms preprocess, 63.8ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  65%|██████▌   | 3274/5000 [07:43<03:36,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377588.jpg: 448x640 6 persons, 1 baseball bat, 1 baseball glove, 1 tennis racket, 59.8ms
Speed: 2.6ms preprocess, 59.8ms inference, 9.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3275/5000 [07:44<03:37,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377635.jpg: 448x640 3 persons, 1 skateboard, 58.9ms
Speed: 2.9ms preprocess, 58.9ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3276/5000 [07:44<03:34,  8.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377670.jpg: 640x512 3 persons, 2 boats, 1 bench, 1 teddy bear, 143.8ms
Speed: 3.7ms preprocess, 143.8ms inference, 7.1ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  66%|██████▌   | 3277/5000 [07:44<04:22,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377723.jpg: 448x640 10 persons, 1 bus, 60.4ms
Speed: 2.7ms preprocess, 60.4ms inference, 10.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3278/5000 [07:44<04:17,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377814.jpg: 480x640 8 donuts, 61.7ms
Speed: 2.7ms preprocess, 61.7ms inference, 8.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  66%|██████▌   | 3279/5000 [07:44<04:06,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377882.jpg: 480x640 (no detections), 85.2ms
Speed: 2.7ms preprocess, 85.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  66%|██████▌   | 3280/5000 [07:44<03:58,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000377946.jpg: 448x640 6 persons, 2 cars, 1 bus, 1 truck, 1 traffic light, 64.3ms
Speed: 3.1ms preprocess, 64.3ms inference, 8.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3281/5000 [07:44<03:57,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000378099.jpg: 352x640 1 mouse, 1 keyboard, 106.2ms
Speed: 2.4ms preprocess, 106.2ms inference, 2.1ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  66%|██████▌   | 3282/5000 [07:45<04:04,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000378116.jpg: 480x640 1 person, 1 surfboard, 87.1ms
Speed: 2.6ms preprocess, 87.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  66%|██████▌   | 3283/5000 [07:45<04:03,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000378139.jpg: 448x640 1 car, 2 boats, 1 clock, 60.2ms
Speed: 2.8ms preprocess, 60.2ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3284/5000 [07:45<03:51,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000378244.jpg: 640x448 1 person, 98.9ms
Speed: 2.7ms preprocess, 98.9ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  66%|██████▌   | 3285/5000 [07:45<03:56,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000378284.jpg: 448x640 1 person, 1 fork, 1 knife, 1 pizza, 1 dining table, 58.0ms
Speed: 2.7ms preprocess, 58.0ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3286/5000 [07:45<03:45,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000378453.jpg: 448x640 13 zebras, 60.3ms
Speed: 3.5ms preprocess, 60.3ms inference, 11.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3287/5000 [07:45<03:51,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000378454.jpg: 640x448 1 person, 1 frisbee, 81.0ms
Speed: 2.6ms preprocess, 81.0ms inference, 7.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  66%|██████▌   | 3288/5000 [07:45<03:50,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000378515.jpg: 640x448 7 persons, 1 bicycle, 1 umbrella, 1 bowl, 61.1ms
Speed: 2.6ms preprocess, 61.1ms inference, 9.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  66%|██████▌   | 3289/5000 [07:46<03:52,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000378605.jpg: 640x448 1 cup, 1 donut, 1 dining table, 56.9ms
Speed: 2.5ms preprocess, 56.9ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  66%|██████▌   | 3290/5000 [07:46<03:39,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000378673.jpg: 448x640 20 persons, 1 skateboard, 89.5ms
Speed: 2.5ms preprocess, 89.5ms inference, 22.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3291/5000 [07:46<04:14,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000378873.jpg: 448x640 5 persons, 4 apples, 2 oranges, 59.3ms
Speed: 2.7ms preprocess, 59.3ms inference, 9.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3292/5000 [07:46<04:08,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000379332.jpg: 448x640 3 persons, 1 bench, 1 tennis racket, 56.1ms
Speed: 3.8ms preprocess, 56.1ms inference, 6.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3293/5000 [07:46<03:53,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000379441.jpg: 480x640 1 chair, 1 couch, 1 remote, 95.6ms
Speed: 2.6ms preprocess, 95.6ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  66%|██████▌   | 3294/5000 [07:46<03:57,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000379453.jpg: 640x640 1 airplane, 159.7ms
Speed: 2.9ms preprocess, 159.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  66%|██████▌   | 3295/5000 [07:46<04:33,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000379476.jpg: 640x512 5 wine glasss, 1 dining table, 65.0ms
Speed: 2.8ms preprocess, 65.0ms inference, 6.7ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  66%|██████▌   | 3296/5000 [07:47<04:17,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000379533.jpg: 640x480 2 elephants, 1 zebra, 85.7ms
Speed: 2.9ms preprocess, 85.7ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  66%|██████▌   | 3297/5000 [07:47<04:11,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000379800.jpg: 512x640 3 cars, 1 train, 1 stop sign, 63.6ms
Speed: 3.0ms preprocess, 63.6ms inference, 5.8ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  66%|██████▌   | 3298/5000 [07:47<04:01,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000379842.jpg: 384x640 1 remote, 1 book, 114.9ms
Speed: 2.7ms preprocess, 114.9ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  66%|██████▌   | 3299/5000 [07:47<04:10,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000380203.jpg: 640x448 3 horses, 59.7ms
Speed: 2.7ms preprocess, 59.7ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  66%|██████▌   | 3300/5000 [07:47<03:57,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000380706.jpg: 448x640 6 persons, 1 bicycle, 1 car, 1 bus, 1 traffic light, 74.7ms
Speed: 3.5ms preprocess, 74.7ms inference, 8.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3301/5000 [07:47<04:01,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000380711.jpg: 640x640 1 person, 1 surfboard, 82.9ms
Speed: 3.1ms preprocess, 82.9ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  66%|██████▌   | 3302/5000 [07:47<03:58,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000380913.jpg: 448x640 6 persons, 62.4ms
Speed: 3.0ms preprocess, 62.4ms inference, 5.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3303/5000 [07:48<03:50,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000381360.jpg: 448x640 1 person, 1 skateboard, 1 surfboard, 59.6ms
Speed: 2.5ms preprocess, 59.6ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3304/5000 [07:48<03:41,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000381587.jpg: 640x448 2 persons, 1 bottle, 6 cups, 22 bowls, 1 broccoli, 1 cake, 4 dining tables, 76.0ms
Speed: 4.2ms preprocess, 76.0ms inference, 30.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  66%|██████▌   | 3305/5000 [07:48<04:38,  6.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000381639.jpg: 640x608 10 persons, 1 airplane, 1 bottle, 151.3ms
Speed: 2.0ms preprocess, 151.3ms inference, 15.4ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  66%|██████▌   | 3306/5000 [07:48<05:18,  5.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000381971.jpg: 448x640 4 persons, 2 cars, 1 fire hydrant, 1 horse, 57.1ms
Speed: 2.6ms preprocess, 57.1ms inference, 6.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3307/5000 [07:48<04:46,  5.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000382009.jpg: 448x640 1 person, 1 tennis racket, 2 chairs, 60.0ms
Speed: 3.0ms preprocess, 60.0ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3308/5000 [07:48<04:24,  6.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000382030.jpg: 480x640 1 person, 1 book, 78.6ms
Speed: 6.1ms preprocess, 78.6ms inference, 4.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  66%|██████▌   | 3309/5000 [07:49<04:18,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000382088.jpg: 448x640 1 horse, 68.4ms
Speed: 3.3ms preprocess, 68.4ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3310/5000 [07:49<04:04,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000382111.jpg: 448x640 2 persons, 3 cars, 2 trucks, 1 horse, 67.4ms
Speed: 3.7ms preprocess, 67.4ms inference, 8.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▌   | 3311/5000 [07:49<04:00,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000382122.jpg: 640x448 2 clocks, 65.1ms
Speed: 2.8ms preprocess, 65.1ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  66%|██████▌   | 3312/5000 [07:49<03:48,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000382125.jpg: 480x640 9 persons, 5 cups, 1 pizza, 6 chairs, 94.1ms
Speed: 3.9ms preprocess, 94.1ms inference, 25.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  66%|██████▋   | 3313/5000 [07:49<04:28,  6.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000382696.jpg: 480x640 1 oven, 1 refrigerator, 61.5ms
Speed: 3.0ms preprocess, 61.5ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  66%|██████▋   | 3314/5000 [07:49<04:05,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000382734.jpg: 640x480 1 toilet, 1 refrigerator, 62.2ms
Speed: 3.2ms preprocess, 62.2ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  66%|██████▋   | 3315/5000 [07:49<03:48,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000382743.jpg: 480x640 4 kites, 86.6ms
Speed: 3.1ms preprocess, 86.6ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  66%|██████▋   | 3316/5000 [07:49<03:51,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000383289.jpg: 448x640 1 person, 2 cars, 63.5ms
Speed: 3.1ms preprocess, 63.5ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▋   | 3317/5000 [07:50<03:41,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000383337.jpg: 480x640 1 person, 1 bowl, 4 donuts, 61.8ms
Speed: 3.3ms preprocess, 61.8ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  66%|██████▋   | 3318/5000 [07:50<03:39,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000383339.jpg: 448x640 1 person, 97.0ms
Speed: 2.6ms preprocess, 97.0ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▋   | 3319/5000 [07:50<03:45,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000383384.jpg: 384x640 7 persons, 1 baseball glove, 54.5ms
Speed: 2.5ms preprocess, 54.5ms inference, 6.1ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  66%|██████▋   | 3320/5000 [07:50<03:38,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000383386.jpg: 480x640 1 toilet, 4 teddy bears, 61.6ms
Speed: 2.7ms preprocess, 61.6ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  66%|██████▋   | 3321/5000 [07:50<03:34,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000383443.jpg: 448x640 1 tv, 1 microwave, 1 sink, 76.8ms
Speed: 2.6ms preprocess, 76.8ms inference, 8.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▋   | 3322/5000 [07:50<03:37,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000383606.jpg: 480x640 1 person, 1 bottle, 1 sink, 66.0ms
Speed: 3.1ms preprocess, 66.0ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  66%|██████▋   | 3323/5000 [07:50<03:34,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000383621.jpg: 448x640 1 airplane, 60.0ms
Speed: 3.0ms preprocess, 60.0ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  66%|██████▋   | 3324/5000 [07:50<03:24,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000383676.jpg: 480x640 2 giraffes, 64.4ms
Speed: 2.7ms preprocess, 64.4ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  66%|██████▋   | 3325/5000 [07:51<03:23,  8.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000383838.jpg: 608x640 2 persons, 153.7ms
Speed: 2.9ms preprocess, 153.7ms inference, 3.1ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  67%|██████▋   | 3326/5000 [07:51<04:06,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000383842.jpg: 448x640 11 persons, 1 handbag, 3 ties, 1 laptop, 1 remote, 1 cell phone, 57.9ms
Speed: 2.8ms preprocess, 57.9ms inference, 16.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3327/5000 [07:51<04:12,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000383921.jpg: 480x640 14 elephants, 63.1ms
Speed: 2.9ms preprocess, 63.1ms inference, 13.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3328/5000 [07:51<04:11,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384136.jpg: 416x640 2 persons, 1 cell phone, 168.1ms
Speed: 2.4ms preprocess, 168.1ms inference, 3.2ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  67%|██████▋   | 3329/5000 [07:51<04:44,  5.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384350.jpg: 480x640 2 airplanes, 65.7ms
Speed: 2.8ms preprocess, 65.7ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3330/5000 [07:51<04:20,  6.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384468.jpg: 480x640 4 persons, 1 car, 1 elephant, 72.8ms
Speed: 3.6ms preprocess, 72.8ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3331/5000 [07:52<04:11,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384513.jpg: 480x640 1 bicycle, 1 horse, 73.8ms
Speed: 2.9ms preprocess, 73.8ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3332/5000 [07:52<04:05,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384527.jpg: 480x640 1 cup, 1 bowl, 4 chairs, 1 potted plant, 1 dining table, 1 book, 2 vases, 71.2ms
Speed: 3.4ms preprocess, 71.2ms inference, 11.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3333/5000 [07:52<04:13,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384616.jpg: 480x640 2 fire hydrants, 67.0ms
Speed: 3.2ms preprocess, 67.0ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3334/5000 [07:52<03:56,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384651.jpg: 448x640 1 laptop, 58.0ms
Speed: 2.8ms preprocess, 58.0ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3335/5000 [07:52<03:40,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384661.jpg: 480x640 2 bottles, 2 spoons, 1 oven, 1 refrigerator, 85.4ms
Speed: 4.4ms preprocess, 85.4ms inference, 9.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3336/5000 [07:52<03:52,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384666.jpg: 448x640 11 persons, 1 backpack, 1 skis, 57.6ms
Speed: 4.3ms preprocess, 57.6ms inference, 13.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3337/5000 [07:52<03:54,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384670.jpg: 480x640 2 persons, 1 tennis racket, 61.5ms
Speed: 2.7ms preprocess, 61.5ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3338/5000 [07:53<03:38,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384808.jpg: 640x480 1 person, 5 bottles, 1 cell phone, 2 sinks, 113.9ms
Speed: 4.1ms preprocess, 113.9ms inference, 10.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  67%|██████▋   | 3339/5000 [07:53<04:08,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384850.jpg: 640x480 1 toilet, 61.0ms
Speed: 2.8ms preprocess, 61.0ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  67%|██████▋   | 3340/5000 [07:53<03:48,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000384949.jpg: 480x640 1 boat, 1 clock, 65.8ms
Speed: 2.7ms preprocess, 65.8ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3341/5000 [07:53<03:36,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000385029.jpg: 480x640 1 person, 2 bottles, 2 wine glasss, 1 spoon, 1 bowl, 1 dining table, 62.6ms
Speed: 2.7ms preprocess, 62.6ms inference, 9.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3342/5000 [07:53<03:42,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000385190.jpg: 480x640 5 persons, 74.5ms
Speed: 4.4ms preprocess, 74.5ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3343/5000 [07:53<03:44,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000385205.jpg: 480x640 1 cat, 64.3ms
Speed: 3.1ms preprocess, 64.3ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3344/5000 [07:53<03:32,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000385719.jpg: 448x640 3 tvs, 1 laptop, 3 mouses, 2 keyboards, 59.5ms
Speed: 2.7ms preprocess, 59.5ms inference, 8.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3345/5000 [07:53<03:32,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000385997.jpg: 480x640 1 chair, 2 potted plants, 89.5ms
Speed: 2.8ms preprocess, 89.5ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3346/5000 [07:54<03:39,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000386134.jpg: 640x640 1 broccoli, 79.4ms
Speed: 3.8ms preprocess, 79.4ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  67%|██████▋   | 3347/5000 [07:54<03:40,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000386210.jpg: 640x480 2 cars, 2 oranges, 67.0ms
Speed: 2.8ms preprocess, 67.0ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  67%|██████▋   | 3348/5000 [07:54<03:36,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000386277.jpg: 448x640 1 apple, 1 orange, 94.6ms
Speed: 3.0ms preprocess, 94.6ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3349/5000 [07:54<03:43,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000386352.jpg: 448x640 10 persons, 2 tennis rackets, 60.4ms
Speed: 2.9ms preprocess, 60.4ms inference, 11.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3350/5000 [07:54<03:46,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000386457.jpg: 640x448 1 cat, 62.8ms
Speed: 2.7ms preprocess, 62.8ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  67%|██████▋   | 3351/5000 [07:54<03:31,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000386879.jpg: 640x448 1 person, 2 tennis rackets, 84.1ms
Speed: 2.6ms preprocess, 84.1ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  67%|██████▋   | 3352/5000 [07:54<03:39,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000386912.jpg: 480x640 1 person, 1 laptop, 2 books, 60.7ms
Speed: 2.5ms preprocess, 60.7ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3353/5000 [07:55<03:31,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000387098.jpg: 480x640 1 bottle, 2 cups, 2 tvs, 1 laptop, 1 mouse, 1 keyboard, 61.0ms
Speed: 2.5ms preprocess, 61.0ms inference, 10.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3354/5000 [07:55<03:32,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000387148.jpg: 448x640 3 horses, 1 sheep, 5 cows, 92.1ms
Speed: 2.6ms preprocess, 92.1ms inference, 11.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3355/5000 [07:55<03:52,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000387383.jpg: 480x640 3 cats, 1 bed, 62.3ms
Speed: 2.5ms preprocess, 62.3ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3356/5000 [07:55<03:39,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000387387.jpg: 480x640 2 persons, 1 bus, 65.7ms
Speed: 2.6ms preprocess, 65.7ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3357/5000 [07:55<03:29,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000387916.jpg: 448x640 10 persons, 2 skiss, 1 snowboard, 87.9ms
Speed: 2.8ms preprocess, 87.9ms inference, 19.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3358/5000 [07:55<03:57,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000388056.jpg: 448x640 8 persons, 1 baseball bat, 1 baseball glove, 57.0ms
Speed: 3.7ms preprocess, 57.0ms inference, 8.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3359/5000 [07:55<03:50,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000388215.jpg: 448x640 1 person, 1 sports ball, 1 tennis racket, 58.6ms
Speed: 2.7ms preprocess, 58.6ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3360/5000 [07:55<03:35,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000388258.jpg: 480x640 1 bicycle, 1 airplane, 85.1ms
Speed: 2.6ms preprocess, 85.1ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3361/5000 [07:56<03:40,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000388846.jpg: 448x640 5 persons, 12 umbrellas, 1 chair, 58.7ms
Speed: 2.5ms preprocess, 58.7ms inference, 19.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3362/5000 [07:56<03:52,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000388903.jpg: 448x640 3 persons, 1 apple, 2 cell phones, 59.9ms
Speed: 3.8ms preprocess, 59.9ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3363/5000 [07:56<03:41,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000388927.jpg: 640x480 2 persons, 1 laptop, 1 cell phone, 83.5ms
Speed: 2.8ms preprocess, 83.5ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  67%|██████▋   | 3364/5000 [07:56<03:46,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000389109.jpg: 640x448 5 persons, 1 bottle, 1 pizza, 2 chairs, 1 dining table, 1 cell phone, 61.2ms
Speed: 2.6ms preprocess, 61.2ms inference, 10.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  67%|██████▋   | 3365/5000 [07:56<03:46,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000389197.jpg: 448x640 4 persons, 1 surfboard, 61.4ms
Speed: 2.6ms preprocess, 61.4ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3366/5000 [07:56<03:39,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000389315.jpg: 640x640 4 books, 110.7ms
Speed: 2.0ms preprocess, 110.7ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  67%|██████▋   | 3367/5000 [07:56<03:56,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000389316.jpg: 384x640 2 persons, 1 bench, 2 elephants, 53.1ms
Speed: 2.7ms preprocess, 53.1ms inference, 4.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  67%|██████▋   | 3368/5000 [07:57<03:38,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000389381.jpg: 544x640 1 orange, 1 broccoli, 1 dining table, 165.5ms
Speed: 2.3ms preprocess, 165.5ms inference, 3.6ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  67%|██████▋   | 3369/5000 [07:57<04:19,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000389451.jpg: 480x640 6 persons, 2 horses, 2 cows, 59.3ms
Speed: 2.6ms preprocess, 59.3ms inference, 9.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3370/5000 [07:57<04:08,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000389532.jpg: 448x640 1 person, 2 birds, 82.3ms
Speed: 2.4ms preprocess, 82.3ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3371/5000 [07:57<04:00,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000389566.jpg: 448x640 1 car, 13 sheeps, 59.0ms
Speed: 3.0ms preprocess, 59.0ms inference, 12.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3372/5000 [07:57<04:00,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000389684.jpg: 480x640 2 persons, 1 bus, 1 truck, 65.2ms
Speed: 2.5ms preprocess, 65.2ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  67%|██████▋   | 3373/5000 [07:57<03:46,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000389804.jpg: 448x640 1 bottle, 1 toilet, 1 sink, 62.9ms
Speed: 2.4ms preprocess, 62.9ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  67%|██████▋   | 3374/5000 [07:57<03:39,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000389812.jpg: 480x640 1 person, 3 bananas, 77.2ms
Speed: 3.3ms preprocess, 77.2ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3375/5000 [07:58<03:40,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000389933.jpg: 480x640 1 dog, 1 couch, 62.7ms
Speed: 2.8ms preprocess, 62.7ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3376/5000 [07:58<03:28,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000390246.jpg: 448x640 1 person, 1 surfboard, 58.0ms
Speed: 2.7ms preprocess, 58.0ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  68%|██████▊   | 3377/5000 [07:58<03:17,  8.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000390301.jpg: 480x640 (no detections), 98.7ms
Speed: 4.3ms preprocess, 98.7ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3378/5000 [07:58<03:25,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000390555.jpg: 448x640 31 persons, 7 ties, 2 sports balls, 61.6ms
Speed: 2.9ms preprocess, 61.6ms inference, 34.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  68%|██████▊   | 3379/5000 [07:58<04:15,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000390826.jpg: 480x640 2 persons, 1 sheep, 1 bear, 68.3ms
Speed: 3.1ms preprocess, 68.3ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3380/5000 [07:58<03:58,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000390902.jpg: 640x448 2 persons, 1 tennis racket, 57.7ms
Speed: 4.4ms preprocess, 57.7ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  68%|██████▊   | 3381/5000 [07:58<03:41,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000391140.jpg: 480x640 1 person, 1 remote, 95.4ms
Speed: 3.4ms preprocess, 95.4ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3382/5000 [07:59<03:45,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000391144.jpg: 448x640 6 elephants, 59.3ms
Speed: 2.7ms preprocess, 59.3ms inference, 6.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  68%|██████▊   | 3383/5000 [07:59<03:38,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000391290.jpg: 448x640 4 persons, 1 car, 1 umbrella, 1 frisbee, 1 sports ball, 60.3ms
Speed: 3.1ms preprocess, 60.3ms inference, 7.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  68%|██████▊   | 3384/5000 [07:59<03:33,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000391375.jpg: 480x640 1 person, 2 benchs, 63.9ms
Speed: 2.8ms preprocess, 63.9ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3385/5000 [07:59<03:26,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000391648.jpg: 640x480 2 clocks, 82.3ms
Speed: 3.0ms preprocess, 82.3ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  68%|██████▊   | 3386/5000 [07:59<03:30,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000391722.jpg: 640x640 5 persons, 1 tie, 1 bottle, 1 cake, 1 dining table, 83.6ms
Speed: 1.9ms preprocess, 83.6ms inference, 12.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  68%|██████▊   | 3387/5000 [07:59<03:49,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000392228.jpg: 640x480 1 clock, 62.9ms
Speed: 2.8ms preprocess, 62.9ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  68%|██████▊   | 3388/5000 [07:59<03:33,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000392481.jpg: 448x640 1 airplane, 55.3ms
Speed: 2.6ms preprocess, 55.3ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000392722.jpg: 448x640 4 persons, 6 cars, 1 bus, 1 handbag, 75.1ms
Speed: 6.4ms preprocess, 75.1ms inference, 10.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  68%|██████▊   | 3390/5000 [08:00<03:30,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000392818.jpg: 608x640 1 dog, 1 tie, 79.4ms
Speed: 2.7ms preprocess, 79.4ms inference, 3.4ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  68%|██████▊   | 3391/5000 [08:00<03:31,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000392933.jpg: 640x384 1 giraffe, 107.9ms
Speed: 2.6ms preprocess, 107.9ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  68%|██████▊   | 3392/5000 [08:00<03:40,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000393014.jpg: 480x640 1 sandwich, 1 hot dog, 76.9ms
Speed: 2.5ms preprocess, 76.9ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3393/5000 [08:00<03:39,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000393056.jpg: 480x640 1 person, 66.5ms
Speed: 2.7ms preprocess, 66.5ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3394/5000 [08:00<03:30,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000393093.jpg: 448x640 3 persons, 4 cars, 1 train, 1 truck, 58.1ms
Speed: 2.7ms preprocess, 58.1ms inference, 7.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  68%|██████▊   | 3395/5000 [08:00<03:28,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000393115.jpg: 640x640 1 person, 1 skateboard, 83.1ms
Speed: 2.3ms preprocess, 83.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  68%|██████▊   | 3396/5000 [08:00<03:31,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000393226.jpg: 480x640 1 person, 3 cars, 2 trucks, 2 traffic lights, 81.8ms
Speed: 4.4ms preprocess, 81.8ms inference, 8.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3397/5000 [08:01<03:41,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000393282.jpg: 640x640 2 giraffes, 85.9ms
Speed: 1.9ms preprocess, 85.9ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  68%|██████▊   | 3398/5000 [08:01<03:43,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000393469.jpg: 640x480 2 persons, 3 sheeps, 1 snowboard, 61.6ms
Speed: 2.9ms preprocess, 61.6ms inference, 6.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  68%|██████▊   | 3399/5000 [08:01<03:38,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000393569.jpg: 480x640 2 persons, 2 bottles, 2 toilets, 1 sink, 95.8ms
Speed: 3.2ms preprocess, 95.8ms inference, 11.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3400/5000 [08:01<03:55,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000393838.jpg: 448x640 1 bowl, 9 carrots, 1 chair, 69.3ms
Speed: 3.4ms preprocess, 69.3ms inference, 9.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  68%|██████▊   | 3401/5000 [08:01<03:55,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000394199.jpg: 480x640 1 person, 1 motorcycle, 62.3ms
Speed: 2.7ms preprocess, 62.3ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3402/5000 [08:01<03:38,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000394206.jpg: 480x640 10 persons, 1 car, 1 motorcycle, 104.1ms
Speed: 2.9ms preprocess, 104.1ms inference, 11.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3403/5000 [08:01<04:01,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000394275.jpg: 416x640 5 persons, 1 car, 1 bus, 50.5ms
Speed: 2.8ms preprocess, 50.5ms inference, 6.0ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  68%|██████▊   | 3404/5000 [08:02<03:43,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000394328.jpg: 640x480 2 toilets, 64.6ms
Speed: 3.2ms preprocess, 64.6ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  68%|██████▊   | 3405/5000 [08:02<03:31,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000394510.jpg: 480x640 1 person, 4 bicycles, 1 car, 1 skateboard, 99.9ms
Speed: 2.6ms preprocess, 99.9ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3406/5000 [08:02<03:46,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000394559.jpg: 640x448 2 persons, 1 tennis racket, 60.5ms
Speed: 2.7ms preprocess, 60.5ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  68%|██████▊   | 3407/5000 [08:02<03:32,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000394611.jpg: 480x640 2 giraffes, 62.1ms
Speed: 3.1ms preprocess, 62.1ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3408/5000 [08:02<03:22,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000394677.jpg: 480x640 5 persons, 1 handbag, 1 couch, 1 laptop, 97.3ms
Speed: 4.0ms preprocess, 97.3ms inference, 7.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3409/5000 [08:02<03:41,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000394940.jpg: 640x448 2 persons, 1 knife, 1 donut, 1 cake, 2 dining tables, 60.3ms
Speed: 2.8ms preprocess, 60.3ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  68%|██████▊   | 3410/5000 [08:02<03:34,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000395180.jpg: 512x640 5 persons, 2 bicycles, 1 boat, 130.3ms
Speed: 2.8ms preprocess, 130.3ms inference, 6.7ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  68%|██████▊   | 3411/5000 [08:03<04:02,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000395343.jpg: 480x640 5 vases, 62.2ms
Speed: 3.0ms preprocess, 62.2ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3412/5000 [08:03<03:52,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000395388.jpg: 640x480 7 persons, 72.6ms
Speed: 4.3ms preprocess, 72.6ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  68%|██████▊   | 3413/5000 [08:03<03:50,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000395575.jpg: 448x640 1 bench, 60.1ms
Speed: 4.3ms preprocess, 60.1ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  68%|██████▊   | 3414/5000 [08:03<03:32,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000395633.jpg: 480x640 1 boat, 64.9ms
Speed: 3.0ms preprocess, 64.9ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3415/5000 [08:03<03:25,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000395701.jpg: 480x640 2 chairs, 1 potted plant, 1 tv, 67.6ms
Speed: 3.6ms preprocess, 67.6ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3416/5000 [08:03<03:25,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000395801.jpg: 480x640 8 persons, 2 cars, 1 skateboard, 1 clock, 66.4ms
Speed: 2.8ms preprocess, 66.4ms inference, 11.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3417/5000 [08:03<03:35,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000395903.jpg: 384x640 2 persons, 2 surfboards, 114.6ms
Speed: 2.8ms preprocess, 114.6ms inference, 4.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  68%|██████▊   | 3418/5000 [08:04<03:50,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000396200.jpg: 448x640 3 persons, 4 cars, 1 truck, 2 traffic lights, 3 skateboards, 54.5ms
Speed: 2.4ms preprocess, 54.5ms inference, 12.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  68%|██████▊   | 3419/5000 [08:04<03:48,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000396205.jpg: 640x448 3 cows, 95.9ms
Speed: 3.0ms preprocess, 95.9ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  68%|██████▊   | 3420/5000 [08:04<03:54,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000396274.jpg: 480x640 (no detections), 64.5ms
Speed: 3.1ms preprocess, 64.5ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3421/5000 [08:04<03:34,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000396338.jpg: 448x640 3 persons, 2 cars, 1 truck, 63.2ms
Speed: 3.1ms preprocess, 63.2ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  68%|██████▊   | 3422/5000 [08:04<03:29,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000396518.jpg: 480x640 1 person, 1 skateboard, 1 surfboard, 58.0ms
Speed: 3.7ms preprocess, 58.0ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  68%|██████▊   | 3423/5000 [08:04<03:25,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000396526.jpg: 448x640 4 chairs, 2 dining tables, 75.5ms
Speed: 4.4ms preprocess, 75.5ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  68%|██████▊   | 3424/5000 [08:04<03:32,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000396568.jpg: 448x640 1 train, 61.7ms
Speed: 3.0ms preprocess, 61.7ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  68%|██████▊   | 3425/5000 [08:04<03:20,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000396580.jpg: 448x640 1 boat, 58.3ms
Speed: 2.9ms preprocess, 58.3ms inference, 1.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▊   | 3426/5000 [08:05<03:12,  8.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000396729.jpg: 448x640 1 bowl, 1 mouse, 1 cell phone, 80.7ms
Speed: 2.9ms preprocess, 80.7ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▊   | 3427/5000 [08:05<03:23,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000396863.jpg: 448x640 2 persons, 1 horse, 64.4ms
Speed: 3.0ms preprocess, 64.4ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▊   | 3428/5000 [08:05<03:17,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000396903.jpg: 448x640 1 airplane, 56.7ms
Speed: 2.5ms preprocess, 56.7ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▊   | 3429/5000 [08:05<03:07,  8.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000397133.jpg: 448x640 2 persons, 2 handbags, 2 cups, 1 knife, 5 bowls, 1 potted plant, 1 dining table, 3 ovens, 1 vase, 58.9ms
Speed: 3.1ms preprocess, 58.9ms inference, 27.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▊   | 3430/5000 [08:05<03:33,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000397279.jpg: 448x640 1 person, 1 tennis racket, 57.0ms
Speed: 3.1ms preprocess, 57.0ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▊   | 3431/5000 [08:05<03:21,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000397303.jpg: 480x640 7 persons, 1 tie, 11 chairs, 2 laptops, 1 cell phone, 1 book, 64.3ms
Speed: 2.7ms preprocess, 64.3ms inference, 21.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▊   | 3432/5000 [08:05<03:48,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000397327.jpg: 448x640 1 toilet, 1 sink, 84.8ms
Speed: 2.7ms preprocess, 84.8ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▊   | 3433/5000 [08:06<03:44,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000397351.jpg: 448x640 1 person, 1 truck, 59.8ms
Speed: 3.1ms preprocess, 59.8ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▊   | 3434/5000 [08:06<03:29,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000397354.jpg: 480x640 8 persons, 4 bottles, 3 wine glasss, 3 cups, 2 bowls, 1 chair, 2 refrigerators, 63.8ms
Speed: 3.0ms preprocess, 63.8ms inference, 24.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▊   | 3435/5000 [08:06<03:52,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000397639.jpg: 480x640 2 sheeps, 83.7ms
Speed: 2.9ms preprocess, 83.7ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▊   | 3436/5000 [08:06<03:47,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000397681.jpg: 640x640 2 persons, 1 fork, 5 bowls, 1 banana, 8 oranges, 12 carrots, 3 hot dogs, 4 cakes, 2 potted plants, 2 dining tables, 84.2ms
Speed: 2.1ms preprocess, 84.2ms inference, 49.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  69%|██████▊   | 3437/5000 [08:06<04:58,  5.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000398028.jpg: 640x448 6 persons, 1 baseball bat, 61.2ms
Speed: 3.0ms preprocess, 61.2ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  69%|██████▉   | 3438/5000 [08:06<04:30,  5.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000398203.jpg: 448x640 8 persons, 1 sports ball, 83.8ms
Speed: 3.1ms preprocess, 83.8ms inference, 13.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▉   | 3439/5000 [08:07<04:23,  5.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000398237.jpg: 480x640 1 person, 1 frisbee, 1 kite, 1 surfboard, 63.1ms
Speed: 2.7ms preprocess, 63.1ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▉   | 3440/5000 [08:07<04:00,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000398377.jpg: 480x640 5 persons, 65.0ms
Speed: 3.9ms preprocess, 65.0ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▉   | 3441/5000 [08:07<03:45,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000398438.jpg: 448x640 1 person, 1 knife, 82.8ms
Speed: 4.6ms preprocess, 82.8ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▉   | 3442/5000 [08:07<03:46,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000398652.jpg: 448x640 1 bowl, 10 potted plants, 65.2ms
Speed: 3.2ms preprocess, 65.2ms inference, 10.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▉   | 3443/5000 [08:07<03:47,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000398742.jpg: 640x448 3 persons, 2 cars, 1 sports ball, 1 baseball glove, 63.6ms
Speed: 2.6ms preprocess, 63.6ms inference, 6.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  69%|██████▉   | 3444/5000 [08:07<03:39,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000398810.jpg: 640x640 1 cat, 89.8ms
Speed: 3.2ms preprocess, 89.8ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  69%|██████▉   | 3445/5000 [08:07<03:41,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000398905.jpg: 448x640 1 person, 85.6ms
Speed: 3.8ms preprocess, 85.6ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▉   | 3446/5000 [08:08<03:40,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000399205.jpg: 480x640 12 persons, 3 motorcycles, 66.7ms
Speed: 2.7ms preprocess, 66.7ms inference, 14.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▉   | 3447/5000 [08:08<03:48,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000399296.jpg: 480x640 1 bottle, 1 cup, 4 carrots, 3 hot dogs, 63.0ms
Speed: 2.8ms preprocess, 63.0ms inference, 8.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▉   | 3448/5000 [08:08<03:41,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000399462.jpg: 640x448 3 persons, 1 kite, 61.1ms
Speed: 4.1ms preprocess, 61.1ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  69%|██████▉   | 3449/5000 [08:08<03:32,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000399560.jpg: 480x640 1 cat, 1 teddy bear, 76.1ms
Speed: 3.4ms preprocess, 76.1ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▉   | 3450/5000 [08:08<03:30,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000399655.jpg: 480x640 2 dogs, 1 bed, 65.9ms
Speed: 4.1ms preprocess, 65.9ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▉   | 3451/5000 [08:08<03:23,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000399764.jpg: 640x448 1 person, 1 horse, 57.1ms
Speed: 2.7ms preprocess, 57.1ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  69%|██████▉   | 3452/5000 [08:08<03:10,  8.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000400044.jpg: 640x512 1 person, 1 skis, 1 snowboard, 164.1ms
Speed: 2.8ms preprocess, 164.1ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  69%|██████▉   | 3453/5000 [08:08<03:55,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000400082.jpg: 384x640 1 cup, 1 spoon, 1 hot dog, 1 dining table, 52.0ms
Speed: 4.1ms preprocess, 52.0ms inference, 3.8ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  69%|██████▉   | 3454/5000 [08:09<03:35,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000400161.jpg: 480x640 1 person, 3 books, 103.2ms
Speed: 2.6ms preprocess, 103.2ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▉   | 3455/5000 [08:09<03:46,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000400367.jpg: 448x640 7 cars, 1 traffic light, 59.0ms
Speed: 3.7ms preprocess, 59.0ms inference, 9.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▉   | 3456/5000 [08:09<03:39,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000400573.jpg: 640x480 1 person, 1 sandwich, 1 donut, 65.5ms
Speed: 3.2ms preprocess, 65.5ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  69%|██████▉   | 3457/5000 [08:09<03:35,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000400794.jpg: 640x448 2 persons, 1 bottle, 1 fork, 2 pizzas, 63.8ms
Speed: 6.8ms preprocess, 63.8ms inference, 6.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  69%|██████▉   | 3458/5000 [08:09<03:31,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000400803.jpg: 448x640 1 person, 1 boat, 56.9ms
Speed: 2.8ms preprocess, 56.9ms inference, 6.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▉   | 3459/5000 [08:09<03:18,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000400815.jpg: 448x640 1 person, 1 suitcase, 1 baseball bat, 57.7ms
Speed: 3.2ms preprocess, 57.7ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▉   | 3460/5000 [08:09<03:11,  8.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000400922.jpg: 480x640 1 kite, 1 clock, 87.3ms
Speed: 6.3ms preprocess, 87.3ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▉   | 3461/5000 [08:10<03:18,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000401244.jpg: 640x448 1 person, 1 frisbee, 60.9ms
Speed: 2.8ms preprocess, 60.9ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  69%|██████▉   | 3462/5000 [08:10<03:10,  8.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000401250.jpg: 352x640 1 person, 1 skis, 160.4ms
Speed: 2.7ms preprocess, 160.4ms inference, 2.2ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  69%|██████▉   | 3463/5000 [08:10<03:49,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000401446.jpg: 480x640 1 person, 2 umbrellas, 3 handbags, 64.1ms
Speed: 2.5ms preprocess, 64.1ms inference, 6.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▉   | 3464/5000 [08:10<03:37,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000401862.jpg: 480x640 7 persons, 1 truck, 61.6ms
Speed: 3.1ms preprocess, 61.6ms inference, 9.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▉   | 3465/5000 [08:10<03:33,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000401991.jpg: 384x640 1 cat, 5 dogs, 1 bed, 83.6ms
Speed: 2.5ms preprocess, 83.6ms inference, 6.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  69%|██████▉   | 3466/5000 [08:10<03:37,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000402096.jpg: 640x480 (no detections), 63.8ms
Speed: 2.3ms preprocess, 63.8ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000402118.jpg: 448x640 1 person, 1 snowboard, 55.6ms
Speed: 2.6ms preprocess, 55.6ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▉   | 3468/5000 [08:10<03:06,  8.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000402334.jpg: 640x448 2 clocks, 56.7ms
Speed: 2.6ms preprocess, 56.7ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  69%|██████▉   | 3469/5000 [08:11<02:59,  8.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000402346.jpg: 480x640 1 cup, 1 bowl, 60.8ms
Speed: 2.5ms preprocess, 60.8ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▉   | 3470/5000 [08:11<02:57,  8.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000402433.jpg: 448x640 1 pizza, 80.6ms
Speed: 3.7ms preprocess, 80.6ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▉   | 3471/5000 [08:11<03:03,  8.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000402473.jpg: 480x640 2 cats, 1 couch, 65.9ms
Speed: 2.8ms preprocess, 65.9ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  69%|██████▉   | 3472/5000 [08:11<03:02,  8.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000402519.jpg: 448x640 1 bird, 61.4ms
Speed: 2.6ms preprocess, 61.4ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▉   | 3473/5000 [08:11<02:56,  8.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000402615.jpg: 448x640 1 person, 1 sports ball, 1 tennis racket, 98.9ms
Speed: 2.8ms preprocess, 98.9ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  69%|██████▉   | 3474/5000 [08:11<03:14,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000402720.jpg: 640x640 3 persons, 4 bottles, 6 wine glasss, 2 dining tables, 80.5ms
Speed: 3.4ms preprocess, 80.5ms inference, 19.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  70%|██████▉   | 3475/5000 [08:11<03:43,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000402765.jpg: 480x640 1 person, 1 remote, 62.5ms
Speed: 4.2ms preprocess, 62.5ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|██████▉   | 3476/5000 [08:11<03:28,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000402774.jpg: 640x448 1 person, 1 umbrella, 58.0ms
Speed: 2.9ms preprocess, 58.0ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  70%|██████▉   | 3477/5000 [08:12<03:15,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000402783.jpg: 480x640 1 banana, 7 oranges, 94.2ms
Speed: 2.7ms preprocess, 94.2ms inference, 9.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|██████▉   | 3478/5000 [08:12<03:32,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000402992.jpg: 480x640 11 cows, 66.7ms
Speed: 2.9ms preprocess, 66.7ms inference, 10.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|██████▉   | 3479/5000 [08:12<03:35,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000403122.jpg: 640x448 9 persons, 1 tennis racket, 5 chairs, 61.8ms
Speed: 2.4ms preprocess, 61.8ms inference, 14.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  70%|██████▉   | 3480/5000 [08:12<03:39,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000403353.jpg: 480x640 1 bottle, 1 bed, 1 book, 117.5ms
Speed: 2.7ms preprocess, 117.5ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|██████▉   | 3481/5000 [08:12<03:51,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000403385.jpg: 512x640 1 toilet, 1 sink, 69.0ms
Speed: 2.8ms preprocess, 69.0ms inference, 3.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  70%|██████▉   | 3482/5000 [08:12<03:37,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000403565.jpg: 640x448 1 person, 1 bicycle, 58.6ms
Speed: 2.7ms preprocess, 58.6ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  70%|██████▉   | 3483/5000 [08:12<03:23,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000403584.jpg: 640x640 3 persons, 3 surfboards, 122.7ms
Speed: 2.4ms preprocess, 122.7ms inference, 7.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  70%|██████▉   | 3484/5000 [08:13<03:47,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000403817.jpg: 480x640 1 person, 1 cat, 1 tv, 1 laptop, 58.9ms
Speed: 3.8ms preprocess, 58.9ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|██████▉   | 3485/5000 [08:13<03:31,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404128.jpg: 448x640 1 airplane, 61.8ms
Speed: 2.7ms preprocess, 61.8ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  70%|██████▉   | 3486/5000 [08:13<03:17,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404191.jpg: 640x480 1 bed, 68.5ms
Speed: 4.1ms preprocess, 68.5ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  70%|██████▉   | 3487/5000 [08:13<03:14,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404249.jpg: 640x448 1 person, 1 skateboard, 62.2ms
Speed: 2.7ms preprocess, 62.2ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  70%|██████▉   | 3488/5000 [08:13<03:08,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404479.jpg: 448x640 1 airplane, 61.8ms
Speed: 2.7ms preprocess, 61.8ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  70%|██████▉   | 3489/5000 [08:13<03:06,  8.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404484.jpg: 480x640 1 cat, 1 dog, 1 cow, 2 potted plants, 72.0ms
Speed: 5.3ms preprocess, 72.0ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|██████▉   | 3490/5000 [08:13<03:11,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404534.jpg: 640x512 1 car, 1 traffic light, 65.5ms
Speed: 3.9ms preprocess, 65.5ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  70%|██████▉   | 3491/5000 [08:13<03:07,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404568.jpg: 480x640 1 bird, 86.9ms
Speed: 2.9ms preprocess, 86.9ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|██████▉   | 3492/5000 [08:14<03:15,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404601.jpg: 640x480 (no detections), 64.1ms
Speed: 2.8ms preprocess, 64.1ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  70%|██████▉   | 3493/5000 [08:14<03:03,  8.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404678.jpg: 448x640 6 persons, 5 wine glasss, 1 bowl, 5 chairs, 2 dining tables, 60.3ms
Speed: 2.7ms preprocess, 60.3ms inference, 17.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  70%|██████▉   | 3494/5000 [08:14<03:21,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404805.jpg: 448x640 1 person, 1 surfboard, 84.8ms
Speed: 2.9ms preprocess, 84.8ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  70%|██████▉   | 3495/5000 [08:14<03:22,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404839.jpg: 640x448 2 persons, 1 potted plant, 1 oven, 60.9ms
Speed: 2.5ms preprocess, 60.9ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  70%|██████▉   | 3496/5000 [08:14<03:14,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404922.jpg: 640x480 3 persons, 2 tennis rackets, 64.6ms
Speed: 3.8ms preprocess, 64.6ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  70%|██████▉   | 3497/5000 [08:14<03:12,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000404923.jpg: 480x640 5 persons, 2 baseball bats, 99.3ms
Speed: 2.7ms preprocess, 99.3ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|██████▉   | 3498/5000 [08:14<03:27,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000405195.jpg: 480x640 1 person, 10 donuts, 63.8ms
Speed: 3.0ms preprocess, 63.8ms inference, 9.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|██████▉   | 3499/5000 [08:15<03:29,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000405205.jpg: 480x640 3 buss, 60.8ms
Speed: 4.2ms preprocess, 60.8ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|███████   | 3500/5000 [08:15<03:18,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000405249.jpg: 480x640 11 persons, 1 cake, 1 dining table, 92.8ms
Speed: 3.9ms preprocess, 92.8ms inference, 16.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|███████   | 3501/5000 [08:15<03:41,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000405279.jpg: 480x640 3 persons, 1 bench, 8 kites, 59.7ms
Speed: 2.7ms preprocess, 59.7ms inference, 11.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|███████   | 3502/5000 [08:15<03:39,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000405306.jpg: 448x640 3 cats, 66.3ms
Speed: 2.6ms preprocess, 66.3ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  70%|███████   | 3503/5000 [08:15<03:27,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000405432.jpg: 480x640 1 dining table, 1 cell phone, 1 book, 1 teddy bear, 90.3ms
Speed: 3.9ms preprocess, 90.3ms inference, 9.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|███████   | 3504/5000 [08:15<03:35,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000405691.jpg: 448x640 1 person, 2 bottles, 3 wine glasss, 1 cup, 1 fork, 1 bowl, 1 sandwich, 1 hot dog, 2 dining tables, 61.6ms
Speed: 2.9ms preprocess, 61.6ms inference, 11.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  70%|███████   | 3505/5000 [08:15<03:35,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000405970.jpg: 448x640 1 chair, 2 couchs, 1 tv, 1 laptop, 57.7ms
Speed: 2.5ms preprocess, 57.7ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  70%|███████   | 3506/5000 [08:16<03:22,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000405972.jpg: 448x640 1 person, 3 elephants, 94.2ms
Speed: 2.7ms preprocess, 94.2ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  70%|███████   | 3507/5000 [08:16<03:28,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000406129.jpg: 640x448 1 person, 1 tennis racket, 57.7ms
Speed: 2.6ms preprocess, 57.7ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  70%|███████   | 3508/5000 [08:16<03:13,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000406417.jpg: 640x576 3 persons, 169.6ms
Speed: 1.8ms preprocess, 169.6ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:  70%|███████   | 3509/5000 [08:16<03:53,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000406570.jpg: 640x448 10 apples, 78.6ms
Speed: 2.7ms preprocess, 78.6ms inference, 12.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  70%|███████   | 3510/5000 [08:16<03:55,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000406611.jpg: 480x640 14 persons, 1 skis, 1 snowboard, 63.6ms
Speed: 3.0ms preprocess, 63.6ms inference, 15.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|███████   | 3511/5000 [08:16<03:56,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000406997.jpg: 480x640 5 persons, 1 sheep, 1 frisbee, 64.4ms
Speed: 2.9ms preprocess, 64.4ms inference, 7.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|███████   | 3512/5000 [08:16<03:43,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407002.jpg: 640x480 1 person, 1 skis, 79.5ms
Speed: 2.8ms preprocess, 79.5ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  70%|███████   | 3513/5000 [08:17<03:34,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407083.jpg: 640x480 1 person, 1 dog, 63.7ms
Speed: 2.6ms preprocess, 63.7ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  70%|███████   | 3514/5000 [08:17<03:22,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407298.jpg: 480x640 1 person, 69.3ms
Speed: 2.7ms preprocess, 69.3ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|███████   | 3515/5000 [08:17<03:14,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407403.jpg: 640x448 1 cup, 2 vases, 77.8ms
Speed: 3.9ms preprocess, 77.8ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  70%|███████   | 3516/5000 [08:17<03:17,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407518.jpg: 448x640 1 bird, 64.5ms
Speed: 2.7ms preprocess, 64.5ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  70%|███████   | 3517/5000 [08:17<03:08,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407524.jpg: 480x640 8 broccolis, 63.9ms
Speed: 3.0ms preprocess, 63.9ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|███████   | 3518/5000 [08:17<03:10,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407574.jpg: 640x480 2 bananas, 70.0ms
Speed: 2.9ms preprocess, 70.0ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  70%|███████   | 3519/5000 [08:17<03:11,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407614.jpg: 448x640 1 bottle, 1 bowl, 1 potted plant, 1 microwave, 1 oven, 2 refrigerators, 61.0ms
Speed: 2.8ms preprocess, 61.0ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  70%|███████   | 3520/5000 [08:17<03:09,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407646.jpg: 512x640 1 person, 1 sports ball, 1 tennis racket, 67.3ms
Speed: 4.0ms preprocess, 67.3ms inference, 3.7ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  70%|███████   | 3521/5000 [08:18<03:07,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407650.jpg: 448x640 1 person, 1 snowboard, 92.4ms
Speed: 2.6ms preprocess, 92.4ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  70%|███████   | 3522/5000 [08:18<03:12,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407825.jpg: 640x640 1 bowl, 1 potted plant, 1 clock, 2 vases, 81.0ms
Speed: 2.8ms preprocess, 81.0ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  70%|███████   | 3523/5000 [08:18<03:16,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407868.jpg: 480x640 4 persons, 1 car, 1 sports ball, 2 kites, 65.8ms
Speed: 2.4ms preprocess, 65.8ms inference, 7.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  70%|███████   | 3524/5000 [08:18<03:16,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407943.jpg: 448x640 1 person, 100.0ms
Speed: 2.6ms preprocess, 100.0ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  70%|███████   | 3525/5000 [08:18<03:21,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000407960.jpg: 448x640 1 cat, 1 bowl, 59.9ms
Speed: 2.6ms preprocess, 59.9ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3526/5000 [08:18<03:09,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000408112.jpg: 480x640 1 person, 3 airplanes, 70.1ms
Speed: 2.6ms preprocess, 70.1ms inference, 7.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3527/5000 [08:18<03:12,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000408120.jpg: 448x640 1 person, 3 cars, 1 umbrella, 62.8ms
Speed: 2.8ms preprocess, 62.8ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3528/5000 [08:19<03:08,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000408696.jpg: 480x640 2 persons, 1 toilet, 63.7ms
Speed: 3.1ms preprocess, 63.7ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3529/5000 [08:19<03:05,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000408774.jpg: 448x640 4 persons, 2 baseball gloves, 77.6ms
Speed: 5.8ms preprocess, 77.6ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3530/5000 [08:19<03:15,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000408830.jpg: 448x640 5 motorcycles, 61.6ms
Speed: 2.6ms preprocess, 61.6ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3531/5000 [08:19<03:09,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000409198.jpg: 448x640 1 person, 1 car, 1 frisbee, 82.8ms
Speed: 2.7ms preprocess, 82.8ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3532/5000 [08:19<03:12,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000409211.jpg: 448x640 12 persons, 1 sports ball, 1 tennis racket, 57.9ms
Speed: 2.6ms preprocess, 57.9ms inference, 12.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3533/5000 [08:19<03:18,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000409268.jpg: 640x512 1 teddy bear, 66.7ms
Speed: 2.8ms preprocess, 66.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  71%|███████   | 3534/5000 [08:19<03:10,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000409358.jpg: 640x640 2 toilets, 111.3ms
Speed: 3.1ms preprocess, 111.3ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  71%|███████   | 3535/5000 [08:19<03:25,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000409424.jpg: 640x480 1 bowl, 4 apples, 3 oranges, 67.7ms
Speed: 2.8ms preprocess, 67.7ms inference, 8.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  71%|███████   | 3536/5000 [08:20<03:26,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000409475.jpg: 480x640 2 persons, 55.0ms
Speed: 2.8ms preprocess, 55.0ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3537/5000 [08:20<03:11,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000409542.jpg: 640x448 7 persons, 2 clocks, 99.0ms
Speed: 2.8ms preprocess, 99.0ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  71%|███████   | 3538/5000 [08:20<03:29,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000409630.jpg: 480x640 1 laptop, 1 mouse, 2 keyboards, 63.9ms
Speed: 4.4ms preprocess, 63.9ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3539/5000 [08:20<03:20,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000409867.jpg: 640x448 2 cats, 97.5ms
Speed: 2.8ms preprocess, 97.5ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  71%|███████   | 3540/5000 [08:20<03:25,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410221.jpg: 640x640 1 airplane, 83.6ms
Speed: 2.7ms preprocess, 83.6ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  71%|███████   | 3541/5000 [08:20<03:22,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410428.jpg: 448x640 11 sheeps, 62.0ms
Speed: 2.6ms preprocess, 62.0ms inference, 9.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3542/5000 [08:20<03:20,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410456.jpg: 480x640 7 persons, 2 surfboards, 62.2ms
Speed: 2.5ms preprocess, 62.2ms inference, 7.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3543/5000 [08:21<03:18,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410487.jpg: 640x448 1 oven, 1 clock, 80.2ms
Speed: 3.0ms preprocess, 80.2ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  71%|███████   | 3544/5000 [08:21<03:16,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410496.jpg: 640x448 15 persons, 1 tennis racket, 62.1ms
Speed: 2.8ms preprocess, 62.1ms inference, 14.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  71%|███████   | 3545/5000 [08:21<03:26,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410510.jpg: 448x640 1 person, 1 sports ball, 1 tennis racket, 59.7ms
Speed: 2.5ms preprocess, 59.7ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3546/5000 [08:21<03:12,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410612.jpg: 448x640 2 boats, 79.6ms
Speed: 3.8ms preprocess, 79.6ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3547/5000 [08:21<03:14,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410650.jpg: 480x640 12 persons, 3 chairs, 68.5ms
Speed: 2.9ms preprocess, 68.5ms inference, 14.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3548/5000 [08:21<03:28,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410712.jpg: 448x640 2 persons, 1 car, 3 traffic lights, 61.4ms
Speed: 2.7ms preprocess, 61.4ms inference, 5.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3549/5000 [08:21<03:19,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410735.jpg: 512x640 1 person, 5 bowls, 2 pizzas, 65.3ms
Speed: 2.7ms preprocess, 65.3ms inference, 11.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  71%|███████   | 3550/5000 [08:22<03:27,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410878.jpg: 448x640 1 motorcycle, 60.2ms
Speed: 3.0ms preprocess, 60.2ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3551/5000 [08:22<03:14,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410880.jpg: 480x640 1 person, 2 cars, 1 chair, 1 teddy bear, 62.3ms
Speed: 4.0ms preprocess, 62.3ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3552/5000 [08:22<03:09,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000410934.jpg: 480x640 1 umbrella, 1 broccoli, 85.6ms
Speed: 2.6ms preprocess, 85.6ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3553/5000 [08:22<03:12,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000411530.jpg: 480x640 15 persons, 1 cow, 68.4ms
Speed: 3.2ms preprocess, 68.4ms inference, 16.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3554/5000 [08:22<03:27,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000411665.jpg: 448x640 2 cats, 63.4ms
Speed: 2.4ms preprocess, 63.4ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3555/5000 [08:22<03:12,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000411754.jpg: 480x640 5 persons, 82.2ms
Speed: 3.6ms preprocess, 82.2ms inference, 7.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3556/5000 [08:22<03:18,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000411774.jpg: 640x480 1 person, 1 tennis racket, 63.2ms
Speed: 3.6ms preprocess, 63.2ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  71%|███████   | 3557/5000 [08:22<03:07,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000411817.jpg: 480x640 7 persons, 5 tvs, 63.5ms
Speed: 2.8ms preprocess, 63.5ms inference, 14.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3558/5000 [08:23<03:16,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000411938.jpg: 448x640 8 persons, 3 bicycles, 1 car, 1 motorcycle, 1 cup, 1 teddy bear, 86.8ms
Speed: 2.9ms preprocess, 86.8ms inference, 14.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████   | 3559/5000 [08:23<03:36,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000411953.jpg: 480x640 1 person, 1 tie, 64.7ms
Speed: 4.4ms preprocess, 64.7ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3560/5000 [08:23<03:20,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000412240.jpg: 480x640 1 dog, 66.0ms
Speed: 4.4ms preprocess, 66.0ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████   | 3561/5000 [08:23<03:12,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000412286.jpg: 640x448 1 person, 1 sports ball, 1 tennis racket, 86.0ms
Speed: 4.0ms preprocess, 86.0ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  71%|███████   | 3562/5000 [08:23<03:18,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000412362.jpg: 512x640 3 persons, 1 handbag, 1 tie, 2 wine glasss, 76.8ms
Speed: 3.0ms preprocess, 76.8ms inference, 8.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  71%|███████▏  | 3563/5000 [08:23<03:25,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000412531.jpg: 480x640 2 cars, 1 truck, 1 parking meter, 62.0ms
Speed: 2.9ms preprocess, 62.0ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████▏  | 3564/5000 [08:23<03:16,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000412887.jpg: 640x512 1 dog, 1 cow, 93.8ms
Speed: 2.6ms preprocess, 93.8ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  71%|███████▏  | 3565/5000 [08:24<03:22,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000412894.jpg: 640x448 13 persons, 1 traffic light, 1 backpack, 1 handbag, 59.5ms
Speed: 2.5ms preprocess, 59.5ms inference, 14.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  71%|███████▏  | 3566/5000 [08:24<03:27,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000413247.jpg: 448x640 1 bottle, 1 laptop, 1 mouse, 59.1ms
Speed: 2.7ms preprocess, 59.1ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████▏  | 3567/5000 [08:24<03:13,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000413395.jpg: 448x640 3 persons, 2 cats, 1 couch, 1 bed, 79.9ms
Speed: 2.6ms preprocess, 79.9ms inference, 8.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████▏  | 3568/5000 [08:24<03:19,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000413404.jpg: 448x640 2 persons, 4 benchs, 1 handbag, 58.8ms
Speed: 2.5ms preprocess, 58.8ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████▏  | 3569/5000 [08:24<03:12,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000413552.jpg: 640x448 1 person, 59.8ms
Speed: 2.8ms preprocess, 59.8ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  71%|███████▏  | 3570/5000 [08:24<03:00,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000413689.jpg: 448x640 4 persons, 3 cars, 2 umbrellas, 1 handbag, 83.1ms
Speed: 2.9ms preprocess, 83.1ms inference, 9.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  71%|███████▏  | 3571/5000 [08:24<03:17,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000414034.jpg: 480x640 1 person, 1 cup, 1 bed, 1 dining table, 2 remotes, 1 cell phone, 64.5ms
Speed: 2.7ms preprocess, 64.5ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████▏  | 3572/5000 [08:25<03:12,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000414133.jpg: 480x640 4 cars, 1 stop sign, 1 umbrella, 64.7ms
Speed: 2.5ms preprocess, 64.7ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  71%|███████▏  | 3573/5000 [08:25<03:09,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000414170.jpg: 640x448 3 persons, 2 skiss, 97.5ms
Speed: 2.7ms preprocess, 97.5ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  71%|███████▏  | 3574/5000 [08:25<03:19,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000414261.jpg: 448x640 1 bear, 60.3ms
Speed: 2.9ms preprocess, 60.3ms inference, 1.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▏  | 3575/5000 [08:25<03:05,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000414340.jpg: 640x448 1 clock, 59.1ms
Speed: 2.7ms preprocess, 59.1ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  72%|███████▏  | 3576/5000 [08:25<02:54,  8.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000414385.jpg: 480x640 1 person, 4 cars, 1 skateboard, 102.1ms
Speed: 2.8ms preprocess, 102.1ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3577/5000 [08:25<03:11,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000414510.jpg: 640x480 2 persons, 2 cars, 2 buss, 1 truck, 63.2ms
Speed: 2.9ms preprocess, 63.2ms inference, 7.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  72%|███████▏  | 3578/5000 [08:25<03:08,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000414638.jpg: 640x640 1 cup, 2 forks, 1 knife, 1 spoon, 2 sandwichs, 1 dining table, 79.1ms
Speed: 3.1ms preprocess, 79.1ms inference, 9.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  72%|███████▏  | 3579/5000 [08:26<03:18,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000414673.jpg: 448x640 21 persons, 1 sports ball, 1 baseball bat, 2 chairs, 58.9ms
Speed: 3.5ms preprocess, 58.9ms inference, 21.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▏  | 3580/5000 [08:26<03:35,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000414676.jpg: 640x448 2 clocks, 85.5ms
Speed: 2.5ms preprocess, 85.5ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  72%|███████▏  | 3581/5000 [08:26<03:29,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000414795.jpg: 480x640 7 persons, 30 elephants, 62.1ms
Speed: 2.5ms preprocess, 62.1ms inference, 32.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3582/5000 [08:26<04:02,  5.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000415194.jpg: 480x640 1 chair, 1 bed, 1 dining table, 1 refrigerator, 62.1ms
Speed: 2.8ms preprocess, 62.1ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3583/5000 [08:26<03:39,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000415238.jpg: 480x640 2 chairs, 1 potted plant, 1 dining table, 1 vase, 56.6ms
Speed: 2.8ms preprocess, 56.6ms inference, 9.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3584/5000 [08:26<03:30,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000415536.jpg: 480x640 1 bus, 1 train, 66.3ms
Speed: 2.6ms preprocess, 66.3ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3585/5000 [08:26<03:18,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000415716.jpg: 480x640 3 chairs, 2 couchs, 1 potted plant, 2 vases, 65.9ms
Speed: 3.2ms preprocess, 65.9ms inference, 9.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3586/5000 [08:27<03:17,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000415727.jpg: 480x640 3 persons, 63.6ms
Speed: 2.6ms preprocess, 63.6ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3587/5000 [08:27<03:06,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000415741.jpg: 384x640 2 chairs, 1 couch, 1 bed, 2 books, 85.3ms
Speed: 3.0ms preprocess, 85.3ms inference, 6.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  72%|███████▏  | 3588/5000 [08:27<03:11,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000415748.jpg: 640x448 1 person, 1 elephant, 57.9ms
Speed: 2.4ms preprocess, 57.9ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  72%|███████▏  | 3589/5000 [08:27<03:00,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000415882.jpg: 640x544 1 person, 1 bed, 147.5ms
Speed: 1.8ms preprocess, 147.5ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  72%|███████▏  | 3590/5000 [08:27<03:28,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000415990.jpg: 480x640 2 persons, 12 cows, 65.3ms
Speed: 4.1ms preprocess, 65.3ms inference, 48.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3591/5000 [08:27<03:50,  6.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416104.jpg: 480x640 12 persons, 1 umbrella, 4 bottles, 7 chairs, 2 dining tables, 62.7ms
Speed: 2.7ms preprocess, 62.7ms inference, 24.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3592/5000 [08:28<04:01,  5.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416170.jpg: 640x448 1 cat, 58.6ms
Speed: 2.8ms preprocess, 58.6ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  72%|███████▏  | 3593/5000 [08:28<03:33,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416256.jpg: 480x640 1 cat, 1 book, 59.2ms
Speed: 3.8ms preprocess, 59.2ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3594/5000 [08:28<03:21,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416269.jpg: 448x640 1 train, 71.3ms
Speed: 3.9ms preprocess, 71.3ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▏  | 3595/5000 [08:28<03:12,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416330.jpg: 480x640 1 cat, 67.9ms
Speed: 3.0ms preprocess, 67.9ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3596/5000 [08:28<03:02,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416343.jpg: 640x448 2 persons, 1 surfboard, 1 chair, 1 refrigerator, 56.6ms
Speed: 2.8ms preprocess, 56.6ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  72%|███████▏  | 3597/5000 [08:28<02:57,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416451.jpg: 480x640 2 persons, 3 bottles, 1 couch, 3 remotes, 83.9ms
Speed: 2.9ms preprocess, 83.9ms inference, 11.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3598/5000 [08:28<03:11,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416534.jpg: 384x640 1 chair, 1 couch, 1 potted plant, 1 tv, 52.0ms
Speed: 2.6ms preprocess, 52.0ms inference, 3.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  72%|███████▏  | 3599/5000 [08:28<02:57,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416745.jpg: 640x640 1 toilet, 80.7ms
Speed: 2.9ms preprocess, 80.7ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  72%|███████▏  | 3600/5000 [08:28<02:59,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416758.jpg: 480x640 5 cows, 64.6ms
Speed: 2.7ms preprocess, 64.6ms inference, 8.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3601/5000 [08:29<03:05,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416837.jpg: 448x640 1 horse, 1 cow, 62.1ms
Speed: 3.1ms preprocess, 62.1ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▏  | 3602/5000 [08:29<02:59,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416885.jpg: 480x640 2 persons, 2 wine glasss, 2 forks, 2 cakes, 1 dining table, 63.1ms
Speed: 4.6ms preprocess, 63.1ms inference, 7.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3603/5000 [08:29<03:03,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000416991.jpg: 352x640 14 umbrellas, 150.6ms
Speed: 2.6ms preprocess, 150.6ms inference, 8.9ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  72%|███████▏  | 3604/5000 [08:29<03:41,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000417043.jpg: 480x640 1 bench, 60.4ms
Speed: 2.7ms preprocess, 60.4ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3605/5000 [08:29<03:18,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000417085.jpg: 384x640 2 cows, 55.1ms
Speed: 2.1ms preprocess, 55.1ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  72%|███████▏  | 3606/5000 [08:29<03:01,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000417249.jpg: 640x480 5 persons, 1 handbag, 1 suitcase, 64.8ms
Speed: 2.6ms preprocess, 64.8ms inference, 6.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  72%|███████▏  | 3607/5000 [08:29<03:01,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000417285.jpg: 320x640 2 persons, 1 cup, 1 sandwich, 1 cake, 2 dining tables, 143.9ms
Speed: 2.2ms preprocess, 143.9ms inference, 4.0ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  72%|███████▏  | 3608/5000 [08:30<03:29,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000417465.jpg: 448x640 1 dog, 1 bear, 57.7ms
Speed: 2.5ms preprocess, 57.7ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▏  | 3609/5000 [08:30<03:11,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000417608.jpg: 448x640 1 bowl, 2 sandwichs, 60.9ms
Speed: 2.7ms preprocess, 60.9ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▏  | 3610/5000 [08:30<03:00,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000417632.jpg: 480x640 3 persons, 1 tie, 2 wine glasss, 1 dining table, 96.1ms
Speed: 2.5ms preprocess, 96.1ms inference, 8.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3611/5000 [08:30<03:15,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000417779.jpg: 480x640 1 person, 2 cars, 1 fire hydrant, 63.9ms
Speed: 2.3ms preprocess, 63.9ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3612/5000 [08:30<03:04,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000417876.jpg: 480x640 2 giraffes, 63.6ms
Speed: 4.1ms preprocess, 63.6ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  72%|███████▏  | 3613/5000 [08:30<02:57,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000417911.jpg: 448x640 1 person, 1 surfboard, 85.3ms
Speed: 2.4ms preprocess, 85.3ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▏  | 3614/5000 [08:30<03:01,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000418062.jpg: 640x448 3 persons, 2 wine glasss, 62.5ms
Speed: 2.6ms preprocess, 62.5ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  72%|███████▏  | 3615/5000 [08:31<02:56,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000418281.jpg: 448x640 2 cows, 64.6ms
Speed: 2.8ms preprocess, 64.6ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▏  | 3616/5000 [08:31<02:52,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000418696.jpg: 448x640 4 cars, 2 traffic lights, 78.9ms
Speed: 2.7ms preprocess, 78.9ms inference, 18.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▏  | 3617/5000 [08:31<03:06,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000418959.jpg: 448x640 1 bird, 63.8ms
Speed: 3.1ms preprocess, 63.8ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▏  | 3618/5000 [08:31<02:58,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000418961.jpg: 448x640 1 umbrella, 3 clocks, 66.1ms
Speed: 3.1ms preprocess, 66.1ms inference, 5.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▏  | 3619/5000 [08:31<02:58,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000419096.jpg: 640x640 1 train, 126.0ms
Speed: 6.5ms preprocess, 126.0ms inference, 9.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  72%|███████▏  | 3620/5000 [08:31<03:27,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000419098.jpg: 576x640 1 fire hydrant, 150.8ms
Speed: 2.0ms preprocess, 150.8ms inference, 2.0ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  72%|███████▏  | 3621/5000 [08:31<03:46,  6.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000419201.jpg: 448x640 3 sheeps, 57.4ms
Speed: 2.7ms preprocess, 57.4ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▏  | 3622/5000 [08:32<03:24,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000419312.jpg: 640x480 2 cups, 1 fork, 1 knife, 3 bowls, 2 sandwichs, 1 dining table, 68.0ms
Speed: 4.3ms preprocess, 68.0ms inference, 10.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  72%|███████▏  | 3623/5000 [08:32<03:32,  6.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000419379.jpg: 640x448 1 person, 2 donuts, 1 cake, 60.5ms
Speed: 2.9ms preprocess, 60.5ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  72%|███████▏  | 3624/5000 [08:32<03:17,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000419408.jpg: 448x640 2 persons, 2 cars, 4 benchs, 56.3ms
Speed: 2.6ms preprocess, 56.3ms inference, 7.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  72%|███████▎  | 3625/5000 [08:32<03:09,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000419601.jpg: 448x640 4 chairs, 2 couchs, 1 bed, 1 book, 63.9ms
Speed: 4.1ms preprocess, 63.9ms inference, 6.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  73%|███████▎  | 3626/5000 [08:32<03:07,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000419653.jpg: 480x640 2 bottles, 4 bowls, 1 chair, 1 potted plant, 2 tvs, 2 microwaves, 1 oven, 1 refrigerator, 1 vase, 80.3ms
Speed: 3.7ms preprocess, 80.3ms inference, 15.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3627/5000 [08:32<03:23,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000419882.jpg: 640x640 1 car, 3 trains, 1 truck, 86.5ms
Speed: 3.3ms preprocess, 86.5ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  73%|███████▎  | 3628/5000 [08:32<03:24,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000419974.jpg: 640x448 2 persons, 1 dog, 3 bottles, 1 cup, 1 bowl, 2 dining tables, 1 oven, 56.8ms
Speed: 2.7ms preprocess, 56.8ms inference, 9.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  73%|███████▎  | 3629/5000 [08:33<03:18,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000420069.jpg: 480x640 7 persons, 90.1ms
Speed: 2.3ms preprocess, 90.1ms inference, 7.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3630/5000 [08:33<03:24,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000420230.jpg: 640x448 1 elephant, 60.1ms
Speed: 2.7ms preprocess, 60.1ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  73%|███████▎  | 3631/5000 [08:33<03:07,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000420281.jpg: 640x480 3 persons, 1 hot dog, 60.3ms
Speed: 2.8ms preprocess, 60.3ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  73%|███████▎  | 3632/5000 [08:33<02:57,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000420472.jpg: 384x640 2 giraffes, 61.5ms
Speed: 2.3ms preprocess, 61.5ms inference, 8.0ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  73%|███████▎  | 3633/5000 [08:33<02:54,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000420840.jpg: 448x640 2 persons, 1 banana, 1 dining table, 59.9ms
Speed: 2.4ms preprocess, 59.9ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  73%|███████▎  | 3634/5000 [08:33<02:50,  8.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000420916.jpg: 416x640 3 persons, 2 boats, 1 bench, 119.1ms
Speed: 2.5ms preprocess, 119.1ms inference, 4.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  73%|███████▎  | 3635/5000 [08:33<03:11,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000421060.jpg: 448x640 6 persons, 1 skis, 69.2ms
Speed: 2.5ms preprocess, 69.2ms inference, 10.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  73%|███████▎  | 3636/5000 [08:34<03:12,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000421455.jpg: 480x640 2 cars, 65.3ms
Speed: 4.7ms preprocess, 65.3ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3637/5000 [08:34<03:03,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000421757.jpg: 480x640 2 boats, 61.7ms
Speed: 2.6ms preprocess, 61.7ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3638/5000 [08:34<02:53,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000421834.jpg: 480x640 (no detections), 98.1ms
Speed: 2.5ms preprocess, 98.1ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3639/5000 [08:34<02:56,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000421923.jpg: 640x448 1 potted plant, 1 book, 1 vase, 65.5ms
Speed: 2.7ms preprocess, 65.5ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  73%|███████▎  | 3640/5000 [08:34<02:49,  8.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000422670.jpg: 448x640 1 person, 51.9ms
Speed: 2.6ms preprocess, 51.9ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  73%|███████▎  | 3641/5000 [08:34<02:39,  8.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000422706.jpg: 448x640 2 persons, 3 boats, 88.7ms
Speed: 2.6ms preprocess, 88.7ms inference, 7.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  73%|███████▎  | 3642/5000 [08:34<02:53,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000422836.jpg: 480x640 7 persons, 62.3ms
Speed: 3.7ms preprocess, 62.3ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3643/5000 [08:34<02:52,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000422886.jpg: 640x640 1 person, 1 hot dog, 79.2ms
Speed: 2.0ms preprocess, 79.2ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  73%|███████▎  | 3644/5000 [08:34<02:54,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000422998.jpg: 448x640 2 persons, 1 sports ball, 3 bottles, 1 knife, 1 sandwich, 2 dining tables, 85.2ms
Speed: 2.8ms preprocess, 85.2ms inference, 15.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  73%|███████▎  | 3645/5000 [08:35<03:10,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000423104.jpg: 640x448 1 person, 1 tennis racket, 58.9ms
Speed: 2.6ms preprocess, 58.9ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  73%|███████▎  | 3646/5000 [08:35<02:57,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000423123.jpg: 448x640 2 persons, 66.7ms
Speed: 2.7ms preprocess, 66.7ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  73%|███████▎  | 3647/5000 [08:35<02:53,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000423229.jpg: 448x640 1 train, 67.8ms
Speed: 4.3ms preprocess, 67.8ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  73%|███████▎  | 3648/5000 [08:35<02:53,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000423506.jpg: 640x480 2 persons, 2 ties, 1 wine glass, 63.7ms
Speed: 4.1ms preprocess, 63.7ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  73%|███████▎  | 3649/5000 [08:35<02:49,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000423519.jpg: 640x480 1 bus, 68.6ms
Speed: 2.8ms preprocess, 68.6ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  73%|███████▎  | 3650/5000 [08:35<02:46,  8.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000423617.jpg: 480x640 2 persons, 5 cars, 1 motorcycle, 2 buss, 1 stop sign, 83.5ms
Speed: 3.1ms preprocess, 83.5ms inference, 10.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3651/5000 [08:35<03:04,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000423798.jpg: 320x640 (no detections), 47.3ms
Speed: 2.4ms preprocess, 47.3ms inference, 0.6ms postprocess per image at shape (1, 3, 320, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000423944.jpg: 640x448 2 persons, 53.0ms
Speed: 2.9ms preprocess, 53.0ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  73%|███████▎  | 3653/5000 [08:36<02:34,  8.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000423971.jpg: 448x640 1 bottle, 1 toilet, 61.0ms
Speed: 2.7ms preprocess, 61.0ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  73%|███████▎  | 3654/5000 [08:36<02:33,  8.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000424135.jpg: 640x480 (no detections), 88.8ms
Speed: 2.9ms preprocess, 88.8ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  73%|███████▎  | 3655/5000 [08:36<02:39,  8.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000424162.jpg: 512x640 5 persons, 3 bicycles, 4 cars, 1 truck, 1 dog, 1 backpack, 1 handbag, 137.1ms
Speed: 3.2ms preprocess, 137.1ms inference, 15.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  73%|███████▎  | 3656/5000 [08:36<03:21,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000424349.jpg: 448x640 2 persons, 1 pizza, 1 chair, 1 dining table, 1 refrigerator, 57.9ms
Speed: 2.8ms preprocess, 57.9ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  73%|███████▎  | 3657/5000 [08:36<03:08,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000424521.jpg: 640x416 1 person, 2 sports balls, 1 skateboard, 146.4ms
Speed: 3.8ms preprocess, 146.4ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  73%|███████▎  | 3658/5000 [08:36<03:31,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000424545.jpg: 480x640 1 cat, 59.6ms
Speed: 4.1ms preprocess, 59.6ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3659/5000 [08:37<03:13,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000424551.jpg: 480x640 2 persons, 2 skiss, 63.0ms
Speed: 3.3ms preprocess, 63.0ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3660/5000 [08:37<03:02,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000424642.jpg: 480x640 2 persons, 91.4ms
Speed: 4.4ms preprocess, 91.4ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3661/5000 [08:37<03:05,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000424721.jpg: 480x640 2 bowls, 1 apple, 10 carrots, 1 dining table, 59.4ms
Speed: 3.7ms preprocess, 59.4ms inference, 13.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3662/5000 [08:37<03:10,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000424776.jpg: 448x640 1 airplane, 60.5ms
Speed: 2.6ms preprocess, 60.5ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  73%|███████▎  | 3663/5000 [08:37<02:55,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000424975.jpg: 480x640 2 persons, 1 car, 1 truck, 62.5ms
Speed: 2.6ms preprocess, 62.5ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3664/5000 [08:37<02:49,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000425221.jpg: 640x480 1 person, 86.4ms
Speed: 3.4ms preprocess, 86.4ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  73%|███████▎  | 3665/5000 [08:37<02:52,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000425226.jpg: 640x480 1 person, 3 bottles, 1 cup, 1 bowl, 1 refrigerator, 59.5ms
Speed: 2.8ms preprocess, 59.5ms inference, 7.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  73%|███████▎  | 3666/5000 [08:37<02:51,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000425227.jpg: 640x448 2 persons, 3 kites, 60.3ms
Speed: 2.7ms preprocess, 60.3ms inference, 7.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  73%|███████▎  | 3667/5000 [08:38<02:48,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000425361.jpg: 512x640 1 person, 1 wine glass, 1 cup, 1 bowl, 2 dining tables, 2 tvs, 2 laptops, 2 mouses, 1 cell phone, 66.3ms
Speed: 2.6ms preprocess, 66.3ms inference, 16.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  73%|███████▎  | 3668/5000 [08:38<02:59,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000425390.jpg: 480x640 1 cat, 1 laptop, 81.8ms
Speed: 5.2ms preprocess, 81.8ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3669/5000 [08:38<03:01,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000425702.jpg: 320x640 2 persons, 1 surfboard, 44.7ms
Speed: 2.3ms preprocess, 44.7ms inference, 2.8ms postprocess per image at shape (1, 3, 320, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000425906.jpg: 640x480 4 persons, 55.7ms
Speed: 2.8ms preprocess, 55.7ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  73%|███████▎  | 3671/5000 [08:38<02:40,  8.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000425925.jpg: 480x640 1 person, 5 cars, 1 clock, 63.4ms
Speed: 2.7ms preprocess, 63.4ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  73%|███████▎  | 3672/5000 [08:38<02:42,  8.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000426166.jpg: 448x640 1 bicycle, 82.8ms
Speed: 3.6ms preprocess, 82.8ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  73%|███████▎  | 3673/5000 [08:38<02:45,  8.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000426203.jpg: 640x448 2 persons, 1 bicycle, 4 cars, 2 skateboards, 61.5ms
Speed: 2.6ms preprocess, 61.5ms inference, 9.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  73%|███████▎  | 3674/5000 [08:38<02:49,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000426241.jpg: 480x640 1 person, 1 tv, 2 mouses, 2 keyboards, 91.3ms
Speed: 4.7ms preprocess, 91.3ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▎  | 3675/5000 [08:39<03:00,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000426253.jpg: 448x640 2 bottles, 1 microwave, 1 oven, 60.6ms
Speed: 2.7ms preprocess, 60.6ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▎  | 3676/5000 [08:39<02:53,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000426268.jpg: 448x640 1 person, 1 car, 1 train, 65.7ms
Speed: 3.3ms preprocess, 65.7ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▎  | 3677/5000 [08:39<02:49,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000426297.jpg: 448x640 1 zebra, 77.5ms
Speed: 2.9ms preprocess, 77.5ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▎  | 3678/5000 [08:39<02:49,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000426329.jpg: 448x640 11 cakes, 62.5ms
Speed: 3.4ms preprocess, 62.5ms inference, 9.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▎  | 3679/5000 [08:39<02:56,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000426372.jpg: 448x640 1 car, 1 truck, 64.3ms
Speed: 2.6ms preprocess, 64.3ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▎  | 3680/5000 [08:39<02:48,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000426376.jpg: 640x480 1 person, 1 snowboard, 77.0ms
Speed: 2.6ms preprocess, 77.0ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  74%|███████▎  | 3681/5000 [08:39<02:54,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000426795.jpg: 480x640 3 persons, 1 bottle, 1 sandwich, 1 donut, 63.1ms
Speed: 2.7ms preprocess, 63.1ms inference, 6.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▎  | 3682/5000 [08:39<02:53,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000426836.jpg: 640x640 1 person, 92.5ms
Speed: 3.4ms preprocess, 92.5ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  74%|███████▎  | 3683/5000 [08:40<03:02,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000427034.jpg: 480x640 1 person, 2 dogs, 1 laptop, 1 keyboard, 65.8ms
Speed: 3.2ms preprocess, 65.8ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▎  | 3684/5000 [08:40<03:00,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000427055.jpg: 480x640 8 cars, 1 clock, 63.3ms
Speed: 3.0ms preprocess, 63.3ms inference, 8.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▎  | 3685/5000 [08:40<03:00,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000427077.jpg: 640x448 1 person, 62.3ms
Speed: 4.1ms preprocess, 62.3ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  74%|███████▎  | 3686/5000 [08:40<02:50,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000427160.jpg: 512x640 3 persons, 1 baseball glove, 91.7ms
Speed: 3.3ms preprocess, 91.7ms inference, 5.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  74%|███████▎  | 3687/5000 [08:40<02:58,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000427256.jpg: 512x640 11 persons, 1 baseball bat, 3 chairs, 65.3ms
Speed: 2.9ms preprocess, 65.3ms inference, 15.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  74%|███████▍  | 3688/5000 [08:40<03:07,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000427338.jpg: 416x640 1 motorcycle, 54.1ms
Speed: 2.0ms preprocess, 54.1ms inference, 1.7ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  74%|███████▍  | 3689/5000 [08:40<02:52,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000427500.jpg: 640x448 1 person, 1 car, 1 fire hydrant, 114.5ms
Speed: 2.8ms preprocess, 114.5ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  74%|███████▍  | 3690/5000 [08:41<03:05,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000427649.jpg: 640x480 1 boat, 63.0ms
Speed: 2.4ms preprocess, 63.0ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  74%|███████▍  | 3691/5000 [08:41<02:53,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000427655.jpg: 640x448 3 persons, 3 cars, 1 bus, 1 truck, 1 horse, 1 umbrella, 60.8ms
Speed: 2.6ms preprocess, 60.8ms inference, 8.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  74%|███████▍  | 3692/5000 [08:41<02:55,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000427997.jpg: 448x640 2 persons, 57.6ms
Speed: 2.7ms preprocess, 57.6ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3693/5000 [08:41<02:44,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000428111.jpg: 448x640 2 persons, 1 traffic light, 1 skateboard, 93.0ms
Speed: 2.6ms preprocess, 93.0ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3694/5000 [08:41<02:55,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000428218.jpg: 480x640 2 persons, 1 frisbee, 66.7ms
Speed: 2.6ms preprocess, 66.7ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3695/5000 [08:41<02:49,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000428280.jpg: 448x640 2 chairs, 1 tv, 1 laptop, 59.1ms
Speed: 3.6ms preprocess, 59.1ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3696/5000 [08:41<02:43,  7.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000428454.jpg: 448x640 1 person, 1 skis, 1 snowboard, 1 kite, 94.4ms
Speed: 3.9ms preprocess, 94.4ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3697/5000 [08:41<02:53,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000428562.jpg: 480x640 1 person, 1 bird, 4 sheeps, 1 teddy bear, 61.0ms
Speed: 2.5ms preprocess, 61.0ms inference, 6.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3698/5000 [08:42<02:50,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000428867.jpg: 448x640 1 chair, 4 teddy bears, 61.7ms
Speed: 3.5ms preprocess, 61.7ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3699/5000 [08:42<02:46,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000429011.jpg: 352x640 1 person, 1 car, 1 truck, 148.9ms
Speed: 2.6ms preprocess, 148.9ms inference, 2.1ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  74%|███████▍  | 3700/5000 [08:42<03:13,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000429109.jpg: 448x640 3 persons, 1 bicycle, 1 car, 4 buss, 2 trains, 59.5ms
Speed: 3.0ms preprocess, 59.5ms inference, 10.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3701/5000 [08:42<03:09,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000429281.jpg: 640x480 5 bananas, 2 apples, 64.8ms
Speed: 2.7ms preprocess, 64.8ms inference, 6.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  74%|███████▍  | 3702/5000 [08:42<03:02,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000429530.jpg: 480x640 5 vases, 113.2ms
Speed: 2.8ms preprocess, 113.2ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3703/5000 [08:42<03:20,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000429598.jpg: 480x640 1 bottle, 1 oven, 1 sink, 1 refrigerator, 62.6ms
Speed: 3.2ms preprocess, 62.6ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3704/5000 [08:43<03:07,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000429623.jpg: 480x640 1 bottle, 1 cup, 1 pizza, 1 dining table, 63.6ms
Speed: 3.0ms preprocess, 63.6ms inference, 4.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3705/5000 [08:43<02:58,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000429690.jpg: 448x640 9 persons, 1 sports ball, 1 baseball bat, 57.8ms
Speed: 2.6ms preprocess, 57.8ms inference, 9.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3706/5000 [08:43<02:57,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000429718.jpg: 640x640 1 train, 113.6ms
Speed: 2.7ms preprocess, 113.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  74%|███████▍  | 3707/5000 [08:43<03:07,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000429761.jpg: 448x640 2 persons, 4 tennis rackets, 58.9ms
Speed: 2.7ms preprocess, 58.9ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3708/5000 [08:43<02:58,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000430048.jpg: 480x640 1 toilet, 1 clock, 65.9ms
Speed: 4.1ms preprocess, 65.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3709/5000 [08:43<02:50,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000430056.jpg: 480x640 2 sandwichs, 1 laptop, 1 keyboard, 77.0ms
Speed: 4.2ms preprocess, 77.0ms inference, 7.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3710/5000 [08:43<02:53,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000430073.jpg: 480x640 13 persons, 1 sheep, 5 chairs, 62.5ms
Speed: 2.8ms preprocess, 62.5ms inference, 17.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3711/5000 [08:43<03:08,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000430286.jpg: 480x640 2 remotes, 61.0ms
Speed: 2.5ms preprocess, 61.0ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3712/5000 [08:44<02:55,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000430377.jpg: 640x448 2 persons, 1 skis, 90.3ms
Speed: 2.9ms preprocess, 90.3ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  74%|███████▍  | 3713/5000 [08:44<03:00,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000430871.jpg: 448x640 1 parking meter, 58.3ms
Speed: 2.6ms preprocess, 58.3ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3714/5000 [08:44<02:46,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000430875.jpg: 480x640 3 traffic lights, 57.0ms
Speed: 4.1ms preprocess, 57.0ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3715/5000 [08:44<02:39,  8.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000430961.jpg: 480x640 2 persons, 2 sports balls, 1 baseball glove, 80.6ms
Speed: 4.4ms preprocess, 80.6ms inference, 8.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3716/5000 [08:44<02:47,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000430973.jpg: 480x640 1 pizza, 63.7ms
Speed: 2.8ms preprocess, 63.7ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3717/5000 [08:44<02:41,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000431140.jpg: 480x640 3 toilets, 1 sink, 63.8ms
Speed: 2.7ms preprocess, 63.8ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  74%|███████▍  | 3718/5000 [08:44<02:38,  8.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000431545.jpg: 448x640 2 persons, 2 baseball gloves, 104.1ms
Speed: 3.8ms preprocess, 104.1ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3719/5000 [08:44<02:52,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000431568.jpg: 448x640 1 pizza, 1 dining table, 64.5ms
Speed: 2.7ms preprocess, 64.5ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3720/5000 [08:45<02:46,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000431693.jpg: 448x640 1 person, 1 sports ball, 2 tennis rackets, 71.4ms
Speed: 3.1ms preprocess, 71.4ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3721/5000 [08:45<02:46,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000431727.jpg: 448x640 (no detections), 82.5ms
Speed: 2.5ms preprocess, 82.5ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3722/5000 [08:45<02:42,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000431848.jpg: 640x384 2 persons, 1 skateboard, 112.5ms
Speed: 2.3ms preprocess, 112.5ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  74%|███████▍  | 3723/5000 [08:45<02:54,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000431876.jpg: 640x448 6 persons, 1 traffic light, 2 horses, 60.9ms
Speed: 2.6ms preprocess, 60.9ms inference, 8.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  74%|███████▍  | 3724/5000 [08:45<02:53,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000431896.jpg: 448x640 1 train, 74.4ms
Speed: 2.8ms preprocess, 74.4ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  74%|███████▍  | 3725/5000 [08:45<02:50,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000432085.jpg: 480x640 2 persons, 1 bottle, 1 mouse, 65.1ms
Speed: 4.4ms preprocess, 65.1ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  75%|███████▍  | 3726/5000 [08:45<02:46,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000432468.jpg: 640x448 1 cat, 60.6ms
Speed: 3.1ms preprocess, 60.6ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  75%|███████▍  | 3727/5000 [08:46<02:37,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000432553.jpg: 640x448 2 persons, 1 bicycle, 3 dogs, 1 tv, 66.3ms
Speed: 2.6ms preprocess, 66.3ms inference, 11.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  75%|███████▍  | 3728/5000 [08:46<02:45,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000432898.jpg: 640x640 6 persons, 2 cars, 5 kites, 85.4ms
Speed: 3.6ms preprocess, 85.4ms inference, 17.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  75%|███████▍  | 3729/5000 [08:46<03:10,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000433103.jpg: 448x640 15 persons, 59.4ms
Speed: 2.7ms preprocess, 59.4ms inference, 13.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▍  | 3730/5000 [08:46<03:10,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000433134.jpg: 480x640 1 cat, 62.9ms
Speed: 2.8ms preprocess, 62.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  75%|███████▍  | 3731/5000 [08:46<02:56,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000433192.jpg: 480x640 1 bed, 1 laptop, 82.6ms
Speed: 5.1ms preprocess, 82.6ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  75%|███████▍  | 3732/5000 [08:46<02:54,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000433204.jpg: 480x640 6 persons, 2 cars, 1 motorcycle, 66.1ms
Speed: 2.8ms preprocess, 66.1ms inference, 8.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  75%|███████▍  | 3733/5000 [08:46<02:52,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000433243.jpg: 448x640 1 zebra, 1 giraffe, 58.5ms
Speed: 2.6ms preprocess, 58.5ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▍  | 3734/5000 [08:47<02:44,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000433374.jpg: 448x640 4 elephants, 84.8ms
Speed: 2.6ms preprocess, 84.8ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▍  | 3735/5000 [08:47<02:50,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000433515.jpg: 480x640 5 persons, 2 cars, 2 kites, 63.8ms
Speed: 2.8ms preprocess, 63.8ms inference, 8.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  75%|███████▍  | 3736/5000 [08:47<02:50,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000433774.jpg: 448x640 2 traffic lights, 1 parking meter, 61.2ms
Speed: 2.6ms preprocess, 61.2ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▍  | 3737/5000 [08:47<02:42,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000433915.jpg: 480x640 1 person, 86.2ms
Speed: 3.2ms preprocess, 86.2ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  75%|███████▍  | 3738/5000 [08:47<02:45,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000433980.jpg: 640x480 1 person, 62.3ms
Speed: 4.0ms preprocess, 62.3ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  75%|███████▍  | 3739/5000 [08:47<02:37,  7.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000434204.jpg: 448x640 4 persons, 1 bench, 2 skateboards, 58.5ms
Speed: 4.3ms preprocess, 58.5ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▍  | 3740/5000 [08:47<02:38,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000434230.jpg: 416x640 2 persons, 1 cup, 5 bananas, 1 clock, 10 vases, 55.8ms
Speed: 3.5ms preprocess, 55.8ms inference, 16.0ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  75%|███████▍  | 3741/5000 [08:47<02:57,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000434247.jpg: 512x640 1 person, 2 horses, 1 sheep, 69.5ms
Speed: 3.0ms preprocess, 69.5ms inference, 4.6ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  75%|███████▍  | 3742/5000 [08:48<02:51,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000434297.jpg: 480x640 1 broccoli, 4 carrots, 1 dining table, 61.1ms
Speed: 2.7ms preprocess, 61.1ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  75%|███████▍  | 3743/5000 [08:48<02:45,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000434459.jpg: 480x640 5 persons, 1 knife, 3 cakes, 8 chairs, 1 dining table, 1 refrigerator, 1 clock, 99.7ms
Speed: 2.7ms preprocess, 99.7ms inference, 20.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  75%|███████▍  | 3744/5000 [08:48<03:16,  6.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000434479.jpg: 448x640 1 knife, 2 pizzas, 1 dining table, 57.7ms
Speed: 2.9ms preprocess, 57.7ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▍  | 3745/5000 [08:48<03:01,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000434548.jpg: 480x640 1 person, 1 baseball glove, 84.1ms
Speed: 3.4ms preprocess, 84.1ms inference, 4.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  75%|███████▍  | 3746/5000 [08:48<03:01,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000434996.jpg: 480x640 2 cats, 1 bed, 4 teddy bears, 59.1ms
Speed: 4.3ms preprocess, 59.1ms inference, 6.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  75%|███████▍  | 3747/5000 [08:48<02:53,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000435003.jpg: 448x640 1 chair, 2 tvs, 1 laptop, 1 mouse, 2 keyboards, 58.9ms
Speed: 2.9ms preprocess, 58.9ms inference, 6.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▍  | 3748/5000 [08:48<02:49,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000435081.jpg: 640x640 1 cup, 3 knifes, 7 bowls, 1 apple, 6 donuts, 18 cakes, 5 dining tables, 1 vase, 92.8ms
Speed: 3.2ms preprocess, 92.8ms inference, 56.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  75%|███████▍  | 3749/5000 [08:49<04:05,  5.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000435205.jpg: 448x640 3 persons, 1 teddy bear, 1 toothbrush, 62.5ms
Speed: 4.1ms preprocess, 62.5ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▌  | 3750/5000 [08:49<03:36,  5.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000435206.jpg: 448x640 4 persons, 1 boat, 59.1ms
Speed: 2.7ms preprocess, 59.1ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▌  | 3751/5000 [08:49<03:16,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000435208.jpg: 640x480 2 persons, 1 chair, 1 couch, 2 tvs, 1 laptop, 1 keyboard, 1 clock, 65.6ms
Speed: 2.7ms preprocess, 65.6ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  75%|███████▌  | 3752/5000 [08:49<03:08,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000435299.jpg: 480x640 1 cat, 1 bed, 102.9ms
Speed: 2.5ms preprocess, 102.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  75%|███████▌  | 3753/5000 [08:49<03:09,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000435880.jpg: 480x640 2 bottles, 1 bowl, 1 couch, 2 refrigerators, 63.9ms
Speed: 3.0ms preprocess, 63.9ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  75%|███████▌  | 3754/5000 [08:49<02:59,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000436315.jpg: 640x480 1 pizza, 65.3ms
Speed: 2.7ms preprocess, 65.3ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  75%|███████▌  | 3755/5000 [08:50<02:49,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000436551.jpg: 448x640 2 birds, 56.4ms
Speed: 3.0ms preprocess, 56.4ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▌  | 3756/5000 [08:50<02:47,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000436617.jpg: 448x640 11 persons, 3 wine glasss, 1 cup, 1 bowl, 1 hot dog, 2 chairs, 2 couchs, 2 dining tables, 61.9ms
Speed: 2.9ms preprocess, 61.9ms inference, 21.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▌  | 3757/5000 [08:50<03:09,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000436738.jpg: 640x640 3 persons, 3 cars, 2 buss, 2 traffic lights, 81.3ms
Speed: 2.9ms preprocess, 81.3ms inference, 13.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  75%|███████▌  | 3758/5000 [08:50<03:14,  6.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000436883.jpg: 448x640 2 persons, 1 car, 2 buss, 2 trucks, 1 stop sign, 59.9ms
Speed: 2.5ms preprocess, 59.9ms inference, 7.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▌  | 3759/5000 [08:50<03:02,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000437110.jpg: 640x480 1 clock, 88.6ms
Speed: 2.6ms preprocess, 88.6ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  75%|███████▌  | 3760/5000 [08:50<02:59,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000437205.jpg: 640x384 1 person, 2 bananas, 47.5ms
Speed: 2.5ms preprocess, 47.5ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  75%|███████▌  | 3761/5000 [08:50<02:44,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000437239.jpg: 448x640 4 persons, 4 cars, 1 bench, 1 backpack, 2 frisbees, 57.3ms
Speed: 2.5ms preprocess, 57.3ms inference, 13.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▌  | 3762/5000 [08:51<02:46,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000437331.jpg: 448x640 1 person, 2 surfboards, 80.8ms
Speed: 2.5ms preprocess, 80.8ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▌  | 3763/5000 [08:51<02:47,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000437351.jpg: 640x512 1 person, 6 suitcases, 140.4ms
Speed: 3.8ms preprocess, 140.4ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  75%|███████▌  | 3764/5000 [08:51<03:13,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000437392.jpg: 640x480 1 toilet, 58.6ms
Speed: 2.5ms preprocess, 58.6ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  75%|███████▌  | 3765/5000 [08:51<02:54,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000437514.jpg: 576x640 1 bench, 1 potted plant, 184.0ms
Speed: 1.9ms preprocess, 184.0ms inference, 2.5ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  75%|███████▌  | 3766/5000 [08:51<03:29,  5.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000437898.jpg: 448x640 2 cups, 3 knifes, 1 oven, 1 sink, 1 refrigerator, 59.7ms
Speed: 2.9ms preprocess, 59.7ms inference, 7.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▌  | 3767/5000 [08:51<03:12,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000438017.jpg: 640x480 4 cars, 65.7ms
Speed: 2.9ms preprocess, 65.7ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  75%|███████▌  | 3768/5000 [08:51<02:58,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000438226.jpg: 320x640 1 wine glass, 1 fork, 2 sandwichs, 1 dining table, 101.5ms
Speed: 2.5ms preprocess, 101.5ms inference, 3.4ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  75%|███████▌  | 3769/5000 [08:52<03:03,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000438269.jpg: 640x448 1 bird, 128.4ms
Speed: 3.2ms preprocess, 128.4ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  75%|███████▌  | 3770/5000 [08:52<03:13,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000438304.jpg: 640x480 2 persons, 1 sports ball, 3 tennis rackets, 61.0ms
Speed: 2.8ms preprocess, 61.0ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  75%|███████▌  | 3771/5000 [08:52<03:00,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000438774.jpg: 448x640 5 persons, 1 bottle, 1 cup, 2 cakes, 1 chair, 1 dining table, 1 microwave, 1 refrigerator, 60.7ms
Speed: 2.7ms preprocess, 60.7ms inference, 11.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▌  | 3772/5000 [08:52<02:59,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000438862.jpg: 448x640 5 persons, 1 sports ball, 94.4ms
Speed: 2.6ms preprocess, 94.4ms inference, 6.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▌  | 3773/5000 [08:52<03:04,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000438876.jpg: 448x640 1 person, 1 sports ball, 1 baseball bat, 2 tennis rackets, 61.5ms
Speed: 4.0ms preprocess, 61.5ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  75%|███████▌  | 3774/5000 [08:52<02:53,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000438907.jpg: 640x480 2 persons, 1 skateboard, 62.1ms
Speed: 2.9ms preprocess, 62.1ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  76%|███████▌  | 3775/5000 [08:52<02:44,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000438955.jpg: 320x640 12 persons, 1 backpack, 47.0ms
Speed: 2.3ms preprocess, 47.0ms inference, 7.4ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  76%|███████▌  | 3776/5000 [08:53<02:37,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000439180.jpg: 384x640 14 persons, 3 cars, 4 trucks, 11 horses, 143.8ms
Speed: 2.2ms preprocess, 143.8ms inference, 23.0ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  76%|███████▌  | 3777/5000 [08:53<03:29,  5.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000439290.jpg: 448x640 1 banana, 59.1ms
Speed: 2.5ms preprocess, 59.1ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▌  | 3778/5000 [08:53<03:06,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000439426.jpg: 480x640 2 persons, 59.9ms
Speed: 2.3ms preprocess, 59.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▌  | 3779/5000 [08:53<02:50,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000439522.jpg: 640x448 1 person, 54.8ms
Speed: 2.6ms preprocess, 54.8ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  76%|███████▌  | 3780/5000 [08:53<02:37,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000439525.jpg: 640x480 4 persons, 3 bottles, 1 cake, 2 dining tables, 63.2ms
Speed: 2.5ms preprocess, 63.2ms inference, 10.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  76%|███████▌  | 3781/5000 [08:53<02:57,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000439593.jpg: 480x640 5 persons, 1 train, 1 handbag, 63.0ms
Speed: 3.1ms preprocess, 63.0ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▌  | 3782/5000 [08:53<02:51,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000439623.jpg: 640x480 4 cars, 1 fire hydrant, 63.5ms
Speed: 2.2ms preprocess, 63.5ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  76%|███████▌  | 3783/5000 [08:54<02:44,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000439715.jpg: 480x640 8 persons, 2 horses, 1 elephant, 1 umbrella, 69.5ms
Speed: 2.3ms preprocess, 69.5ms inference, 40.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▌  | 3784/5000 [08:54<03:05,  6.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000439773.jpg: 480x640 1 person, 1 teddy bear, 62.6ms
Speed: 2.3ms preprocess, 62.6ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▌  | 3785/5000 [08:54<02:51,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000439854.jpg: 448x640 7 persons, 1 bicycle, 1 skateboard, 64.5ms
Speed: 4.0ms preprocess, 64.5ms inference, 9.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▌  | 3786/5000 [08:54<02:50,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000439994.jpg: 640x448 4 persons, 77.2ms
Speed: 2.7ms preprocess, 77.2ms inference, 9.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  76%|███████▌  | 3787/5000 [08:54<02:53,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000440171.jpg: 640x416 2 persons, 1 elephant, 135.7ms
Speed: 2.4ms preprocess, 135.7ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  76%|███████▌  | 3788/5000 [08:54<03:08,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000440184.jpg: 448x640 4 persons, 1 sports ball, 2 tennis rackets, 55.6ms
Speed: 2.8ms preprocess, 55.6ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▌  | 3789/5000 [08:55<02:56,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000440336.jpg: 448x640 5 persons, 3 frisbees, 90.8ms
Speed: 3.9ms preprocess, 90.8ms inference, 8.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▌  | 3790/5000 [08:55<03:04,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000440475.jpg: 448x640 1 person, 1 bowl, 2 chairs, 2 potted plants, 1 dining table, 1 tv, 1 vase, 61.3ms
Speed: 2.9ms preprocess, 61.3ms inference, 8.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▌  | 3791/5000 [08:55<02:56,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000440507.jpg: 480x640 1 suitcase, 65.0ms
Speed: 4.0ms preprocess, 65.0ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▌  | 3792/5000 [08:55<02:45,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000440508.jpg: 448x640 1 train, 65.5ms
Speed: 2.6ms preprocess, 65.5ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▌  | 3793/5000 [08:55<02:41,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000440617.jpg: 480x640 1 train, 1 traffic light, 64.4ms
Speed: 3.3ms preprocess, 64.4ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▌  | 3794/5000 [08:55<02:34,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000441247.jpg: 448x640 4 persons, 1 backpack, 1 chair, 2 couchs, 1 clock, 59.7ms
Speed: 2.8ms preprocess, 59.7ms inference, 7.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▌  | 3795/5000 [08:55<02:36,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000441286.jpg: 512x640 1 person, 1 surfboard, 165.0ms
Speed: 2.8ms preprocess, 165.0ms inference, 2.5ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  76%|███████▌  | 3796/5000 [08:56<03:07,  6.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000441442.jpg: 448x640 2 persons, 1 horse, 56.6ms
Speed: 4.4ms preprocess, 56.6ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▌  | 3797/5000 [08:56<02:51,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000441468.jpg: 640x448 2 persons, 3 cars, 1 bus, 1 fire hydrant, 62.9ms
Speed: 3.1ms preprocess, 62.9ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  76%|███████▌  | 3798/5000 [08:56<02:47,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000441491.jpg: 480x640 2 persons, 1 cup, 1 banana, 94.8ms
Speed: 2.7ms preprocess, 94.8ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▌  | 3799/5000 [08:56<02:52,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000441543.jpg: 416x640 12 persons, 1 backpack, 2 umbrellas, 118.9ms
Speed: 2.5ms preprocess, 118.9ms inference, 11.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  76%|███████▌  | 3800/5000 [08:56<03:13,  6.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000441553.jpg: 480x640 11 persons, 1 train, 2 traffic lights, 1 backpack, 61.4ms
Speed: 3.4ms preprocess, 61.4ms inference, 14.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▌  | 3801/5000 [08:56<03:11,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000441586.jpg: 448x640 2 persons, 4 bicycles, 2 trucks, 79.5ms
Speed: 2.8ms preprocess, 79.5ms inference, 11.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▌  | 3802/5000 [08:56<03:08,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000442009.jpg: 448x640 2 bottles, 1 potted plant, 1 vase, 57.9ms
Speed: 4.2ms preprocess, 57.9ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▌  | 3803/5000 [08:57<02:54,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000442161.jpg: 480x640 4 persons, 5 wine glasss, 1 dining table, 61.5ms
Speed: 4.3ms preprocess, 61.5ms inference, 12.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▌  | 3804/5000 [08:57<02:53,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000442306.jpg: 640x640 1 person, 1 umbrella, 192.5ms
Speed: 2.9ms preprocess, 192.5ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  76%|███████▌  | 3805/5000 [08:57<03:30,  5.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000442323.jpg: 480x640 1 chair, 1 bed, 1 dining table, 1 tv, 60.4ms
Speed: 2.6ms preprocess, 60.4ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▌  | 3806/5000 [08:57<03:09,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000442456.jpg: 384x640 2 persons, 1 car, 1 truck, 52.1ms
Speed: 2.4ms preprocess, 52.1ms inference, 3.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  76%|███████▌  | 3807/5000 [08:57<02:50,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000442463.jpg: 640x480 1 person, 1 skateboard, 64.5ms
Speed: 2.3ms preprocess, 64.5ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  76%|███████▌  | 3808/5000 [08:57<02:39,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000442480.jpg: 448x640 3 airplanes, 61.2ms
Speed: 3.3ms preprocess, 61.2ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▌  | 3809/5000 [08:57<02:33,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000442661.jpg: 480x640 1 giraffe, 63.8ms
Speed: 2.4ms preprocess, 63.8ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▌  | 3810/5000 [08:58<02:30,  7.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000442746.jpg: 480x640 1 person, 1 car, 1 sports ball, 63.8ms
Speed: 6.7ms preprocess, 63.8ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▌  | 3811/5000 [08:58<02:29,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000442822.jpg: 448x640 10 boats, 61.7ms
Speed: 3.1ms preprocess, 61.7ms inference, 8.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▌  | 3812/5000 [08:58<02:34,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000442836.jpg: 480x640 3 persons, 1 frisbee, 73.1ms
Speed: 2.7ms preprocess, 73.1ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▋  | 3813/5000 [08:58<02:38,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000442993.jpg: 640x448 1 clock, 64.5ms
Speed: 2.7ms preprocess, 64.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  76%|███████▋  | 3814/5000 [08:58<02:30,  7.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000443303.jpg: 480x640 1 cat, 1 suitcase, 1 bed, 62.4ms
Speed: 4.3ms preprocess, 62.4ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▋  | 3815/5000 [08:58<02:26,  8.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000443426.jpg: 640x480 1 person, 1 tie, 122.3ms
Speed: 2.8ms preprocess, 122.3ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  76%|███████▋  | 3816/5000 [08:58<02:45,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000443498.jpg: 480x640 1 bus, 67.6ms
Speed: 2.5ms preprocess, 67.6ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▋  | 3817/5000 [08:58<02:37,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000443844.jpg: 448x640 1 person, 2 benchs, 1 skateboard, 70.8ms
Speed: 3.3ms preprocess, 70.8ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▋  | 3818/5000 [08:59<02:35,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000443969.jpg: 640x640 4 persons, 1 umbrella, 1 sports ball, 1 skateboard, 83.3ms
Speed: 3.3ms preprocess, 83.3ms inference, 9.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  76%|███████▋  | 3819/5000 [08:59<02:53,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000444142.jpg: 640x448 1 person, 1 tennis racket, 9 chairs, 58.7ms
Speed: 2.6ms preprocess, 58.7ms inference, 10.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  76%|███████▋  | 3820/5000 [08:59<02:51,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000444275.jpg: 480x640 1 microwave, 61.1ms
Speed: 2.6ms preprocess, 61.1ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▋  | 3821/5000 [08:59<02:38,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000444879.jpg: 480x640 1 train, 102.2ms
Speed: 2.7ms preprocess, 102.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  76%|███████▋  | 3822/5000 [08:59<02:43,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000445248.jpg: 448x640 4 elephants, 60.2ms
Speed: 2.7ms preprocess, 60.2ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▋  | 3823/5000 [08:59<02:35,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000445365.jpg: 640x448 2 giraffes, 58.1ms
Speed: 2.5ms preprocess, 58.1ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  76%|███████▋  | 3824/5000 [08:59<02:27,  8.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000445439.jpg: 448x640 1 zebra, 61.3ms
Speed: 2.9ms preprocess, 61.3ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  76%|███████▋  | 3825/5000 [09:00<02:24,  8.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000445602.jpg: 480x640 3 persons, 2 skiss, 74.5ms
Speed: 2.9ms preprocess, 74.5ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3826/5000 [09:00<02:27,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000445658.jpg: 448x640 1 bottle, 1 bowl, 1 microwave, 2 ovens, 1 refrigerator, 57.7ms
Speed: 2.7ms preprocess, 57.7ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3827/5000 [09:00<02:26,  8.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000445675.jpg: 640x480 1 giraffe, 82.2ms
Speed: 2.5ms preprocess, 82.2ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  77%|███████▋  | 3828/5000 [09:00<02:30,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000445722.jpg: 640x416 3 persons, 59.9ms
Speed: 3.8ms preprocess, 59.9ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  77%|███████▋  | 3829/5000 [09:00<02:25,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000445792.jpg: 480x640 4 persons, 64.8ms
Speed: 4.1ms preprocess, 64.8ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3830/5000 [09:00<02:24,  8.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000445834.jpg: 448x640 4 persons, 1 bus, 79.2ms
Speed: 2.7ms preprocess, 79.2ms inference, 13.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3831/5000 [09:00<02:31,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000445846.jpg: 448x640 1 oven, 2 refrigerators, 64.6ms
Speed: 2.8ms preprocess, 64.6ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3832/5000 [09:00<02:25,  8.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000445999.jpg: 640x416 1 person, 55.5ms
Speed: 2.3ms preprocess, 55.5ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  77%|███████▋  | 3833/5000 [09:01<02:32,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000446005.jpg: 448x640 1 laptop, 1 microwave, 1 oven, 2 sinks, 66.4ms
Speed: 4.4ms preprocess, 66.4ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3834/5000 [09:01<02:32,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000446117.jpg: 448x640 1 orange, 62.4ms
Speed: 2.8ms preprocess, 62.4ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3835/5000 [09:01<02:25,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000446206.jpg: 416x640 1 bus, 2 trains, 1 traffic light, 1 stop sign, 57.2ms
Speed: 2.3ms preprocess, 57.2ms inference, 5.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  77%|███████▋  | 3836/5000 [09:01<02:21,  8.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000446207.jpg: 480x640 1 cell phone, 75.9ms
Speed: 17.5ms preprocess, 75.9ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3837/5000 [09:01<02:31,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000446522.jpg: 640x480 1 dog, 1 chair, 1 refrigerator, 68.2ms
Speed: 3.1ms preprocess, 68.2ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  77%|███████▋  | 3838/5000 [09:01<02:29,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000446574.jpg: 640x448 1 toilet, 65.4ms
Speed: 2.9ms preprocess, 65.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  77%|███████▋  | 3839/5000 [09:01<02:23,  8.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000446651.jpg: 448x640 2 persons, 1 car, 86.0ms
Speed: 2.4ms preprocess, 86.0ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3840/5000 [09:01<02:29,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000446703.jpg: 448x640 1 teddy bear, 62.2ms
Speed: 4.2ms preprocess, 62.2ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3841/5000 [09:02<02:24,  8.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000447088.jpg: 448x640 3 persons, 2 baseball bats, 58.0ms
Speed: 3.6ms preprocess, 58.0ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3842/5000 [09:02<02:21,  8.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000447169.jpg: 480x640 1 dog, 1 oven, 1 sink, 1 refrigerator, 61.1ms
Speed: 2.2ms preprocess, 61.1ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3843/5000 [09:02<02:23,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000447187.jpg: 480x640 4 persons, 1 sports ball, 2 baseball bats, 3 baseball gloves, 69.3ms
Speed: 3.6ms preprocess, 69.3ms inference, 8.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3844/5000 [09:02<02:30,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000447200.jpg: 480x640 2 dogs, 65.2ms
Speed: 2.6ms preprocess, 65.2ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3845/5000 [09:02<02:24,  8.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000447313.jpg: 480x640 4 zebras, 59.1ms
Speed: 2.3ms preprocess, 59.1ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3846/5000 [09:02<02:22,  8.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000447314.jpg: 480x640 5 persons, 1 kite, 86.1ms
Speed: 2.6ms preprocess, 86.1ms inference, 10.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3847/5000 [09:02<02:33,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000447342.jpg: 416x640 5 cars, 1 bus, 3 trucks, 57.3ms
Speed: 2.7ms preprocess, 57.3ms inference, 7.0ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  77%|███████▋  | 3848/5000 [09:02<02:31,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000447465.jpg: 448x640 3 persons, 2 skiss, 62.5ms
Speed: 3.0ms preprocess, 62.5ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3849/5000 [09:03<02:33,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000447522.jpg: 480x640 1 bowl, 7 broccolis, 5 carrots, 1 dining table, 1 laptop, 65.2ms
Speed: 2.5ms preprocess, 65.2ms inference, 15.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3850/5000 [09:03<02:46,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000447611.jpg: 448x640 2 persons, 2 laptops, 1 cell phone, 58.0ms
Speed: 3.7ms preprocess, 58.0ms inference, 5.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3851/5000 [09:03<02:38,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000447789.jpg: 448x640 1 stop sign, 79.5ms
Speed: 5.0ms preprocess, 79.5ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3852/5000 [09:03<02:38,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000447917.jpg: 384x640 4 persons, 1 sports ball, 56.9ms
Speed: 2.5ms preprocess, 56.9ms inference, 4.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  77%|███████▋  | 3853/5000 [09:03<02:30,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000448076.jpg: 480x640 6 persons, 1 suitcase, 3 potted plants, 66.2ms
Speed: 2.4ms preprocess, 66.2ms inference, 10.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3854/5000 [09:03<02:43,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000448256.jpg: 448x640 3 persons, 1 car, 57.8ms
Speed: 3.0ms preprocess, 57.8ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3855/5000 [09:03<02:33,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000448263.jpg: 480x640 3 persons, 2 baseball gloves, 61.0ms
Speed: 3.9ms preprocess, 61.0ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3856/5000 [09:04<02:29,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000448365.jpg: 448x640 3 persons, 1 frisbee, 1 skateboard, 59.5ms
Speed: 4.4ms preprocess, 59.5ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3857/5000 [09:04<02:29,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000448410.jpg: 448x640 16 persons, 4 trains, 62.9ms
Speed: 8.1ms preprocess, 62.9ms inference, 18.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3858/5000 [09:04<02:46,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000448448.jpg: 480x640 1 giraffe, 64.1ms
Speed: 2.6ms preprocess, 64.1ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3859/5000 [09:04<02:35,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000448810.jpg: 480x640 1 person, 85.7ms
Speed: 4.2ms preprocess, 85.7ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3860/5000 [09:04<02:39,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000449190.jpg: 384x640 1 bowl, 4 sandwichs, 1 dining table, 58.0ms
Speed: 3.1ms preprocess, 58.0ms inference, 5.1ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  77%|███████▋  | 3861/5000 [09:04<02:32,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000449198.jpg: 448x640 (no detections), 67.4ms
Speed: 4.0ms preprocess, 67.4ms inference, 0.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3862/5000 [09:04<02:23,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000449312.jpg: 608x640 1 person, 2 cakes, 1 chair, 1 dining table, 188.0ms
Speed: 1.9ms preprocess, 188.0ms inference, 5.7ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  77%|███████▋  | 3863/5000 [09:05<03:04,  6.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000449406.jpg: 448x640 1 zebra, 56.4ms
Speed: 4.1ms preprocess, 56.4ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3864/5000 [09:05<02:45,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000449432.jpg: 448x640 18 persons, 1 motorcycle, 1 bus, 1 backpack, 1 handbag, 56.9ms
Speed: 2.6ms preprocess, 56.9ms inference, 19.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3865/5000 [09:05<02:52,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000449579.jpg: 448x640 1 person, 1 tennis racket, 82.7ms
Speed: 2.7ms preprocess, 82.7ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3866/5000 [09:05<02:48,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000449603.jpg: 448x640 1 person, 62.4ms
Speed: 2.8ms preprocess, 62.4ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3867/5000 [09:05<02:35,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000449661.jpg: 192x640 1 person, 1 chair, 126.5ms
Speed: 2.3ms preprocess, 126.5ms inference, 2.3ms postprocess per image at shape (1, 3, 192, 640)


Segmenting Images:  77%|███████▋  | 3868/5000 [09:05<02:45,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000449909.jpg: 448x640 1 person, 1 giraffe, 61.0ms
Speed: 2.7ms preprocess, 61.0ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3869/5000 [09:05<02:33,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000449996.jpg: 448x640 4 airplanes, 58.9ms
Speed: 2.7ms preprocess, 58.9ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  77%|███████▋  | 3870/5000 [09:06<02:28,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000450075.jpg: 480x640 2 persons, 83.8ms
Speed: 3.3ms preprocess, 83.8ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3871/5000 [09:06<02:32,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000450100.jpg: 640x480 1 cup, 1 fork, 1 spoon, 1 cake, 1 dining table, 1 vase, 67.8ms
Speed: 4.5ms preprocess, 67.8ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  77%|███████▋  | 3872/5000 [09:06<02:31,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000450202.jpg: 640x480 2 persons, 3 wine glasss, 1 sandwich, 1 dining table, 68.1ms
Speed: 4.3ms preprocess, 68.1ms inference, 7.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  77%|███████▋  | 3873/5000 [09:06<02:32,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000450303.jpg: 480x640 6 persons, 2 bottles, 1 cup, 9 chairs, 2 dining tables, 1 tv, 8 laptops, 1 mouse, 1 clock, 80.9ms
Speed: 2.6ms preprocess, 80.9ms inference, 37.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  77%|███████▋  | 3874/5000 [09:06<03:05,  6.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000450399.jpg: 480x640 10 persons, 3 bowls, 7 donuts, 1 chair, 64.3ms
Speed: 3.2ms preprocess, 64.3ms inference, 19.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  78%|███████▊  | 3875/5000 [09:06<03:11,  5.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000450439.jpg: 320x640 1 airplane, 2 kites, 108.4ms
Speed: 3.6ms preprocess, 108.4ms inference, 2.6ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  78%|███████▊  | 3876/5000 [09:06<03:05,  6.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000450488.jpg: 640x448 1 bed, 1 tv, 79.0ms
Speed: 3.9ms preprocess, 79.0ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  78%|███████▊  | 3877/5000 [09:07<02:56,  6.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000450559.jpg: 640x448 1 person, 3 skateboards, 63.2ms
Speed: 2.9ms preprocess, 63.2ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  78%|███████▊  | 3878/5000 [09:07<02:43,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000450686.jpg: 640x512 1 person, 1 cell phone, 137.2ms
Speed: 3.1ms preprocess, 137.2ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  78%|███████▊  | 3879/5000 [09:07<02:57,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000450758.jpg: 480x640 7 giraffes, 62.5ms
Speed: 2.9ms preprocess, 62.5ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  78%|███████▊  | 3880/5000 [09:07<02:47,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000451043.jpg: 448x640 1 person, 1 snowboard, 65.4ms
Speed: 3.0ms preprocess, 65.4ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3881/5000 [09:07<02:36,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000451084.jpg: 640x448 1 person, 1 skateboard, 93.6ms
Speed: 2.5ms preprocess, 93.6ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  78%|███████▊  | 3882/5000 [09:07<02:38,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000451090.jpg: 448x640 1 boat, 66.9ms
Speed: 3.0ms preprocess, 66.9ms inference, 1.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3883/5000 [09:07<02:30,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000451144.jpg: 480x640 2 persons, 3 backpacks, 1 skis, 65.6ms
Speed: 2.6ms preprocess, 65.6ms inference, 8.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  78%|███████▊  | 3884/5000 [09:08<02:36,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000451150.jpg: 640x480 6 donuts, 69.8ms
Speed: 4.9ms preprocess, 69.8ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  78%|███████▊  | 3885/5000 [09:08<02:37,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000451155.jpg: 480x640 1 person, 1 bed, 1 tv, 67.7ms
Speed: 3.0ms preprocess, 67.7ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  78%|███████▊  | 3886/5000 [09:08<02:30,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000451308.jpg: 640x448 13 suitcases, 69.6ms
Speed: 3.1ms preprocess, 69.6ms inference, 13.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  78%|███████▊  | 3887/5000 [09:08<02:38,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000451435.jpg: 416x640 1 person, 1 sports ball, 1 tennis racket, 56.0ms
Speed: 2.5ms preprocess, 56.0ms inference, 3.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  78%|███████▊  | 3888/5000 [09:08<02:27,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000451571.jpg: 480x640 1 pizza, 60.0ms
Speed: 2.7ms preprocess, 60.0ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  78%|███████▊  | 3889/5000 [09:08<02:24,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000451693.jpg: 480x640 1 pizza, 1 dining table, 63.0ms
Speed: 3.0ms preprocess, 63.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  78%|███████▊  | 3890/5000 [09:08<02:21,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000451714.jpg: 640x480 3 persons, 2 skiss, 62.5ms
Speed: 2.7ms preprocess, 62.5ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  78%|███████▊  | 3891/5000 [09:08<02:17,  8.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000451879.jpg: 576x640 6 persons, 1 sports ball, 1 baseball glove, 187.5ms
Speed: 2.6ms preprocess, 187.5ms inference, 9.6ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  78%|███████▊  | 3892/5000 [09:09<03:04,  6.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000452084.jpg: 640x640 1 spoon, 1 bowl, 1 cake, 1 dining table, 155.4ms
Speed: 1.7ms preprocess, 155.4ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  78%|███████▊  | 3893/5000 [09:09<03:19,  5.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000452122.jpg: 448x640 1 airplane, 55.2ms
Speed: 2.2ms preprocess, 55.2ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000452321.jpg: 480x640 2 buss, 1 train, 62.1ms
Speed: 2.9ms preprocess, 62.1ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  78%|███████▊  | 3895/5000 [09:09<02:40,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000452515.jpg: 640x288 4 persons, 1 tennis racket, 141.6ms
Speed: 3.5ms preprocess, 141.6ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 288)


Segmenting Images:  78%|███████▊  | 3896/5000 [09:09<02:53,  6.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000452784.jpg: 640x480 1 person, 1 bowl, 2 broccolis, 1 toilet, 61.2ms
Speed: 3.2ms preprocess, 61.2ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  78%|███████▊  | 3897/5000 [09:09<02:42,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000452793.jpg: 448x640 2 sinks, 1 refrigerator, 60.4ms
Speed: 2.5ms preprocess, 60.4ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3898/5000 [09:10<02:32,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000452891.jpg: 640x480 1 bench, 1 dog, 94.9ms
Speed: 2.7ms preprocess, 94.9ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  78%|███████▊  | 3899/5000 [09:10<02:35,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000453001.jpg: 480x640 2 persons, 2 benchs, 60.2ms
Speed: 3.1ms preprocess, 60.2ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  78%|███████▊  | 3900/5000 [09:10<02:26,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000453040.jpg: 640x480 (no detections), 64.6ms
Speed: 2.4ms preprocess, 64.6ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  78%|███████▊  | 3901/5000 [09:10<02:15,  8.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000453166.jpg: 448x640 4 persons, 5 ties, 61.0ms
Speed: 2.8ms preprocess, 61.0ms inference, 8.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3902/5000 [09:10<02:25,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000453302.jpg: 480x640 4 bottles, 1 microwave, 1 refrigerator, 63.2ms
Speed: 4.6ms preprocess, 63.2ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  78%|███████▊  | 3903/5000 [09:10<02:24,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000453341.jpg: 480x640 6 persons, 1 tv, 64.8ms
Speed: 2.8ms preprocess, 64.8ms inference, 7.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  78%|███████▊  | 3904/5000 [09:10<02:36,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000453584.jpg: 480x640 1 bench, 69.7ms
Speed: 3.5ms preprocess, 69.7ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  78%|███████▊  | 3905/5000 [09:11<02:30,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000453634.jpg: 640x480 3 toilets, 1 sink, 64.3ms
Speed: 3.4ms preprocess, 64.3ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  78%|███████▊  | 3906/5000 [09:11<02:24,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000453708.jpg: 448x640 9 persons, 2 bananas, 60.9ms
Speed: 2.8ms preprocess, 60.9ms inference, 9.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3907/5000 [09:11<02:26,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000453722.jpg: 448x640 2 couchs, 1 tv, 60.0ms
Speed: 2.5ms preprocess, 60.0ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3908/5000 [09:11<02:19,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000453841.jpg: 448x640 1 person, 5 cars, 1 truck, 4 traffic lights, 1 fire hydrant, 70.1ms
Speed: 4.1ms preprocess, 70.1ms inference, 20.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3909/5000 [09:11<02:32,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000453860.jpg: 448x640 2 suitcases, 60.8ms
Speed: 2.6ms preprocess, 60.8ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3910/5000 [09:11<02:22,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000453981.jpg: 448x640 2 bears, 58.9ms
Speed: 2.4ms preprocess, 58.9ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3911/5000 [09:11<02:15,  8.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000454067.jpg: 544x640 1 person, 1 chair, 1 tv, 2 books, 169.5ms
Speed: 4.1ms preprocess, 169.5ms inference, 5.4ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  78%|███████▊  | 3912/5000 [09:12<02:49,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000454404.jpg: 544x640 1 person, 1 couch, 69.9ms
Speed: 1.9ms preprocess, 69.9ms inference, 3.4ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  78%|███████▊  | 3913/5000 [09:12<02:39,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000454661.jpg: 448x640 4 cars, 3 traffic lights, 62.3ms
Speed: 2.6ms preprocess, 62.3ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3914/5000 [09:12<02:32,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000454750.jpg: 448x640 2 zebras, 60.3ms
Speed: 3.1ms preprocess, 60.3ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3915/5000 [09:12<02:21,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000454798.jpg: 448x640 1 person, 1 truck, 1 horse, 83.2ms
Speed: 3.9ms preprocess, 83.2ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3916/5000 [09:12<02:25,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000454978.jpg: 416x640 1 motorcycle, 54.6ms
Speed: 2.5ms preprocess, 54.6ms inference, 1.7ms postprocess per image at shape (1, 3, 416, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000455085.jpg: 640x448 1 bus, 53.3ms
Speed: 2.6ms preprocess, 53.3ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  78%|███████▊  | 3918/5000 [09:12<02:07,  8.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455157.jpg: 640x640 3 persons, 2 benchs, 1 umbrella, 79.4ms
Speed: 2.2ms preprocess, 79.4ms inference, 8.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  78%|███████▊  | 3919/5000 [09:12<02:16,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455219.jpg: 448x640 2 persons, 2 cows, 59.5ms
Speed: 2.5ms preprocess, 59.5ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  78%|███████▊  | 3920/5000 [09:12<02:11,  8.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455267.jpg: 480x640 1 knife, 1 bowl, 6 broccolis, 2 carrots, 66.6ms
Speed: 3.2ms preprocess, 66.6ms inference, 14.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  78%|███████▊  | 3921/5000 [09:13<02:26,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455301.jpg: 512x640 1 person, 1 bed, 1 laptop, 140.7ms
Speed: 2.8ms preprocess, 140.7ms inference, 3.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  78%|███████▊  | 3922/5000 [09:13<02:44,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455352.jpg: 544x640 2 clocks, 71.4ms
Speed: 1.9ms preprocess, 71.4ms inference, 3.0ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  78%|███████▊  | 3923/5000 [09:13<02:35,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455448.jpg: 640x480 2 persons, 1 donut, 88.3ms
Speed: 2.7ms preprocess, 88.3ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  78%|███████▊  | 3924/5000 [09:13<02:36,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455555.jpg: 576x640 2 persons, 75.8ms
Speed: 2.3ms preprocess, 75.8ms inference, 3.3ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  78%|███████▊  | 3925/5000 [09:13<02:29,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455597.jpg: 448x640 1 person, 1 cup, 3 bowls, 1 oven, 59.1ms
Speed: 2.6ms preprocess, 59.1ms inference, 10.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▊  | 3926/5000 [09:13<02:27,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455624.jpg: 448x640 15 persons, 1 motorcycle, 62.0ms
Speed: 3.3ms preprocess, 62.0ms inference, 14.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▊  | 3927/5000 [09:14<02:37,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455716.jpg: 448x640 3 motorcycles, 59.2ms
Speed: 4.5ms preprocess, 59.2ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▊  | 3928/5000 [09:14<02:30,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455872.jpg: 480x640 2 boats, 70.0ms
Speed: 4.3ms preprocess, 70.0ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▊  | 3929/5000 [09:14<02:40,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455937.jpg: 480x640 1 person, 1 chair, 1 couch, 2 tvs, 1 laptop, 2 books, 63.9ms
Speed: 2.8ms preprocess, 63.9ms inference, 8.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▊  | 3930/5000 [09:14<02:36,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000455981.jpg: 640x480 1 toilet, 74.7ms
Speed: 2.7ms preprocess, 74.7ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  79%|███████▊  | 3931/5000 [09:14<02:29,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000456015.jpg: 448x640 5 persons, 4 horses, 101.4ms
Speed: 3.2ms preprocess, 101.4ms inference, 8.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▊  | 3932/5000 [09:14<02:43,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000456143.jpg: 384x640 1 bowl, 2 sandwichs, 2 carrots, 1 pizza, 1 dining table, 128.4ms
Speed: 2.6ms preprocess, 128.4ms inference, 5.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  79%|███████▊  | 3933/5000 [09:14<02:54,  6.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000456292.jpg: 640x480 1 cat, 1 toilet, 60.3ms
Speed: 2.3ms preprocess, 60.3ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  79%|███████▊  | 3934/5000 [09:15<02:37,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000456303.jpg: 640x416 1 person, 149.8ms
Speed: 3.7ms preprocess, 149.8ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  79%|███████▊  | 3935/5000 [09:15<02:52,  6.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000456394.jpg: 512x640 1 car, 1 motorcycle, 62.6ms
Speed: 3.7ms preprocess, 62.6ms inference, 2.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  79%|███████▊  | 3936/5000 [09:15<02:37,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000456496.jpg: 448x640 2 persons, 6 birds, 58.9ms
Speed: 2.4ms preprocess, 58.9ms inference, 8.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▊  | 3937/5000 [09:15<02:31,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000456559.jpg: 480x640 1 person, 94.1ms
Speed: 2.6ms preprocess, 94.1ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3938/5000 [09:15<02:33,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000456662.jpg: 640x640 1 person, 1 refrigerator, 80.1ms
Speed: 2.9ms preprocess, 80.1ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  79%|███████▉  | 3939/5000 [09:15<02:29,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000456865.jpg: 448x640 1 airplane, 74.7ms
Speed: 2.9ms preprocess, 74.7ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▉  | 3940/5000 [09:15<02:27,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000457078.jpg: 480x640 2 persons, 2 cups, 1 tv, 1 clock, 71.3ms
Speed: 2.7ms preprocess, 71.3ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3941/5000 [09:16<02:27,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000457262.jpg: 640x640 2 bananas, 1 dining table, 82.1ms
Speed: 3.2ms preprocess, 82.1ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  79%|███████▉  | 3942/5000 [09:16<02:30,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000457559.jpg: 448x640 5 persons, 2 sports balls, 75.2ms
Speed: 4.4ms preprocess, 75.2ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▉  | 3943/5000 [09:16<02:31,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000457848.jpg: 448x640 3 persons, 4 motorcycles, 62.1ms
Speed: 2.3ms preprocess, 62.1ms inference, 6.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▉  | 3944/5000 [09:16<02:25,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000457884.jpg: 480x640 3 persons, 3 baseball gloves, 61.9ms
Speed: 2.4ms preprocess, 61.9ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3945/5000 [09:16<02:21,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000458045.jpg: 480x640 9 persons, 1 baseball glove, 66.4ms
Speed: 3.3ms preprocess, 66.4ms inference, 8.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3946/5000 [09:16<02:31,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000458054.jpg: 448x640 (no detections), 60.7ms
Speed: 2.4ms preprocess, 60.7ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000458109.jpg: 480x640 1 train, 61.3ms
Speed: 2.9ms preprocess, 61.3ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3948/5000 [09:17<02:10,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000458223.jpg: 352x640 1 person, 1 bicycle, 2 boats, 153.4ms
Speed: 2.0ms preprocess, 153.4ms inference, 3.2ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  79%|███████▉  | 3949/5000 [09:17<02:31,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000458255.jpg: 448x640 1 person, 1 cat, 1 couch, 1 bed, 58.0ms
Speed: 2.7ms preprocess, 58.0ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▉  | 3950/5000 [09:17<02:23,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000458325.jpg: 448x640 8 persons, 17 cars, 6 traffic lights, 1 skateboard, 60.7ms
Speed: 3.1ms preprocess, 60.7ms inference, 27.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▉  | 3951/5000 [09:17<02:46,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000458410.jpg: 480x640 1 couch, 1 dining table, 1 microwave, 1 refrigerator, 97.9ms
Speed: 2.3ms preprocess, 97.9ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3952/5000 [09:17<02:44,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000458663.jpg: 448x640 3 chairs, 1 oven, 1 sink, 61.7ms
Speed: 2.8ms preprocess, 61.7ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▉  | 3953/5000 [09:17<02:32,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000458702.jpg: 640x448 1 car, 2 parking meters, 163.3ms
Speed: 2.6ms preprocess, 163.3ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  79%|███████▉  | 3954/5000 [09:18<02:53,  6.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000458755.jpg: 480x640 2 persons, 6 sheeps, 59.6ms
Speed: 2.3ms preprocess, 59.6ms inference, 9.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3955/5000 [09:18<02:41,  6.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000458768.jpg: 448x640 1 couch, 2 ovens, 61.7ms
Speed: 2.6ms preprocess, 61.7ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▉  | 3956/5000 [09:18<02:29,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000458790.jpg: 480x640 (no detections), 82.1ms
Speed: 4.1ms preprocess, 82.1ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3957/5000 [09:18<02:22,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000458992.jpg: 640x480 1 person, 1 bottle, 1 pizza, 1 couch, 70.1ms
Speed: 2.8ms preprocess, 70.1ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  79%|███████▉  | 3958/5000 [09:18<02:21,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459153.jpg: 640x448 1 person, 1 train, 1 bench, 1 laptop, 1 cell phone, 66.9ms
Speed: 2.7ms preprocess, 66.9ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  79%|███████▉  | 3959/5000 [09:18<02:19,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459195.jpg: 480x640 3 persons, 2 frisbees, 1 sports ball, 116.0ms
Speed: 4.3ms preprocess, 116.0ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3960/5000 [09:18<02:34,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459272.jpg: 640x480 8 persons, 2 cell phones, 61.7ms
Speed: 2.6ms preprocess, 61.7ms inference, 10.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  79%|███████▉  | 3961/5000 [09:18<02:30,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459396.jpg: 640x640 3 cows, 82.3ms
Speed: 3.6ms preprocess, 82.3ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  79%|███████▉  | 3962/5000 [09:19<02:29,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459437.jpg: 448x640 1 person, 3 sheeps, 85.5ms
Speed: 2.5ms preprocess, 85.5ms inference, 8.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▉  | 3963/5000 [09:19<02:30,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459467.jpg: 448x640 1 airplane, 59.8ms
Speed: 2.8ms preprocess, 59.8ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▉  | 3964/5000 [09:19<02:17,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459500.jpg: 640x480 1 car, 1 clock, 58.1ms
Speed: 3.6ms preprocess, 58.1ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  79%|███████▉  | 3965/5000 [09:19<02:11,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459634.jpg: 544x640 2 persons, 2 motorcycles, 1 boat, 162.8ms
Speed: 2.1ms preprocess, 162.8ms inference, 5.6ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  79%|███████▉  | 3966/5000 [09:19<02:42,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459662.jpg: 480x640 2 persons, 61.5ms
Speed: 2.3ms preprocess, 61.5ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3967/5000 [09:19<02:27,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459757.jpg: 480x640 1 giraffe, 63.3ms
Speed: 2.5ms preprocess, 63.3ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3968/5000 [09:19<02:18,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459809.jpg: 448x640 3 persons, 1 boat, 1 kite, 74.4ms
Speed: 2.5ms preprocess, 74.4ms inference, 8.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▉  | 3969/5000 [09:20<02:21,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459887.jpg: 640x480 4 bananas, 66.6ms
Speed: 2.5ms preprocess, 66.6ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  79%|███████▉  | 3970/5000 [09:20<02:17,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000459954.jpg: 480x640 1 person, 1 suitcase, 1 microwave, 1 refrigerator, 67.5ms
Speed: 3.3ms preprocess, 67.5ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3971/5000 [09:20<02:14,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460147.jpg: 448x640 20 cars, 3 trucks, 88.4ms
Speed: 2.8ms preprocess, 88.4ms inference, 25.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  79%|███████▉  | 3972/5000 [09:20<02:39,  6.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460160.jpg: 480x640 8 boats, 3 birds, 59.3ms
Speed: 2.6ms preprocess, 59.3ms inference, 10.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  79%|███████▉  | 3973/5000 [09:20<02:35,  6.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460229.jpg: 640x384 1 car, 2 traffic lights, 1 stop sign, 111.8ms
Speed: 2.2ms preprocess, 111.8ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  79%|███████▉  | 3974/5000 [09:20<02:38,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460333.jpg: 480x640 1 person, 1 dog, 2 chairs, 1 couch, 2 beds, 88.2ms
Speed: 2.8ms preprocess, 88.2ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|███████▉  | 3975/5000 [09:21<02:40,  6.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460347.jpg: 640x448 4 cars, 2 buss, 61.5ms
Speed: 2.7ms preprocess, 61.5ms inference, 5.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  80%|███████▉  | 3976/5000 [09:21<02:29,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460379.jpg: 640x448 3 giraffes, 66.8ms
Speed: 2.8ms preprocess, 66.8ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  80%|███████▉  | 3977/5000 [09:21<02:23,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460494.jpg: 448x640 1 cup, 1 fork, 2 bowls, 2 broccolis, 1 dining table, 76.4ms
Speed: 2.9ms preprocess, 76.4ms inference, 11.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|███████▉  | 3978/5000 [09:21<02:29,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460682.jpg: 192x640 1 person, 88.2ms
Speed: 1.9ms preprocess, 88.2ms inference, 1.5ms postprocess per image at shape (1, 3, 192, 640)


Segmenting Images:  80%|███████▉  | 3979/5000 [09:21<02:23,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460683.jpg: 640x480 2 persons, 2 ties, 1 chair, 65.1ms
Speed: 2.6ms preprocess, 65.1ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  80%|███████▉  | 3980/5000 [09:21<02:18,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460841.jpg: 480x640 1 person, 1 cat, 1 book, 98.2ms
Speed: 3.8ms preprocess, 98.2ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|███████▉  | 3981/5000 [09:21<02:23,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460927.jpg: 384x640 1 bear, 116.9ms
Speed: 2.1ms preprocess, 116.9ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  80%|███████▉  | 3982/5000 [09:21<02:28,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460929.jpg: 640x480 1 bottle, 1 hot dog, 1 dining table, 62.8ms
Speed: 2.9ms preprocess, 62.8ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  80%|███████▉  | 3983/5000 [09:22<02:19,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000460967.jpg: 640x608 1 bus, 185.4ms
Speed: 2.0ms preprocess, 185.4ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  80%|███████▉  | 3984/5000 [09:22<02:49,  5.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000461009.jpg: 640x640 8 persons, 1 sports ball, 1 apple, 1 orange, 79.6ms
Speed: 3.4ms preprocess, 79.6ms inference, 14.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  80%|███████▉  | 3985/5000 [09:22<02:54,  5.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000461036.jpg: 448x640 1 bear, 57.8ms
Speed: 3.8ms preprocess, 57.8ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|███████▉  | 3986/5000 [09:22<02:34,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000461275.jpg: 448x640 1 person, 85.0ms
Speed: 4.1ms preprocess, 85.0ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|███████▉  | 3987/5000 [09:22<02:29,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000461405.jpg: 448x640 14 sheeps, 57.8ms
Speed: 2.8ms preprocess, 57.8ms inference, 14.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|███████▉  | 3988/5000 [09:22<02:27,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000461573.jpg: 512x640 1 bench, 136.0ms
Speed: 3.8ms preprocess, 136.0ms inference, 1.6ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  80%|███████▉  | 3989/5000 [09:23<02:40,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000461751.jpg: 576x640 1 person, 2 cars, 182.0ms
Speed: 1.7ms preprocess, 182.0ms inference, 3.6ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  80%|███████▉  | 3990/5000 [09:23<03:02,  5.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000462031.jpg: 640x480 1 person, 1 sports ball, 61.6ms
Speed: 2.5ms preprocess, 61.6ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  80%|███████▉  | 3991/5000 [09:23<02:42,  6.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000462371.jpg: 544x640 10 persons, 1 tie, 147.4ms
Speed: 2.6ms preprocess, 147.4ms inference, 11.2ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  80%|███████▉  | 3992/5000 [09:23<03:02,  5.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000462576.jpg: 480x640 1 cup, 2 bowls, 1 orange, 1 dining table, 85.0ms
Speed: 2.6ms preprocess, 85.0ms inference, 7.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|███████▉  | 3993/5000 [09:23<02:53,  5.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000462614.jpg: 512x640 1 potted plant, 2 toilets, 1 sink, 70.0ms
Speed: 3.4ms preprocess, 70.0ms inference, 4.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  80%|███████▉  | 3994/5000 [09:23<02:40,  6.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000462629.jpg: 448x640 1 toilet, 2 refrigerators, 64.6ms
Speed: 3.0ms preprocess, 64.6ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|███████▉  | 3995/5000 [09:24<02:26,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000462643.jpg: 640x448 3 persons, 1 cell phone, 152.6ms
Speed: 2.2ms preprocess, 152.6ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  80%|███████▉  | 3996/5000 [09:24<02:44,  6.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000462728.jpg: 448x640 2 persons, 2 surfboards, 58.3ms
Speed: 2.1ms preprocess, 58.3ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|███████▉  | 3997/5000 [09:24<02:29,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000462756.jpg: 448x640 1 motorcycle, 1 truck, 58.5ms
Speed: 3.1ms preprocess, 58.5ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|███████▉  | 3998/5000 [09:24<02:17,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000462904.jpg: 480x640 10 persons, 1 cow, 2 umbrellas, 3 chairs, 79.5ms
Speed: 3.3ms preprocess, 79.5ms inference, 19.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|███████▉  | 3999/5000 [09:24<02:32,  6.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463037.jpg: 448x640 2 airplanes, 54.1ms
Speed: 2.7ms preprocess, 54.1ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|████████  | 4000/5000 [09:24<02:17,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463174.jpg: 448x640 1 person, 1 sports ball, 1 tennis racket, 60.7ms
Speed: 3.2ms preprocess, 60.7ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|████████  | 4001/5000 [09:24<02:09,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463199.jpg: 480x640 12 persons, 2 handbags, 2 cell phones, 65.2ms
Speed: 3.2ms preprocess, 65.2ms inference, 15.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|████████  | 4002/5000 [09:25<02:21,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463283.jpg: 640x640 2 bottles, 2 cups, 1 fork, 1 orange, 138.9ms
Speed: 5.3ms preprocess, 138.9ms inference, 8.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  80%|████████  | 4003/5000 [09:25<02:42,  6.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463522.jpg: 480x640 4 persons, 1 bicycle, 1 horse, 55.7ms
Speed: 3.0ms preprocess, 55.7ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|████████  | 4004/5000 [09:25<02:28,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463527.jpg: 480x640 2 bottles, 1 cup, 1 knife, 1 bowl, 2 sandwichs, 1 dining table, 66.2ms
Speed: 3.0ms preprocess, 66.2ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|████████  | 4005/5000 [09:25<02:25,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463542.jpg: 448x640 25 persons, 3 skiss, 97.4ms
Speed: 3.3ms preprocess, 97.4ms inference, 30.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|████████  | 4006/5000 [09:25<02:53,  5.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463618.jpg: 480x640 2 persons, 1 couch, 1 tv, 67.6ms
Speed: 3.0ms preprocess, 67.6ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|████████  | 4007/5000 [09:25<02:39,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463647.jpg: 480x640 2 cars, 1 truck, 70.3ms
Speed: 3.4ms preprocess, 70.3ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|████████  | 4008/5000 [09:26<02:29,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463690.jpg: 448x640 5 persons, 85.2ms
Speed: 2.8ms preprocess, 85.2ms inference, 12.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|████████  | 4009/5000 [09:26<02:28,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463730.jpg: 448x640 9 persons, 1 car, 1 bus, 2 trucks, 60.3ms
Speed: 2.2ms preprocess, 60.3ms inference, 11.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|████████  | 4010/5000 [09:26<02:25,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463802.jpg: 480x640 2 persons, 61.4ms
Speed: 2.7ms preprocess, 61.4ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|████████  | 4011/5000 [09:26<02:14,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463842.jpg: 480x640 18 bottles, 64.3ms
Speed: 3.6ms preprocess, 64.3ms inference, 17.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|████████  | 4012/5000 [09:26<02:33,  6.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463849.jpg: 640x576 2 persons, 5 cars, 5 boats, 1 bird, 138.0ms
Speed: 1.6ms preprocess, 138.0ms inference, 13.7ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:  80%|████████  | 4013/5000 [09:26<02:54,  5.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000463918.jpg: 480x640 2 persons, 1 cat, 1 couch, 7 remotes, 61.9ms
Speed: 4.2ms preprocess, 61.9ms inference, 11.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|████████  | 4014/5000 [09:27<02:45,  5.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000464089.jpg: 480x640 6 persons, 1 sports ball, 2 baseball bats, 1 baseball glove, 86.4ms
Speed: 3.0ms preprocess, 86.4ms inference, 14.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|████████  | 4015/5000 [09:27<02:45,  5.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000464144.jpg: 640x448 1 person, 1 skis, 61.9ms
Speed: 2.9ms preprocess, 61.9ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  80%|████████  | 4016/5000 [09:27<02:27,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000464251.jpg: 480x640 1 bird, 2 chairs, 58.3ms
Speed: 2.7ms preprocess, 58.3ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|████████  | 4017/5000 [09:27<02:17,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000464358.jpg: 448x640 1 chair, 1 bed, 92.2ms
Speed: 3.2ms preprocess, 92.2ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|████████  | 4018/5000 [09:27<02:19,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000464476.jpg: 480x640 1 person, 1 tv, 64.9ms
Speed: 4.3ms preprocess, 64.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|████████  | 4019/5000 [09:27<02:12,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000464522.jpg: 640x512 1 dog, 136.9ms
Speed: 2.9ms preprocess, 136.9ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  80%|████████  | 4020/5000 [09:27<02:27,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000464689.jpg: 640x608 1 bottle, 181.4ms
Speed: 2.0ms preprocess, 181.4ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  80%|████████  | 4021/5000 [09:28<02:50,  5.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000464786.jpg: 448x640 1 person, 1 hot dog, 59.2ms
Speed: 2.9ms preprocess, 59.2ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|████████  | 4022/5000 [09:28<02:30,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000464824.jpg: 640x480 1 person, 1 skateboard, 63.3ms
Speed: 2.3ms preprocess, 63.3ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  80%|████████  | 4023/5000 [09:28<02:18,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000464872.jpg: 448x640 2 zebras, 64.6ms
Speed: 2.7ms preprocess, 64.6ms inference, 8.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  80%|████████  | 4024/5000 [09:28<02:14,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000465129.jpg: 480x640 1 person, 2 bottles, 1 chair, 1 tv, 1 microwave, 3 sinks, 68.8ms
Speed: 2.4ms preprocess, 68.8ms inference, 8.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  80%|████████  | 4025/5000 [09:28<02:15,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000465179.jpg: 640x480 1 person, 3 toothbrushs, 65.8ms
Speed: 3.7ms preprocess, 65.8ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  81%|████████  | 4026/5000 [09:28<02:10,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000465180.jpg: 448x640 6 persons, 5 elephants, 70.5ms
Speed: 2.5ms preprocess, 70.5ms inference, 19.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████  | 4027/5000 [09:28<02:18,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000465430.jpg: 544x640 1 knife, 2 sandwichs, 1 oven, 72.1ms
Speed: 2.0ms preprocess, 72.1ms inference, 5.7ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  81%|████████  | 4028/5000 [09:28<02:14,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000465549.jpg: 448x640 2 persons, 1 bowl, 1 couch, 1 potted plant, 61.7ms
Speed: 3.1ms preprocess, 61.7ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████  | 4029/5000 [09:29<02:10,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000465585.jpg: 640x448 1 bowl, 4 broccolis, 17 carrots, 62.6ms
Speed: 4.4ms preprocess, 62.6ms inference, 29.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  81%|████████  | 4030/5000 [09:29<02:31,  6.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000465675.jpg: 416x640 1 boat, 118.4ms
Speed: 2.8ms preprocess, 118.4ms inference, 1.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  81%|████████  | 4031/5000 [09:29<02:33,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000465718.jpg: 448x640 2 persons, 2 tvs, 1 laptop, 1 mouse, 1 keyboard, 1 cell phone, 59.0ms
Speed: 2.7ms preprocess, 59.0ms inference, 7.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████  | 4032/5000 [09:29<02:24,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000465806.jpg: 480x640 1 person, 1 cup, 1 banana, 60.1ms
Speed: 2.5ms preprocess, 60.1ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4033/5000 [09:29<02:13,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000465822.jpg: 480x640 12 persons, 1 bottle, 5 chairs, 2 laptops, 1 clock, 74.0ms
Speed: 3.8ms preprocess, 74.0ms inference, 34.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4034/5000 [09:29<02:34,  6.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000465836.jpg: 480x640 3 persons, 3 skiss, 61.9ms
Speed: 4.3ms preprocess, 61.9ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4035/5000 [09:30<02:25,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000466085.jpg: 640x480 1 toilet, 1 sink, 64.8ms
Speed: 2.7ms preprocess, 64.8ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  81%|████████  | 4036/5000 [09:30<02:13,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000466125.jpg: 640x448 4 persons, 1 umbrella, 1 chair, 81.5ms
Speed: 2.7ms preprocess, 81.5ms inference, 9.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  81%|████████  | 4037/5000 [09:30<02:17,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000466156.jpg: 480x640 2 cars, 1 cat, 64.5ms
Speed: 4.0ms preprocess, 64.5ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4038/5000 [09:30<02:10,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000466256.jpg: 448x640 1 fork, 3 carrots, 57.9ms
Speed: 2.5ms preprocess, 57.9ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████  | 4039/5000 [09:30<02:04,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000466339.jpg: 640x384 1 cat, 1 dog, 157.5ms
Speed: 2.5ms preprocess, 157.5ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  81%|████████  | 4040/5000 [09:30<02:24,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000466416.jpg: 448x640 1 clock, 58.4ms
Speed: 2.6ms preprocess, 58.4ms inference, 1.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████  | 4041/5000 [09:30<02:11,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000466567.jpg: 640x480 1 donut, 66.8ms
Speed: 2.8ms preprocess, 66.8ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  81%|████████  | 4042/5000 [09:30<02:05,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000466602.jpg: 640x480 1 person, 70.6ms
Speed: 3.0ms preprocess, 70.6ms inference, 8.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  81%|████████  | 4043/5000 [09:31<02:05,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000466835.jpg: 640x480 2 bananas, 59.9ms
Speed: 3.4ms preprocess, 59.9ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  81%|████████  | 4044/5000 [09:31<02:00,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000466986.jpg: 480x640 2 persons, 1 tv, 62.6ms
Speed: 3.0ms preprocess, 62.6ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4045/5000 [09:31<01:57,  8.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000467176.jpg: 448x640 6 persons, 1 tv, 2 remotes, 58.8ms
Speed: 3.0ms preprocess, 58.8ms inference, 8.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████  | 4046/5000 [09:31<02:06,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000467315.jpg: 448x640 3 persons, 2 chairs, 1 vase, 59.2ms
Speed: 2.9ms preprocess, 59.2ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████  | 4047/5000 [09:31<02:05,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000467511.jpg: 640x416 2 persons, 1 motorcycle, 115.7ms
Speed: 2.6ms preprocess, 115.7ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  81%|████████  | 4048/5000 [09:31<02:15,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000467776.jpg: 448x640 1 airplane, 2 cows, 89.7ms
Speed: 2.7ms preprocess, 89.7ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████  | 4049/5000 [09:31<02:17,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000467848.jpg: 480x640 1 person, 3 cars, 3 trucks, 66.5ms
Speed: 3.4ms preprocess, 66.5ms inference, 6.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4050/5000 [09:32<02:15,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000468124.jpg: 480x640 1 car, 1 bus, 65.9ms
Speed: 2.6ms preprocess, 65.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4051/5000 [09:32<02:08,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000468233.jpg: 448x640 1 laptop, 1 clock, 87.5ms
Speed: 3.7ms preprocess, 87.5ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████  | 4052/5000 [09:32<02:10,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000468245.jpg: 448x640 1 bed, 56.5ms
Speed: 3.3ms preprocess, 56.5ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████  | 4053/5000 [09:32<02:01,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000468332.jpg: 448x640 13 persons, 1 car, 61.4ms
Speed: 2.8ms preprocess, 61.4ms inference, 12.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████  | 4054/5000 [09:32<02:09,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000468501.jpg: 480x640 3 persons, 1 chair, 84.8ms
Speed: 4.3ms preprocess, 84.8ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4055/5000 [09:32<02:13,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000468505.jpg: 480x640 1 person, 2 cups, 1 hot dog, 2 dining tables, 58.7ms
Speed: 2.9ms preprocess, 58.7ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4056/5000 [09:32<02:09,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000468577.jpg: 640x640 1 person, 2 beds, 187.2ms
Speed: 3.4ms preprocess, 187.2ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  81%|████████  | 4057/5000 [09:33<02:40,  5.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000468632.jpg: 480x640 3 persons, 1 frisbee, 65.6ms
Speed: 2.8ms preprocess, 65.6ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4058/5000 [09:33<02:31,  6.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000468925.jpg: 480x640 1 person, 1 cup, 3 bananas, 2 cakes, 1 dining table, 76.9ms
Speed: 3.5ms preprocess, 76.9ms inference, 8.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4059/5000 [09:33<02:28,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000468954.jpg: 448x640 4 persons, 59.5ms
Speed: 2.4ms preprocess, 59.5ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████  | 4060/5000 [09:33<02:16,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000468965.jpg: 480x640 9 persons, 1 kite, 64.4ms
Speed: 3.0ms preprocess, 64.4ms inference, 10.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4061/5000 [09:33<02:14,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000469067.jpg: 480x640 2 persons, 1 cat, 1 couch, 1 bed, 1 cell phone, 90.0ms
Speed: 2.5ms preprocess, 90.0ms inference, 10.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████  | 4062/5000 [09:33<02:17,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000469174.jpg: 480x640 1 airplane, 1 kite, 65.6ms
Speed: 2.9ms preprocess, 65.6ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████▏ | 4063/5000 [09:33<02:09,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000469192.jpg: 480x640 1 person, 1 truck, 9 kites, 61.1ms
Speed: 2.9ms preprocess, 61.1ms inference, 11.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████▏ | 4064/5000 [09:34<02:09,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000469246.jpg: 480x640 2 persons, 1 train, 88.2ms
Speed: 2.7ms preprocess, 88.2ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████▏ | 4065/5000 [09:34<02:11,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000469652.jpg: 448x640 1 giraffe, 60.2ms
Speed: 2.7ms preprocess, 60.2ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████▏ | 4066/5000 [09:34<02:03,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000469828.jpg: 480x640 1 person, 61.2ms
Speed: 2.7ms preprocess, 61.2ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████▏ | 4067/5000 [09:34<01:57,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000470121.jpg: 480x640 2 cups, 1 bowl, 1 donut, 3 cakes, 1 dining table, 78.2ms
Speed: 2.8ms preprocess, 78.2ms inference, 15.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████▏ | 4068/5000 [09:34<02:05,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000470173.jpg: 640x448 1 person, 1 bicycle, 57.7ms
Speed: 2.7ms preprocess, 57.7ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  81%|████████▏ | 4069/5000 [09:34<01:59,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000470773.jpg: 480x640 1 person, 3 cups, 1 chair, 1 dining table, 65.4ms
Speed: 2.9ms preprocess, 65.4ms inference, 9.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████▏ | 4070/5000 [09:34<01:59,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000470779.jpg: 480x640 5 persons, 1 skis, 70.6ms
Speed: 2.6ms preprocess, 70.6ms inference, 11.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████▏ | 4071/5000 [09:35<02:07,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000470924.jpg: 480x640 7 persons, 7 cars, 3 wine glasss, 5 cups, 2 pizzas, 2 chairs, 3 dining tables, 68.7ms
Speed: 2.9ms preprocess, 68.7ms inference, 25.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  81%|████████▏ | 4072/5000 [09:35<02:27,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000470952.jpg: 640x448 7 persons, 2 skiss, 57.7ms
Speed: 2.6ms preprocess, 57.7ms inference, 8.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  81%|████████▏ | 4073/5000 [09:35<02:18,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000471023.jpg: 448x640 9 persons, 57.4ms
Speed: 2.5ms preprocess, 57.4ms inference, 7.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  81%|████████▏ | 4074/5000 [09:35<02:18,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000471087.jpg: 640x544 1 person, 1 tie, 139.3ms
Speed: 3.1ms preprocess, 139.3ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  82%|████████▏ | 4075/5000 [09:35<02:29,  6.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000471450.jpg: 480x640 2 sheeps, 1 cow, 57.1ms
Speed: 2.7ms preprocess, 57.1ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4076/5000 [09:35<02:14,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000471567.jpg: 480x640 1 person, 1 giraffe, 104.7ms
Speed: 3.2ms preprocess, 104.7ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4077/5000 [09:35<02:17,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000471756.jpg: 448x640 1 person, 1 surfboard, 60.6ms
Speed: 3.0ms preprocess, 60.6ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4078/5000 [09:36<02:08,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000471789.jpg: 480x640 10 persons, 1 kite, 63.1ms
Speed: 3.0ms preprocess, 63.1ms inference, 11.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4079/5000 [09:36<02:09,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000471869.jpg: 640x544 1 teddy bear, 85.1ms
Speed: 3.3ms preprocess, 85.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  82%|████████▏ | 4080/5000 [09:36<02:10,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000471893.jpg: 448x640 2 persons, 2 cups, 2 chairs, 1 remote, 3 books, 61.6ms
Speed: 2.8ms preprocess, 61.6ms inference, 8.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4081/5000 [09:36<02:09,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000471991.jpg: 448x640 1 potted plant, 2 clocks, 60.2ms
Speed: 2.6ms preprocess, 60.2ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4082/5000 [09:36<02:03,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000472030.jpg: 480x640 1 bench, 84.7ms
Speed: 2.9ms preprocess, 84.7ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4083/5000 [09:36<02:03,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000472046.jpg: 448x640 2 bowls, 3 chairs, 2 couchs, 2 potted plants, 1 tv, 1 oven, 1 vase, 58.7ms
Speed: 3.0ms preprocess, 58.7ms inference, 9.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4084/5000 [09:36<02:04,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000472298.jpg: 416x640 2 persons, 1 boat, 56.1ms
Speed: 2.5ms preprocess, 56.1ms inference, 5.7ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  82%|████████▏ | 4085/5000 [09:37<01:57,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000472375.jpg: 640x640 1 bicycle, 1 motorcycle, 1 dog, 80.5ms
Speed: 3.3ms preprocess, 80.5ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  82%|████████▏ | 4086/5000 [09:37<02:02,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000472623.jpg: 640x448 3 persons, 2 bicycles, 70.2ms
Speed: 3.2ms preprocess, 70.2ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  82%|████████▏ | 4087/5000 [09:37<02:02,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000472678.jpg: 480x640 1 bottle, 1 chair, 3 tvs, 1 mouse, 1 book, 63.0ms
Speed: 2.7ms preprocess, 63.0ms inference, 6.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4088/5000 [09:37<02:00,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000473015.jpg: 448x640 2 birds, 63.9ms
Speed: 3.2ms preprocess, 63.9ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4089/5000 [09:37<01:55,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000473118.jpg: 640x448 1 person, 1 bicycle, 1 skateboard, 92.4ms
Speed: 4.2ms preprocess, 92.4ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  82%|████████▏ | 4090/5000 [09:37<02:02,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000473121.jpg: 448x640 1 person, 64.9ms
Speed: 4.5ms preprocess, 64.9ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4091/5000 [09:37<01:57,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000473219.jpg: 448x640 5 persons, 1 tie, 1 couch, 6 potted plants, 64.5ms
Speed: 3.1ms preprocess, 64.5ms inference, 15.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4092/5000 [09:37<02:06,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000473237.jpg: 448x640 1 person, 1 pizza, 1 chair, 69.2ms
Speed: 3.2ms preprocess, 69.2ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4093/5000 [09:38<02:04,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000473406.jpg: 640x480 1 person, 2 bottles, 1 microwave, 1 sink, 69.5ms
Speed: 3.3ms preprocess, 69.5ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  82%|████████▏ | 4094/5000 [09:38<02:02,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000473821.jpg: 448x640 1 bowl, 2 chairs, 1 couch, 1 dining table, 1 tv, 113.5ms
Speed: 5.3ms preprocess, 113.5ms inference, 6.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4095/5000 [09:38<02:15,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000473869.jpg: 448x640 1 person, 1 bottle, 1 wine glass, 1 potted plant, 1 vase, 60.9ms
Speed: 3.3ms preprocess, 60.9ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4096/5000 [09:38<02:07,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000473974.jpg: 448x640 5 zebras, 63.8ms
Speed: 3.1ms preprocess, 63.8ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4097/5000 [09:38<02:04,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474021.jpg: 480x640 4 persons, 3 remotes, 65.6ms
Speed: 4.9ms preprocess, 65.6ms inference, 7.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4098/5000 [09:38<02:09,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474028.jpg: 448x640 12 persons, 1 sports ball, 67.0ms
Speed: 3.1ms preprocess, 67.0ms inference, 12.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4099/5000 [09:38<02:14,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474039.jpg: 416x640 1 person, 1 couch, 1 bed, 1 teddy bear, 59.5ms
Speed: 3.3ms preprocess, 59.5ms inference, 4.2ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  82%|████████▏ | 4100/5000 [09:39<02:05,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474078.jpg: 480x640 4 persons, 1 sports ball, 2 baseball bats, 66.1ms
Speed: 3.7ms preprocess, 66.1ms inference, 15.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4101/5000 [09:39<02:09,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474095.jpg: 640x448 1 person, 60.1ms
Speed: 4.8ms preprocess, 60.1ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  82%|████████▏ | 4102/5000 [09:39<02:01,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474164.jpg: 640x640 2 cars, 1 train, 1 truck, 1 dog, 84.0ms
Speed: 2.4ms preprocess, 84.0ms inference, 6.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  82%|████████▏ | 4103/5000 [09:39<02:07,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474167.jpg: 640x448 3 persons, 2 bottles, 4 pizzas, 1 dining table, 69.5ms
Speed: 3.5ms preprocess, 69.5ms inference, 9.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  82%|████████▏ | 4104/5000 [09:39<02:09,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474170.jpg: 480x640 3 elephants, 72.1ms
Speed: 3.4ms preprocess, 72.1ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4105/5000 [09:39<02:11,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474293.jpg: 480x640 1 person, 1 handbag, 65.1ms
Speed: 3.2ms preprocess, 65.1ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4106/5000 [09:39<02:05,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474344.jpg: 448x640 2 persons, 60.2ms
Speed: 2.9ms preprocess, 60.2ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4107/5000 [09:40<01:57,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474452.jpg: 480x640 9 cars, 64.2ms
Speed: 3.6ms preprocess, 64.2ms inference, 10.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4108/5000 [09:40<02:09,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474786.jpg: 640x480 3 bottles, 1 tv, 2 sinks, 65.2ms
Speed: 4.0ms preprocess, 65.2ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  82%|████████▏ | 4109/5000 [09:40<02:06,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474854.jpg: 640x448 1 person, 1 bed, 56.8ms
Speed: 2.5ms preprocess, 56.8ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  82%|████████▏ | 4110/5000 [09:40<01:57,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000474881.jpg: 480x640 5 sheeps, 61.3ms
Speed: 2.8ms preprocess, 61.3ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4111/5000 [09:40<01:54,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000475064.jpg: 448x640 3 persons, 1 hot dog, 62.9ms
Speed: 3.2ms preprocess, 62.9ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4112/5000 [09:40<01:56,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000475150.jpg: 448x640 1 giraffe, 60.9ms
Speed: 3.6ms preprocess, 60.9ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4113/5000 [09:40<01:52,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000475191.jpg: 480x640 1 person, 1 baseball bat, 1 tennis racket, 66.0ms
Speed: 2.9ms preprocess, 66.0ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4114/5000 [09:40<01:51,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000475223.jpg: 448x640 2 birds, 60.1ms
Speed: 3.1ms preprocess, 60.1ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4115/5000 [09:41<01:46,  8.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000475365.jpg: 448x640 1 truck, 1 clock, 79.7ms
Speed: 3.8ms preprocess, 79.7ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4116/5000 [09:41<01:52,  7.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000475387.jpg: 448x640 4 persons, 1 train, 62.0ms
Speed: 2.6ms preprocess, 62.0ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4117/5000 [09:41<01:51,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000475484.jpg: 640x448 2 persons, 4 cars, 1 traffic light, 63.1ms
Speed: 3.0ms preprocess, 63.1ms inference, 7.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  82%|████████▏ | 4118/5000 [09:41<01:59,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000475572.jpg: 448x640 2 persons, 1 potted plant, 2 vases, 1 teddy bear, 64.7ms
Speed: 3.2ms preprocess, 64.7ms inference, 6.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4119/5000 [09:41<01:57,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000475678.jpg: 480x640 11 suitcases, 69.3ms
Speed: 2.9ms preprocess, 69.3ms inference, 10.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4120/5000 [09:41<02:03,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000475732.jpg: 480x640 1 cat, 95.2ms
Speed: 2.8ms preprocess, 95.2ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4121/5000 [09:41<02:06,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000475779.jpg: 480x640 1 elephant, 66.4ms
Speed: 3.0ms preprocess, 66.4ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  82%|████████▏ | 4122/5000 [09:42<01:59,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000475904.jpg: 448x640 1 boat, 1 bird, 58.8ms
Speed: 2.7ms preprocess, 58.8ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4123/5000 [09:42<01:52,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000476119.jpg: 448x640 1 person, 3 cars, 1 skateboard, 61.1ms
Speed: 3.3ms preprocess, 61.1ms inference, 5.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  82%|████████▏ | 4124/5000 [09:42<01:52,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000476215.jpg: 416x640 1 person, 5 horses, 80.5ms
Speed: 4.0ms preprocess, 80.5ms inference, 6.7ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  82%|████████▎ | 4125/5000 [09:42<01:55,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000476258.jpg: 384x640 1 person, 1 skateboard, 131.4ms
Speed: 2.8ms preprocess, 131.4ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  83%|████████▎ | 4126/5000 [09:42<02:08,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000476415.jpg: 640x448 1 person, 1 tie, 62.9ms
Speed: 2.8ms preprocess, 62.9ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  83%|████████▎ | 4127/5000 [09:42<01:59,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000476491.jpg: 640x448 1 person, 60.3ms
Speed: 3.8ms preprocess, 60.3ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  83%|████████▎ | 4128/5000 [09:42<01:58,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000476514.jpg: 640x448 2 persons, 1 umbrella, 1 tie, 62.8ms
Speed: 2.7ms preprocess, 62.8ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  83%|████████▎ | 4129/5000 [09:43<01:55,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000476704.jpg: 448x640 1 bicycle, 4 cars, 1 bus, 60.1ms
Speed: 2.9ms preprocess, 60.1ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  83%|████████▎ | 4130/5000 [09:43<01:52,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000476770.jpg: 448x640 9 persons, 3 baseball gloves, 1 chair, 97.9ms
Speed: 2.7ms preprocess, 97.9ms inference, 14.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  83%|████████▎ | 4131/5000 [09:43<02:06,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000476787.jpg: 480x640 4 bottles, 1 cup, 1 bowl, 1 pizza, 63.7ms
Speed: 2.5ms preprocess, 63.7ms inference, 6.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4132/5000 [09:43<02:01,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000476810.jpg: 480x640 1 cat, 1 chair, 1 remote, 67.7ms
Speed: 2.9ms preprocess, 67.7ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4133/5000 [09:43<01:56,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000477118.jpg: 448x640 (no detections), 61.9ms
Speed: 3.6ms preprocess, 61.9ms inference, 0.6ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000477227.jpg: 448x640 1 bus, 53.0ms
Speed: 2.7ms preprocess, 53.0ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  83%|████████▎ | 4135/5000 [09:43<01:43,  8.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000477288.jpg: 640x480 10 persons, 5 umbrellas, 65.4ms
Speed: 3.3ms preprocess, 65.4ms inference, 15.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  83%|████████▎ | 4136/5000 [09:43<01:51,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000477441.jpg: 480x640 1 person, 2 airplanes, 1 truck, 63.6ms
Speed: 2.7ms preprocess, 63.6ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4137/5000 [09:44<01:49,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000477623.jpg: 480x640 2 trains, 105.7ms
Speed: 2.6ms preprocess, 105.7ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4138/5000 [09:44<01:56,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000477689.jpg: 640x480 1 person, 64.6ms
Speed: 2.8ms preprocess, 64.6ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  83%|████████▎ | 4139/5000 [09:44<01:50,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000477805.jpg: 448x640 2 persons, 61.3ms
Speed: 2.6ms preprocess, 61.3ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  83%|████████▎ | 4140/5000 [09:44<01:45,  8.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000477955.jpg: 640x480 1 person, 1 kite, 77.6ms
Speed: 2.5ms preprocess, 77.6ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  83%|████████▎ | 4141/5000 [09:44<01:49,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000478136.jpg: 640x544 13 donuts, 71.8ms
Speed: 1.6ms preprocess, 71.8ms inference, 14.3ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  83%|████████▎ | 4142/5000 [09:44<01:59,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000478286.jpg: 480x640 (no detections), 64.3ms
Speed: 2.6ms preprocess, 64.3ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4143/5000 [09:44<01:49,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000478393.jpg: 640x480 2 cats, 1 couch, 1 bed, 102.3ms
Speed: 2.6ms preprocess, 102.3ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  83%|████████▎ | 4144/5000 [09:44<01:57,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000478420.jpg: 640x480 2 persons, 63.6ms
Speed: 2.8ms preprocess, 63.6ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  83%|████████▎ | 4145/5000 [09:45<01:51,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000478474.jpg: 640x480 1 car, 1 truck, 62.1ms
Speed: 2.4ms preprocess, 62.1ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  83%|████████▎ | 4146/5000 [09:45<01:46,  8.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000478721.jpg: 480x640 4 persons, 2 cars, 1 bus, 4 trucks, 107.2ms
Speed: 3.3ms preprocess, 107.2ms inference, 20.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4147/5000 [09:45<02:05,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000478862.jpg: 480x640 10 persons, 1 airplane, 62.5ms
Speed: 2.9ms preprocess, 62.5ms inference, 10.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4148/5000 [09:45<02:04,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000479030.jpg: 448x640 1 train, 60.6ms
Speed: 2.8ms preprocess, 60.6ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  83%|████████▎ | 4149/5000 [09:45<01:54,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000479099.jpg: 480x640 1 bench, 87.2ms
Speed: 2.6ms preprocess, 87.2ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4150/5000 [09:45<01:55,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000479126.jpg: 448x640 2 persons, 1 bench, 2 chairs, 1 laptop, 63.8ms
Speed: 2.9ms preprocess, 63.8ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  83%|████████▎ | 4151/5000 [09:45<01:53,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000479155.jpg: 448x640 7 persons, 1 sheep, 1 handbag, 1 apple, 59.9ms
Speed: 4.3ms preprocess, 59.9ms inference, 8.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  83%|████████▎ | 4152/5000 [09:46<02:00,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000479248.jpg: 480x640 3 persons, 2 bottles, 1 bowl, 1 tv, 1 clock, 5 vases, 67.8ms
Speed: 2.8ms preprocess, 67.8ms inference, 13.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4153/5000 [09:46<02:05,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000479448.jpg: 480x640 1 spoon, 2 bowls, 17 bananas, 1 orange, 1 dining table, 63.9ms
Speed: 4.3ms preprocess, 63.9ms inference, 21.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4154/5000 [09:46<02:20,  6.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000479596.jpg: 448x640 4 apples, 12 oranges, 60.7ms
Speed: 2.8ms preprocess, 60.7ms inference, 13.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  83%|████████▎ | 4155/5000 [09:46<02:20,  6.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000479732.jpg: 448x640 1 sandwich, 3 carrots, 1 dining table, 64.8ms
Speed: 2.9ms preprocess, 64.8ms inference, 5.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  83%|████████▎ | 4156/5000 [09:46<02:10,  6.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000479912.jpg: 640x480 1 car, 1 airplane, 64.7ms
Speed: 2.8ms preprocess, 64.7ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  83%|████████▎ | 4157/5000 [09:46<02:01,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000479953.jpg: 416x640 1 person, 1 sports ball, 57.3ms
Speed: 2.7ms preprocess, 57.3ms inference, 2.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  83%|████████▎ | 4158/5000 [09:46<01:54,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000480021.jpg: 480x640 4 persons, 1 car, 2 motorcycles, 1 tie, 82.9ms
Speed: 3.2ms preprocess, 82.9ms inference, 8.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4159/5000 [09:47<02:00,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000480122.jpg: 640x640 1 cup, 2 knifes, 1 bowl, 1 banana, 5 chairs, 1 dining table, 1 oven, 1 sink, 94.6ms
Speed: 3.1ms preprocess, 94.6ms inference, 17.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  83%|████████▎ | 4160/5000 [09:47<02:14,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000480212.jpg: 480x640 1 cup, 1 dining table, 1 cell phone, 92.1ms
Speed: 3.0ms preprocess, 92.1ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4161/5000 [09:47<02:12,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000480275.jpg: 480x640 1 bicycle, 2 bananas, 65.6ms
Speed: 2.5ms preprocess, 65.6ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4162/5000 [09:47<02:03,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000480842.jpg: 608x640 3 giraffes, 170.5ms
Speed: 1.9ms preprocess, 170.5ms inference, 4.9ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  83%|████████▎ | 4163/5000 [09:47<02:22,  5.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000480936.jpg: 640x512 1 person, 1 bowl, 2 chairs, 175.1ms
Speed: 4.0ms preprocess, 175.1ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  83%|████████▎ | 4164/5000 [09:48<02:40,  5.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000480944.jpg: 640x480 7 cars, 1 bus, 5 traffic lights, 67.3ms
Speed: 3.0ms preprocess, 67.3ms inference, 14.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  83%|████████▎ | 4165/5000 [09:48<02:32,  5.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000480985.jpg: 640x480 6 persons, 2 motorcycles, 64.6ms
Speed: 4.3ms preprocess, 64.6ms inference, 7.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  83%|████████▎ | 4166/5000 [09:48<02:22,  5.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000481159.jpg: 480x640 1 horse, 73.2ms
Speed: 3.5ms preprocess, 73.2ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4167/5000 [09:48<02:10,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000481386.jpg: 640x448 1 person, 1 potted plant, 1 vase, 57.8ms
Speed: 2.6ms preprocess, 57.8ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  83%|████████▎ | 4168/5000 [09:48<01:59,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000481390.jpg: 480x640 11 persons, 1 sports ball, 63.9ms
Speed: 2.7ms preprocess, 63.9ms inference, 14.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4169/5000 [09:48<02:01,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000481404.jpg: 448x640 2 clocks, 1 vase, 57.4ms
Speed: 2.7ms preprocess, 57.4ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  83%|████████▎ | 4170/5000 [09:48<01:52,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000481413.jpg: 448x640 1 person, 1 frisbee, 83.9ms
Speed: 6.0ms preprocess, 83.9ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  83%|████████▎ | 4171/5000 [09:49<01:53,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000481480.jpg: 448x640 2 horses, 1 kite, 56.5ms
Speed: 2.8ms preprocess, 56.5ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  83%|████████▎ | 4172/5000 [09:49<01:46,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000481567.jpg: 480x640 1 person, 1 motorcycle, 63.7ms
Speed: 2.7ms preprocess, 63.7ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  83%|████████▎ | 4173/5000 [09:49<01:43,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000481573.jpg: 640x640 1 person, 126.4ms
Speed: 2.3ms preprocess, 126.4ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  83%|████████▎ | 4174/5000 [09:49<01:55,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000481582.jpg: 448x640 4 persons, 1 horse, 56.1ms
Speed: 2.4ms preprocess, 56.1ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▎ | 4175/5000 [09:49<01:49,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482100.jpg: 448x640 1 potted plant, 3 toilets, 59.9ms
Speed: 2.7ms preprocess, 59.9ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▎ | 4176/5000 [09:49<01:44,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482275.jpg: 480x640 2 persons, 1 tie, 2 cakes, 71.1ms
Speed: 2.7ms preprocess, 71.1ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▎ | 4177/5000 [09:49<01:45,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482319.jpg: 480x640 2 persons, 1 chair, 2 books, 67.5ms
Speed: 2.8ms preprocess, 67.5ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▎ | 4178/5000 [09:49<01:45,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482436.jpg: 416x640 2 persons, 1 cup, 80.9ms
Speed: 8.5ms preprocess, 80.9ms inference, 3.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  84%|████████▎ | 4179/5000 [09:50<01:47,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482477.jpg: 640x512 1 bird, 73.0ms
Speed: 2.6ms preprocess, 73.0ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  84%|████████▎ | 4180/5000 [09:50<01:45,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482487.jpg: 640x480 1 car, 2 clocks, 64.4ms
Speed: 3.2ms preprocess, 64.4ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  84%|████████▎ | 4181/5000 [09:50<01:43,  7.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482585.jpg: 480x640 2 persons, 2 trains, 84.2ms
Speed: 3.3ms preprocess, 84.2ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▎ | 4182/5000 [09:50<01:47,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482719.jpg: 480x640 1 banana, 59.7ms
Speed: 2.9ms preprocess, 59.7ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▎ | 4183/5000 [09:50<01:42,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482735.jpg: 448x640 1 person, 2 surfboards, 77.5ms
Speed: 4.7ms preprocess, 77.5ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▎ | 4184/5000 [09:50<01:48,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482800.jpg: 480x640 4 persons, 1 frisbee, 63.8ms
Speed: 2.6ms preprocess, 63.8ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▎ | 4185/5000 [09:50<01:46,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482917.jpg: 480x640 1 person, 2 dogs, 1 tv, 65.3ms
Speed: 4.4ms preprocess, 65.3ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▎ | 4186/5000 [09:50<01:49,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482970.jpg: 384x640 1 laptop, 1 keyboard, 63.5ms
Speed: 2.7ms preprocess, 63.5ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  84%|████████▎ | 4187/5000 [09:51<01:45,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000482978.jpg: 384x640 1 train, 51.6ms
Speed: 3.2ms preprocess, 51.6ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000483050.jpg: 480x640 1 bed, 63.6ms
Speed: 2.7ms preprocess, 63.6ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▍ | 4189/5000 [09:51<01:34,  8.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000483531.jpg: 480x640 1 bed, 60.2ms
Speed: 2.5ms preprocess, 60.2ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▍ | 4190/5000 [09:51<01:32,  8.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000483667.jpg: 640x448 1 person, 1 tie, 91.6ms
Speed: 2.8ms preprocess, 91.6ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  84%|████████▍ | 4191/5000 [09:51<01:38,  8.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000483999.jpg: 448x640 13 persons, 2 bottles, 1 wine glass, 6 cups, 2 chairs, 3 dining tables, 58.6ms
Speed: 2.5ms preprocess, 58.6ms inference, 22.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▍ | 4192/5000 [09:51<01:53,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000484029.jpg: 640x480 5 cars, 1 stop sign, 59.9ms
Speed: 2.5ms preprocess, 59.9ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  84%|████████▍ | 4193/5000 [09:51<01:48,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000484296.jpg: 480x640 2 zebras, 94.7ms
Speed: 2.8ms preprocess, 94.7ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▍ | 4194/5000 [09:52<01:51,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000484351.jpg: 448x640 15 persons, 2 ties, 2 bottles, 3 chairs, 1 dining table, 59.8ms
Speed: 2.5ms preprocess, 59.8ms inference, 20.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▍ | 4195/5000 [09:52<02:00,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000484404.jpg: 480x640 3 persons, 1 car, 1 bus, 61.5ms
Speed: 2.5ms preprocess, 61.5ms inference, 4.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▍ | 4196/5000 [09:52<01:52,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000484415.jpg: 480x640 (no detections), 85.7ms
Speed: 4.1ms preprocess, 85.7ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▍ | 4197/5000 [09:52<01:48,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000484760.jpg: 448x640 3 clocks, 57.4ms
Speed: 3.0ms preprocess, 57.4ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▍ | 4198/5000 [09:52<01:42,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000484893.jpg: 448x640 2 persons, 2 sheeps, 56.6ms
Speed: 2.8ms preprocess, 56.6ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▍ | 4199/5000 [09:52<01:38,  8.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000484978.jpg: 512x640 1 person, 7 cups, 1 knife, 2 dining tables, 132.3ms
Speed: 3.0ms preprocess, 132.3ms inference, 7.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  84%|████████▍ | 4200/5000 [09:52<01:57,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000485027.jpg: 640x352 1 person, 1 sports ball, 1 tennis racket, 144.6ms
Speed: 2.5ms preprocess, 144.6ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 352)


Segmenting Images:  84%|████████▍ | 4201/5000 [09:53<02:08,  6.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000485071.jpg: 448x640 2 persons, 58.9ms
Speed: 2.6ms preprocess, 58.9ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▍ | 4202/5000 [09:53<01:55,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000485130.jpg: 480x640 (no detections), 61.1ms
Speed: 2.7ms preprocess, 61.1ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000485237.jpg: 192x640 1 airplane, 2 trucks, 91.6ms
Speed: 2.5ms preprocess, 91.6ms inference, 1.5ms postprocess per image at shape (1, 3, 192, 640)


Segmenting Images:  84%|████████▍ | 4204/5000 [09:53<01:43,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000485424.jpg: 480x640 4 bottles, 1 cup, 1 microwave, 77.8ms
Speed: 2.6ms preprocess, 77.8ms inference, 7.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▍ | 4205/5000 [09:53<01:46,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000485480.jpg: 288x640 3 persons, 1 baseball glove, 112.9ms
Speed: 2.4ms preprocess, 112.9ms inference, 2.9ms postprocess per image at shape (1, 3, 288, 640)


Segmenting Images:  84%|████████▍ | 4206/5000 [09:53<01:52,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000485802.jpg: 640x448 1 airplane, 60.2ms
Speed: 2.6ms preprocess, 60.2ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  84%|████████▍ | 4207/5000 [09:53<01:44,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000485844.jpg: 448x640 1 person, 2 chairs, 1 dining table, 81.3ms
Speed: 4.2ms preprocess, 81.3ms inference, 7.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▍ | 4208/5000 [09:53<01:47,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000485895.jpg: 448x640 2 giraffes, 61.4ms
Speed: 3.9ms preprocess, 61.4ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▍ | 4209/5000 [09:54<01:42,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000485972.jpg: 640x448 1 fork, 1 cake, 1 dining table, 59.4ms
Speed: 2.7ms preprocess, 59.4ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  84%|████████▍ | 4210/5000 [09:54<01:39,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000486040.jpg: 640x480 1 laptop, 1 keyboard, 64.6ms
Speed: 2.8ms preprocess, 64.6ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  84%|████████▍ | 4211/5000 [09:54<01:37,  8.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000486046.jpg: 480x640 1 zebra, 1 giraffe, 95.4ms
Speed: 3.6ms preprocess, 95.4ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▍ | 4212/5000 [09:54<01:43,  7.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000486104.jpg: 480x640 9 persons, 64.8ms
Speed: 4.1ms preprocess, 64.8ms inference, 10.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▍ | 4213/5000 [09:54<01:47,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000486112.jpg: 448x640 1 person, 1 elephant, 61.6ms
Speed: 2.6ms preprocess, 61.6ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▍ | 4214/5000 [09:54<01:41,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000486438.jpg: 448x640 2 persons, 9 donuts, 73.9ms
Speed: 3.2ms preprocess, 73.9ms inference, 12.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▍ | 4215/5000 [09:54<01:49,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000486479.jpg: 480x640 1 dog, 67.6ms
Speed: 4.2ms preprocess, 67.6ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▍ | 4216/5000 [09:54<01:44,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000486573.jpg: 640x480 1 vase, 58.6ms
Speed: 2.6ms preprocess, 58.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  84%|████████▍ | 4217/5000 [09:55<01:38,  7.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000487583.jpg: 640x448 1 cup, 2 toilets, 65.3ms
Speed: 2.7ms preprocess, 65.3ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  84%|████████▍ | 4218/5000 [09:55<01:38,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000488075.jpg: 480x640 1 bottle, 1 apple, 2 chairs, 1 couch, 1 microwave, 64.3ms
Speed: 3.0ms preprocess, 64.3ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  84%|████████▍ | 4219/5000 [09:55<01:38,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000488166.jpg: 640x384 1 person, 2 cups, 1 sandwich, 1 pizza, 1 dining table, 123.3ms
Speed: 3.0ms preprocess, 123.3ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  84%|████████▍ | 4220/5000 [09:55<01:52,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000488251.jpg: 384x640 3 persons, 4 horses, 64.0ms
Speed: 2.7ms preprocess, 64.0ms inference, 8.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  84%|████████▍ | 4221/5000 [09:55<01:52,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000488270.jpg: 448x640 1 airplane, 59.0ms
Speed: 2.5ms preprocess, 59.0ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  84%|████████▍ | 4222/5000 [09:55<01:42,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000488385.jpg: 640x480 2 motorcycles, 59.6ms
Speed: 3.0ms preprocess, 59.6ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  84%|████████▍ | 4223/5000 [09:55<01:37,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000488592.jpg: 640x480 4 persons, 2 bicycles, 1 umbrella, 60.4ms
Speed: 3.2ms preprocess, 60.4ms inference, 9.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  84%|████████▍ | 4224/5000 [09:56<01:37,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000488664.jpg: 512x640 1 train, 85.6ms
Speed: 4.4ms preprocess, 85.6ms inference, 2.8ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  84%|████████▍ | 4225/5000 [09:56<01:42,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000488673.jpg: 640x480 3 persons, 2 bottles, 1 wine glass, 2 cups, 2 bowls, 2 dining tables, 66.5ms
Speed: 2.8ms preprocess, 66.5ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  85%|████████▍ | 4226/5000 [09:56<01:47,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000488710.jpg: 512x640 1 person, 72.2ms
Speed: 4.4ms preprocess, 72.2ms inference, 2.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  85%|████████▍ | 4227/5000 [09:56<01:44,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000488736.jpg: 480x640 2 clocks, 97.9ms
Speed: 2.8ms preprocess, 97.9ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  85%|████████▍ | 4228/5000 [09:56<01:49,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000489014.jpg: 480x640 1 boat, 1 dog, 69.4ms
Speed: 2.9ms preprocess, 69.4ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  85%|████████▍ | 4229/5000 [09:56<01:44,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000489046.jpg: 448x640 1 bird, 62.5ms
Speed: 2.9ms preprocess, 62.5ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  85%|████████▍ | 4230/5000 [09:56<01:39,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000489091.jpg: 640x480 1 toilet, 3 sinks, 82.1ms
Speed: 3.9ms preprocess, 82.1ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  85%|████████▍ | 4231/5000 [09:56<01:43,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000489305.jpg: 512x640 2 persons, 2 bowls, 1 carrot, 67.1ms
Speed: 3.8ms preprocess, 67.1ms inference, 5.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  85%|████████▍ | 4232/5000 [09:57<01:42,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000489339.jpg: 640x448 1 person, 3 surfboards, 59.0ms
Speed: 2.7ms preprocess, 59.0ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  85%|████████▍ | 4233/5000 [09:57<01:37,  7.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000489611.jpg: 640x640 1 person, 2 remotes, 1 book, 184.0ms
Speed: 1.7ms preprocess, 184.0ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  85%|████████▍ | 4234/5000 [09:57<02:03,  6.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000489764.jpg: 544x640 2 persons, 1 chair, 140.7ms
Speed: 1.7ms preprocess, 140.7ms inference, 3.3ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  85%|████████▍ | 4235/5000 [09:57<02:10,  5.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000489842.jpg: 576x640 25 persons, 3 ties, 139.1ms
Speed: 1.4ms preprocess, 139.1ms inference, 33.0ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  85%|████████▍ | 4236/5000 [09:57<02:35,  4.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000489924.jpg: 640x640 2 persons, 1 skateboard, 1 surfboard, 79.8ms
Speed: 3.3ms preprocess, 79.8ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  85%|████████▍ | 4237/5000 [09:58<02:26,  5.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000490125.jpg: 448x640 1 car, 2 trucks, 13 birds, 62.6ms
Speed: 4.4ms preprocess, 62.6ms inference, 15.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  85%|████████▍ | 4238/5000 [09:58<02:19,  5.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000490171.jpg: 448x640 1 person, 1 dog, 3 surfboards, 58.7ms
Speed: 2.8ms preprocess, 58.7ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  85%|████████▍ | 4239/5000 [09:58<02:03,  6.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000490413.jpg: 256x640 (no detections), 99.1ms
Speed: 2.4ms preprocess, 99.1ms inference, 0.3ms postprocess per image at shape (1, 3, 256, 640)


Segmenting Images:  85%|████████▍ | 4240/5000 [09:58<01:56,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000490470.jpg: 480x640 3 boats, 109.4ms
Speed: 2.7ms preprocess, 109.4ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  85%|████████▍ | 4241/5000 [09:58<01:58,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000490515.jpg: 448x640 1 person, 61.2ms
Speed: 2.7ms preprocess, 61.2ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  85%|████████▍ | 4242/5000 [09:58<01:47,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000490936.jpg: 640x480 8 persons, 1 bicycle, 1 car, 2 motorcycles, 1 truck, 1 backpack, 1 suitcase, 63.9ms
Speed: 4.1ms preprocess, 63.9ms inference, 16.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  85%|████████▍ | 4243/5000 [09:58<01:52,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491008.jpg: 448x640 2 persons, 1 pizza, 12 chairs, 1 potted plant, 2 dining tables, 1 vase, 63.9ms
Speed: 2.7ms preprocess, 63.9ms inference, 17.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  85%|████████▍ | 4244/5000 [09:59<01:59,  6.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491071.jpg: 640x416 2 sinks, 147.6ms
Speed: 3.9ms preprocess, 147.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  85%|████████▍ | 4245/5000 [09:59<02:09,  5.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491090.jpg: 640x480 1 person, 1 motorcycle, 62.4ms
Speed: 2.6ms preprocess, 62.4ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  85%|████████▍ | 4246/5000 [09:59<01:55,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491130.jpg: 640x448 1 person, 1 snowboard, 1 kite, 56.5ms
Speed: 2.6ms preprocess, 56.5ms inference, 5.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  85%|████████▍ | 4247/5000 [09:59<01:45,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491213.jpg: 480x640 4 cars, 1 motorcycle, 1 truck, 65.3ms
Speed: 2.7ms preprocess, 65.3ms inference, 11.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  85%|████████▍ | 4248/5000 [09:59<01:47,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491216.jpg: 640x480 1 cat, 1 dog, 3 bottles, 1 chair, 3 potted plants, 1 refrigerator, 2 vases, 64.4ms
Speed: 2.9ms preprocess, 64.4ms inference, 11.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  85%|████████▍ | 4249/5000 [09:59<01:48,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491366.jpg: 480x640 2 persons, 1 couch, 4 tvs, 62.8ms
Speed: 3.9ms preprocess, 62.8ms inference, 7.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  85%|████████▌ | 4250/5000 [09:59<01:46,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491464.jpg: 448x640 1 person, 1 car, 2 baseball bats, 60.4ms
Speed: 3.1ms preprocess, 60.4ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  85%|████████▌ | 4251/5000 [10:00<01:45,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491470.jpg: 640x480 1 car, 1 traffic light, 62.7ms
Speed: 4.5ms preprocess, 62.7ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  85%|████████▌ | 4252/5000 [10:00<01:40,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491497.jpg: 640x480 1 person, 1 chair, 1 bed, 1 tv, 1 book, 64.8ms
Speed: 4.2ms preprocess, 64.8ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  85%|████████▌ | 4253/5000 [10:00<01:38,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491613.jpg: 640x480 1 zebra, 81.1ms
Speed: 3.3ms preprocess, 81.1ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  85%|████████▌ | 4254/5000 [10:00<01:39,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491683.jpg: 640x480 1 horse, 63.2ms
Speed: 4.1ms preprocess, 63.2ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  85%|████████▌ | 4255/5000 [10:00<01:35,  7.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491725.jpg: 480x640 2 boats, 62.2ms
Speed: 4.5ms preprocess, 62.2ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  85%|████████▌ | 4256/5000 [10:00<01:33,  7.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491757.jpg: 480x640 1 cat, 1 bed, 65.8ms
Speed: 2.7ms preprocess, 65.8ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  85%|████████▌ | 4257/5000 [10:00<01:31,  8.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000491867.jpg: 640x480 3 persons, 1 tie, 101.6ms
Speed: 3.4ms preprocess, 101.6ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  85%|████████▌ | 4258/5000 [10:01<01:39,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000492077.jpg: 480x640 7 persons, 1 bicycle, 1 bus, 2 traffic lights, 68.6ms
Speed: 2.7ms preprocess, 68.6ms inference, 10.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  85%|████████▌ | 4259/5000 [10:01<01:42,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000492110.jpg: 448x640 3 persons, 1 cup, 1 chair, 3 dining tables, 1 laptop, 1 cell phone, 61.1ms
Speed: 2.8ms preprocess, 61.1ms inference, 9.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  85%|████████▌ | 4260/5000 [10:01<01:42,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000492282.jpg: 640x480 4 persons, 3 horses, 89.1ms
Speed: 2.7ms preprocess, 89.1ms inference, 7.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  85%|████████▌ | 4261/5000 [10:01<01:47,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000492284.jpg: 480x640 1 person, 1 bear, 2 backpacks, 1 kite, 66.1ms
Speed: 2.6ms preprocess, 66.1ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  85%|████████▌ | 4262/5000 [10:01<01:43,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000492362.jpg: 640x448 4 persons, 1 skateboard, 1 donut, 60.6ms
Speed: 2.8ms preprocess, 60.6ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  85%|████████▌ | 4263/5000 [10:01<01:40,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000492758.jpg: 448x640 1 person, 5 chairs, 2 couchs, 3 potted plants, 1 remote, 68.5ms
Speed: 2.5ms preprocess, 68.5ms inference, 34.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  85%|████████▌ | 4264/5000 [10:01<01:49,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000492878.jpg: 640x640 3 bottles, 1 sink, 1 vase, 3 toothbrushs, 80.5ms
Speed: 2.7ms preprocess, 80.5ms inference, 10.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  85%|████████▌ | 4265/5000 [10:02<01:51,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000492905.jpg: 448x640 1 tv, 1 laptop, 1 mouse, 1 keyboard, 1 cell phone, 62.2ms
Speed: 3.7ms preprocess, 62.2ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  85%|████████▌ | 4266/5000 [10:02<01:44,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000492937.jpg: 640x640 1 person, 1 bicycle, 102.2ms
Speed: 3.4ms preprocess, 102.2ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  85%|████████▌ | 4267/5000 [10:02<01:48,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000492968.jpg: 448x640 1 person, 1 snowboard, 61.3ms
Speed: 2.8ms preprocess, 61.3ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  85%|████████▌ | 4268/5000 [10:02<01:41,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000492992.jpg: 640x448 1 bird, 1 cow, 61.1ms
Speed: 2.8ms preprocess, 61.1ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  85%|████████▌ | 4269/5000 [10:02<01:35,  7.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000493019.jpg: 480x640 6 zebras, 62.2ms
Speed: 2.9ms preprocess, 62.2ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  85%|████████▌ | 4270/5000 [10:02<01:37,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000493284.jpg: 448x640 6 elephants, 62.7ms
Speed: 2.9ms preprocess, 62.7ms inference, 6.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  85%|████████▌ | 4271/5000 [10:02<01:36,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000493286.jpg: 384x640 2 persons, 1 car, 1 airplane, 115.9ms
Speed: 2.6ms preprocess, 115.9ms inference, 3.8ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  85%|████████▌ | 4272/5000 [10:03<01:44,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000493334.jpg: 416x640 1 cake, 1 cell phone, 150.6ms
Speed: 3.4ms preprocess, 150.6ms inference, 1.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  85%|████████▌ | 4273/5000 [10:03<01:55,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000493442.jpg: 448x640 1 person, 58.6ms
Speed: 2.7ms preprocess, 58.6ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  85%|████████▌ | 4274/5000 [10:03<01:44,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000493566.jpg: 288x640 2 trains, 115.7ms
Speed: 2.3ms preprocess, 115.7ms inference, 3.0ms postprocess per image at shape (1, 3, 288, 640)


Segmenting Images:  86%|████████▌ | 4275/5000 [10:03<01:48,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000493613.jpg: 448x640 1 person, 1 backpack, 1 skateboard, 80.3ms
Speed: 2.7ms preprocess, 80.3ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  86%|████████▌ | 4276/5000 [10:03<01:45,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000493772.jpg: 640x640 1 person, 2 umbrellas, 83.6ms
Speed: 2.3ms preprocess, 83.6ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  86%|████████▌ | 4277/5000 [10:03<01:45,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000493799.jpg: 512x640 1 fork, 3 carrots, 1 donut, 1 dining table, 143.3ms
Speed: 3.1ms preprocess, 143.3ms inference, 8.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  86%|████████▌ | 4278/5000 [10:03<01:59,  6.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000493864.jpg: 640x480 3 persons, 1 surfboard, 89.1ms
Speed: 2.9ms preprocess, 89.1ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  86%|████████▌ | 4279/5000 [10:04<01:55,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000493905.jpg: 640x576 12 persons, 1 sports ball, 10 chairs, 156.5ms
Speed: 1.7ms preprocess, 156.5ms inference, 23.8ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:  86%|████████▌ | 4280/5000 [10:04<02:23,  5.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000494188.jpg: 640x480 5 persons, 1 bicycle, 1 bird, 1 handbag, 59.0ms
Speed: 2.6ms preprocess, 59.0ms inference, 8.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  86%|████████▌ | 4281/5000 [10:04<02:08,  5.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000494427.jpg: 640x480 1 laptop, 69.5ms
Speed: 2.9ms preprocess, 69.5ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  86%|████████▌ | 4282/5000 [10:04<01:58,  6.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000494634.jpg: 480x640 1 cat, 1 dog, 2 laptops, 1 oven, 63.6ms
Speed: 2.7ms preprocess, 63.6ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4283/5000 [10:04<01:51,  6.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000494759.jpg: 480x640 2 persons, 2 kites, 66.1ms
Speed: 3.1ms preprocess, 66.1ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4284/5000 [10:04<01:43,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000494863.jpg: 480x640 1 car, 2 trains, 83.2ms
Speed: 3.5ms preprocess, 83.2ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4285/5000 [10:05<01:44,  6.86it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000494869.jpg: 640x448 2 persons, 1 dog, 1 bottle, 6 bowls, 1 sink, 64.2ms
Speed: 3.0ms preprocess, 64.2ms inference, 9.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  86%|████████▌ | 4286/5000 [10:05<01:43,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000494913.jpg: 448x640 3 persons, 1 couch, 2 tvs, 62.3ms
Speed: 2.8ms preprocess, 62.3ms inference, 5.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  86%|████████▌ | 4287/5000 [10:05<01:39,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000495054.jpg: 448x640 1 airplane, 4 trucks, 74.1ms
Speed: 2.6ms preprocess, 74.1ms inference, 8.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  86%|████████▌ | 4288/5000 [10:05<01:40,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000495146.jpg: 480x640 6 persons, 9 cars, 1 bus, 1 train, 2 trucks, 62.7ms
Speed: 2.6ms preprocess, 62.7ms inference, 17.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4289/5000 [10:05<01:45,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000495448.jpg: 640x480 1 cake, 60.1ms
Speed: 2.8ms preprocess, 60.1ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  86%|████████▌ | 4290/5000 [10:05<01:37,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000495732.jpg: 640x448 3 persons, 1 frisbee, 57.0ms
Speed: 2.5ms preprocess, 57.0ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  86%|████████▌ | 4291/5000 [10:05<01:33,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000496409.jpg: 480x640 2 persons, 74.4ms
Speed: 5.1ms preprocess, 74.4ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4292/5000 [10:06<01:33,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000496571.jpg: 640x448 2 persons, 3 cups, 1 fork, 2 spoons, 1 bowl, 2 dining tables, 1 cell phone, 59.3ms
Speed: 2.9ms preprocess, 59.3ms inference, 10.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  86%|████████▌ | 4293/5000 [10:06<01:35,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000496597.jpg: 480x640 1 person, 1 boat, 90.3ms
Speed: 2.8ms preprocess, 90.3ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4294/5000 [10:06<01:37,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000496722.jpg: 384x640 2 cars, 1 parking meter, 59.1ms
Speed: 2.5ms preprocess, 59.1ms inference, 3.8ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  86%|████████▌ | 4295/5000 [10:06<01:32,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000496854.jpg: 480x640 7 persons, 2 umbrellas, 2 handbags, 61.7ms
Speed: 3.4ms preprocess, 61.7ms inference, 9.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4296/5000 [10:06<01:35,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000496954.jpg: 480x640 1 bottle, 1 cup, 1 fork, 1 bowl, 2 oranges, 3 cakes, 1 dining table, 63.0ms
Speed: 3.0ms preprocess, 63.0ms inference, 10.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4297/5000 [10:06<01:40,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000497344.jpg: 448x640 1 person, 1 tie, 1 bottle, 1 laptop, 1 keyboard, 1 cell phone, 77.2ms
Speed: 4.9ms preprocess, 77.2ms inference, 6.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  86%|████████▌ | 4298/5000 [10:06<01:41,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000497568.jpg: 448x640 2 persons, 2 airplanes, 2 trucks, 61.0ms
Speed: 2.9ms preprocess, 61.0ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  86%|████████▌ | 4299/5000 [10:07<01:38,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000497599.jpg: 640x448 2 persons, 1 laptop, 103.3ms
Speed: 3.2ms preprocess, 103.3ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  86%|████████▌ | 4300/5000 [10:07<01:43,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000497628.jpg: 640x480 1 couch, 69.2ms
Speed: 3.1ms preprocess, 69.2ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  86%|████████▌ | 4301/5000 [10:07<01:36,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000497867.jpg: 416x640 3 persons, 1 bus, 57.7ms
Speed: 2.7ms preprocess, 57.7ms inference, 4.0ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  86%|████████▌ | 4302/5000 [10:07<01:31,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000498032.jpg: 480x640 1 person, 1 remote, 100.9ms
Speed: 2.8ms preprocess, 100.9ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4303/5000 [10:07<01:36,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000498286.jpg: 480x640 1 car, 1 cat, 89.7ms
Speed: 3.0ms preprocess, 89.7ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4304/5000 [10:07<01:39,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000498463.jpg: 480x640 1 microwave, 1 refrigerator, 64.9ms
Speed: 3.1ms preprocess, 64.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4305/5000 [10:07<01:34,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000498709.jpg: 448x640 2 cars, 1 motorcycle, 59.4ms
Speed: 2.7ms preprocess, 59.4ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  86%|████████▌ | 4306/5000 [10:07<01:29,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000498747.jpg: 480x640 8 persons, 1 bottle, 1 chair, 2 cell phones, 103.3ms
Speed: 4.7ms preprocess, 103.3ms inference, 11.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4307/5000 [10:08<01:42,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000498807.jpg: 448x640 2 persons, 1 surfboard, 58.0ms
Speed: 3.1ms preprocess, 58.0ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  86%|████████▌ | 4308/5000 [10:08<01:34,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000498857.jpg: 480x640 1 giraffe, 63.6ms
Speed: 4.0ms preprocess, 63.6ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4309/5000 [10:08<01:29,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000498919.jpg: 480x640 11 oranges, 87.2ms
Speed: 4.1ms preprocess, 87.2ms inference, 15.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▌ | 4310/5000 [10:08<01:40,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000499031.jpg: 640x448 3 persons, 1 banana, 60.2ms
Speed: 2.8ms preprocess, 60.2ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  86%|████████▌ | 4311/5000 [10:08<01:33,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000499109.jpg: 640x480 1 sandwich, 1 dining table, 57.7ms
Speed: 2.7ms preprocess, 57.7ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  86%|████████▌ | 4312/5000 [10:08<01:28,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000499181.jpg: 480x640 5 cars, 1 bus, 1 truck, 6 traffic lights, 1 stop sign, 98.8ms
Speed: 3.1ms preprocess, 98.8ms inference, 14.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▋ | 4313/5000 [10:08<01:42,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000499266.jpg: 480x640 3 persons, 1 tv, 1 laptop, 60.3ms
Speed: 2.6ms preprocess, 60.3ms inference, 6.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▋ | 4314/5000 [10:09<01:36,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000499313.jpg: 480x640 3 cups, 2 pizzas, 1 dining table, 63.3ms
Speed: 3.0ms preprocess, 63.3ms inference, 12.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▋ | 4315/5000 [10:09<01:35,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000499622.jpg: 640x608 1 person, 1 motorcycle, 193.6ms
Speed: 3.8ms preprocess, 193.6ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  86%|████████▋ | 4316/5000 [10:09<01:57,  5.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000499768.jpg: 448x640 2 cars, 1 fire hydrant, 58.5ms
Speed: 2.6ms preprocess, 58.5ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  86%|████████▋ | 4317/5000 [10:09<01:46,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000499775.jpg: 480x640 1 car, 1 bus, 61.9ms
Speed: 2.7ms preprocess, 61.9ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▋ | 4318/5000 [10:09<01:37,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500049.jpg: 448x640 1 airplane, 83.0ms
Speed: 3.2ms preprocess, 83.0ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  86%|████████▋ | 4319/5000 [10:09<01:38,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500211.jpg: 480x640 6 umbrellas, 1 chair, 66.5ms
Speed: 2.7ms preprocess, 66.5ms inference, 7.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▋ | 4320/5000 [10:09<01:36,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500257.jpg: 512x640 5 persons, 2 suitcases, 1 tv, 69.2ms
Speed: 4.7ms preprocess, 69.2ms inference, 8.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  86%|████████▋ | 4321/5000 [10:10<01:37,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500270.jpg: 448x640 3 persons, 1 skateboard, 69.0ms
Speed: 2.9ms preprocess, 69.0ms inference, 6.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  86%|████████▋ | 4322/5000 [10:10<01:34,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500423.jpg: 448x640 1 train, 59.3ms
Speed: 3.0ms preprocess, 59.3ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  86%|████████▋ | 4323/5000 [10:10<01:28,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500464.jpg: 640x448 2 bottles, 2 sinks, 58.8ms
Speed: 3.0ms preprocess, 58.8ms inference, 6.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  86%|████████▋ | 4324/5000 [10:10<01:25,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500477.jpg: 480x640 1 person, 1 sandwich, 62.4ms
Speed: 3.0ms preprocess, 62.4ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  86%|████████▋ | 4325/5000 [10:10<01:23,  8.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500478.jpg: 640x352 13 persons, 1 baseball glove, 9 chairs, 126.9ms
Speed: 4.4ms preprocess, 126.9ms inference, 14.8ms postprocess per image at shape (1, 3, 640, 352)


Segmenting Images:  87%|████████▋ | 4326/5000 [10:10<01:44,  6.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500565.jpg: 480x640 2 persons, 64.1ms
Speed: 2.3ms preprocess, 64.1ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  87%|████████▋ | 4327/5000 [10:10<01:36,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500613.jpg: 448x640 1 car, 2 buss, 3 trucks, 61.2ms
Speed: 2.5ms preprocess, 61.2ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4328/5000 [10:11<01:31,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500663.jpg: 480x640 2 sheeps, 2 cows, 74.7ms
Speed: 2.2ms preprocess, 74.7ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  87%|████████▋ | 4329/5000 [10:11<01:31,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500716.jpg: 640x576 1 person, 1 cat, 76.2ms
Speed: 2.6ms preprocess, 76.2ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images:  87%|████████▋ | 4330/5000 [10:11<01:30,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000500826.jpg: 448x640 3 traffic lights, 63.0ms
Speed: 2.5ms preprocess, 63.0ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4331/5000 [10:11<01:25,  7.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000501005.jpg: 416x640 3 persons, 1 baseball bat, 2 baseball gloves, 66.1ms
Speed: 2.6ms preprocess, 66.1ms inference, 6.9ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  87%|████████▋ | 4332/5000 [10:11<01:27,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000501023.jpg: 480x640 2 stop signs, 1 parking meter, 63.8ms
Speed: 4.2ms preprocess, 63.8ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  87%|████████▋ | 4333/5000 [10:11<01:25,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000501243.jpg: 448x640 3 zebras, 60.7ms
Speed: 2.5ms preprocess, 60.7ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4334/5000 [10:11<01:22,  8.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000501368.jpg: 640x480 1 person, 66.2ms
Speed: 4.5ms preprocess, 66.2ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  87%|████████▋ | 4335/5000 [10:11<01:20,  8.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000501523.jpg: 480x640 1 cat, 2 bottles, 1 cup, 2 sinks, 83.7ms
Speed: 3.0ms preprocess, 83.7ms inference, 8.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  87%|████████▋ | 4336/5000 [10:12<01:26,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000502136.jpg: 544x640 2 potted plants, 2 vases, 148.1ms
Speed: 3.5ms preprocess, 148.1ms inference, 4.6ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  87%|████████▋ | 4337/5000 [10:12<01:41,  6.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000502168.jpg: 480x640 7 persons, 5 boats, 62.1ms
Speed: 2.2ms preprocess, 62.1ms inference, 11.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  87%|████████▋ | 4338/5000 [10:12<01:39,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000502229.jpg: 640x640 1 train, 165.2ms
Speed: 2.9ms preprocess, 165.2ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  87%|████████▋ | 4339/5000 [10:12<01:52,  5.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000502336.jpg: 448x640 6 persons, 2 handbags, 76.0ms
Speed: 2.6ms preprocess, 76.0ms inference, 12.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4340/5000 [10:12<01:48,  6.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000502347.jpg: 480x640 1 kite, 63.3ms
Speed: 3.7ms preprocess, 63.3ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  87%|████████▋ | 4341/5000 [10:12<01:38,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000502599.jpg: 448x640 2 airplanes, 59.3ms
Speed: 2.8ms preprocess, 59.3ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4342/5000 [10:13<01:30,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000502732.jpg: 640x480 1 refrigerator, 63.8ms
Speed: 3.1ms preprocess, 63.8ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  87%|████████▋ | 4343/5000 [10:13<01:26,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000502737.jpg: 640x480 1 person, 69.1ms
Speed: 3.1ms preprocess, 69.1ms inference, 29.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  87%|████████▋ | 4344/5000 [10:13<01:30,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000502910.jpg: 640x448 (no detections), 61.9ms
Speed: 2.9ms preprocess, 61.9ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  87%|████████▋ | 4345/5000 [10:13<01:22,  7.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000503755.jpg: 640x480 1 person, 1 tennis racket, 67.9ms
Speed: 3.0ms preprocess, 67.9ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  87%|████████▋ | 4346/5000 [10:13<01:22,  7.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000503823.jpg: 448x640 1 boat, 65.1ms
Speed: 4.6ms preprocess, 65.1ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4347/5000 [10:13<01:23,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000503841.jpg: 608x640 2 traffic lights, 169.6ms
Speed: 1.9ms preprocess, 169.6ms inference, 3.1ms postprocess per image at shape (1, 3, 608, 640)


Segmenting Images:  87%|████████▋ | 4348/5000 [10:13<01:41,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000503855.jpg: 384x640 9 persons, 1 backpack, 6 umbrellas, 1 kite, 1 surfboard, 120.0ms
Speed: 2.4ms preprocess, 120.0ms inference, 12.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  87%|████████▋ | 4349/5000 [10:14<01:51,  5.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000504000.jpg: 256x640 2 persons, 3 airplanes, 95.4ms
Speed: 2.2ms preprocess, 95.4ms inference, 2.8ms postprocess per image at shape (1, 3, 256, 640)


Segmenting Images:  87%|████████▋ | 4350/5000 [10:14<01:45,  6.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000504074.jpg: 448x640 2 persons, 3 chairs, 1 laptop, 73.4ms
Speed: 2.7ms preprocess, 73.4ms inference, 10.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4351/5000 [10:14<01:42,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000504389.jpg: 416x640 2 persons, 1 frisbee, 1 tennis racket, 60.3ms
Speed: 3.6ms preprocess, 60.3ms inference, 4.7ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  87%|████████▋ | 4352/5000 [10:14<01:34,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000504415.jpg: 448x640 1 person, 4 cars, 59.3ms
Speed: 2.9ms preprocess, 59.3ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4353/5000 [10:14<01:29,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000504439.jpg: 320x640 2 zebras, 161.5ms
Speed: 3.3ms preprocess, 161.5ms inference, 1.9ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  87%|████████▋ | 4354/5000 [10:14<01:42,  6.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000504580.jpg: 448x640 2 giraffes, 57.8ms
Speed: 2.8ms preprocess, 57.8ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4355/5000 [10:14<01:32,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000504589.jpg: 640x448 2 persons, 1 frisbee, 60.1ms
Speed: 3.1ms preprocess, 60.1ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  87%|████████▋ | 4356/5000 [10:15<01:27,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000504635.jpg: 448x640 4 zebras, 58.3ms
Speed: 3.0ms preprocess, 58.3ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4357/5000 [10:15<01:27,  7.34it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000504711.jpg: 512x640 1 fork, 1 cake, 142.5ms
Speed: 3.0ms preprocess, 142.5ms inference, 2.7ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  87%|████████▋ | 4358/5000 [10:15<01:39,  6.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000505169.jpg: 640x480 1 toilet, 60.0ms
Speed: 3.1ms preprocess, 60.0ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  87%|████████▋ | 4359/5000 [10:15<01:30,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000505451.jpg: 384x640 2 persons, 50.4ms
Speed: 4.5ms preprocess, 50.4ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  87%|████████▋ | 4360/5000 [10:15<01:24,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000505565.jpg: 448x640 4 birds, 57.2ms
Speed: 4.1ms preprocess, 57.2ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4361/5000 [10:15<01:22,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000505573.jpg: 640x384 1 person, 125.0ms
Speed: 2.8ms preprocess, 125.0ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  87%|████████▋ | 4362/5000 [10:15<01:36,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000505638.jpg: 512x640 1 train, 74.0ms
Speed: 3.1ms preprocess, 74.0ms inference, 2.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  87%|████████▋ | 4363/5000 [10:16<01:32,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000505789.jpg: 640x480 5 persons, 1 clock, 64.7ms
Speed: 3.5ms preprocess, 64.7ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  87%|████████▋ | 4364/5000 [10:16<01:28,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000505942.jpg: 480x640 2 persons, 4 traffic lights, 1 stop sign, 70.8ms
Speed: 4.5ms preprocess, 70.8ms inference, 8.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  87%|████████▋ | 4365/5000 [10:16<01:32,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000506004.jpg: 448x640 5 boats, 66.3ms
Speed: 3.9ms preprocess, 66.3ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4366/5000 [10:16<01:29,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000506178.jpg: 640x480 2 persons, 1 tv, 64.7ms
Speed: 4.6ms preprocess, 64.7ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  87%|████████▋ | 4367/5000 [10:16<01:25,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000506279.jpg: 640x640 1 wine glass, 1 dining table, 194.4ms
Speed: 2.1ms preprocess, 194.4ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  87%|████████▋ | 4368/5000 [10:16<01:46,  5.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000506310.jpg: 448x640 1 bottle, 2 refrigerators, 57.8ms
Speed: 2.7ms preprocess, 57.8ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4369/5000 [10:16<01:35,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000506454.jpg: 480x640 1 bench, 63.0ms
Speed: 2.9ms preprocess, 63.0ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  87%|████████▋ | 4370/5000 [10:17<01:28,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000506656.jpg: 480x640 1 person, 3 horses, 61.2ms
Speed: 3.1ms preprocess, 61.2ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  87%|████████▋ | 4371/5000 [10:17<01:27,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000506707.jpg: 448x640 7 persons, 65.7ms
Speed: 3.1ms preprocess, 65.7ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4372/5000 [10:17<01:27,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000506933.jpg: 480x640 2 persons, 1 cell phone, 65.5ms
Speed: 3.2ms preprocess, 65.5ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  87%|████████▋ | 4373/5000 [10:17<01:23,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000507015.jpg: 448x640 6 persons, 1 sports ball, 1 baseball glove, 78.7ms
Speed: 3.1ms preprocess, 78.7ms inference, 15.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  87%|████████▋ | 4374/5000 [10:17<01:29,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000507037.jpg: 480x640 14 persons, 3 bicycles, 1 bottle, 64.1ms
Speed: 2.9ms preprocess, 64.1ms inference, 17.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4375/5000 [10:17<01:33,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000507042.jpg: 640x448 2 giraffes, 60.7ms
Speed: 3.0ms preprocess, 60.7ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  88%|████████▊ | 4376/5000 [10:17<01:26,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000507081.jpg: 640x640 1 chair, 2 ovens, 85.8ms
Speed: 2.9ms preprocess, 85.8ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  88%|████████▊ | 4377/5000 [10:18<01:27,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000507223.jpg: 640x480 4 persons, 2 baseball bats, 1 baseball glove, 93.2ms
Speed: 7.0ms preprocess, 93.2ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  88%|████████▊ | 4378/5000 [10:18<01:32,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000507235.jpg: 640x640 1 person, 2 bowls, 1 dining table, 79.9ms
Speed: 4.1ms preprocess, 79.9ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  88%|████████▊ | 4379/5000 [10:18<01:31,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000507473.jpg: 480x640 1 person, 1 cell phone, 1 toothbrush, 61.6ms
Speed: 3.6ms preprocess, 61.6ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4380/5000 [10:18<01:26,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000507575.jpg: 480x640 1 suitcase, 3 laptops, 2 mouses, 2 keyboards, 1 cell phone, 82.9ms
Speed: 5.6ms preprocess, 82.9ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4381/5000 [10:18<01:30,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000507667.jpg: 512x640 7 persons, 1 car, 1 motorcycle, 68.5ms
Speed: 4.2ms preprocess, 68.5ms inference, 9.8ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  88%|████████▊ | 4382/5000 [10:18<01:37,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000507797.jpg: 448x640 6 persons, 2 buss, 5 handbags, 52.4ms
Speed: 3.4ms preprocess, 52.4ms inference, 10.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4383/5000 [10:18<01:36,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000507893.jpg: 640x448 1 toilet, 1 sink, 59.8ms
Speed: 3.1ms preprocess, 59.8ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  88%|████████▊ | 4384/5000 [10:19<01:27,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000507975.jpg: 480x640 8 persons, 1 car, 3 horses, 63.5ms
Speed: 3.2ms preprocess, 63.5ms inference, 9.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4385/5000 [10:19<01:31,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000508101.jpg: 480x640 5 persons, 1 boat, 67.2ms
Speed: 6.0ms preprocess, 67.2ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4386/5000 [10:19<01:30,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000508312.jpg: 448x640 1 oven, 61.7ms
Speed: 5.6ms preprocess, 61.7ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4387/5000 [10:19<01:24,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000508370.jpg: 640x448 3 persons, 1 bicycle, 67.2ms
Speed: 2.7ms preprocess, 67.2ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  88%|████████▊ | 4388/5000 [10:19<01:22,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000508482.jpg: 640x480 (no detections), 67.4ms
Speed: 3.9ms preprocess, 67.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  88%|████████▊ | 4389/5000 [10:19<01:17,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000508586.jpg: 384x640 8 zebras, 90.8ms
Speed: 2.8ms preprocess, 90.8ms inference, 6.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  88%|████████▊ | 4390/5000 [10:19<01:23,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000508602.jpg: 576x640 2 cars, 1 bird, 192.7ms
Speed: 3.0ms preprocess, 192.7ms inference, 4.1ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  88%|████████▊ | 4391/5000 [10:20<01:44,  5.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000508639.jpg: 480x640 1 boat, 1 horse, 67.8ms
Speed: 3.3ms preprocess, 67.8ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4392/5000 [10:20<01:35,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000508730.jpg: 480x640 2 persons, 66.0ms
Speed: 3.0ms preprocess, 66.0ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4393/5000 [10:20<01:28,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000508917.jpg: 384x640 9 persons, 1 train, 76.0ms
Speed: 2.7ms preprocess, 76.0ms inference, 7.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  88%|████████▊ | 4394/5000 [10:20<01:29,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000509008.jpg: 448x640 1 car, 1 bus, 66.4ms
Speed: 3.0ms preprocess, 66.4ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4395/5000 [10:20<01:25,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000509014.jpg: 288x640 20 persons, 3 skiss, 119.3ms
Speed: 11.4ms preprocess, 119.3ms inference, 14.8ms postprocess per image at shape (1, 3, 288, 640)


Segmenting Images:  88%|████████▊ | 4396/5000 [10:20<01:40,  5.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000509131.jpg: 448x640 1 banana, 3 apples, 3 oranges, 58.5ms
Speed: 2.9ms preprocess, 58.5ms inference, 6.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4397/5000 [10:21<01:33,  6.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000509258.jpg: 448x640 (no detections), 83.0ms
Speed: 3.3ms preprocess, 83.0ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4398/5000 [10:21<01:28,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000509260.jpg: 480x640 1 tv, 1 book, 68.2ms
Speed: 4.8ms preprocess, 68.2ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4399/5000 [10:21<01:23,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000509403.jpg: 448x640 3 persons, 1 dog, 1 frisbee, 68.2ms
Speed: 3.4ms preprocess, 68.2ms inference, 5.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4400/5000 [10:21<01:21,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000509451.jpg: 480x640 8 persons, 1 cup, 1 hot dog, 71.1ms
Speed: 5.4ms preprocess, 71.1ms inference, 8.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4401/5000 [10:21<01:24,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000509656.jpg: 480x640 1 zebra, 67.2ms
Speed: 3.5ms preprocess, 67.2ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4402/5000 [10:21<01:22,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000509699.jpg: 448x640 1 person, 2 chairs, 2 potted plants, 59.9ms
Speed: 3.6ms preprocess, 59.9ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4403/5000 [10:21<01:20,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000509719.jpg: 448x640 1 bird, 64.1ms
Speed: 3.3ms preprocess, 64.1ms inference, 6.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4404/5000 [10:21<01:17,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000509735.jpg: 448x640 4 zebras, 3 giraffes, 59.4ms
Speed: 3.2ms preprocess, 59.4ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4405/5000 [10:22<01:17,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000509824.jpg: 640x640 1 sports ball, 1 bottle, 1 couch, 1 book, 2 vases, 100.5ms
Speed: 28.2ms preprocess, 100.5ms inference, 8.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  88%|████████▊ | 4406/5000 [10:22<01:31,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000510095.jpg: 448x640 1 person, 1 baseball bat, 61.5ms
Speed: 3.4ms preprocess, 61.5ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4407/5000 [10:22<01:24,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000510329.jpg: 480x640 2 persons, 1 umbrella, 1 suitcase, 1 laptop, 68.2ms
Speed: 3.4ms preprocess, 68.2ms inference, 8.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4408/5000 [10:22<01:22,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000511076.jpg: 448x640 1 person, 1 airplane, 1 bench, 9 birds, 72.0ms
Speed: 3.0ms preprocess, 72.0ms inference, 16.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4409/5000 [10:22<01:27,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000511321.jpg: 640x448 2 boats, 62.7ms
Speed: 3.0ms preprocess, 62.7ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  88%|████████▊ | 4410/5000 [10:22<01:20,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000511384.jpg: 640x448 1 person, 1 frisbee, 56.5ms
Speed: 2.9ms preprocess, 56.5ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  88%|████████▊ | 4411/5000 [10:22<01:15,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000511398.jpg: 448x640 1 dog, 1 frisbee, 56.3ms
Speed: 3.2ms preprocess, 56.3ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4412/5000 [10:23<01:15,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000511453.jpg: 640x640 2 bottles, 1 cup, 1 pizza, 1 dining table, 83.2ms
Speed: 3.6ms preprocess, 83.2ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  88%|████████▊ | 4413/5000 [10:23<01:20,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000511599.jpg: 480x640 9 persons, 5 boats, 1 surfboard, 66.3ms
Speed: 3.0ms preprocess, 66.3ms inference, 15.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4414/5000 [10:23<01:25,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000511647.jpg: 480x640 1 person, 1 kite, 64.0ms
Speed: 2.9ms preprocess, 64.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4415/5000 [10:23<01:21,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000511760.jpg: 640x320 2 persons, 3 cars, 2 kites, 126.4ms
Speed: 2.7ms preprocess, 126.4ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 320)


Segmenting Images:  88%|████████▊ | 4416/5000 [10:23<01:28,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000511999.jpg: 448x640 3 persons, 2 cars, 1 train, 1 clock, 59.3ms
Speed: 2.6ms preprocess, 59.3ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4417/5000 [10:23<01:23,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000512194.jpg: 480x640 1 person, 1 potted plant, 1 vase, 66.0ms
Speed: 2.6ms preprocess, 66.0ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  88%|████████▊ | 4418/5000 [10:23<01:23,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000512248.jpg: 640x480 1 clock, 66.5ms
Speed: 2.8ms preprocess, 66.5ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  88%|████████▊ | 4419/5000 [10:24<01:18,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000512330.jpg: 640x448 4 bottles, 1 refrigerator, 61.0ms
Speed: 2.6ms preprocess, 61.0ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  88%|████████▊ | 4420/5000 [10:24<01:16,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000512403.jpg: 640x544 1 knife, 1 vase, 204.5ms
Speed: 1.9ms preprocess, 204.5ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  88%|████████▊ | 4421/5000 [10:24<01:38,  5.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000512476.jpg: 448x640 2 bowls, 1 tv, 2 sinks, 59.6ms
Speed: 2.4ms preprocess, 59.6ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4422/5000 [10:24<01:28,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000512564.jpg: 448x640 3 cars, 1 bus, 2 traffic lights, 61.7ms
Speed: 3.0ms preprocess, 61.7ms inference, 6.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4423/5000 [10:24<01:23,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000512648.jpg: 448x640 1 cow, 58.9ms
Speed: 2.5ms preprocess, 58.9ms inference, 7.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4424/5000 [10:24<01:19,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000512657.jpg: 448x640 3 persons, 1 surfboard, 1 bottle, 3 couchs, 1 remote, 1 clock, 69.8ms
Speed: 3.8ms preprocess, 69.8ms inference, 9.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  88%|████████▊ | 4425/5000 [10:24<01:21,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000512776.jpg: 480x640 3 persons, 1 baseball glove, 62.1ms
Speed: 4.3ms preprocess, 62.1ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▊ | 4426/5000 [10:25<01:17,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000512836.jpg: 640x480 1 person, 1 dog, 1 bear, 1 umbrella, 61.2ms
Speed: 2.9ms preprocess, 61.2ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  89%|████████▊ | 4427/5000 [10:25<01:19,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000512929.jpg: 640x640 2 persons, 1 cup, 1 bowl, 85.3ms
Speed: 3.7ms preprocess, 85.3ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  89%|████████▊ | 4428/5000 [10:25<01:21,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000512985.jpg: 480x640 1 person, 2 surfboards, 62.7ms
Speed: 2.6ms preprocess, 62.7ms inference, 3.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▊ | 4429/5000 [10:25<01:16,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000513041.jpg: 480x640 3 persons, 2 wine glasss, 3 cups, 1 bowl, 1 pizza, 1 dining table, 72.7ms
Speed: 2.8ms preprocess, 72.7ms inference, 19.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▊ | 4430/5000 [10:25<01:22,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000513181.jpg: 448x640 3 boats, 61.6ms
Speed: 2.6ms preprocess, 61.6ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▊ | 4431/5000 [10:25<01:17,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000513283.jpg: 544x640 1 person, 2 bottles, 1 dining table, 143.4ms
Speed: 1.9ms preprocess, 143.4ms inference, 4.6ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  89%|████████▊ | 4432/5000 [10:25<01:28,  6.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000513484.jpg: 448x640 1 bear, 80.9ms
Speed: 2.9ms preprocess, 80.9ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▊ | 4433/5000 [10:26<01:24,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000513524.jpg: 448x640 1 person, 1 surfboard, 61.4ms
Speed: 2.5ms preprocess, 61.4ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▊ | 4434/5000 [10:26<01:18,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000513567.jpg: 480x640 9 persons, 1 car, 2 hot dogs, 67.3ms
Speed: 2.7ms preprocess, 67.3ms inference, 13.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▊ | 4435/5000 [10:26<01:20,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000513580.jpg: 512x640 1 person, 1 motorcycle, 1 airplane, 172.0ms
Speed: 2.9ms preprocess, 172.0ms inference, 3.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  89%|████████▊ | 4436/5000 [10:26<01:34,  5.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000513688.jpg: 480x640 1 chair, 1 laptop, 1 mouse, 1 keyboard, 60.1ms
Speed: 2.7ms preprocess, 60.1ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▊ | 4437/5000 [10:26<01:25,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000514376.jpg: 480x640 1 car, 1 bus, 1 traffic light, 1 potted plant, 63.5ms
Speed: 3.2ms preprocess, 63.5ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4438/5000 [10:26<01:20,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000514508.jpg: 480x640 4 persons, 1 bus, 62.8ms
Speed: 2.6ms preprocess, 62.8ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4439/5000 [10:26<01:17,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000514540.jpg: 640x448 (no detections), 58.5ms
Speed: 3.0ms preprocess, 58.5ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000514586.jpg: 640x480 2 persons, 1 baseball bat, 85.8ms
Speed: 5.3ms preprocess, 85.8ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  89%|████████▉ | 4441/5000 [10:27<01:12,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000514797.jpg: 448x640 3 persons, 1 kite, 1 chair, 60.5ms
Speed: 2.1ms preprocess, 60.5ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▉ | 4442/5000 [10:27<01:10,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000514914.jpg: 384x640 1 toilet, 1 sink, 108.0ms
Speed: 2.6ms preprocess, 108.0ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  89%|████████▉ | 4443/5000 [10:27<01:14,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000514979.jpg: 448x640 2 elephants, 57.0ms
Speed: 2.1ms preprocess, 57.0ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▉ | 4444/5000 [10:27<01:10,  7.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000515025.jpg: 448x640 1 person, 3 cars, 2 trucks, 1 dog, 1 bottle, 1 cup, 50.0ms
Speed: 2.6ms preprocess, 50.0ms inference, 9.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▉ | 4445/5000 [10:27<01:12,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000515077.jpg: 480x640 2 persons, 6 chairs, 1 couch, 1 dining table, 1 tv, 62.3ms
Speed: 2.5ms preprocess, 62.3ms inference, 12.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4446/5000 [10:27<01:16,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000515266.jpg: 448x640 1 car, 2 trucks, 1 bench, 62.9ms
Speed: 2.8ms preprocess, 62.9ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▉ | 4447/5000 [10:28<01:13,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000515350.jpg: 448x640 1 person, 1 surfboard, 61.4ms
Speed: 2.3ms preprocess, 61.4ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▉ | 4448/5000 [10:28<01:09,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000515445.jpg: 480x640 6 persons, 1 surfboard, 100.7ms
Speed: 2.9ms preprocess, 100.7ms inference, 8.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4449/5000 [10:28<01:16,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000515577.jpg: 448x640 2 persons, 1 skis, 59.8ms
Speed: 3.7ms preprocess, 59.8ms inference, 3.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▉ | 4450/5000 [10:28<01:13,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000515579.jpg: 448x640 1 person, 1 sports ball, 69.9ms
Speed: 4.9ms preprocess, 69.9ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▉ | 4451/5000 [10:28<01:11,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000515828.jpg: 448x640 2 persons, 1 backpack, 1 tennis racket, 2 bottles, 2 chairs, 114.3ms
Speed: 2.6ms preprocess, 114.3ms inference, 10.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▉ | 4452/5000 [10:28<01:21,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000515982.jpg: 448x640 3 persons, 1 sports ball, 1 baseball glove, 63.8ms
Speed: 2.7ms preprocess, 63.8ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▉ | 4453/5000 [10:28<01:17,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000516038.jpg: 640x480 3 persons, 3 baseball gloves, 87.4ms
Speed: 3.1ms preprocess, 87.4ms inference, 6.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  89%|████████▉ | 4454/5000 [10:29<01:20,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000516143.jpg: 480x640 1 person, 1 bus, 63.4ms
Speed: 2.3ms preprocess, 63.4ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4455/5000 [10:29<01:14,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000516173.jpg: 640x416 2 persons, 1 surfboard, 154.1ms
Speed: 2.4ms preprocess, 154.1ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  89%|████████▉ | 4456/5000 [10:29<01:26,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000516316.jpg: 480x640 1 horse, 2 zebras, 63.4ms
Speed: 2.6ms preprocess, 63.4ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4457/5000 [10:29<01:19,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000516318.jpg: 448x640 1 person, 2 surfboards, 63.7ms
Speed: 3.0ms preprocess, 63.7ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▉ | 4458/5000 [10:29<01:14,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000516601.jpg: 384x640 6 persons, 2 skiss, 48.4ms
Speed: 2.5ms preprocess, 48.4ms inference, 7.9ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  89%|████████▉ | 4459/5000 [10:29<01:12,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000516677.jpg: 480x640 8 persons, 6 backpacks, 3 skiss, 74.7ms
Speed: 3.1ms preprocess, 74.7ms inference, 15.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4460/5000 [10:29<01:20,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000516708.jpg: 480x640 1 traffic light, 59.3ms
Speed: 4.0ms preprocess, 59.3ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4461/5000 [10:29<01:14,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000516804.jpg: 640x480 1 airplane, 2 buss, 88.4ms
Speed: 2.9ms preprocess, 88.4ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  89%|████████▉ | 4462/5000 [10:30<01:15,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000516871.jpg: 448x640 1 banana, 1 cake, 72.1ms
Speed: 2.5ms preprocess, 72.1ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▉ | 4463/5000 [10:30<01:13,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000516916.jpg: 480x640 1 chair, 2 tvs, 3 laptops, 3 keyboards, 1 cell phone, 64.1ms
Speed: 2.7ms preprocess, 64.1ms inference, 9.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4464/5000 [10:30<01:14,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000517056.jpg: 480x640 2 persons, 1 cup, 1 bowl, 1 cake, 2 chairs, 3 dining tables, 1 book, 79.4ms
Speed: 3.0ms preprocess, 79.4ms inference, 9.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4465/5000 [10:30<01:19,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000517069.jpg: 480x640 2 persons, 6 cars, 2 benchs, 72.8ms
Speed: 2.7ms preprocess, 72.8ms inference, 11.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4466/5000 [10:30<01:19,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000517523.jpg: 448x640 1 airplane, 1 boat, 59.0ms
Speed: 2.4ms preprocess, 59.0ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  89%|████████▉ | 4467/5000 [10:30<01:13,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000517687.jpg: 384x640 2 cell phones, 75.0ms
Speed: 5.3ms preprocess, 75.0ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  89%|████████▉ | 4468/5000 [10:30<01:12,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000517832.jpg: 640x640 1 dog, 2 chairs, 92.7ms
Speed: 2.7ms preprocess, 92.7ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  89%|████████▉ | 4469/5000 [10:31<01:15,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000518213.jpg: 480x640 2 cows, 114.1ms
Speed: 2.9ms preprocess, 114.1ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4470/5000 [10:31<01:20,  6.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000518326.jpg: 480x640 2 persons, 71.8ms
Speed: 2.9ms preprocess, 71.8ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4471/5000 [10:31<01:16,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000518770.jpg: 480x640 6 persons, 8 chairs, 1 remote, 70.8ms
Speed: 4.0ms preprocess, 70.8ms inference, 16.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  89%|████████▉ | 4472/5000 [10:31<01:20,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000519039.jpg: 320x640 1 truck, 1 clock, 143.7ms
Speed: 2.4ms preprocess, 143.7ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  89%|████████▉ | 4473/5000 [10:31<01:26,  6.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000519208.jpg: 416x640 1 person, 2 elephants, 120.5ms
Speed: 1.7ms preprocess, 120.5ms inference, 2.8ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  89%|████████▉ | 4474/5000 [10:31<01:27,  6.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000519338.jpg: 448x640 1 train, 60.3ms
Speed: 2.1ms preprocess, 60.3ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  90%|████████▉ | 4475/5000 [10:32<01:17,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000519491.jpg: 640x480 2 clocks, 64.4ms
Speed: 2.6ms preprocess, 64.4ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  90%|████████▉ | 4476/5000 [10:32<01:12,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000519522.jpg: 640x448 1 person, 2 clocks, 66.7ms
Speed: 2.4ms preprocess, 66.7ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  90%|████████▉ | 4477/5000 [10:32<01:10,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000519569.jpg: 640x448 1 bird, 1 bottle, 3 chairs, 1 potted plant, 1 oven, 1 vase, 58.3ms
Speed: 2.6ms preprocess, 58.3ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  90%|████████▉ | 4478/5000 [10:32<01:09,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000519611.jpg: 480x640 1 bear, 63.6ms
Speed: 2.4ms preprocess, 63.6ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|████████▉ | 4479/5000 [10:32<01:06,  7.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000519688.jpg: 640x544 1 person, 1 cat, 1 dog, 179.9ms
Speed: 2.0ms preprocess, 179.9ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  90%|████████▉ | 4480/5000 [10:32<01:22,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000519764.jpg: 480x640 1 cat, 2 chairs, 1 tv, 58.9ms
Speed: 4.0ms preprocess, 58.9ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|████████▉ | 4481/5000 [10:32<01:15,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000520009.jpg: 544x640 3 persons, 1 car, 1 bus, 1 traffic light, 1 clock, 144.0ms
Speed: 4.2ms preprocess, 144.0ms inference, 9.4ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  90%|████████▉ | 4482/5000 [10:33<01:26,  5.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000520077.jpg: 448x640 2 bottles, 1 tv, 1 mouse, 21 books, 94.5ms
Speed: 2.7ms preprocess, 94.5ms inference, 25.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  90%|████████▉ | 4483/5000 [10:33<01:34,  5.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000520264.jpg: 480x640 2 persons, 2 bottles, 1 bowl, 2 pizzas, 1 microwave, 64.3ms
Speed: 2.7ms preprocess, 64.3ms inference, 7.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|████████▉ | 4484/5000 [10:33<01:25,  6.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000520301.jpg: 640x480 1 person, 1 dog, 1 horse, 61.7ms
Speed: 2.5ms preprocess, 61.7ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  90%|████████▉ | 4485/5000 [10:33<01:18,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000520324.jpg: 480x640 1 person, 2 airplanes, 1 truck, 82.2ms
Speed: 2.9ms preprocess, 82.2ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|████████▉ | 4486/5000 [10:33<01:17,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000520531.jpg: 480x640 1 cat, 1 chair, 1 couch, 2 remotes, 1 book, 61.9ms
Speed: 2.8ms preprocess, 61.9ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|████████▉ | 4487/5000 [10:33<01:13,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000520659.jpg: 480x640 12 persons, 70.8ms
Speed: 2.9ms preprocess, 70.8ms inference, 11.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|████████▉ | 4488/5000 [10:34<01:15,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000520707.jpg: 480x640 11 persons, 1 backpack, 2 handbags, 64.5ms
Speed: 3.0ms preprocess, 64.5ms inference, 14.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|████████▉ | 4489/5000 [10:34<01:21,  6.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000520832.jpg: 480x640 1 person, 64.3ms
Speed: 2.7ms preprocess, 64.3ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|████████▉ | 4490/5000 [10:34<01:14,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000520871.jpg: 448x640 1 bottle, 2 cups, 1 pizza, 1 chair, 2 dining tables, 56.9ms
Speed: 2.8ms preprocess, 56.9ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  90%|████████▉ | 4491/5000 [10:34<01:11,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000520910.jpg: 640x448 1 person, 68.4ms
Speed: 2.8ms preprocess, 68.4ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  90%|████████▉ | 4492/5000 [10:34<01:08,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521052.jpg: 480x640 1 car, 1 truck, 2 zebras, 63.0ms
Speed: 2.8ms preprocess, 63.0ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|████████▉ | 4493/5000 [10:34<01:08,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521141.jpg: 640x480 3 persons, 10 cars, 70.0ms
Speed: 4.6ms preprocess, 70.0ms inference, 12.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  90%|████████▉ | 4494/5000 [10:34<01:13,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521231.jpg: 640x448 1 bear, 68.5ms
Speed: 4.6ms preprocess, 68.5ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  90%|████████▉ | 4495/5000 [10:35<01:09,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521259.jpg: 480x640 18 persons, 1 car, 3 frisbees, 64.7ms
Speed: 2.7ms preprocess, 64.7ms inference, 22.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|████████▉ | 4496/5000 [10:35<01:17,  6.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521282.jpg: 640x448 1 vase, 59.9ms
Speed: 2.4ms preprocess, 59.9ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  90%|████████▉ | 4497/5000 [10:35<01:10,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521405.jpg: 640x448 1 person, 1 toothbrush, 77.2ms
Speed: 6.0ms preprocess, 77.2ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  90%|████████▉ | 4498/5000 [10:35<01:10,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521509.jpg: 448x640 2 persons, 1 bottle, 1 cup, 2 chairs, 1 bed, 1 vase, 61.7ms
Speed: 2.9ms preprocess, 61.7ms inference, 8.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  90%|████████▉ | 4499/5000 [10:35<01:10,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521540.jpg: 480x640 1 banana, 1 dining table, 74.4ms
Speed: 3.8ms preprocess, 74.4ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|█████████ | 4500/5000 [10:35<01:10,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521601.jpg: 480x640 1 donut, 1 toilet, 61.2ms
Speed: 3.3ms preprocess, 61.2ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|█████████ | 4501/5000 [10:35<01:06,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521717.jpg: 448x640 3 persons, 2 tennis rackets, 1 chair, 59.3ms
Speed: 3.0ms preprocess, 59.3ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  90%|█████████ | 4502/5000 [10:35<01:04,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521719.jpg: 640x448 1 umbrella, 4 kites, 83.0ms
Speed: 3.9ms preprocess, 83.0ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  90%|█████████ | 4503/5000 [10:36<01:07,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521819.jpg: 480x640 10 persons, 2 cars, 1 frisbee, 65.9ms
Speed: 3.0ms preprocess, 65.9ms inference, 13.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|█████████ | 4504/5000 [10:36<01:10,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000521956.jpg: 448x640 1 person, 1 tennis racket, 62.0ms
Speed: 3.9ms preprocess, 62.0ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  90%|█████████ | 4505/5000 [10:36<01:06,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000522007.jpg: 480x640 1 potted plant, 2 microwaves, 1 oven, 91.4ms
Speed: 3.8ms preprocess, 91.4ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|█████████ | 4506/5000 [10:36<01:10,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000522156.jpg: 448x640 1 sandwich, 1 pizza, 1 cake, 64.0ms
Speed: 3.0ms preprocess, 64.0ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  90%|█████████ | 4507/5000 [10:36<01:06,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000522393.jpg: 640x640 2 persons, 4 boats, 85.6ms
Speed: 3.7ms preprocess, 85.6ms inference, 8.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  90%|█████████ | 4508/5000 [10:36<01:10,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000522638.jpg: 448x640 1 bear, 2 teddy bears, 86.8ms
Speed: 6.3ms preprocess, 86.8ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  90%|█████████ | 4509/5000 [10:36<01:10,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000522713.jpg: 480x640 1 boat, 1 bench, 62.6ms
Speed: 3.0ms preprocess, 62.6ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|█████████ | 4510/5000 [10:37<01:05,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000522751.jpg: 352x640 3 traffic lights, 168.7ms
Speed: 2.7ms preprocess, 168.7ms inference, 2.7ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  90%|█████████ | 4511/5000 [10:37<01:17,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000522889.jpg: 640x448 4 persons, 1 tennis racket, 1 chair, 63.7ms
Speed: 2.8ms preprocess, 63.7ms inference, 5.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  90%|█████████ | 4512/5000 [10:37<01:12,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000522940.jpg: 640x512 1 stop sign, 148.8ms
Speed: 2.6ms preprocess, 148.8ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  90%|█████████ | 4513/5000 [10:37<01:19,  6.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000523033.jpg: 448x640 2 persons, 74.9ms
Speed: 3.3ms preprocess, 74.9ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  90%|█████████ | 4514/5000 [10:37<01:16,  6.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000523100.jpg: 640x480 1 person, 1 bottle, 3 cups, 2 bowls, 70.7ms
Speed: 3.9ms preprocess, 70.7ms inference, 6.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  90%|█████████ | 4515/5000 [10:37<01:14,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000523175.jpg: 480x640 1 fork, 1 spoon, 1 bowl, 66.8ms
Speed: 4.1ms preprocess, 66.8ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|█████████ | 4516/5000 [10:38<01:09,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000523194.jpg: 448x640 1 fire hydrant, 59.3ms
Speed: 2.9ms preprocess, 59.3ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  90%|█████████ | 4517/5000 [10:38<01:04,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000523229.jpg: 640x480 1 parking meter, 2 toilets, 66.3ms
Speed: 3.0ms preprocess, 66.3ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  90%|█████████ | 4518/5000 [10:38<01:05,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000523241.jpg: 448x640 5 cars, 2 trucks, 1 traffic light, 65.6ms
Speed: 5.7ms preprocess, 65.6ms inference, 7.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  90%|█████████ | 4519/5000 [10:38<01:05,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000523782.jpg: 480x640 1 car, 2 trucks, 63.4ms
Speed: 3.0ms preprocess, 63.4ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|█████████ | 4520/5000 [10:38<01:02,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000523807.jpg: 480x640 1 person, 2 bottles, 1 hot dog, 64.8ms
Speed: 4.3ms preprocess, 64.8ms inference, 4.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|█████████ | 4521/5000 [10:38<01:05,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000523811.jpg: 480x640 1 bird, 66.1ms
Speed: 5.9ms preprocess, 66.1ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  90%|█████████ | 4522/5000 [10:38<01:03,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000523957.jpg: 448x640 13 persons, 1 skateboard, 58.2ms
Speed: 3.0ms preprocess, 58.2ms inference, 14.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  90%|█████████ | 4523/5000 [10:38<01:05,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000524108.jpg: 416x640 8 motorcycles, 90.4ms
Speed: 2.6ms preprocess, 90.4ms inference, 7.3ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  90%|█████████ | 4524/5000 [10:39<01:08,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000524280.jpg: 640x640 1 giraffe, 86.3ms
Speed: 2.8ms preprocess, 86.3ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  90%|█████████ | 4525/5000 [10:39<01:07,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000524456.jpg: 480x640 3 persons, 1 dog, 1 laptop, 2 mouses, 1 remote, 64.4ms
Speed: 3.0ms preprocess, 64.4ms inference, 12.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████ | 4526/5000 [10:39<01:09,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000524742.jpg: 480x640 1 train, 66.5ms
Speed: 3.2ms preprocess, 66.5ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████ | 4527/5000 [10:39<01:05,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000524850.jpg: 416x640 23 persons, 2 airplanes, 1 bus, 1 backpack, 2 handbags, 1 suitcase, 58.4ms
Speed: 3.6ms preprocess, 58.4ms inference, 26.6ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  91%|█████████ | 4528/5000 [10:39<01:13,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000525083.jpg: 448x640 1 chair, 1 dining table, 1 tv, 1 microwave, 1 refrigerator, 101.6ms
Speed: 2.9ms preprocess, 101.6ms inference, 11.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4529/5000 [10:39<01:16,  6.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000525155.jpg: 448x640 1 person, 59.5ms
Speed: 3.2ms preprocess, 59.5ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4530/5000 [10:40<01:08,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000525247.jpg: 448x640 1 cat, 1 tv, 1 laptop, 60.3ms
Speed: 3.1ms preprocess, 60.3ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4531/5000 [10:40<01:03,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000525286.jpg: 640x640 1 clock, 88.8ms
Speed: 2.4ms preprocess, 88.8ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  91%|█████████ | 4532/5000 [10:40<01:06,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000525322.jpg: 448x640 2 airplanes, 63.7ms
Speed: 3.1ms preprocess, 63.7ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4533/5000 [10:40<01:03,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000525600.jpg: 352x640 9 giraffes, 57.6ms
Speed: 2.6ms preprocess, 57.6ms inference, 7.4ms postprocess per image at shape (1, 3, 352, 640)


Segmenting Images:  91%|█████████ | 4534/5000 [10:40<01:02,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000526103.jpg: 480x640 2 elephants, 2 umbrellas, 88.7ms
Speed: 3.4ms preprocess, 88.7ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████ | 4535/5000 [10:40<01:05,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000526197.jpg: 640x448 1 donut, 1 cake, 58.4ms
Speed: 3.1ms preprocess, 58.4ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  91%|█████████ | 4536/5000 [10:40<01:00,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000526256.jpg: 480x640 3 clocks, 55.9ms
Speed: 3.0ms preprocess, 55.9ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████ | 4537/5000 [10:40<00:58,  7.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000526392.jpg: 448x640 2 persons, 3 cars, 7 traffic lights, 60.2ms
Speed: 4.5ms preprocess, 60.2ms inference, 14.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4538/5000 [10:41<01:02,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000526706.jpg: 448x640 1 horse, 3 cows, 69.2ms
Speed: 4.7ms preprocess, 69.2ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4539/5000 [10:41<01:02,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000526728.jpg: 480x640 2 persons, 1 car, 9 suitcases, 65.5ms
Speed: 4.6ms preprocess, 65.5ms inference, 11.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████ | 4540/5000 [10:41<01:04,  7.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000526751.jpg: 448x640 2 persons, 6 boats, 60.2ms
Speed: 3.2ms preprocess, 60.2ms inference, 7.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4541/5000 [10:41<01:05,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000527029.jpg: 640x640 1 person, 1 chair, 92.0ms
Speed: 2.5ms preprocess, 92.0ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  91%|█████████ | 4542/5000 [10:41<01:06,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000527215.jpg: 448x640 5 persons, 2 kites, 63.1ms
Speed: 2.9ms preprocess, 63.1ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4543/5000 [10:41<01:03,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000527220.jpg: 224x640 1 car, 3 motorcycles, 1 truck, 110.4ms
Speed: 3.8ms preprocess, 110.4ms inference, 8.0ms postprocess per image at shape (1, 3, 224, 640)


Segmenting Images:  91%|█████████ | 4544/5000 [10:41<01:07,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000527427.jpg: 480x640 1 person, 2 bottles, 1 potted plant, 1 dining table, 1 laptop, 1 keyboard, 55.5ms
Speed: 2.7ms preprocess, 55.5ms inference, 7.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████ | 4545/5000 [10:42<01:03,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000527528.jpg: 480x640 4 kites, 61.8ms
Speed: 3.0ms preprocess, 61.8ms inference, 4.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████ | 4546/5000 [10:42<01:00,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000527616.jpg: 512x640 1 person, 1 tie, 1 tv, 167.7ms
Speed: 3.4ms preprocess, 167.7ms inference, 3.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  91%|█████████ | 4547/5000 [10:42<01:12,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000527695.jpg: 448x640 1 bowl, 6 broccolis, 59.2ms
Speed: 2.6ms preprocess, 59.2ms inference, 6.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4548/5000 [10:42<01:06,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000527750.jpg: 640x448 1 person, 1 pizza, 1 oven, 1 refrigerator, 60.4ms
Speed: 3.0ms preprocess, 60.4ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  91%|█████████ | 4549/5000 [10:42<01:02,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000527784.jpg: 512x640 4 bottles, 1 fork, 1 knife, 3 sandwichs, 1 chair, 1 dining table, 112.9ms
Speed: 3.4ms preprocess, 112.9ms inference, 18.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  91%|█████████ | 4550/5000 [10:42<01:11,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000527960.jpg: 448x640 1 bench, 58.3ms
Speed: 2.9ms preprocess, 58.3ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4551/5000 [10:42<01:04,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000528314.jpg: 640x448 11 persons, 1 skis, 59.4ms
Speed: 2.8ms preprocess, 59.4ms inference, 12.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  91%|█████████ | 4552/5000 [10:43<01:04,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000528399.jpg: 448x640 3 persons, 2 wine glasss, 4 cups, 1 spoon, 1 bowl, 1 sandwich, 1 donut, 2 cakes, 1 dining table, 87.8ms
Speed: 2.8ms preprocess, 87.8ms inference, 20.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4553/5000 [10:43<01:09,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000528524.jpg: 640x480 1 elephant, 60.5ms
Speed: 2.4ms preprocess, 60.5ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  91%|█████████ | 4554/5000 [10:43<01:02,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000528578.jpg: 448x640 2 clocks, 57.8ms
Speed: 2.6ms preprocess, 57.8ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4555/5000 [10:43<00:58,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000528705.jpg: 640x448 2 persons, 1 teddy bear, 62.2ms
Speed: 3.3ms preprocess, 62.2ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  91%|█████████ | 4556/5000 [10:43<00:56,  7.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000528862.jpg: 480x640 1 bird, 6 giraffes, 86.8ms
Speed: 5.4ms preprocess, 86.8ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████ | 4557/5000 [10:43<01:00,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000528977.jpg: 640x480 1 bicycle, 63.9ms
Speed: 4.1ms preprocess, 63.9ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  91%|█████████ | 4558/5000 [10:43<00:57,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000528980.jpg: 640x448 4 cars, 1 stop sign, 1 kite, 79.5ms
Speed: 2.6ms preprocess, 79.5ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  91%|█████████ | 4559/5000 [10:44<01:00,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000529105.jpg: 480x640 1 bear, 63.2ms
Speed: 2.6ms preprocess, 63.2ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████ | 4560/5000 [10:44<00:56,  7.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000529122.jpg: 384x640 3 persons, 1 handbag, 2 donuts, 120.3ms
Speed: 3.6ms preprocess, 120.3ms inference, 5.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  91%|█████████ | 4561/5000 [10:44<01:03,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000529148.jpg: 448x640 2 persons, 3 laptops, 1 mouse, 3 keyboards, 78.7ms
Speed: 2.7ms preprocess, 78.7ms inference, 10.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████ | 4562/5000 [10:44<01:05,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000529528.jpg: 448x640 1 bowl, 1 cake, 60.8ms
Speed: 3.8ms preprocess, 60.8ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████▏| 4563/5000 [10:44<01:00,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000529568.jpg: 640x480 1 bowl, 1 potted plant, 1 dining table, 1 tv, 1 vase, 63.2ms
Speed: 2.6ms preprocess, 63.2ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  91%|█████████▏| 4564/5000 [10:44<00:58,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000529762.jpg: 480x640 1 person, 1 wine glass, 1 bowl, 1 dining table, 66.9ms
Speed: 2.9ms preprocess, 66.9ms inference, 9.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████▏| 4565/5000 [10:44<00:58,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000529939.jpg: 480x640 1 person, 1 motorcycle, 8 surfboards, 61.3ms
Speed: 2.7ms preprocess, 61.3ms inference, 9.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████▏| 4566/5000 [10:45<01:00,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000529966.jpg: 448x640 2 bowls, 15 oranges, 61.3ms
Speed: 3.0ms preprocess, 61.3ms inference, 14.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████▏| 4567/5000 [10:45<01:02,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530052.jpg: 448x640 7 carrots, 60.5ms
Speed: 2.9ms preprocess, 60.5ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████▏| 4568/5000 [10:45<01:00,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530061.jpg: 480x640 3 persons, 1 bowl, 1 chair, 2 dining tables, 94.5ms
Speed: 2.7ms preprocess, 94.5ms inference, 11.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████▏| 4569/5000 [10:45<01:03,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530099.jpg: 480x640 1 car, 1 cat, 63.9ms
Speed: 3.6ms preprocess, 63.9ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████▏| 4570/5000 [10:45<00:59,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530146.jpg: 640x640 2 cups, 1 fork, 1 bowl, 2 sandwichs, 1 dining table, 80.7ms
Speed: 2.2ms preprocess, 80.7ms inference, 10.1ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  91%|█████████▏| 4571/5000 [10:45<01:01,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530162.jpg: 448x640 10 persons, 2 umbrellas, 61.7ms
Speed: 2.9ms preprocess, 61.7ms inference, 25.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  91%|█████████▏| 4572/5000 [10:45<01:04,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530457.jpg: 640x512 1 potted plant, 1 vase, 75.0ms
Speed: 4.3ms preprocess, 75.0ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  91%|█████████▏| 4573/5000 [10:46<01:02,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530466.jpg: 480x640 1 train, 66.2ms
Speed: 3.0ms preprocess, 66.2ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  91%|█████████▏| 4574/5000 [10:46<00:58,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530470.jpg: 480x640 1 truck, 94.0ms
Speed: 8.7ms preprocess, 94.0ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4575/5000 [10:46<01:00,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530624.jpg: 480x640 1 dog, 2 beds, 70.9ms
Speed: 2.9ms preprocess, 70.9ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4576/5000 [10:46<00:58,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530820.jpg: 448x640 1 person, 1 tennis racket, 65.2ms
Speed: 3.2ms preprocess, 65.2ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4577/5000 [10:46<00:55,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530836.jpg: 480x640 1 bowl, 1 oven, 2 refrigerators, 109.9ms
Speed: 2.8ms preprocess, 109.9ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4578/5000 [10:46<01:00,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530854.jpg: 448x640 6 umbrellas, 55.0ms
Speed: 3.1ms preprocess, 55.0ms inference, 6.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4579/5000 [10:46<00:57,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000530975.jpg: 480x640 7 persons, 1 umbrella, 1 hot dog, 3 chairs, 1 dining table, 63.0ms
Speed: 3.2ms preprocess, 63.0ms inference, 14.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4580/5000 [10:47<00:59,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000531036.jpg: 640x480 2 persons, 3 buss, 61.6ms
Speed: 3.0ms preprocess, 61.6ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  92%|█████████▏| 4581/5000 [10:47<00:57,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000531134.jpg: 480x640 6 persons, 1 car, 3 traffic lights, 1 potted plant, 103.5ms
Speed: 6.3ms preprocess, 103.5ms inference, 9.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4582/5000 [10:47<01:03,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000531135.jpg: 480x640 9 persons, 62.0ms
Speed: 3.8ms preprocess, 62.0ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4583/5000 [10:47<01:01,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000531495.jpg: 448x640 4 boats, 62.0ms
Speed: 2.8ms preprocess, 62.0ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4584/5000 [10:47<00:58,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000531707.jpg: 480x640 5 persons, 1 bench, 63.4ms
Speed: 2.8ms preprocess, 63.4ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4585/5000 [10:47<00:55,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000531771.jpg: 640x448 1 oven, 92.2ms
Speed: 3.4ms preprocess, 92.2ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  92%|█████████▏| 4586/5000 [10:47<00:57,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000532058.jpg: 480x640 2 persons, 1 bottle, 1 bed, 66.7ms
Speed: 2.6ms preprocess, 66.7ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4587/5000 [10:47<00:55,  7.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000532071.jpg: 384x640 2 bears, 59.6ms
Speed: 2.6ms preprocess, 59.6ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  92%|█████████▏| 4588/5000 [10:48<00:52,  7.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000532129.jpg: 448x640 1 bottle, 1 cup, 1 pizza, 1 dining table, 62.4ms
Speed: 3.2ms preprocess, 62.4ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4589/5000 [10:48<00:51,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000532481.jpg: 448x640 1 person, 1 snowboard, 1 kite, 1 skateboard, 94.3ms
Speed: 4.1ms preprocess, 94.3ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4590/5000 [10:48<00:54,  7.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000532493.jpg: 416x640 1 person, 1 surfboard, 61.4ms
Speed: 2.6ms preprocess, 61.4ms inference, 2.3ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  92%|█████████▏| 4591/5000 [10:48<00:54,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000532530.jpg: 448x640 3 persons, 71.1ms
Speed: 3.9ms preprocess, 71.1ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4592/5000 [10:48<00:54,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000532575.jpg: 448x640 1 boat, 1 dog, 60.9ms
Speed: 3.0ms preprocess, 60.9ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4593/5000 [10:48<00:51,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000532690.jpg: 640x544 3 persons, 1 couch, 1 remote, 155.4ms
Speed: 1.6ms preprocess, 155.4ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 544)


Segmenting Images:  92%|█████████▏| 4594/5000 [10:48<01:02,  6.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000532761.jpg: 480x640 2 couchs, 1 potted plant, 1 tv, 1 remote, 81.5ms
Speed: 2.8ms preprocess, 81.5ms inference, 4.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4595/5000 [10:49<01:01,  6.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000532855.jpg: 480x640 1 person, 67.6ms
Speed: 2.5ms preprocess, 67.6ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4596/5000 [10:49<00:56,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000532901.jpg: 384x640 1 person, 1 couch, 2 potted plants, 51.1ms
Speed: 3.7ms preprocess, 51.1ms inference, 4.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  92%|█████████▏| 4597/5000 [10:49<00:52,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000533145.jpg: 480x640 1 banana, 1 remote, 85.2ms
Speed: 4.8ms preprocess, 85.2ms inference, 9.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4598/5000 [10:49<00:55,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000533206.jpg: 448x640 1 bottle, 3 wine glasss, 1 cup, 1 knife, 1 bowl, 1 sandwich, 2 cakes, 1 dining table, 59.7ms
Speed: 2.9ms preprocess, 59.7ms inference, 9.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4599/5000 [10:49<00:54,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000533493.jpg: 384x640 2 persons, 1 frisbee, 51.1ms
Speed: 3.5ms preprocess, 51.1ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  92%|█████████▏| 4600/5000 [10:49<00:51,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000533536.jpg: 480x640 1 dog, 63.0ms
Speed: 2.9ms preprocess, 63.0ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4601/5000 [10:49<00:50,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000533816.jpg: 448x640 15 persons, 1 tennis racket, 68.4ms
Speed: 3.6ms preprocess, 68.4ms inference, 14.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4602/5000 [10:50<00:55,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000533855.jpg: 448x640 1 donut, 60.3ms
Speed: 3.2ms preprocess, 60.3ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4603/5000 [10:50<00:51,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000533958.jpg: 640x640 1 sandwich, 1 orange, 107.6ms
Speed: 3.3ms preprocess, 107.6ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  92%|█████████▏| 4604/5000 [10:50<00:55,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000534041.jpg: 480x640 8 persons, 1 bottle, 1 hot dog, 3 chairs, 1 dining table, 75.1ms
Speed: 4.4ms preprocess, 75.1ms inference, 12.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4605/5000 [10:50<00:59,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000534270.jpg: 544x640 2 persons, 1 dog, 3 umbrellas, 187.8ms
Speed: 3.2ms preprocess, 187.8ms inference, 6.8ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  92%|█████████▏| 4606/5000 [10:50<01:10,  5.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000534394.jpg: 384x640 1 car, 18 sheeps, 47.0ms
Speed: 2.6ms preprocess, 47.0ms inference, 14.4ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  92%|█████████▏| 4607/5000 [10:50<01:05,  5.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000534601.jpg: 448x640 1 bed, 1 teddy bear, 61.3ms
Speed: 4.6ms preprocess, 61.3ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4608/5000 [10:50<00:58,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000534605.jpg: 448x640 3 persons, 3 motorcycles, 56.7ms
Speed: 4.5ms preprocess, 56.7ms inference, 6.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4609/5000 [10:51<01:00,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000534639.jpg: 480x640 2 cars, 2 trains, 2 traffic lights, 65.6ms
Speed: 2.8ms preprocess, 65.6ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4610/5000 [10:51<00:57,  6.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000534664.jpg: 416x640 4 suitcases, 51.1ms
Speed: 2.8ms preprocess, 51.1ms inference, 6.6ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  92%|█████████▏| 4611/5000 [10:51<00:52,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000534673.jpg: 480x640 3 persons, 1 bus, 61.5ms
Speed: 2.9ms preprocess, 61.5ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▏| 4612/5000 [10:51<00:50,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000534827.jpg: 640x640 3 persons, 5 motorcycles, 111.8ms
Speed: 33.6ms preprocess, 111.8ms inference, 14.0ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  92%|█████████▏| 4613/5000 [10:51<01:02,  6.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000535094.jpg: 448x640 1 dog, 60.0ms
Speed: 2.6ms preprocess, 60.0ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4614/5000 [10:51<00:55,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000535156.jpg: 512x640 2 elephants, 189.6ms
Speed: 2.9ms preprocess, 189.6ms inference, 3.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  92%|█████████▏| 4615/5000 [10:52<01:07,  5.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000535253.jpg: 640x640 3 bottles, 1 cup, 2 bananas, 84.0ms
Speed: 3.7ms preprocess, 84.0ms inference, 10.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  92%|█████████▏| 4616/5000 [10:52<01:05,  5.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000535306.jpg: 448x640 1 person, 2 skateboards, 101.5ms
Speed: 4.7ms preprocess, 101.5ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4617/5000 [10:52<01:04,  5.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000535523.jpg: 448x640 2 persons, 1 pizza, 1 donut, 60.0ms
Speed: 2.6ms preprocess, 60.0ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4618/5000 [10:52<00:58,  6.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000535578.jpg: 640x448 9 sheeps, 59.7ms
Speed: 2.6ms preprocess, 59.7ms inference, 7.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  92%|█████████▏| 4619/5000 [10:52<00:55,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000535608.jpg: 512x640 5 persons, 2 umbrellas, 3 chairs, 71.5ms
Speed: 4.6ms preprocess, 71.5ms inference, 23.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  92%|█████████▏| 4620/5000 [10:52<00:59,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000535858.jpg: 448x640 1 person, 58.3ms
Speed: 2.5ms preprocess, 58.3ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  92%|█████████▏| 4621/5000 [10:52<00:53,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000536038.jpg: 640x384 3 persons, 1 bed, 1 laptop, 120.1ms
Speed: 2.4ms preprocess, 120.1ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  92%|█████████▏| 4622/5000 [10:53<00:57,  6.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000536073.jpg: 640x480 1 cup, 1 knife, 1 orange, 139.2ms
Speed: 4.0ms preprocess, 139.2ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  92%|█████████▏| 4623/5000 [10:53<01:01,  6.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000536343.jpg: 448x640 (no detections), 55.7ms
Speed: 4.5ms preprocess, 55.7ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000536947.jpg: 480x640 1 refrigerator, 59.2ms
Speed: 3.7ms preprocess, 59.2ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  92%|█████████▎| 4625/5000 [10:53<00:49,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000537053.jpg: 640x480 1 giraffe, 61.2ms
Speed: 2.8ms preprocess, 61.2ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  93%|█████████▎| 4626/5000 [10:53<00:47,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000537153.jpg: 416x640 1 bench, 52.2ms
Speed: 2.3ms preprocess, 52.2ms inference, 2.5ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  93%|█████████▎| 4627/5000 [10:53<00:45,  8.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000537241.jpg: 448x640 1 donut, 65.7ms
Speed: 7.3ms preprocess, 65.7ms inference, 1.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4628/5000 [10:53<00:45,  8.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000537270.jpg: 480x640 4 persons, 1 cup, 1 chair, 1 cell phone, 62.2ms
Speed: 3.3ms preprocess, 62.2ms inference, 8.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4629/5000 [10:53<00:46,  8.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000537355.jpg: 448x640 1 fire hydrant, 90.2ms
Speed: 3.4ms preprocess, 90.2ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4630/5000 [10:54<00:48,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000537506.jpg: 448x640 6 persons, 1 car, 3 umbrellas, 59.8ms
Speed: 4.6ms preprocess, 59.8ms inference, 8.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4631/5000 [10:54<00:48,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000537672.jpg: 640x448 1 chair, 1 refrigerator, 55.9ms
Speed: 3.1ms preprocess, 55.9ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  93%|█████████▎| 4632/5000 [10:54<00:50,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000537802.jpg: 480x640 6 teddy bears, 69.9ms
Speed: 4.1ms preprocess, 69.9ms inference, 6.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4633/5000 [10:54<00:51,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000537812.jpg: 480x640 1 chair, 2 toilets, 65.5ms
Speed: 3.2ms preprocess, 65.5ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4634/5000 [10:54<00:49,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000537827.jpg: 416x640 2 persons, 2 baseball gloves, 55.1ms
Speed: 2.7ms preprocess, 55.1ms inference, 3.7ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  93%|█████████▎| 4635/5000 [10:54<00:46,  7.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000537964.jpg: 448x640 1 fire hydrant, 84.9ms
Speed: 4.9ms preprocess, 84.9ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4636/5000 [10:54<00:47,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000537991.jpg: 480x640 1 person, 1 chair, 2 laptops, 2 cell phones, 64.1ms
Speed: 2.9ms preprocess, 64.1ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4637/5000 [10:55<00:46,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000538067.jpg: 480x640 5 persons, 1 kite, 83.0ms
Speed: 4.5ms preprocess, 83.0ms inference, 24.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4638/5000 [10:55<00:51,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000538236.jpg: 448x640 2 persons, 13 donuts, 1 teddy bear, 62.1ms
Speed: 3.1ms preprocess, 62.1ms inference, 15.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4639/5000 [10:55<00:53,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000538364.jpg: 480x640 4 persons, 2 cars, 1 motorcycle, 1 bench, 1 handbag, 69.7ms
Speed: 3.1ms preprocess, 69.7ms inference, 8.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4640/5000 [10:55<00:53,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000538458.jpg: 448x640 2 persons, 4 skateboards, 94.5ms
Speed: 3.2ms preprocess, 94.5ms inference, 5.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4641/5000 [10:55<00:54,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000539143.jpg: 480x640 1 train, 63.0ms
Speed: 3.2ms preprocess, 63.0ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4642/5000 [10:55<00:50,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000539445.jpg: 640x640 1 train, 1 traffic light, 83.0ms
Speed: 3.3ms preprocess, 83.0ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  93%|█████████▎| 4643/5000 [10:55<00:49,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000539883.jpg: 448x640 1 tv, 4 books, 63.0ms
Speed: 3.2ms preprocess, 63.0ms inference, 10.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4644/5000 [10:56<00:49,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000539962.jpg: 480x640 3 persons, 1 airplane, 1 boat, 1 chair, 72.5ms
Speed: 3.2ms preprocess, 72.5ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4645/5000 [10:56<00:49,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000540280.jpg: 640x448 1 umbrella, 62.7ms
Speed: 3.1ms preprocess, 62.7ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  93%|█████████▎| 4646/5000 [10:56<00:46,  7.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000540414.jpg: 480x640 14 persons, 1 bicycle, 1 motorcycle, 5 umbrellas, 1 bottle, 67.4ms
Speed: 3.1ms preprocess, 67.4ms inference, 25.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4647/5000 [10:56<00:55,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000540466.jpg: 448x640 2 umbrellas, 58.3ms
Speed: 3.3ms preprocess, 58.3ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4648/5000 [10:56<00:51,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000540502.jpg: 448x640 1 bowl, 2 chairs, 1 dining table, 1 microwave, 2 refrigerators, 58.9ms
Speed: 2.9ms preprocess, 58.9ms inference, 6.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4649/5000 [10:56<00:49,  7.14it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000540928.jpg: 480x640 1 cat, 93.6ms
Speed: 4.0ms preprocess, 93.6ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4650/5000 [10:56<00:50,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000540932.jpg: 480x640 9 persons, 68.7ms
Speed: 2.9ms preprocess, 68.7ms inference, 11.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4651/5000 [10:57<00:50,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000540962.jpg: 512x640 1 person, 1 chair, 2 couchs, 1 tv, 68.3ms
Speed: 5.0ms preprocess, 68.3ms inference, 5.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  93%|█████████▎| 4652/5000 [10:57<00:49,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000541055.jpg: 384x640 5 persons, 1 skis, 81.1ms
Speed: 2.7ms preprocess, 81.1ms inference, 5.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  93%|█████████▎| 4653/5000 [10:57<00:50,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000541123.jpg: 448x640 6 persons, 1 sports ball, 1 chair, 64.4ms
Speed: 3.0ms preprocess, 64.4ms inference, 7.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4654/5000 [10:57<00:49,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000541291.jpg: 448x640 1 parking meter, 61.6ms
Speed: 3.4ms preprocess, 61.6ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4655/5000 [10:57<00:45,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000541634.jpg: 640x640 4 cups, 1 bowl, 1 broccoli, 1 dining table, 80.6ms
Speed: 3.3ms preprocess, 80.6ms inference, 9.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  93%|█████████▎| 4656/5000 [10:57<00:48,  7.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000541664.jpg: 480x640 3 remotes, 1 keyboard, 60.3ms
Speed: 4.5ms preprocess, 60.3ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4657/5000 [10:57<00:49,  6.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000541773.jpg: 480x640 2 persons, 4 bottles, 2 wine glasss, 1 cup, 1 dining table, 63.9ms
Speed: 3.3ms preprocess, 63.9ms inference, 9.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4658/5000 [10:58<00:49,  6.97it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000541952.jpg: 640x480 1 clock, 61.5ms
Speed: 2.8ms preprocess, 61.5ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  93%|█████████▎| 4659/5000 [10:58<00:45,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000542073.jpg: 640x480 1 parking meter, 1 clock, 61.3ms
Speed: 4.3ms preprocess, 61.3ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  93%|█████████▎| 4660/5000 [10:58<00:44,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000542089.jpg: 640x480 2 sinks, 68.1ms
Speed: 6.7ms preprocess, 68.1ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  93%|█████████▎| 4661/5000 [10:58<00:44,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000542127.jpg: 448x640 13 persons, 1 kite, 55.0ms
Speed: 2.8ms preprocess, 55.0ms inference, 13.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4662/5000 [10:58<00:46,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000542423.jpg: 448x640 1 person, 2 cars, 1 bench, 74.8ms
Speed: 3.8ms preprocess, 74.8ms inference, 4.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4663/5000 [10:58<00:46,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000542625.jpg: 480x640 3 cars, 1 truck, 1 traffic light, 1 parking meter, 1 bird, 64.5ms
Speed: 2.9ms preprocess, 64.5ms inference, 7.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4664/5000 [10:58<00:45,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000542776.jpg: 448x640 1 bird, 1 cat, 1 book, 64.5ms
Speed: 2.9ms preprocess, 64.5ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4665/5000 [10:58<00:43,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000542856.jpg: 448x640 3 persons, 1 bus, 1 traffic light, 81.1ms
Speed: 4.7ms preprocess, 81.1ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4666/5000 [10:59<00:44,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000543043.jpg: 480x640 1 person, 1 car, 1 bus, 60.0ms
Speed: 3.5ms preprocess, 60.0ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4667/5000 [10:59<00:42,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000543047.jpg: 480x640 4 chairs, 2 couchs, 1 dining table, 1 tv, 66.2ms
Speed: 3.1ms preprocess, 66.2ms inference, 7.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4668/5000 [10:59<00:43,  7.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000543300.jpg: 480x640 2 boats, 75.5ms
Speed: 12.5ms preprocess, 75.5ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4669/5000 [10:59<00:45,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000543528.jpg: 480x640 1 airplane, 65.3ms
Speed: 3.2ms preprocess, 65.3ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  93%|█████████▎| 4670/5000 [10:59<00:43,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000543581.jpg: 448x640 2 dogs, 1 couch, 1 tv, 102.9ms
Speed: 3.7ms preprocess, 102.9ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  93%|█████████▎| 4671/5000 [10:59<00:46,  7.08it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000544052.jpg: 640x640 1 person, 2 motorcycles, 83.6ms
Speed: 2.3ms preprocess, 83.6ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  93%|█████████▎| 4672/5000 [10:59<00:46,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000544306.jpg: 640x640 2 clocks, 78.0ms
Speed: 3.7ms preprocess, 78.0ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  93%|█████████▎| 4673/5000 [11:00<00:45,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000544444.jpg: 640x448 1 person, 1 backpack, 1 skis, 75.1ms
Speed: 2.8ms preprocess, 75.1ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  93%|█████████▎| 4674/5000 [11:00<00:47,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000544519.jpg: 448x640 1 person, 1 toothbrush, 68.0ms
Speed: 3.2ms preprocess, 68.0ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▎| 4675/5000 [11:00<00:45,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000544565.jpg: 640x640 1 fork, 1 spoon, 2 sandwichs, 1 orange, 1 dining table, 87.1ms
Speed: 2.4ms preprocess, 87.1ms inference, 10.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  94%|█████████▎| 4676/5000 [11:00<00:47,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000544605.jpg: 480x640 1 traffic light, 84.1ms
Speed: 5.2ms preprocess, 84.1ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▎| 4677/5000 [11:00<00:46,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000544811.jpg: 448x640 3 birds, 61.6ms
Speed: 3.2ms preprocess, 61.6ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▎| 4678/5000 [11:00<00:43,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000545007.jpg: 640x448 1 person, 2 clocks, 63.5ms
Speed: 3.3ms preprocess, 63.5ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  94%|█████████▎| 4679/5000 [11:00<00:41,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000545100.jpg: 448x640 3 persons, 1 car, 2 cell phones, 61.4ms
Speed: 3.1ms preprocess, 61.4ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▎| 4680/5000 [11:01<00:41,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000545129.jpg: 480x640 3 zebras, 81.2ms
Speed: 7.1ms preprocess, 81.2ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▎| 4681/5000 [11:01<00:42,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000545219.jpg: 480x640 6 persons, 1 bus, 70.4ms
Speed: 3.4ms preprocess, 70.4ms inference, 7.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▎| 4682/5000 [11:01<00:43,  7.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000545407.jpg: 448x640 1 airplane, 73.7ms
Speed: 2.8ms preprocess, 73.7ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▎| 4683/5000 [11:01<00:41,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000545594.jpg: 512x640 7 persons, 68.7ms
Speed: 2.9ms preprocess, 68.7ms inference, 7.0ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  94%|█████████▎| 4684/5000 [11:01<00:42,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000545730.jpg: 448x640 9 zebras, 69.8ms
Speed: 4.4ms preprocess, 69.8ms inference, 7.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▎| 4685/5000 [11:01<00:43,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000545826.jpg: 448x640 1 cat, 1 suitcase, 1 chair, 83.6ms
Speed: 6.6ms preprocess, 83.6ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▎| 4686/5000 [11:01<00:45,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000545958.jpg: 448x640 1 cow, 68.0ms
Speed: 3.3ms preprocess, 68.0ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▎| 4687/5000 [11:01<00:42,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546011.jpg: 448x640 3 zebras, 68.4ms
Speed: 6.4ms preprocess, 68.4ms inference, 6.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4688/5000 [11:02<00:41,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546219.jpg: 448x640 10 persons, 3 cups, 3 chairs, 2 dining tables, 67.3ms
Speed: 3.0ms preprocess, 67.3ms inference, 16.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4689/5000 [11:02<00:51,  6.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546325.jpg: 640x480 1 chair, 1 couch, 1 remote, 71.5ms
Speed: 3.8ms preprocess, 71.5ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  94%|█████████▍| 4690/5000 [11:02<00:47,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546475.jpg: 640x384 4 persons, 2 cell phones, 61.4ms
Speed: 2.7ms preprocess, 61.4ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 384)


Segmenting Images:  94%|█████████▍| 4691/5000 [11:02<00:45,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546556.jpg: 320x640 19 sheeps, 115.9ms
Speed: 3.6ms preprocess, 115.9ms inference, 12.1ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  94%|█████████▍| 4692/5000 [11:02<00:50,  6.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546626.jpg: 640x480 1 bottle, 1 cup, 1 apple, 1 dining table, 1 cell phone, 109.4ms
Speed: 9.5ms preprocess, 109.4ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  94%|█████████▍| 4693/5000 [11:02<00:50,  6.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546659.jpg: 640x448 1 train, 61.9ms
Speed: 3.3ms preprocess, 61.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  94%|█████████▍| 4694/5000 [11:03<00:46,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546717.jpg: 640x480 1 person, 1 couch, 1 potted plant, 1 laptop, 99.2ms
Speed: 4.2ms preprocess, 99.2ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  94%|█████████▍| 4695/5000 [11:03<00:47,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546823.jpg: 480x640 10 persons, 1 horse, 2 umbrellas, 66.7ms
Speed: 3.4ms preprocess, 66.7ms inference, 13.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▍| 4696/5000 [11:03<00:47,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546826.jpg: 480x640 1 toilet, 71.3ms
Speed: 3.8ms preprocess, 71.3ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▍| 4697/5000 [11:03<00:44,  6.79it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546829.jpg: 448x640 1 bench, 1 dog, 64.7ms
Speed: 3.3ms preprocess, 64.7ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4698/5000 [11:03<00:43,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546964.jpg: 480x640 6 persons, 7 chairs, 3 couchs, 1 potted plant, 83.1ms
Speed: 3.4ms preprocess, 83.1ms inference, 20.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▍| 4699/5000 [11:03<00:49,  6.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000546976.jpg: 480x640 1 person, 1 bottle, 68.8ms
Speed: 4.5ms preprocess, 68.8ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▍| 4700/5000 [11:04<00:45,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000547144.jpg: 480x640 2 tvs, 1 laptop, 2 mouses, 3 keyboards, 61.5ms
Speed: 3.3ms preprocess, 61.5ms inference, 7.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▍| 4701/5000 [11:04<00:44,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000547336.jpg: 480x640 1 person, 1 bed, 1 laptop, 62.0ms
Speed: 3.0ms preprocess, 62.0ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▍| 4702/5000 [11:04<00:41,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000547383.jpg: 448x640 2 sheeps, 68.0ms
Speed: 3.0ms preprocess, 68.0ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4703/5000 [11:04<00:39,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000547502.jpg: 480x640 5 dogs, 1 sheep, 1 frisbee, 66.9ms
Speed: 3.1ms preprocess, 66.9ms inference, 6.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▍| 4704/5000 [11:04<00:39,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000547519.jpg: 448x640 1 bear, 59.7ms
Speed: 3.2ms preprocess, 59.7ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4705/5000 [11:04<00:37,  7.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000547816.jpg: 640x448 3 toilets, 70.6ms
Speed: 2.9ms preprocess, 70.6ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  94%|█████████▍| 4706/5000 [11:04<00:37,  7.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000547854.jpg: 640x640 2 persons, 1 fork, 2 pizzas, 1 dining table, 86.7ms
Speed: 3.1ms preprocess, 86.7ms inference, 8.7ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  94%|█████████▍| 4707/5000 [11:04<00:40,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000547886.jpg: 448x640 4 persons, 3 horses, 65.8ms
Speed: 3.1ms preprocess, 65.8ms inference, 5.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4708/5000 [11:05<00:39,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000548246.jpg: 448x640 4 persons, 3 tennis rackets, 1 banana, 90.0ms
Speed: 7.6ms preprocess, 90.0ms inference, 7.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4709/5000 [11:05<00:42,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000548267.jpg: 480x640 12 sheeps, 63.9ms
Speed: 3.1ms preprocess, 63.9ms inference, 10.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▍| 4710/5000 [11:05<00:42,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000548339.jpg: 576x640 3 persons, 1 baseball bat, 1 bottle, 179.5ms
Speed: 1.8ms preprocess, 179.5ms inference, 6.5ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  94%|█████████▍| 4711/5000 [11:05<00:50,  5.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000548506.jpg: 448x640 3 bananas, 5 apples, 1 orange, 57.1ms
Speed: 2.6ms preprocess, 57.1ms inference, 7.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4712/5000 [11:05<00:45,  6.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000548524.jpg: 448x640 2 persons, 1 boat, 59.2ms
Speed: 2.9ms preprocess, 59.2ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4713/5000 [11:05<00:41,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000548555.jpg: 448x640 1 hot dog, 58.3ms
Speed: 3.0ms preprocess, 58.3ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4714/5000 [11:05<00:39,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000548780.jpg: 448x640 7 persons, 1 bench, 1 bird, 1 dog, 1 handbag, 1 potted plant, 65.0ms
Speed: 4.3ms preprocess, 65.0ms inference, 12.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4715/5000 [11:06<00:40,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000549055.jpg: 320x640 1 person, 1 surfboard, 44.9ms
Speed: 2.3ms preprocess, 44.9ms inference, 2.4ms postprocess per image at shape (1, 3, 320, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000549136.jpg: 448x640 1 bird, 67.1ms
Speed: 3.0ms preprocess, 67.1ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4717/5000 [11:06<00:35,  8.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000549167.jpg: 640x448 1 bowl, 1 broccoli, 1 dining table, 79.7ms
Speed: 5.2ms preprocess, 79.7ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  94%|█████████▍| 4718/5000 [11:06<00:36,  7.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000549220.jpg: 640x480 4 persons, 1 dog, 70.7ms
Speed: 3.5ms preprocess, 70.7ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  94%|█████████▍| 4719/5000 [11:06<00:36,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000549390.jpg: 448x640 4 persons, 5 bicycles, 1 backpack, 95.4ms
Speed: 4.0ms preprocess, 95.4ms inference, 8.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4720/5000 [11:06<00:40,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000549674.jpg: 480x640 1 cup, 3 tvs, 1 mouse, 2 keyboards, 1 cell phone, 68.1ms
Speed: 3.3ms preprocess, 68.1ms inference, 7.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▍| 4721/5000 [11:06<00:39,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000549738.jpg: 448x640 3 persons, 1 kite, 83.4ms
Speed: 2.9ms preprocess, 83.4ms inference, 7.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  94%|█████████▍| 4722/5000 [11:07<00:40,  6.80it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000549930.jpg: 576x640 3 persons, 2 umbrellas, 75.6ms
Speed: 2.4ms preprocess, 75.6ms inference, 6.3ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  94%|█████████▍| 4723/5000 [11:07<00:40,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000550084.jpg: 480x640 (no detections), 63.6ms
Speed: 4.7ms preprocess, 63.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  94%|█████████▍| 4724/5000 [11:07<00:36,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000550322.jpg: 640x448 1 cup, 1 remote, 1 scissors, 86.4ms
Speed: 2.9ms preprocess, 86.4ms inference, 7.5ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  94%|█████████▍| 4725/5000 [11:07<00:37,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000550349.jpg: 640x480 3 persons, 1 car, 1 bus, 1 backpack, 1 handbag, 66.3ms
Speed: 3.9ms preprocess, 66.3ms inference, 6.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  95%|█████████▍| 4726/5000 [11:07<00:37,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000550426.jpg: 640x416 1 cup, 1 vase, 147.6ms
Speed: 2.3ms preprocess, 147.6ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 416)


Segmenting Images:  95%|█████████▍| 4727/5000 [11:07<00:42,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000550471.jpg: 448x640 2 spoons, 1 orange, 1 dining table, 98.2ms
Speed: 3.0ms preprocess, 98.2ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▍| 4728/5000 [11:07<00:42,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000550691.jpg: 480x640 1 person, 2 cars, 3 buss, 70.1ms
Speed: 3.0ms preprocess, 70.1ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  95%|█████████▍| 4729/5000 [11:08<00:40,  6.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000550714.jpg: 480x640 1 person, 2 bananas, 93.6ms
Speed: 7.6ms preprocess, 93.6ms inference, 3.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  95%|█████████▍| 4730/5000 [11:08<00:40,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000550797.jpg: 640x384 1 toilet, 55.0ms
Speed: 2.0ms preprocess, 55.0ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000550939.jpg: 640x448 (no detections), 71.9ms
Speed: 2.9ms preprocess, 71.9ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  95%|█████████▍| 4732/5000 [11:08<00:34,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000551215.jpg: 640x480 1 person, 66.2ms
Speed: 4.5ms preprocess, 66.2ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  95%|█████████▍| 4733/5000 [11:08<00:34,  7.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000551304.jpg: 640x480 1 toilet, 65.6ms
Speed: 3.3ms preprocess, 65.6ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  95%|█████████▍| 4734/5000 [11:08<00:34,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000551350.jpg: 480x640 1 bench, 64.8ms
Speed: 6.0ms preprocess, 64.8ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  95%|█████████▍| 4735/5000 [11:08<00:34,  7.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000551439.jpg: 640x448 2 persons, 2 books, 57.5ms
Speed: 3.0ms preprocess, 57.5ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  95%|█████████▍| 4736/5000 [11:08<00:32,  8.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000551660.jpg: 640x448 1 cup, 2 bowls, 4 broccolis, 1 dining table, 59.7ms
Speed: 2.7ms preprocess, 59.7ms inference, 7.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  95%|█████████▍| 4737/5000 [11:09<00:33,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000551780.jpg: 480x640 1 bird, 1 clock, 111.7ms
Speed: 3.3ms preprocess, 111.7ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  95%|█████████▍| 4738/5000 [11:09<00:35,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000551794.jpg: 480x640 1 person, 1 motorcycle, 64.5ms
Speed: 3.2ms preprocess, 64.5ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  95%|█████████▍| 4739/5000 [11:09<00:34,  7.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000551804.jpg: 448x640 1 person, 1 sports ball, 1 tennis racket, 64.1ms
Speed: 3.0ms preprocess, 64.1ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▍| 4740/5000 [11:09<00:33,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000551815.jpg: 480x640 3 cats, 1 bed, 81.5ms
Speed: 3.2ms preprocess, 81.5ms inference, 11.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  95%|█████████▍| 4741/5000 [11:09<00:34,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000551820.jpg: 448x640 14 persons, 59.0ms
Speed: 3.1ms preprocess, 59.0ms inference, 12.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▍| 4742/5000 [11:09<00:35,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000551822.jpg: 480x640 3 cups, 1 bowl, 1 apple, 2 sandwichs, 1 dining table, 63.9ms
Speed: 3.3ms preprocess, 63.9ms inference, 7.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  95%|█████████▍| 4743/5000 [11:09<00:35,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000552371.jpg: 448x640 2 horses, 1 chair, 101.6ms
Speed: 2.9ms preprocess, 101.6ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▍| 4744/5000 [11:10<00:36,  6.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000552612.jpg: 640x640 9 sheeps, 1 cow, 78.5ms
Speed: 4.3ms preprocess, 78.5ms inference, 12.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  95%|█████████▍| 4745/5000 [11:10<00:39,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000552775.jpg: 640x480 2 persons, 2 bottles, 1 bowl, 1 donut, 1 oven, 1 refrigerator, 66.5ms
Speed: 5.3ms preprocess, 66.5ms inference, 8.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  95%|█████████▍| 4746/5000 [11:10<00:38,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000552842.jpg: 448x640 1 person, 1 sports ball, 1 baseball glove, 69.9ms
Speed: 4.1ms preprocess, 69.9ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▍| 4747/5000 [11:10<00:37,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000552883.jpg: 448x640 1 oven, 61.4ms
Speed: 2.9ms preprocess, 61.4ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▍| 4748/5000 [11:10<00:34,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000552902.jpg: 448x640 1 zebra, 74.5ms
Speed: 3.0ms preprocess, 74.5ms inference, 6.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▍| 4749/5000 [11:10<00:34,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000553094.jpg: 448x640 1 airplane, 61.2ms
Speed: 3.4ms preprocess, 61.2ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▌| 4750/5000 [11:10<00:32,  7.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000553221.jpg: 448x640 11 persons, 5 kites, 1 surfboard, 60.7ms
Speed: 3.3ms preprocess, 60.7ms inference, 19.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▌| 4751/5000 [11:11<00:37,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000553339.jpg: 480x640 (no detections), 61.6ms
Speed: 4.9ms preprocess, 61.6ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  95%|█████████▌| 4752/5000 [11:11<00:33,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000553511.jpg: 448x640 1 person, 4 cars, 5 traffic lights, 1 fire hydrant, 59.5ms
Speed: 3.9ms preprocess, 59.5ms inference, 9.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▌| 4753/5000 [11:11<00:33,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000553664.jpg: 416x640 1 laptop, 1 mouse, 149.8ms
Speed: 2.7ms preprocess, 149.8ms inference, 2.4ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  95%|█████████▌| 4754/5000 [11:11<00:38,  6.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000553669.jpg: 480x640 4 persons, 1 bench, 1 potted plant, 63.3ms
Speed: 3.0ms preprocess, 63.3ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  95%|█████████▌| 4755/5000 [11:11<00:36,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000553731.jpg: 448x640 4 persons, 108.0ms
Speed: 3.5ms preprocess, 108.0ms inference, 4.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▌| 4756/5000 [11:11<00:37,  6.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000553776.jpg: 512x640 1 person, 1 motorcycle, 128.7ms
Speed: 2.8ms preprocess, 128.7ms inference, 2.1ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  95%|█████████▌| 4757/5000 [11:12<00:39,  6.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000553788.jpg: 384x640 2 persons, 2 beds, 1 laptop, 1 book, 115.5ms
Speed: 2.6ms preprocess, 115.5ms inference, 5.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  95%|█████████▌| 4758/5000 [11:12<00:41,  5.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000553990.jpg: 448x640 1 person, 1 horse, 65.1ms
Speed: 4.4ms preprocess, 65.1ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▌| 4759/5000 [11:12<00:37,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000554002.jpg: 448x640 10 persons, 1 dog, 1 handbag, 60.1ms
Speed: 2.5ms preprocess, 60.1ms inference, 15.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▌| 4760/5000 [11:12<00:36,  6.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000554156.jpg: 448x640 1 person, 1 tennis racket, 56.7ms
Speed: 2.8ms preprocess, 56.7ms inference, 3.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▌| 4761/5000 [11:12<00:33,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000554266.jpg: 256x640 1 chair, 1 bed, 1 clock, 114.2ms
Speed: 5.3ms preprocess, 114.2ms inference, 2.1ms postprocess per image at shape (1, 3, 256, 640)


Segmenting Images:  95%|█████████▌| 4762/5000 [11:12<00:35,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000554291.jpg: 448x640 1 couch, 59.2ms
Speed: 2.4ms preprocess, 59.2ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▌| 4763/5000 [11:12<00:32,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000554328.jpg: 512x640 3 persons, 1 baseball glove, 81.6ms
Speed: 3.0ms preprocess, 81.6ms inference, 8.2ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  95%|█████████▌| 4764/5000 [11:13<00:33,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000554579.jpg: 640x512 3 persons, 1 dog, 1 chair, 142.6ms
Speed: 3.4ms preprocess, 142.6ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  95%|█████████▌| 4765/5000 [11:13<00:37,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000554595.jpg: 512x640 1 person, 1 surfboard, 92.0ms
Speed: 4.2ms preprocess, 92.0ms inference, 15.4ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  95%|█████████▌| 4766/5000 [11:13<00:37,  6.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000554735.jpg: 480x640 1 person, 1 pizza, 70.4ms
Speed: 3.2ms preprocess, 70.4ms inference, 3.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  95%|█████████▌| 4767/5000 [11:13<00:34,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000554838.jpg: 640x480 2 bottles, 1 refrigerator, 1 book, 71.3ms
Speed: 3.2ms preprocess, 71.3ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  95%|█████████▌| 4768/5000 [11:13<00:33,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000555005.jpg: 480x640 2 persons, 1 bed, 2 vases, 75.1ms
Speed: 3.0ms preprocess, 75.1ms inference, 5.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  95%|█████████▌| 4769/5000 [11:13<00:32,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000555009.jpg: 480x640 2 bottles, 1 cup, 2 tvs, 1 laptop, 2 mouses, 1 keyboard, 59.8ms
Speed: 4.3ms preprocess, 59.8ms inference, 10.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  95%|█████████▌| 4770/5000 [11:13<00:32,  7.06it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000555012.jpg: 448x640 1 toilet, 63.2ms
Speed: 2.6ms preprocess, 63.2ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▌| 4771/5000 [11:14<00:30,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000555050.jpg: 416x640 1 person, 3 cars, 5 traffic lights, 1 horse, 56.2ms
Speed: 3.9ms preprocess, 56.2ms inference, 9.5ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  95%|█████████▌| 4772/5000 [11:14<00:30,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000555412.jpg: 448x640 1 carrot, 1 pizza, 82.1ms
Speed: 14.4ms preprocess, 82.1ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  95%|█████████▌| 4773/5000 [11:14<00:30,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000555597.jpg: 544x640 13 cars, 2 trucks, 186.8ms
Speed: 1.6ms preprocess, 186.8ms inference, 19.2ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  95%|█████████▌| 4774/5000 [11:14<00:40,  5.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000555705.jpg: 384x640 2 cats, 53.5ms
Speed: 2.5ms preprocess, 53.5ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  96%|█████████▌| 4775/5000 [11:14<00:35,  6.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000555972.jpg: 640x480 1 chair, 1 book, 4 vases, 62.7ms
Speed: 3.1ms preprocess, 62.7ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  96%|█████████▌| 4776/5000 [11:14<00:33,  6.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000556000.jpg: 544x640 4 persons, 3 chairs, 78.6ms
Speed: 2.2ms preprocess, 78.6ms inference, 8.3ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  96%|█████████▌| 4777/5000 [11:14<00:35,  6.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000556158.jpg: 640x480 1 person, 1 skis, 63.2ms
Speed: 3.0ms preprocess, 63.2ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  96%|█████████▌| 4778/5000 [11:15<00:32,  6.88it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000556193.jpg: 448x640 2 persons, 1 chair, 61.7ms
Speed: 4.3ms preprocess, 61.7ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▌| 4779/5000 [11:15<00:30,  7.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000556498.jpg: 480x640 (no detections), 93.8ms
Speed: 3.0ms preprocess, 93.8ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▌| 4780/5000 [11:15<00:29,  7.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000556765.jpg: 448x640 1 elephant, 67.8ms
Speed: 2.7ms preprocess, 67.8ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▌| 4781/5000 [11:15<00:28,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000556873.jpg: 640x448 1 fire hydrant, 185.8ms
Speed: 2.8ms preprocess, 185.8ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  96%|█████████▌| 4782/5000 [11:15<00:35,  6.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000557172.jpg: 480x640 2 sinks, 64.9ms
Speed: 3.7ms preprocess, 64.9ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▌| 4783/5000 [11:15<00:32,  6.74it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000557258.jpg: 640x448 2 toilets, 55.1ms
Speed: 4.9ms preprocess, 55.1ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  96%|█████████▌| 4784/5000 [11:15<00:29,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000557501.jpg: 480x640 2 sinks, 79.1ms
Speed: 2.9ms preprocess, 79.1ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▌| 4785/5000 [11:16<00:29,  7.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000557672.jpg: 448x640 2 persons, 1 umbrella, 70.6ms
Speed: 2.8ms preprocess, 70.6ms inference, 3.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▌| 4786/5000 [11:16<00:28,  7.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000557884.jpg: 640x480 1 potted plant, 64.4ms
Speed: 2.9ms preprocess, 64.4ms inference, 7.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  96%|█████████▌| 4787/5000 [11:16<00:28,  7.47it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000557916.jpg: 448x640 1 person, 2 beds, 1 vase, 1 teddy bear, 68.2ms
Speed: 3.2ms preprocess, 68.2ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▌| 4788/5000 [11:16<00:28,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000558073.jpg: 480x640 1 cat, 75.5ms
Speed: 4.5ms preprocess, 75.5ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▌| 4789/5000 [11:16<00:27,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000558114.jpg: 480x640 7 persons, 1 baseball glove, 101.3ms
Speed: 3.1ms preprocess, 101.3ms inference, 8.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▌| 4790/5000 [11:16<00:30,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000558213.jpg: 448x640 3 persons, 1 train, 2 skateboards, 60.4ms
Speed: 2.9ms preprocess, 60.4ms inference, 6.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▌| 4791/5000 [11:16<00:29,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000558421.jpg: 448x640 2 trains, 64.0ms
Speed: 2.8ms preprocess, 64.0ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▌| 4792/5000 [11:17<00:27,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000558558.jpg: 480x640 1 car, 103.3ms
Speed: 3.2ms preprocess, 103.3ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▌| 4793/5000 [11:17<00:28,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000558854.jpg: 480x640 1 person, 1 cup, 2 spoons, 1 bowl, 2 sandwichs, 1 chair, 1 dining table, 64.4ms
Speed: 4.1ms preprocess, 64.4ms inference, 9.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▌| 4794/5000 [11:17<00:28,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000559099.jpg: 384x640 19 cows, 55.0ms
Speed: 2.5ms preprocess, 55.0ms inference, 14.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  96%|█████████▌| 4795/5000 [11:17<00:30,  6.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000559160.jpg: 640x448 4 persons, 1 skateboard, 67.9ms
Speed: 3.0ms preprocess, 67.9ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  96%|█████████▌| 4796/5000 [11:17<00:28,  7.10it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000559348.jpg: 480x640 8 persons, 64.8ms
Speed: 2.7ms preprocess, 64.8ms inference, 8.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▌| 4797/5000 [11:17<00:28,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000559513.jpg: 448x640 1 cup, 1 spoon, 1 cake, 1 dining table, 87.3ms
Speed: 5.6ms preprocess, 87.3ms inference, 4.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▌| 4798/5000 [11:17<00:28,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000559543.jpg: 448x640 1 person, 2 couchs, 1 remote, 1 book, 1 vase, 60.0ms
Speed: 4.2ms preprocess, 60.0ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▌| 4799/5000 [11:18<00:26,  7.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000559547.jpg: 512x640 4 persons, 1 tie, 67.3ms
Speed: 3.4ms preprocess, 67.3ms inference, 8.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  96%|█████████▌| 4800/5000 [11:18<00:26,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000559707.jpg: 640x480 1 fork, 1 knife, 1 bowl, 6 broccolis, 71.6ms
Speed: 3.2ms preprocess, 71.6ms inference, 9.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  96%|█████████▌| 4801/5000 [11:18<00:27,  7.23it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000559842.jpg: 448x640 19 persons, 1 sports ball, 1 chair, 64.6ms
Speed: 3.1ms preprocess, 64.6ms inference, 19.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▌| 4802/5000 [11:18<00:29,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000559956.jpg: 480x640 3 persons, 4 sheeps, 1 bottle, 62.4ms
Speed: 3.0ms preprocess, 62.4ms inference, 8.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▌| 4803/5000 [11:18<00:28,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000560011.jpg: 640x448 1 keyboard, 72.3ms
Speed: 3.9ms preprocess, 72.3ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  96%|█████████▌| 4804/5000 [11:18<00:27,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000560178.jpg: 480x640 1 person, 1 apple, 63.7ms
Speed: 2.9ms preprocess, 63.7ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▌| 4805/5000 [11:18<00:26,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000560256.jpg: 480x640 5 bananas, 2 apples, 1 orange, 69.5ms
Speed: 2.8ms preprocess, 69.5ms inference, 16.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▌| 4806/5000 [11:19<00:28,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000560266.jpg: 448x640 2 bears, 61.8ms
Speed: 2.7ms preprocess, 61.8ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▌| 4807/5000 [11:19<00:26,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000560279.jpg: 640x640 1 bottle, 1 toilet, 1 vase, 202.1ms
Speed: 4.9ms preprocess, 202.1ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  96%|█████████▌| 4808/5000 [11:19<00:33,  5.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000560312.jpg: 448x640 (no detections), 51.3ms
Speed: 2.8ms preprocess, 51.3ms inference, 0.4ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\Image Captioning and Segmentation\val2017\000000560371.jpg: 448x640 (no detections), 60.2ms
Speed: 2.8ms preprocess, 60.2ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▌| 4810/5000 [11:19<00:25,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000560474.jpg: 480x640 7 persons, 1 umbrella, 1 laptop, 63.3ms
Speed: 3.0ms preprocess, 63.3ms inference, 8.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▌| 4811/5000 [11:19<00:26,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000560880.jpg: 512x640 3 horses, 1 cow, 77.5ms
Speed: 3.2ms preprocess, 77.5ms inference, 4.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  96%|█████████▌| 4812/5000 [11:19<00:26,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000560911.jpg: 640x512 2 persons, 1 couch, 66.3ms
Speed: 3.2ms preprocess, 66.3ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  96%|█████████▋| 4813/5000 [11:19<00:25,  7.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000561009.jpg: 448x640 2 birds, 61.7ms
Speed: 3.3ms preprocess, 61.7ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▋| 4814/5000 [11:20<00:24,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000561223.jpg: 448x640 1 person, 1 surfboard, 64.3ms
Speed: 4.0ms preprocess, 64.3ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  96%|█████████▋| 4815/5000 [11:20<00:23,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000561256.jpg: 480x640 1 person, 1 cell phone, 1 sink, 65.3ms
Speed: 3.3ms preprocess, 65.3ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▋| 4816/5000 [11:20<00:23,  7.94it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000561335.jpg: 512x640 1 fire hydrant, 1 suitcase, 108.3ms
Speed: 7.8ms preprocess, 108.3ms inference, 2.9ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  96%|█████████▋| 4817/5000 [11:20<00:25,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000561366.jpg: 480x640 3 keyboards, 2 cell phones, 66.4ms
Speed: 3.1ms preprocess, 66.4ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▋| 4818/5000 [11:20<00:24,  7.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000561465.jpg: 640x640 1 sandwich, 3 donuts, 78.1ms
Speed: 3.7ms preprocess, 78.1ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  96%|█████████▋| 4819/5000 [11:20<00:24,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000561679.jpg: 480x640 (no detections), 100.8ms
Speed: 3.3ms preprocess, 100.8ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▋| 4820/5000 [11:20<00:24,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000561889.jpg: 480x640 1 fork, 1 bowl, 2 broccolis, 1 carrot, 1 dining table, 64.6ms
Speed: 3.3ms preprocess, 64.6ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▋| 4821/5000 [11:21<00:23,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000561958.jpg: 480x640 18 persons, 63.9ms
Speed: 3.3ms preprocess, 63.9ms inference, 20.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▋| 4822/5000 [11:21<00:26,  6.65it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000562059.jpg: 640x448 1 cup, 1 spoon, 1 bowl, 2 apples, 59.2ms
Speed: 2.8ms preprocess, 59.2ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  96%|█████████▋| 4823/5000 [11:21<00:25,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000562121.jpg: 480x640 4 zebras, 71.1ms
Speed: 3.3ms preprocess, 71.1ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  96%|█████████▋| 4824/5000 [11:21<00:24,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000562197.jpg: 640x640 4 broccolis, 116.1ms
Speed: 2.4ms preprocess, 116.1ms inference, 9.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  96%|█████████▋| 4825/5000 [11:21<00:26,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000562207.jpg: 448x640 3 persons, 1 elephant, 1 handbag, 60.9ms
Speed: 3.4ms preprocess, 60.9ms inference, 5.4ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4826/5000 [11:21<00:24,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000562229.jpg: 640x640 1 person, 1 bicycle, 1 skateboard, 88.4ms
Speed: 2.7ms preprocess, 88.4ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  97%|█████████▋| 4827/5000 [11:21<00:25,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000562243.jpg: 640x640 1 person, 1 tie, 82.3ms
Speed: 2.6ms preprocess, 82.3ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  97%|█████████▋| 4828/5000 [11:22<00:24,  7.04it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000562443.jpg: 480x640 4 zebras, 63.4ms
Speed: 3.4ms preprocess, 63.4ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  97%|█████████▋| 4829/5000 [11:22<00:23,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000562448.jpg: 480x640 1 person, 1 bus, 93.2ms
Speed: 3.8ms preprocess, 93.2ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  97%|█████████▋| 4830/5000 [11:22<00:23,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000562561.jpg: 480x640 2 dogs, 1 horse, 63.3ms
Speed: 2.8ms preprocess, 63.3ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  97%|█████████▋| 4831/5000 [11:22<00:22,  7.46it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000562581.jpg: 448x640 1 person, 1 tennis racket, 67.7ms
Speed: 2.9ms preprocess, 67.7ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4832/5000 [11:22<00:22,  7.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000562818.jpg: 480x640 8 persons, 66.0ms
Speed: 2.9ms preprocess, 66.0ms inference, 9.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  97%|█████████▋| 4833/5000 [11:22<00:22,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000562843.jpg: 480x640 1 cup, 1 mouse, 1 cell phone, 1 scissors, 67.8ms
Speed: 5.5ms preprocess, 67.8ms inference, 4.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  97%|█████████▋| 4834/5000 [11:22<00:21,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000563267.jpg: 480x640 2 persons, 3 bottles, 1 cup, 1 chair, 1 potted plant, 1 dining table, 1 tv, 1 remote, 67.4ms
Speed: 4.9ms preprocess, 67.4ms inference, 16.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  97%|█████████▋| 4835/5000 [11:23<00:25,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000563281.jpg: 640x640 1 person, 78.7ms
Speed: 4.2ms preprocess, 78.7ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  97%|█████████▋| 4836/5000 [11:23<00:24,  6.83it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000563349.jpg: 640x448 1 person, 1 snowboard, 62.6ms
Speed: 2.7ms preprocess, 62.6ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  97%|█████████▋| 4837/5000 [11:23<00:22,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000563470.jpg: 448x640 1 person, 1 chair, 96.5ms
Speed: 3.0ms preprocess, 96.5ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4838/5000 [11:23<00:22,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000563603.jpg: 640x480 3 giraffes, 65.5ms
Speed: 2.9ms preprocess, 65.5ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  97%|█████████▋| 4839/5000 [11:23<00:21,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000563604.jpg: 480x640 8 persons, 1 bench, 67.9ms
Speed: 2.9ms preprocess, 67.9ms inference, 11.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  97%|█████████▋| 4840/5000 [11:23<00:22,  7.19it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000563648.jpg: 640x480 10 persons, 2 skateboards, 62.2ms
Speed: 3.1ms preprocess, 62.2ms inference, 12.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  97%|█████████▋| 4841/5000 [11:23<00:24,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000563653.jpg: 448x640 3 persons, 13 cars, 1 handbag, 62.4ms
Speed: 3.1ms preprocess, 62.4ms inference, 15.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4842/5000 [11:24<00:25,  6.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000563702.jpg: 448x640 1 person, 3 trucks, 56.5ms
Speed: 2.8ms preprocess, 56.5ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4843/5000 [11:24<00:23,  6.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000563758.jpg: 640x448 2 teddy bears, 64.4ms
Speed: 2.5ms preprocess, 64.4ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  97%|█████████▋| 4844/5000 [11:24<00:21,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000563882.jpg: 448x640 5 persons, 1 remote, 1 cell phone, 65.5ms
Speed: 2.7ms preprocess, 65.5ms inference, 7.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4845/5000 [11:24<00:21,  7.32it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000564023.jpg: 640x480 1 toilet, 107.7ms
Speed: 3.0ms preprocess, 107.7ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  97%|█████████▋| 4846/5000 [11:24<00:22,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000564091.jpg: 640x448 1 person, 1 bottle, 1 cake, 1 dining table, 2 cell phones, 59.3ms
Speed: 4.3ms preprocess, 59.3ms inference, 6.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  97%|█████████▋| 4847/5000 [11:24<00:21,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000564127.jpg: 640x448 2 toilets, 60.4ms
Speed: 2.7ms preprocess, 60.4ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  97%|█████████▋| 4848/5000 [11:24<00:19,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000564133.jpg: 448x640 4 elephants, 91.8ms
Speed: 2.7ms preprocess, 91.8ms inference, 14.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4849/5000 [11:25<00:21,  7.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000564280.jpg: 448x640 1 dog, 1 couch, 1 bed, 58.8ms
Speed: 3.0ms preprocess, 58.8ms inference, 3.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4850/5000 [11:25<00:19,  7.62it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000564336.jpg: 384x640 2 persons, 2 bottles, 2 chairs, 1 dining table, 58.5ms
Speed: 2.8ms preprocess, 58.5ms inference, 5.7ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  97%|█████████▋| 4851/5000 [11:25<00:19,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565012.jpg: 448x640 1 car, 2 traffic lights, 56.8ms
Speed: 3.1ms preprocess, 56.8ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4852/5000 [11:25<00:18,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565045.jpg: 640x448 1 person, 2 surfboards, 74.8ms
Speed: 4.8ms preprocess, 74.8ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  97%|█████████▋| 4853/5000 [11:25<00:19,  7.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565153.jpg: 640x512 3 stop signs, 71.3ms
Speed: 4.8ms preprocess, 71.3ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  97%|█████████▋| 4854/5000 [11:25<00:18,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565391.jpg: 640x480 2 cars, 1 truck, 106.7ms
Speed: 3.1ms preprocess, 106.7ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  97%|█████████▋| 4855/5000 [11:25<00:20,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565469.jpg: 640x448 1 clock, 63.4ms
Speed: 2.8ms preprocess, 63.4ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  97%|█████████▋| 4856/5000 [11:25<00:18,  7.61it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565563.jpg: 480x640 9 cars, 1 refrigerator, 66.7ms
Speed: 2.8ms preprocess, 66.7ms inference, 10.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  97%|█████████▋| 4857/5000 [11:26<00:19,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565597.jpg: 640x480 1 banana, 4 pizzas, 63.8ms
Speed: 3.5ms preprocess, 63.8ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  97%|█████████▋| 4858/5000 [11:26<00:19,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565607.jpg: 320x640 1 fire hydrant, 113.9ms
Speed: 2.4ms preprocess, 113.9ms inference, 2.2ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  97%|█████████▋| 4859/5000 [11:26<00:21,  6.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565624.jpg: 384x640 1 cow, 1 elephant, 1 bear, 56.8ms
Speed: 3.3ms preprocess, 56.8ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  97%|█████████▋| 4860/5000 [11:26<00:19,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565776.jpg: 448x640 2 bowls, 1 potted plant, 1 oven, 1 vase, 59.5ms
Speed: 2.9ms preprocess, 59.5ms inference, 4.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4861/5000 [11:26<00:18,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565778.jpg: 416x640 4 persons, 1 train, 1 bench, 135.8ms
Speed: 3.4ms preprocess, 135.8ms inference, 8.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  97%|█████████▋| 4862/5000 [11:26<00:21,  6.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565853.jpg: 480x640 1 person, 3 bottles, 1 potted plant, 1 tv, 84.1ms
Speed: 4.8ms preprocess, 84.1ms inference, 9.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  97%|█████████▋| 4863/5000 [11:26<00:21,  6.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565877.jpg: 640x448 1 person, 1 couch, 1 laptop, 59.5ms
Speed: 3.3ms preprocess, 59.5ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  97%|█████████▋| 4864/5000 [11:27<00:19,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565962.jpg: 448x640 1 cat, 1 vase, 58.5ms
Speed: 4.4ms preprocess, 58.5ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4865/5000 [11:27<00:17,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000565989.jpg: 640x480 1 clock, 64.8ms
Speed: 3.2ms preprocess, 64.8ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  97%|█████████▋| 4866/5000 [11:27<00:17,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000566042.jpg: 448x640 5 giraffes, 58.6ms
Speed: 5.5ms preprocess, 58.6ms inference, 5.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4867/5000 [11:27<00:17,  7.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000566282.jpg: 448x640 7 persons, 4 cars, 1 traffic light, 1 tie, 1 sports ball, 61.3ms
Speed: 3.7ms preprocess, 61.3ms inference, 11.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4868/5000 [11:27<00:18,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000566436.jpg: 448x640 2 persons, 1 bench, 80.3ms
Speed: 12.7ms preprocess, 80.3ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4869/5000 [11:27<00:18,  7.02it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000566524.jpg: 480x640 3 persons, 1 train, 69.4ms
Speed: 3.0ms preprocess, 69.4ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  97%|█████████▋| 4870/5000 [11:27<00:18,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000566758.jpg: 480x640 1 bus, 71.6ms
Speed: 3.0ms preprocess, 71.6ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  97%|█████████▋| 4871/5000 [11:28<00:17,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000566923.jpg: 448x640 9 persons, 55.5ms
Speed: 2.7ms preprocess, 55.5ms inference, 10.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4872/5000 [11:28<00:17,  7.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000567011.jpg: 448x640 1 person, 1 surfboard, 80.1ms
Speed: 3.4ms preprocess, 80.1ms inference, 2.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  97%|█████████▋| 4873/5000 [11:28<00:17,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000567197.jpg: 544x640 1 car, 1 bus, 5 trucks, 1 clock, 166.8ms
Speed: 3.1ms preprocess, 166.8ms inference, 8.9ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  97%|█████████▋| 4874/5000 [11:28<00:21,  5.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000567432.jpg: 448x640 1 airplane, 57.4ms
Speed: 2.4ms preprocess, 57.4ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4875/5000 [11:28<00:18,  6.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000567640.jpg: 448x640 8 persons, 2 cars, 1 sports ball, 56.9ms
Speed: 3.0ms preprocess, 56.9ms inference, 9.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4876/5000 [11:28<00:18,  6.56it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000567740.jpg: 480x640 2 persons, 1 backpack, 3 skiss, 62.9ms
Speed: 2.7ms preprocess, 62.9ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4877/5000 [11:28<00:17,  6.91it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000567825.jpg: 640x448 1 bottle, 1 wine glass, 1 sandwich, 61.6ms
Speed: 2.9ms preprocess, 61.6ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  98%|█████████▊| 4878/5000 [11:29<00:16,  7.35it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000567886.jpg: 480x640 1 person, 5 books, 1 teddy bear, 64.6ms
Speed: 4.5ms preprocess, 64.6ms inference, 9.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4879/5000 [11:29<00:16,  7.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000567898.jpg: 480x640 3 bowls, 2 carrots, 1 dining table, 74.0ms
Speed: 9.1ms preprocess, 74.0ms inference, 6.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4880/5000 [11:29<00:17,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000568147.jpg: 640x480 3 persons, 1 car, 1 parking meter, 64.0ms
Speed: 2.3ms preprocess, 64.0ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  98%|█████████▊| 4881/5000 [11:29<00:16,  7.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000568195.jpg: 640x448 3 persons, 1 bottle, 1 cup, 1 fork, 1 bowl, 1 cake, 1 dining table, 1 clock, 1 vase, 97.4ms
Speed: 2.8ms preprocess, 97.4ms inference, 9.1ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  98%|█████████▊| 4882/5000 [11:29<00:17,  6.63it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000568213.jpg: 416x640 2 persons, 1 frisbee, 55.3ms
Speed: 4.3ms preprocess, 55.3ms inference, 3.1ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  98%|█████████▊| 4883/5000 [11:29<00:16,  7.15it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000568290.jpg: 448x640 1 person, 1 motorcycle, 2 buss, 85.5ms
Speed: 12.3ms preprocess, 85.5ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4884/5000 [11:29<00:16,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000568439.jpg: 480x640 8 persons, 4 buss, 70.2ms
Speed: 3.4ms preprocess, 70.2ms inference, 12.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4885/5000 [11:30<00:17,  6.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000568584.jpg: 448x640 1 person, 1 chair, 1 couch, 85.1ms
Speed: 3.8ms preprocess, 85.1ms inference, 3.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4886/5000 [11:30<00:17,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000568690.jpg: 640x480 1 cat, 1 toilet, 64.7ms
Speed: 3.5ms preprocess, 64.7ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  98%|█████████▊| 4887/5000 [11:30<00:15,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000568710.jpg: 448x640 1 person, 2 couchs, 1 potted plant, 1 tv, 2 books, 1 vase, 71.5ms
Speed: 2.9ms preprocess, 71.5ms inference, 15.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4888/5000 [11:30<00:16,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000568814.jpg: 384x640 3 persons, 1 chair, 55.5ms
Speed: 2.9ms preprocess, 55.5ms inference, 4.1ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images:  98%|█████████▊| 4889/5000 [11:30<00:14,  7.48it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000568981.jpg: 448x640 4 persons, 1 skateboard, 60.3ms
Speed: 2.9ms preprocess, 60.3ms inference, 4.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4890/5000 [11:30<00:14,  7.70it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000569030.jpg: 480x640 7 persons, 9 cars, 1 bus, 1 traffic light, 1 fire hydrant, 1 bench, 1 potted plant, 87.4ms
Speed: 9.8ms preprocess, 87.4ms inference, 21.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4891/5000 [11:30<00:17,  6.39it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000569059.jpg: 480x640 1 chair, 2 tvs, 1 mouse, 1 keyboard, 95.8ms
Speed: 2.6ms preprocess, 95.8ms inference, 13.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4892/5000 [11:31<00:17,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000569273.jpg: 480x640 4 persons, 1 fire hydrant, 68.7ms
Speed: 2.6ms preprocess, 68.7ms inference, 5.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4893/5000 [11:31<00:15,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000569565.jpg: 448x640 4 trucks, 62.4ms
Speed: 2.9ms preprocess, 62.4ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4894/5000 [11:31<00:14,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000569700.jpg: 448x640 1 potted plant, 1 vase, 86.6ms
Speed: 19.7ms preprocess, 86.6ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4895/5000 [11:31<00:15,  6.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000569825.jpg: 640x448 1 person, 2 baseball gloves, 63.4ms
Speed: 3.8ms preprocess, 63.4ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  98%|█████████▊| 4896/5000 [11:31<00:14,  7.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000569917.jpg: 640x480 1 toilet, 2 sinks, 66.0ms
Speed: 2.4ms preprocess, 66.0ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  98%|█████████▊| 4897/5000 [11:31<00:13,  7.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000569972.jpg: 576x640 1 person, 1 surfboard, 219.1ms
Speed: 3.6ms preprocess, 219.1ms inference, 3.1ms postprocess per image at shape (1, 3, 576, 640)


Segmenting Images:  98%|█████████▊| 4898/5000 [11:32<00:17,  5.73it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000569976.jpg: 640x480 3 persons, 66.4ms
Speed: 2.7ms preprocess, 66.4ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  98%|█████████▊| 4899/5000 [11:32<00:16,  6.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000570169.jpg: 640x448 3 cups, 1 teddy bear, 95.6ms
Speed: 3.0ms preprocess, 95.6ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  98%|█████████▊| 4900/5000 [11:32<00:15,  6.37it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000570448.jpg: 512x640 1 person, 1 boat, 192.3ms
Speed: 3.2ms preprocess, 192.3ms inference, 2.7ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images:  98%|█████████▊| 4901/5000 [11:32<00:18,  5.45it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000570456.jpg: 480x640 1 bowl, 3 chairs, 1 dining table, 1 sink, 60.1ms
Speed: 2.7ms preprocess, 60.1ms inference, 6.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4902/5000 [11:32<00:16,  6.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000570471.jpg: 640x480 1 person, 2 bottles, 1 cup, 1 carrot, 1 cake, 2 chairs, 64.7ms
Speed: 4.7ms preprocess, 64.7ms inference, 8.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  98%|█████████▊| 4903/5000 [11:32<00:15,  6.36it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000570539.jpg: 640x480 4 persons, 1 bus, 1 train, 86.7ms
Speed: 5.6ms preprocess, 86.7ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  98%|█████████▊| 4904/5000 [11:33<00:15,  6.31it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000570664.jpg: 448x640 1 person, 2 cats, 1 cell phone, 57.9ms
Speed: 4.8ms preprocess, 57.9ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4905/5000 [11:33<00:13,  6.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000570688.jpg: 480x640 12 persons, 21 kites, 63.4ms
Speed: 3.2ms preprocess, 63.4ms inference, 31.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4906/5000 [11:33<00:16,  5.67it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000570736.jpg: 640x448 1 toilet, 2 sinks, 62.0ms
Speed: 3.7ms preprocess, 62.0ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  98%|█████████▊| 4907/5000 [11:33<00:14,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000570756.jpg: 448x640 7 persons, 2 skiss, 5 kites, 57.2ms
Speed: 3.0ms preprocess, 57.2ms inference, 14.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4908/5000 [11:33<00:15,  6.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000570782.jpg: 448x640 1 chair, 1 tv, 3 laptops, 1 mouse, 3 keyboards, 1 book, 58.8ms
Speed: 2.6ms preprocess, 58.8ms inference, 7.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4909/5000 [11:33<00:14,  6.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000570834.jpg: 480x640 1 person, 7 bicycles, 60.3ms
Speed: 2.4ms preprocess, 60.3ms inference, 8.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4910/5000 [11:33<00:14,  6.42it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000571008.jpg: 448x640 1 stop sign, 60.2ms
Speed: 2.1ms preprocess, 60.2ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4911/5000 [11:34<00:12,  7.11it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000571264.jpg: 448x640 1 person, 2 motorcycles, 56.1ms
Speed: 2.4ms preprocess, 56.1ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4912/5000 [11:34<00:11,  7.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000571313.jpg: 640x480 1 person, 1 bottle, 1 bed, 1 keyboard, 1 book, 92.0ms
Speed: 2.9ms preprocess, 92.0ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  98%|█████████▊| 4913/5000 [11:34<00:12,  7.16it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000571598.jpg: 480x640 4 persons, 2 ties, 2 wine glasss, 1 cell phone, 61.6ms
Speed: 2.8ms preprocess, 61.6ms inference, 9.8ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4914/5000 [11:34<00:11,  7.17it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000571718.jpg: 448x640 2 persons, 13 bananas, 1 chair, 58.6ms
Speed: 2.6ms preprocess, 58.6ms inference, 13.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4915/5000 [11:34<00:12,  6.96it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000571804.jpg: 448x640 1 bottle, 1 cup, 2 bowls, 1 sink, 91.2ms
Speed: 14.5ms preprocess, 91.2ms inference, 4.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4916/5000 [11:34<00:12,  6.64it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000571857.jpg: 448x640 2 persons, 3 airplanes, 1 dog, 67.9ms
Speed: 2.4ms preprocess, 67.9ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4917/5000 [11:34<00:12,  6.87it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000571893.jpg: 448x640 4 books, 74.1ms
Speed: 3.9ms preprocess, 74.1ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4918/5000 [11:35<00:11,  7.07it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000571943.jpg: 480x640 2 traffic lights, 87.8ms
Speed: 4.6ms preprocess, 87.8ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4919/5000 [11:35<00:11,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000572303.jpg: 480x640 2 trains, 66.9ms
Speed: 2.3ms preprocess, 66.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  98%|█████████▊| 4920/5000 [11:35<00:10,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000572388.jpg: 640x448 1 cat, 2 donuts, 71.2ms
Speed: 4.1ms preprocess, 71.2ms inference, 14.9ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  98%|█████████▊| 4921/5000 [11:35<00:11,  6.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000572408.jpg: 640x448 1 sheep, 2 cows, 60.0ms
Speed: 3.4ms preprocess, 60.0ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  98%|█████████▊| 4922/5000 [11:35<00:10,  7.38it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000572462.jpg: 640x640 7 persons, 1 bus, 199.0ms
Speed: 3.9ms preprocess, 199.0ms inference, 20.8ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images:  98%|█████████▊| 4923/5000 [11:35<00:14,  5.43it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000572517.jpg: 448x640 1 bear, 60.6ms
Speed: 2.7ms preprocess, 60.6ms inference, 2.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4924/5000 [11:36<00:12,  6.25it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000572555.jpg: 448x640 1 train, 57.2ms
Speed: 2.9ms preprocess, 57.2ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  98%|█████████▊| 4925/5000 [11:36<00:11,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000572620.jpg: 448x640 17 persons, 1 car, 1 skateboard, 66.5ms
Speed: 2.8ms preprocess, 66.5ms inference, 17.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▊| 4926/5000 [11:36<00:11,  6.40it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000572678.jpg: 448x640 1 cup, 1 chair, 1 couch, 3 potted plants, 1 dining table, 61.0ms
Speed: 3.1ms preprocess, 61.0ms inference, 6.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▊| 4927/5000 [11:36<00:10,  6.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000572900.jpg: 480x640 4 persons, 2 potted plants, 100.4ms
Speed: 4.8ms preprocess, 100.4ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▊| 4928/5000 [11:36<00:11,  6.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000572956.jpg: 448x640 9 motorcycles, 62.7ms
Speed: 3.8ms preprocess, 62.7ms inference, 7.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▊| 4929/5000 [11:36<00:10,  6.71it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000573008.jpg: 480x640 1 boat, 1 clock, 92.7ms
Speed: 3.5ms preprocess, 92.7ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▊| 4930/5000 [11:36<00:10,  6.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000573094.jpg: 640x480 1 bicycle, 1 chair, 1 tv, 65.2ms
Speed: 4.6ms preprocess, 65.2ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  99%|█████████▊| 4931/5000 [11:36<00:09,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000573258.jpg: 480x640 1 person, 2 backpacks, 2 skiss, 67.1ms
Speed: 2.9ms preprocess, 67.1ms inference, 5.6ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▊| 4932/5000 [11:37<00:09,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000573391.jpg: 480x640 1 bear, 72.8ms
Speed: 3.0ms preprocess, 72.8ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▊| 4933/5000 [11:37<00:09,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000573626.jpg: 480x640 1 bird, 2 bears, 63.9ms
Speed: 3.8ms preprocess, 63.9ms inference, 4.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▊| 4934/5000 [11:37<00:08,  7.41it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000573943.jpg: 480x640 1 bus, 4 trucks, 64.8ms
Speed: 3.1ms preprocess, 64.8ms inference, 5.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▊| 4935/5000 [11:37<00:08,  7.53it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000574297.jpg: 448x640 1 person, 1 horse, 63.8ms
Speed: 2.9ms preprocess, 63.8ms inference, 6.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▊| 4936/5000 [11:37<00:08,  7.82it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000574315.jpg: 480x640 2 persons, 1 cat, 1 couch, 1 keyboard, 65.9ms
Speed: 2.5ms preprocess, 65.9ms inference, 5.5ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▊| 4937/5000 [11:37<00:08,  7.84it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000574425.jpg: 448x640 1 person, 1 car, 1 bus, 1 truck, 58.3ms
Speed: 3.6ms preprocess, 58.3ms inference, 8.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4938/5000 [11:37<00:07,  7.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000574520.jpg: 416x640 1 person, 1 surfboard, 57.1ms
Speed: 3.3ms preprocess, 57.1ms inference, 2.7ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  99%|█████████▉| 4939/5000 [11:38<00:07,  8.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000574702.jpg: 640x448 7 persons, 4 motorcycles, 90.6ms
Speed: 3.8ms preprocess, 90.6ms inference, 16.0ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  99%|█████████▉| 4940/5000 [11:38<00:08,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000574810.jpg: 640x512 1 cat, 1 dog, 161.6ms
Speed: 4.0ms preprocess, 161.6ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 512)


Segmenting Images:  99%|█████████▉| 4941/5000 [11:38<00:09,  6.24it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000574823.jpg: 640x480 6 persons, 1 sports ball, 89.7ms
Speed: 4.5ms preprocess, 89.7ms inference, 13.8ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  99%|█████████▉| 4942/5000 [11:38<00:09,  5.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000575081.jpg: 448x640 1 person, 1 potted plant, 1 tv, 2 books, 61.5ms
Speed: 2.8ms preprocess, 61.5ms inference, 5.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4943/5000 [11:38<00:08,  6.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000575187.jpg: 480x640 1 person, 1 giraffe, 65.2ms
Speed: 4.4ms preprocess, 65.2ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▉| 4944/5000 [11:38<00:08,  6.75it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000575205.jpg: 448x640 2 persons, 1 airplane, 63.7ms
Speed: 3.1ms preprocess, 63.7ms inference, 3.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4945/5000 [11:38<00:07,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000575243.jpg: 448x640 1 person, 6 cars, 1 traffic light, 1 umbrella, 60.4ms
Speed: 2.7ms preprocess, 60.4ms inference, 11.0ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4946/5000 [11:39<00:07,  7.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000575357.jpg: 480x640 1 dog, 1 frisbee, 69.4ms
Speed: 3.1ms preprocess, 69.4ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▉| 4947/5000 [11:39<00:06,  7.58it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000575372.jpg: 480x640 5 traffic lights, 100.9ms
Speed: 6.8ms preprocess, 100.9ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▉| 4948/5000 [11:39<00:07,  7.12it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000575500.jpg: 640x448 1 banana, 60.8ms
Speed: 2.8ms preprocess, 60.8ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  99%|█████████▉| 4949/5000 [11:39<00:06,  7.54it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000575815.jpg: 480x640 1 pizza, 1 dining table, 97.9ms
Speed: 2.9ms preprocess, 97.9ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▉| 4950/5000 [11:39<00:06,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000575970.jpg: 480x640 1 cup, 5 bowls, 3 chairs, 3 dining tables, 1 refrigerator, 1 vase, 69.2ms
Speed: 2.7ms preprocess, 69.2ms inference, 14.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▉| 4951/5000 [11:39<00:07,  6.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000576031.jpg: 480x640 4 persons, 2 skiss, 61.3ms
Speed: 2.9ms preprocess, 61.3ms inference, 5.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▉| 4952/5000 [11:39<00:07,  6.59it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000576052.jpg: 448x640 1 person, 1 horse, 66.1ms
Speed: 2.9ms preprocess, 66.1ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4953/5000 [11:40<00:06,  6.95it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000576566.jpg: 640x480 1 person, 1 skateboard, 85.0ms
Speed: 17.8ms preprocess, 85.0ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  99%|█████████▉| 4954/5000 [11:40<00:06,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000576654.jpg: 448x640 2 persons, 1 kite, 65.2ms
Speed: 3.2ms preprocess, 65.2ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4955/5000 [11:40<00:06,  7.28it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000576955.jpg: 448x640 3 persons, 1 horse, 75.9ms
Speed: 2.7ms preprocess, 75.9ms inference, 7.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4956/5000 [11:40<00:06,  7.13it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000577149.jpg: 416x640 4 zebras, 59.3ms
Speed: 2.6ms preprocess, 59.3ms inference, 4.3ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images:  99%|█████████▉| 4957/5000 [11:40<00:05,  7.55it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000577182.jpg: 448x640 3 benchs, 1 bird, 61.3ms
Speed: 2.8ms preprocess, 61.3ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4958/5000 [11:40<00:05,  7.76it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000577539.jpg: 448x640 2 persons, 1 bowl, 104.5ms
Speed: 7.1ms preprocess, 104.5ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4959/5000 [11:40<00:05,  7.21it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000577584.jpg: 640x448 1 sink, 66.5ms
Speed: 2.8ms preprocess, 66.5ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  99%|█████████▉| 4960/5000 [11:41<00:05,  7.52it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000577735.jpg: 480x640 1 vase, 68.1ms
Speed: 2.6ms preprocess, 68.1ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▉| 4961/5000 [11:41<00:05,  7.78it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000577862.jpg: 448x640 2 giraffes, 61.7ms
Speed: 2.8ms preprocess, 61.7ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4962/5000 [11:41<00:04,  8.09it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000577864.jpg: 448x640 3 persons, 1 sports ball, 90.4ms
Speed: 3.5ms preprocess, 90.4ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4963/5000 [11:41<00:04,  7.57it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000577932.jpg: 544x640 9 persons, 1 bicycle, 4 cars, 1 backpack, 2 handbags, 151.1ms
Speed: 1.8ms preprocess, 151.1ms inference, 18.6ms postprocess per image at shape (1, 3, 544, 640)


Segmenting Images:  99%|█████████▉| 4964/5000 [11:41<00:06,  5.93it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000577959.jpg: 448x640 1 person, 2 kites, 99.2ms
Speed: 3.0ms preprocess, 99.2ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4965/5000 [11:41<00:05,  6.05it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000577976.jpg: 448x640 3 cars, 2 trucks, 1 fire hydrant, 54.4ms
Speed: 2.7ms preprocess, 54.4ms inference, 5.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4966/5000 [11:41<00:05,  6.66it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000578093.jpg: 480x640 1 train, 62.8ms
Speed: 2.9ms preprocess, 62.8ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▉| 4967/5000 [11:42<00:04,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000578236.jpg: 640x448 1 bird, 1 cat, 71.4ms
Speed: 3.7ms preprocess, 71.4ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images:  99%|█████████▉| 4968/5000 [11:42<00:04,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000578489.jpg: 480x640 8 persons, 2 couchs, 2 remotes, 70.3ms
Speed: 3.8ms preprocess, 70.3ms inference, 13.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▉| 4969/5000 [11:42<00:04,  6.89it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000578500.jpg: 320x640 2 chairs, 2 couchs, 3 potted plants, 159.4ms
Speed: 3.6ms preprocess, 159.4ms inference, 4.0ms postprocess per image at shape (1, 3, 320, 640)


Segmenting Images:  99%|█████████▉| 4970/5000 [11:42<00:05,  5.98it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000578545.jpg: 480x640 1 person, 1 chair, 1 bed, 1 laptop, 64.6ms
Speed: 3.3ms preprocess, 64.6ms inference, 4.4ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images:  99%|█████████▉| 4971/5000 [11:42<00:04,  6.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000578792.jpg: 448x640 3 persons, 1 motorcycle, 62.1ms
Speed: 2.8ms preprocess, 62.1ms inference, 4.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images:  99%|█████████▉| 4972/5000 [11:42<00:04,  6.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000578871.jpg: 640x480 1 person, 1 motorcycle, 2 cups, 2 spoons, 4 bowls, 1 chair, 1 dining table, 61.4ms
Speed: 2.8ms preprocess, 61.4ms inference, 13.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images:  99%|█████████▉| 4973/5000 [11:42<00:03,  6.85it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000578922.jpg: 640x608 1 cup, 1 potted plant, 3 vases, 194.3ms
Speed: 2.6ms preprocess, 194.3ms inference, 6.7ms postprocess per image at shape (1, 3, 640, 608)


Segmenting Images:  99%|█████████▉| 4974/5000 [11:43<00:04,  5.51it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000578967.jpg: 480x640 1 train, 63.2ms
Speed: 2.8ms preprocess, 63.2ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images: 100%|█████████▉| 4975/5000 [11:43<00:04,  6.18it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000579070.jpg: 448x640 6 persons, 2 bottles, 1 wine glass, 18 cups, 3 chairs, 3 dining tables, 62.9ms
Speed: 2.9ms preprocess, 62.9ms inference, 57.2ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images: 100%|█████████▉| 4976/5000 [11:43<00:04,  5.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000579091.jpg: 448x640 5 broccolis, 1 carrot, 57.4ms
Speed: 2.5ms preprocess, 57.4ms inference, 5.5ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images: 100%|█████████▉| 4977/5000 [11:43<00:03,  6.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000579158.jpg: 480x640 1 airplane, 2 trucks, 69.2ms
Speed: 3.0ms preprocess, 69.2ms inference, 10.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images: 100%|█████████▉| 4978/5000 [11:43<00:03,  6.27it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000579307.jpg: 640x480 2 persons, 1 kite, 63.6ms
Speed: 2.4ms preprocess, 63.6ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images: 100%|█████████▉| 4979/5000 [11:43<00:03,  6.72it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000579321.jpg: 512x640 1 person, 1 dog, 171.2ms
Speed: 3.4ms preprocess, 171.2ms inference, 4.3ms postprocess per image at shape (1, 3, 512, 640)


Segmenting Images: 100%|█████████▉| 4980/5000 [11:44<00:03,  5.81it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000579635.jpg: 448x640 1 person, 1 boat, 2 surfboards, 55.4ms
Speed: 2.6ms preprocess, 55.4ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images: 100%|█████████▉| 4981/5000 [11:44<00:02,  6.49it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000579655.jpg: 416x640 1 person, 1 cell phone, 56.8ms
Speed: 2.7ms preprocess, 56.8ms inference, 2.9ms postprocess per image at shape (1, 3, 416, 640)


Segmenting Images: 100%|█████████▉| 4982/5000 [11:44<00:02,  6.92it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000579818.jpg: 384x640 2 persons, 2 trains, 124.0ms
Speed: 3.3ms preprocess, 124.0ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images: 100%|█████████▉| 4983/5000 [11:44<00:02,  6.44it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000579893.jpg: 448x640 1 stop sign, 57.8ms
Speed: 3.6ms preprocess, 57.8ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images: 100%|█████████▉| 4984/5000 [11:44<00:02,  7.03it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000579900.jpg: 640x480 2 broccolis, 1 carrot, 1 pizza, 74.6ms
Speed: 5.3ms preprocess, 74.6ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images: 100%|█████████▉| 4985/5000 [11:44<00:02,  7.01it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000579902.jpg: 640x576 1 person, 4 cars, 1 motorcycle, 160.7ms
Speed: 1.8ms preprocess, 160.7ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 576)


Segmenting Images: 100%|█████████▉| 4986/5000 [11:45<00:02,  5.99it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000579970.jpg: 448x640 1 chair, 1 couch, 2 tvs, 77.4ms
Speed: 5.7ms preprocess, 77.4ms inference, 4.6ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images: 100%|█████████▉| 4987/5000 [11:45<00:02,  6.30it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000580197.jpg: 480x640 3 persons, 2 ties, 69.1ms
Speed: 3.5ms preprocess, 69.1ms inference, 4.9ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images: 100%|█████████▉| 4988/5000 [11:45<00:01,  6.68it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000580294.jpg: 448x640 1 person, 1 bowl, 59.9ms
Speed: 2.7ms preprocess, 59.9ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images: 100%|█████████▉| 4989/5000 [11:45<00:01,  7.29it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000580410.jpg: 640x448 2 chairs, 1 couch, 83.6ms
Speed: 3.0ms preprocess, 83.6ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 448)


Segmenting Images: 100%|█████████▉| 4990/5000 [11:45<00:01,  7.22it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000580418.jpg: 448x640 2 cars, 1 stop sign, 3 cows, 64.4ms
Speed: 4.2ms preprocess, 64.4ms inference, 6.9ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images: 100%|█████████▉| 4991/5000 [11:45<00:01,  7.33it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000580757.jpg: 448x640 1 person, 1 fire hydrant, 59.2ms
Speed: 3.0ms preprocess, 59.2ms inference, 3.3ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images: 100%|█████████▉| 4992/5000 [11:45<00:01,  7.77it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000581062.jpg: 480x640 1 person, 1 skateboard, 102.0ms
Speed: 4.7ms preprocess, 102.0ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images: 100%|█████████▉| 4993/5000 [11:46<00:00,  7.26it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000581100.jpg: 480x640 1 cow, 2 giraffes, 65.8ms
Speed: 3.2ms preprocess, 65.8ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images: 100%|█████████▉| 4994/5000 [11:46<00:00,  7.50it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000581206.jpg: 640x480 2 persons, 1 sandwich, 1 hot dog, 89.7ms
Speed: 3.1ms preprocess, 89.7ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images: 100%|█████████▉| 4995/5000 [11:46<00:00,  7.20it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000581317.jpg: 384x640 1 person, 1 cell phone, 56.2ms
Speed: 2.9ms preprocess, 56.2ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)


Segmenting Images: 100%|█████████▉| 4996/5000 [11:46<00:00,  7.69it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000581357.jpg: 640x640 2 persons, 1 skateboard, 203.1ms
Speed: 6.2ms preprocess, 203.1ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 640)


Segmenting Images: 100%|█████████▉| 4997/5000 [11:46<00:00,  5.90it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000581482.jpg: 448x640 1 clock, 56.3ms
Speed: 2.7ms preprocess, 56.3ms inference, 2.1ms postprocess per image at shape (1, 3, 448, 640)


Segmenting Images: 100%|█████████▉| 4998/5000 [11:46<00:00,  6.60it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000581615.jpg: 640x480 1 toilet, 65.8ms
Speed: 2.7ms preprocess, 65.8ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)


Segmenting Images: 100%|█████████▉| 4999/5000 [11:46<00:00,  7.00it/s]


image 1/1 D:\Image Captioning and Segmentation\val2017\000000581781.jpg: 480x640 16 bananas, 80.1ms
Speed: 5.0ms preprocess, 80.1ms inference, 15.7ms postprocess per image at shape (1, 3, 480, 640)


Segmenting Images: 100%|██████████| 5000/5000 [11:47<00:00,  7.07it/s]


In [97]:
# Convert the COCO train2017 instance annotations into YOLO-format label files.
# `convert_coco_to_yolo` is defined in an earlier cell of this notebook.
# NOTE(review): the labels path below is relative and looks like an unfilled
# "path_to_save_labels" placeholder -- confirm this is the intended output folder.
annotations_file, images_dir, labels_dir = (
    r"D:\Image Captioning and Segmentation\annotations\instances_train2017.json",
    r"D:\Image Captioning and Segmentation\train2017",
    r"path_to_save_labels/train2017_labels",
)
convert_coco_to_yolo(annotations_file, images_dir, labels_dir)

Conversion complete. Labels saved in path_to_save_labels/train2017_labels


In [3]:
from ultralytics import YOLO
from PIL import Image

# Single-image sanity check: run the YOLOv8 nano segmentation checkpoint on one
# val2017 image and open the annotated prediction in an image viewer.
model = YOLO("yolov8n-seg.pt")
img_path = r"D:\Image Captioning and Segmentation\val2017\000000000802.jpg"

# Inference is pinned to the CPU; calling the model returns one Results
# object per input image, so results[0] is the result for img_path.
results = model(img_path, device='cpu')

# Results.plot() returns the annotated frame as a BGR numpy array (OpenCV
# channel order), while Image.fromarray() interprets a 3-channel array as RGB.
# Reverse the channel axis so red and blue are not swapped in the preview.
result_img = Image.fromarray(results[0].plot()[..., ::-1])
result_img.show()


image 1/1 D:\Image Captioning and Segmentation\val2017\000000000802.jpg: 640x448 1 oven, 1 refrigerator, 83.0ms
Speed: 1.2ms preprocess, 83.0ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 448)
