# Failure Case Finder (Person ReID)

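This notebook finds hard/failure retrieval cases from `pytorch_result.mat` and saves visualization images for the report: two failure cases (`show_case1.png`, `show_case2.png`) and two success cases (`show_success1.png`, `show_success2.png`).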

In [2]:
import os

import numpy as np
import scipy.io
import torch
from torchvision import datasets
import matplotlib

matplotlib.use("agg")
import matplotlib.pyplot as plt


def sort_img(qf, ql, qc, gf, gl, gc):
    """Rank gallery entries for one query, best score first, with junk removed.

    Each gallery row of ``gf`` is scored by its product with ``qf``
    (``torch.mm``), and gallery positions are ordered by descending score.
    Two groups are then dropped from the ranking: entries labelled ``-1``,
    and entries that share both the query's label and the query's camera.

    Args:
        qf: Query feature tensor; reshaped to a column vector.
        ql: Label of the query.
        qc: Camera id of the query.
        gf: Gallery feature tensor, one row per gallery entry.
        gl: NumPy array of gallery labels.
        gc: NumPy array of gallery camera ids.

    Returns:
        NumPy array of gallery positions in descending score order.
    """
    similarity = torch.mm(gf, qf.view(-1, 1)).squeeze(1).cpu().numpy()
    ranking = np.argsort(similarity)[::-1]

    same_label = np.argwhere(gl == ql)
    same_camera = np.argwhere(gc == qc)
    distractors = np.argwhere(gl == -1)

    # Same identity seen by the same camera is excluded, as are -1 labels.
    same_label_and_camera = np.intersect1d(same_label, same_camera)
    excluded = np.concatenate((same_label_and_camera, distractors.ravel()))

    keep = ~np.isin(ranking, excluded)
    return ranking[keep]


def imshow(ax, path, title=None, title_color=None):
    """Draw the image stored at ``path`` on ``ax`` with the axes hidden.

    Args:
        ax: Matplotlib axes to draw on.
        path: Path of the image file, read with ``plt.imread``.
        title: Optional title for the axes; no title is set when ``None``.
        title_color: Optional title colour; when ``None`` the title is set
            without an explicit colour.
    """
    ax.imshow(plt.imread(path))
    ax.axis("off")

    if title is None:
        return
    if title_color is None:
        ax.set_title(title)
        return
    ax.set_title(title, color=title_color)


# Project root: read from the REID_ROOT environment variable, falling back to
# a machine-specific Windows path when the variable is not set.
ROOT = os.environ.get("REID_ROOT", r"d:\temp\Person_reID_baseline_pytorch")
# Make ROOT the working directory so relative paths resolve against it.
os.chdir(ROOT)

# Inputs: the saved feature file and the dataset folder holding the
# "gallery" and "query" image sub-folders.
MAT_PATH = os.path.join(ROOT, "pytorch_result.mat")
TEST_DIR = os.path.join(ROOT, "data", "DukeMTMC-reID", "pytorch")
# Number of top-ranked gallery entries inspected per query.
TOPK = 10
# Features are moved to the GPU when one is available, otherwise kept on CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"[INFO] cwd={os.getcwd()}")
print(f"[INFO] device={DEVICE}")
print(f"[INFO] mat={MAT_PATH}")
print(f"[INFO] test_dir={TEST_DIR}")

# Fail early with a readable message if either input is missing.
assert os.path.isfile(MAT_PATH), f"[ERROR] Missing {MAT_PATH}"
assert os.path.isdir(TEST_DIR), f"[ERROR] Missing {TEST_DIR}"

# One ImageFolder per split; later cells use only its `.imgs` list to map a
# position to an image path.
# NOTE(review): `.imgs` is indexed with positions taken from the .mat arrays,
# which assumes both share the same ordering — confirm against the script
# that produced pytorch_result.mat.
image_datasets = {
    x: datasets.ImageFolder(os.path.join(TEST_DIR, x)) for x in ["gallery", "query"]
}

# Load the saved features, camera ids and labels for both splits.
# `[0]` takes the first row of each stored cam/label array.
result = scipy.io.loadmat(MAT_PATH)
query_feature = torch.FloatTensor(result["query_f"]).to(DEVICE)
query_cam = result["query_cam"][0]
query_label = result["query_label"][0]

gallery_feature = torch.FloatTensor(result["gallery_f"]).to(DEVICE)
gallery_cam = result["gallery_cam"][0]
gallery_label = result["gallery_label"][0]

print(f"[INFO] query_feature={tuple(query_feature.shape)}")
print(f"[INFO] gallery_feature={tuple(gallery_feature.shape)}")


[INFO] cwd=d:\temp\Person_reID_baseline_pytorch
[INFO] device=cpu
[INFO] mat=d:\temp\Person_reID_baseline_pytorch\pytorch_result.mat
[INFO] test_dir=d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch
[INFO] query_feature=(2228, 512)
[INFO] gallery_feature=(17661, 512)


## 1) 定义目标函数/系统与“失败”判定规则

- System: DukeMTMC-reID retrieval using features from `pytorch_result.mat`.
- Failure predicate: a query counts as a failure if Rank-1 is wrong, or the first positive appears after Rank-5, or the Top-10 contains at most one positive.

In [3]:
def failure_metrics(query_i):
    """Compute retrieval-quality metrics for a single query.

    The gallery is ranked with ``sort_img`` and the positions of entries
    carrying the query's label are summarised.

    Args:
        query_i: Position of the query in ``query_feature``,
            ``query_label`` and ``query_cam``.

    Returns:
        ``None`` when the filtered ranking holds no entry with the query's
        label. Otherwise a dict with:
            "index": the ranking returned by ``sort_img``;
            "first_pos": 0-based position of the first matching entry;
            "rank1_correct": whether the top-ranked entry matches;
            "topk_pos": number of matches among the first
                ``min(TOPK, len(ranking))`` entries.
    """
    target = query_label[query_i]
    ranking = sort_img(
        query_feature[query_i],
        target,
        query_cam[query_i],
        gallery_feature,
        gallery_label,
        gallery_cam,
    )

    # Boolean mask over the ranking: True where the label equals the query's.
    is_match = gallery_label[ranking] == target
    hits = np.where(is_match)[0]
    if len(hits) == 0:
        return None

    cutoff = min(TOPK, len(ranking))
    return {
        "index": ranking,
        "first_pos": int(hits[0]),
        "rank1_correct": bool(is_match[0]),
        "topk_pos": int(np.sum(is_match[:cutoff])),
    }


def is_failure(m):
    """Decide whether a metrics dict describes a failed retrieval.

    Args:
        m: Dict with "rank1_correct", "first_pos" and "topk_pos" keys, or
            ``None``.

    Returns:
        ``False`` for ``None``. Otherwise ``True`` unless the top-ranked
        entry is correct, the first positive sits at a 0-based position
        below 5, and more than one positive is counted in the top-k.
    """
    if m is None:
        return False
    passes_all = m["rank1_correct"] and m["first_pos"] < 5 and m["topk_pos"] > 1
    return not passes_all


## 2) 构造基础样例与断言（sanity checks）

This checks that a ranking can be computed for the first query and that the first query image path exists on disk.

In [4]:
# Smoke-test the ranking pipeline on the first query: either it has no
# positive in the gallery (None) or the returned ranking is non-empty.
m0 = failure_metrics(0)
assert m0 is None or len(m0["index"]) > 0

# Check that the first query image listed by ImageFolder exists on disk.
p0, _ = image_datasets["query"].imgs[0]
assert os.path.isfile(p0), f"[ERROR] Missing query image: {p0}"

print("[INFO] sanity checks passed")


[INFO] sanity checks passed


## 3) 从真实数据中提取失败候选（parsing + filtering）

Scan all query images and collect failure-like candidates.

In [5]:
# One summary record per query that satisfies the failure predicate.
candidates = []

for qi in range(len(query_label)):
    m = failure_metrics(qi)
    if is_failure(m):
        # is_failure() is False for None, so m is a metrics dict here.
        query_path, _ = image_datasets["query"].imgs[qi]
        top1_path, _ = image_datasets["gallery"].imgs[m["index"][0]]
        record = {
            "query_index": int(qi),
            "query_label": int(query_label[qi]),
            "query_cam": int(query_cam[qi]),
            # first_pos is 0-based; the report uses 1-based ranks.
            "first_positive_rank": int(m["first_pos"] + 1),
            "top10_positives": int(m["topk_pos"]),
            "rank1_correct": bool(m["rank1_correct"]),
            "query_path": query_path,
            "top1_path": top1_path,
        }
        candidates.append(record)

# Hardest first: latest first positive, then fewest positives in the top-k.
# The sort is stable, so ties keep their scan order.
candidates.sort(key=lambda x: (-x["first_positive_rank"], x["top10_positives"]))

print(f"[INFO] total queries: {len(query_label)}")
print(f"[INFO] candidate failures: {len(candidates)}")
print("[INFO] top candidates:")
for c in candidates[:10]:
    summary = (
        "[CAND] "
        f"query_index={c['query_index']} "
        f"label={c['query_label']} cam={c['query_cam']} "
        f"rank1_correct={c['rank1_correct']} "
        f"first_positive_rank={c['first_positive_rank']} "
        f"top10_positives={c['top10_positives']}"
    )
    print(summary)
    print(f"       query: {c['query_path']}")
    print(f"       top1 : {c['top1_path']}")


[INFO] total queries: 2228
[INFO] candidate failures: 486
[INFO] top candidates:
[CAND] query_index=1883 label=4315 cam=6 rank1_correct=False first_positive_rank=4274 top10_positives=0
       query: d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\query\4315\4315_c6_f0076814.jpg
       top1 : d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\gallery\6367\6367_c8_f0073570.jpg
[CAND] query_index=67 label=51 cam=1 rank1_correct=False first_positive_rank=3277 top10_positives=0
       query: d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\query\0051\0051_c1_f0060060.jpg
       top1 : d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\gallery\6794\6794_c8_f0178831.jpg
[CAND] query_index=1642 label=3410 cam=8 rank1_correct=False first_positive_rank=3246 top10_positives=0
       query: d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\query\3410\3410_c8_f0035897.jpg
       top1 : d:\temp\Person_reID_baseline_pytorch\data\D

## 4) 随机/抽样生成输入并捕获异常

Not used for this task. We directly scan all existing query indices.

## 5) 性质测试（property-based）自动搜索反例

Not used for this task.

## 6) 失败用例最小化（shrink）

Not used for this task. We select representative failures by ranking difficulty.

## 7) 失败用例去重与聚类

Not used for this task.

## 8) 生成可复现报告与可视化输出

Pick two cases and save visualizations for the report.

In [6]:
def visualize_case(query_i, out_path, topk=None):
    """Save a one-row figure: the query image followed by its top-ranked gallery images.

    Each gallery image is titled with its 1-based rank, coloured green when
    its label equals the query's label and red otherwise.

    Args:
        query_i: Position of the query in the query arrays / image list.
        out_path: File path the figure is written to.
        topk: Number of gallery images to show. Defaults to ``TOPK``, the
            same cut-off ``failure_metrics`` uses.

    Raises:
        AssertionError: If ``failure_metrics`` returns ``None`` for the query
            (no positive in the filtered ranking).
    """
    m = failure_metrics(query_i)
    assert m is not None, f"[ERROR] No positive found for query_index={query_i}"

    if topk is None:
        topk = TOPK

    index = m["index"]
    # The filtered ranking can be shorter than topk; never index past its end.
    n_show = min(topk, len(index))

    query_path, _ = image_datasets["query"].imgs[query_i]
    qlabel = int(query_label[query_i])

    fig = plt.figure(figsize=(16, 4))
    try:
        # The grid always has topk + 1 columns so the layout does not depend
        # on how many gallery images are actually available.
        imshow(fig.add_subplot(1, topk + 1, 1), query_path, title="query")

        for r in range(n_show):
            gallery_idx = int(index[r])
            img_path, _ = image_datasets["gallery"].imgs[gallery_idx]
            label = int(gallery_label[gallery_idx])
            color = "green" if label == qlabel else "red"
            imshow(
                fig.add_subplot(1, topk + 1, r + 2),
                img_path,
                title=str(r + 1),
                title_color=color,
            )

        fig.savefig(out_path, bbox_inches="tight")
    finally:
        # Release the figure even when reading an image or saving fails.
        plt.close(fig)


# The two hardest failures are the first two entries of the sorted list.
assert len(candidates) >= 2, "[ERROR] Not enough candidates"
case1, case2 = (entry["query_index"] for entry in candidates[:2])

print(f"[INFO] selected case1 query_index={case1}")
print(f"[INFO] selected case2 query_index={case2}")

# Render one figure per selected case.
for selected_query, png_name in ((case1, "show_case1.png"), (case2, "show_case2.png")):
    visualize_case(selected_query, png_name)

print("[INFO] saved show_case1.png")
print("[INFO] saved show_case2.png")


[INFO] selected case1 query_index=1883
[INFO] selected case2 query_index=67
[INFO] saved show_case1.png
[INFO] saved show_case2.png


In [7]:
def describe_case(query_i, topn=10):
    """Print a text report for one query and its top-ranked gallery entries.

    The report lists the query's position, label, camera and path, the
    1-based rank and path of the first positive (``None`` when the filtered
    ranking has no positive), and up to ``topn`` gallery entries tagged
    POS/NEG by label equality with the query.

    Args:
        query_i: Position of the query in the query arrays / image list.
        topn: Maximum number of ranked gallery entries to print.
    """
    qpath, _ = image_datasets["query"].imgs[query_i]
    qlabel = int(query_label[query_i])
    qcam = int(query_cam[query_i])

    m = failure_metrics(query_i)
    if m is None:
        # failure_metrics returns None (and no ranking) when there is no
        # positive, so rebuild the ranking to still list the top entries.
        index = sort_img(
            query_feature[query_i],
            query_label[query_i],
            query_cam[query_i],
            gallery_feature,
            gallery_label,
            gallery_cam,
        )
        first_pos_rank = None
        first_pos_path = None
    else:
        index = m["index"]
        # first_pos is 0-based; ranks are reported 1-based.
        first_pos_rank = int(m["first_pos"] + 1)
        first_pos_path, _ = image_datasets["gallery"].imgs[int(index[m["first_pos"]])]

    print(f"[CASE] query_index={query_i} label={qlabel} cam={qcam}")
    print(f"       query: {qpath}")
    print(f"       first_positive_rank: {first_pos_rank}")
    if first_pos_path is not None:
        print(f"       first_positive_path: {first_pos_path}")

    print("       top gallery:")
    # The filtered ranking can be shorter than topn.
    for r in range(min(topn, len(index))):
        gidx = int(index[r])
        gpath, _ = image_datasets["gallery"].imgs[gidx]
        glabel = int(gallery_label[gidx])
        gcam = int(gallery_cam[gidx])
        tag = "POS" if glabel == qlabel else "NEG"
        print(f"       - R{r+1:02d} {tag} label={glabel} cam={gcam} path={gpath}")


# Print the detailed ranking report for the two selected failure cases,
# separated by a blank line.
describe_case(case1, topn=10)
print("")
describe_case(case2, topn=10)


[CASE] query_index=1883 label=4315 cam=6
       query: d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\query\4315\4315_c6_f0076814.jpg
       first_positive_rank: 4274
       first_positive_path: d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\gallery\4315\4315_c7_f0080881.jpg
       top gallery:
       - R01 NEG label=6367 cam=8 path=d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\gallery\6367\6367_c8_f0073570.jpg
       - R02 NEG label=4176 cam=6 path=d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\gallery\4176\4176_c6_f0057106.jpg
       - R03 NEG label=4321 cam=7 path=d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\gallery\4321\4321_c7_f0081942.jpg
       - R04 NEG label=4321 cam=7 path=d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\gallery\4321\4321_c7_f0081822.jpg
       - R05 NEG label=709 cam=1 path=d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\gallery\0709\0709_c1

In [8]:
# One summary record per query whose top-ranked gallery entry is correct.
successes = []

for qi in range(len(query_label)):
    m = failure_metrics(qi)
    # Skip queries with no positive in the gallery or a wrong Rank-1.
    if m is None or not m["rank1_correct"]:
        continue

    query_path, _ = image_datasets["query"].imgs[qi]
    top1_path, _ = image_datasets["gallery"].imgs[int(m["index"][0])]

    successes.append(
        {
            "query_index": int(qi),
            "query_label": int(query_label[qi]),
            "query_cam": int(query_cam[qi]),
            # Reuse the count from failure_metrics instead of recomputing it,
            # so both cells always agree on the top-k definition.
            "top10_positives": int(m["topk_pos"]),
            "query_path": query_path,
            "top1_path": top1_path,
        }
    )

# Most positives in the top-k first; the sort is stable, so ties keep scan order.
successes.sort(key=lambda x: x["top10_positives"], reverse=True)

print(f"[INFO] success candidates: {len(successes)}")
print("[INFO] top success candidates:")
for s in successes[:10]:
    print(
        "[SUC] "
        f"query_index={s['query_index']} "
        f"label={s['query_label']} cam={s['query_cam']} "
        f"top10_positives={s['top10_positives']}"
    )
    print(f"      query: {s['query_path']}")
    print(f"      top1 : {s['top1_path']}")

# Same guard as for the failure candidates: two entries are needed below.
assert len(successes) >= 2, "[ERROR] Not enough success candidates"
suc1 = successes[0]["query_index"]
suc2 = successes[1]["query_index"]

print(f"[INFO] selected suc1 query_index={suc1}")
print(f"[INFO] selected suc2 query_index={suc2}")

visualize_case(suc1, "show_success1.png")
visualize_case(suc2, "show_success2.png")

print("[INFO] saved show_success1.png")
print("[INFO] saved show_success2.png")


[INFO] success candidates: 1767
[INFO] top success candidates:
[SUC] query_index=0 label=5 cam=2 top10_positives=10
      query: d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\query\0005\0005_c2_f0046985.jpg
      top1 : d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\gallery\0005\0005_c5_f0052021.jpg
[SUC] query_index=1 label=5 cam=5 top10_positives=10
      query: d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\query\0005\0005_c5_f0051781.jpg
      top1 : d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\gallery\0005\0005_c2_f0047345.jpg
[SUC] query_index=4 label=19 cam=2 top10_positives=10
      query: d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\query\0019\0019_c2_f0055712.jpg
      top1 : d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC-reID\pytorch\gallery\0019\0019_c5_f0060626.jpg
[SUC] query_index=6 label=19 cam=6 top10_positives=10
      query: d:\temp\Person_reID_baseline_pytorch\data\DukeMTMC

In [9]:
import shutil

# Destination folder for report figures; created if it does not exist yet.
target_dir = os.path.join(ROOT, "report", "pics")
os.makedirs(target_dir, exist_ok=True)

# Relocate the success-case figures from ROOT into the report folder,
# warning about any file that is not there.
for fn in ("show_success1.png", "show_success2.png"):
    src = os.path.join(ROOT, fn)
    dst = os.path.join(target_dir, fn)
    if not os.path.isfile(src):
        print(f"[WARN] missing file: {src}")
        continue
    shutil.move(src, dst)
    print(f"[INFO] moved {src} -> {dst}")


[INFO] moved d:\temp\Person_reID_baseline_pytorch\show_success1.png -> d:\temp\Person_reID_baseline_pytorch\report\pics\show_success1.png
[INFO] moved d:\temp\Person_reID_baseline_pytorch\show_success2.png -> d:\temp\Person_reID_baseline_pytorch\report\pics\show_success2.png
