In [131]:
import numpy as np
import cv2
from collections import defaultdict
from dataclasses import dataclass, field
import pickle
import os
from pathlib import Path
from tqdm import tqdm
import time
import joblib
# --- Optional FAISS for faster/more robust k-means ---
try:
    import faiss  # pip install faiss-cpu  (or faiss-gpu)
    FAISS_AVAILABLE = True
except Exception:
    faiss = None
    FAISS_AVAILABLE = False


In [132]:
# ---------- Feature Extraction ----------
def extract_sift(gray_img, nfeatures=500):
    sift = cv2.SIFT_create(nfeatures=nfeatures)
    kps, desc = sift.detectAndCompute(gray_img, None)
    if desc is None:
        return np.empty((0,128), np.float32), []
    return desc.astype(np.float32), kps
def to_rootsift(desc, eps=1e-12, l2_after=True):
    """
    Convert SIFT -> RootSIFT (Arandjelović & Zisserman, 2012).
    Steps: L1-normalize, then sqrt. Optionally L2-normalize after sqrt.
    desc: (N,128) float32
    """
    if desc is None or len(desc) == 0:
        return np.empty((0,128), np.float32)
    # L1-normalize
    desc /= (np.sum(desc, axis=1, keepdims=True) + eps)
    # element-wise sqrt
    desc = np.sqrt(desc, dtype=np.float32)
    if l2_after:
        # optional: stabilize numerics
        norms = np.linalg.norm(desc, axis=1, keepdims=True) + eps
        desc /= norms
    return desc.astype(np.float32)


In [133]:
def keypoints_to_tuples(kps):
    return [(kp.pt[0], kp.pt[1], kp.size, kp.angle, kp.response, kp.octave, kp.class_id)
            for kp in kps]

In [134]:
@dataclass
class VocabNode:
    centroid: np.ndarray
    children: list = field(default_factory=list)
    is_leaf: bool = False
    node_id: int = -1  # unique id for all nodes; leaves get final "visual word" ids
    df: int = 0        # document frequency (images that hit this node)
    idf: float = 0.0   # computed after training
    # optional: entropy if you prefer that formulation


In [135]:
import faiss
import numpy as np

# ---------- Hierarchical K-means (Vocabulary Tree) ----------
class VocabTree:
    def __init__(self, k=10, L=6, min_cluster_size=30, max_iter=20, seed=0):
        self.k = int(k)
        self.L = int(L)
        self.min_cluster_size = int(min_cluster_size)
        self.max_iter = int(max_iter)
        self.rng = np.random.RandomState(seed)
        self.root = None
        self.leaf_nodes = []   # populated after build
        self._next_id = 0
        # If elsewhere you build a compact id map: self.leaf_id_map = {leaf.node_id: i, ...}

    # ---- FAISS KMeans ----
    def _kmeans(self, X, k):
        X = np.ascontiguousarray(X.astype(np.float32))
        N, D = X.shape
        if N < k:
            k = max(1, N)
        seed = int(self.rng.randint(0, 2**31 - 1))
        clus = faiss.Kmeans(d=D, k=k, niter=50, nredo=3, verbose=True, seed=seed)
        try:
            if hasattr(faiss, "get_num_gpus") and faiss.get_num_gpus() > 0 and hasattr(clus, "gpu"):
                clus.gpu = True
        except Exception:
            pass
        clus.train(X)
        centroids = np.ascontiguousarray(clus.centroids.reshape(k, D))
        index = faiss.IndexFlatL2(D); index.add(centroids)
        _, I = index.search(X, 1)
        return centroids, I.ravel().astype(np.int32)


    def _build_rec(self, X, level):
        node = VocabNode(centroid=X.mean(0), node_id=self._next_id)
        self._next_id += 1
        
         # FAISS-aware early stop
        required = max(self.min_cluster_size, 39 * self.k)
        if level == self.L or len(X) < required:
            node.is_leaf = True
            self.leaf_nodes.append(node)
            return node


        centroids, labels = self._kmeans(X, self.k)
        for j in range(centroids.shape[0]):
            child_points = X[labels == j]
            if len(child_points) == 0:
                continue
            child = self._build_rec(child_points, level + 1)
            child.centroid = centroids[j]
            node.children.append(child)
        if len(node.children) == 0:
            node.is_leaf = True
            self.leaf_nodes.append(node)
        return node

    def fit(self, all_descs):
        """
        all_descs: list of np.ndarray, each (Ni, D) SIFT/RootSIFT descriptors per image.
        """
        X = np.vstack([d for d in all_descs if len(d)])
        print("Training descriptors:", len(X))
        self.root = self._build_rec(X, level=0)

    # ---- Hard (single-path) assignment, unchanged ----
    def quantize_path(self, desc):
        """
        Return the leaf node reached by this descriptor (hard assignment).
        """
        node = self.root
        while not node.is_leaf and node.children:
            # pick child with nearest centroid
            cents = np.stack([ch.centroid for ch in node.children], axis=0)
            j = int(np.argmin(((cents - desc) ** 2).sum(1)))
            node = node.children[j]
        return node

    # ---- Soft assignment (new): descend to multiple children when close ----
    def quantize_descriptor_soft(self, desc, ratio=1.15, max_branch=2, max_soft_levels=2):
        """
        Descend the tree allowing up to `max_branch` children per level if their
        distance is within `ratio` of the best distance. Only apply this for the
        first `max_soft_levels` levels; afterwards use single best child.
        Returns a list of *leaf nodes* reached (deduplicated).
        """
        assert self.root is not None
        stack = [(self.root, 0)]
        leaves = []

        while stack:
            node, depth = stack.pop()
            if node.is_leaf or not node.children:
                leaves.append(node)
                continue

            cents = np.stack([c.centroid for c in node.children], axis=0)
            dist = ((cents - desc) ** 2).sum(1)
            order = np.argsort(dist)

            if depth < max_soft_levels:
                best = dist[order[0]]
                chosen = [node.children[i]
                          for i in order
                          if dist[i] <= best * ratio][:max_branch]
            else:
                chosen = [node.children[int(order[0])]]

            for ch in chosen:
                stack.append((ch, depth + 1))

        # Deduplicate in case multiple paths converge to the same leaf
        uniq = {}
        for lf in leaves:
            uniq[lf.node_id] = lf
        return list(uniq.values())

    def quantize_descriptors_soft(self, D, **kwargs):
        """
        Quantize many descriptors with soft assignment,
        returning a list of leaf node_ids (or compact ids if you map them outside).
        """
        if D is None or len(D) == 0:
            return []
        lids = []
        for d in D:
            leaf_nodes = self.quantize_descriptor_soft(d, **kwargs)
            lids.extend([lf.node_id for lf in leaf_nodes])
        return lids


In [136]:
import numpy as np
from collections import defaultdict
import math

class InvertedIndex:
    """
    Dense-by-leaf postings: postings[leaf_id] is a list of (doc_id, tf).
    IDF is a NumPy array of length num_leaves.
    External IDs are mapped to compact internal doc_ids.
    """
    def __init__(self, num_leaves: int):
        self.num_leaves = int(num_leaves)
        self.postings = [list() for _ in range(self.num_leaves)]  # leaf_id -> [(doc_id, tf)]
        self.doc_tf = {}                   # doc_id(int) -> {leaf_id(int): tf(int)}
        self.idf = np.zeros((self.num_leaves,), dtype=np.float32)
        self.doc_norm = {}                 # doc_id -> L2 norm of TF-IDF vector
        self.N = 0                         # number of docs

        # external<->internal mapping (kept)
        self.ext2int = {}                  # external image_id (str/int) -> doc_id (int)
        self.int2ext = []                  # index = doc_id, value = external id

        # stopwords (optional)
        self.stop_leaves = set()

    # ---------- ID mapping ----------
    def _get_doc_id(self, image_id):
        if image_id in self.ext2int:
            return self.ext2int[image_id]
        doc_id = len(self.int2ext)
        self.ext2int[image_id] = doc_id
        self.int2ext.append(image_id)
        return doc_id

    # ---------- Add one image's TF ----------
    def add_image(self, image_id, leaf_ids):
        """
        leaf_ids MUST be compact leaf IDs in [0..num_leaves-1].
        """
        doc_id = self._get_doc_id(image_id)
        if leaf_ids is None or len(leaf_ids) == 0:
            self.doc_tf[doc_id] = {}
        else:
            uniq, counts = np.unique(np.asarray(leaf_ids, dtype=np.int64), return_counts=True)
            # safety: clip to valid range
            uniq = uniq[(uniq >= 0) & (uniq < self.num_leaves)]
            counts = counts[:len(uniq)]
            self.doc_tf[doc_id] = {int(w): int(c) for w, c in zip(uniq, counts)}
        self.N = len(self.doc_tf)

    # ---------- Build postings from doc_tf (call before compute_idf) ----------
    def _build_postings(self):
        self.postings = [list() for _ in range(self.num_leaves)]
        for doc_id, tfmap in self.doc_tf.items():
            for lid, tf in tfmap.items():
                # skip stop leaves if already defined pre-IDF (rare)
                if lid in self.stop_leaves:
                    continue
                self.postings[lid].append((doc_id, tf))

    # ---------- Choose stop leaves (optional) ----------
    def _choose_stop_leaves(self, top_percent=0.0, frac_thresh=None):
        if (top_percent is None or top_percent <= 0.0) and (frac_thresh is None):
            self.stop_leaves = set()
            return
        df = np.array([len(plist) for plist in self.postings], dtype=np.int32)
        N = max(1, self.N)
        stops = set()
        if top_percent and top_percent > 0.0:
            k = max(1, int(self.num_leaves * top_percent))
            top_idx = np.argpartition(df, -k)[-k:]
            stops.update(int(i) for i in top_idx.tolist())
        if frac_thresh is not None:
            for lid, d in enumerate(df):
                if d / N > frac_thresh:
                    stops.add(lid)
        self.stop_leaves = stops

    # ---------- Compute IDF (after postings are built) ----------
    def compute_idf(self, use_entropy=False, stop_percent=0.0, stop_frac=None, hard_purge=False):
        """
        Computes IDF (or entropy weights). Supports optional stopwords:
          - stop_percent: drop top p fraction of most frequent leaves (e.g., 0.005 for 0.5%)
          - stop_frac: drop leaves with df/N > stop_frac (e.g., 0.05)
          - hard_purge: if True, also purge their postings
        """
        # rebuild postings from doc_tf to be sure they're aligned with current docs
        self._build_postings()

        # choose stop leaves (optional)
        self._choose_stop_leaves(top_percent=stop_percent, frac_thresh=stop_frac)
        if hard_purge and self.stop_leaves:
            for lid in self.stop_leaves:
                self.postings[lid] = []

        N = max(1, self.N)
        idf = np.zeros((self.num_leaves,), dtype=np.float32)
        for lid, plist in enumerate(self.postings):
            if lid in self.stop_leaves:
                idf[lid] = 0.0
                continue
            df = len(plist)
            if df == 0:
                idf[lid] = 0.0
            else:
                if use_entropy:
                    p = min(1 - 1e-12, max(1e-12, df / N))
                    idf[lid] = float(-(p * math.log(p) + (1 - p) * math.log(1 - p)))
                else:
                    idf[lid] = float(math.log(N / df))
        self.idf = idf

        # precompute per-doc norms for cosine similarity
        self.doc_norm = {}
        for doc_id, tfmap in self.doc_tf.items():
            s = 0.0
            for lid, tf in tfmap.items():
                w = tf * self.idf[lid]
                s += w * w
            self.doc_norm[doc_id] = math.sqrt(s) if s > 0 else 1.0

    # ---------- Build vectors ----------
    def image_vector(self, image_id):
        """Return normalized TF-IDF vector as dict {leaf_id: weight} using EXTERNAL image_id."""
        if image_id not in self.ext2int:
            return {}
        doc_id = self.ext2int[image_id]
        tf_map = self.doc_tf.get(doc_id, {})
        if not tf_map:
            return {}
        w = {lid: tf * float(self.idf[lid]) for lid, tf in tf_map.items() if self.idf[lid] > 0.0}
        norm = self.doc_norm.get(doc_id, None)
        if norm is None:
            # fallback compute norm if not cached
            s = sum(v * v for v in w.values())
            norm = math.sqrt(s) if s > 0 else 1.0
        if norm == 0:
            norm = 1.0
        return {lid: v / norm for lid, v in w.items()}

    def query_vector(self, leaf_ids):
        tf = defaultdict(int)
        for lid in leaf_ids:
            if 0 <= lid < self.num_leaves and self.idf[lid] > 0.0:
                tf[lid] += 1
        if not tf:
            return {}
        w = {lid: c * float(self.idf[lid]) for lid, c in tf.items()}
        s = sum(v * v for v in w.values())
        norm = math.sqrt(s) if s > 0 else 1.0
        return {lid: v / norm for lid, v in w.items()}

    # ---------- Scoring ----------
    def score(self, qvec, topk=50, return_external_ids=True):
        """
        Cosine similarity via sparse postings. Assumes doc_norms are precomputed.
        Returns [(doc_id or external_id, score), ...]
        """
        if not qvec:
            return []
        acc = defaultdict(float)
        for lid, qw in qvec.items():
            if self.idf[lid] == 0.0:
                continue
            for doc_id, tf in self.postings[lid]:
                iw = tf * float(self.idf[lid])
                acc[doc_id] += qw * iw

        results = []
        for doc_id, dot in acc.items():
            denom = self.doc_norm.get(doc_id, 1.0)
            results.append((doc_id, dot / denom if denom > 0 else 0.0))
        results.sort(key=lambda x: x[1], reverse=True)
        results = results[:topk]
        if return_external_ids:
            return [(self.int2ext[doc_id], score) for doc_id, score in results]
        return results


In [137]:
# ---------- Putting it together ----------
class VocabTreeDB:
    def __init__(self, k=10, L=6, min_cluster_size=25, max_iter=40, seed=0):
        self.tree = VocabTree(k=k, L=L, min_cluster_size=min_cluster_size, max_iter=max_iter, seed=seed)
        self.index = None
        self.image_meta = {}  # optional: {external_id: {...}}

    def spatial_verify(self, qdesc, qkps, candidates, ratio_thresh=0.75, cap=500, lam=0.001):
        # quick exits
        if qdesc is None or len(qdesc) == 0 or not qkps:
            return [(img_id, score, 0) for img_id, score in candidates]

        qdesc = qdesc.astype(np.float32, copy=False)
        bf = cv2.BFMatcher(cv2.NORM_L2)

        reranked = []
        for img_id, base_score in candidates:
            meta = self.image_meta.get(img_id, None)
            if not meta:
                reranked.append((img_id, base_score, 0)); continue

            desc = meta.get('descs', None)
            kps  = meta.get('kps', None)
            if desc is None or len(desc) == 0 or not kps:
                reranked.append((img_id, base_score, 0)); continue

            desc = desc.astype(np.float32, copy=False)

            # k-NN matches (defensive against short lists)
            matches = bf.knnMatch(qdesc, desc, k=2)
            good = []
            for pair in matches[:cap]:
                if len(pair) < 2:     # sometimes only one neighbor exists
                    continue
                m, n = pair
                if n is not None and m.distance < ratio_thresh * n.distance:
                    good.append(m)

            if len(good) < 4:
                reranked.append((img_id, base_score, 0)); continue

            # build correspondence arrays
            src = np.float32([qkps[m.queryIdx].pt for m in good])
            dst = np.float32([kps[m.trainIdx].pt for m in good])

            H, mask = cv2.findHomography(src, dst, cv2.USAC_MAGSAC, 3.0)

            inliers = int(mask.ravel().sum()) if mask is not None else 0

            final = float(base_score) + lam * min(inliers, cap)
            reranked.append((img_id, final, inliers))

        reranked.sort(key=lambda x: x[1], reverse=True)
        return reranked



    def train(self, image_descs):
        """
        image_descs: list of np.ndarray (Ni,128) used to train the vocab.
        After fit, we build a compact leaf-id map and allocate InvertedIndex with num_leaves.
        """
        self.tree.fit(image_descs)
        # compact leaf IDs: node_id -> [0..num_leaves-1]
        self.tree.leaf_id_map = {leaf.node_id: i for i, leaf in enumerate(self.tree.leaves)} \
                                if hasattr(self.tree, "leaves") else \
                                {leaf.node_id: i for i, leaf in enumerate(self.tree.leaf_nodes)}
        num_leaves = len(self.tree.leaves) if hasattr(self.tree, "leaves") else len(self.tree.leaf_nodes)
        self.index = InvertedIndex(num_leaves=num_leaves)

    def add_image(self, external_image_id, descs, kps=None, path=None):
        # get multiple leaves per descriptor for better recall
        raw_leaf_ids = self.tree.quantize_descriptors_soft(
            descs,
            ratio=1.15,
            max_branch=2,
            max_soft_levels=2
        )

        # map raw node_ids -> compact ids
        leaf_ids = [self.tree.leaf_id_map[lid] for lid in raw_leaf_ids]

        self.index.add_image(external_image_id, leaf_ids)
        
        self.image_meta[external_image_id] = dict(path=path, kps=keypoints_to_tuples(kps), descs=descs)


    def finalize(self, use_entropy=False, stop_percent=0.0, stop_frac=None, hard_purge=False):
        """
        Must be called AFTER all add_image() calls.
        Computes IDF (or entropy) and precomputes doc norms.
        Optional stopwords:
        - stop_percent=0.005  -> drop top 0.5% by df
        - stop_frac=0.05      -> drop leaves with df/N > 0.05
        """
        assert self.index is not None, "Index not initialized."
        self.index.compute_idf(use_entropy=use_entropy,
                            stop_percent=stop_percent,
                            stop_frac=stop_frac,
                            hard_purge=hard_purge)

    def query(self, q_descs, topk=20):
        raw_leaf_ids = self.tree.quantize_descriptors_soft(
            q_descs,
            ratio=1.15,
            max_branch=2,
            max_soft_levels=2
        )
        leaf_ids = [self.tree.leaf_id_map[lid] for lid in raw_leaf_ids]

        qvec = self.index.query_vector(leaf_ids)
        return self.index.score(qvec, topk=topk)



In [138]:
descriptors_list = []
images_descriptors={}
DIR_NAME='images/'
for image_name in tqdm(os.listdir('images')):
    image=cv2.imread(f'{DIR_NAME}{image_name}', cv2.IMREAD_GRAYSCALE)
    # image8bit = cv2.normalize(image_gs, None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
    descriptors, keypoints = extract_sift(image,nfeatures=1000)
    descriptors = to_rootsift(descriptors)
    images_descriptors[image_name.split('.')[0]]={'desc':descriptors,'kps':keypoints,'path':f'{DIR_NAME}{image_name}'}
    descriptors_list.append(descriptors)


# with open('filename.pickle', 'rb') as handle:
#     b = pickle.load(handle)

100%|██████████| 1219/1219 [00:31<00:00, 38.42it/s]


In [139]:
db=VocabTreeDB(k=10, L=6)
start_time = time.time()
db.image_meta=images_descriptors
db.train(descriptors_list)
end_time = time.time()


Training descriptors: 1219277
Sampling a subset of 2560 / 1219277 for training
Clustering 2560 points in 128D to 10 clusters, redo 3 times, 50 iterations
  Preprocessing in 0.17 s
Outer iteration 0 / 3
  Iteration 49 (0.02 s, search 0.01 s): objective=412.979 imbalance=1.298 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 3
  Iteration 49 (0.03 s, search 0.02 s): objective=416.064 imbalance=1.395 nsplit=0       
Outer iteration 2 / 3
  Iteration 49 (0.05 s, search 0.03 s): objective=410.142 imbalance=1.091 nsplit=0       
Objective improved: keep new clusters
Sampling a subset of 2560 / 72105 for training
Clustering 2560 points in 128D to 10 clusters, redo 3 times, 50 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 3
  Iteration 49 (0.05 s, search 0.02 s): objective=401.236 imbalance=1.016 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 3
  Iteration 49 (0.08 s, search 0.04 s): objective=399.699 imbalance=1.287 nsplit=0      

In [140]:
print(f"Training time: {(end_time - start_time/10)} ,minutes")

Training time: 1582454837.5425835 ,minutes


In [141]:
for image_id in tqdm(images_descriptors.keys()):
    db.add_image(image_id, descs=images_descriptors[image_id]['desc'], kps=images_descriptors[image_id]['kps'], path=images_descriptors[image_id]['path'])

100%|██████████| 1219/1219 [01:07<00:00, 17.98it/s]


In [156]:
# Compute IDF + stopwords (can be rerun after more additions)
db.finalize(use_entropy=True,stop_percent=0.01, stop_frac=0.05, hard_purge=False)

In [157]:
print("idf size:", len(db.index.idf))
print("num docs:", db.index.N)
print("sample tf:", list(db.index.doc_tf.items())[0])


idf size: 10954
num docs: 1219
sample tf: (0, {29: 1, 36: 2, 50: 1, 70: 1, 79: 1, 142: 1, 169: 1, 172: 1, 183: 1, 205: 1, 222: 1, 427: 2, 435: 1, 477: 1, 487: 1, 497: 1, 518: 1, 531: 1, 679: 1, 681: 2, 772: 1, 798: 1, 802: 1, 805: 2, 810: 1, 812: 2, 816: 1, 824: 1, 839: 1, 844: 3, 853: 1, 857: 1, 862: 1, 863: 1, 866: 1, 901: 1, 980: 1, 1028: 1, 1102: 2, 1118: 1, 1181: 2, 1184: 1, 1193: 1, 1197: 1, 1198: 2, 1200: 1, 1205: 1, 1226: 1, 1228: 1, 1229: 1, 1235: 1, 1248: 1, 1250: 1, 1262: 1, 1264: 1, 1270: 1, 1271: 1, 1279: 1, 1292: 1, 1312: 1, 1321: 1, 1326: 1, 1332: 2, 1369: 1, 1392: 1, 1394: 1, 1397: 1, 1398: 1, 1400: 1, 1402: 1, 1429: 1, 1430: 1, 1459: 1, 1461: 3, 1465: 2, 1480: 2, 1481: 1, 1482: 1, 1484: 1, 1496: 1, 1501: 1, 1504: 2, 1514: 1, 1516: 1, 1519: 1, 1524: 1, 1525: 1, 1532: 1, 1533: 1, 1534: 1, 1541: 1, 1556: 1, 1558: 1, 1559: 1, 1562: 2, 1564: 1, 1565: 1, 1576: 1, 1592: 1, 1593: 1, 1629: 1, 1631: 3, 1637: 3, 1644: 1, 1651: 2, 1654: 1, 1684: 1, 1694: 1, 1699: 2, 1708: 1, 1755:

In [158]:
def patch_spatial_verify_for_tuples(db):
    def _kp_xy(k):
        if hasattr(k, "pt"):            # cv2.KeyPoint
            return k.pt
        if isinstance(k, (tuple, list)) and len(k) >= 2:
            return float(k[0]), float(k[1])
        if isinstance(k, np.ndarray) and k.size >= 2:
            return float(k[0]), float(k[1])
        raise TypeError(f"Unsupported keypoint type: {type(k)}")

    def _sv(qdesc, qkps, candidates, ratio_thresh=0.75, cap=1000, lam=0.01):
        if qdesc is None or len(qdesc) == 0 or not qkps:
            return [(img_id, score, 0) for img_id, score in candidates]

        qdesc = qdesc.astype(np.float32, copy=False)
        bf = cv2.BFMatcher(cv2.NORM_L2)

        reranked = []
        for img_id, base_score in candidates:
            m = db.image_meta.get(img_id, {})
            desc = m.get("descs", None)
            kps  = m.get("kps", None)
            if desc is None or len(desc) == 0 or not kps:
                reranked.append((img_id, base_score, 0)); continue
            desc = desc.astype(np.float32, copy=False)

            matches = bf.knnMatch(qdesc, desc, k=2)
            good = []
            for pair in matches[:cap]:
                if len(pair) < 2: continue
                m1, m2 = pair
                if m2 is not None and m1.distance < ratio_thresh * m2.distance:
                    good.append(m1)

            if len(good) < 4:
                reranked.append((img_id, base_score, 0)); continue

            src = np.float32([_kp_xy(qkps[m1.queryIdx]) for m1 in good])
            dst = np.float32([_kp_xy(kps [m1.trainIdx]) for m1 in good])

            method = getattr(cv2, "USAC_MAGSAC", cv2.RANSAC)
            H, mask = cv2.findHomography(src, dst, method, 3.0)
            inliers = int(mask.ravel().sum()) if mask is not None else 0

            final = float(base_score) + lam * min(inliers, cap)
            reranked.append((img_id, final, inliers))

        reranked.sort(key=lambda x: x[1], reverse=True)
        return reranked

    db.spatial_verify = _sv
    return db
db=patch_spatial_verify_for_tuples(db)

In [159]:
# Query
q_descs, q_kps = extract_sift(cv2.imread('vase.png', cv2.IMREAD_GRAYSCALE),nfeatures=1500)
qdesc_root = to_rootsift(q_descs)
cands = db.query(q_descs, topk=200)
print(cands)
reranked = db.spatial_verify(qdesc_root,q_kps, cands, ratio_thresh=0.75, cap=1000, lam=0.01)
top10 = [(img, score) for img, score, inl in reranked[:10]]
print("Top 10 after RANSAC re-ranking:", top10)
sv_map = {img:(final,inl) for img,final,inl in reranked}
changes = []
for img, base in cands:
    final, inl = sv_map[img]
    changes.append((img, base, inl, final-base))
print("any inliers >", any(inl>0 for _,_,inl,_ in changes))
print("top deltas:", sorted(changes, key=lambda x: x[3], reverse=True)[:5])


[('4699522', 0.4474074349222656), ('213852', 0.18935597745404023), ('213834', 0.13751695210964307), ('2045994', 0.13139569684384925), ('213937', 0.13007165575155097), ('213836', 0.12795379684970895), ('212686', 0.12755419986130626), ('5551603', 0.11286840246787015), ('2051204', 0.10320157511895812), ('2050235', 0.10236055328569771), ('213806', 0.10089913380339009), ('213550', 0.09707541552179813), ('2051171', 0.09607032048080731), ('213547', 0.09425573767384161), ('213504', 0.09132844662071785), ('213377', 0.0913240964915581), ('213848', 0.0911386951114293), ('213810', 0.0903424034901599), ('1698480', 0.0884022993566637), ('213644', 0.08780225171900023), ('213484', 0.0862195325985291), ('2051085', 0.08529499875326621), ('22786040', 0.08077518231446296), ('213559', 0.0790206511356653), ('213739', 0.07874676280752749), ('213583', 0.07614354662605483), ('213501', 0.07544706623454381), ('213949', 0.07199577397585308), ('2051167', 0.07005454044489717), ('2051088', 0.0677155118977121), ('213

In [153]:
# Query
q_descs, q_kps = extract_sift(cv2.imread('curvyth.png', cv2.IMREAD_GRAYSCALE),nfeatures=1500)
qdesc_root = to_rootsift(q_descs)
cands = db.query(q_descs, topk=200)
print(cands)
reranked = db.spatial_verify(qdesc_root,q_kps, cands, ratio_thresh=0.75, cap=1000, lam=0.01)
top10 = [(img, score) for img, score, inl in reranked[:10]]
print("Top 10 after RANSAC re-ranking:", top10)
sv_map = {img:(final,inl) for img,final,inl in reranked}
changes = []
for img, base in cands:
    final, inl = sv_map[img]
    changes.append((img, base, inl, final-base))
print("any inliers >", any(inl>0 for _,_,inl,_ in changes))
print("top deltas:", sorted(changes, key=lambda x: x[3], reverse=True)[:5])


[('213852', 0.2386451038316182), ('213937', 0.223105193304115), ('213834', 0.18790092240704934), ('213806', 0.16328723475034468), ('213810', 0.15618622910291843), ('213377', 0.14670359459113777), ('213836', 0.14655139109860849), ('213547', 0.14533467932302727), ('213644', 0.14377584713058342), ('213484', 0.14269303048433116), ('213504', 0.14208651780508505), ('2051085', 0.13741527024547734), ('213550', 0.13474090134347483), ('22786040', 0.12486536388532997), ('213848', 0.11690181825467816), ('2045932', 0.11578951460279376), ('2051088', 0.11118092732685393), ('2051204', 0.10800682071553634), ('213501', 0.10664210332426483), ('22786016', 0.10227971312165766), ('2051167', 0.10170880152075037), ('213739', 0.09762199417277077), ('2051171', 0.09586168377519018), ('22785954', 0.094302595548993), ('22786080', 0.08988030471896796), ('2051087', 0.08864499135303683), ('2051108', 0.08455237163233126), ('4699522', 0.08091947801690419), ('213493', 0.07809330075657703), ('213505', 0.07689637162133717

In [None]:
# Query
q_descs, q_kps = extract_sift(cv2.imread('stamp.png', cv2.IMREAD_GRAYSCALE),nfeatures=1500)
qdesc_root = to_rootsift(q_descs)
cands = db.query(q_descs, topk=200)
print(cands)
reranked = db.spatial_verify(qdesc_root,q_kps, cands, ratio_thresh=0.75, cap=1000, lam=0.01)
top10 = [(img, score) for img, score, inl in reranked[:10]]
print("Top 10 after RANSAC re-ranking:", top10)
sv_map = {img:(final,inl) for img,final,inl in reranked}
changes = []
for img, base in cands:
    final, inl = sv_map[img]
    changes.append((img, base, inl, final-base))
print("any inliers >", any(inl>0 for _,_,inl,_ in changes))
print("top deltas:", sorted(changes, key=lambda x: x[3], reverse=True)[:5])


[('4699522', 0.5231873487133393), ('212686', 0.28029610883356804), ('1698480', 0.20577230039700803), ('1698482', 0.13248962505228096), ('2050235', 0.12230345284117297), ('2047959', 0.11351778641045801), ('4699518', 0.09733636132985837), ('213385', 0.09546863400942612), ('22786139', 0.08682086399773996), ('2045406', 0.08421716132693793), ('2045394', 0.08027112106124566), ('22785995', 0.07718863928425675), ('2046020', 0.07714859902699012), ('22785910', 0.07124259964602936), ('5551603', 0.06775103273512599), ('2051157', 0.067130537414744), ('2046774', 0.06514991498495248), ('2051204', 0.06350199445537943), ('1699022', 0.06267918334814411), ('2046816', 0.06071920011949484), ('2045994', 0.05941642051849308), ('2051127', 0.059002432355412054), ('2051167', 0.05771648347520428), ('2051084', 0.05615886245981987), ('212688', 0.05579194468797695), ('22786066', 0.054454235914772546), ('22785898', 0.053309600709412264), ('213834', 0.052468800498927055), ('2047039', 0.050307130620516186), ('22786063

In [146]:
# ========= SAVE SIDE (Jupyter) =========
# Save the trained model to a folder (tree + index + meta), converting cv2.KeyPoint to tuples.
# Usage after you finish training/finalizing:
#   save_vt_model(db, out_dir="vocab_db", include_meta=True)

import os, io, gzip, pickle, tempfile
import numpy as np
import cv2

def save_vt_model(db, out_dir, include_meta=True):
    # ---- helpers ----
    def _atomic_write(path, data_bytes):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with tempfile.NamedTemporaryFile(dir=os.path.dirname(path), delete=False) as tmp:
            tmp.write(data_bytes); tmp.flush(); os.fsync(tmp.fileno())
            tmp_path = tmp.name
        os.replace(tmp_path, path)

    def _bytes_dump_gz(obj) -> bytes:
        buf = io.BytesIO()
        with gzip.GzipFile(fileobj=buf, mode='wb', compresslevel=6) as f:
            pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)
        return buf.getvalue()

    def _iter_nodes_preorder(root):
        stack, seen = [root], set()
        while stack:
            n = stack.pop()
            if id(n) in seen: continue
            seen.add(id(n)); yield n
            for c in reversed(getattr(n, "children", []) or []):
                stack.append(c)

    def export_tree_dict(tree):
        assert tree is not None and tree.root is not None, "Tree must be trained."
        nodes = []
        for n in _iter_nodes_preorder(tree.root):
            nodes.append({
                "node_id": int(n.node_id),
                "is_leaf": bool(getattr(n, "is_leaf", False)),
                "centroid": (None if getattr(n, "centroid", None) is None
                             else np.ascontiguousarray(n.centroid, dtype=np.float32)),
                "children_ids": [int(c.node_id) for c in (getattr(n, "children", None) or [])],
            })
        leaf_id_map = getattr(tree, "leaf_id_map", None)
        if leaf_id_map is not None:
            leaf_id_map = {int(k): int(v) for k, v in leaf_id_map.items()}
        return {
            "version": 1,
            "k": int(getattr(tree, "k", 10)),
            "L": int(getattr(tree, "L", 6)),
            "root_id": int(tree.root.node_id),
            "nodes": nodes,
            "leaf_id_map": leaf_id_map,
        }

    def export_index_dict(index):
        assert index is not None, "Index is None; finalize() before saving."
        return {
            "version": 1,
            "num_leaves": int(index.num_leaves),
            "doc_tf": {int(d): {int(w): int(tf) for w, tf in tfmap.items()}
                       for d, tfmap in index.doc_tf.items()},
            "idf": np.ascontiguousarray(index.idf, dtype=np.float32),
            "doc_norm": {int(d): float(v) for d, v in index.doc_norm.items()},
            "N": int(getattr(index, "N", len(index.doc_tf))),
            "ext2int": {k: int(v) for k, v in getattr(index, "ext2int", {}).items()},
            "int2ext": list(getattr(index, "int2ext", [])),
            "stop_leaves": set(getattr(index, "stop_leaves", set())),
        }

    def _kp_to_tuple(kp):
        return (float(kp.pt[0]), float(kp.pt[1]),
                float(kp.size), float(kp.angle),
                float(kp.response), int(kp.octave), int(kp.class_id))

    def export_meta_dict(db, include=True):
        if not include:
            return None
        meta = getattr(db, "image_meta", {}) or {}
        # sanitize kps → tuples to avoid "cannot pickle 'cv2.KeyPoint'"
        clean = {}
        for img_id, m in meta.items():
            kps = m.get("kps", None)
            if kps and len(kps) and isinstance(kps[0], cv2.KeyPoint):
                kps = [_kp_to_tuple(k) for k in kps]
            clean[img_id] = {
                "path": m.get("path"),
                "descs": (np.ascontiguousarray(m.get("descs"), dtype=np.float32)
                          if m.get("descs") is not None else None),
                "kps": kps
            }
        return {"image_meta": clean}

    # ---- save ----
    os.makedirs(out_dir, exist_ok=True)
    tree_dict = export_tree_dict(db.tree)
    idx_dict  = export_index_dict(db.index)
    meta_dict = export_meta_dict(db, include=include_meta)

    _atomic_write(os.path.join(out_dir, "tree.pkl.gz"),  _bytes_dump_gz(tree_dict))
    _atomic_write(os.path.join(out_dir, "index.pkl.gz"), _bytes_dump_gz(idx_dict))
    if meta_dict is not None:
        _atomic_write(os.path.join(out_dir, "meta.pkl.gz"),  _bytes_dump_gz(meta_dict))
    print(f"[save_vt_model] Saved to: {out_dir}")

    
save_vt_model(db, 'vocab_db', include_meta=True)

[save_vt_model] Saved to: vocab_db
