In [1]:
import numpy as np
import torch

# Data pre-processing

Run this cell only once, after downloading and extracting the datasets into the `data/` directory.

In [None]:
# Run the data_utils.py script (the dataset pre-processing step for this notebook).
%run data_utils.py

# CGD Train

data_name: 'car' or 'cub'.

See https://github.com/leftthomas/CGD for an explanation of the other parameters.

In [None]:
# Run the CGD training script on the 'car' dataset; pass --data_name 'cub' for the other dataset.
# NOTE(review): the data-base file names used later in this notebook appear to encode
# these flags (gd_config, feature_dim, batch_size) - confirm they still match after changing any.
%run train.py --data_name 'car' --feature_dim 64 --batch_size 32 --gd_config 'SG' --recalls 8

# PrivateMail Image Retrieval

## Initialization

Choose dataset: 'car' or 'cub'

In [2]:
# Dataset selector for the rest of the notebook: "car" (default) or "cub".
data_name = "car"

The default embedding parameters below are tuned separately for the car and cub datasets.

In [3]:
from privateMail import DPImageRetriever

# Default embedding parameters, tuned per dataset.
# The data-base file is produced by CGD training: copy it from the train output
# into the results/ folder before running this cell.
if data_name == 'car':
    data_base_name = 'car_cropped_resnet50_SG_64_0.1_0.5_0.1_32_data_base.pth'
    embed_params = {'sigma': 6,
                    'alpha': 0.6,
                    'dim': 2,
                    'sigma_q': 1e-8,
                    'iters': 5}
elif data_name == 'cub':
    data_base_name = 'cub_cropped_resnet50_G_64_0.1_0.5_0.1_32_data_base.pth'
    embed_params = {'sigma': 5,
                    'alpha': 0.5,
                    'dim': 2,
                    'sigma_q': 1e-8,
                    'iters': 5}
else:
    # Fail loudly instead of silently applying the cub configuration to a
    # misspelled or unsupported dataset name.
    raise ValueError(
        "Unsupported data_name {!r}: expected 'car' or 'cub'".format(data_name))

# NOTE: torch.load unpickles the file, so only load data bases you produced
# yourself or otherwise trust.
data_base = torch.load('results/{}'.format(data_base_name))

# Default noise parameters
noise_params = {'epsilon': 0.01,
                'delta': 1e-5}

# Use public dataset of size 1000
retriever = DPImageRetriever(data_base, embed_params, noise_params=noise_params, n_public=1000, n_gallery=-1)

### Optional: Update embedding and noise parameters

In [9]:
# Override the embedding parameters on the existing retriever.
retriever.set_embed_params(
    sigma=6,
    alpha=0.6,
    dim=2,
    sigma_q=1e-8,
    iters=5,
)

# Override the noise parameters (epsilon, delta).
retriever.set_noise_params(
    epsilon=0.01,
    delta=1e-5,
)

## Measure recall@k

Non-private SMLQ:

In [5]:
# Recall@8 with noise disabled; the returned value is shown as the cell output.
retriever.recall(
    k=8,
    noise=False,
    verbose=True,
)

Computing recall@8: 100%|██████████| 8130/8130 [10:59<00:00, 12.32it/s]

Recall: 0.8432964329643297





0.8432964329643297

PrivateMail SMLQ:

In [10]:
# Recall@8 with noise enabled; the returned value is shown as the cell output.
retriever.recall(
    k=8,
    noise=True,
    verbose=True,
)

Computing recall@8 for ε = 0.01, δ = 1e-05: 100%|██████████| 8130/8130 [21:35<00:00,  6.28it/s]

Recall: 0.8151291512915129





0.8151291512915129


## Retrieve images for single query

Retrieves $k$ images for the specified query image. Files are saved in the `results/` folder.

In [5]:
# Draw a random gallery image as the query; assign a file name here instead
# to query a specific image.
query_img_name = np.random.choice(retriever.gallery_images)

# Retrieve k=8 images for the query with noise disabled.
retrieved_img_names, retrieved_labels = retriever.run_query(
    query_img_name,
    k=8,
    noise=False,
    verbose=True,
    save=True,
)

Query image: data/car/cropped/014228.jpg
Query label: 73
Retrieved labels: [73, 73, 73, 73, 73, 74, 73, 74]
