In [None]:
use_gdrive = True # @param {type:"boolean"}

RESULTS_PATH = "results2"

if use_gdrive:
  try:
    # mount your google drive to get permanent storage for your results
    from google.colab import drive
    drive.mount('/content/drive')

    RESULTS_PATH = "/content/drive/MyDrive/infoseclab23/results2"
  except ModuleNotFoundError:
    print("failed to mount gdrive")
else:
  print(f"saving results to '{RESULTS_PATH}'. If you're using Google Colab, this folder will be deleted when you disconnect!")

!mkdir -p {RESULTS_PATH}

In [None]:
import sys

# Lab files
![ ! -d 'infoseclab_23' ] && git clone https://github.com/ethz-spylab/infoseclab_23.git
%cd infoseclab_23
!git pull https://github.com/ethz-spylab/infoseclab_23.git
%cd ..
if "infoseclab_23" not in sys.path:
  sys.path.append("infoseclab_23")

# Imports

In [None]:
import infoseclab
from infoseclab import ShadowModels, utils, evaluation
import matplotlib.pyplot as plt
import numpy as np
import os
import pathlib
import scipy
import sklearn
import warnings
from zipfile import ZipFile


# Membership Inference

In this part of the lab, you will create a membership inference attack, with a twist:
you do not know which shadow models were trained on which samples.

First, load the shadow-model and target-model activations, the class labels, and the known membership labels.

In [None]:
# Load data
# Pre-softmax activations from shadow models to construct your attack (50000 x 127 x 10)
activations_fit = ShadowModels.activations_fit
# Pre-softmax activations from the target model to attack (50000 x 10)
activations_attack = ShadowModels.activations_attack
# The class labels of all samples (50000)
labels = ShadowModels.labels
# Train-test vector; -1 for test samples, 1/0 (member/non-member) for training samples (50000)
training_splits = ShadowModels.training_splits

# Sanity checks: every array must agree on the number of samples and classes
num_samples, num_models, num_classes = activations_fit.shape
assert (num_samples, num_classes) == activations_attack.shape
assert (num_samples,) == labels.shape
assert (num_samples,) == training_splits.shape

# Mask that is True for all samples with a known target membership label
training_mask = training_splits != -1


**Implement your membership inference attack in the following cell.**

Remember that your output should be a NumPy array
containing attack scores for *all* samples,
but we only consider samples where the corresponding `training_mask` entry
is `False` for grading.

The following code performs a dummy attack that ignores all shadow model outputs.

In [None]:
# TODO: Replace the following dummy score with your attack
# Dummy score: the target model's activation at each sample's own class label.
sample_indices = np.arange(num_samples)
attack_scores = activations_attack[sample_indices, labels]

# Save scores
scores_file = os.path.join(RESULTS_PATH, "attack_scores.npy")
utils.save_attack_scores(scores_file, attack_scores)

In [None]:
# You can visualize the TPR vs. FPR of your attack on the training set using the following code.
# If you want to evaluate multiple attacks you can call `evaluation.eval_mia` multiple times,
# but you should set `plot_decorations = False` for all but the first call.
fig, ax = plt.subplots()
# Only samples with a known membership label (training_mask) can be evaluated here.
evaluation.eval_mia(
    true_splits=training_splits[training_mask],
    attack_scores=attack_scores[training_mask],
    ax=ax,
    label="Dummy attack",
    plot_decorations=True,
)
# `plt.show()` takes no figure argument (only the keyword `block`); it displays
# all open figures.
plt.show()
# Release the figure so re-running the cell does not accumulate open figures.
plt.close(fig)

# Stealing Chatbot Prompts

Enter the secrets you extracted from the chatbot in the cell below.

In [None]:
# Enter your 10 extracted secrets, 6 alphanumeric characters each
secrets = [
    "aaaaaa",
    "aaaaaa",
    "aaaaaa",
    "aaaaaa",
    "aaaaaa",
    "aaaaaa",
    "aaaaaa",
    "aaaaaa",
    "aaaaaa",
    "aaaaaa"
]

# Check the format stated above before anything is written to disk, so a typo
# is caught here rather than in the saved submission file.
assert len(secrets) == 10, f"expected 10 secrets, got {len(secrets)}"
malformed = [s for s in secrets if len(s) != 6 or not s.isalnum()]
assert not malformed, f"each secret must be 6 alphanumeric characters, got: {malformed}"

# save the secrets to an array and save it to chatbot.npy
secrets = np.array(secrets)
np.save(f"{RESULTS_PATH}/chatbot.npy", secrets)

# Create submission file (**upload `results2.zip` to Moodle**)

In [None]:
!zip -j -FSr "{RESULTS_PATH}/results2.zip" {RESULTS_PATH}

In [None]:
from infoseclab.submission import validate_zip2

# Run the lab's own validator on the archive created in the previous cell;
# the cell fails if the validator returns a falsy result.
submission_zip = f"{RESULTS_PATH}/results2.zip"
assert validate_zip2(submission_zip)