<a href="https://colab.research.google.com/github/yannuma/PrivacyProject/blob/main/Pyvacy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pyvacy
import tarfile
import torch
import requests
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
import torchvision
import torch.utils.data as torch_data
import torchvision.transforms as transforms
import numpy as np
from pyvacy import optim, analysis
import itertools

Collecting pyvacy
  Downloading pyvacy-0.0.32.tar.gz (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyvacy
  Building wheel for pyvacy (setup.py) ... [?25l[?25hdone
  Created wheel for pyvacy: filename=pyvacy-0.0.32-py3-none-any.whl size=12382 sha256=9e215e66efb849c573d531bc7cc974e3c49361f9d242d95de2a2cab7f64f9baa
  Stored in directory: /root/.cache/pip/wheels/55/19/df/2cff88acee4b77962cdafdafc7f447c4f8b95ac0a808eac2e0
Successfully built pyvacy
Installing collected packages: pyvacy
Successfully installed pyvacy-0.0.32


In [None]:
# Download the Texas dataset archive, unpack it under data/, and read the raw
# feature and label lines into memory for the cells below.
DATASET_URL = 'https://www.comp.nus.edu.sg/~reza/files/dataset_texas.tgz'
ARCHIVE_PATH = 'dataset_texas.tgz'

# A timeout keeps the cell from hanging forever if the server stops responding.
response = requests.get(DATASET_URL, timeout=60)
if response.status_code == 200:
    with open(ARCHIVE_PATH, 'wb') as file:
        file.write(response.content)
    print("Download completed successfully.")
else:
    # Stop here: carrying on would try to extract a missing (or stale) archive
    # and fail later with a far less helpful error.
    raise RuntimeError(f"Failed to download file: {response.status_code}")

with tarfile.open(ARCHIVE_PATH) as f:
    # The archive comes from the network, so use the 'data' extraction filter
    # when this Python version provides it; it rejects members that would be
    # written outside the destination directory.
    if hasattr(tarfile, 'data_filter'):
        f.extractall('data/', filter='data')
    else:
        f.extractall('data/')

with open('data/texas/100/feats', 'r') as f:
    features = f.readlines()
with open('data/texas/100/labels', 'r') as f:
    labels = f.readlines()

print(len(features))
print(len(labels))


Download completed successfully.
67330
67330


In [None]:
class NetSeq(nn.Module):
    """Fully connected classifier: two tanh hidden layers and a linear output layer.

    Args:
        input_dim: Number of input features per sample. Defaults to 6169.
        hidden_dim: Width of both hidden layers. Defaults to 128.
        num_classes: Number of output logits. Defaults to 101.

    The defaults are the sizes that were previously hard-coded, so ``NetSeq()``
    builds exactly the same architecture as before.
    """

    def __init__(self, input_dim=6169, hidden_dim=128, num_classes=101):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return the raw (unnormalised) class scores for the batch ``x``."""
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        # No activation on the output layer: the raw scores are returned as-is.
        return self.fc3(x)

def calc_accuracy(loader, network, device):
    """Compute the top-1 accuracy of ``network`` over every batch in ``loader``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` pairs of tensors.
        network: Callable mapping a batch of inputs to per-class scores, with
            the class scores along dimension 1.
        device: Device each batch is moved to before the forward pass.

    Returns:
        A tuple ``(accuracy, total)`` where ``accuracy`` is ``correct / total``
        and ``total`` is the number of labels seen. Returns ``(0.0, 0)`` when
        the loader yields no samples instead of dividing by zero.
    """
    correct = 0
    total = 0
    # Evaluation needs no gradients; disabling autograd here means callers do
    # not have to remember to wrap the call themselves.
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = network(inputs)
            # Index of the highest score in each row is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        return (0.0, 0)
    return (correct / total, total)


In [None]:
def train_nn_pp(trainloader, testloader, lr, epochs, clip, noiseMult, minibatchSize, delta,
                return_history=False):
    """Train a fresh ``NetSeq`` with pyvacy's DPSGD and report accuracy after every epoch.

    Args:
        trainloader: Loader of ``(inputs, labels)`` training batches; its
            ``dataset`` length is used as ``N`` for the privacy accounting.
        testloader: Loader of ``(inputs, labels)`` batches used for evaluation.
        lr: Learning rate passed to the optimizer.
        epochs: Number of passes over ``trainloader``.
        clip: Value passed to DPSGD as ``l2_norm_clip``.
        noiseMult: Value passed as ``noise_multiplier`` to both DPSGD and the
            moments accountant.
        minibatchSize: Value passed as ``batch_size`` to both DPSGD and the
            moments accountant.
        delta: Value passed as ``delta`` to the moments accountant.
        return_history: When True, also return the per-epoch metrics that are
            collected during training. Defaults to False, which keeps the
            original return value.

    Returns:
        The trained network, moved to the CPU. If ``return_history`` is True,
        a tuple ``(network, history)`` where ``history`` is a dict with keys
        ``"epsilon"``, ``"losses"``, ``"train_accs"`` and ``"test_accs"``.
    """
    losses = []
    train_accs = []
    test_accs = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    network = NetSeq().to(device)
    criterion = nn.CrossEntropyLoss()

    # `optim` is pyvacy.optim here: the notebook imports it after torch.optim
    # under the same name, so the later import is the one in effect.
    # NOTE(review): step() is called once per minibatch below with no separate
    # per-microbatch clipping call; pyvacy's documented usage shows a
    # zero_microbatch_grad()/microbatch_step() inner loop. Confirm that the
    # installed pyvacy version clips gradients as intended with this loop.
    optimizer = optim.DPSGD(
        l2_norm_clip=clip,
        noise_multiplier=noiseMult,
        batch_size=minibatchSize,
        params=network.parameters(),
        lr=lr,
    )

    # NOTE(review): the accountant is given `minibatchSize`, while the real
    # batch size is whatever `trainloader` was built with; verify they match.
    epsilon = analysis.moments_accountant(
        N=len(trainloader.dataset),
        batch_size=minibatchSize,
        noise_multiplier=noiseMult,
        epochs=epochs,
        delta=delta,
    )

    print("Epsilon: ", epsilon)

    print('Start training on', device)

    for epoch in range(epochs):

        print('Epoch: {}'.format(epoch + 1))

        epoch_loss = []

        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = network(inputs)
            loss = criterion(outputs, labels)
            epoch_loss.append(loss.item())
            loss.backward()
            optimizer.step()

        losses.append(np.mean(epoch_loss))

        # Compute accuracy on training data
        with torch.no_grad():
            acc_tuple = calc_accuracy(trainloader, network, device)

        print('Accuracy of the network on %d train inputs: %d %%' % (acc_tuple[1], 100 * acc_tuple[0]))
        train_accs.append(acc_tuple[0])

        # Compute accuracy on held-out data
        with torch.no_grad():
            acc_tuple = calc_accuracy(testloader, network, device)

        print('Accuracy of the network on %d test inputs: %d %%' % (acc_tuple[1], 100 * acc_tuple[0]))
        test_accs.append(acc_tuple[0])

    network.to('cpu')

    if return_history:
        # Expose the metrics that were previously collected and then dropped.
        history = {
            "epsilon": epsilon,
            "losses": losses,
            "train_accs": train_accs,
            "test_accs": test_accs,
        }
        return network, history
    return network

In [None]:
# Parse the raw text lines: each feature line is a comma-separated list of
# integers (all whitespace is removed first); each label line is one integer.
features_list = [list(map(int, ''.join(feature.split()).split(','))) for feature in features]
labels_list = [int(label.strip()) for label in labels]
assert len(features_list) == len(labels_list), "features and labels must align one-to-one"

# 80/20 train/test split taken in file order (nothing is shuffled before the split).
size = int(0.8 * len(features))
feat_tens_train = torch.tensor(features_list[:size], dtype=torch.float)
l_tens_train = torch.tensor(labels_list[:size], dtype=torch.long)
feat_tens_test = torch.tensor(features_list[size:], dtype=torch.float)
l_tens_test = torch.tensor(labels_list[size:], dtype=torch.long)

# DPSGD hyperparameters; one model is trained per entry in `noiseMult`.
lr = 0.01
epochs = 20
clip = 3
noiseMult = [0.4, 0.5, 0.6, 0.7, 0.9, 1, 1.1]
batch = 128
delta = 1e-5

dataset_train = TensorDataset(feat_tens_train, l_tens_train)
trainloader = DataLoader(dataset_train, batch_size=batch, shuffle=True)
dataset_test = TensorDataset(feat_tens_test, l_tens_test)
# Accuracy over the whole test set does not depend on batch order, so the
# evaluation loader is left unshuffled.
testloader = DataLoader(dataset_test, batch_size=batch, shuffle=False)

# Keep every trained network, keyed by its noise multiplier, so the sweep does
# not have to be repeated to inspect or reuse a model afterwards.
trained_models = {}
for nm in noiseMult:
    trained_models[nm] = train_nn_pp(trainloader, testloader, lr, epochs, clip, nm, batch, delta)


Epsilon:  28.168823511934516
Start training on cuda
Epoch: 1
Accuracy of the network on 53864 train inputs: 4 %
Accuracy of the network on 13466 test inputs: 4 %
Epoch: 2
Accuracy of the network on 53864 train inputs: 5 %
Accuracy of the network on 13466 test inputs: 4 %
Epoch: 3
Accuracy of the network on 53864 train inputs: 9 %
Accuracy of the network on 13466 test inputs: 8 %
Epoch: 4
Accuracy of the network on 53864 train inputs: 11 %
Accuracy of the network on 13466 test inputs: 10 %
Epoch: 5
Accuracy of the network on 53864 train inputs: 12 %
Accuracy of the network on 13466 test inputs: 11 %
Epoch: 6
Accuracy of the network on 53864 train inputs: 13 %
Accuracy of the network on 13466 test inputs: 13 %
Epoch: 7
Accuracy of the network on 53864 train inputs: 15 %
Accuracy of the network on 13466 test inputs: 14 %
Epoch: 8
Accuracy of the network on 53864 train inputs: 18 %
Accuracy of the network on 13466 test inputs: 17 %
Epoch: 9
Accuracy of the network on 53864 train inputs: 21