# Voice judgoor

Here we showcase a full end-to-end flow of:
1. training a model for a specific task (judging voices)
2. creating a proof of judgment
3. creating and deploying an evm verifier
4. verifying the proof of judgment using the verifier

First we download a few voice related datasets from kaggle, which are all labelled using the same emotion and tone labelling standard.

We have 8 emotions in both speaking and singing datasets: neutral, calm, happy, sad, angry, fear, disgust, surprise.

To download the dataset make sure you have the kaggle cli installed in your local env `pip install kaggle`. Make sure you set up your `kaggle.json` file as detailed [here](https://www.kaggle.com/docs/api#getting-started-installation-&-authentication).
Then run the associated `voice_data.sh` data download script: `sh voice_data.sh`.

Make sure you set the `VOICE_DATA_DIR` environment variable to point to the directory the `voice_data.sh` script has downloaded to. This script also accepts an argument to download to a specific directory: `sh voice_data.sh /path/to/voice/data`.


In [None]:

import os
# os.environ["VOICE_DATA_DIR"] = "."

# Resolve the dataset root from the environment. An unset VOICE_DATA_DIR falls
# back to "", so the relative "data/..." paths used later resolve against the
# current working directory.
voice_data_dir = os.environ.get('VOICE_DATA_DIR', "")

print("voice_data_dir: ", voice_data_dir)


### TESS Dataset

In [None]:
# check if notebook is in colab
try:
    import google.colab  # noqa: F401 -- imported only to detect the Colab runtime
except ImportError:
    # rely on local installation of ezkl if the notebook is not in colab
    pass
else:
    # install ezkl (and onnx) into the Colab runtime
    import subprocess
    import sys

    try:
        for package in ("ezkl", "onnx"):
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    except subprocess.CalledProcessError as err:
        # Stay best-effort, but report the failure instead of hiding it: the
        # imports below will then use whatever is already installed.
        print(f"pip install failed: {err}")

from torch import nn
import ezkl
import os
import json
import pandas as pd
import logging

# read in VOICE_DATA_DIR from environment variable

# FORMAT = '%(levelname)s %(name)s %(asctime)-15s %(filename)s:%(lineno)d %(message)s'
# logging.basicConfig(format=FORMAT)
# logging.getLogger().setLevel(logging.INFO)


# Root of the TESS dataset; every entry inside it is listed as a directory of
# audio files.
Tess = os.path.join(voice_data_dir, "data/TESS/")

tess = os.listdir(Tess)

emotions = []
files = []

for folder in tess:
    for wav_name in os.listdir(Tess + folder):
        # The third underscore-separated token of the file stem is used as the
        # emotion label.
        label = wav_name.split('.')[0].split('_')[2]
        # 'ps' is relabelled as 'surprise'; every other token is kept verbatim.
        emotions.append('surprise' if label == 'ps' else label)
        files.append(Tess + folder + '/' + wav_name)

# One row per audio file: its emotion label and its path.
tess_df = pd.DataFrame({'Emotions': emotions, 'Files': files})
tess_df

### RAVDESS SONG dataset

In [None]:
# Root of the RAVDESS song recordings; every entry inside it is listed as a
# directory of audio files.
Ravdess = os.path.join(voice_data_dir, "data/RAVDESS_SONG/audio_song_actors_01-24/")

ravdess_list = os.listdir(Ravdess)

files = []
emotions = []

for item in ravdess_list:
    actor = os.listdir(Ravdess + item)
    for file in actor:
        # The third dash-separated field of the file stem is the numeric
        # emotion code.
        name = file.split('.')[0]
        parts = name.split('-')
        emotions.append(int(parts[2]))
        files.append(Ravdess + item + '/' + file)

emotion_data = pd.DataFrame(emotions, columns=['Emotions'])
files_data = pd.DataFrame(files, columns=['Files'])

ravdess_song_df = pd.concat([emotion_data, files_data], axis=1)

# Translate the numeric codes into the shared emotion labels. The result is
# assigned back to the column: calling replace(inplace=True) on
# `ravdess_song_df.Emotions` is a chained in-place update, which pandas warns
# about and which does not modify the frame when Copy-on-Write is enabled.
ravdess_song_df['Emotions'] = ravdess_song_df['Emotions'].replace(
    {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fear', 7: 'disgust', 8: 'surprise'}
)

ravdess_song_df

### RAVDESS SPEECH Dataset

In [None]:
# Root of the RAVDESS speech recordings; every entry inside it is listed as a
# directory of audio files.
Ravdess = os.path.join(voice_data_dir, "data/RAVDESS_SPEECH/audio_speech_actors_01-24/")

ravdess_list = os.listdir(Ravdess)

files = []
emotions = []

for item in ravdess_list:
    actor = os.listdir(Ravdess + item)
    for file in actor:
        # The third dash-separated field of the file stem is the numeric
        # emotion code.
        name = file.split('.')[0]
        parts = name.split('-')
        emotions.append(int(parts[2]))
        files.append(Ravdess + item + '/' + file)

emotion_data = pd.DataFrame(emotions, columns=['Emotions'])
files_data = pd.DataFrame(files, columns=['Files'])

ravdess_df = pd.concat([emotion_data, files_data], axis=1)

# Translate the numeric codes into the shared emotion labels. The result is
# assigned back to the column: calling replace(inplace=True) on
# `ravdess_df.Emotions` is a chained in-place update, which pandas warns about
# and which does not modify the frame when Copy-on-Write is enabled.
ravdess_df['Emotions'] = ravdess_df['Emotions'].replace(
    {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fear', 7: 'disgust', 8: 'surprise'}
)

ravdess_df

### CREMA Dataset

In [None]:
# Root of the CREMA-D dataset; its entries are treated as audio files directly.
Crema = os.path.join(voice_data_dir, "data/CREMA-D/")

crema = os.listdir(Crema)

# Emotion code (third underscore-separated token of the file name) -> label.
# Codes missing from this table are labelled 'unknown'.
crema_labels = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

emotions = []
files = []

for item in crema:
    files.append(Crema + item)
    emotions.append(crema_labels.get(item.split('_')[2], 'unknown'))

emotions_data = pd.DataFrame(emotions, columns=['Emotions'])
files_data = pd.DataFrame(files, columns=['Files'])

crema_df = pd.concat([emotions_data, files_data], axis=1)

crema_df

### SAVEE Dataset

In [None]:
# Root of the SAVEE dataset; its entries are treated as audio files directly.
Savee = os.path.join(voice_data_dir,"data/SAVEE/")

savee = os.listdir(Savee)

# Emotion prefix -> label. Any prefix missing from this table is labelled
# 'surprise'.
savee_labels = {
    'a': 'angry',
    'd': 'disgust',
    'f': 'fear',
    'h': 'happy',
    'n': 'neutral',
    'sa': 'sad',
}

emotions = []
files = []

for item in savee:
    files.append(Savee + item)
    # Second underscore-separated token with its last six characters removed
    # (presumably a two-digit index plus ".wav" -- verify against the data).
    prefix = item.split('_')[1][:-6]
    emotions.append(savee_labels.get(prefix, 'surprise'))

savee_df = pd.concat(
    [
        pd.DataFrame(emotions, columns=['Emotions']),
        pd.DataFrame(files, columns=['Files']),
    ],
    axis=1,
)
savee_df

### Combining all datasets

In [None]:
# Stack the five per-dataset frames row-wise and renumber the rows 0..N-1 in
# the same step, then persist the combined table.
df = pd.concat(
    [ravdess_df, ravdess_song_df, crema_df, tess_df, savee_df],
    axis=0,
    ignore_index=True,
)
df.to_csv("df.csv", index=False)
df


In [None]:
import seaborn as sns
# Histogram of the "Emotions" column of the combined frame: one bar per label.
sns.histplot(data=df, x="Emotions")


### Training 

Here we convert all audio files into 2D frequency-domain spectrograms so that we can leverage convolutional neural networks, which tend to be more efficient than time-series models like RNNs or LSTMs.
We thus: 
1. Extract the mel spectrogram from each of the audio recordings. 
2. Rescale each of these to the decibel (DB) scale. 
3. Define the model as the following model: `(x) -> (conv) -> (relu) -> (linear) -> (y)`


You may notice that we introduce a second computational graph `(key) -> (key)`. The reasons for this are to do with MEV, and if you are not interested you can skip the following paragraph. 

Let's say that obtaining a high score from the judge and then submitting said score to the EVM verifier could result in the issuance of a reward (financial or otherwise). There is an incentive then for MEV bots to scalp any issued valid proof and submit a duplicate transaction with the same proof to the verifier contract in the hopes of obtaining the reward before the original issuer. Here we add `(key) -> (key)` such that the transaction creator's public key / address is both a private input AND a public input to the proof. As such the on-chain verification only succeeds if the key passed in during proof time is also passed in as a public input to the contract. The reward issued by the contract can then be irrevocably tied to that key such that even if the proof is submitted by another actor, the reward would STILL go to the original singer / transaction issuer. 

In [None]:


import librosa
import numpy as np
import matplotlib.pyplot as plt


# mel-spectrogram extraction (dB scale) for every audio file
def extract_mel_spec(filename, duration=3, offset=0.5, n_mels=128):
    """Load an audio clip and return its mel spectrogram on the decibel scale.

    Args:
        filename: path of the audio file to load.
        duration: number of seconds of audio to load.
        offset: number of seconds to skip at the start of the file.
        n_mels: number of mel bands in the spectrogram.

    Returns:
        Array of shape (1, n_mels, frames); the leading axis of size 1 acts as
        the single input channel expected by the convolution.
    """
    samples, sample_rate = librosa.load(filename, duration=duration, offset=offset)
    mel = librosa.feature.melspectrogram(y=samples, sr=sample_rate, n_mels=n_mels)
    # ref=np.max expresses power relative to the clip's own maximum.
    mel_db = librosa.power_to_db(mel, ref=np.max)
    return mel_db.reshape(1, n_mels, -1)

# Column 1 of df holds the file paths; compute one spectrogram per file.
Xdb = df.iloc[:, 1].apply(extract_mel_spec)


Here we convert each label to a number between 0 and 1, where 1 is pleasantly surprised, 0 is disgust, and the rest are floats in between. The model loves pleasantly surprised voices and hates disgust ;) 

In [None]:
# Largest size along the last axis over all spectrograms (0 if there are none).
max_size = max((spec.shape[2] for spec in Xdb), default=0)

# Zero-pad the last axis of every spectrogram up to max_size so all share one shape.
Xdb = Xdb.apply(
    lambda spec: np.pad(spec, ((0, 0), (0, 0), (0, max_size - spec.shape[2])))
)

Xdb = pd.DataFrame(Xdb)
Xdb['label'] = df['Emotions']

# Convert each label to a score between 0 and 1, where 1 is surprise and 0 is
# disgust; every emotion not listed in the table scores 0.8.
label_scores = {'surprise': 1, 'disgust': 0, 'fear': 0.2, 'happy': 0.4, 'sad': 0.6}
Xdb['label'] = Xdb['label'].apply(lambda emotion: label_scores.get(emotion, 0.8))

# Shape of the first padded spectrogram without its leading axis.
Xdb.iloc[0,0][0].shape

In [None]:
import torch
# Defines the model
# we got convs, we got relu, we got linear layers
# What else could one want ????

class MyModel(nn.Module):
    """Small conv -> relu -> linear -> sigmoid scorer that also echoes a key.

    The key takes no part in the computation: `forward` returns it unchanged
    next to the score.
    """

    def __init__(self):
        super().__init__()

        # Single-channel 5x5 convolution with stride 4.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=5, stride=4)

        # 992 flattened features in, one score out. A 1x128x130 input produces
        # a 31x32 feature map after conv1, i.e. 992 values.
        self.d1 = nn.Linear(992, 1)

        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()

    def forward(self, key, x):
        """Return `[key, score]`, where score is the sigmoid output for `x`.

        Args:
            key: value passed through untouched.
            x: input batch with one channel, e.g. shape (batch, 1, 128, 130).
        """
        features = self.relu(self.conv1(x))
        # Keep the batch axis and flatten everything else for the linear layer.
        score = self.sigmoid(self.d1(features.flatten(start_dim=1)))

        return [key, score]


circuit = MyModel()

# Smoke-test the untrained model on the first spectrogram, with 0 as the key.
first_spec = Xdb.iloc[0, 0][0].reshape(1, 1, 128, 130)
output = circuit(0, torch.tensor(first_spec))

output





Here we leverage the classic Adam optimizer, coupled with 0.001 weight decay so as to regularize the model. The weight decay (a.k.a L2 regularization) can also help on the zk-circuit end of things in that it prevents inputs to Halo2 lookup tables from falling out of range (lookup tables are how we represent non-linearities like ReLU and Sigmoid inside our circuits). 

In [None]:
from tqdm import tqdm

# Train the model using pytorch
n_epochs = 10    # number of epochs to run
batch_size = 10  # size of each batch

loss_fn = nn.MSELoss()  # mean squared error between predicted and target score
# weight_decay adds l2 regularization
optimizer = torch.optim.Adam(circuit.parameters(), lr=0.001, weight_decay=0.001)

# randomly shuffle dataset
Xdb = Xdb.sample(frac=1).reset_index(drop=True)

# split into train, test and validation sets: first 80% train, next 10% test,
# last 10% validation
n_rows = len(Xdb)
train_end = int(n_rows * 0.8)
test_end = int(n_rows * 0.9)
train = Xdb.iloc[:train_end]
test = Xdb.iloc[train_end:test_end]
val = Xdb.iloc[test_end:]

# one optimisation step per training row in every epoch
batches_per_epoch = len(train)


def get_loss(Xbatch, ybatch):
    """Return `loss_fn` between the model's score for `Xbatch` and `ybatch`.

    The model is called with 0 as its key and returns `[key, score]`; only the
    score (index 1) enters the loss.
    """
    return loss_fn(circuit(0, Xbatch)[1], ybatch)

# The validation tensors are identical for every epoch, so build them once.
val_data = np.concatenate(val.iloc[:,0].values)
val_labels = val.iloc[:,1].values.astype(np.float32)
val_data = val_data.reshape(len(val),1,128,130)
val_labels = val_labels.reshape(len(val),1)
val_inputs = torch.tensor(val_data)
val_targets = torch.tensor(val_labels)

for epoch in range(n_epochs):
    with tqdm(range(batches_per_epoch), unit="batch", mininterval=0) as bar:
        bar.set_description(f"Epoch {epoch}")
        for _step in bar:
            # take a batch: batch_size row positions of `train`, drawn at
            # random with replacement (batches_per_epoch is len(train))
            indices = np.random.choice(batches_per_epoch, batch_size)

            data = np.concatenate(train.iloc[indices.tolist(),0].values)
            labels = train.iloc[indices.tolist(),1].values.astype(np.float32)

            data = data.reshape(batch_size,1,128,130)
            labels = labels.reshape(batch_size,1)

            # convert to tensors
            Xbatch = torch.tensor(data)
            ybatch = torch.tensor(labels)

            # forward pass
            loss = get_loss(Xbatch, ybatch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()

            bar.set_postfix(
                batch_loss=float(loss),
            )
        # get validation loss; no gradients are needed for evaluation, so skip
        # building the autograd graph over the whole validation set
        with torch.no_grad():
            val_loss = get_loss(val_inputs, val_targets)
        print(f"Validation loss: {val_loss}")



# get test loss on the held-out test split (not the validation split)
test_data = np.concatenate(test.iloc[:,0].values)
test_labels = test.iloc[:,1].values.astype(np.float32)
test_data = test_data.reshape(len(test),1,128,130)
test_labels = test_labels.reshape(len(test),1)
# evaluation only: no gradients required
with torch.no_grad():
    test_loss = get_loss(torch.tensor(test_data), torch.tensor(test_labels))
print(f"Test loss: {test_loss}")



In [None]:
# Two-entry "input_data" payload: 100 zeros, followed by the first 100
# validation spectrograms flattened into one list.
val_data = {
    "input_data": [
        [0.0] * 100,
        np.concatenate(val.iloc[:100, 0].values).flatten().tolist(),
    ],
}
# save as json file
with open("val_data.json", "w") as json_file:
    json.dump(val_data, json_file)


In [None]:
# Random sample inputs: they are used to trace the model for export and are
# also written to input.json below.
x = 0.1*torch.rand(1,*[1, 128, 130], requires_grad=True)
key = torch.rand(1,*[1], requires_grad=True)

# Flips the neural net into inference mode
circuit.eval()

# Export the model
# NOTE(review): the model takes two inputs and returns two outputs, but only
# one name is supplied in input_names / output_names -- confirm that the
# dynamic_axes keys below match the names the exporter actually assigns.
torch.onnx.export(circuit,               # model being run
                  (key, x),                   # model input (or a tuple for multiple inputs)
                  "network.onnx",            # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=10,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  dynamic_axes={'input' : {0 : 'batch_size'},
                                'input.1' : {0 : 'batch_size'}, # variable length axes
                                'output' : {0 : 'batch_size'}})

key_array = ((key).detach().numpy()).reshape([-1]).tolist()
data_array = ((x).detach().numpy()).reshape([-1]).tolist()

data = dict(input_data = [key_array, data_array])

# Serialize data into file; the context manager closes (and flushes) the file
# handle, which a bare open() inside json.dump never did.
with open("input.json", 'w') as input_file:
    json.dump(data, input_file)


# ezkl.export(circuit, input_shape = [[1], [1025, 130]], run_gen_witness=False, run_calibrate_settings=False)

Here we set the visibility of the different parts of the circuit, whereby the model params and the outputs of the computational graph (the key and the judgment) are public

In [None]:
import ezkl
import os 

# File locations used by the remaining ezkl steps, all relative to the current
# working directory.
model_path = 'network.onnx'
compiled_model_path = 'network.compiled'
pk_path = 'test.pk'
vk_path = 'test.vk'
settings_path = 'settings.json'
srs_path = 'kzg.params'
data_path = 'input.json'
# From here on val_data names the calibration file rather than the dict that
# was dumped into it.
val_data = 'val_data.json'

# Visibility of each part of the circuit.
run_args = ezkl.PyRunArgs()
run_args.input_visibility = "private"
run_args.param_visibility = "fixed"
run_args.output_visibility = "public"
# Presumably binds the "batch_size" dynamic axis declared at export time to 1.
run_args.variables = [("batch_size", 1)]


# TODO: Dictionary outputs
res = ezkl.gen_settings(model_path, settings_path, py_run_args=run_args)
assert res == True


We have now generated a settings file. This file basically instantiates a bunch of parameters that determine the circuit's shape, size etc... Because of the way we represent nonlinearities in the circuit (using Halo2's [lookup tables](https://zcash.github.io/halo2/design/proving-system/lookup.html)), it is often best to _calibrate_ this settings file as some data can fall out of range of these lookups.

You can pass a dataset for calibration that will be representative of real inputs you might find if and when you deploy the prover. Here we use the validation dataset we used during training. 

In [None]:


res = await ezkl.calibrate_settings(val_data, model_path, settings_path, "resources", scales = [4])
assert res == True
print("verified")


In [None]:
# Compile the ONNX model with the settings file; the result is written to
# compiled_model_path and used by the witness, mock, setup and prove steps.
res = ezkl.compile_circuit(model_path, compiled_model_path, settings_path)
assert res == True

As we use Halo2 with KZG-commitments we need an SRS string from (preferably) a multi-party trusted setup ceremony. For an overview of the procedures for such a ceremony check out [this page](https://blog.ethereum.org/2023/01/16/announcing-kzg-ceremony). The `get_srs` command retrieves a correctly sized SRS given the calibrated settings file from [here](https://github.com/han0110/halo2-kzg-srs). 

These SRS were generated with [this](https://github.com/privacy-scaling-explorations/perpetualpowersoftau) ceremony. 

In [None]:
# Retrieve the SRS for the circuit described by the settings file.
# NOTE(review): unlike the other steps, `res` is not asserted here.
res = await ezkl.get_srs(settings_path)

We now need to generate the (partial) circuit witness. These are the model outputs (and any hashes) that are generated when feeding the previously generated `input.json` through the circuit / model. 

In [None]:

# Where the generated witness is written.
witness_path = "witness.json"

# Feed input.json (data_path) through the compiled circuit. Success is judged
# by the witness file existing afterwards, not by the return value.
res = await ezkl.gen_witness(data_path, compiled_model_path, witness_path)
assert os.path.isfile(witness_path)

As a sanity check we can run a mock proof. This just checks that all the constraints are valid. 

In [None]:


# Mock-prove the witness against the compiled circuit.
# NOTE(review): `res` is stored but never asserted -- confirm whether
# ezkl.mock raises on failure or only returns a falsy value.
res = ezkl.mock(witness_path, compiled_model_path)

Here we setup verifying and proving keys for the circuit. As the name suggests the proving key is needed for ... proving and the verifying key is needed for ... verifying. 

In [None]:
# NOTE(review): `!` is an IPython shell escape; the export presumably runs in a
# subshell and so may not change this kernel's environment -- confirm.
!export RUST_LOG=trace
# HERE WE SETUP THE CIRCUIT PARAMS
# WE GOT KEYS
# WE GOT CIRCUIT PARAMETERS
# EVERYTHING ANYONE HAS EVER NEEDED FOR ZK
# Produce the verifying key (vk_path) and proving key (pk_path) for the
# compiled circuit.
res = ezkl.setup(
        compiled_model_path,
        vk_path,
        pk_path,
        
    )

# Both key files must exist afterwards, and so must the settings file.
assert res == True
assert os.path.isfile(vk_path)
assert os.path.isfile(pk_path)
assert os.path.isfile(settings_path)

Now we generate a full proof. 

In [None]:
# GENERATE A PROOF

# Where the proof is written.
proof_path = os.path.join('test.pf')

# Prove the witness against the compiled circuit with the proving key.
# NOTE(review): "single" is presumably the proof type -- confirm against the
# installed ezkl version's signature.
res = ezkl.prove(
        witness_path,
        compiled_model_path,
        pk_path,
        proof_path,
        
        "single",
    )

# Success is judged by the proof file existing; `res` is only printed.
print(res)
assert os.path.isfile(proof_path)

And verify it as a sanity check. 

In [None]:
# VERIFY IT

# Check the proof locally using the settings file and the verifying key.
res = ezkl.verify(
        proof_path,
        settings_path,
        vk_path,
        
    )

assert res == True
print("verified")

In [None]:
import os
# Output locations for the generated Solidity verifier and its ABI.
abi_path = 'test.abi'
sol_code_path = 'test.sol'
# Same values as assigned earlier in the notebook, repeated in this cell.
vk_path = os.path.join('test.vk')
srs_path = os.path.join('kzg.params')  # assigned but not passed to the call below
settings_path = os.path.join('settings.json')


# Generate the verifier from the verifying key and settings; the Solidity
# source goes to sol_code_path and the ABI to abi_path.
res = await ezkl.create_evm_verifier(
        vk_path,
        
        settings_path,
        sol_code_path,
        abi_path,
    )

assert res == True

### Verify if the Verifier Works Locally

#### Deploy The Contract

In [None]:
# Make sure anvil is running locally first
# run with $ anvil -p 3030
# we use the default anvil node here
import json  # NOTE(review): not used anywhere in this cell

# File that receives the deployed contract's address.
address_path = os.path.join("address.json")

# Deploy the Solidity verifier to the local node on port 3030.
res = await ezkl.deploy_evm(
    address_path,
    'http://127.0.0.1:3030',
    sol_code_path,
)

assert res == True

# Read the address back as plain text, dropping trailing whitespace.
with open(address_path, 'r') as file:
    addr = file.read().rstrip()

In [None]:
# make sure anvil is running locally
# $ anvil -p 3030

# Check the proof against the verifier contract deployed at `addr`, on the same
# local node.
res = await ezkl.verify_evm(
    addr,
    "http://127.0.0.1:3030",
    proof_path
)
assert res == True