In [None]:
from Metrics.FidelityMetrics import SNRMetric, PESQMetric, SECSMetric
import os
from util import *
import pandas as pd

In [None]:
# One object per fidelity metric; later cells feed them with .update(...) and
# read the aggregate back with .compute().
snr_metric, pesq_metric, secs_metric = SNRMetric(), PESQMetric(), SECSMetric()

In [None]:
# Directory that the next cell scans for ".wav" files (original / AntiFake pairs).
# NOTE(review): absolute path — adjust it when running on a different machine.
folder = "/mnt/d/repo/AntiFake/antifake"

In [None]:
# Every ".wav" entry directly inside `folder`.
subfiles = list(filter(lambda entry: entry.endswith(".wav"), os.listdir(folder)))

# Split by the marker embedded in the file name: "ffmpeg" marks the files treated
# as originals, "antifake" marks their AntiFake counterparts.
original = [wav_name for wav_name in subfiles if "ffmpeg" in wav_name]
antifake = [wav_name for wav_name in subfiles if "antifake" in wav_name]
def normalize_name(filename):
    """Return `filename` with every "ffmpeg" and "antifake" substring removed.

    The result is used as a pairing key: an original file and its AntiFake
    counterpart map to the same string once their markers are stripped.
    """
    stripped = filename
    # Order matters only for overlapping markers; keep "ffmpeg" first.
    for marker in ("ffmpeg", "antifake"):
        stripped = stripped.replace(marker, "")
    return stripped
# Index both groups by their marker-free name so matching files share a key.
original_dict = dict((normalize_name(wav_name), wav_name) for wav_name in original)
antifake_dict = dict((normalize_name(wav_name), wav_name) for wav_name in antifake)

# (original file, antifake file) for every key present in both groups.
paired_files = [
    (original_file, antifake_dict[key])
    for key, original_file in original_dict.items()
    if key in antifake_dict
]

# Sanity check: every file found its partner and the dataset has the expected 76 pairs.
assert len(paired_files) == len(original_dict) == len(antifake_dict) == 76

In [None]:
# Start from fresh metric objects so that re-running this cell never adds the
# same pairs a second time to metrics that were already updated (the advspeech
# evaluation cell further down follows the same pattern).
snr_metric = SNRMetric()
pesq_metric = PESQMetric()
secs_metric = SECSMetric()

# Loop variables are named differently from the `original` / `antifake` lists
# built by the pairing cell, so those lists are no longer overwritten here.
for original_name, antifake_name in paired_files:
    # load_wav is called with target_sr=16000 — presumably it resamples to 16 kHz; verify in util.
    original_wav = load_wav(f"{folder}/{original_name}", target_sr=16000)
    distorted_wav = load_wav(f"{folder}/{antifake_name}", target_sr=16000)

    # Trim both signals to the shorter length along dimension 1 so the metrics
    # receive equally sized inputs.
    n_samples = min(original_wav.size(1), distorted_wav.size(1))
    original_wav = original_wav[:, :n_samples]
    distorted_wav = distorted_wav[:, :n_samples]

    snr_metric.update(original_wav, distorted_wav)
    pesq_metric.update(original_wav, distorted_wav)
    secs_metric.update(original_wav, distorted_wav)

print("snr", snr_metric.compute())
print("pesq", pesq_metric.compute())
print("secs", secs_metric.compute())

In [None]:
# NISQA predictions, one row per evaluated file.
mos_data = pd.read_csv("./results/NISQA_results.csv")

# Keep only the rows whose "deg" entry mentions "antifake".
is_antifake = mos_data["deg"].str.contains("antifake")
mos_data_antifake = mos_data[is_antifake]

# Mean and population standard deviation (ddof=0, which is what np.std yields
# for a Series) of the predicted MOS.
antifake_mos = mos_data_antifake["mos_pred"]
mos_pred_mean = antifake_mos.mean()
mos_pred_std_dev = antifake_mos.std(ddof=0)

print("mos_pred_mean", mos_pred_mean)
print("mos_pred_std", mos_pred_std_dev)

In [None]:
# Fresh metric objects: this cell evaluates a different system than the
# AntiFake cell above and must not inherit its accumulated state.
snr_metric = SNRMetric()
pesq_metric = PESQMetric()
secs_metric = SECSMetric()

folder = "./sampled_pair"

# Each sub-directory of `folder` is treated as one speaker.
speakers = [f for f in os.listdir(folder) if os.path.isdir(os.path.join(folder, f))]

for speaker in speakers:
    speaker_dir = f"{folder}/{speaker}"
    # The directory listing does not depend on `i`, so read it once per speaker.
    speaker_files = os.listdir(speaker_dir)

    for i in range(1, 3):
        prefix = f"{speaker}_{i}"
        # load_wav is called with target_sr=16000 — presumably it resamples to 16 kHz; verify in util.
        original = load_wav(f"{speaker_dir}/{prefix}.wav", target_sr=16000)

        for subfile in speaker_files:
            if subfile.startswith(prefix) and "advspeech_ssim_only" in subfile:
                advspeech = load_wav(f"{speaker_dir}/{subfile}", target_sr=16000)

                # Trim into per-pair variables. The previous version overwrote
                # `original` with its trimmed copy, so when several advspeech
                # files matched one prefix, later comparisons used a reference
                # already shortened by earlier, shorter files.
                n_samples = min(original.size(1), advspeech.size(1))
                reference = original[:, :n_samples]
                advspeech = advspeech[:, :n_samples]

                snr_metric.update(reference, advspeech)
                pesq_metric.update(reference, advspeech)
                secs_metric.update(reference, advspeech)
                print(prefix, subfile)

print("snr", snr_metric.compute())
print("pesq", pesq_metric.compute())
print("secs", secs_metric.compute())
        


In [None]:
# NISQA predictions for the advspeech evaluation, one row per evaluated file.
mos_data = pd.read_csv("./results/NISQA_results_advspeech.csv")

# Keep only the rows whose "deg" entry mentions "advspeech".
is_advspeech = mos_data["deg"].str.contains("advspeech")
mos_data_advspeech = mos_data[is_advspeech]

# Mean and population standard deviation (ddof=0, which is what np.std yields
# for a Series) of the predicted MOS.
advspeech_mos = mos_data_advspeech["mos_pred"]
mos_pred_mean = advspeech_mos.mean()
mos_pred_std_dev = advspeech_mos.std(ddof=0)

print("mos_pred_mean", mos_pred_mean)
print("mos_pred_std", mos_pred_std_dev)

In [None]:
folder = "./sampled_pair"

# Each sub-directory of `folder` is treated as one speaker.
speakers = [
    entry
    for entry in os.listdir(folder)
    if os.path.isdir(os.path.join(folder, entry))
]

# Map "<speaker>_<i>" (i = 1, 2) to the path of that speaker's reference recording.
ref_dict = {
    f"{speaker}_{i}": f"{folder}/{speaker}/{speaker}_{i}.wav"
    for speaker in speakers
    for i in range(1, 3)
}
        

In [None]:
# Load the wespeaker model registered under the name 'english' and direct it to
# device 'cuda:0'. The resulting `model` is read as a global by
# wespeaker_similarity() below.
# NOTE(review): presumably this requires a CUDA-capable GPU — confirm before
# running on a CPU-only machine.
import wespeaker
model = wespeaker.load_model('english')
model.set_device('cuda:0')

In [None]:
def wespeaker_similarity(input_folder, mode):
    """Score every file in ``<input_folder>/<mode>`` against its reference recording.

    File names are expected to look like ``<prefix><sep><mode>...`` where
    ``<sep>`` is a single character and ``<prefix>`` is a key of the global
    ``ref_dict``. Each file is compared with ``ref_dict[prefix]`` through the
    global ``model.compute_similarity``.

    Args:
        input_folder: Directory that contains one sub-directory per mode.
        mode: Sub-directory name; also the marker searched for in each file name.

    Returns:
        A list with one similarity score per file, ordered by file name.

    Raises:
        ValueError: If a file name does not contain ``mode`` after a non-empty
            start. Previously ``str.find`` returned -1 in that case and the
            name minus its last two characters was used as the prefix, which
            could silently select the wrong reference.
        KeyError: If the extracted prefix has no entry in ``ref_dict``.
    """
    res = []

    # Sorted so the order of the returned scores does not depend on the file system.
    for filename in sorted(os.listdir(f'{input_folder}/{mode}')):
        mode_index = filename.find(mode)
        if mode_index < 1:
            raise ValueError(
                f"cannot extract a reference prefix from {filename!r}: "
                f"expected '<prefix>_{mode}...'"
            )
        # Drop the single separator character that precedes the mode marker.
        prefix = filename[:mode_index-1]

        if prefix not in ref_dict:
            raise KeyError(
                f"no reference recording for prefix {prefix!r} (file {filename!r})"
            )
        original = ref_dict[prefix]
        synthesized = f'{input_folder}/{mode}/{filename}'
        score = model.compute_similarity(original , synthesized)
        res.append(score)
    return res



In [30]:
# Similarity scores for every file under the cosyvoice "advspeech" sub-directory.
synthesizer = "/mnt/d/voicedata/Libri_adv/cosyvoice"
advspeech_similarity = wespeaker_similarity(input_folder=synthesizer, mode='advspeech')

In [36]:
# Order the scores from highest to lowest, then pick the score located 95% of
# the way down that list: about 95% of the scores are at or above `threshold`.
# Note that this rebinds advspeech_similarity to the sorted copy.
advspeech_similarity = sorted(advspeech_similarity, reverse=True)
index = int(len(advspeech_similarity) * 0.95)
threshold = advspeech_similarity[index]
print("threshold", threshold)

threshold 0.6063705906271935


In [None]:
# Mean and standard deviation of whatever scores advspeech_similarity currently
# holds (in notebook order: the cosyvoice advspeech scores).
# NOTE(review): `np` is not imported explicitly in the visible cells — presumably
# it is provided by `from util import *`; verify.
print("advspeech: ", np.mean(advspeech_similarity), np.std(advspeech_similarity))

In [38]:
# For each synthesizer: derive an acceptance threshold from the 'ffmpeg' scores,
# then report the fraction of 'antifake' and 'advspeech' scores above it.
for syn in ['xTTS', 'openvoice', 'cosyvoice']:
    synthesizer = f"/mnt/d/voicedata/Libri_adv/{syn}"

    # Threshold = the 'ffmpeg' score located 95% of the way down the
    # highest-to-lowest ordering.
    original_similarity = sorted(
        wespeaker_similarity(synthesizer, 'ffmpeg'), reverse=True
    )
    index = int(len(original_similarity) * 0.95)
    threshold = original_similarity[index]
    print("synthesizer", syn, "threshold", threshold)

    antifake_similarity = wespeaker_similarity(synthesizer, 'antifake')
    advspeech_similarity = wespeaker_similarity(synthesizer, 'advspeech')

    # A sample counts as accepted when its score is strictly above the threshold.
    antifake_accepted = len([s for s in antifake_similarity if s > threshold])
    advspeech_accepted = len([s for s in advspeech_similarity if s > threshold])
    print("antifake", antifake_accepted / len(antifake_similarity))
    print("advspeech", advspeech_accepted / len(advspeech_similarity))

synthesizer xTTS threshold 0.703551784157753
antifake 0.02631578947368421
advspeech 0.3888888888888889
synthesizer openvoice threshold 0.5727239400148392
antifake 0.5526315789473685
advspeech 0.7638888888888888
synthesizer cosyvoice threshold 0.8517224490642548
antifake 0.0
advspeech 0.006944444444444444
