# FFT-Based Audio Watermarking

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import binascii
from scipy.io.wavfile import read, write
from IPython.display import Audio
from numpy.fft import fft, ifft, fftfreq, fftshift, ifftshift

First, we read in an audio file, taking note of its sampling rate and number of samples.

In [None]:
# Load the source recording from the audio/ folder; `read` returns the
# sampling rate followed by the sample array.
# NOTE(review): the later cells appear to assume a mono (1-D) signal - confirm.
filename = 'speech.wav'  # replace with your chosen audio file
sr, original = read(f'audio/{filename}')

# Number of samples = length of the first axis of the sample array.
N = original.shape[0]

print(f"Sampling Rate is {sr} Hz")
print(f"Total number of samples is {N}")

Now, we can listen to the original audio file.

In [None]:
# Play back the unmodified samples at the sampling rate read from the file.
Audio(original, rate=sr)

By plotting the spectrogram, we get a visual representation of the audio signal temporally and across the frequency spectrum.

In [None]:
# Draw the spectrogram of the original audio on a fresh figure, save it to
# the output/ folder, then display it.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.specgram(original, Fs=sr)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Frequency (Hz)')
ax.set_title('Spectrogram of Original Audio')
fig.savefig('output/original.png')
plt.show()

We then take the FFT of the audio signal, and align the zero frequency component to the center of the frequency spectrum. 

In [None]:
# fft() returns bins in standard order (zero frequency first, negative
# frequencies last); fftshift() reorders them so zero frequency is centered.
original_ft = fftshift(fft(original, N))

# fftfreq() uses the same unshifted ordering, so it must be shifted as well;
# otherwise each plotted magnitude is paired with the wrong frequency.
# Units are cycles/sample (fftfreq's default sample spacing of 1).
original_ft_freq = fftshift(fftfreq(N))

plt.plot(original_ft_freq, abs(original_ft))
plt.show()

# Embed Watermark

In order to embed a text watermark into our audio file, we first convert the watermark into binary. This allows us to embed each individual bit into our audio signal. Embedding at this level ensures that our algorithm poses minimal risk to the sound of the audio.

In [None]:
watermark = "EECS351" # replace with your chosen watermark

# Serialize the watermark as UTF-8 bytes, 8 bits per byte. Going through
# bytes (rather than ord() of each character) guarantees exactly 8 bits per
# unit even for non-ASCII characters, and matches the UTF-8 decoding used
# when the watermark is recovered. For ASCII text the result is unchanged.
watermark_bits = ''.join(format(byte, '08b') for byte in watermark.encode('utf-8'))

print(f"Watermark: {watermark}")
print(f"Binary array to embed: {watermark_bits}")

We then prepare to embed using frame-wise encoding. This is the process of segmenting an audio signal into small frames, with each one representing a portion of the signal over time. Each frame is processed individually, with a small bit of the watermark embedded into its associated frame. In this notebook, the frames are formed from consecutive FFT magnitude samples within the chosen frequency band rather than from time-domain samples. Here, we set the number of samples per frame, and also set the frequency band in which we want to embed the watermark.

Using these details, we convert our signal into a matrix representation where each row is a frame and the columns represent the samples in that frame. This allows us to easily embed each bit of the watermark into its associated frame. 

In [None]:
frame = len(watermark_bits)  # total number of bits to embed (one bit per frame)
samples_per_bit = 10  # number of FFT samples used to embed each bit
total_samples = frame * samples_per_bit  # total number of FFT samples to embed

# Index of the zero-frequency bin in the fftshift-ed spectrum. NumPy arrays
# are 0-indexed, so this is N // 2 (not N/2 + 1, which is the 1-indexed
# position). Getting this right matters: the watermark is written once below
# the center and once mirrored above it, and the two copies only land on
# conjugate-symmetric bins when the mirror point is the true center.
center = N // 2

# Offset of the embedding band from the center bin. It is applied directly
# as an index offset into the spectrum (it is not converted from Hz).
embed_freq = 5000

a = 0.1  # scaling factor

X_mag = np.abs(original_ft)
X_phase = np.angle(original_ft)

# Fail early with a clear message if the spectrum is too short to hold the
# band; a negative slice start would otherwise wrap around silently.
band_start = center - embed_freq - total_samples
band_stop = center - embed_freq
if band_start < 0:
    raise ValueError(
        f"Audio is too short ({N} samples) to embed {total_samples} FFT "
        f"samples at an offset of {embed_freq} bins from the center"
    )

# Work on a copy so the original magnitudes in X_mag stay untouched.
# X_embed / X_mat are views into Y, so edits made to X_mat land in Y.
Y = X_mag.copy()
X_embed = Y[band_start:band_stop]
X_mat = X_embed.reshape(frame, samples_per_bit)

After the selected frequency band is divided into short frames, we begin embedding a single secret bit into each frame. All samples in each frame are changed based on the corresponding secret bit.

If the secret bit is 0, all samples in the frame should be set to the average value of all FFT magnitudes in the frame. This creates a flat, uniform signature for a 0 bit. If the secret bit is 1, we divide the FFT samples in the frame into two groups. The samples in the first group are set to a scaled down average value and the samples of the second group are set to a scaled up average value. This creates an asymmetric signature for a 1 bit.

In [None]:
# Embed one watermark bit per frame (row of X_mat), overwriting the frame's
# samples in place.
first_half = slice(0, int(samples_per_bit / 2))
second_half = slice(int(samples_per_bit / 2), samples_per_bit)

for bit, frame_mags in zip(watermark_bits, X_mat):
    # average of the samples in this frame, taken before any of them change
    total = 0
    for magnitude in frame_mags:
        total = total + magnitude
    avg = total / samples_per_bit

    if bit == '0':
        # '0' bit: flat signature, every sample set to the frame average
        print('0', end=' ')
        frame_mags[:] = avg
    else:
        # '1' bit: asymmetric signature, first half scaled down and second
        # half scaled up
        print('1', end=' ')
        frame_mags[first_half] = a * avg
        frame_mags[second_half] = (2 - a) * avg

After embedding the watermark into the matrix representation of our signal, we need to convert back to a vector in order to reconstruct the audio signal.

In [None]:
# Index bounds of the band below the center and of its mirror image above it.
lower_start = int(center - embed_freq - total_samples)
lower_stop = int(center - embed_freq)
upper_start = int(center + embed_freq + 1)
upper_stop = int(center + embed_freq + total_samples + 1)

# Flatten the frame matrix back into a single vector of embedded samples.
X_vec = X_mat.reshape(total_samples)

range_embed_1 = range(lower_start, lower_stop)
range_embed_2 = range(upper_start, upper_stop)

# Write the embedded samples into the lower band and, in reverse order,
# into the upper band so both sides of the spectrum carry the watermark.
Y[range_embed_1] = X_vec
Y[range_embed_2] = np.flip(X_vec)

In [None]:
# Recombine the modified magnitudes with the original phase to obtain the
# complex spectrum of the watermarked signal, then plot its magnitude.
Y1 = Y * np.exp(1j * X_phase)
plt.plot(original_ft_freq, np.abs(Y1))

In [None]:
# Undo the centering shift, transform back to the time domain, and save the
# real part of the result as the watermarked audio file.
watermarked = ifft(ifftshift(Y1))
write('output/watermarked.wav', int(sr), np.real(watermarked))

Now we can listen to the watermarked audio file. 

In [None]:
# Play back the real part of the watermarked signal at the same sampling rate.
Audio(watermarked.real, rate=sr)

# Decode Watermark

In order to begin recovering the embedded watermark, we need to read in the watermarked audio file.  

In [None]:
# Re-load the watermarked file from disk; this replaces the in-memory
# `watermarked`, `sr` and `N` values with what was actually saved.
sr, watermarked = read('output/watermarked.wav')
N = watermarked.shape[0]

print(f"Sampling Rate is {sr} Hz")
print(f"Total number of samples is {N}")

Here, we plot the spectrogram of the watermarked audio file. This is useful because it provides a visual representation of any artifacts or differences between the original audio and the watermarked audio.

In [None]:
# Draw the spectrogram of the watermarked audio on a fresh figure, save it
# to the output/ folder, then display it.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.specgram(watermarked, Fs=sr)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Frequency (Hz)')
ax.set_title(f'Spectrogram of Watermarked Audio at {embed_freq} Hz')
fig.savefig('output/watermarked.png')
plt.show()

We then take the FFT of the watermarked signal, and convert the signal into a matrix just like the representation we used during embedding.

In [None]:
# Centered magnitude spectrum of the watermarked signal.
Y2 = fftshift(fft(watermarked))
Y2_abs = np.abs(Y2)

# Cut out the same band that was used for embedding and arrange it as one
# row per embedded bit, one column per sample of that bit's frame.
window_start = int(center - embed_freq - total_samples)
window_stop = int(center - embed_freq)
detect_window = Y2_abs[window_start:window_stop]
detect_window_mat = detect_window.reshape((frame, samples_per_bit))


Our implementation of watermarking uses informed detection, which makes decoding easier by supplementing the algorithm with some information about how the watermark was embedded. The information provided includes:
- Total number of bits embedded

- Total number of samples embedded

- Number of samples used to embed each bit

- Embedding frequency

- Scaling factor

Using this information, we can check each frame for the unique signatures that we encoded during embedding. Based on the values of the samples we find in each frame, we make a bit decision (0 or 1) and append that value to our recovered binary watermark.

In [None]:
# Decode one bit per frame by majority vote over the frame's samples.
#
# The embedder writes a '0' frame as all samples == avg, and a '1' frame as
# a*avg in the first half and (2 - a)*avg in the second half. Each sample
# therefore votes using a threshold halfway between its '0' and '1' values:
#   first half : (1 + a) * avg / 2   (between a*avg and avg)
#   second half: (3 - a) * avg / 2   (between avg and (2 - a)*avg)
half = int(samples_per_bit / 2)
recovered_bits = []  # one '0'/'1' character per decoded frame

for i in range(frame):
    frame_mags = detect_window_mat[i]
    avg = frame_mags.sum() / samples_per_bit

    first_half = frame_mags[:half]
    # Every sample of the second half votes, including the last one in the
    # frame (it was previously skipped by an off-by-one range bound).
    second_half = frame_mags[half:]

    zero_count = (np.count_nonzero(first_half >= (1 + a) * avg / 2)
                  + np.count_nonzero(second_half < (3 - a) * avg / 2))
    one_count = samples_per_bit - zero_count

    # final bit decision; a tie is decoded as '0'
    recovered_bits.append('1' if one_count > zero_count else '0')

recovered_binary = ''.join(recovered_bits)  # binary string

After running the decoding algorithm, we recover a binary number that we can convert to a string using the "unhexlify" function. If everything worked correctly, we should recover the watermark we chose during embedding!

In [None]:
# Convert the recovered bit string to text via hex + unhexlify.
# The hex string is zero-padded to two digits per byte: a plain '%x' drops
# leading zero nibbles, which makes unhexlify fail on odd-length input and
# silently loses leading zero bytes.
num_bytes = (len(recovered_binary) + 7) // 8
recovered_hex = format(int(recovered_binary, 2), f'0{2 * num_bytes}x')

# errors='replace' keeps the cell from aborting when bit errors produce an
# invalid UTF-8 sequence; the damaged characters are shown as U+FFFD instead.
recovered_watermark = binascii.unhexlify(recovered_hex).decode(
    'utf-8', errors='replace')

print(f"Recovered binary array is {recovered_binary}")
print(f"Recovered watermark is {recovered_watermark}")