In [45]:
import numpy as np
import soundfile as sf
from scipy.signal import fftconvolve
import pysofaconventions as sofa

# Load stereo audio file
input_file = 'Data/CTHS3_Acid_Bass_Loop_01_C_123.wav'  # Replace with your stereo file
stereo_signal, sample_rate = sf.read(input_file)

print("Sample rate:")
print(sample_rate)

# Ensure the input is stereo
if stereo_signal.ndim != 2 or stereo_signal.shape[1] != 2:
    raise ValueError("Input file must be a stereo audio file.")

# Load HRTF data from a SOFA file
sofa_file = 'Data/mit_kemar_normal_pinna.sofa'  # Replace with the path to your SOFA file
hrtf = sofa.SOFAFile(sofa_file, mode='r') 

# # Print available attributes in the SOFA file
# print("Attributes in the SOFA file:")
# print(hrtf.__dict__.keys())

# # Access sampling rate
print("Sampling Rate:")
print(hrtf.getSamplingRate())

# Access source positions
# print(hrtf.getSourcePositionValues())

# # Access impulse response data
# print("Impulse Responses:")
# print(hrtf.Data_IR)

# Extract HRTF for a specific position (e.g., azimuth = 0°, elevation = 0°)
# Find the index of the desired position
source_positions = hrtf.getSourcePositionValues()
print("Source Positions:")
print(np.round(source_positions))

# print("source_positions[:, 0]")
# print(np.round(source_positions[:, 0]))

# Find the index of the desired position
azimuth = 0
elevation = 0

source_positions = np.round(source_positions, 2);

position_index = np.where(
    (source_positions[:, 0] == azimuth) & (source_positions[:, 1] == elevation)
)[0][0]

# Get the HRTF impulse responses for the left and right ears
hrtf_left = hrtf.getDataIR().flatten()  # Ensure 1D
hrtf_right = hrtf.getDataIR().flatten()  # Ensure 1D

# Check the shape of the HRTF data
print("HRTF Left shape:")
print(hrtf_left.shape)
print("HRTF Right shape:")
print(hrtf_right.shape)
# Ensure the HRTF data is 1D
if hrtf_left.ndim > 1:
    hrtf_left = hrtf_left[:, position_index]
if hrtf_right.ndim > 1:
    hrtf_right = hrtf_right[:, position_index]

print("Stereo Signal Left Channel Shape:", stereo_signal[:, 0].shape)
print("Stereo Signal Right Channel Shape:", stereo_signal[:, 1].shape)
print("Convolved Left Channel Shape:", left_channel.shape)
print("Convolved Right Channel Shape:", right_channel.shape)

# Apply HRTF filters to each channel
left_channel = fftconvolve(stereo_signal[:, 0], hrtf_left, mode='same')
right_channel = fftconvolve(stereo_signal[:, 1], hrtf_right, mode='same')

# Combine the processed channels into a binaural signal
binaural_signal = np.column_stack((left_channel, right_channel))

# Save the binaural audio to a new file
output_file = 'Output/music_binaural_output.wav'
sf.write(output_file, binaural_signal, sample_rate)

print(f"Binaural audio saved to {output_file}")

Sample rate:
44100
Sampling Rate:
[44100.]
Source Positions:
[[  0. -40.   1.]
 [  6. -40.   1.]
 [ 13. -40.   1.]
 ...
 [300.  80.   1.]
 [330.  80.   1.]
 [  0.  90.   1.]]
HRTF Left shape:
(727040,)
HRTF Right shape:
(727040,)
Stereo Signal Left Channel Shape: (344195,)
Stereo Signal Right Channel Shape: (344195,)
Convolved Left Channel Shape: (344195,)
Convolved Right Channel Shape: (344195,)
Binaural audio saved to Output/music_binaural_output.wav


In [27]:
print("Shape of stereo_signal[:, 0]:", stereo_signal[:, 0].shape)
print("Shape of hrtf_left:", hrtf_left.shape)

Shape of stereo_signal[:, 0]: (344195,)
Shape of hrtf_left: (2304, 2, 256)


In [36]:
print("HRIR Left Shape:", hrtf_left.shape)
print("HRIR Right Shape:", hrtf_right.shape)
print("HRIR Left Data:", hrtf_left)
print("HRIR Right Data:", hrtf_right)

HRIR Left Shape: (1179648,)
HRIR Right Shape: (1179648,)
HRIR Left Data: [-0.00216339 -0.00218606 -0.00575749 ... -0.00364431 -0.00466137
 -0.00475264]
HRIR Right Data: [-0.00216339 -0.00218606 -0.00575749 ... -0.00364431 -0.00466137
 -0.00475264]
