In [None]:
# This script is still work in progress. 

import numpy as np
import soundfile as sf
from scipy.signal import fftconvolve
import pysofaconventions as sofa

# Load stereo audio file
input_file = 'Data/CTHS3_Acid_Bass_Loop_01_C_123.wav'  # Replace with your stereo file
stereo_signal, sample_rate = sf.read(input_file)

print("Sample ratein the WAV file:")
print(sample_rate)

# Ensure the input is stereo
if stereo_signal.ndim != 2 or stereo_signal.shape[1] != 2:
    raise ValueError("Input file must be a stereo audio file.")

# Load HRTF data from a SOFA file
sofa_file = 'Data/mit_kemar_normal_pinna.sofa'  # Replace with the path to your SOFA file
hrtf = sofa.SOFAFile(sofa_file, mode='r') 

# # Print available attributes in the SOFA file
# print("Attributes in the SOFA file:")
# print(hrtf.__dict__.keys())

# # Access sampling rate
print("Sampling rate of the SOFA file:")
print(hrtf.getSamplingRate())

if(hrtf.getSamplingRate() != sample_rate):
    raise ValueError("The sampling rate of the input file and the HRTF file do not match.")

# Access source positions
# print(hrtf.getSourcePositionValues())

# # Access impulse response data
# print("Impulse Responses:")
# print(hrtf.Data_IR)

# Extract HRTF for a specific position (e.g., azimuth = 0°, elevation = 0°)
# Find the index of the desired position
source_positions = hrtf.getSourcePositionValues()
print("Source Positions:")
print(np.round(source_positions))
num_sources = source_positions.shape[0]
print("Number of sources:", num_sources)

# print("source_positions[:, 0]")
# print(np.round(source_positions[:, 0]))

# Find the index of the desired position
azimuth = 0
elevation = 0

source_positions = np.round(source_positions, 2);

position_index = np.where(
    (source_positions[:, 0] == azimuth) & (source_positions[:, 1] == elevation)
)[0][0]

# Get the HRTF impulse responses for the left and right ears
# hrtf_left = hrtf.getDataIR().flatten()  # Ensure 1D
# hrtf_right = hrtf.getDataIR().flatten()  # Ensure 1D
hrtf_left = hrtf.getDataIR()[position_index, :, 0]  # Left ear
hrtf_right = hrtf.getDataIR()[position_index, :, 1]  # Right ear

# Check the shape of the HRTF data
print("HRTF Left shape:")
print(hrtf_left.shape)
print("HRTF Right shape:")
print(hrtf_right.shape)
# Ensure the HRTF data is 1D
if hrtf_left.ndim > 1:
    hrtf_left = hrtf_left[:, position_index]
if hrtf_right.ndim > 1:
    hrtf_right = hrtf_right[:, position_index]

print("Stereo Signal Left Channel Shape:", stereo_signal[:, 0].shape)
print("Stereo Signal Right Channel Shape:", stereo_signal[:, 1].shape)

# Apply HRTF filters to each channel
left_channel = fftconvolve(stereo_signal[:, 0], hrtf_left, mode='same')
right_channel = fftconvolve(stereo_signal[:, 1], hrtf_right, mode='same')
# mono_signal = np.mean(stereo_signal, axis=1)
# left_channel = fftconvolve(mono_signal, hrtf_left, mode='same')
# right_channel = fftconvolve(mono_signal, hrtf_right, mode='same')

print("Convolved Left Channel Shape:", left_channel.shape)
print("Convolved Right Channel Shape:", right_channel.shape)

# Combine the processed channels into a binaural signal
binaural_signal = np.column_stack((left_channel, right_channel))

# Save the binaural audio to a new file
output_file = 'Output/music_binaural_output.wav'
sf.write(output_file, binaural_signal, sample_rate)

print(f"Binaural audio saved to {output_file}")

Sample ratein the WAV file:
44100
Sampling rate of the SOFA file:
[44100.]
Source Positions:
[[  0. -40.   1.]
 [  6. -40.   1.]
 [ 13. -40.   1.]
 ...
 [300.  80.   1.]
 [330.  80.   1.]
 [  0.  90.   1.]]
Number of sources: 710
HRTF Left shape:
(2,)
HRTF Right shape:
(2,)
Stereo Signal Left Channel Shape: (344195,)
Stereo Signal Right Channel Shape: (344195,)
Convolved Left Channel Shape: (344195,)
Convolved Right Channel Shape: (344195,)
Binaural audio saved to Output/music_binaural_output.wav


In [12]:
print("Stereo Signal Shape:", stereo_signal.shape)
print("Stereo Signal Sample:", stereo_signal[:10])

print("HRTF Left Shape:", hrtf_left.shape)
print("HRTF Left Sample:", hrtf_left[:10])
print("HRTF Right Shape:", hrtf_right.shape)
print("HRTF Right Sample:", hrtf_right[:10])

print("Left Channel Shape:", left_channel.shape)
print("Left Channel Sample:", left_channel[:10])
print("Right Channel Shape:", right_channel.shape)
print("Right Channel Sample:", right_channel[:10])

position_index = np.where(
    (source_positions[:, 0] == azimuth) & (source_positions[:, 1] == elevation)
)[0]
if len(position_index) == 0:
    raise ValueError("Specified azimuth and elevation not found in the SOFA file.")
position_index = position_index[0]



Stereo Signal Shape: (344195, 2)
Stereo Signal Sample: [[4.06503677e-05 4.75645065e-05]
 [4.93526459e-05 5.01871109e-05]
 [2.46763229e-05 2.34842300e-05]
 [4.87565994e-05 6.13927841e-05]
 [3.01599503e-05 3.38554382e-05]
 [3.88622284e-05 4.31537628e-05]
 [3.36170197e-05 4.60147858e-05]
 [2.41994858e-05 2.02655792e-05]
 [2.71797180e-05 5.06639481e-05]
 [3.71932983e-05 4.63724136e-05]]
HRTF Left Shape: (2,)
HRTF Left Sample: [6.10351562e-05 6.10351562e-05]
HRTF Right Shape: (2,)
HRTF Right Sample: [3.05175781e-05 3.05175781e-05]
Left Channel Shape: (344195,)
Left Channel Sample: [2.48110155e-09 5.49334800e-09 4.51836968e-09 4.48198989e-09
 4.81668394e-09 4.21277946e-09 4.42378223e-09 3.52883944e-09
 3.13593773e-09 3.92901711e-09]
Right Channel Shape: (344195,)
Right Channel Sample: [1.45155354e-09 2.98314262e-09 2.24827090e-09 2.59024091e-09
 2.90674507e-09 2.35013431e-09 2.72120815e-09 2.02271622e-09
 2.16459739e-09 2.96131475e-09]


In [21]:
import pyfar as pf

# Load the SOFA file
sofa_file_path = "Data/mit_kemar_normal_pinna.sofa"  # Replace with your SOFA file path
sofa_obj, *_ = pf.io.read_sofa(sofa_file_path)

# Print the sampling rate
sampling_rate = sofa_obj.sampling_rate
print("Sampling Rate:", sampling_rate)

# Print the source positions (azimuth, elevation, distance)
source_positions = hrtf.getSourcePositionValues()
print("Source Positions (Azimuth, Elevation, Distance):")
print(source_positions)

# Print the shape of the impulse response data
ir_data = sofa_obj.time
print("Impulse Response Data Shape:", ir_data.shape)

# Extract impulse responses for a specific position (e.g., azimuth = 0°, elevation = 0°)
azimuth = 0
elevation = 0
position_index = (source_positions[:, 0] == azimuth) & (source_positions[:, 1] == elevation)

if not position_index.any():
    raise ValueError("Specified azimuth and elevation not found in the SOFA file.")

# Get the impulse responses for the left and right ears
hrtf_left = ir_data[position_index, :, 0].squeeze()  # Left ear
hrtf_right = ir_data[position_index, :, 1].squeeze()  # Right ear

print("HRTF Left Shape:", hrtf_left.shape)
print("HRTF Right Shape:", hrtf_right.shape)

Sampling Rate: 44100.0
Source Positions (Azimuth, Elevation, Distance):
[[  0.         -40.           1.4       ]
 [  6.42857143 -40.           1.4       ]
 [ 12.85714286 -40.           1.4       ]
 ...
 [300.          80.           1.4       ]
 [330.          80.           1.4       ]
 [  0.          90.           1.4       ]]
Impulse Response Data Shape: (710, 2, 512)
HRTF Left Shape: (2,)
HRTF Right Shape: (2,)
