In [19]:
import wave
import os
from scipy.io import wavfile

# Load the WAV file
file_path = "./Data/speech_1.wav"  # Replace with your WAV file path

with wave.open(file_path, 'rb') as wav_file:
    print("WAV File Header and Metadata:")
    
    # Header and Metadata
    riff_chunk = "RIFF"
    wave_format = "WAVE"
    fmt_chunk = "fmt "
    data_chunk = "data"
    
    num_channels = wav_file.getnchannels()
    sample_width = wav_file.getsampwidth()
    frame_rate = wav_file.getframerate()
    num_frames = wav_file.getnframes()
    compression_type = wav_file.getcomptype()
    compression_name = wav_file.getcompname()
    duration = num_frames / frame_rate
    bit_depth = sample_width * 8
    value_range = (-2**(bit_depth - 1), 2**(bit_depth - 1) - 1)
    
    # Print Header and Metadata
    print(f"Chunk Descriptor: {riff_chunk}")
    print(f"Format: {wave_format}")
    print(f"Subchunk1 ID: {fmt_chunk}")
    print(f"Audio Format: {compression_name} ({compression_type})")
    print(f"Number of Channels: {num_channels}")
    print(f"Sample Rate: {frame_rate} Hz")
    print(f"Byte Rate: {frame_rate * num_channels * sample_width} bytes/sec")
    print(f"Block Align: {num_channels * sample_width} bytes")
    print(f"Bit Depth: {bit_depth} bits")
    print(f"Value Range: {value_range}")
    print(f"Subchunk2 ID: {data_chunk}")
    print(f"Number of Frames: {num_frames}")
    print(f"Duration: {duration:.2f} seconds")

    riff_header = wav_file._file.read(12)  # RIFF header is 12 bytes
    file_size = os.path.getsize(file_path)
    print(f"File Size: {file_size} bytes")

    # Read the data chunk size
    wav_file._file.seek(40)  # Offset to the data chunk size in the file
    data_chunk_size = int.from_bytes(wav_file._file.read(4), byteorder='little')
    print(f"Data Chunk Size: {data_chunk_size} bytes")

    chunk_id = wav_file._file.read(4).decode('ascii', errors='ignore')
    chunk_size = int.from_bytes(wav_file._file.read(4), byteorder='little')
    print(f"Chunk ID: {chunk_id}, Chunk Size: {chunk_size}")

# Using scipy.io.wavfile to read the data
sample_rate, data = wavfile.read(file_path)
print("\nWAV File Data:")
print(f"Sample Rate: {sample_rate} Hz")
print(f"Data Type: {data.dtype}")
print(f"Shape of Data Array: {data.shape}")
print(f"First 10 Samples: {data[:10]}")

WAV File Header and Metadata:
Chunk Descriptor: RIFF
Format: WAVE
Subchunk1 ID: fmt 
Audio Format: not compressed (NONE)
Number of Channels: 1
Sample Rate: 44100 Hz
Byte Rate: 88200 bytes/sec
Block Align: 2 bytes
Bit Depth: 16 bits
Value Range: (-32768, 32767)
Subchunk2 ID: data
Number of Frames: 98335
Duration: 2.23 seconds
File Size: 196714 bytes
Data Chunk Size: 20906273 bytes
Chunk ID: OG, Chunk Size: 19398971

WAV File Data:
Sample Rate: 44100 Hz
Data Type: int16
Shape of Data Array: (98335,)
First 10 Samples: [151 237 289 319 335 327 315 296 274 244]


In [23]:
# File format conversion using pydub

from pydub import AudioSegment

audio = AudioSegment.from_file("./Data/speech_1.wav", format="wav")

audio.export("./Output/speech_1.flac", format="flac")

# Load the converted FLAC file to verify
flac_file_path = "./Output/speech_1.flac"
flac_audio = AudioSegment.from_file(flac_file_path, format="flac")
print("\nConverted FLAC File Metadata:")
print(f"Channels: {flac_audio.channels}")
print(f"Sample Width: {flac_audio.sample_width} bytes")
print(f"Frame Rate: {flac_audio.frame_rate} Hz")
print(f"Length: {len(flac_audio)} ms")
print(f"Sample Width in Bits: {flac_audio.sample_width * 8} bits")
print(f"Number of Frames: {len(flac_audio) * flac_audio.frame_rate // 1000}")
print(f"Duration: {len(flac_audio) / 1000:.2f} seconds")
print(f"File Size: {os.path.getsize(flac_file_path)} bytes")
print(f"Sample Rate: {flac_audio.frame_rate} Hz")
print(f"Channels: {flac_audio.channels}")
print(f"Sample Width: {flac_audio.sample_width} bytes")
print(f"Sample Width in Bits: {flac_audio.sample_width * 8} bits")
print(f"Sample Rate: {flac_audio.frame_rate} Hz")
print(f"Length: {len(flac_audio)} ms")


Converted FLAC File Metadata:
Channels: 1
Sample Width: 2 bytes
Frame Rate: 44100 Hz
Length: 2230 ms
Sample Width in Bits: 16 bits
Number of Frames: 98343
Duration: 2.23 seconds
File Size: 74926 bytes
Sample Rate: 44100 Hz
Channels: 1
Sample Width: 2 bytes
Sample Width in Bits: 16 bits
Sample Rate: 44100 Hz
Length: 2230 ms


In [None]:
audio.export("./Output/speech_1.ogg", format="ogg")

# Load the converted FLAC file to verify
ogg_file_path = "./Output/speech_1.ogg"
ogg_audio = AudioSegment.from_file(flac_file_path, format="ogg")
print("\nConverted FLAC File Metadata:")
print(f"Channels: {ogg_audio.channels}")
print(f"Sample Width: {ogg_audio.sample_width} bytes")
print(f"Frame Rate: {ogg_audio.frame_rate} Hz")
print(f"Length: {len(ogg_audio)} ms")
print(f"Sample Width in Bits: {ogg_audio.sample_width * 8} bits")
print(f"Number of Frames: {len(ogg_audio) * ogg_audio.frame_rate // 1000}")
print(f"Duration: {len(ogg_audio) / 1000:.2f} seconds")
print(f"File Size: {os.path.getsize(ogg_file_path)} bytes")
print(f"Sample Rate: {ogg_audio.frame_rate} Hz")
print(f"Channels: {ogg_audio.channels}")
print(f"Sample Width: {ogg_audio.sample_width} bytes")
print(f"Sample Width in Bits: {ogg_audio.sample_width * 8} bits")
print(f"Sample Rate: {ogg_audio.frame_rate} Hz")
print(f"Length: {len(ogg_audio)} ms")