In [4]:
!pip install librosa
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
from PIL import Image



In [5]:
input_folder = "/kaggle/input/tanker/Tanker"
output_folder = "/kaggle/working/Tanker_MS"

In [6]:
def process_audio_file(audio_file, output_folder, sr=16000, segment_length=3, frame_length=0.25, hop_length=0.064, target_size=(40, 43)):
    y, original_sr = librosa.load(audio_file, sr=sr)

    segment_samples = int(segment_length * sr)
    hop_samples = int(hop_length * sr)

    base_filename = os.path.splitext(os.path.basename(audio_file))[0]
    file_output_folder = os.path.join(output_folder, base_filename)
    os.makedirs(file_output_folder, exist_ok=True)

    for i in range(0, len(y), segment_samples):
        segment = y[i:i + segment_samples]
        if len(segment) < segment_samples:
            break  
        
        S = librosa.feature.melspectrogram(y=segment, sr=sr, hop_length=hop_samples, n_mels=128, fmax=sr/2.0)
        
        # Plot chroma CQT without displaying it
        fig, ax = plt.subplots(nrows=1, sharex=True, sharey=True)
        img = librosa.display.specshow(S, y_axis='mel', x_axis='time', ax=ax, sr=sr, hop_length=hop_samples, fmax=sr/2.0)
        ax.set(title=f'Segment {i//segment_samples + 1}')
        
        # Remove axes for cleaner image
        ax.axis('off')
        
        # Save the figure to a temporary file
        temp_filename = "temp.png"
        fig.savefig(temp_filename, bbox_inches='tight', pad_inches=0)
        plt.close(fig)
        
        # Open the saved image, resize it, and save it again
        with Image.open(temp_filename) as img:
            img = img.resize(target_size, Image.ANTIALIAS)
            output_filename = os.path.join(file_output_folder, f"{base_filename}_segment_{i//segment_samples + 1}.png")
            img.save(output_filename)
        
        # Remove the temporary file
        os.remove(temp_filename)

# Function to process a folder of audio files
def process_folder(input_folder, output_folder, sr=16000, segment_length=3, frame_length=0.25, hop_length=0.064, target_size=(40, 43)):
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)
    
    # Process each .wav file in the input folder
    for filename in os.listdir(input_folder):
        if filename.endswith('.wav'):
            audio_file = os.path.join(input_folder, filename)
            process_audio_file(audio_file, output_folder, sr, segment_length, frame_length, hop_length, target_size)

In [7]:
process_folder(input_folder, output_folder)

  img = img.resize(target_size, Image.ANTIALIAS)


In [8]:
!zip -r Tanker_MS.zip /kaggle/working/Tanker_MS

  adding: kaggle/working/Tanker_MS/ (stored 0%)
  adding: kaggle/working/Tanker_MS/20/ (stored 0%)
  adding: kaggle/working/Tanker_MS/20/20_segment_4.png (stored 0%)
  adding: kaggle/working/Tanker_MS/20/20_segment_23.png (stored 0%)
  adding: kaggle/working/Tanker_MS/20/20_segment_9.png (stored 0%)
  adding: kaggle/working/Tanker_MS/20/20_segment_29.png (stored 0%)
  adding: kaggle/working/Tanker_MS/20/20_segment_24.png (stored 0%)
  adding: kaggle/working/Tanker_MS/20/20_segment_10.png (stored 0%)
  adding: kaggle/working/Tanker_MS/20/20_segment_27.png (deflated 4%)
  adding: kaggle/working/Tanker_MS/20/20_segment_31.png (stored 0%)
  adding: kaggle/working/Tanker_MS/20/20_segment_20.png (stored 0%)
  adding: kaggle/working/Tanker_MS/20/20_segment_22.png (stored 0%)
  adding: kaggle/working/Tanker_MS/20/20_segment_14.png (stored 0%)
  adding: kaggle/working/Tanker_MS/20/20_segment_3.png (stored 0%)
  adding: kaggle/working/Tanker_MS/20/20_segment_5.png (deflated 1%)
  adding: kaggle/