In [1]:
import os
import sys
import warnings

import numpy as np
import tensorflow as tf
import keras
import madmom
import mirdata
import librosa
import librosa.display

from scipy.ndimage import maximum_filter1d
from scipy.interpolate import interp1d
from scipy.signal import argrelmax

import matplotlib.pyplot as plt

In [2]:
# Notebook-wide setup: install a blanket "ignore" filter so library warnings
# do not clutter the cell outputs.
warnings.simplefilter('ignore')

# Default size for every matplotlib figure created later in the notebook.
plt.rcParams.update({'figure.figsize': (20, 6)})

In [None]:
from pydub import AudioSegment
from pathlib import Path

# Directory scanned for MP3 files, and the directory receiving the WAV copies
# (the latter is created on first use).
input_folder = Path("./audio")
output_folder = Path("./audio_wav")
output_folder.mkdir(exist_ok=True)

# Decode each "*.mp3" found directly in input_folder and export it as a WAV
# file with the same stem.
for mp3_file in input_folder.glob("*.mp3"):
    wav_file = output_folder / f"{mp3_file.stem}.wav"
    print(f"Conversie: {mp3_file.name} -> {wav_file.name}")

    # `audio` stays bound at notebook level, as in the original cell.
    audio = AudioSegment.from_mp3(mp3_file)
    audio.export(wav_file, format="wav")

print("Conversie completa")


In [None]:
import os
import json
import hashlib
from pathlib import Path

def md5(file_path):
    hash_md5 = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()

# Build a JSON index for the local SMC dataset copy: one entry per WAV file in
# <base_path>/audio that has a matching lower-cased "<track_id>.beats" file in
# <base_path>/annotations/beats. Each entry stores [relative path, MD5] pairs.
base_path = Path("./dataset/smc")
audio_dir = base_path / "audio"
beats_dir = base_path / "annotations" / "beats"

index = {"version": "1.0", "tracks": {}}

# Sorted so the resulting JSON has a stable track order.
for audio_file in sorted(audio_dir.glob("*.wav")):
    track_id = audio_file.stem  # e.g. SMC_001
    beat_filename = f"{track_id.lower()}.beats"
    beat_path = beats_dir / beat_filename

    # Tracks without an annotation file are reported and left out of the index.
    if not beat_path.exists():
        print(f"Fisier lipsa pentru {track_id}: {beat_path.name}")
        continue

    # as_posix() turns only real path separators into "/", so the stored
    # paths use forward slashes on Windows too, without touching any literal
    # backslash that is part of a file name on POSIX systems.
    index["tracks"][track_id] = {
        "audio": [
            audio_file.relative_to(base_path).as_posix(),
            md5(audio_file)
        ],
        "beats": [
            beat_path.relative_to(base_path).as_posix(),
            md5(beat_path)
        ]
    }

# Write the index to a JSON file in the current working directory.
output_path = "smc_index_1.0.json"
with open(output_path, "w") as f:
    json.dump(index, f, indent=2)

print(f"Index salvat in {output_path} cu {len(index['tracks'])} track-uri")


In [None]:
import os
import glob

# Source folder with the raw "*.txt" annotation files and the destination
# folder for the converted ".beats" files.
input_folder = 'beats_h' 
output_folder = './dataset/harmonix/annotations/beats'

os.makedirs(output_folder, exist_ok=True)

for txt_file in glob.glob(os.path.join(input_folder, '*.txt')):
    # Split every line on whitespace.
    with open(txt_file, 'r') as f_in:
        rows = [raw.strip().split() for raw in f_in]

    # Keep the first two columns, tab-separated; lines with fewer than two
    # columns are dropped.
    output_lines = ["\t".join(cols[:2]) for cols in rows if len(cols) >= 2]

    # Same base name as the input file, with a ".beats" extension.
    source_name = os.path.basename(txt_file)
    base_name = os.path.splitext(source_name)[0]
    output_path = os.path.join(output_folder, f"{base_name}.beats")

    # Lines are newline-joined; no trailing newline is written.
    with open(output_path, 'w') as f_out:
        f_out.write("\n".join(output_lines))

    print(f"✔ Salvat: {output_path}")


✔ Salvat: C:/Users/vanes/Documents/facultate/licenta/project/dataset/harmonix/annotations/beats\0001_12step.beats
✔ Salvat: C:/Users/vanes/Documents/facultate/licenta/project/dataset/harmonix/annotations/beats\0003_6foot7foot.beats
✔ Salvat: C:/Users/vanes/Documents/facultate/licenta/project/dataset/harmonix/annotations/beats\0004_abc.beats
✔ Salvat: C:/Users/vanes/Documents/facultate/licenta/project/dataset/harmonix/annotations/beats\0005_again.beats
✔ Salvat: C:/Users/vanes/Documents/facultate/licenta/project/dataset/harmonix/annotations/beats\0006_aint2proud2beg.beats
✔ Salvat: C:/Users/vanes/Documents/facultate/licenta/project/dataset/harmonix/annotations/beats\0008_america.beats
✔ Salvat: C:/Users/vanes/Documents/facultate/licenta/project/dataset/harmonix/annotations/beats\0009_americanmusic.beats
✔ Salvat: C:/Users/vanes/Documents/facultate/licenta/project/dataset/harmonix/annotations/beats\0010_andjusticeforall.beats
✔ Salvat: C:/Users/vanes/Documents/facultate/licenta/project/d

In [None]:
from pydub import AudioSegment
from pydub.generators import Sine
import os

# Track to sonify, and the files derived from its id.
song_id = "0653_dynamite"
audio_path = f"dataset/harmonix/audio/{song_id}.wav"
beats_path = f"dataset/harmonix/annotations/beats/{song_id}.beats"
output_path = f"{song_id}_with_beats.wav"

# Two sine-tone clicks: one for ordinary beats and a distinct one used where
# the beat position equals 1.
beat_click = Sine(1200).to_audio_segment(duration=80).apply_gain(-8)
downbeat_click = Sine(700).to_audio_segment(duration=120).apply_gain(-3)

audio = AudioSegment.from_wav(audio_path)

# Parse the annotation file into (time in seconds, beat position) pairs;
# lines with fewer than two whitespace-separated columns are skipped.
with open(beats_path, 'r') as f:
    columns = (row.strip().split() for row in f)
    beats = [(float(cols[0]), int(cols[1])) for cols in columns if len(cols) >= 2]

# Mix one click into the track at every annotated beat time.
output = audio
for time_sec, pos in beats:
    if pos == 1:
        click = downbeat_click
    else:
        click = beat_click
    # The overlay position is given in whole milliseconds (truncated).
    output = output.overlay(click, position=int(time_sec * 1000))

output.export(output_path, format="wav")
print(f"✔ Salvat: {output_path}")


✔ Salvat: 0653_dynamite_with_beats.wav


In [None]:
import os
import json
import hashlib

# Root folder of the local Harmonix dataset copy (contains "audio" and
# "annotations/beats").
ROOT_DIR = "./dataset/harmonix"
# NOTE(review): PREFIX is not referenced by any code visible in this cell.
PREFIX = ""

def compute_md5(path, chunk_size=1 << 20):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is hashed incrementally so that large audio files are never
    loaded into memory in one piece.

    Args:
        path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration (default 1 MiB).

    Returns:
        The MD5 digest as a lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, "rb") as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Build the Harmonix index: one entry per ".wav" file in <ROOT_DIR>/audio that
# has a matching "<track_id>.beats" file in <ROOT_DIR>/annotations/beats.
# Each entry stores [path relative to ROOT_DIR, MD5 checksum] pairs.
tracks = {}
audio_dir = os.path.join(ROOT_DIR, "audio")
beats_dir = os.path.join(ROOT_DIR, "annotations", "beats")

# os.listdir() returns names in arbitrary order; sorting keeps the generated
# JSON stable between runs and machines.
for fname in sorted(os.listdir(audio_dir)):
    if not fname.endswith(".wav"):
        continue

    # Strip only the trailing extension; str.replace(".wav", "") would also
    # remove a ".wav" that appears earlier in the file name.
    track_id = os.path.splitext(fname)[0]
    audio_rel = f"audio/{fname}"
    beats_rel = f"annotations/beats/{track_id}.beats"

    audio_path = os.path.join(audio_dir, fname)
    beats_path = os.path.join(beats_dir, f"{track_id}.beats")

    # Audio files without a beat annotation are left out of the index.
    if not os.path.exists(beats_path):
        continue

    tracks[track_id] = {
        "audio": [audio_rel, compute_md5(audio_path)],
        "beats": [beats_rel, compute_md5(beats_path)]
    }

index = {
    "version": "1.0",
    "tracks": tracks
}

# The index is written to the current working directory.
with open("harmonix_index_1.0.json", "w") as f:
    json.dump(index, f, indent=2)


In [None]:
import os
import hashlib
import json

ROOT_DIR = "./dataset/giantsteps_tempo"
PREFIX = ""

def compute_md5(path):
    with open(path, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()

# Build the GiantSteps tempo index: one entry per ".wav" file in
# <ROOT_DIR>/audio that has a matching "giantsteps_<track_id>.bpm" file in
# <ROOT_DIR>/annotations/tempo. Each entry stores
# [path relative to ROOT_DIR, MD5 checksum] pairs.
tracks = {}

audio_dir = os.path.join(ROOT_DIR, "audio")
tempo_dir = os.path.join(ROOT_DIR, "annotations", "tempo")

# os.listdir() returns names in arbitrary order; sorting keeps the generated
# JSON stable between runs and machines.
for audio_file in sorted(os.listdir(audio_dir)):
    if not audio_file.endswith(".wav"):
        continue

    # The track id is everything before the FIRST dot of the file name.
    # NOTE(review): presumably intentional for names with extra dotted parts;
    # confirm against the actual audio file naming.
    track_id = audio_file.split(".")[0]
    tempo_file = f"giantsteps_{track_id}.bpm"

    # Audio files without a tempo annotation are left out of the index.
    tempo_path = os.path.join(tempo_dir, tempo_file)
    if not os.path.exists(tempo_path):
        continue

    track_data = {
        "audio": [
            f"audio/{audio_file}",
            compute_md5(os.path.join(audio_dir, audio_file))
        ],
        "tempo": [
            f"annotations/tempo/{tempo_file}",
            compute_md5(tempo_path)
        ]
    }

    tracks[track_id] = track_data

index = {
    "version": "1.0",
    "tracks": tracks
}

# The index is written to the current working directory.
with open("giantsteps_index_1.0.json", "w") as f:
    json.dump(index, f, indent=2)
