# 드라이브 연동

In [None]:
# Mount Google Drive at /content/drive; the hard-coded paths used by the
# following cells all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 라이브러리 설치 및 선언

In [None]:
!apt install ffmpeg -y
!pip install pydub

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [None]:
import os
import csv
import librosa
import pandas as pd
import numpy as np


from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.models import load_model
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense


import matplotlib.pyplot as plt
from pydub import AudioSegment


In [None]:
# Source folder holding the MP3 files and destination folder for the WAV copies.
mp3_folder = "/content/drive/MyDrive/과정운영/KSEB/2025년도/프로젝트 결과/04. 김요한_전호연_김시명팀/audio/mp3"
wav_folder = "/content/drive/MyDrive/과정운영/KSEB/2025년도/프로젝트 결과/04. 김요한_전호연_김시명팀/audio/wav"

# Full paths of the WAV files that already exist, so finished conversions are skipped.
wavs = {os.path.join(wav_folder, name) for name in os.listdir(wav_folder)}

# Walk mp3_folder, including its sub-folders, and convert every MP3 to WAV.
for root, _, files in os.walk(mp3_folder):
    for file in files:
        # Only MP3 files can be decoded by from_mp3; skip anything else quietly.
        if not file.lower().endswith(".mp3"):
            continue

        # Join with `root` (the folder currently being walked). Joining with
        # mp3_folder would produce a non-existent path for files in sub-folders.
        mp3_path = os.path.join(root, file)
        wav_filename = os.path.splitext(file)[0] + ".wav"
        wav_path = os.path.join(wav_folder, wav_filename)

        if wav_path in wavs:
            print(f"✔️ 이미 변환된 파일: {wav_path}")
            continue

        # Best-effort conversion: one broken file must not abort the whole batch.
        try:
            audio = AudioSegment.from_mp3(mp3_path)
            audio.export(wav_path, format="wav")
            # Remember the new file so a same-named MP3 in another sub-folder
            # is reported as already converted instead of being exported again.
            wavs.add(wav_path)
            print(f"✅ 변환 완료: {wav_path}")
        except Exception as e:
            print(f"⚠️ 변환 실패: {mp3_path} ({e})")

✔️ 이미 변환된 파일: /content/drive/MyDrive/Colab Notebooks/kseb_edu/kseb_0610/datas/myaudio/wav/7ne4VBA60CxGM75vw0EYad.wav
✔️ 이미 변환된 파일: /content/drive/MyDrive/Colab Notebooks/kseb_edu/kseb_0610/datas/myaudio/wav/6iOndD4OFo7GkaDypWQIou.wav
✔️ 이미 변환된 파일: /content/drive/MyDrive/Colab Notebooks/kseb_edu/kseb_0610/datas/myaudio/wav/2yWlGEgEfPot0lv3OAjuG3.wav
✔️ 이미 변환된 파일: /content/drive/MyDrive/Colab Notebooks/kseb_edu/kseb_0610/datas/myaudio/wav/7tI8dRuH2Yc6RuoTjxo4dU.wav
✔️ 이미 변환된 파일: /content/drive/MyDrive/Colab Notebooks/kseb_edu/kseb_0610/datas/myaudio/wav/27xkOIER6uDLKALIelHylZ.wav
✔️ 이미 변환된 파일: /content/drive/MyDrive/Colab Notebooks/kseb_edu/kseb_0610/datas/myaudio/wav/2RkZ5LkEzeHGRsmDqKwmaJ.wav
✔️ 이미 변환된 파일: /content/drive/MyDrive/Colab Notebooks/kseb_edu/kseb_0610/datas/myaudio/wav/1HEwEN64NjgTaHmo7LfkX8.wav
✔️ 이미 변환된 파일: /content/drive/MyDrive/Colab Notebooks/kseb_edu/kseb_0610/datas/myaudio/wav/4AajxCEwGEsmHmT4H1TwjY.wav
✔️ 이미 변환된 파일: /content/drive/MyDrive/Colab Notebooks/kseb_edu/ks

In [None]:
# Build a table mapping each Spotify track id (the WAV file name without its
# extension) to the path of the WAV file, then write it to a CSV.
results = []

for root, dirs, files in os.walk(wav_folder):
    for file in files:
        # splitext removes the real extension instead of assuming it is exactly
        # four characters long, and lets us ignore files that are not WAV.
        stem, ext = os.path.splitext(file)
        if ext.lower() != ".wav":
            continue

        results.append({
            'spotify_id': stem,
            'path': os.path.join(root, file)
        })

# Save the result. Naming the columns explicitly keeps drop_duplicates working
# even when no WAV file was found (an empty list would otherwise yield a
# DataFrame without a 'spotify_id' column).
df = pd.DataFrame(results, columns=['spotify_id', 'path'])
df = df.drop_duplicates(subset=['spotify_id'], keep='first')
df.to_csv("local_to_spotify_mapping.csv", index=False, encoding="utf-8-sig")
print("✅ 매칭 완료 (하위폴더 포함): local_to_spotify_mapping.csv")

✅ 매칭 완료 (하위폴더 포함): local_to_spotify_mapping.csv


# 데이터 전처리

### features
- spotify_id로 매칭한 음원(wav)의 Mel-spectrogram

### labels
- labels 종류
  - loudness
  - valence
  - tempo
- 추후 머신러닝에서 features를 선정

In [None]:
# music_table = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/kseb_edu/kseb_0610/datas/universal_top_spotify_songs.csv')
import glob

# Load every per-cluster song CSV and stack them into one label table.
csv_path = '/content/drive/MyDrive/과정운영/KSEB/2025년도/프로젝트 결과/04. 김요한_전호연_김시명팀/csv'

# glob returns files in no particular order; sorting makes the row order of
# music_table (and therefore any "first match" lookup on it) reproducible.
csv_files = sorted(glob.glob(f'{csv_path}/songDf_cluster*_dm.csv'))
if not csv_files:
    # Fail with the searched location instead of pd.concat's generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(f"No songDf_cluster*_dm.csv files found in {csv_path}")

dfs = [pd.read_csv(csv_file) for csv_file in csv_files]
music_table = pd.concat(dfs, ignore_index=True)

In [None]:

# Model inputs and the three regression targets. All four lists stay
# index-aligned: entry i of each list belongs to the same track.
features = []
labels_loudness, labels_valence, labels_tempo = [], [], []

mel_scaler = MinMaxScaler()
mel_db_list = []

with open("local_to_spotify_mapping.csv", mode='r', encoding='utf-8-sig') as file:

    csv_reader = csv.DictReader(file)

    for record in csv_reader:
        spotify_id, path = record['spotify_id'], record['path']

        print(f'{spotify_id} 처리 중')

        # Look up the labels first. A track without a row in music_table is
        # skipped before any audio is decoded, so it cannot raise IndexError
        # on `.values[0]` or leave the feature and label lists misaligned.
        row = music_table[music_table["spotify_id"] == spotify_id]
        if row.empty:
            print(f'⚠️ {spotify_id} 라벨 없음, 건너뜀')
            print('-' * 30)
            continue

        # Decode the file into a 1-D waveform resampled to 16 kHz
        # (16000 samples per second of audio).
        audio, sr = librosa.load(path, sr=16000)

        # Mel-spectrogram: energy per mel band over time, shape (128, T) with
        # 128 mel bands (n_mels) and T time frames.
        mel = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)

        # Convert power to decibels (log scale). ref=np.max makes the values
        # relative to the loudest bin of this track.
        mel_db = librosa.power_to_db(mel, ref=np.max)

        mel_db_list.append(mel_db)

        # Update the scaler's running min/max with this track. This replaces
        # fitting on one concatenated copy of every spectrogram, which needed
        # a second full copy of the data in memory for the same min/max.
        mel_scaler.partial_fit(mel_db.reshape(-1, 1))

        # Targets for this track; the first matching row wins on duplicates.
        labels_loudness.append(row["loudness"].values[0])
        labels_valence.append(row["valence"].values[0])
        labels_tempo.append(row["tempo"].values[0])

        print(f'{spotify_id} 처리 완료')
        print('-' * 30)

if not mel_db_list:
    raise ValueError("No track in local_to_spotify_mapping.csv had matching labels in music_table")

for mel_db in mel_db_list:
    # transform (not fit_transform): every track is scaled with the single
    # scaler fitted on all tracks, so all inputs share the same value range.
    mel_db_scaled = mel_scaler.transform(mel_db.reshape(-1, 1)).reshape(mel_db.shape)

    # Add a trailing channel axis, (128, T) -> (128, T, 1), the same layout a
    # grayscale image has when fed to a 2-D CNN (height, width, channels).
    mel_db_scaled = np.expand_dims(mel_db_scaled, axis=-1)

    features.append(mel_db_scaled)

2RkZ5LkEzeHGRsmDqKwmaJ 처리 중
2RkZ5LkEzeHGRsmDqKwmaJ 처리 완료
------------------------------
1HEwEN64NjgTaHmo7LfkX8 처리 중
1HEwEN64NjgTaHmo7LfkX8 처리 완료
------------------------------
4AajxCEwGEsmHmT4H1TwjY 처리 중
4AajxCEwGEsmHmT4H1TwjY 처리 완료
------------------------------
5ITV0zqzjOYfFWpW0xBmRa 처리 중
5ITV0zqzjOYfFWpW0xBmRa 처리 완료
------------------------------
2HRqTpkrJO5ggZyyK6NPWz 처리 중
2HRqTpkrJO5ggZyyK6NPWz 처리 완료
------------------------------
4wJ5Qq0jBN4ajy7ouZIV1c 처리 중
4wJ5Qq0jBN4ajy7ouZIV1c 처리 완료
------------------------------
04emojnbYkrRmv5qtJcgVP 처리 중
04emojnbYkrRmv5qtJcgVP 처리 완료
------------------------------
2CGNAOSuO1MEFCbBRgUzjd 처리 중
2CGNAOSuO1MEFCbBRgUzjd 처리 완료
------------------------------
0QCIpQV3twfqo9kh0t8Zza 처리 중
0QCIpQV3twfqo9kh0t8Zza 처리 완료
------------------------------
0fK7ie6XwGxQTIkpFoWkd1 처리 중
0fK7ie6XwGxQTIkpFoWkd1 처리 완료
------------------------------
5XeFesFbtLpXzIVDNQP22n 처리 중
5XeFesFbtLpXzIVDNQP22n 처리 완료
------------------------------
6eLQXa6uk2EW8drsiKbABZ 처리 중
6eLQ

In [None]:
# Bring every Mel-spectrogram to the same number of time frames.

def _cyclic_pad(feature, length):
    """Repeat `feature` along its time axis (axis 1) and cut it to `length` frames."""
    copies = length // feature.shape[1] + 1  # always enough copies to reach `length`
    looped = np.concatenate([feature] * copies, axis=1)
    return looped[:, :length, :]


# Frame count of the longest spectrogram; shorter ones are looped up to it.
max_duration = max(feature.shape[1] for feature in features)

# One array holding all padded spectrograms, stacked along a new first axis.
padding = np.array([_cyclic_pad(feature, max_duration) for feature in features])

In [None]:
# Turn each label list into a single-column 2-D array (one row per track).
labels_loudness = np.reshape(np.array(labels_loudness), (-1, 1))
labels_valence = np.reshape(np.array(labels_valence), (-1, 1))
labels_tempo = np.reshape(np.array(labels_tempo), (-1, 1))

# One independent scaler per target.
loudness_scaler = MinMaxScaler()
valence_scaler = MinMaxScaler()
tempo_scaler = MinMaxScaler()

# Folder on Drive where the scaler parameters are stored as .npy files.
_artifact_dir = "/content/drive/MyDrive/과정운영/KSEB/2025년도/프로젝트 결과/04. 김요한_전호연_김시명팀/김요한"

loudness_scale_path = f"{_artifact_dir}/loudness_scaler_scale_.npy"
loudness_min_path = f"{_artifact_dir}/loudness_scaler_min_.npy"

valence_scale_path = f"{_artifact_dir}/valence_scaler_scale_.npy"
valence_min_path = f"{_artifact_dir}/valence_scaler_min_.npy"

tempo_scale_path = f"{_artifact_dir}/tempo_scaler_scale_.npy"
tempo_min_path = f"{_artifact_dir}/tempo_scaler_min_.npy"


# if os.path.exists(scale_path) and os.path.exists(min_path):
#     # 🔁 기존 스케일러 복원
#     tempo_scaler.scale_ = np.load(scale_path)
#     tempo_scaler.min_ = 0
#     tempo_scaler.data_min_ = np.load(min_path)
#     tempo_scaler.data_max_ = tempo_scaler.data_min_ + 1 / tempo_scaler.scale_
#     labels_tempo_scaled = tempo_scaler.transform(labels_tempo).ravel()

#     print("✅ tempo_scaler 복원 완료")
# else:
#     # 🆕 새로 학습 후 저장


In [None]:
# Scale each target with its own MinMaxScaler and store the fitted parameters
# (scale_ and data_min_) so the scaler can be rebuilt at inference time.
# NOTE(review): the scalers are fitted on all labels before the train/test
# split, so the test labels influence the scaling range.

###################LOUDNESS###################

labels_loudness_scaled = loudness_scaler.fit_transform(labels_loudness).ravel()
np.save(loudness_scale_path, loudness_scaler.scale_)
np.save(loudness_min_path, loudness_scaler.data_min_)

###################VALENCE###################

labels_valence_scaled = valence_scaler.fit_transform(labels_valence).ravel()
np.save(valence_scale_path, valence_scaler.scale_)
np.save(valence_min_path, valence_scaler.data_min_)

###################TEMPO###################

labels_tempo_scaled = tempo_scaler.fit_transform(labels_tempo).ravel()
np.save(tempo_scale_path, tempo_scaler.scale_)
np.save(tempo_min_path, tempo_scaler.data_min_)

###################SPLIT###################

# A single call splits the features and all three targets with the same
# shuffled indices. This yields the same partition as three separate calls
# with random_state=42, but indexes the large feature array once, not three times.
(
    features_train, features_test,
    loudness_y_train, loudness_y_test,
    valence_y_train, valence_y_test,
    tempo_y_train, tempo_y_test,
) = train_test_split(
    padding,
    labels_loudness_scaled,
    labels_valence_scaled,
    labels_tempo_scaled,
    test_size=0.2,
    random_state=42,
)


# features_train, features_test, y_train, y_test = train_test_split(
#     padding, labels_tempo_scaled, test_size=0.2, random_state=42
#   )


# print(features_train.shape)
# print(features_test.shape)
# print(y_train.shape)
# print(y_test.shape)

# CNN 기반 회귀 모델 생성
- Mel-spectrogram은 (주파수 × 시간) 2차원 배열이라 이미지처럼 다룰 수 있으므로, CNN이 적합한 것으로 알려져 있음

In [None]:
# 모델 생성

# GlobalAveragePooling2D는 정보 축소가 너무 큼
# tempo는 시간 전반의 구조를 반영해야 하는 전역적인 특성 → pooling보다는 flatten이 유리함

# model_tempo = models.Sequential([
#   layers.Input(shape=features_train.shape[1:]), # (128, 1876, 1)
#   layers.Conv2D(32, (3, 3), activation='relu'),
#   layers.MaxPooling2D((2, 2)),
#   layers.Conv2D(64, (3, 3), activation='relu'),
#   layers.GlobalAveragePooling2D(),
#   layers.Dense(64, activation='relu'),
#   layers.Dense(1) # 회귀 출력
# ])

# # Conv2D → LSTM 구조 (시간의 흐름 강조)
# import tensorflow as tf
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D,
#                                      Permute, Reshape, LSTM, Dense)

# # ❶ 실제 시간 길이를 코드로 가져옵니다
# T = features_train.shape[2]          # 예: 1876, 2000, … 무엇이든 OK

# model_tempo = Sequential([
#     Input(shape=(128, T, 1)),
#     Dense(64, activation='relu'),
#     Dense(1)
# ])

# model_tempo.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense


def _build_cnn_regressor(time_steps):
    """Return a compiled CNN that regresses one scalar from a Mel-spectrogram.

    Args:
        time_steps: number of time frames of the input; the input shape is
            (128, time_steps, 1).

    Returns:
        A Sequential model compiled with Adam(1e-4), MSE loss and an MAE metric.
    """
    model = Sequential([
        Input(shape=(128, time_steps, 1)),
        Conv2D(32, (3, 3), padding='same', activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        MaxPooling2D((2, 2)),                      # (32, time_steps/4, 64)
        tf.keras.layers.GlobalAveragePooling2D(),  # (batch, 64)
        Dense(128, activation='relu'),
        Dense(1),                                  # single regression output
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
                  loss='mse', metrics=['mae'])
    return model


T = features_train.shape[2]        # actual number of time frames after padding

# The three targets use the same architecture but separate, independently
# initialised models.

###################LOUDNESS###################

model_loudness = _build_cnn_regressor(T)

###################VALENCE###################

model_valence = _build_cnn_regressor(T)

###################TEMPO###################

model_tempo = _build_cnn_regressor(T)

# 모델 학습

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


def _make_callbacks(checkpoint_path):
    """Return fresh early-stopping and checkpoint callbacks for one training run.

    Each model gets its own callback instances and its own checkpoint file.
    With a single shared ModelCheckpoint("best_model.keras"), all three models
    would write to the same file, and a reused callback object may also carry
    its best-so-far score from one fit() call into the next.
    """
    return [
        EarlyStopping(patience=5, restore_best_weights=True),
        ModelCheckpoint(checkpoint_path, save_best_only=True),
    ]


# NOTE(review): the recorded run of this cell stopped in the first epoch with
# ResourceExhaustedError (out of memory). Every input is a full-length
# (128, T, 1) spectrogram, so a smaller batch_size or shorter inputs may be
# needed - confirm on the target runtime.

# Keep the History object of every run so loss/MAE curves can be plotted later.
histories = {}

###################LOUDNESS###################

histories['loudness'] = model_loudness.fit(
    features_train, loudness_y_train,
    epochs=500,
    batch_size=16,
    validation_split=0.2,
    callbacks=_make_callbacks("best_model_loudness.keras")
)

###################VALENCE###################

histories['valence'] = model_valence.fit(
    features_train, valence_y_train,
    epochs=500,
    batch_size=16,
    validation_split=0.2,
    callbacks=_make_callbacks("best_model_valence.keras")
)

###################TEMPO###################

histories['tempo'] = model_tempo.fit(
    features_train, tempo_y_train,
    epochs=500,
    batch_size=16,
    validation_split=0.2,
    callbacks=_make_callbacks("best_model_tempo.keras")
)

Epoch 1/500


ResourceExhaustedError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start

  File "/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/usr/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 499, in process_one

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 730, in execute_request

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/ipkernel.py", line 383, in do_execute

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/zmqshell.py", line 528, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-13-2780707498>", line 18, in <cell line: 0>

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

Out of memory while trying to allocate 16734796704 bytes.
	 [[{{node StatefulPartitionedCall}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_multi_step_on_iterator_1931]

# 사후 분석

In [None]:
# import matplotlib.pyplot as plt
# # from ace_tools import display_dataframe_to_user # ace_tools 모듈은 존재하지 않으므로 제거

# df_hist = pd.DataFrame(history.history)
# # display_dataframe_to_user("Training History", df_hist) # ace_tools 대체
# print("Training History:")
# print(df_hist)


# plt.figure()
# plt.plot(df_hist['loss'], label='Train Loss')
# if 'val_loss' in df_hist.columns:
#     plt.plot(df_hist['val_loss'], label='Val Loss')
# plt.xlabel("Epoch")
# plt.ylabel("MSE Loss")
# plt.legend()
# plt.grid(True)
# plt.title("Training & Validation Loss")
# plt.show()

# if 'mae' in df_hist.columns:
#     plt.figure()
#     plt.plot(df_hist['mae'], label='Train MAE')
#     if 'val_mae' in df_hist.columns:
#         plt.plot(df_hist['val_mae'], label='Val MAE')
#     plt.xlabel("Epoch")
#     plt.ylabel("MAE")
#     plt.legend()
#     plt.grid(True)
#     plt.title("Training & Validation MAE")
#     plt.show()

# last_row = df_hist.tail(1).transpose().rename(columns={df_hist.index[-1]:"Last_Epoch"})
# # display_dataframe_to_user("Final Metrics", last_row) # ace_tools 대체
# print("\nFinal Metrics:")
# print(last_row)

# 모델 저장

In [None]:
# Folder on Drive that receives the trained models and the scaler parameters.
_model_dir = "/content/drive/MyDrive/과정운영/KSEB/2025년도/프로젝트 결과/04. 김요한_전호연_김시명팀/김요한"


def _save_artifacts(target, model, scaler):
    """Write `<target>_model.keras` plus the scaler's data_min_ and scale_ arrays."""
    model.save(f"{_model_dir}/{target}_model.keras")
    np.save(f"{_model_dir}/{target}_scaler_min_.npy", scaler.data_min_)
    np.save(f"{_model_dir}/{target}_scaler_scale_.npy", scaler.scale_)


###################LOUDNESS###################

_save_artifacts("loudness", model_loudness, loudness_scaler)


###################VALENCE###################

_save_artifacts("valence", model_valence, valence_scaler)


###################TEMPO###################

_save_artifacts("tempo", model_tempo, tempo_scaler)

# mel_scaler 추출
# max_duration 추출: 학습 시켰던 음원 중 가장 긴 길이를 가져옴

In [None]:
import joblib

# Persist the fitted spectrogram scaler and the padded frame count next to the
# models; inference has to apply the same scaling and the same input length.
_export_dir = '/content/drive/MyDrive/과정운영/KSEB/2025년도/프로젝트 결과/04. 김요한_전호연_김시명팀/김요한'

joblib.dump(mel_scaler, f'{_export_dir}/mel_scaler.pkl')
joblib.dump(max_duration, f'{_export_dir}/max_duration.pkl')

['max_duration.pkl']

In [None]:
# Display the frame count every spectrogram is padded or cut to.
max_duration

16210

# 예측

In [None]:
# NOTE(review): the argument looks like a Google Drive share-link id rather than
# a path under the mounted drive; the recorded output of this cell is
# "No such file or directory". Confirm the intended folder.
!ls /content/drive/1yKoEcQvRJah-exf8gmCCHvaY9N1ONJ4R?usp=drive_link

ls: cannot access '/content/drive/1yKoEcQvRJah-exf8gmCCHvaY9N1ONJ4R?usp=drive_link': No such file or directory


## 모델 및 scaler 불러오기

In [None]:
# Folder on Drive that holds the trained models and the saved scaler parameters.
_artifact_dir = "/content/drive/MyDrive/과정운영/KSEB/2025년도/프로젝트 결과/04. 김요한_전호연_김시명팀/김요한"


def _restore_minmax_scaler(min_path, scale_path):
    """Rebuild a fitted MinMaxScaler from its saved data_min_ and scale_ arrays.

    Args:
        min_path: .npy file holding the scaler's data_min_.
        scale_path: .npy file holding the scaler's scale_.

    Returns:
        A MinMaxScaler whose transform / inverse_transform behave like the
        scaler that was fitted during training (default feature_range (0, 1)).
    """
    scaler = MinMaxScaler()  # created fresh at inference time
    scaler.data_min_ = np.load(min_path)
    scaler.scale_ = np.load(scale_path)
    # For feature_range (0, 1) a fitted scaler has min_ = -data_min_ * scale_.
    # inverse_transform computes (X - min_) / scale_, so setting min_ to 0
    # would silently drop the data_min_ offset from every prediction.
    scaler.min_ = -scaler.data_min_ * scaler.scale_
    scaler.data_max_ = scaler.data_min_ + 1 / scaler.scale_
    scaler.data_range_ = scaler.data_max_ - scaler.data_min_
    return scaler


###################LOUDNESS###################

model_loudness = load_model(f"{_artifact_dir}/loudness_model.keras")
loudness_scaler = _restore_minmax_scaler(
    f"{_artifact_dir}/loudness_scaler_min_.npy",
    f"{_artifact_dir}/loudness_scaler_scale_.npy",
)


###################VALENCE###################

model_valence = load_model(f"{_artifact_dir}/valence_model.keras")
valence_scaler = _restore_minmax_scaler(
    f"{_artifact_dir}/valence_scaler_min_.npy",
    f"{_artifact_dir}/valence_scaler_scale_.npy",
)


###################TEMPO###################

model_tempo = load_model(f"{_artifact_dir}/tempo_model.keras")
tempo_scaler = _restore_minmax_scaler(
    f"{_artifact_dir}/tempo_scaler_min_.npy",
    f"{_artifact_dir}/tempo_scaler_scale_.npy",
)

ValueError: File not found: filepath=/content/drive/MyDrive/과정운영/KSEB/2025년도/프로젝트 결과/04. 김요한_전호연_김시명팀/김요한/loudness_model.keras. Please ensure the file is an accessible `.keras` zip file.

## 오디오 업로드 및 mel-spectrogram 작업

In [None]:
from google.colab import files

# Ask the user for an audio file and take the first uploaded one.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No audio file was uploaded")
filename = next(iter(uploaded))

# Same preprocessing as in training: 16 kHz waveform -> 128-band
# Mel-spectrogram -> decibels relative to the track's loudest bin.
y, sr = librosa.load(filename, sr=16000)
mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
mel_db = librosa.power_to_db(mel, ref=np.max)

# Reuse the mel_scaler fitted during training (transform only, no refit).
mel_db = mel_scaler.transform(mel_db.reshape(-1, 1)).reshape(mel_db.shape)
mel_db = np.expand_dims(mel_db, axis=-1) # (128, T) -> (128, T, 1)


# Bring the clip to max_duration frames: loop it when too short, cut it when too long.
T = mel_db.shape[1]
if T < max_duration:
    repeat = int(np.ceil(max_duration / T))
    mel_db = np.concatenate([mel_db] * repeat, axis=1)[:, :max_duration, :]
else:
    mel_db = mel_db[:, :max_duration, :]

# Add the leading batch axis: (128, max_duration, 1) -> (1, 128, max_duration, 1).
# Without it predict() would treat the 128 mel bands as 128 separate samples,
# which does not match the models' (128, T, 1) input shape.
mel_db = np.expand_dims(mel_db, axis=0)

## 예측

In [None]:
'''
📌 참고 사항:

mel_db 정규화용 scaler도 학습 시 fit한 값을 저장해두고 추론 시엔 transform만 사용해야 정합성 보장됩니다.

tempoScaler를 np.save로 저장 후 불러오면 inverse_transform 사용이 가능하도록 min_, scale_ 설정을 복원해줘야 합니다
'''

###################model_tempo################### <- TEMPO

# pred_scaled = model_tempo.predict(mel_db)
# pred_bpm = tempo_scaler.inverse_transform(pred_scaled)
# print(f"🎵 {filename}의 예측된 템포: {pred_bpm[0][0]:.2f} BPM")

In [None]:
# Predict the scaled loudness, then map it back to the label's original units.
# NOTE(review): predict() expects a leading batch axis - confirm mel_db is
# shaped (1, 128, max_duration, 1) when this cell runs.
pred_loudness_scaled = model_loudness.predict(mel_db)
pred_loudness = loudness_scaler.inverse_transform(pred_loudness_scaled)
# print(f"{pred_loudness[0][0]:.2f}")

In [None]:
# Predict the scaled valence, then map it back to the label's original units.
# NOTE(review): predict() expects a leading batch axis - confirm mel_db is
# shaped (1, 128, max_duration, 1) when this cell runs.
pred_valence_scaled = model_valence.predict(mel_db)
pred_valence = valence_scaler.inverse_transform(pred_valence_scaled)
# print(f"{pred_valence[0][0]:.2f}")

In [None]:
# Predict the scaled tempo, then map it back to the label's original units.
# NOTE(review): predict() expects a leading batch axis - confirm mel_db is
# shaped (1, 128, max_duration, 1) when this cell runs.
pred_tempo_scaled = model_tempo.predict(mel_db)
pred_tempo = tempo_scaler.inverse_transform(pred_tempo_scaled)
# print(f"{pred_tempo[0][0]:.2f}")

In [None]:
# Gather the three predictions in one dict, each value rounded to two decimals
# via its formatted string, then display the dict as the cell output.
_predictions = (
    ('loudness', pred_loudness),
    ('valence', pred_valence),
    ('tempo', pred_tempo),
)
result = {name: float(f"{pred[0][0]:.2f}") for name, pred in _predictions}
result

In [None]:
!pip freeze >> requirements.txt

### 시각화