### google-net

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Concatenate, AveragePooling2D, Dense, Dropout, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.applications.inception_v3 import preprocess_input

def load_audio_to_spectrogram(file_path, n_fft=2048, hop_length=512):
    """Load an audio file and return its STFT magnitude spectrogram in dB.

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``
            (which is called with its default settings).
        n_fft: FFT window size forwarded to ``librosa.stft``.
        hop_length: Hop length forwarded to ``librosa.stft``.

    Returns:
        The dB-scaled magnitude spectrogram, referenced to the maximum
        magnitude (``ref=np.max``).
    """
    # Load the audio; the sample rate reported by librosa is not needed here.
    samples, _sample_rate = librosa.load(file_path)
    # Short-time Fourier transform -> magnitude -> decibels.
    magnitude = np.abs(librosa.stft(samples, n_fft=n_fft, hop_length=hop_length))
    return librosa.amplitude_to_db(magnitude, ref=np.max)

def plot_spectrogram(S_DB, sr=22050, hop_length=512):
    """Display a dB-scaled spectrogram with a log-frequency axis.

    The original body referenced an undefined name ``sr`` and raised
    ``NameError`` on every call; the sample rate is now an explicit parameter.

    Args:
        S_DB: dB-scaled spectrogram, e.g. the output of
            ``load_audio_to_spectrogram``.
        sr: Sample rate of the audio the spectrogram came from. The default,
            22050, is the rate ``librosa.load`` resamples to when called
            without an explicit ``sr``.
        hop_length: Hop length used when the spectrogram was computed; needed
            for a correct time axis. The default matches the default of
            ``load_audio_to_spectrogram``.
    """
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(S_DB, sr=sr, hop_length=hop_length,
                             x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    plt.tight_layout()
    plt.show()

# Simplified implementation of a GoogLeNet (Inception v1) building block.
def inception_module(x,
                     filters_1x1,
                     filters_3x3_reduce,
                     filters_3x3,
                     filters_5x5_reduce,
                     filters_5x5,
                     filters_pool_proj,
                     name=None):
    """Build one Inception module: four parallel branches joined on channels.

    Args:
        x: Input Keras tensor.
        filters_1x1: Number of filters in the 1x1 convolution branch.
        filters_3x3_reduce: Filters of the 1x1 reduction before the 3x3 conv.
        filters_3x3: Number of filters in the 3x3 convolution.
        filters_5x5_reduce: Filters of the 1x1 reduction before the 5x5 conv.
        filters_5x5: Number of filters in the 5x5 convolution.
        filters_pool_proj: Filters of the 1x1 projection after max pooling.
        name: Optional prefix for the layer names of this module. Previously
            this argument was accepted but ignored; when ``None`` (the
            default) Keras auto-generates the names exactly as before.

    Returns:
        The tensor obtained by concatenating the four branch outputs along
        the last axis.
    """
    def _layer_name(suffix):
        # Keras picks a unique name on its own when given None.
        return f'{name}_{suffix}' if name else None

    # Branch 1: plain 1x1 convolution.
    conv_1x1 = Conv2D(filters_1x1, (1, 1), padding='same', activation='relu',
                      name=_layer_name('1x1'))(x)

    # Branch 2: 1x1 reduction followed by a 3x3 convolution.
    conv_3x3 = Conv2D(filters_3x3_reduce, (1, 1), padding='same', activation='relu',
                      name=_layer_name('3x3_reduce'))(x)
    conv_3x3 = Conv2D(filters_3x3, (3, 3), padding='same', activation='relu',
                      name=_layer_name('3x3'))(conv_3x3)

    # Branch 3: 1x1 reduction followed by a 5x5 convolution.
    conv_5x5 = Conv2D(filters_5x5_reduce, (1, 1), padding='same', activation='relu',
                      name=_layer_name('5x5_reduce'))(x)
    conv_5x5 = Conv2D(filters_5x5, (5, 5), padding='same', activation='relu',
                      name=_layer_name('5x5'))(conv_5x5)

    # Branch 4: stride-1 max pooling followed by a 1x1 projection.
    pool_proj = MaxPooling2D((3, 3), strides=(1, 1), padding='same',
                             name=_layer_name('pool'))(x)
    pool_proj = Conv2D(filters_pool_proj, (1, 1), padding='same', activation='relu',
                       name=_layer_name('pool_proj'))(pool_proj)

    # All branches use 'same' padding with stride 1, so their spatial sizes
    # match and they can be stacked along the channel axis.
    output = Concatenate(axis=-1, name=_layer_name('output'))(
        [conv_1x1, conv_3x3, conv_5x5, pool_proj])

    return output

# GoogLeNet model construction.
def create_googlenet(input_shape, num_classes=10):
    """Build a heavily simplified GoogLeNet-style classifier.

    The network is a single Inception module followed by average pooling and
    a dense classification head. A faithful GoogLeNet would stack many more
    Inception modules.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.
        num_classes: Number of output classes (size of the softmax layer).
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        An uncompiled Keras ``Model`` named ``'inception_v1'``.
    """
    input_layer = Input(shape=input_shape)

    # This is a simplified version of the real GoogLeNet architecture.
    # A real implementation may need more Inception modules and more depth.

    x = inception_module(input_layer, 64, 96, 128, 16, 32, 32)
    x = AveragePooling2D((5, 5), strides=3)(x)
    x = Flatten()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.7)(x)
    x = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=input_layer, outputs=x, name='inception_v1')
    return model

# Build and compile the model.
# NOTE: adjust the input shape to the spectrogram image size and channel count.
model = create_googlenet((224, 224, 3))
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the model summary.
model.summary()


### Inception V3

In [None]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt

def create_spectrogram(file_path, save_path='spectrogram.png'):
    """Render the mel spectrogram of an audio file and save it as an image.

    Args:
        file_path: Path of the audio file to load with ``librosa.load``.
        save_path: Where to write the rendered figure. Defaults to
            ``'spectrogram.png'``, the previously hard-coded location; pass a
            distinct path per file so that successive calls do not overwrite
            each other's output.
    """
    y, sr = librosa.load(file_path)
    # 128-band mel power spectrogram limited to 8 kHz, converted to dB
    # relative to its maximum.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S_DB = librosa.power_to_db(S, ref=np.max)

    plt.figure(figsize=(10, 4))
    librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel', fmax=8000)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel spectrogram')
    plt.tight_layout()
    plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close()


from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input, decode_predictions
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

# Number of target classes. The original cell contained an unfilled template
# placeholder here (a syntax error); set this to match your dataset.
NUM_CLASSES = 10

# Load the ImageNet-pretrained base model without its classification head.
base_model = InceptionV3(weights='imagenet', include_top=False)

# Add a global average pooling layer.
x = base_model.output
x = GlobalAveragePooling2D()(x)

# Add a fully connected layer.
x = Dense(1024, activation='relu')(x)

# Add the classification layer (sized by NUM_CLASSES).
predictions = Dense(NUM_CLASSES, activation='softmax')(x)

# Final model.
model = Model(inputs=base_model.input, outputs=predictions)

# First stage: freeze every layer of the base model so that only the newly
# added layers are trained.
for layer in base_model.layers:
    layer.trainable = False

# Compile.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

# Train the model.
# model.fit(...)  <- write this call to fit your own dataset.


### Mel spectrogram and MFCC

In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

def save_mel_spectrogram(file_path, save_path, file_name):
    """Save the mel spectrogram of an audio file as ``<file_name>_mel.png``.

    Args:
        file_path: Path of the audio file to load with ``librosa.load``.
        save_path: Directory prefix the image path is built from.
        file_name: Base name of the output image (without suffix).
    """
    signal, rate = librosa.load(file_path)
    # 128-band mel power spectrogram, converted to dB relative to its peak.
    mel_db = librosa.power_to_db(
        librosa.feature.melspectrogram(y=signal, sr=rate, n_mels=128),
        ref=np.max,
    )

    plt.figure(figsize=(10, 4))
    librosa.display.specshow(mel_db, sr=rate, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel Spectrogram')
    plt.savefig(f"{save_path}/{file_name}_mel.png")
    # Release the figure once it has been written to disk.
    plt.close()

def save_mfcc(file_path, save_path, file_name):
    """Save the MFCCs of an audio file as ``<file_name>_mfcc.png``.

    Args:
        file_path: Path of the audio file to load with ``librosa.load``.
        save_path: Directory prefix the image path is built from.
        file_name: Base name of the output image (without suffix).
    """
    signal, rate = librosa.load(file_path)
    # 13 mel-frequency cepstral coefficients per frame.
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=13)

    plt.figure(figsize=(10, 4))
    librosa.display.specshow(coefficients, x_axis='time')
    plt.colorbar()
    plt.title('MFCC')
    plt.savefig(f"{save_path}/{file_name}_mfcc.png")
    # Release the figure once it has been written to disk.
    plt.close()


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

# Data loading and preprocessing. Images are run through InceptionV3's own
# preprocess_input function.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
    'path_to_train_dir',
    target_size=(299, 299),
    batch_size=32,
    class_mode='categorical')

validation_generator = validation_datagen.flow_from_directory(
    'path_to_validation_dir',
    target_size=(299, 299),
    batch_size=32,
    class_mode='categorical')

# Load the pretrained model and attach a custom classification head whose
# size follows the number of classes found in the training directory.
base_model = InceptionV3(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(train_generator.num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# First stage: freeze every layer of the base model so that only the newly
# added layers are trained.
for layer in base_model.layers:
    layer.trainable = False

# `learning_rate` replaces the deprecated `lr` argument, which newer Keras
# releases no longer accept.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model.
model.fit(train_generator, validation_data=validation_generator, epochs=10)


### V3 mels and mfcc

import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

def create_spectrogram_and_mfcc(file_path, save_dir, file_name):
    """Render both a mel spectrogram and an MFCC image for one audio file.

    Two PNG files are written into ``save_dir``: ``<file_name>_mel.png`` and
    ``<file_name>_mfcc.png``.

    Args:
        file_path: Path of the audio file to load with ``librosa.load``.
        save_dir: Directory the two images are written to.
        file_name: Base name used for the image files and the plot titles.
    """
    waveform, rate = librosa.load(file_path)

    # --- Mel spectrogram (128 bands, dB relative to the peak) ---
    mel_db = librosa.power_to_db(
        librosa.feature.melspectrogram(y=waveform, sr=rate, n_mels=128),
        ref=np.max,
    )

    plt.figure(figsize=(10, 4))
    librosa.display.specshow(mel_db, sr=rate, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f'{file_name} Mel Spectrogram')
    mel_target = os.path.join(save_dir, f'{file_name}_mel.png')
    plt.savefig(mel_target)
    plt.close()

    # --- MFCC (20 coefficients per frame) ---
    coefficients = librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=20)

    plt.figure(figsize=(10, 4))
    librosa.display.specshow(coefficients, x_axis='time')
    plt.colorbar()
    plt.title(f'{file_name} MFCC')
    mfcc_target = os.path.join(save_dir, f'{file_name}_mfcc.png')
    plt.savefig(mfcc_target)
    plt.close()


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

# Initialise the ImageDataGenerators. Images are run through InceptionV3's
# own preprocess_input function.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Load the training and validation datasets.
train_generator = train_datagen.flow_from_directory(
    'path/to/train/directory',
    target_size=(299, 299),
    batch_size=32,
    class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
    'path/to/validation/directory',
    target_size=(299, 299),
    batch_size=32,
    class_mode='categorical')

# Load the InceptionV3 base model.
base_model = InceptionV3(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
# Size the output layer from the classes found in the training directory
# instead of a hard-coded 10, so the head always matches the one-hot labels
# the generator produces.
predictions = Dense(train_generator.num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Do not retrain every layer: freeze the base model and train only the head.
for layer in base_model.layers:
    layer.trainable = False

# `learning_rate` replaces the deprecated `lr` argument, which newer Keras
# releases no longer accept.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model.
model.fit(train_generator, validation_data=validation_generator, epochs=10)


### V4

In [None]:
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from keras.applications.inception_v4 import InceptionV4
from keras.applications.inception_v4 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [None]:
def audio_to_image(path, save_dir, labels, sr=8000, n_mels=200, n_mfcc=30):
    """Render each audio file in ``path`` as a delta-delta MFCC image.

    For every regular file in ``path`` the second-order delta of its MFCCs is
    plotted and saved to ``save_dir`` as ``<file stem>.png``. The part of the
    file name before the first ``'_'`` is appended to ``labels``.

    Fixes relative to the earlier version:
      * the ``n_mfcc`` parameter was overwritten with the delta array inside
        the loop, so the second file passed an array as ``n_mfcc``;
      * figures were never closed, so they accumulated across files;
      * ``file[:-4]`` assumed a three-character extension;
      * directory entries are now skipped instead of being handed to
        ``librosa.load``.

    NOTE(review): images are written flat into ``save_dir``, while
    ``load_data`` reads them with ``flow_from_directory``, which looks for one
    sub-directory per class — confirm the intended directory layout.

    Args:
        path: Directory containing the audio files.
        save_dir: Existing directory the PNG images are written to.
        labels: List that receives one label per processed file; it is
            mutated in place and also returned.
        sr: Sample rate the audio is loaded at.
        n_mels: Number of mel bands of the intermediate spectrogram.
        n_mfcc: Number of MFCCs to compute.

    Returns:
        The same ``labels`` list that was passed in.
    """
    # The notebook cell that defines this function only imports `librosa`;
    # import the display submodule explicitly so specshow is available.
    import librosa.display

    for file in os.listdir(path):
        audio_file = os.path.join(path, file)
        if not os.path.isfile(audio_file):
            continue

        audio, _ = librosa.load(audio_file, sr=sr)
        S = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels)
        log_S = librosa.power_to_db(S, ref=np.max)
        mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=n_mfcc)
        # Keep the result in its own name; reusing `n_mfcc` here used to
        # corrupt the parameter for the next iteration.
        mfcc_delta2 = librosa.feature.delta(mfcc, order=2)

        fig = plt.figure(figsize=(12, 4))
        try:
            librosa.display.specshow(mfcc_delta2, sr=sr, x_axis='time')
            stem = os.path.splitext(file)[0]
            plt.savefig(os.path.join(save_dir, stem + '.png'))
        finally:
            # Always release the figure, even if rendering or saving fails.
            plt.close(fig)

        labels.append(file.split('_')[0])  # Assuming label is the first part of file name
    return labels


In [None]:
def load_data(img_path, labels):
    """Create training and validation image generators from one directory.

    The images under ``img_path`` are rescaled by 1/255 and split 80/20 into
    a training and a validation subset.

    Args:
        img_path: Root directory handed to ``flow_from_directory``.
        labels: Unused; kept only so existing callers keep working. The
            generators derive their own one-hot labels because
            ``class_mode='categorical'`` is set. The former
            ``to_categorical(labels)`` call was removed: its result was
            discarded, and it raises for non-numeric string labels such as
            the file-name prefixes collected by ``audio_to_image``.

    Returns:
        A ``(train_generator, validation_generator)`` tuple.
    """
    datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

    train_generator = datagen.flow_from_directory(
        img_path,
        target_size=(299, 299),
        batch_size=32,
        class_mode='categorical',
        subset='training')

    validation_generator = datagen.flow_from_directory(
        img_path,
        target_size=(299, 299),
        batch_size=32,
        class_mode='categorical',
        subset='validation')

    return train_generator, validation_generator


In [None]:
def train_model(train_generator, validation_generator, num_classes):
    """Fine-tune a classification head on top of a frozen pretrained base.

    The best model by validation accuracy is checkpointed to ``'model.h5'``,
    and training stops early after 20 epochs without improvement.

    NOTE(review): ``InceptionV4`` is imported from
    ``keras.applications.inception_v4`` at the top of this cell; that module
    does not appear to be part of stock Keras — confirm where it comes from
    (or switch to an available architecture such as ``InceptionResNetV2``).

    Args:
        train_generator: Generator yielding training batches.
        validation_generator: Generator yielding validation batches.
        num_classes: Size of the softmax output layer.
    """
    base_model = InceptionV4(weights='imagenet', include_top=False)

    # Classification head: global pooling -> dense -> softmax.
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    # Freeze the pretrained base so only the new head is trained.
    for layer in base_model.layers:
        layer.trainable = False

    # `learning_rate` replaces the deprecated `lr` argument, which newer
    # Keras releases no longer accept.
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

    checkpoint = ModelCheckpoint('model.h5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
    early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=20, verbose=1, mode='max')

    model.fit(train_generator,
              epochs=100,
              validation_data=validation_generator,
              callbacks=[checkpoint, early])


In [None]:
# Source audio directory and destination directory for the rendered images.
audio_path = 'path_to_audio_files'
image_path = 'path_to_save_images'

# Render every audio file to an image, collecting one label per file.
labels = audio_to_image(audio_path, image_path, [])

# Build the generators, count the distinct labels, and train.
train_generator, validation_generator = load_data(image_path, labels)
num_classes = np.unique(labels).size
train_model(train_generator, validation_generator, num_classes)


### Image loading and classification using V3

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Data paths.
train_dir = 'path/to/train/directory'  # training data directory
validation_dir = 'path/to/validation/directory'  # validation data directory

# Load the data with ImageDataGenerator.
# Only preprocess_input is applied: it already scales the pixels for
# InceptionV3, and ImageDataGenerator applies `rescale` on top of the
# preprocessing function, so also passing rescale=1/255 shrank the inputs a
# second time.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(299, 299),
    batch_size=32,
    class_mode='binary')

# shuffle=False keeps the batches in directory order so that the output of
# model.predict() below lines up with validation_generator.classes.
validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(299, 299),
    batch_size=32,
    class_mode='binary',
    shuffle=False)

# Load InceptionV3 and attach a custom head.
base_model = InceptionV3(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)  # sigmoid output for binary classification

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze everything except the newly added top layers.
for layer in base_model.layers:
    layer.trainable = False

# `learning_rate` replaces the deprecated `lr` argument, which newer Keras
# releases no longer accept.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model.
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    epochs=10,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // validation_generator.batch_size)

# Evaluate model performance: accuracy and loss curves.
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.legend()
plt.title('Accuracy')

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.title('Loss')

plt.show()

# Confusion matrix and classification report.
predictions = model.predict(validation_generator)
# Flatten the (N, 1) sigmoid output to a 1-D label vector before thresholding
# so the sklearn metrics receive the shape they expect.
predicted_classes = (predictions.ravel() > 0.5).astype(int)
true_classes = validation_generator.classes
class_labels = list(validation_generator.class_indices.keys())

conf_matrix = confusion_matrix(true_classes, predicted_classes)
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

print(classification_report(true_classes, predicted_classes, target_names=class_labels))
