<a href="https://colab.research.google.com/github/ttogle918/AI_practice/blob/main/dacon_%EC%9D%8C%EC%84%B1_%EB%B6%84%EB%A5%98_%EA%B2%BD%EC%A7%84%EB%8C%80%ED%9A%8C/cnn2d.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

DACON sample 코드

[[Baseline] 1. 데이터 분석 입문자를 위한 데이터 분석 & 예측](https://dacon.io/competitions/official/235905/codeshare/5137?page=1&dtype=recent&ptype&fType)

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# dataset

In [2]:
# audio 전처리를 위한 lib
import librosa
import librosa.display as dsp
from IPython.display import Audio

In [3]:
# 데이터 전처리를 위한 lib
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

In [4]:
train_path = '/content/drive/MyDrive/Colab Notebooks/dataset/dacon/user_data/train/'
train_csv_path = '/content/drive/MyDrive/Colab Notebooks/dataset/dacon/user_data/train.csv'

In [5]:
train = pd.read_csv(train_csv_path)
train.head(3)

Unnamed: 0,file_name,label
0,001.wav,9
1,002.wav,0
2,004.wav,1


In [6]:
train.shape

(400, 2)

In [7]:
# 같은 결과
train.label.groupby(train.label).count()
# train.label.value_counts()

label
0    40
1    40
2    40
3    40
4    40
5    40
6    40
7    40
8    40
9    40
Name: label, dtype: int64

In [8]:
import random

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(929)

In [9]:
data, sample_rate = librosa.load(f'{train_path}001.wav', sr = 16000)
print('sample_rate:', sample_rate, ', audio shape:', data.shape)
print('length:', data.shape[0]/float(sample_rate), 'secs')

sample_rate: 16000 , audio shape: (10192,)
length: 0.637 secs


- sample_rate: 16000
  - 초당 16000개(16000Hz 주파수)의 샘플을 가지고 있는 데이터, 1초에 음성 신호를 16000번 샘플링
  - default : 22050Hz
  - 16000Hz로 설정한 이유 : 사람의 목소리는 대부분 16000Hz안에 포함되기 때문이다.
audio shape: (10192,)
length: 0.637 secs = sample_rate * audio_shape

## dataframe 생성

In [10]:
def train_dataset():
    dataset = []
    for file in tqdm(os.listdir(train_path),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(train_path,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            class_label = int(train[train.file_name == file].label)
            dataset.append([data, sr, class_label])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data','sample_rate', 'label'])

In [11]:
train_wav = train_dataset()

100%|[32m██████████[0m| 400/400 [00:33<00:00, 11.83it/s]

Dataset 생성 완료





In [12]:
train_wav.head(3)

Unnamed: 0,data,sample_rate,label
0,"[0.00027186112, 0.00052218814, 0.00040256415, ...",16000,1
1,"[0.00011985076, 0.00016174652, 0.00017246709, ...",16000,9
2,"[3.6655838e-05, -3.7366447e-06, 3.4776433e-05,...",16000,5


In [13]:
train_wav['len'] = train_wav.data.apply(len)
train_wav.head()

Unnamed: 0,data,sample_rate,label,len
0,"[0.00027186112, 0.00052218814, 0.00040256415, ...",16000,1,10328
1,"[0.00011985076, 0.00016174652, 0.00017246709, ...",16000,9,10192
2,"[3.6655838e-05, -3.7366447e-06, 3.4776433e-05,...",16000,5,8642
3,"[-0.00010774565, -0.0001280595, -0.00013393736...",16000,0,14058
4,"[9.2610695e-05, 0.00018442213, 0.00021447388, ...",16000,7,11326


In [14]:
train_wav.len.describe()

count      400.000000
mean     10326.187500
std       1811.356277
min       5711.000000
25%       8934.750000
50%      10250.500000
75%      11593.000000
max      15573.000000
Name: len, dtype: float64

In [15]:
from sklearn.model_selection import train_test_split
# train_X, val_X, train_y, val_y = train_wav.data,  None, train_wav.label, None
train_X, val_X, train_y, val_y = train_test_split(train_wav.data, train_wav.label, test_size=0.2, stratify=train_wav.label)
print('학습시킬 train 셋 : ', train_X.shape, train_X.iloc[0].shape, train_y.shape)
print('검증할 val 셋 : ', val_X.shape, val_X.iloc[0].shape, val_y.shape)

학습시킬 train 셋 :  (320,) (9909,) (320,)
검증할 val 셋 :  (80,) (6910,) (80,)


In [16]:
val_y.value_counts()

5    8
2    8
8    8
3    8
4    8
6    8
1    8
7    8
0    8
9    8
Name: label, dtype: int64

## 음성 데이터 특징 추출


In [20]:
def preprocess_train_dataset(data, y_data):
    mfccs = []
    y_data_list = []
    wav_size = 12000
    for d, y_ in zip(data, y_data) :
        if len(d) < wav_size :
          d = np.pad(d, (0, wav_size-len(d)), constant_values=0)
        elif len(d) > wav_size :
          d = d[(len(d)-wav_size)//2 : wav_size+(len(d)-wav_size)//2]
        for r in (0, 1e-4, 1e-3):
          extracted_features = librosa.feature.mfcc(y= d + ((np.random.rand(*d.shape) - 0.5) * r), sr=16000, n_mfcc=40)
          mfccs.append(extracted_features)
          y_data_list.append(y_)

    mfccs = np.array(mfccs)
    y_data_list = np.array(y_data_list)
    return mfccs, y_data_list

def preprocess_val_dataset(data, y_data):
    mfccs = []
    wav_size = 12000
    y_data_list = []
    for d, y_ in zip(data, y_data) :
        if len(d) < 12000 :
          d = np.pad(d, (0, 12000-len(d)), constant_values=0)
        elif len(d) > 12000 :
          d = d[(len(d)-12000)//2 : 12000+(len(d)-12000)//2]
        extracted_features = librosa.feature.mfcc(y= d, sr=16000, n_mfcc=40)
        mfccs.append(extracted_features)
        y_data_list.append(y_)
    mfccs = np.array(mfccs)
    y_data_list = np.array(y_data_list)
    return mfccs, y_data_list

In [21]:
train_X, train_y = preprocess_train_dataset(train_X, train_y)
val_X, val_y = preprocess_val_dataset(val_X, val_y)

In [22]:
train_X.shape, val_X.shape

((960, 40, 24), (80, 40, 24))

In [23]:
train_X = train_X.reshape(train_X.shape[0], train_X.shape[1], train_X.shape[2], -1)
val_X = val_X.reshape(val_X.shape[0], val_X.shape[1], val_X.shape[2], -1)
train_X.shape, val_X.shape

((960, 40, 24, 1), (80, 40, 24, 1))

# 모델 학습 : CNN


In [24]:
from keras import Sequential
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

from keras.layers import Dense, GlobalAveragePooling2D, MaxPooling2D, Conv2D, Flatten, Dropout
from keras.models import Model

In [25]:
input_shape = (train_X.shape[1],train_X.shape[2], 1)

input_shape

(40, 24, 1)

In [28]:
def create_Model() :
    
  model = Sequential()
  model.add(Conv2D(32, kernel_size=2, padding='same', activation='relu', input_shape=input_shape))
  model.add(Conv2D(64, kernel_size=2, strides=2, padding='same', activation='relu'))
  model.add(MaxPooling2D(pool_size=2))

  model.add(Conv2D(128, kernel_size=3, strides=2, padding='same', activation='relu'))
  model.add(MaxPooling2D(pool_size=2))
  model.add(Dropout(0.1))

  model.add(Flatten())

  model.add(Dense(64, activation="relu"))
  model.add(Dropout(0.1))

  model.add(Dense(10, activation="softmax"))
  model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
  return model

In [29]:
model = create_Model()
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 40, 24, 32)        160       
                                                                 
 conv2d_4 (Conv2D)           (None, 20, 12, 64)        8256      
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 10, 6, 64)        0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 5, 3, 128)         73856     
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 2, 1, 128)        0         
 2D)                                                             
                                                                 
 dropout_2 (Dropout)         (None, 2, 1, 128)        

In [30]:
from keras.models import load_model
from keras.callbacks import Callback

class save(Callback):
 def __init__(self, model):
  self.count = 0
  self.model = model

 def on_epoch_end(self, bath, logs={}):
  self.count = self.count + 1
  if self.count % 10 == 0:
   model.save("mymodel_epoch_{}.h5".format(self.count))

In [31]:
s = save(model)
es = EarlyStopping(monitor='val_accuracy', patience=5)
mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True)

history = model.fit(train_X, train_y, epochs=100, validation_data=(val_X, val_y), callbacks=[es, mc])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


In [116]:
model.evaluate(val_X, val_y)



[0.56875079870224, 0.8999999761581421]

In [32]:
train_X, train_y  = train_wav.data, train_wav.label
train_X, train_y = preprocess_train_dataset(train_X, train_y)
train_X.shape, train_y.shape

((1200, 40, 24), (1200,))

In [33]:
model = create_Model()
s = save(model)
es = EarlyStopping(monitor='accuracy', patience=3)
mc = ModelCheckpoint('best_model.h5', monitor='accuracy', save_best_only=True)
history = model.fit(train_X, train_y, epochs=12,  callbacks=[es, mc])

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


# test

In [34]:
test_path = '/content/drive/MyDrive/Colab Notebooks/dataset/dacon/user_data/test/'
test_csv_path = '/content/drive/MyDrive/Colab Notebooks/dataset/dacon/user_data/test.csv'

In [35]:
test = pd.read_csv(test_csv_path)
test.head()

Unnamed: 0,file_name
0,003.wav
1,008.wav
2,010.wav
3,015.wav
4,024.wav


In [36]:
def test_dataset():
    dataset = []
    for file in tqdm(os.listdir(test_path),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(test_path,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            dataset.append([data, file])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data', 'file_name'])

In [37]:
test_wav = test_dataset()

100%|[32m██████████[0m| 200/200 [00:08<00:00, 24.18it/s]

Dataset 생성 완료





In [38]:
test_wav.head(3)

Unnamed: 0,data,file_name
0,"[0.00027645202, 0.00047594117, 0.0004510493, 0...",010.wav
1,"[0.00048952625, 0.00085201755, 0.00076409266, ...",008.wav
2,"[-0.00054350717, -0.0008907122, -0.00071864494...",081.wav


In [40]:
def preprocess_test_dataset(data):
    mfccs = []
    wav_size = 12000
    for d in data :
        if len(d) < wav_size :
          d = np.pad(d, (0, wav_size-len(d)), constant_values=0)
        elif len(d) > wav_size :
          d = d[(len(d)-wav_size)//2 : wav_size+(len(d)-wav_size)//2]
        extracted_features = librosa.feature.mfcc(y= d, sr=16000, n_mfcc=40)
        mfccs.append(extracted_features)
    mfccs = np.array(mfccs)
    return mfccs

In [41]:
mfccs_2 = preprocess_test_dataset(test_wav.data)

In [42]:
# 모델의 예측과 실제 정답값을 비교합니다.
prediction = model.predict(mfccs_2)
prediction = np.argmax(prediction, axis=1)
prediction.shape

(200,)

In [43]:
test_wav['pred_label'] = prediction
test_wav.head(2)

Unnamed: 0,data,file_name,pred_label
0,"[0.00027645202, 0.00047594117, 0.0004510493, 0...",010.wav,3
1,"[0.00048952625, 0.00085201755, 0.00076409266, ...",008.wav,5


In [45]:
submission = pd.read_csv(f'{test_path[:-5]}sample_submission.csv')
submission['label'] = -1
submission.head(2)

Unnamed: 0,file_name,label
0,003.wav,-1
1,008.wav,-1


In [49]:
def get_pred(test_wav, submission) :
  for i, subm in enumerate(submission.file_name) :
    label = test_wav[test_wav['file_name'] == subm].pred_label
    submission['label'].iloc[i] = label
  return submission

In [50]:
submission = get_pred(test_wav, submission)
submission.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0,file_name,label
0,003.wav,0
1,008.wav,5
2,010.wav,3
3,015.wav,8
4,024.wav,2


In [51]:
submission.label.value_counts()

3    22
1    22
0    20
5    20
8    20
2    20
6    20
7    19
4    19
9    18
Name: label, dtype: int64

In [52]:
submission.to_csv(f'cnn_.csv', index=False)