# データの可視化

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

import tensorflow as tf
import tensorflow_io as tfio

import clef

## 1. CSVデータの可視化、データ数のカウント

## 1.1 csvファイルの読み込み

In [None]:
# Read the training metadata CSV into a DataFrame.
train_metadata = pd.read_csv(clef.constant.TRAIN_METADATA_CSV_PATH)
# Report how many distinct values the primary_label column holds.
unique_labels = train_metadata.primary_label.unique()
print("species = {}".format(len(unique_labels)))
# Last expression of the cell: the notebook renders the first rows as a preview.
train_metadata.head()

## 1.2 ディレクトリ数のカウント

In [None]:
# Collect every sub-directory found directly under the short-audio training root
# (plain files matched by the "*" pattern are filtered out).
bird_sounds_dirs = list(
    filter(lambda entry: entry.is_dir(), clef.constant.TRAIN_SHORT_AUDIO_PATH.glob("*"))
)
print("num of dirs = {}".format(len(bird_sounds_dirs)))

## 1.3 各クラスごとのデータ数

In [None]:
# Tally how many metadata rows each primary label has.
# Counter does the zero-initialise-and-increment bookkeeping in one pass and keeps
# labels in first-seen order; dict() keeps bird_count_dict a plain dict as before.
from collections import Counter

bird_count_dict = dict(Counter(train_metadata.primary_label))

# Materialise the per-class counts once as an ndarray and reuse it below.
counts = np.array(list(bird_count_dict.values()))
print("min = {}, max = {}".format(counts.min(), counts.max()))
# Plot from the ndarray rather than the dict view: a dict_values object is not a
# sequence, and NumPy converts it to a 0-d object array instead of a 1-D array
# of numbers, so histogramming it directly is unreliable.
plt.hist(counts, bins=50)

## 1.4 データをスペクトログラムに変換

In [None]:
def convert_audio_to_spectrogram(audio_path, nfft=1024, window=1600, stride=1600):
    """Load an audio file and compute its spectrogram.

    Args:
        audio_path: Location of the audio file. It is converted with ``str()``
            before being handed to ``tfio.audio.AudioIOTensor``.
        nfft: Forwarded unchanged to ``tfio.experimental.audio.spectrogram``.
        window: Forwarded unchanged to ``tfio.experimental.audio.spectrogram``.
        stride: Forwarded unchanged to ``tfio.experimental.audio.spectrogram``.

    Returns:
        A ``(waveform, spectrogram)`` tuple of NumPy arrays. The spectrogram is
        the transpose of what ``tfio.experimental.audio.spectrogram`` returns.
    """
    # Read the whole file, then drop the trailing axis.
    # NOTE(review): squeezing axis -1 only succeeds when that axis has size 1,
    # so this presumably expects single-channel audio -- confirm for other inputs.
    samples = tf.squeeze(tfio.audio.AudioIOTensor(str(audio_path))[:], axis=[-1])
    spec = tfio.experimental.audio.spectrogram(
        samples, nfft=nfft, window=window, stride=stride)
    waveform = samples.numpy()
    # Transposed so that the library's first axis becomes the second one.
    return waveform, spec.numpy().T

In [None]:
def plot_spectrogram_per_frame(spectrogram, window, stride, num_cols=3):
    """Plot consecutive (possibly overlapping) slices of a spectrogram in a grid.

    Slice ``i`` covers columns ``[i * stride, i * stride + window)`` of
    ``spectrogram``; only slices that fit completely inside the array are drawn.

    Args:
        spectrogram: 2-D array; axis 1 is the axis that gets sliced.
        window: Number of columns shown in each subplot.
        stride: Column offset between the starts of consecutive slices.
            Must be positive.
        num_cols: Number of subplot columns in the grid.

    Raises:
        ValueError: If ``stride`` is not positive.
    """
    if stride <= 0:
        raise ValueError("stride must be positive, got {}".format(stride))

    total = spectrogram.shape[1]
    print("time series = {}, window = {}, stride = {}".format(total, window, stride))

    # Number of complete windows: the start offsets 0, stride, 2*stride, ... must
    # satisfy start + window <= total, which gives floor((total - window) / stride) + 1.
    # (Leaving out the "+ 1" would skip the last full window and draw nothing at
    # all when total == window.)
    if total >= window:
        num_plot = int((total - window) // stride) + 1
    else:
        num_plot = 0

    # Ceiling division; keep at least one row so plt.subplots gets a valid grid.
    num_rows = max(1, (num_plot + num_cols - 1) // num_cols)
    # squeeze=False always yields a 2-D axes array, so the layout also works with
    # a single row or a single column.
    _, axes = plt.subplots(
        nrows=num_rows, ncols=num_cols,
        figsize=(num_cols * 5, num_rows * 5), squeeze=False)
    print(num_plot)

    # axes.flat walks the grid row by row, i.e. cell (i // num_cols, i % num_cols).
    for i, ax in enumerate(axes.flat):
        if i < num_plot:
            start = i * stride
            ax.imshow(spectrogram[:, start:start + window])
        else:
            # Hide grid cells that have no slice to show.
            ax.axis("off")

In [None]:
def show_spectrograms(train_metadata, idx, nfft, fft_window, fft_stride):
    """Compute and plot the spectrogram of one training recording.

    Args:
        train_metadata: DataFrame whose rows provide ``primary_label`` and
            ``filename``; both are joined onto
            ``clef.constant.TRAIN_SHORT_AUDIO_PATH`` to locate the audio file.
        idx: Positional row index (used with ``.iloc``).
        nfft: FFT size passed to ``convert_audio_to_spectrogram``.
        fft_window: Window length passed to ``convert_audio_to_spectrogram``.
        fft_stride: Hop length passed to ``convert_audio_to_spectrogram``.

    Returns:
        The spectrogram array returned by ``convert_audio_to_spectrogram``
        (not log-scaled; the log is applied only for plotting).
    """
    row = train_metadata.iloc[idx]
    filepath = clef.constant.TRAIN_SHORT_AUDIO_PATH / row.primary_label / row.filename
    print("filename = {}".format(str(filepath)))
    # The raw waveform is not needed here, only the spectrogram.
    _, spectrogram = convert_audio_to_spectrogram(
        filepath, nfft=nfft, window=fft_window, stride=fft_stride)

    # Length of one plotted segment, in spectrogram columns. Consecutive columns
    # are fft_stride samples apart (the hop), so the sample count has to be
    # divided by the hop, not by the FFT window length; the two only agree when
    # fft_window == fft_stride.
    # NOTE(review): TEST_STRIDE_SEC * AUDIO_HELTZ is presumably seconds * sample
    # rate, i.e. a segment length in samples -- confirm in clef.constant.
    segment_samples = clef.constant.TEST_STRIDE_SEC * clef.constant.AUDIO_HELTZ
    window = max(1, int(segment_samples / fft_stride))
    # Half-window step between plotted segments; clamp to 1 so the plotting
    # helper never receives a zero stride.
    stride = max(1, window // 2)
    plot_spectrogram_per_frame(np.log(spectrogram), window, stride)
    return spectrogram

In [None]:
# Spectrogram settings shared by the cells below: FFT size, window length, hop length.
nfft, fft_window, fft_stride = 512, 1600, 1600

In [None]:
# Plot the spectrogram slices of the first metadata row; the returned array is discarded.
_ = show_spectrograms(
    train_metadata, idx=0, nfft=nfft, fft_window=fft_window, fft_stride=fft_stride)

In [None]:
# Plot the spectrogram slices of the second metadata row and keep the array for inspection.
spec = show_spectrograms(
    train_metadata, idx=1, nfft=nfft, fft_window=fft_window, fft_stride=fft_stride)

In [None]:
# Display the dimensions of the spectrogram returned by the previous cell.
spec.shape

In [None]:
# Scratch list of the integers 1 through 5.
a = list(range(1, 6))