In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import librosa
import torchaudio
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys

from tqdm.auto import tqdm
from IPython.display import Audio

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA version: {torch.version.cuda}")
print(f"CUDNN version: {torch.backends.cudnn.version()}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Device count: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    print(f"Current device: {torch.cuda.current_device()}")
    print(f"Device name: {torch.cuda.get_device_name(torch.cuda.current_device())}")

cuda
PyTorch version: 2.3.1
CUDA version: 12.1
CUDNN version: 8907
CUDA available: True
Device count: 1
Current device: 0
Device name: NVIDIA GeForce RTX 3060 Laptop GPU


In [3]:
file_path = './TIMIT/'
data_path = './TIMIT/data'

train_data = pd.read_csv(os.path.join(file_path, 'train_data.csv'))
test_data = pd.read_csv(os.path.join(file_path, 'test_data.csv'))
df = pd.concat([train_data, test_data])

In [4]:
audio_files = {}
for index, row in df.iterrows():
    filename = row['filename']
    if type(filename) == str:
        filename = row['speaker_id'] + ' ' + filename.split('.')[0]
        if filename not in audio_files:
            audio_files[filename]={}
        
        if filename in audio_files:
            if row['is_audio'] is True and row['is_converted_audio'] == True:
                audio_files[filename]['audio_file'] = os.path.join(data_path, row['path_from_data_dir'])
            elif row['is_word_file'] is True:
                audio_files[filename]['word_file'] = os.path.join(data_path, row['path_from_data_dir'])
            elif row['is_phonetic_file'] is True:
                audio_files[filename]['phonetic_file'] = os.path.join(data_path, row['path_from_data_dir'])

In [5]:
print(len(audio_files))
for key,value in audio_files.items():
    print ('key: ',key,'value: ',value)
    break

6300
key:  MMDM0 SI681 value:  {'audio_file': './TIMIT/data\\TRAIN/DR4/MMDM0/SI681.WAV.wav', 'phonetic_file': './TIMIT/data\\TRAIN/DR4/MMDM0/SI681.PHN', 'word_file': './TIMIT/data\\TRAIN/DR4/MMDM0/SI681.WRD'}


In [6]:
female_files = {}
for index, row in df.iterrows():
    filename = row['filename']
    if type(filename) == str and row['speaker_id'].startswith('F'):
        if filename.split('.')[0] in ['SA1','SA2']:
            continue
        filename = row['speaker_id'] + ' ' + filename.split('.')[0]
        if filename not in female_files:
            female_files[filename]={}
        
        if filename in female_files:
            if row['is_audio'] is True and row['is_converted_audio'] == True:
                female_files[filename]['audio_file'] = os.path.join(data_path, row['path_from_data_dir'])
            elif row['is_word_file'] is True:
                female_files[filename]['word_file'] = os.path.join(data_path, row['path_from_data_dir'])
            elif row['is_phonetic_file'] is True:
                female_files[filename]['phonetic_file'] = os.path.join(data_path, row['path_from_data_dir'])

In [7]:
print(len(female_files))

1920


In [8]:
# TimitBet 61 phoneme mapping to 39 phonemes
# by Lee, K.-F., & Hon, H.-W. (1989). Speaker-independent phone recognition using hidden Markov models. IEEE Transactions on Acoustics, Speech, and Signal Processing, 37(11), 1641–1648. doi:10.1109/29.46546 
pho61_to_pho39 = {
    'iy':'iy',  'ih':'ih',   'eh':'eh',  'ae':'ae',    'ix':'ih',  'ax':'ah',   'ah':'ah',  'uw':'uw',
    'ux':'uw',  'uh':'uh',   'ao':'aa',  'aa':'aa',    'ey':'ey',  'ay':'ay',   'oy':'oy',  'aw':'aw',
    'ow':'ow',  'l':'l',     'el':'l',  'r':'r',      'y':'y',    'w':'w',     'er':'er',  'axr':'er',
    'm':'m',    'em':'m',     'n':'n',    'nx':'n',     'en':'n',  'ng':'ng',   'eng':'ng', 'ch':'ch',
    'jh':'jh',  'dh':'dh',   'b':'b',    'd':'d',      'dx':'dx',  'g':'g',     'p':'p',    't':'t',
    'k':'k',    'z':'z',     'zh':'sh',  'v':'v',      'f':'f',    'th':'th',   's':'s',    'sh':'sh',
    'hh':'hh',  'hv':'hh',   'pcl':'h#', 'tcl':'h#', 'kcl':'h#', 'qcl':'h#','bcl':'h#','dcl':'h#',
    'gcl':'h#','h#':'h#',  '#h':'h#',  'pau':'h#', 'epi': 'h#','nx':'n',   'ax-h':'ah','q':'h#' 
}
pho39_to_index = {
    'aa': 1, 'ae': 2, 'ah': 3, 'aw': 4, 'ay': 5, 'eh': 6, 'er': 7, 'ey': 8, 
    'dh': 9, 'dx': 10, 'b': 11, 'd': 12, 'ch': 13, 'f': 14, 'g': 15, 'z': 16, 
    'hh': 17, 'ih': 18, 'iy': 19, 'jh': 20, 'k': 21, 'l': 22, 'm': 23, 'n': 24, 
    'ng': 25, 'ow': 26, 'oy': 27, 'p': 28, 'r': 29, 's': 30, 'sh': 31, 't': 32, 
    'th': 33, 'uh': 34, 'uw': 35, 'v': 36, 'w': 37, 'y': 38, 'h#': 39
}

In [9]:
from scipy.io.wavfile import read
y1, sr1 = librosa.load('./TIMIT/data/TRAIN/DR1/FDAW0/SA1.WAV.wav', sr=16000)
sr2, y2 = read('./TIMIT/data/TRAIN/DR1/FDAW0/SA1.WAV.wav')
print(f'Shape of audio sequence: {y2.shape}')
print(f'sample rate: {sr2}')
if np.array_equal(y1, y2):
    print('y1=y2')
else:
    print('y1!=y2')

音频时间序列的形状: (53556,)
采样率: 16000
y1!=y2


In [10]:
# def compute_mel_spectrogram(y, sr=16000, n_fft=1024, hop_length=256, n_mels=80):
#     mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
#     log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
#     return log_mel_spectrogram

In [11]:
# mel_y1 = compute_mel_spectrogram(y1[0:1000])
# print(mel_y1[0])

In [12]:
# !git clone https://github.com/NVIDIA/DeepLearningExamples.git

In [13]:
waveglow_path = './DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/waveglow/'
sys.path.append(waveglow_path)
tacotron2_path = './DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/'
sys.path.append(tacotron2_path)

In [14]:
from denoiser import Denoiser
from model import WaveGlow
import data_function
import loss_function
from loss_function import WaveGlowLoss
import importlib.util
entrypoints_path = './DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/waveglow/entrypoints.py'

spec = importlib.util.spec_from_file_location("entrypoints", entrypoints_path)
entrypoints = importlib.util.module_from_spec(spec)
sys.modules["entrypoints"] = entrypoints
spec.loader.exec_module(entrypoints)

from entrypoints import nvidia_waveglow

In [15]:
from scipy.io.wavfile import read
import tacotron2_common.layers as layers
max_wav_value = 32768.0

def compute_mel_spectrogram(audio_data, sr=16000, n_fft=1024, hop_length=256, n_mels=80):
    stft = layers.TacotronSTFT(sampling_rate=16000)
    
    audio_norm = audio_data / max_wav_value
    audio_norm = audio_norm.unsqueeze(0)
    audio_norm = torch.autograd.Variable(audio_norm, requires_grad=False)
    melspec = stft.mel_spectrogram(audio_norm)
    melspec = torch.squeeze(melspec, 0)

    return melspec

def load_audio_from_path(path):
    sampling_rate, data = read(path)
    audio_data = torch.FloatTensor(data.astype(np.float32))

    return audio_data

def compute_mel_from_path(path):
    return compute_mel_spectrogram(load_audio_from_path(path))

In [16]:
from torch.utils.data import DataLoader

def load_mel_audio(file_path, sr=16000, n_mels=80, n_fft=1024, hop_length=256):

    audio = load_audio_from_path(file_path)
    
    melspec = compute_mel_spectrogram(audio)
    
    return melspec, audio

class MelDataset(torch.utils.data.Dataset):
    def __init__(self, file_list):
        self.file_list = file_list

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        file_path = self.file_list[idx]
        mel_spectrogram, audio = load_mel_audio(file_path)
        return torch.tensor(mel_spectrogram, dtype=torch.float32).to(device), torch.tensor(audio, dtype=torch.float32).to(device)

file_list = []
for (key,value) in female_files.items():
    file_list.append(value['audio_file'])

dataset = MelDataset(file_list)
train_dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

In [17]:
dataset[0]

  return torch.tensor(mel_spectrogram, dtype=torch.float32).to(device), torch.tensor(audio, dtype=torch.float32).to(device)


(tensor([[ -8.4898,  -8.4369,  -9.0065,  ...,  -6.8391,  -7.0954,  -7.3317],
         [ -9.8049,  -9.1275,  -9.2912,  ...,  -6.9650,  -6.6273,  -6.6464],
         [ -9.1401,  -9.1792,  -9.6098,  ...,  -7.8798,  -6.9461,  -6.9621],
         ...,
         [ -9.7159, -10.0164, -10.0679,  ...,  -9.3277,  -9.2204,  -9.2737],
         [ -9.5567,  -9.7603,  -9.9210,  ...,  -9.1527,  -9.1147,  -9.2802],
         [ -9.4876,  -9.6483,  -9.7763,  ...,  -9.7336,  -9.6742,  -9.6836]],
        device='cuda:0'),
 tensor([14., -7., -1.,  ..., -4., -4., -1.], device='cuda:0'))

In [18]:
n_mel_channels = 80
n_flows = 12
n_group = 8
n_early_every = 4
n_early_size = 2
WN_config = {
    "n_layers": 8,
    "n_channels": 512,
    "kernel_size": 3
}

# waveglow = WaveGlow(n_mel_channels, n_flows, n_group, n_early_every, n_early_size, WN_config)
# waveglow = waveglow.cuda()

waveglow =  nvidia_waveglow(pretrained=True, model_math='fp32')
waveglow = waveglow.cuda()



In [19]:
from denoiser import Denoiser
tem_mal = compute_mel_from_path('./TIMIT/data/TRAIN/DR1/FCJF0/SA1.WAV.wav').to(device)

with torch.no_grad():
    audio = waveglow.infer(tem_mal.unsqueeze(0).to(device), sigma=0.666)

denoiser = Denoiser(waveglow)
audio_denoised = denoiser(audio, strength=0.01)[:, 0]
audio_numpy = audio_denoised.cpu().numpy()

Audio(audio_numpy, rate=18000)

In [20]:
raw_audio = load_audio_from_path('./TIMIT/data/TRAIN/DR1/FCJF0/SA1.WAV.wav')
audio_numpy = raw_audio.cpu().numpy()
Audio(audio_numpy, rate=16000)

In [21]:
pho61_to_pho39 = {
    'iy':'iy',  'ih':'ih',   'eh':'eh',  'ae':'ae',    'ix':'ih',  'ax':'ah',   'ah':'ah',  'uw':'uw',
    'ux':'uw',  'uh':'uh',   'ao':'aa',  'aa':'aa',    'ey':'ey',  'ay':'ay',   'oy':'oy',  'aw':'aw',
    'ow':'ow',  'l':'l',     'el':'l',  'r':'r',      'y':'y',    'w':'w',     'er':'er',  'axr':'er',
    'm':'m',    'em':'m',     'n':'n',    'nx':'n',     'en':'n',  'ng':'ng',   'eng':'ng', 'ch':'ch',
    'jh':'jh',  'dh':'dh',   'b':'b',    'd':'d',      'dx':'dx',  'g':'g',     'p':'p',    't':'t',
    'k':'k',    'z':'z',     'zh':'sh',  'v':'v',      'f':'f',    'th':'th',   's':'s',    'sh':'sh',
    'hh':'hh',  'hv':'hh',   'pcl':'h#', 'tcl':'h#', 'kcl':'h#', 'qcl':'h#','bcl':'h#','dcl':'h#',
    'gcl':'h#','h#':'h#',  '#h':'h#',  'pau':'h#', 'epi': 'h#','nx':'n',   'ax-h':'ah','q':'h#' 
}
pho39_to_index = {
    'aa': 1, 'ae': 2, 'ah': 3, 'aw': 4, 'ay': 5, 'eh': 6, 'er': 7, 'ey': 8, 
    'dh': 9, 'dx': 10, 'b': 11, 'd': 12, 'ch': 13, 'f': 14, 'g': 15, 'z': 16, 
    'hh': 17, 'ih': 18, 'iy': 19, 'jh': 20, 'k': 21, 'l': 22, 'm': 23, 'n': 24, 
    'ng': 25, 'ow': 26, 'oy': 27, 'p': 28, 'r': 29, 's': 30, 'sh': 31, 't': 32, 
    'th': 33, 'uh': 34, 'uw': 35, 'v': 36, 'w': 37, 'y': 38, 'h#': 39
}

def play_mel_audio(mel_spectrogram):
    with torch.no_grad():
        audio = waveglow.infer(mel_spectrogram.unsqueeze(0).to(device), sigma=1)
    
    denoiser = Denoiser(waveglow)
    audio_denoised = denoiser(audio, strength=0.01)[:, 0]
    audio_numpy = audio_denoised.cpu().numpy()

    return Audio(audio_numpy, rate=18000)

In [22]:
def read_and_convert_phonemes(filename):
    with open(filename, 'r') as file:
        lines = file.readlines()

    end_time = 0
    
    phoneme_indices = []
    
    for line in lines:
        parts = line.strip().split()
        
        if len(parts) != 3:
            continue
        end_time = parts[1]
        phoneme = parts[2]
        
        if phoneme in pho61_to_pho39:
            pho39 = pho61_to_pho39[phoneme]
            if pho39 in pho39_to_index:
                index = pho39_to_index[pho39]-1
                phoneme_indices.append(index)
    phoneme_indices.append(39)
    
    return phoneme_indices,end_time

In [23]:
for key,value in audio_files.items():
    print ('key: ',key,'value: ',value)
    break

tacotron_file_list = []
for key,files in female_files.items():
    if len(files) == 3:
        tacotron_file_list.append(files)
print(len(tacotron_file_list))

key:  MMDM0 SI681 value:  {'audio_file': './TIMIT/data\\TRAIN/DR4/MMDM0/SI681.WAV.wav', 'phonetic_file': './TIMIT/data\\TRAIN/DR4/MMDM0/SI681.PHN', 'word_file': './TIMIT/data\\TRAIN/DR4/MMDM0/SI681.WRD'}
990


In [24]:
class Tacotron2Dataset(torch.utils.data.Dataset):
    def __init__(self, file_list):
        self.file_list = file_list

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        file_paths = self.file_list[idx]
        phone_list, end_time = read_and_convert_phonemes(file_paths['phonetic_file'])
        mel_spectrogram = compute_mel_spectrogram(load_audio_from_path(file_paths['audio_file'][0:int(end_time)]))
        
        phone_tensor = torch.tensor(phone_list, dtype=torch.int).clone().detach()
        mel_tensor = mel_spectrogram.float()
        
        return phone_tensor, mel_tensor, len(phone_list)

In [25]:
tacotron_dataset = Tacotron2Dataset(tacotron_file_list)
tacotron_dataset[0][1].shape

torch.Size([80, 180])

In [26]:
from tacotron2.model import Tacotron2
from tacotron2.loss_function import Tacotron2Loss
hparams = {
    'n_mel_channels': 80,
    'n_symbols': 40, 
    'symbols_embedding_dim': 512,
    'encoder_kernel_size': 5,
    'encoder_n_convolutions': 3,
    'encoder_embedding_dim': 512,
    'attention_rnn_dim': 1024,
    'attention_dim': 128,
    'attention_location_n_filters': 32,
    'attention_location_kernel_size': 31,
    'n_frames_per_step': 1,
    'decoder_rnn_dim': 1024,
    'prenet_dim': 256,
    'max_decoder_steps': 1000,
    'gate_threshold': 0.5,
    'p_attention_dropout': 0.1,
    'p_decoder_dropout': 0.1,
    'postnet_embedding_dim': 512,
    'postnet_kernel_size': 5,
    'postnet_n_convolutions': 5,
    'decoder_no_early_stopping': False,
    'mask_padding': False
}
model = Tacotron2(**hparams).to(device)

In [27]:
# !pip install inflect

In [28]:
import data_functions
import time
import loss_functions

# continue train

model = Tacotron2(**hparams).to(device)
checkpoint = torch.load(os.path.join('./processed_check_points/','checkpoint_epoch_510.pt'))

load_epoch = checkpoint['epoch']
model.load_state_dict(checkpoint['model_state_dict'])
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
print(checkpoint['loss'])
model_name = 'Tacotron2'
criterion = loss_functions.get_loss_function(model_name)
collate_fn = data_functions.get_collate_function(model_name)
train_loader = DataLoader(tacotron_dataset, num_workers=0, shuffle=True, batch_size=64, pin_memory=False, drop_last=False, collate_fn=collate_fn)
batch_to_gpu = data_functions.get_batch_to_gpu(model_name)

epochs = 300 
grad_clip_thresh = 1.0 

model.train()
print(f'load epoch: {load_epoch}')
for epoch in range(load_epoch+1, load_epoch+epochs+1):
    mean_loss = 0
    train_count = 0
    torch.cuda.synchronize()
    epoch_start_time = time.perf_counter()
    for i, batch in enumerate(train_loader):

        train_count += 1
        
#         texts, text_lengths, mels, gate_padded, mel_lengths, max_len = batch
#         texts, text_lengths, mels, gate_padded, mel_lengths = texts.to(device), text_lengths.to(device), mels.to(device), gate_padded.to(device), mel_lengths.to(device)

        torch.cuda.synchronize()
        model.zero_grad()
        x, y, num_items = batch_to_gpu(batch)
        outputs = model(x)
#         mel_outputs, mel_outputs_postnet, gate_outputs, alignments = outputs
#         targets = (mels, gate_padded)
        loss = criterion(outputs, y)
        reduced_loss = loss.item()
        if np.isnan(reduced_loss):
            raise Exception("loss is NaN")
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip_thresh)

        optimizer.step()
        model.zero_grad()
        mean_loss += loss.item()
        if (i) % 10 == 0:
            print(f'Epoch: {epoch+1}, Iteration: {i}, Loss: {loss.item()}')
        torch.cuda.synchronize()
    
    torch.cuda.synchronize()
    epoch_end_time = time.perf_counter()
    epoch_time = epoch_end_time - epoch_start_time
    mean_loss = mean_loss / train_count
    print(f'Epoch {epoch + 1} completed in {epoch_time:.2f} seconds, mean loss: {mean_loss}')
    
    if (epoch + 1) % 10 == 0:
        checkpoint_path = os.path.join('./processed_check_points/',f'checkpoint_epoch_{epoch+1}.pt')
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': mean_loss,
        }, checkpoint_path)
        print(f'Checkpoint saved at {checkpoint_path}')

0.5820023287087679
load epoch: 509
Epoch: 511, Iteration: 0, Loss: 0.5504879951477051
Epoch: 511, Iteration: 10, Loss: 0.5578972101211548
Epoch 511 completed in 617.19 seconds, mean loss: 0.5865850895643234
Epoch: 512, Iteration: 0, Loss: 0.5740936994552612
Epoch: 512, Iteration: 10, Loss: 0.6150566339492798
Epoch 512 completed in 591.18 seconds, mean loss: 0.5811640545725822
Epoch: 513, Iteration: 0, Loss: 0.44456416368484497
Epoch: 513, Iteration: 10, Loss: 0.6107544898986816
Epoch 513 completed in 596.91 seconds, mean loss: 0.5760368220508099
Epoch: 514, Iteration: 0, Loss: 0.6168411374092102
Epoch: 514, Iteration: 10, Loss: 0.5262153744697571
Epoch 514 completed in 544.29 seconds, mean loss: 0.5691762007772923
Epoch: 515, Iteration: 0, Loss: 0.5928060412406921
Epoch: 515, Iteration: 10, Loss: 0.486288845539093
Epoch 515 completed in 512.06 seconds, mean loss: 0.597439356148243
Epoch: 516, Iteration: 0, Loss: 0.5373785495758057
Epoch: 516, Iteration: 10, Loss: 0.5938560962677002
Epo

KeyboardInterrupt: 

In [29]:
model = Tacotron2(**hparams).to(device)
checkpoint = torch.load(os.path.join('./processed_check_points/','checkpoint_epoch_560.pt'))

load_epoch = checkpoint['epoch']
model.load_state_dict(checkpoint['model_state_dict'])
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

In [30]:
phoneme_sequence,end_time = read_and_convert_phonemes(tacotron_file_list[10]['phonetic_file'])
print(phoneme_sequence)
length = [len(phoneme_sequence)]
inputs = torch.tensor(phoneme_sequence, dtype=torch.int).unsqueeze(0).to(device)
input_lengths = torch.tensor(length, dtype=torch.int).to(device)
print(inputs.shape)
print(input_lengths)
model.eval()

with torch.no_grad():
    mel_outputs, mel_lengths, alignments = model.infer(inputs, input_lengths)

print("Mel Outputs Postnet:", mel_outputs)
print("Mel Lengths:", mel_lengths)
print("Alignments:", alignments)
print(mel_outputs.shape)
play_audio = play_mel_audio(mel_outputs[0])
play_audio

[38, 28, 17, 22, 5, 22, 38, 10, 6, 38, 31, 34, 2, 21, 3, 38, 4, 38, 11, 5, 23, 17, 38, 20, 21, 38, 31, 36, 17, 23, 15, 38, 31, 17, 38, 5, 23, 38, 31, 6, 13, 28, 18, 21, 18, 38, 39]
torch.Size([1, 47])
tensor([47], device='cuda:0', dtype=torch.int32)
Mel Outputs Postnet: tensor([[[-8.5186, -8.2329, -8.2975,  ..., -7.3295, -6.6112, -4.4273],
         [-9.2644, -8.9011, -8.9370,  ..., -7.6295, -6.6912, -4.0440],
         [-9.2599, -8.9606, -9.0156,  ..., -7.7698, -6.8009, -4.2006],
         ...,
         [-9.9903, -9.8869, -9.7980,  ..., -8.9185, -7.8695, -4.9556],
         [-9.9586, -9.7991, -9.7109,  ..., -8.9210, -7.7923, -4.7890],
         [-9.8882, -9.7817, -9.7344,  ..., -9.0925, -8.0369, -5.2163]]],
       device='cuda:0')
Mel Lengths: tensor([221], device='cuda:0', dtype=torch.int32)
Alignments: tensor([[[1.2234e-01, 6.8358e-02, 2.1043e-02,  ..., 1.1077e-05,
          6.1144e-06, 2.9901e-05],
         [1.9205e-01, 1.4145e-01, 1.4216e-03,  ..., 1.7291e-05,
          4.1902e-06, 3.4

In [31]:
raw_audio = load_audio_from_path(tacotron_file_list[10]['audio_file'])
print(raw_audio.shape)
audio_numpy = raw_audio.cpu().numpy()
Audio(audio_numpy, rate=16000)

torch.Size([57344])
