In [1]:
import torch
import torch.nn
import conformer
import easydict
import numpy as np
import pandas as pd
import json
import os
import random
from warpctc_pytorch import CTCLoss
from collections import OrderedDict
import pandas as pd
from data.conformer_data_loader import AudioDataLoader, SpectrogramDataset
import librosa

In [2]:
import importlib
importlib.reload(conformer)

<module 'conformer' from '/Data/etc/Robust_ASR/conformer/__init__.py'>

In [2]:
args = easydict.EasyDict({
    'dim' : 144, 'dec_dim' : 320,
    'dim_head' : 64,
    'n_enc_layers' : 16,
    'heads' : 4,
    'ff_mult' :4,
    'conv_expansion_factor' : 2,
    'conv_kernel_size' : 31,
    'attn_dropout' : 0.1,
    'ff_dropout' : 0.1,
    'conv_dropout' : 0.1,
    'train_manifest' : './data/csvs/hacka_95_kss_pan_zeroth_aihub.csv',
    'val_manifest' : './data/csvs/dataset_with_age_val_5.csv',
    'exp_name' : './exp/conformer_0111_179/',
    'cuda' : False,
    'sample_rate' : 16000,
    'labels_path' : 'labels.json',
    'window_size' : .02, 'window_stride' : .01, 'window' : 'hamming',
    'hidden_size' : 1024, 'hidden_layers' : 5, 'rnn_type' : 'gru',
    'epochs' : 20, 'batch_size' : 3, 'num_workers' : 0,
    'augment' : False, 'spec_augment' : False,
    'seed' : 1234
})

#Gpu setting
device = torch.device("cuda" if args.cuda else "cpu")

# Set seeds for determinism
torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
np.random.seed(args.seed)
random.seed(args.seed)

#Where to save the models and training's metadata
save_folder = os.path.join(args.exp_name, 'models')
tbd_logs = os.path.join(args.exp_name, 'tbd_logdir')
loss_save = os.path.join(args.exp_name, 'train.log')
config_save = os.path.join(args.exp_name, 'config.json')
os.makedirs(save_folder, exist_ok=True)

# save the experiment configuration.
with open(config_save, 'w') as f:
    json.dump(args.__dict__, f, indent=2)
    
labels = ' _가각간갇갈갉갊감갑값갓갔강갖갗같갚갛개객갠갤갬갭갯갰갱갸갹갼걀걋걍걔걘걜거걱건걷걸걺검겁것겄겅겆겉겊겋게겐겔겜겝겟겠겡겨격겪견겯결겸겹겻겼경곁계곈곌곕곗고곡곤곧골곪곬곯곰곱곳공곶과곽관괄괆괌괍괏광괘괜괠괩괬괭괴괵괸괼굄굅굇굉교굔굘굡굣구국군굳굴굵굶굻굼굽굿궁궂궈궉권궐궜궝궤궷귀귁귄귈귐귑귓규균귤그극근귿글긁금급긋긍긔기긱긴긷길긺김깁깃깅깆깊까깍깎깐깔깖깜깝깟깠깡깥깨깩깬깰깸깹깻깼깽꺄꺅꺌꺼꺽꺾껀껄껌껍껏껐껑께껙껜껨껫껭껴껸껼꼇꼈꼍꼐꼬꼭꼰꼲꼴꼼꼽꼿꽁꽂꽃꽈꽉꽐꽜꽝꽤꽥꽹꾀꾄꾈꾐꾑꾕꾜꾸꾹꾼꿀꿇꿈꿉꿋꿍꿎꿔꿜꿨꿩꿰꿱꿴꿸뀀뀁뀄뀌뀐뀔뀜뀝뀨끄끅끈끊끌끎끓끔끕끗끙끝끼끽낀낄낌낍낏낑나낙낚난낟날낡낢남납낫났낭낮낯낱낳내낵낸낼냄냅냇냈냉냐냑냔냘냠냥너넉넋넌널넒넓넘넙넛넜넝넣네넥넨넬넴넵넷넸넹녀녁년녈념녑녔녕녘녜녠노녹논놀놂놈놉놋농높놓놔놘놜놨뇌뇐뇔뇜뇝뇟뇨뇩뇬뇰뇹뇻뇽누눅눈눋눌눔눕눗눙눠눴눼뉘뉜뉠뉨뉩뉴뉵뉼늄늅늉느늑는늘늙늚늠늡늣능늦늪늬늰늴니닉닌닐닒님닙닛닝닢다닥닦단닫달닭닮닯닳담답닷닸당닺닻닿대댁댄댈댐댑댓댔댕댜더덕덖던덛덜덞덟덤덥덧덩덫덮데덱덴델뎀뎁뎃뎄뎅뎌뎐뎔뎠뎡뎨뎬도독돈돋돌돎돐돔돕돗동돛돝돠돤돨돼됐되된될됨됩됫됴두둑둔둘둠둡둣둥둬뒀뒈뒝뒤뒨뒬뒵뒷뒹듀듄듈듐듕드득든듣들듦듬듭듯등듸디딕딘딛딜딤딥딧딨딩딪따딱딴딸땀땁땃땄땅땋때땍땐땔땜땝땟땠땡떠떡떤떨떪떫떰떱떳떴떵떻떼떽뗀뗄뗌뗍뗏뗐뗑뗘뗬또똑똔똘똥똬똴뙈뙤뙨뚜뚝뚠뚤뚫뚬뚱뛔뛰뛴뛸뜀뜁뜅뜨뜩뜬뜯뜰뜸뜹뜻띄띈띌띔띕띠띤띨띰띱띳띵라락란랄람랍랏랐랑랒랖랗래랙랜랠램랩랫랬랭랴략랸럇량러럭런럴럼럽럿렀렁렇레렉렌렐렘렙렛렝려력련렬렴렵렷렸령례롄롑롓로록론롤롬롭롯롱롸롼뢍뢨뢰뢴뢸룀룁룃룅료룐룔룝룟룡루룩룬룰룸룹룻룽뤄뤘뤠뤼뤽륀륄륌륏륑류륙륜률륨륩륫륭르륵른를름릅릇릉릊릍릎리릭린릴림립릿링마막만많맏말맑맒맘맙맛망맞맡맣매맥맨맬맴맵맷맸맹맺먀먁먈먕머먹먼멀멂멈멉멋멍멎멓메멕멘멜멤멥멧멨멩며멱면멸몃몄명몇몌모목몫몬몰몲몸몹못몽뫄뫈뫘뫙뫼묀묄묍묏묑묘묜묠묩묫무묵묶문묻물묽묾뭄뭅뭇뭉뭍뭏뭐뭔뭘뭡뭣뭬뮈뮌뮐뮤뮨뮬뮴뮷므믄믈믐믓미믹민믿밀밂밈밉밋밌밍및밑바박밖밗반받발밝밞밟밤밥밧방밭배백밴밸뱀뱁뱃뱄뱅뱉뱌뱍뱐뱝버벅번벋벌벎범법벗벙벚베벡벤벧벨벰벱벳벴벵벼벽변별볍볏볐병볕볘볜보복볶본볼봄봅봇봉봐봔봤봬뵀뵈뵉뵌뵐뵘뵙뵤뵨부북분붇불붉붊붐붑붓붕붙붚붜붤붰붸뷔뷕뷘뷜뷩뷰뷴뷸븀븃븅브븍븐블븜븝븟비빅빈빌빎빔빕빗빙빚빛빠빡빤빨빪빰빱빳빴빵빻빼빽뺀뺄뺌뺍뺏뺐뺑뺘뺙뺨뻐뻑뻔뻗뻘뻠뻣뻤뻥뻬뼁뼈뼉뼘뼙뼛뼜뼝뽀뽁뽄뽈뽐뽑뽕뾔뾰뿅뿌뿍뿐뿔뿜뿟뿡쀼쁑쁘쁜쁠쁨쁩삐삑삔삘삠삡삣삥사삭삯산삳살삵삶삼삽삿샀상샅새색샌샐샘샙샛샜생샤샥샨샬샴샵샷샹섀섄섈섐섕서석섞섟선섣설섦섧섬섭섯섰성섶세섹센셀셈셉셋셌셍셔셕션셜셤셥셧셨셩셰셴셸솅소속솎손솔솖솜솝솟송솥솨솩솬솰솽쇄쇈쇌쇔쇗쇘쇠쇤쇨쇰쇱쇳쇼쇽숀숄숌숍숏숑수숙순숟술숨숩숫숭숯숱숲숴쉈쉐쉑쉔쉘쉠쉥쉬쉭쉰쉴쉼쉽쉿슁슈슉슐슘슛슝스슥슨슬슭슴습슷승시식신싣실싫심십싯싱싶싸싹싻싼쌀쌈쌉쌌쌍쌓쌔쌕쌘쌜쌤쌥쌨쌩썅써썩썬썰썲썸썹썼썽쎄쎈쎌쏀쏘쏙쏜쏟쏠쏢쏨쏩쏭쏴쏵쏸쐈쐐쐤쐬쐰쐴쐼쐽쑈쑤쑥쑨쑬쑴쑵쑹쒀쒔쒜쒸쒼쓩쓰쓱쓴쓸쓺쓿씀씁씌씐씔씜씨씩씬씰씸씹씻씽아악안앉않알앍앎앓암압앗았앙앝앞애액앤앨앰앱앳앴앵야약얀얄얇얌얍얏양얕얗얘얜얠얩어억언얹얻얼얽얾엄업없엇었엉엊엌엎에엑엔엘엠엡엣엥여역엮연열엶엷염엽엾엿였영옅옆옇예옌옐옘옙옛옜오옥온올옭옮옰옳옴옵옷옹옻와왁완왈왐왑왓왔왕왜왝왠왬왯왱외왹왼욀욈욉욋욍요욕욘욜욤욥욧용우욱운울욹욺움웁웃웅워웍원월웜웝웠웡웨웩웬웰웸웹웽위윅윈윌윔윕윗윙유육윤율윰윱윳융윷으윽은을읊음읍읏응읒읓읔읕읖읗의읩읜읠읨읫이익인일읽읾잃임입잇있잉잊잎자작잔잖잗잘잚잠잡잣잤장잦재잭잰잴잼잽잿쟀쟁쟈쟉쟌쟎쟐쟘쟝쟤쟨쟬저적전절젊점접젓정젖제젝젠젤젬젭젯젱져젼졀졈졉졌졍졔조족존졸졺좀좁좃종좆좇좋좌좍좔좝좟좡좨좼좽죄죈죌죔죕죗죙죠죡죤죵주죽준줄줅줆줌줍줏중줘줬줴쥐쥑쥔쥘쥠쥡쥣쥬쥰쥴쥼즈즉즌즐즘즙즛증지직진짇질짊짐집짓징짖짙짚짜짝짠짢짤짧짬짭짯짰짱째짹짼쨀쨈쨉쨋쨌쨍쨔쨘쨩쩌쩍쩐쩔쩜쩝쩟쩠쩡쩨쩽쪄쪘쪼쪽쫀쫄쫌쫍쫏쫑쫓쫘쫙쫠쫬쫴쬈쬐쬔쬘쬠쬡쭁쭈쭉쭌쭐쭘쭙쭝쭤쭸쭹쮜쮸쯔쯤쯧쯩찌찍찐찔찜찝찡찢찧차착찬찮찰참찹찻찼창찾채책챈챌챔챕챗챘챙챠챤챦챨챰챵처척천철첨첩첫첬청체첵첸첼쳄쳅쳇쳉쳐쳔쳤쳬쳰촁초촉촌촐촘촙촛총촤촨촬촹최쵠쵤쵬쵭쵯쵱쵸춈추축춘출춤춥춧충춰췄췌췐취췬췰췸췹췻췽츄츈츌츔츙츠측츤츨츰츱츳층치칙친칟칠칡침칩칫칭카칵칸칼캄캅캇캉캐캑캔캘캠캡캣캤캥캬캭컁커컥컨컫컬컴컵컷컸컹케켁켄켈켐켑켓켕켜켠켤켬켭켯켰켱켸코콕콘콜콤콥콧콩콰콱콴콸쾀쾅쾌쾡쾨쾰쿄쿠쿡쿤쿨쿰쿱쿳쿵쿼퀀퀄퀑퀘퀭퀴퀵퀸퀼큄큅큇큉큐큔큘큠크큭큰클큼큽킁키킥킨킬킴킵킷킹타탁탄탈탉탐탑탓탔탕태택탠탤탬탭탯탰탱탸턍터턱턴털턺텀텁텃텄텅테텍텐텔템텝텟텡텨텬텼톄톈토톡톤톨톰톱톳통톺톼퇀퇘퇴퇸툇툉툐투툭툰툴툼툽툿퉁퉈퉜퉤튀튁튄튈튐튑튕튜튠튤튬튱트특튼튿틀틂틈틉틋틔틘틜틤틥티틱틴틸팀팁팃팅파팍팎판팔팖팜팝팟팠팡팥패팩팬팰팸팹팻팼팽퍄퍅퍼퍽펀펄펌펍펏펐펑페펙펜펠펨펩펫펭펴편펼폄폅폈평폐폘폡폣포폭폰폴폼폽폿퐁퐈퐝푀푄표푠푤푭푯푸푹푼푿풀풂품풉풋풍풔풩퓌퓐퓔퓜퓟퓨퓬퓰퓸퓻퓽프픈플픔픕픗피픽핀필핌핍핏핑하학한할핥함합핫항해핵핸핼햄햅햇했행햐향허헉헌헐헒험헙헛헝헤헥헨헬헴헵헷헹혀혁현혈혐협혓혔형혜혠혤혭호혹혼홀홅홈홉홋홍홑화확환활홧황홰홱홴횃횅회획횐횔횝횟횡효횬횰횹횻후훅훈훌훑훔훗훙훠훤훨훰훵훼훽휀휄휑휘휙휜휠휨휩휫휭휴휵휸휼흄흇흉흐흑흔흖흗흘흙흠흡흣흥흩희흰흴흼흽힁히힉힌힐힘힙힛힝'
args['n_classes'] = len(labels)

In [3]:
audio_conf = dict(sample_rate=16000,
                    window_size=.02,
                    window_stride=.01,
                    window='hamming',
                    noise_dir= None,
                    noise_prob= 0.4,
                    noise_levels=(0.0, 0.5))

In [4]:
train_dataset = SpectrogramDataset(audio_conf = audio_conf, manifest_filepath = args.train_manifest, labels = labels,
                                  normalize=True, speed_volume_perturb = args.augment, spec_augment = args.spec_augment)
test_dataset = SpectrogramDataset(audio_conf = audio_conf, manifest_filepath = args.val_manifest, labels = labels,
                                 normalize=True, speed_volume_perturb=False, spec_augment=False)
# train_sampler = BucketingSampler(train_dataset, batch_size = args.batch_size)

# train_loader = AudioDataLoader(train_dataset, num_workers= args.num_workers, batch_sampler = train_sampler, pin_memory=True)
train_loader = AudioDataLoader(train_dataset, num_workers= args.num_workers, batch_size = args.batch_size , shuffle=True)
test_loader = AudioDataLoader(test_dataset, num_workers= args.num_workers, batch_size =  args.batch_size ,shuffle=True)

In [14]:
x = librosa.load('/Data/etc/Robust_ASR/data/aihub/KsponSpeech_04/KsponSpeech_0476/KsponSpeech_475244.wav', sr=16000)[0]

In [18]:
s = librosa.feature.melspectrogram(x, 16000, n_mels=83)
log_S = librosa.power_to_db(s, ref=np.max)

In [19]:
log_S.shape

(83, 110)

In [5]:
iterer = iter(test_loader)

In [6]:
data = next(iterer)
spect = data[0]

spect shape torch.Size([83, 501])
spect shape torch.Size([83, 426])
spect shape torch.Size([83, 651])


In [7]:
spect.shape

torch.Size([3, 1, 83, 651])

In [17]:
conv1 = torch.nn.Conv2d(1, 256, kernel_size=3, stride=2,).to(device)
conv2 = torch.nn.Conv2d(256, 256, kernel_size=3, stride=2,).to(device)

conv_1_res = conv1(spect)
conv_2_res = conv2(conv_1_res)

In [18]:
conv_2_res.shape

torch.Size([3, 256, 20, 162])

In [10]:
conv_2_res = conv_2_res.permute(0,3,2,1)

In [11]:
conv_2_res.shape

torch.Size([3, 162, 20, 256])

In [23]:
linear = torch.nn.Linear(256 * 20, 512).to(device)

In [22]:
conv_2_res.reshape((3, -1, 20*256)).shape

torch.Size([3, 162, 5120])

In [24]:
linear_res = linear(conv_2_res.reshape((3, -1, 20*256)))

In [25]:
linear_res.shape

torch.Size([3, 162, 512])

In [15]:
linear_res = linear_res.reshape((args.batch_size, -1, 512))

In [16]:
linear_res.shape

torch.Size([3, 3240, 512])

In [26]:
conf_layers = []
for i in range(17):
    conf_layers.append(conformer.ConformerBlock(
        dim = 512,
        dim_head = 32,
        heads = 8,
        ff_mult = 4,
        conv_expansion_factor = 2,
        conv_kernel_size = 31,
        attn_dropout = 0.,
        ff_dropout = 0.,
        conv_dropout = 0.
                )
              )

In [32]:
decoder_layers = [torch.nn.LSTM(512, 640)]

In [33]:
enc_pre = [conv1, conv2, linear]

In [34]:
fc = [torch.nn.Linear(640, 26)]

In [43]:
model = torch.nn.ModuleList(enc_pre + conf_layers + decoder_layers + fc)

In [40]:
def get_param_size(model):
    params = 0
    for p in model.parameters():
        tmp = 1
        for x in p.size():
            tmp *= x
        params += tmp
    return params

In [44]:
get_param_size(model)

112025658

In [51]:
get_param_size(model)

117801024

In [19]:
x = torch.randn(1, 1024, 256).to(device)

In [27]:
confblock_res = confblock(linear_res)

NameError: name 'confblock' is not defined

In [27]:
confblock_res.shape

torch.Size([3, 6318, 256])

In [9]:
class Enc_pre(torch.nn.Module):
    def __init__(self, args):
        super(Enc_pre, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 256, kernel_size=3, stride=2)
        self.conv2 = torch.nn.Conv2d(256, 256, kernel_size=3, stride=2)
        self.linear = torch.nn.Linear(256, args.dim)
        self.dropout = torch.nn.Dropout(p=0.1)


    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.permute(0,3,2,1) ######## unsure yet
        x = self.linear(x)
        x = self.dropout(x)
        
        return x
    
class Enc_body(torch.nn.Module):
    def __init__(self, args):
        super(Enc_body, self).__init__()
        
        conformer_blocks = []
        for _ in range(args.n_enc_layers):
            conformer_blocks.append(
                conformer.ConformerBlock(       
                    dim = args.dim,
                    dim_head = args.dim_head,
                    heads = args.heads ,
                    ff_mult = args.ff_mult,
                    conv_expansion_factor = args.conv_expansion_factor,
                    conv_kernel_size = args.conv_kernel_size,
                    attn_dropout = args.attn_dropout,
                    ff_dropout = args.ff_dropout,
                    conv_dropout = args.conv_dropout
                )
            )
        self.conformer_layers = torch.nn.ModuleList(conformer_blocks)
        
    def forward(self, x):
        for layer in self.conformer_layers:
            x = layer(x)
        
        return x
    
class Conformer(torch.nn.Module):
    def __init__(self, args):
        super(Conformer, self).__init__()
        self.enc_pre = Enc_pre(args)
        self.enc_body = Enc_body(args)
        self.dec_body = torch.nn.LSTM(args.dim, args.dec_dim, num_layers=1 )
        self.fc = torch.nn.Linear(args.dec_dim, args.n_classes)
#         self.model = torch.nn.ModuleList(self.enc_pre + self.enc_body + self.dec_body + self.fc)
    
    def forward(self, x) :
        x = self.enc_pre(x)
        x = x.reshape((x.size(0), -1, x.size(-1)))
        x = self.enc_body(x)
        x = x.transpose(0,1)
        x = self.dec_body(x)
        x = self.fc(x)
#         x = self.model(x)
        
        return x

In [10]:
conformer_model = Conformer(args)

In [11]:
conformer_model = conformer_model.to(device)
_ = conformer_model.train()

In [12]:
spect = spect.to(device)

In [13]:
conf_res = conformer_model(spect)

RuntimeError: CUDA out of memory. Tried to allocate 1.79 GiB (GPU 0; 15.75 GiB total capacity; 12.03 GiB already allocated; 1.07 GiB free; 13.52 GiB reserved in total by PyTorch)