In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pickle
import torch
import pandas as pd
import math
from sklearn.model_selection import train_test_split

from transformers import Wav2Vec2Processor, Wav2Vec2FeatureExtractor, HubertModel, HubertForSequenceClassification
from transformers import Trainer, TrainingArguments
from datasets import load_dataset
import soundfile as sf


from IPython.display import Audio, display, clear_output

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

In [2]:
import sys
sys.path.append("../scripts/")
import data_loader as dl

In [3]:
p_path =  "C:/Users/yagne/Downloads/feature_extracts_11/"
data = pd.read_csv("../outputs/summary_data.csv")
data['line'] = data['line'].astype(str)
data.sample(n=2)


Unnamed: 0,file,line,start,end,speaker,speaker_role,word_count,duration,text,start_idx,...,justice_year_onset_time_diff_mean_mean,justice_year_onset_time_diff_mean_std,justice_year_onset_time_diff_stddev_mean,justice_year_onset_time_diff_stddev_std,justice_year_onset_time_diff_entropy_mean,justice_year_onset_time_diff_entropy_std,justice_year_word_rate_mean,justice_year_word_rate_std,justice_year_onset_rate_mean,justice_year_onset_rate_std
7626,15-8544,99,1761.06,1785.475,Samuel_A_Alito_Jr,scotus_justice,74,24.415,All right. So it's for vagueness purposes. Now...,28176960,...,0.179352,0.028832,0.161813,0.033735,-122.598295,64.552798,2.86663,0.468534,7.023048,0.871192
15789,19-7,152,3195.68,3211.4,Brett_M_Kavanaugh,scotus_justice,50,15.72,On your -- on your definition of how we should...,51130880,...,0.236285,0.137546,0.222739,0.128992,-95.036539,66.024475,2.793239,0.448214,6.319411,1.750082


In [4]:
# mp3_clips = pickle.load(open(f"C:/Users/yagne/Downloads/mp3_tensors.pkl", "rb"))
# mp3_clips = np.array([(m['file'], m['line']) for m in mp3_clips])

from pathlib import Path
pts = [path for path in Path(p_path).rglob("*.pt")]
print("PyTorch Files:", len(pts))

mp3_clip_names = [("-".join(p[:-1]),p[-1]) for p in [p.name.replace(".pt", "").split('-') for p in pts]]

clean_mp3_clips = []
for m in mp3_clip_names:
#     a = torch.load(f"{p_path}{m[0]}-{m[1]}.pt")
#     if a.size()[0] != 1:
#         print(m[0],m[1])
#         print(a.size())
#         b = a.mean(dim=0).unsqueeze(0)
#         print(b.size())
    a = data.loc[(data['file'] == m[0]) & (data['line'] == m[1])]
    if a.shape[0] != 1:
        continue
#         print(m[0], m[1])
    else:
        clean_mp3_clips.append((m[0],m[1]))
    
mp3_clips = np.array(clean_mp3_clips)

PyTorch Files: 3310


In [5]:
class HUBERTDataset(torch.utils.data.Dataset):
    def __init__(self, mp3_clips, summary_data, pt_path):
        self.mp3_clips = mp3_clips
        self.summary_data = summary_data
        self.pt_path = pt_path

    def __getitem__(self, idx):
        file, line = self.mp3_clips[idx]
        input_values = torch.load(f"{self.pt_path}{file}-{line}.pt")
        input_values.requires_grad = False
      
        if input_values.size()[0] != 1:
            input_values = input_values.mean(dim=0)
        else:
            input_values = input_values.squeeze(0)
        labels = self.summary_data.loc[
            (self.summary_data['file'] == file) & (self.summary_data['line'] == line),'gs_score'
        ].values[0]
        labels = torch.tensor(labels).unsqueeze(0)
        return {'input_values': input_values.float(),
                'labels': labels.float()
               }

    def __len__(self):
        return len(self.mp3_clips)

In [6]:
print(next(iter(HUBERTDataset(mp3_clips, data, p_path))))

{'input_values': tensor([[-0.0010, -0.0090, -0.1289,  ..., -0.2402,  0.1756, -0.3532],
        [-0.1262, -0.0546, -0.1580,  ..., -0.2267,  0.2041, -0.1039],
        [-0.1728, -0.0801, -0.1477,  ..., -0.2121,  0.2264,  0.1342],
        ...,
        [-0.1740,  0.0968, -0.0503,  ...,  0.1944, -0.3485, -0.2532],
        [-0.1439,  0.1856,  0.0105,  ...,  0.0370, -0.2140,  0.0294],
        [-0.0805,  0.2399,  0.0557,  ..., -0.0298, -0.0662,  0.1465]]), 'labels': tensor([-1.1868])}


In [7]:
class HUBERTDataModule(pl.LightningDataModule):
    def __init__(self, mp3_clips, metadata, pt_path, batch_size=4, num_workers=4, seed=42):
        super().__init__()
        self.mp3_clips = mp3_clips
        self.metadata = metadata
        self.pt_path = pt_path
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.seed = seed
        
    def prepare_data(self):
        rng = np.random.default_rng(self.seed)
        indices = rng.permutation(self.mp3_clips.shape[0])
        train_size = math.floor(len(indices) * 0.80)
        val_size = math.floor(len(indices) * 0.10)   
        self.train_idx = indices[:train_size]
        self.val_idx = indices[train_size : train_size + val_size]
        self.test_idx = indices[train_size + val_size :]
        
        print(train_size, val_size, len(indices) - train_size - val_size)
        
    def setup(self):
        self.train_clips = self.mp3_clips[self.train_idx]
        print("Train",len(self.train_clips))
        self.val_clips = self.mp3_clips[self.val_idx]
        print("Val",len(self.val_clips))
        self.test_clips = self.mp3_clips[self.test_idx]
        print("Test",len(self.test_clips))
        
    def train_dataloader(self):
        self.train_data = dl.HUBERTDataset(self.train_clips, self.metadata, self.pt_path)
        train_loader = DataLoader(self.train_data, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)
        return train_loader
    def val_dataloader(self):
        self.val_data = dl.HUBERTDataset(self.val_clips, self.metadata, self.pt_path)
        val_loader = DataLoader(self.val_data, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)
        return val_loader
    def test_dataloader(self):
        self.test_data = dl.HUBERTDataset(self.test_clips, self.metadata, self.pt_path)
        test_loader = DataLoader(self.test_data, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)
        return test_loader
        

In [27]:
class HUBERTRegressor(pl.LightningModule):
    def __init__(self, criterion, learning_rate=0.0001, hidden_size=768, projector_size=256):
        super().__init__()
        self.projector = nn.Linear(hidden_size, projector_size)
        self.regressor = nn.Linear(projector_size, 1)
        self.criterion = criterion
        self.learning_rate = learning_rate
        
    def forward(self, x):
        print(x.size())
        projected = self.projector(x)
        print(projected.size())
        pooled_output = projected.mean(dim=1)
        print(pooled_output.size())
        pred = self.regressor(pooled_output)
        print(pred.size())
        return pred
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)
    
    def training_step(self, batch, batch_idx):
        x, y = batch['input_values'], batch['labels']
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log("train_loss", loss, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch['input_values'], batch['labels']
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log('val_loss', loss, prog_bar=True, logger=True)
        return loss
    
    def test_step(self, batch, batch_idx):
        x, y = batch['input_values'], batch['labels']
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log('test_loss', loss, prog_bar=True, logger=True)
        return loss

In [9]:
checkpoint_callback = ModelCheckpoint(
    dirpath = "checkpoints",
    filename = "best-checkpoint", 
    save_top_k=1, 
    verbose =True, 
    monitor = "val_loss",
    mode="min"
)
logger = TensorBoardLogger("hubert")
early_stopping_callback = EarlyStopping(monitor="val_loss", patience = 2)

trainer = pl.Trainer(
    max_epochs=20,
    logger=logger,
    gpus=0,
    progress_bar_refresh_rate=1,
    checkpoint_callback=checkpoint_callback,
    callbacks = [early_stopping_callback],
    overfit_batches=1,
    log_every_n_steps=1
)

  rank_zero_deprecation(
  rank_zero_deprecation(
GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_warn(


In [29]:
model = HUBERTRegressor(criterion=nn.MSELoss())

In [11]:
dm = HUBERTDataModule(np.array(clean_mp3_clips), data, p_path, batch_size=1, num_workers=1)
dm.prepare_data()
dm.setup()

b1 = next(iter(dm.val_dataloader()))
print(b1)

2626 328 329
Train 2626
Val 328
Test 329
{'input_values': tensor([[[-1.3758e-01,  3.4996e-02,  3.3770e-01,  ...,  7.6871e-03,
           1.2084e-01,  8.7262e-02],
         [-1.4544e-01,  3.2742e-02,  3.3487e-01,  ...,  2.5508e-02,
           1.0931e-01,  9.1300e-02],
         [-1.5034e-01,  3.2271e-02,  3.3418e-01,  ...,  7.2643e-02,
           8.0504e-02,  9.4673e-02],
         ...,
         [-2.6175e-01, -7.3572e-02, -3.2483e-01,  ...,  3.7043e-02,
          -1.9819e-01, -5.9894e-02],
         [-2.2815e-01,  3.0404e-03, -3.5803e-01,  ..., -2.1670e-04,
          -1.6846e-01, -9.7121e-02],
         [-1.8178e-01,  1.1292e-01,  2.7791e-01,  ..., -6.3717e-02,
           3.5064e-02,  1.4509e-01]]]), 'labels': tensor([[0.1288]])}


In [12]:
%load_ext tensorboard
%tensorboard --logdir ./hubert

Reusing TensorBoard on port 6006 (pid 12912), started 1:36:22 ago. (Use '!kill 12912' to kill it.)

In [13]:
trainer.fit(model, dm)
trainer.test(model, datamodule=dm)

  rank_zero_deprecation(

  | Name      | Type    | Params
--------------------------------------
0 | projector | Linear  | 196 K 
1 | regressor | Linear  | 257   
2 | criterion | MSELoss | 0     
--------------------------------------
197 K     Trainable params
0         Non-trainable params
197 K     Total params
0.788     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 0.0005455315113067627}
--------------------------------------------------------------------------------


[{'test_loss': 0.0005455315113067627}]

In [32]:
model(b1['input_values']),b1['labels']

torch.Size([1, 999, 768])
torch.Size([1, 999, 256])
torch.Size([1, 256])
torch.Size([1, 1])


(tensor([[0.0194]], grad_fn=<AddmmBackward0>), tensor([[0.1288]]))