In [1]:
import torch
from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification
from transformers import DebertaTokenizer, DebertaForSequenceClassification

# Load the tokenizer and the sequence-classification model from the
# "microsoft/deberta-large" checkpoint. `tokenizer` is reused by later cells.
tokenizer = DebertaTokenizer.from_pretrained("microsoft/deberta-large")
model = DebertaForSequenceClassification.from_pretrained("microsoft/deberta-large")

# Tokenize one sample sentence and ask for PyTorch tensors ("pt").
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

# Forward pass with gradient tracking disabled; keep only the logits.
with torch.no_grad():
    logits = model(**inputs).logits

# Index of the largest logit, mapped to its label name via the model config.
# The trailing expression is what the cell displays.
predicted_class_id = logits.argmax().item()
model.config.id2label[predicted_class_id]


2022-06-16 14:53:31.233789: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
Some weights of the model checkpoint at microsoft/deberta-large were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenc

'LABEL_0'

In [None]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU
# so that code consuming `device` does not fail on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
import gc
import os
import sys
import time
import pickle
import random
from tqdm import tqdm
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

import torch
import transformers
import torch.nn as nn
import torch.nn.functional as F
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModel, AutoTokenizer, AdamW, get_linear_schedule_with_warmup

import warnings
warnings.simplefilter('ignore')

In [3]:
# Read train.csv (relative to the working directory) into `df` and preview
# the first rows.
df = pd.read_csv('train.csv')
df.head()

Unnamed: 0,discourse_id,essay_id,discourse_text,discourse_type,discourse_effectiveness
0,0013cc385424,007ACE74B050,"Hi, i'm Isaac, i'm going to be writing about h...",Lead,Adequate
1,9704a709b505,007ACE74B050,"On my perspective, I think that the face is a ...",Position,Adequate
2,c22adee811b6,007ACE74B050,I think that the face is a natural landform be...,Claim,Adequate
3,a10d361e54e4,007ACE74B050,"If life was on Mars, we would know by now. The...",Evidence,Adequate
4,db3e453ec4e2,007ACE74B050,People thought that the face was formed by ali...,Counterclaim,Adequate


In [4]:
# Read test.csv (relative to the working directory) into `test_df` and
# preview the first rows.
test_df = pd.read_csv('test.csv')
test_df.head()

Unnamed: 0,discourse_id,essay_id,discourse_text,discourse_type
0,a261b6e14276,D72CB1C11673,Making choices in life can be very difficult. ...,Lead
1,5a88900e7dc1,D72CB1C11673,Seeking multiple opinions can help a person ma...,Position
2,9790d835736b,D72CB1C11673,it can decrease stress levels,Claim
3,75ce6d68b67b,D72CB1C11673,a great chance to learn something new,Claim
4,93578d946723,D72CB1C11673,can be very helpful and beneficial.,Claim


In [5]:
# Number of rows per essay_id.
df.essay_id.value_counts()

91B1F82B2CF1    23
4CA37D113612    23
900A879708F0    23
A7EC6F462F8B    22
DECAE402BB38    22
                ..
AB02689C1A9B     1
FFFF80B8CC2F     1
377548575048     1
5E85F1FB4E22     1
9706F8E7D534     1
Name: essay_id, Length: 4191, dtype: int64

In [6]:
# Preview the first rows of a single essay (one of the ids with the most rows
# in the counts shown by the previous cell).
df.loc[df.essay_id=='91B1F82B2CF1'].head()

Unnamed: 0,discourse_id,essay_id,discourse_text,discourse_type,discourse_effectiveness
25190,2d4def8e7c09,91B1F82B2CF1,Many people may think that attending school on...,Lead,Adequate
25191,0a6634792991,91B1F82B2CF1,I would say that I disagree with that statemen...,Position,Adequate
25192,e73c3a854460,91B1F82B2CF1,"Yes, online school would be better for student...",Counterclaim,Adequate
25193,57d92e1dddb3,91B1F82B2CF1,but what about in the future when they lack ba...,Rebuttal,Adequate
25194,4e57f20c26e0,91B1F82B2CF1,"yes, the online courses could be more personal...",Counterclaim,Adequate


In [7]:
from torch.utils.data import DataLoader
import warnings,transformers,logging,torch
from transformers import TrainingArguments,Trainer

In [8]:
# iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')
# if not iskaggle:
#     import zipfile,kaggle
#     path = Path('feedback-prize-effectiveness')
#     kaggle.api.competition_download_cli(str(path))
#     zipfile.ZipFile(f'{path}.zip').extractall(path)


import datasets
from datasets import load_dataset, Dataset, DatasetDict

In [9]:
# Separator token string of the tokenizer loaded in the first cell; it is used
# below to join the discourse type and the discourse text into one input.
sep = tokenizer.sep_token
sep

'[SEP]'

In [10]:
# Model input text: "<discourse_type><sep><discourse_text>" for every row.
df['inputs'] = df['discourse_type'] + sep + df['discourse_text']

In [11]:
# Encode the effectiveness rating as an integer class id and expose it under
# the column name "label".
new_label = {
    "discourse_effectiveness": {
        "Ineffective": 0,
        "Adequate": 1,
        "Effective": 2,
    }
}
df = df.replace(new_label).rename(columns={"discourse_effectiveness": "label"})

In [12]:
# Convert the pandas DataFrame into a `Dataset`.
# NOTE(review): presumably `datasets.Dataset`, since the `datasets` import cell
# runs after the `torch.utils.data` import of the same name — confirm.
ds = Dataset.from_pandas(df)

In [13]:
def tok_func(x):
    """Tokenize the "inputs" field of `x` with the notebook-level `tokenizer`.

    Truncation is enabled in the tokenizer call. `x` may be a single row or a
    batch, since it is only indexed by the "inputs" key.
    """
    text = x["inputs"]
    return tokenizer(text, truncation=True)

In [14]:
# Sanity check: tokenize the first dataset row and display the result.
tok_func(ds[0])

{'input_ids': [1, 32258, 2, 30086, 6, 939, 437, 12370, 6, 939, 437, 164, 7, 28, 2410, 59, 141, 42, 652, 15, 6507, 16, 10, 1632, 1212, 3899, 50, 114, 89, 16, 301, 15, 6507, 14, 156, 24, 4, 20, 527, 16, 59, 141, 6109, 362, 10, 2170, 9, 6507, 8, 10, 652, 21, 450, 15, 5, 5518, 4, 6109, 630, 75, 216, 114, 5, 1212, 3899, 21, 1412, 30, 301, 15, 6507, 6, 50, 114, 24, 16, 95, 10, 1632, 1212, 3899, 4, 1437, 2], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [15]:
# Tokenize the whole dataset in batches and drop the raw text / id columns
# that are not needed after tokenization.
inps = ("discourse_text", "discourse_type")
tok_ds = ds.map(
    tok_func,
    batched=True,
    remove_columns=(*inps, 'inputs', 'discourse_id', 'essay_id'),
)



  0%|          | 0/37 [00:00<?, ?ba/s]

In [16]:
# Collect the distinct essay ids and shuffle them in place with a fixed seed,
# so the essay-level train/validation split below is reproducible.
essay_ids = df.essay_id.unique()
np.random.seed(42)
np.random.shuffle(essay_ids)
essay_ids[:5]

array(['B5C606F0A883', 'FA4FE7706A1A', '37A77BEAD718', '0ED28D8A5EC4',
       'F25BA634ADDD'], dtype=object)

In [17]:
# Hold out the first `val_prop` fraction of the shuffled essay ids for
# validation (the count is truncated to an integer).
val_prop = 0.2
val_sz = int(val_prop * len(essay_ids))
val_essay_ids = essay_ids[:val_sz]

In [18]:
# Boolean mask over the rows of `df`: True where the row's essay is in the
# validation set. Positional row indices are then split by that mask.
is_val = np.isin(df.essay_id, val_essay_ids)
idxs = np.arange(len(df))
val_idxs, trn_idxs = idxs[is_val], idxs[~is_val]
len(val_idxs), len(trn_idxs)

(7181, 29584)

In [19]:
# Bundle the two splits; the validation rows are stored under the "test" key.
dds = DatasetDict({
    "train": tok_ds.select(trn_idxs),
    "test": tok_ds.select(val_idxs),
})

In [20]:
def get_dds(df, train=True):
    """Tokenize a DataFrame and, for training data, split it into two parts.

    Args:
        df: pandas DataFrame that contains the columns 'discourse_text',
            'discourse_type', 'inputs', 'discourse_id' and 'essay_id'
            (all of them are dropped after tokenization).
        train: when True, return a DatasetDict whose "train" / "test" entries
            are selected with the notebook-level `trn_idxs` / `val_idxs`;
            when False, return the whole tokenized dataset.

    Returns:
        A DatasetDict (train=True) or the tokenized dataset (train=False).
    """
    ds = Dataset.from_pandas(df)
    # The former `ds.todevice = ('cuda:0')` line was removed: it only attached
    # an unused string attribute to `ds` and moved nothing to the GPU, and
    # `.map` below returns a new object anyway.
    to_remove = ['discourse_text', 'discourse_type', 'inputs', 'discourse_id', 'essay_id']
    tok_ds = ds.map(tok_func, batched=True, remove_columns=to_remove)

    if not train:
        return tok_ds
    return DatasetDict({"train": tok_ds.select(trn_idxs), "test": tok_ds.select(val_idxs)})

In [21]:
# Training hyperparameters read by `get_trainer`.
lr = 8e-5     # learning rate
bs = 16       # per-device training batch size (evaluation uses bs*2)
wd = 0.01     # weight decay
epochs = 1    # number of training epochs

In [22]:
from sklearn.metrics import log_loss
import torch.nn.functional as F
def score(preds):
    """Compute the multi-class log loss for an evaluation result.

    Args:
        preds: object exposing `predictions` (raw per-class logits) and
            `label_ids` (the true labels), as handed to `compute_metrics`.

    Returns:
        A dict with the single key 'log loss'.
    """
    logits = torch.Tensor(preds.predictions)
    # Normalise over the last (class) axis explicitly; calling softmax without
    # `dim` relies on a deprecated implicit choice of axis.
    probs = F.softmax(logits, dim=-1)
    # Pass the full set of class ids so the metric is still defined when an
    # evaluation split happens not to contain every class.
    # Assumes class ids are 0..n_classes-1, matching the logit columns.
    class_ids = list(range(probs.shape[-1]))
    return {'log loss': log_loss(preds.label_ids, probs.numpy(), labels=class_ids)}

In [23]:
def get_trainer(dds):
    """Build a `Trainer` that fine-tunes DeBERTa-large on a tokenized dataset dict.

    Args:
        dds: mapping with a 'train' split (used for training) and a 'test'
            split (used as the evaluation set).

    Returns:
        A `Trainer` configured with the notebook-level hyperparameters
        (`lr`, `bs`, `wd`, `epochs`), the notebook-level `tokenizer`, and
        `score` as the metric function.
    """
    args = TrainingArguments(
        'outputs', learning_rate=lr, warmup_ratio=0.1, lr_scheduler_type='cosine', fp16=True,
        evaluation_strategy="epoch", per_device_train_batch_size=bs, per_device_eval_batch_size=bs*2,
        num_train_epochs=epochs, weight_decay=wd, report_to='none')

    # The `label` column built earlier in this notebook holds ONE integer class
    # id (0/1/2) per row, and `score` applies a softmax over the logits, so this
    # is single-label classification. The previous "multi_label_classification"
    # setting selects a BCE-with-logits loss that expects a multi-hot float
    # target per class, which does not match integer class ids.
    model = DebertaForSequenceClassification.from_pretrained(
        "microsoft/deberta-large", num_labels=3, problem_type="single_label_classification")

    # NOTE(review): Trainer also places the model on its own device, so this
    # explicit move is probably redundant — kept to preserve existing behavior.
    device = torch.device("cuda:0")
    model.to(device)

    return Trainer(model, args, train_dataset=dds['train'], eval_dataset=dds['test'],
                   tokenizer=tokenizer, compute_metrics=score)

In [24]:
import os
# Turn on NCCL's INFO-level logging for the training run below.
os.environ.update({"NCCL_DEBUG": "INFO"})

In [25]:
# Build the trainer for the train/validation split and start fine-tuning.
# NOTE(review): the output recorded below ends in
# "RuntimeError: NCCL Error 2: unhandled system error", i.e. this run did not
# complete on the machine it was executed on.
trainer = get_trainer(dds)
trainer.train()

Some weights of the model checkpoint at microsoft/deberta-large were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler

innerLab03-Dell:966907:966907 [0] NCCL INFO Bootstrap : Using [0]eno4:192.168.0.138<0> [1]br-f27996258617:172.19.0.1<0> [2]br-6cb110c1c75c:172.21.0.1<0> [3]br-9887d1db9997:172.22.0.1<0> [4]veth3eb884a:fe80::e8d6:d6ff:fe95:4656%veth3eb884a<0> [5]vethda223e3:fe80::7009:86ff:fe3e:8cbb%vethda223e3<0> [6]veth565ff73:fe80::6c07:86ff:fe89:a597%veth565ff73<0> [7]veth9ca141a:fe80::d8fb:15ff:fe1a:4a87%veth9ca141a<0> [8]vethf8502bf:fe80::c8d7:53ff:fe5d:d039%vethf8502bf<0> [9]veth1ed4325:fe80::94d8:ecff:fef7:427%veth1ed4325<0> [10]vethc00f921:fe80::e854:65ff:fe67:2009%vethc00f921<0> [11]veth3fd2dca:fe80::841b:56ff:fe1a:398e%veth3fd2dca<0> [12]veth68f2179:fe80::b8e9:5cff:fe04:2721%veth68f2179<0> [13]vethbbe2260:fe80::c72:8ff:fe36:7aac%vethbbe2260<0> [14]veth29b2cfc:fe80::f42e:12ff:fe72:527d%veth29b2cfc<0> [15]veth72cbe3d:fe80::f0e8:55ff:fe83:93dd%veth72cbe3d<0>
innerLab03-Dell:966907:966907 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation

innerLab03-Dell:9

RuntimeError: NCCL Error 2: unhandled system error