In [1]:
# Importing the libraries needed
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import seaborn as sns
import transformers
import json
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaModel, RobertaTokenizerFast, RobertaConfig, AutoTokenizer
import logging
from tqdm import tqdm
from collections import OrderedDict
logging.basicConfig(level=logging.ERROR)

In [3]:
df = pd.read_csv('../../data/Airline_sentiment_raw.csv')
df_train, df_test = train_test_split(df, test_size=0.2, random_state=52)

# Data Preprocessing

In [4]:
X_train = np.array(list(df_train['text']))
X_test = np.array(list(df_test['text']))
from sklearn.preprocessing import LabelEncoder
e = LabelEncoder()
y_train = e.fit_transform(df_train['airline_sentiment'])
y_test = e.transform(df_test['airline_sentiment'])

In [5]:
torch.from_numpy(y_test)

tensor([1, 0, 0,  ..., 0, 0, 0])

In [6]:
# Custom dataset
class CustomTextDataset(Dataset):
    def __init__(self, text, labels):
        self.labels = labels
        self.text = text
        
    def __len__(self):
            return len(self.labels)
        
    def __getitem__(self, idx):
            label = self.labels[idx]
            text = self.text[idx]
            sample = {"Text": text, "Class": label}
            return sample


In [7]:
dataset = CustomTextDataset(X_train, y_train)

In [8]:
DL_DS = DataLoader(dataset, batch_size=2, shuffle = True)

# Model

In [9]:
from typing import List
class RobertaClassification(torch.nn.Module):
    MODEL_ID = 'roberta-base'
    DEFAULT_LOSS = torch.nn.CrossEntropyLoss(reduction='mean')
    #DEFAULT_OPT = torch.optim.Adam()
    
    def __init__(self, hidden_dims: List[int]) -> None:
        super().__init__()
        self.tokenizer = RobertaTokenizerFast.from_pretrained(self.MODEL_ID)
        self.roberta = RobertaModel.from_pretrained(self.MODEL_ID)
        self.layers = []
        for i in range(len(hidden_dims)-1):
            self.layers.append((f'conv{i}', torch.nn.Linear(hidden_dims[i],hidden_dims[i+1])))
            self.layers.append((f'relu{i}', torch.nn.ReLU()))
        self.layers.append(('final_layer', torch.nn.Linear(hidden_dims[-1], 3)))
    
    def build(self) -> None:
        def init_normal(m):
            if type(m) == torch.nn.Linear:
                torch.nn.init.uniform_(m.weight)
        self.model = torch.nn.Sequential(OrderedDict(self.layers))
        self.model.apply(init_normal)

    def forward(self, x: str):
        x = self.tokenizer(x, padding=True, truncation=True, max_length=512, return_tensors="pt")
        x = self.roberta(**x).pooler_output
        return self.model(x)
    
    def train(self, 
              batch_size: int = 64, 
              loss: torch.nn.modules.loss = DEFAULT_LOSS,
              #optimizer: torch.nn.modules = self.DEFAULT_OPT,
             ):
        ...
        

In [10]:
rc = RobertaClassification([768,512,128,64])

Downloading tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
rc.build()
rc(['How are you'])

tensor([[866535.1250, 892398.8750, 771191.6875]], grad_fn=<AddmmBackward0>)

In [12]:
N_EPOCHS = 500
BATCH_SIZE = 256
dl = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle = True)
loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')
optimizer = torch.optim.Adam(rc.parameters())
dataset_size = len(dl.dataset)

In [None]:
train_loss_hist = []
train_acc_hist = []
test_loss_hist = []
test_acc_hist = []

for epoch in range(N_EPOCHS):
    print(f"Epoch {epoch + 1}\n-------------------------------")
    epoch_loss = []
    epoch_acc = []
    # set model in training mode and run through each batch
    rc.train()
    
    # Loop over batches in an epoch using DataLoader
    for id_batch, batched_data in enumerate(dl):
        x_batch = batched_data['Text']
        y_batch = batched_data['Class']

        y_batch_pred = rc(x_batch)

        loss = loss_fn(y_batch_pred, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc = (torch.argmax(y_batch_pred, 1) == y_batch).float().mean()
        epoch_loss.append(float(loss))
        epoch_acc.append(float(acc))
        # Every 100 batches, print the loss for this batch
        # as well as the number of examples processed so far 
        if id_batch % 100 == 0:
            loss, current = loss.item(), (id_batch + 1)* len(x_batch)
            print(f"loss: {loss:>7f}  [{current:>5d}/{dataset_size:>5d}]")
    
    rc.eval()
    y_pred = rc(list(X_test[:256]))
    ce = loss_fn(y_pred, torch.from_numpy(y_test[:256]))
    acc = (torch.argmax(y_pred, 1) == torch.from_numpy(y_test[:256])).float().mean()
    
    train_loss_hist.append(np.mean(epoch_loss))
    train_acc_hist.append(np.mean(epoch_acc))
    test_loss_hist.append(ce)
    test_acc_hist.append(acc)
    
    print(f"train loss: {train_loss_hist[-1]:>7f}    train acc: {train_acc_hist[-1]:>5f}")
    print(f"test loss: {test_acc_hist[-1]:>7f}    test acc: {test_acc_hist[-1]:>5f}")
    


Epoch 1
-------------------------------
loss: 43585.707031  [  256/11712]
train loss: 948.493097    train acc: 0.581748
test loss: 0.617188    test acc: 0.617188
Epoch 2
-------------------------------
loss: 0.912096  [  256/11712]
train loss: 0.914123    train acc: 0.627944
test loss: 0.617188    test acc: 0.617188
Epoch 3
-------------------------------
loss: 0.882418  [  256/11712]
train loss: 0.914145    train acc: 0.628199
test loss: 0.617188    test acc: 0.617188
Epoch 4
-------------------------------
loss: 0.949663  [  256/11712]
train loss: 0.914030    train acc: 0.628199
test loss: 0.617188    test acc: 0.617188
Epoch 5
-------------------------------
loss: 0.916506  [  256/11712]
train loss: 0.914171    train acc: 0.628029
test loss: 0.617188    test acc: 0.617188
Epoch 6
-------------------------------
loss: 0.943697  [  256/11712]
train loss: 0.914420    train acc: 0.627831
test loss: 0.617188    test acc: 0.617188
Epoch 7
-------------------------------
loss: 0.946919  [ 

In [1]:
y_test[:256]

NameError: name 'y_test' is not defined

In [102]:
print(y_batch)

tensor([0, 0, 1, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 2, 1, 0, 0, 1, 0, 1,
        2, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 1, 0, 0, 2, 0, 1, 0, 0, 2, 0, 0, 0, 0,
        1, 1, 0, 0, 2, 2, 1, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
        0, 0, 0, 2, 0, 0, 1, 0, 0, 2, 0, 1, 0, 1, 0, 1, 1, 0, 1, 2, 0, 0, 2, 0,
        2, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
        0, 0, 2, 0, 0, 0, 2, 0, 2, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0, 0,
        1, 0, 0, 2, 0, 1, 2, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 2, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 1,
        0, 2, 2, 0, 2, 1, 0, 0, 1, 1, 2, 1, 1, 0, 0, 0])


In [103]:
print(y_batch_pred)

tensor([[12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
        [12.3881, 11.3308, 11.0461],
 

In [4]:
model_id = 'roberta-base'
tokenizer = RobertaTokenizerFast.from_pretrained(model_id)
roberta_model = RobertaModel.from_pretrained(model_id)

# This function tokenizes the input text using the RoBERTa tokenizer. 
# It applies padding and truncation to ensure that all sequences have the same length (256 tokens).
def get_embedding(x):
    ins = tokenizer(x, padding=True, truncation=True, max_length=512, return_tensors="pt")
    return roberta_model(**ins).pooler_output

# X_train = get_embedding(list(df_train['text']))


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
batch_size = 512
n_batches = len(df_train)//batch_size + 1


In [10]:
def gen_data():
    for i in tqdm(range(n_batches)):
        i0 = batch_size*i
        i1 = min([batch_size*(i+1),len(df_train)])
        yield get_embedding(list(df_train.iloc[i0:i1]['text']))

In [None]:
outs = [torch.tensor([0]) for i in range(n_batches)]
i = 0
for data in gen_data():
    outs[i] = data
    i += 1

 13%|███████████▏                                                                          | 3/23 [00:42<04:58, 14.90s/it]

In [14]:
torch.optim.Adam(model.parameters())

NameError: name 'model' is not defined

In [11]:
type()

TypeError: __init__() missing 1 required positional argument: 'params'

In [24]:
rc = RobertaClassification([768,512,128,64])

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [25]:
rc.build()
rc.forward(['How are you'])

tensor([[18.7333]], grad_fn=<AddmmBackward0>)

In [29]:
loss = torch.nn.CrossEntropyLoss(reduction='mean')
optimizer = torch.optim.Adam(rc.parameters())

In [None]:
for t in range(500):
    y_pred = rc.forward(x)

    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [28]:
rc.model.parameters()

<generator object Module.parameters at 0x2f3503dd0>

In [46]:
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)


TypeError: 'int' object is not callable

In [None]:
import torch
N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
loss_fn = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
for t in range(500):
    y_pred = model(x)

    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [7]:
rc = RobertaClassification([768, 512, 128, 64])

NameError: name 'RobertaClassification' is not defined

In [12]:
rc.build()

In [21]:
X_train = list(df_train['text'])

In [None]:
rc.forward(X_train)

In [None]:
loss = torch.nn.CrossEntropyLoss(reduction='mean')
optimizer = torch.optim.Adam()
for t in range(100):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = rc(x)

    # Compute and print loss
    loss = loss(y_pred, y)
    if t % 2000 == 1999:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


In [61]:
rc.forward('my dog is cute!')

TypeError: string indices must be integers

In [51]:
from collections import OrderedDict

hidden_dims = [768,512,128,64]


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [52]:
hl

OrderedDict([('pretrained',
              RobertaModel(
                (embeddings): RobertaEmbeddings(
                  (word_embeddings): Embedding(50265, 768, padding_idx=1)
                  (position_embeddings): Embedding(514, 768, padding_idx=1)
                  (token_type_embeddings): Embedding(1, 768)
                  (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                  (dropout): Dropout(p=0.1, inplace=False)
                )
                (encoder): RobertaEncoder(
                  (layer): ModuleList(
                    (0-11): 12 x RobertaLayer(
                      (attention): RobertaAttention(
                        (self): RobertaSelfAttention(
                          (query): Linear(in_features=768, out_features=768, bias=True)
                          (key): Linear(in_features=768, out_features=768, bias=True)
                          (value): Linear(in_features=768, out_features=768, bias=True)
                        

In [50]:
layers

[('pretrained',
  RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,

In [43]:
torch.nn.Sequential?

[0;31mInit signature:[0m [0mtorch[0m[0;34m.[0m[0mnn[0m[0;34m.[0m[0mSequential[0m[0;34m([0m[0;34m*[0m[0margs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
A sequential container.
Modules will be added to it in the order they are passed in the
constructor. Alternatively, an ``OrderedDict`` of modules can be
passed in. The ``forward()`` method of ``Sequential`` accepts any
input and forwards it to the first module it contains. It then
"chains" outputs to inputs sequentially for each subsequent module,
finally returning the output of the last module.

The value a ``Sequential`` provides over manually calling a sequence
of modules is that it allows treating the whole container as a
single module, such that performing a transformation on the
``Sequential`` applies to each of the modules it stores (which are
each a registered submodule of the ``Sequential``).

What's the difference between a ``Sequential`` and a
:class:`torch.nn.ModuleList`? A ``ModuleL

In [42]:
torch.nn.Sequential(hl)

Sequential(
  (conv0): Linear(in_features=768, out_features=512, bias=True)
  (relu0): ReLU()
  (conv1): Linear(in_features=512, out_features=128, bias=True)
  (relu1): ReLU()
  (conv2): Linear(in_features=128, out_features=64, bias=True)
  (relu2): ReLU()
)

In [35]:
hidden_layers = []
hidden_dims = [768,512,128,64]
for i in range(len(hidden_dims)-1):
    hidden_layers.append(torch.nn.Linear(hidden_dims[i],hidden_dims[i+1]))
    hidden_layers.append(torch.nn.ReLU())
torch.nn.Sequential(hidden_layers)

TypeError: list is not a Module subclass

In [31]:
RobertaModel.from_pretrained?

[0;31mSignature:[0m
[0mRobertaModel[0m[0;34m.[0m[0mfrom_pretrained[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mpretrained_model_name_or_path[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mos[0m[0;34m.[0m[0mPathLike[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0mmodel_args[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mconfig[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mtransformers[0m[0;34m.[0m[0mconfiguration_utils[0m[0;34m.[0m[0mPretrainedConfig[0m[0;34m,[0m [0mstr[0m[0;34m,[0m [0mos[0m[0;34m.[0m[0mPathLike[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcache_dir[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mos[0m[0;34m.[0m[0mPathLike[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mignore_mismatched_sizes[0m[0;34m:[

In [29]:
torch.nn.ReLU?

[0;31mInit signature:[0m [0mtorch[0m[0;34m.[0m[0mnn[0m[0;34m.[0m[0mReLU[0m[0;34m([0m[0minplace[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Applies the rectified linear unit function element-wise:

:math:`\text{ReLU}(x) = (x)^+ = \max(0, x)`

Args:
    inplace: can optionally do the operation in-place. Default: ``False``

Shape:
    - Input: :math:`(*)`, where :math:`*` means any number of dimensions.
    - Output: :math:`(*)`, same shape as the input.

.. image:: ../scripts/activation_images/ReLU.png

Examples::

    >>> m = nn.ReLU()
    >>> input = torch.randn(2)
    >>> output = m(input)


  An implementation of CReLU - https://arxiv.org/abs/1603.05201

    >>> m = nn.ReLU()
    >>> input = torch.randn(2).unsqueeze(0)
    >>> output = torch.cat((m(input), m(-input)))
[0;31mInit docstring:[0m Initializes internal Module state, shared by both nn.Module and ScriptModule.
[0;31mFile:[0m   

In [26]:
RobertaModel.from_pretrained?

[0;31mSignature:[0m
[0mRobertaModel[0m[0;34m.[0m[0mfrom_pretrained[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mpretrained_model_name_or_path[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mos[0m[0;34m.[0m[0mPathLike[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0mmodel_args[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mconfig[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mtransformers[0m[0;34m.[0m[0mconfiguration_utils[0m[0;34m.[0m[0mPretrainedConfig[0m[0;34m,[0m [0mstr[0m[0;34m,[0m [0mos[0m[0;34m.[0m[0mPathLike[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcache_dir[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mos[0m[0;34m.[0m[0mPathLike[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mignore_mismatched_sizes[0m[0;34m:[

In [24]:
torch.nn.Module?

[0;31mInit signature:[0m [0mtorch[0m[0;34m.[0m[0mnn[0m[0;34m.[0m[0mModule[0m[0;34m([0m[0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m [0;34m->[0m [0;32mNone[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing to nest them in
a tree structure. You can assign the submodules as regular attributes::

    import torch.nn as nn
    import torch.nn.functional as F

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(1, 20, 5)
            self.conv2 = nn.Conv2d(20, 20, 5)

        def forward(self, x):
            x = F.relu(self.conv1(x))
            return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will have their
parameters converted too when you call :meth:`to`, etc.

.. note::
    As per the exampl

In [10]:
model

RobertaModel(
  (embeddings): RobertaEmbeddings(
    (word_embeddings): Embedding(50265, 768, padding_idx=1)
    (position_embeddings): Embedding(512, 768, padding_idx=1)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): RobertaEncoder(
    (layer): ModuleList(
      (0-11): 12 x RobertaLayer(
        (attention): RobertaAttention(
          (self): RobertaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): RobertaSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropou

In [4]:
df.head()

Unnamed: 0,tweet_id,airline_sentiment,airline_sentiment_confidence,negativereason,negativereason_confidence,airline,airline_sentiment_gold,name,negativereason_gold,retweet_count,text,tweet_coord,tweet_created,tweet_location,user_timezone
0,5.703061e+17,neutral,1.0,,,Virgin America,,cairdin,,0,@VirginAmerica What @dhepburn said.,,2015-02-24 11:35:52 -0800,,Eastern Time (US & Canada)
1,5.703011e+17,positive,0.3486,,0.0,Virgin America,,jnardino,,0,@VirginAmerica plus you've added commercials t...,,2015-02-24 11:15:59 -0800,,Pacific Time (US & Canada)
2,5.703011e+17,neutral,0.6837,,,Virgin America,,yvonnalynn,,0,@VirginAmerica I didn't today... Must mean I n...,,2015-02-24 11:15:48 -0800,Lets Play,Central Time (US & Canada)
3,5.70301e+17,negative,1.0,Bad Flight,0.7033,Virgin America,,jnardino,,0,@VirginAmerica it's really aggressive to blast...,,2015-02-24 11:15:36 -0800,,Pacific Time (US & Canada)
4,5.703008e+17,negative,1.0,Can't Tell,1.0,Virgin America,,jnardino,,0,@VirginAmerica and it's a really big bad thing...,,2015-02-24 11:14:45 -0800,,Pacific Time (US & Canada)
