# Classification 3. Hierarchical-Separate Approach 

In [4]:
!pip install transformers

[0m

In [1]:
from transformers import BertTokenizer
import torch
import numpy as np
from torch import nn
from transformers import BertModel
from torch.optim import Adam
from tqdm import tqdm
import pandas as pd
from sklearn.metrics import confusion_matrix

In [3]:
df = pd.read_csv("beer_df_large.csv", sep="\t")

In [4]:
df = df.drop(columns=['Name', 'Company', 'Region', 'ABV', 'Avg', 'Rate'])

In [5]:
df

Unnamed: 0,Group,Style,Review
0,Bocks,Bock,From a 12oz bottle into a cocktail glass.\n\nC...
1,Bocks,Bock,"Great relaxing beer. Very mellow, great taste-..."
2,Bocks,Bock,"Appearance: Clear, bright copper color. Frothy..."
3,Bocks,Bock,As a German staying for holidays in the US I c...
4,Bocks,Bock,I would guess this is Shiner's #1 beer. It's p...
...,...,...,...
174679,Wild/Sour Beers,Wild Ale,"A really well done, well balanced sour with an..."
174680,Wild/Sour Beers,Wild Ale,"22oz bottle. Poured out a slightly hazy, brigh..."
174681,Wild/Sour Beers,Wild Ale,"There's lots of apricot in this beer, just a t..."
174682,Wild/Sour Beers,Wild Ale,Poured from a 22oz bomber into my Drie de Font...


In [6]:
groups = df['Group'].value_counts().index.tolist()
groups

['Pale Lagers',
 'Pale Ales',
 'Strong Ales',
 'Stouts',
 'India Pale Ales',
 'Wild/Sour Beers',
 'Specialty Beers',
 'Dark Lagers',
 'Porters',
 'Wheat Beers',
 'Brown Ales',
 'Bocks',
 'Dark Ales']

In [7]:
group_dfs = []
for g in groups:
    new_df = df[df['Group'] == g]
    new_df = new_df.drop('Group', axis=1)
    group_dfs.append(new_df)

In [8]:
group_dfs[0]

Unnamed: 0,Style,Review
66877,American Adjunct Lager,"Sweet, clean, green apples, mild bitterness. S..."
66878,American Adjunct Lager,Look...Clear pale yellow with a white carbonat...
66879,American Adjunct Lager,Look: Crystal clear yellow with an awesome-loo...
66880,American Adjunct Lager,Defies comparison. Hating on this is like comp...
66881,American Adjunct Lager,"Might be my least favorite AAL, mostly from th..."
...,...,...
93688,Malt Liquor,"Bottled in the 1970's era, and drank almost 50..."
93689,Malt Liquor,"Small pour at brewery. Looks lika glass of OJ,..."
93690,Malt Liquor,Small pour at brewery. Copper body with tiny h...
93691,Malt Liquor,"Four pack from brewery. Dark golden, patchy he..."


In [12]:
#df['Review'] = df.apply(lambda x: 'Group: {}. Review: {}'.format(x['Group'], x['Review']), axis=1)
#df = df.drop('Group', axis=1)

In [9]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

In [10]:
lables_list = []
for gd in group_dfs:
    labels = {}
    for i, name in enumerate(gd['Style'].value_counts().index.tolist()):
        labels[name] = i
    lables_list.append(labels)

In [19]:
lables_list[2]

{'Tripel': 0,
 'Belgian Pale Strong Ale': 1,
 'Belgian Dark Strong Ale': 2,
 'English Barleywine': 3,
 'American Barleywine': 4,
 'Scotch Ale / Wee Heavy': 5,
 'Quadrupel (Quad)': 6,
 'American Strong Ale': 7,
 'Old Ale': 8,
 'Imperial Red Ale': 9,
 'English Strong Ale': 10,
 'Wheatwine': 11}

In [26]:
res = {}

In [27]:
for i, name in enumerate(groups):
    res[name] = {id: style for style, id in lables_list[i].items()}

In [28]:
res

{'Pale Lagers': {0: 'American Adjunct Lager',
  1: 'German Pilsner',
  2: 'European Pale Lager',
  3: 'American Lager',
  4: 'Helles',
  5: 'Bohemian / Czech Pilsner',
  6: 'Light Lager',
  7: 'Kellerbier / Zwickelbier',
  8: 'European / Dortmunder Export Lager',
  9: 'Festbier / Wiesnbier',
  10: 'European Strong Lager',
  11: 'Malt Liquor',
  12: 'India Pale Lager (IPL)',
  13: 'Imperial Pilsner',
  14: 'Czech Pale Lager'},
 'Pale Ales': {0: 'English Pale Ale',
  1: 'American Blonde Ale',
  2: 'Saison',
  3: 'American Pale Ale',
  4: 'Kölsch',
  5: 'American Amber / Red Ale',
  6: 'Belgian Pale Ale',
  7: 'English Bitter',
  8: 'Extra Special / Strong Bitter (ESB)',
  9: 'Irish Red Ale',
  10: 'Belgian Blonde Ale',
  11: 'Bière de Garde',
  12: 'Grisette',
  13: 'English Pale Mild Ale'},
 'Strong Ales': {0: 'Tripel',
  1: 'Belgian Pale Strong Ale',
  2: 'Belgian Dark Strong Ale',
  3: 'English Barleywine',
  4: 'American Barleywine',
  5: 'Scotch Ale / Wee Heavy',
  6: 'Quadrupel (Qu

In [12]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, df_, labels_):
        self.labels = [labels_[label] for label in df_['Style']]
        self.texts = [tokenizer(text, 
                                padding='max_length', 
                                max_length = 512, 
                                truncation=True, 
                                return_tensors="pt") 
                      for text in df_['Review']]

    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        # Fetch a batch of labels
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        # Fetch a batch of inputs
        return self.texts[idx]

    def __getitem__(self, idx):
        batch_texts = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)
        return batch_texts, batch_y

In [18]:
!pip install sklearn


Collecting sklearn
  Downloading sklearn-0.0.post1.tar.gz (3.6 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: sklearn
  Building wheel for sklearn (setup.py) ... [?25ldone
[?25h  Created wheel for sklearn: filename=sklearn-0.0.post1-py3-none-any.whl size=2344 sha256=855bf7a0d608464fd6665585671b00ce0ad48d3a3599726c7fd197b273976930
  Stored in directory: /root/.cache/pip/wheels/15/9a/b1/2478e73a520d596fab614693f5cd1beef4ba3db737bed1ac7d
Successfully built sklearn
Installing collected packages: sklearn
Successfully installed sklearn-0.0.post1
[0m

In [13]:
from sklearn.model_selection import train_test_split

df_splits = []
for gd in group_dfs:
    df_train, df_test = train_test_split(gd, train_size = 0.9, random_state=42, stratify = gd['Style'])
    df_train, df_val = train_test_split(df_train, train_size = 0.88889, random_state=42, stratify = df_train['Style'])
    df_splits.append((df_train, df_val, df_test))
    print(len(df_train), len(df_val), len(df_test))

21452 2682 2682
18504 2313 2313
16604 2076 2076
13391 1674 1674
11332 1417 1417
10658 1333 1333
9759 1220 1220
9737 1218 1218
7602 951 951
6775 847 847
5589 699 699
4652 582 582
3683 461 461


In [14]:
class BertClassifier(nn.Module):
    def __init__(self, num_classes, dropout=0.2):
        super(BertClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-cased')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, num_classes)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):
        _, pooled_output = self.bert(input_ids= input_id, attention_mask=mask,return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)
        return final_layer

In [15]:
def train(model, train_data, val_data, learning_rate, epochs, labels):
    print('Loading data into a dataset...')

    train, val = Dataset(train_data, labels), Dataset(val_data, labels)

    train_dataloader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=2)
    print('Data loaded')

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr= learning_rate)

    if use_cuda:
            model = model.cuda()
            criterion = criterion.cuda()

    for epoch_num in range(epochs):

            total_acc_train = 0
            total_loss_train = 0

            for train_input, train_label in tqdm(train_dataloader):

                train_label = train_label.to(device)
                mask = train_input['attention_mask'].to(device)
                input_id = train_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask)
                
                batch_loss = criterion(output, train_label.long())
                total_loss_train += batch_loss.item()
                
                acc = (output.argmax(dim=1) == train_label).sum().item()
                total_acc_train += acc

                model.zero_grad()
                batch_loss.backward()
                optimizer.step()
            
            total_acc_val = 0
            total_loss_val = 0

            with torch.no_grad():

                for val_input, val_label in val_dataloader:

                    val_label = val_label.to(device)
                    mask = val_input['attention_mask'].to(device)
                    input_id = val_input['input_ids'].squeeze(1).to(device)

                    output = model(input_id, mask)

                    batch_loss = criterion(output, val_label.long())
                    total_loss_val += batch_loss.item()
                    
                    acc = (output.argmax(dim=1) == val_label).sum().item()
                    total_acc_val += acc
            
            print(
                f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_data): .3f} \
                | Train Accuracy: {total_acc_train / len(train_data): .3f} \
                | Val Loss: {total_loss_val / len(val_data): .3f} \
                | Val Accuracy: {total_acc_val / len(val_data): .3f}')
                  

In [17]:
EPOCHS = 5
models = []
LR = 1e-6

for ds in range(len(df_splits)):
    model = BertClassifier(len(lables_list[ds]))
    print(f'Training for group: {groups[ds]}')
    train(model, df_splits[ds][0], df_splits[ds][1], LR, EPOCHS, lables_list[ds])
    torch.save(model, f'{ds}.pt')

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: Pale Lagers
Loading data into a dataset...
Data loaded


100%|██████████| 10726/10726 [11:20<00:00, 15.75it/s]


Epochs: 1 | Train Loss:  1.212                 | Train Accuracy:  0.210                 | Val Loss:  1.054                 | Val Accuracy:  0.340


100%|██████████| 10726/10726 [11:37<00:00, 15.38it/s]


Epochs: 2 | Train Loss:  0.987                 | Train Accuracy:  0.385                 | Val Loss:  0.921                 | Val Accuracy:  0.428


100%|██████████| 10726/10726 [12:04<00:00, 14.80it/s]


Epochs: 3 | Train Loss:  0.854                 | Train Accuracy:  0.474                 | Val Loss:  0.862                 | Val Accuracy:  0.455


100%|██████████| 10726/10726 [12:03<00:00, 14.83it/s]


Epochs: 4 | Train Loss:  0.749                 | Train Accuracy:  0.541                 | Val Loss:  0.841                 | Val Accuracy:  0.457


100%|██████████| 10726/10726 [11:26<00:00, 15.62it/s]


Epochs: 5 | Train Loss:  0.648                 | Train Accuracy:  0.609                 | Val Loss:  0.838                 | Val Accuracy:  0.463


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: Pale Ales
Loading data into a dataset...
Data loaded


100%|██████████| 9252/9252 [09:48<00:00, 15.72it/s]


Epochs: 1 | Train Loss:  1.154                 | Train Accuracy:  0.260                 | Val Loss:  0.924                 | Val Accuracy:  0.439


100%|██████████| 9252/9252 [09:50<00:00, 15.67it/s]


Epochs: 2 | Train Loss:  0.810                 | Train Accuracy:  0.500                 | Val Loss:  0.770                 | Val Accuracy:  0.514


100%|██████████| 9252/9252 [10:11<00:00, 15.14it/s]


Epochs: 3 | Train Loss:  0.682                 | Train Accuracy:  0.574                 | Val Loss:  0.720                 | Val Accuracy:  0.531


100%|██████████| 9252/9252 [09:47<00:00, 15.76it/s]


Epochs: 4 | Train Loss:  0.597                 | Train Accuracy:  0.626                 | Val Loss:  0.715                 | Val Accuracy:  0.537


100%|██████████| 9252/9252 [09:47<00:00, 15.75it/s]


Epochs: 5 | Train Loss:  0.517                 | Train Accuracy:  0.682                 | Val Loss:  0.723                 | Val Accuracy:  0.533


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: Strong Ales
Loading data into a dataset...
Data loaded


100%|██████████| 8302/8302 [08:47<00:00, 15.73it/s]


Epochs: 1 | Train Loss:  1.081                 | Train Accuracy:  0.253                 | Val Loss:  0.888                 | Val Accuracy:  0.417


100%|██████████| 8302/8302 [08:50<00:00, 15.64it/s]


Epochs: 2 | Train Loss:  0.800                 | Train Accuracy:  0.455                 | Val Loss:  0.768                 | Val Accuracy:  0.475


100%|██████████| 8302/8302 [09:15<00:00, 14.94it/s]


Epochs: 3 | Train Loss:  0.694                 | Train Accuracy:  0.529                 | Val Loss:  0.719                 | Val Accuracy:  0.504


100%|██████████| 8302/8302 [09:04<00:00, 15.25it/s]


Epochs: 4 | Train Loss:  0.613                 | Train Accuracy:  0.594                 | Val Loss:  0.722                 | Val Accuracy:  0.518


100%|██████████| 8302/8302 [08:47<00:00, 15.74it/s]


Epochs: 5 | Train Loss:  0.530                 | Train Accuracy:  0.659                 | Val Loss:  0.702                 | Val Accuracy:  0.528


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: Stouts
Loading data into a dataset...
Data loaded


100%|██████████| 6696/6696 [07:05<00:00, 15.73it/s]


Epochs: 1 | Train Loss:  0.964                 | Train Accuracy:  0.257                 | Val Loss:  0.863                 | Val Accuracy:  0.367


100%|██████████| 6696/6696 [07:04<00:00, 15.77it/s]


Epochs: 2 | Train Loss:  0.813                 | Train Accuracy:  0.411                 | Val Loss:  0.793                 | Val Accuracy:  0.421


100%|██████████| 6696/6696 [07:04<00:00, 15.76it/s]


Epochs: 3 | Train Loss:  0.727                 | Train Accuracy:  0.481                 | Val Loss:  0.769                 | Val Accuracy:  0.438


100%|██████████| 6696/6696 [07:04<00:00, 15.76it/s]


Epochs: 4 | Train Loss:  0.658                 | Train Accuracy:  0.541                 | Val Loss:  0.765                 | Val Accuracy:  0.434


100%|██████████| 6696/6696 [07:04<00:00, 15.77it/s]


Epochs: 5 | Train Loss:  0.575                 | Train Accuracy:  0.614                 | Val Loss:  0.774                 | Val Accuracy:  0.428


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: India Pale Ales
Loading data into a dataset...
Data loaded


100%|██████████| 5666/5666 [06:01<00:00, 15.66it/s]


Epochs: 1 | Train Loss:  0.954                 | Train Accuracy:  0.279                 | Val Loss:  0.819                 | Val Accuracy:  0.434


100%|██████████| 5666/5666 [06:14<00:00, 15.15it/s]


Epochs: 2 | Train Loss:  0.667                 | Train Accuracy:  0.552                 | Val Loss:  0.551                 | Val Accuracy:  0.638


100%|██████████| 5666/5666 [05:59<00:00, 15.78it/s]


Epochs: 3 | Train Loss:  0.492                 | Train Accuracy:  0.684                 | Val Loss:  0.499                 | Val Accuracy:  0.659


100%|██████████| 5666/5666 [06:04<00:00, 15.54it/s]


Epochs: 4 | Train Loss:  0.397                 | Train Accuracy:  0.754                 | Val Loss:  0.483                 | Val Accuracy:  0.671


100%|██████████| 5666/5666 [06:19<00:00, 14.95it/s]


Epochs: 5 | Train Loss:  0.315                 | Train Accuracy:  0.816                 | Val Loss:  0.484                 | Val Accuracy:  0.680


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: Wild/Sour Beers
Loading data into a dataset...
Data loaded


100%|██████████| 5329/5329 [05:38<00:00, 15.73it/s]


Epochs: 1 | Train Loss:  1.071                 | Train Accuracy:  0.244                 | Val Loss:  0.920                 | Val Accuracy:  0.405


100%|██████████| 5329/5329 [05:43<00:00, 15.53it/s]


Epochs: 2 | Train Loss:  0.802                 | Train Accuracy:  0.487                 | Val Loss:  0.749                 | Val Accuracy:  0.521


100%|██████████| 5329/5329 [05:56<00:00, 14.95it/s]


Epochs: 3 | Train Loss:  0.649                 | Train Accuracy:  0.585                 | Val Loss:  0.657                 | Val Accuracy:  0.562


100%|██████████| 5329/5329 [05:55<00:00, 14.97it/s]


Epochs: 4 | Train Loss:  0.542                 | Train Accuracy:  0.654                 | Val Loss:  0.616                 | Val Accuracy:  0.586


100%|██████████| 5329/5329 [05:56<00:00, 14.93it/s]


Epochs: 5 | Train Loss:  0.451                 | Train Accuracy:  0.717                 | Val Loss:  0.617                 | Val Accuracy:  0.587


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: Specialty Beers
Loading data into a dataset...
Data loaded


100%|██████████| 4880/4880 [05:23<00:00, 15.10it/s]


Epochs: 1 | Train Loss:  1.152                 | Train Accuracy:  0.242                 | Val Loss:  0.947                 | Val Accuracy:  0.416


100%|██████████| 4880/4880 [05:09<00:00, 15.77it/s]


Epochs: 2 | Train Loss:  0.784                 | Train Accuracy:  0.529                 | Val Loss:  0.618                 | Val Accuracy:  0.737


100%|██████████| 4880/4880 [05:25<00:00, 14.98it/s]


Epochs: 3 | Train Loss:  0.413                 | Train Accuracy:  0.797                 | Val Loss:  0.415                 | Val Accuracy:  0.769


100%|██████████| 4880/4880 [05:26<00:00, 14.96it/s]


Epochs: 4 | Train Loss:  0.320                 | Train Accuracy:  0.835                 | Val Loss:  0.380                 | Val Accuracy:  0.783


100%|██████████| 4880/4880 [05:14<00:00, 15.53it/s]


Epochs: 5 | Train Loss:  0.251                 | Train Accuracy:  0.873                 | Val Loss:  0.355                 | Val Accuracy:  0.800


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: Dark Lagers
Loading data into a dataset...
Data loaded


100%|██████████| 4869/4869 [05:09<00:00, 15.75it/s]


Epochs: 1 | Train Loss:  1.003                 | Train Accuracy:  0.280                 | Val Loss:  0.906                 | Val Accuracy:  0.375


100%|██████████| 4869/4869 [05:13<00:00, 15.53it/s]


Epochs: 2 | Train Loss:  0.859                 | Train Accuracy:  0.423                 | Val Loss:  0.836                 | Val Accuracy:  0.428


100%|██████████| 4869/4869 [05:11<00:00, 15.64it/s]


Epochs: 3 | Train Loss:  0.767                 | Train Accuracy:  0.490                 | Val Loss:  0.763                 | Val Accuracy:  0.484


100%|██████████| 4869/4869 [05:05<00:00, 15.92it/s]


Epochs: 4 | Train Loss:  0.663                 | Train Accuracy:  0.571                 | Val Loss:  0.680                 | Val Accuracy:  0.539


100%|██████████| 4869/4869 [05:10<00:00, 15.70it/s]


Epochs: 5 | Train Loss:  0.535                 | Train Accuracy:  0.649                 | Val Loss:  0.644                 | Val Accuracy:  0.565


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: Porters
Loading data into a dataset...
Data loaded


100%|██████████| 3801/3801 [04:03<00:00, 15.63it/s]


Epochs: 1 | Train Loss:  0.856                 | Train Accuracy:  0.258                 | Val Loss:  0.776                 | Val Accuracy:  0.399


100%|██████████| 3801/3801 [04:02<00:00, 15.69it/s]


Epochs: 2 | Train Loss:  0.724                 | Train Accuracy:  0.452                 | Val Loss:  0.715                 | Val Accuracy:  0.452


100%|██████████| 3801/3801 [04:01<00:00, 15.75it/s]


Epochs: 3 | Train Loss:  0.645                 | Train Accuracy:  0.529                 | Val Loss:  0.700                 | Val Accuracy:  0.468


100%|██████████| 3801/3801 [04:02<00:00, 15.68it/s]


Epochs: 4 | Train Loss:  0.578                 | Train Accuracy:  0.589                 | Val Loss:  0.692                 | Val Accuracy:  0.482


100%|██████████| 3801/3801 [04:01<00:00, 15.73it/s]


Epochs: 5 | Train Loss:  0.502                 | Train Accuracy:  0.653                 | Val Loss:  0.697                 | Val Accuracy:  0.484


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: Wheat Beers
Loading data into a dataset...
Data loaded


100%|██████████| 3388/3388 [03:36<00:00, 15.67it/s]


Epochs: 1 | Train Loss:  0.734                 | Train Accuracy:  0.456                 | Val Loss:  0.593                 | Val Accuracy:  0.590


100%|██████████| 3388/3388 [03:34<00:00, 15.76it/s]


Epochs: 2 | Train Loss:  0.507                 | Train Accuracy:  0.678                 | Val Loss:  0.489                 | Val Accuracy:  0.671


100%|██████████| 3388/3388 [03:35<00:00, 15.75it/s]


Epochs: 3 | Train Loss:  0.414                 | Train Accuracy:  0.739                 | Val Loss:  0.457                 | Val Accuracy:  0.710


100%|██████████| 3388/3388 [03:35<00:00, 15.69it/s]


Epochs: 4 | Train Loss:  0.343                 | Train Accuracy:  0.792                 | Val Loss:  0.445                 | Val Accuracy:  0.700


100%|██████████| 3388/3388 [03:35<00:00, 15.74it/s]


Epochs: 5 | Train Loss:  0.271                 | Train Accuracy:  0.847                 | Val Loss:  0.457                 | Val Accuracy:  0.699


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: Brown Ales
Loading data into a dataset...
Data loaded


100%|██████████| 2795/2795 [03:05<00:00, 15.09it/s]


Epochs: 1 | Train Loss:  0.766                 | Train Accuracy:  0.327                 | Val Loss:  0.732                 | Val Accuracy:  0.362


100%|██████████| 2795/2795 [03:07<00:00, 14.91it/s]


Epochs: 2 | Train Loss:  0.677                 | Train Accuracy:  0.433                 | Val Loss:  0.661                 | Val Accuracy:  0.421


100%|██████████| 2795/2795 [03:07<00:00, 14.90it/s]


Epochs: 3 | Train Loss:  0.590                 | Train Accuracy:  0.515                 | Val Loss:  0.617                 | Val Accuracy:  0.474


100%|██████████| 2795/2795 [03:06<00:00, 14.97it/s]


Epochs: 4 | Train Loss:  0.515                 | Train Accuracy:  0.587                 | Val Loss:  0.594                 | Val Accuracy:  0.511


100%|██████████| 2795/2795 [03:06<00:00, 14.97it/s]


Epochs: 5 | Train Loss:  0.443                 | Train Accuracy:  0.658                 | Val Loss:  0.594                 | Val Accuracy:  0.492


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: Bocks
Loading data into a dataset...
Data loaded


100%|██████████| 2326/2326 [02:35<00:00, 14.94it/s]


Epochs: 1 | Train Loss:  0.744                 | Train Accuracy:  0.335                 | Val Loss:  0.677                 | Val Accuracy:  0.416


100%|██████████| 2326/2326 [02:35<00:00, 14.91it/s]


Epochs: 2 | Train Loss:  0.592                 | Train Accuracy:  0.522                 | Val Loss:  0.535                 | Val Accuracy:  0.610


100%|██████████| 2326/2326 [02:35<00:00, 14.93it/s]


Epochs: 3 | Train Loss:  0.465                 | Train Accuracy:  0.669                 | Val Loss:  0.466                 | Val Accuracy:  0.682


100%|██████████| 2326/2326 [02:36<00:00, 14.91it/s]


Epochs: 4 | Train Loss:  0.391                 | Train Accuracy:  0.727                 | Val Loss:  0.450                 | Val Accuracy:  0.687


100%|██████████| 2326/2326 [02:35<00:00, 14.96it/s]


Epochs: 5 | Train Loss:  0.332                 | Train Accuracy:  0.775                 | Val Loss:  0.453                 | Val Accuracy:  0.686


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training for group: Dark Ales
Loading data into a dataset...
Data loaded


100%|██████████| 1842/1842 [02:00<00:00, 15.28it/s]


Epochs: 1 | Train Loss:  0.632                 | Train Accuracy:  0.448                 | Val Loss:  0.480                 | Val Accuracy:  0.672


100%|██████████| 1842/1842 [01:56<00:00, 15.75it/s]


Epochs: 2 | Train Loss:  0.356                 | Train Accuracy:  0.750                 | Val Loss:  0.323                 | Val Accuracy:  0.746


100%|██████████| 1842/1842 [01:56<00:00, 15.76it/s]


Epochs: 3 | Train Loss:  0.257                 | Train Accuracy:  0.814                 | Val Loss:  0.304                 | Val Accuracy:  0.755


100%|██████████| 1842/1842 [01:56<00:00, 15.75it/s]


Epochs: 4 | Train Loss:  0.194                 | Train Accuracy:  0.865                 | Val Loss:  0.292                 | Val Accuracy:  0.779


100%|██████████| 1842/1842 [01:56<00:00, 15.81it/s]


Epochs: 5 | Train Loss:  0.142                 | Train Accuracy:  0.920                 | Val Loss:  0.293                 | Val Accuracy:  0.807


In [None]:
def evaluate(model, test_data):
    y_pred = []
    y_true = []
    print('Loading data...')
    test = Dataset(test_data)

    test_dataloader = torch.utils.data.DataLoader(test, batch_size=2)
    print('Data loaded')

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    if use_cuda:
        model = model.cuda()

    total_acc_test = 0
    
    with torch.no_grad():
        for test_input, test_label in test_dataloader:

            test_label = test_label.to(device)
            mask = test_input['attention_mask'].to(device)
            input_id = test_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, mask)
            y_pred.append(output.argmax(dim=1))
            y_true.append(test_label)

            acc = (output.argmax(dim=1) == test_label).sum().item()
            total_acc_test += acc
    
    print(f'Test Accuracy: {total_acc_test / len(test_data): .3f}')
    return y_pred, y_true
    

In [None]:
for m in range(len(models)):
    y_pred, y_true = evaluate(models[m], df_splits[2])

In [None]:
import torch
load = torch.load('beert_model_simple.pt')

confusion matrix

In [None]:
t = [x.cpu() for x in y_true]

In [None]:
classes = labels.keys()
cf_matrix = confusion_matrix([x.cpu() for x in y_true], [y.cpu() for y in y_pred])
df_cm = pd.DataFrame(cf_matrix / np.sum(cf_matrix, axis=1)[:, None], index = [i for i in classes],
                     columns = [i for i in classes])
plt.figure(figsize = (12,7))
sn.heatmap(df_cm, annot=True)
plt.savefig('output.png')