In [None]:
# The input CSV is read from, and checkpoints are written to, the same folder.
data_dir = output_dir = 'cloth'
from transformers import BertTokenizer, BertForSequenceClassification, BertConfig, BertModel, AdamW, get_cosine_schedule_with_warmup
import warnings
warnings.filterwarnings("ignore")
import torch
# Identifier handed to the `from_pretrained` loaders for tokenizer, config and encoder.
bert_dir = 'bert-large-uncased'
model_name = 'bert-large-uncased'

# Run on the first GPU when CUDA is available, otherwise on the CPU.
DEVICE = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Every review is padded / truncated to this many tokens by the tokenizer.
max_len = 30
BATCH_SIZE = 64
EPOCHS = 5
# NOTE(review): 1e-3 is far above the 2e-5..5e-5 range normally used when
# fine-tuning all BERT weights — confirm this value is intentional.
LEARNING_RATE = 0.001

In [None]:
# Two-label configuration and the matching tokenizer, both loaded from `bert_dir`.
config = BertConfig.from_pretrained(bert_dir, num_labels=2)
tokenizer = BertTokenizer.from_pretrained(bert_dir)

In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
import torch.nn as nn
from torch.utils.data import TensorDataset,DataLoader,RandomSampler,SequentialSampler
import json,time
from  tqdm import tqdm
from sklearn.metrics import accuracy_score,classification_report
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# Load the review table; a missing review text becomes a single space so
# every row still carries a string.
train_file = os.path.join(data_dir, 'Womens_Clothing_E-Commerce_Reviews.csv')
data = pd.read_csv(train_file).fillna({'Review Text': ' '})

# Categorical metadata columns that get one-hot encoded and appended to the
# feature matrix next to the review text and the age.
meta_cols = ['Division Name', 'Department Name', 'Class Name']


def col2ix(x, col_cnt):
    """Map a raw category value to its integer id.

    Args:
        x: category value taken from the data frame.
        col_cnt: dict mapping known category values to consecutive ids.

    Returns:
        ``col_cnt[x]`` when ``x`` is a known category, otherwise
        ``len(col_cnt)`` — one shared id placed right after the known ones.
    """
    return col_cnt.get(x, len(col_cnt))


dummy_names = []
for c in meta_cols:
    # Build the id table from a *sorted* list of the distinct non-missing
    # values. Iterating a plain `set` of strings yields a different order in
    # every interpreter session (hash randomisation), which silently permuted
    # the one-hot columns between runs. Missing values are excluded here and
    # therefore receive the shared fallback id from `col2ix`.
    categories = sorted(data[c].dropna().unique(), key=str)
    col_cnt = {value: idx for idx, value in enumerate(categories)}
    data[c + '_'] = data[c].apply(lambda x: col2ix(x, col_cnt))
    # Prefix is the first word of the column name ('Division', 'Department', 'Class').
    dummies = pd.get_dummies(data[c + '_'], prefix=c.split()[0])
    names = list(dummies.columns)
    data = pd.concat((data, dummies), axis=1)
    dummy_names += names

# Column 0 is the review text, column 1 the age, the rest the one-hot metadata.
all_cols = ['Review Text'] + ['Age'] + dummy_names

# 80 / 10 / 10 split: hold out 20 % of the rows first, then cut that hold-out
# in half to obtain the dev and test sets (fixed seed for repeatability).
X_train, X_dev, y_train, y_dev = train_test_split(
    np.array(data[all_cols]),
    np.array(data['Recommended IND']),
    random_state=42,
    test_size=0.2,
)
X_dev, X_test, y_dev, y_test = train_test_split(
    X_dev,
    y_dev,
    random_state=42,
    test_size=0.5,
)

In [None]:
def get_dataloader(text, metad, label = None, test = False):
    """Tokenise reviews and bundle them with metadata and labels in a DataLoader.

    Args:
        text: indexable sequence of review strings.
        metad: per-review metadata rows, indexed in parallel with ``text``.
        label: per-review class ids. When omitted, every sample receives the
            placeholder label 0 (previously this case raised a TypeError).
        test: when True the loader keeps the input order; otherwise batches
            are shuffled.

    Returns:
        A DataLoader with batch size ``BATCH_SIZE`` whose batches are
        ``(input_ids, token_type_ids, attention_mask, labels, metadata)``.
    """
    input_ids, token_type_ids, attention_mask = [], [], []
    metadata, labels = [], []
    for i, t in enumerate(text):
        # Pad / truncate every review to exactly `max_len` tokens.
        encoded = tokenizer.encode_plus(text=t, max_length=max_len, padding='max_length', truncation=True)
        input_ids.append(encoded['input_ids'])
        token_type_ids.append(encoded['token_type_ids'])
        attention_mask.append(encoded['attention_mask'])
        labels.append(0 if label is None else int(label[i]))
        metadata.append(metad[i])

    input_ids = torch.tensor(input_ids)
    token_type_ids = torch.tensor(token_type_ids)
    attention_mask = torch.tensor(attention_mask)
    # Stack the metadata rows with NumPy first: creating a tensor directly
    # from a list of ndarrays is very slow.
    metadata = torch.tensor(np.asarray(metadata))
    labels = torch.tensor(labels)

    dataset = TensorDataset(input_ids, token_type_ids, attention_mask, labels, metadata)
    # Only shuffle when the loader is not flagged as a test loader.
    return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=not test)
    

# Column 0 of each split holds the review text; the remaining columns are the
# numeric metadata (age + one-hot categories), cast to int64 for tensor use.
train_loader = get_dataloader(
    text=X_train[:, 0],
    metad=X_train[:, 1:].astype(np.int64),
    label=y_train,
)
dev_loader = get_dataloader(
    text=X_dev[:, 0],
    metad=X_dev[:, 1:].astype(np.int64),
    label=y_dev,
)
test_loader = get_dataloader(
    text=X_test[:, 0],
    metad=X_test[:, 1:].astype(np.int64),
    label=y_test,
    test=True,
)

In [None]:
class Bert_Model(nn.Module):
    """BERT encoder with a linear classification head on the pooled output.

    Args:
        bert_path: identifier passed to ``from_pretrained`` for both the
            config and the encoder weights.
        classes: number of output classes (size of the logit vector).
    """

    def __init__(self,bert_path,classes=2):
        super(Bert_Model,self).__init__()
        self.config = BertConfig.from_pretrained(bert_path)
        self.bert = BertModel.from_pretrained(bert_path)
        # Fine-tune every encoder weight, not just the classification head.
        for param in self.bert.parameters():
            param.requires_grad=True
        self.fc = nn.Linear(self.config.hidden_size,classes)

    def forward(self,input_ids,token_type_ids,attention_mask):
        """Return the class logits for a batch of tokenised inputs."""
        # Pass by keyword: BertModel.forward's positional order is
        # (input_ids, attention_mask, token_type_ids), so the former positional
        # call fed the segment ids in as the attention mask and vice versa.
        # Checkpoints trained with the swapped call should be retrained.
        output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # output[1] is the pooled representation of the sequence.
        logit = self.fc(output[1])
        return logit

model = Bert_Model(bert_dir).to(DEVICE)
optimizer = AdamW(model.parameters(),lr=LEARNING_RATE,weight_decay=1e-4)

# The scheduler is stepped once per *training* batch, so the total step count
# must be derived from the training loader. The previous value used
# len(test_loader), which made the total shorter than the one-epoch warm-up
# and broke the cosine decay.
schedule = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=len(train_loader),
    num_training_steps=EPOCHS*len(train_loader),
)

Some weights of the model checkpoint at /home/mengyixuan/Depression/liar/siamese/bert-large-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

def evaluate_p_r_f1_acc(y_pred, y_true):
    """Compute precision, recall, F1 and accuracy for binary predictions.

    Args:
        y_pred: predicted class ids.
        y_true: ground-truth class ids, aligned with ``y_pred``.

    Returns:
        Tuple ``(precision, recall, fscore, acc)``.
    """
    # scikit-learn metrics expect (y_true, y_pred). Passing them the other way
    # round leaves F1 and accuracy unchanged but swaps precision and recall.
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    fscore = f1_score(y_true, y_pred)
    acc = accuracy_score(y_true, y_pred)
    return precision, recall, fscore, acc

def evaluate(model,data_loader,device,name='cloth_bert.pth'):
    """Run the model over a loader and report F-score and accuracy.

    Args:
        model: module called as ``model(input_ids, token_type_ids, attention_mask)``
            and returning per-class scores.
        data_loader: yields ``(ids, tpe, att, y, meta)`` batches.
        device: device the input tensors are moved to.
        name: unused here; kept for signature compatibility.

    Returns:
        The accuracy over all batches (also printed together with the F-score).
    """
    model.eval()
    val_true,val_pred = [],[]
    with torch.no_grad():
        for idx,(ids,tpe,att,y,meta) in enumerate(data_loader):
            y_pred = model(ids.to(device),tpe.to(device),att.to(device))  # per-class scores
            y_pred = torch.argmax(y_pred,dim=1).detach().cpu().numpy().tolist()
            val_pred += y_pred
            # reshape(-1) keeps a 1-D tensor even for a final batch of size 1;
            # squeeze() would collapse it to a scalar and break the list concat.
            val_true += y.reshape(-1).cpu().numpy().tolist()
    p, r, fscore, acc = evaluate_p_r_f1_acc(val_pred, val_true)
    print('\tF-score: ', fscore, '\tacc: ', acc)
    return acc

def train(model,train_loader,valid_loader,optimizer,schedule,device,epoch, name = 'cloth_bert.pth'):
    """Train the model and checkpoint the weights with the best validation accuracy.

    Args:
        model: network to optimise.
        train_loader: yields ``(ids, tpe, att, y, metadata)`` training batches.
        valid_loader: loader passed to ``evaluate`` after every epoch.
        optimizer: optimiser stepped once per batch.
        schedule: learning-rate scheduler stepped once per batch.
        device: device the batch tensors are moved to.
        epoch: number of passes over ``train_loader``.
        name: checkpoint file name inside ``output_dir``; when it contains
            ``'meta'`` the model is additionally fed the metadata tensor.
    """
    best_acc = 0.0
    criterion = nn.CrossEntropyLoss()
    n_batches = len(train_loader)
    # Report roughly five times per epoch; the max() guard avoids a
    # modulo-by-zero when the loader has fewer than five batches.
    log_every = max(1, n_batches//5)
    for i in range(epoch):
        start = time.time()
        model.train()
        print("### Epoch {} ###".format(i+1))
        train_loss_sum = 0.0
        for idx,(ids,tpe,att,y,metadata) in enumerate(train_loader):
            ids,tpe,att,y,metadata = ids.to(device),tpe.to(device),att.to(device),y.to(device),metadata.to(device)
            if 'meta' in name:
                y_pred = model(ids,metadata,tpe,att)
            else:
                y_pred = model(ids,tpe,att)
            loss = criterion(y_pred,y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            schedule.step()
            train_loss_sum += loss.item()

            if (idx+1)%log_every==0:
                # Running mean of the loss over the batches seen so far this epoch.
                print("epoch {:04d}, step {:04d}/{:04d}, loss {:.4f}, time {:.4f}".format(
                i+1,idx+1,n_batches,train_loss_sum/(idx+1),time.time()-start))

        model.eval()
        acc = evaluate(model,valid_loader,device)
        if acc > best_acc :
            best_acc = acc
            # Make sure the target folder exists before writing the checkpoint.
            os.makedirs(output_dir, exist_ok=True)
            torch.save(model.state_dict(), os.path.join(output_dir,name))
        print("current acc is {:.4f},best acc is {:.4f}".format(acc,best_acc))
        print("time costed = {}s \n".format(round(time.time()-start,5)))

In [None]:
# Fine-tune on the training split, validating on the dev split after each epoch.
train(
    model=model,
    train_loader=train_loader,
    valid_loader=dev_loader,
    optimizer=optimizer,
    schedule=schedule,
    device=DEVICE,
    epoch=EPOCHS,
)

In [None]:
# Rebuild the network and restore previously saved weights before scoring the
# dev and test splits.
model = Bert_Model(bert_dir).to(DEVICE)
# map_location lets a checkpoint saved on a GPU be restored on a CPU-only host
# (and vice versa) instead of failing during deserialisation.
model.load_state_dict(torch.load("cloth_bert-acc8838.pth", map_location=DEVICE))
acc = evaluate(model,dev_loader,DEVICE)
acc = evaluate(model,test_loader,DEVICE)

Some weights of the model checkpoint at /home/mengyixuan/Depression/liar/siamese/bert-large-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Precision:  0.9479381443298969 	Recall:  0.9144704127299851 	F-score:  0.9309035687167806 	acc:  0.8837803320561941
Precision:  0.9402202412165706 	Recall:  0.8920398009950249 	F-score:  0.91549655348481 	acc:  0.8590889740315028
