<a href="https://colab.research.google.com/github/vatsal-kr/LegalNER-CS60075/blob/main/Model_Fusing_Method_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip install transformers

In [None]:
%pip install simpletransformers
import numpy as np
import pandas as pd
import json
import re
import glob
import nltk
import spacy
from simpletransformers.ner.ner_model import NERModel

In [None]:
# Mount Google Drive at /content/drive; the data, tag file and model
# directories used below are all read from / written to paths under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the pickled train/dev splits for the judgement and preamble text.
# Presumably each is a DataFrame with 'text_tokenized' and 'label' columns
# (those are the columns get_df reads) -- TODO confirm against the producer.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
judgement_data_train = pd.read_pickle('/content/drive/MyDrive/Colab Notebooks/NLP Term Project/judgement_data_train.pkl')
preamble_data_train = pd.read_pickle('/content/drive/MyDrive/Colab Notebooks/NLP Term Project/preamble_data_train.pkl')
judgement_data_dev = pd.read_pickle('/content/drive/MyDrive/Colab Notebooks/NLP Term Project/judgement_data_dev.pkl')
preamble_data_dev = pd.read_pickle('/content/drive/MyDrive/Colab Notebooks/NLP Term Project/preamble_data_dev.pkl')

In [None]:
def get_df(judgement_data, preamble_data):
  """Flatten tokenised sentences into a long-format frame with one row per token.

  The judgement rows come first, followed by the preamble rows; sentences are
  numbered consecutively across both in that order.

  Args:
    judgement_data: mapping/DataFrame with 'text_tokenized' and 'label'
      columns, each cell holding a sequence (tokens / per-token labels).
    preamble_data: same layout as judgement_data.

  Returns:
    DataFrame with columns 'sentence_id', 'words' and 'labels'. The index
    restarts at 0 for every sentence. An empty DataFrame is returned when
    there are no sentences at all.

  Raises:
    AssertionError: if a sentence has a different number of tokens and labels.
  """
  all_tokens = pd.concat([judgement_data['text_tokenized'], preamble_data['text_tokenized']]).tolist()
  all_labels = pd.concat([judgement_data['label'], preamble_data['label']]).tolist()

  # Collect the per-sentence frames and concatenate once at the end: calling
  # pd.concat inside the loop re-copies everything accumulated so far on
  # every iteration.
  frames = []
  for i, (t, l) in enumerate(zip(all_tokens, all_labels)):
    assert len(t)==len(l), f'sentence {i}: {len(t)} tokens but {len(l)} labels'
    frames.append(pd.DataFrame({
        'sentence_id': [i]*len(t),
        'words': t,
        'labels': l,
    }))

  # pd.concat refuses an empty list, so keep the empty-frame result explicit.
  if not frames:
    return pd.DataFrame([])
  return pd.concat(frames)

In [None]:
# Token-level frames for the training split and for the dev split (the dev
# split is what the rest of the notebook calls test_data).
train_data = get_df(judgement_data_train, preamble_data_train)
test_data = get_df(judgement_data_dev, preamble_data_dev)

In [None]:
# Read the tag vocabulary: every line of tags.txt becomes one entry of
# custom_labels, in file order.
with open('/content/drive/MyDrive/Colab Notebooks/NLP Term Project/tags.txt') as tags_file:
  tags_text = tags_file.read()
custom_labels = tags_text.splitlines()

In [None]:
# Candidate checkpoints, grouped by the model type string that is later
# passed to NERModel. Insertion order is kept (BERT family first, then
# RoBERTa) because the pair list built from this dict follows it.
_bert_checkpoints = [
    'law-ai/InLegalBERT',
    'nlpaueb/legal-bert-base-uncased',
    'nlpaueb/bert-base-uncased-contracts',
    'law-ai/InCaseLawBERT',
    'zlucia/legalbert',
    'bert-base-uncased',
]
_roberta_checkpoints = [
    'saibo/legal-roberta-base',
    'roberta-base',
]

models = {checkpoint: 'bert' for checkpoint in _bert_checkpoints}
models.update({checkpoint: 'roberta' for checkpoint in _roberta_checkpoints})

In [None]:
# Every unordered pair of distinct checkpoints that share a model type, as
# (first, second, type) tuples. "first" is always the checkpoint that appears
# earlier in `models`, and pairs are listed in that same order.
_model_items = list(models.items())
model_list = [
    (first_name, second_name, first_type)
    for position, (first_name, first_type) in enumerate(_model_items)
    for second_name, second_type in _model_items[position + 1:]
    if first_type == second_type
]

In [None]:
# Display all generated (checkpoint, checkpoint, type) pairs.
model_list[:]

[('law-ai/InLegalBERT', 'nlpaueb/legal-bert-base-uncased', 'bert'),
 ('law-ai/InLegalBERT', 'nlpaueb/bert-base-uncased-contracts', 'bert'),
 ('law-ai/InLegalBERT', 'law-ai/InCaseLawBERT', 'bert'),
 ('law-ai/InLegalBERT', 'zlucia/legalbert', 'bert'),
 ('law-ai/InLegalBERT', 'bert-base-uncased', 'bert'),
 ('nlpaueb/legal-bert-base-uncased',
  'nlpaueb/bert-base-uncased-contracts',
  'bert'),
 ('nlpaueb/legal-bert-base-uncased', 'law-ai/InCaseLawBERT', 'bert'),
 ('nlpaueb/legal-bert-base-uncased', 'zlucia/legalbert', 'bert'),
 ('nlpaueb/legal-bert-base-uncased', 'bert-base-uncased', 'bert'),
 ('nlpaueb/bert-base-uncased-contracts', 'law-ai/InCaseLawBERT', 'bert'),
 ('nlpaueb/bert-base-uncased-contracts', 'zlucia/legalbert', 'bert'),
 ('nlpaueb/bert-base-uncased-contracts', 'bert-base-uncased', 'bert'),
 ('law-ai/InCaseLawBERT', 'zlucia/legalbert', 'bert'),
 ('law-ai/InCaseLawBERT', 'bert-base-uncased', 'bert'),
 ('zlucia/legalbert', 'bert-base-uncased', 'bert'),
 ('saibo/legal-roberta-bas

In [None]:
from collections import OrderedDict
import torch

def get_weighted_avg_model(model1,model2,factor):
  """Blend the weights of two models into a single state dict.

  Entries whose key has 'embeddings' as its second dot-separated component,
  and entries whose key starts with 'classifier', are taken from model2
  unchanged. Every other entry is (1 - factor) * model1 + factor * model2.

  Args:
    model1: object exposing state_dict(); must provide every blended key of
      model2 with a compatible tensor.
    model2: object exposing state_dict(); defines the key set and key order
      of the result.
    factor: weight given to model2 in the blend (model1 gets 1 - factor).

  Returns:
    OrderedDict mapping each key of model2's state dict to the copied or
    blended tensor.

  Raises:
    KeyError: if model1 has no entry for a key that must be blended.
  """
  # Snapshot each state dict once instead of calling state_dict() again for
  # every key inside the loop.
  state1 = model1.state_dict()
  state2 = model2.state_dict()

  avg_state_dict = OrderedDict()
  for key, tensor2 in state2.items():
    parts = key.split('.')
    # Guard the second component: a key without a dot has no parts[1].
    is_embedding = len(parts) > 1 and parts[1] == 'embeddings'
    if is_embedding or parts[0] == 'classifier':
      avg_state_dict[key] = tensor2
    else:
      avg_state_dict[key] = (1-factor)*state1[key] + factor*tensor2
  return avg_state_dict

In [None]:
def format_result(model_name1,model_name2,model_type,train_result,test_result,factor):
  """Print the training-set and dev-set results of one fusion run.

  The header line names both checkpoints, the model type and the blend
  factor; the two result objects are printed as-is below it.
  """
  header = f'----------TASK_2 for MODEL : {model_name1}, {model_name2}, TYPE : {model_type}, factor : {factor}--------------'
  report_lines = [
      '',
      '',
      header,
      'RESULTS ON TRAINING SET',
      f'{train_result}',
      '',
      'RESULTS ON DEV SET',
      f'{test_result}',
      '',
      '',
  ]
  print('\n'.join(report_lines))

In [None]:
from transformers import AutoConfig , AutoModel, AutoModelForTokenClassification, AutoTokenizer


def TASK_2(test_data,model_name1,model_name2, model_type, custom_labels,factor,train_df=None):
  """Fuse two pretrained checkpoints, fine-tune the fusion for NER and report scores.

  Steps: load both checkpoints with a token-classification head, blend their
  weights with get_weighted_avg_model, save the fused model together with
  model_name2's tokenizer under the Task_2 directory on Drive, fine-tune it
  with simpletransformers' NERModel, then evaluate on the training and dev
  frames and print both results via format_result.

  Args:
    test_data: frame passed to NERModel.eval_model for the dev-set scores.
    model_name1: checkpoint id of the first model (weight 1 - factor).
    model_name2: checkpoint id of the second model (weight factor); it also
      supplies the embeddings, the classifier head and the tokenizer.
    model_type: model type string handed to NERModel (e.g. 'bert').
    custom_labels: list of tag names; its length sets the head size.
    factor: blend weight given to model_name2.
    train_df: training frame. When None (the default) the notebook-level
      `train_data` is used, which is what earlier versions always did.

  Returns:
    (train_result, test_result): the result objects returned first by
    eval_model for the training frame and for test_data.
  """
  # Fall back to the notebook global so existing calls keep working, while
  # letting callers pass the training frame explicitly.
  training_frame = train_data if train_df is None else train_df

  # Size the classification head from the label list instead of a literal 29,
  # so it always agrees with the labels NERModel is given below.
  num_labels = len(custom_labels)

  dir1=model_name1.split('/')[-1]
  dir2=model_name2.split('/')[-1]
  # NOTE(review): dir1 and dir2 are joined without a separator, so distinct
  # pairs could in principle map to the same directory. Left unchanged so
  # previously saved runs stay addressable.
  task2_model_path="/content/drive/MyDrive/Colab Notebooks/NLP Term Project/Task_2/"+dir1+dir2

  model1 = AutoModelForTokenClassification.from_pretrained(model_name1,num_labels=num_labels)
  model2 = AutoModelForTokenClassification.from_pretrained(model_name2,num_labels=num_labels)

  avg_state_dict=get_weighted_avg_model(model1,model2,factor)
  # The fused state dict has exactly model2's keys, so model2 can host the
  # fused weights directly; loading a third copy of the checkpoint only to
  # overwrite every weight is unnecessary.
  model2.load_state_dict(avg_state_dict)

  # The fused model keeps model2's embeddings, so it is paired with model2's
  # tokenizer. Both are saved because NERModel loads from a directory.
  tokenizer = AutoTokenizer.from_pretrained(model_name2)
  tokenizer.save_pretrained(task2_model_path)
  model2.save_pretrained(task2_model_path)

  ner_model = NERModel(model_type, task2_model_path, labels = custom_labels, args={'num_train_epochs': 3, 'overwrite_output_dir': True, 'save_model_every_epoch':False,
                                      'learning_rate': 5e-5, 'warmup_ratio': 0.0, 'fp16': False,
                                      'manual_seed': 42},use_cuda=True)
  ner_model.train_model(training_frame)

  # Only the first element of each eval_model result is reported.
  train_result, _, _ = ner_model.eval_model(training_frame)
  test_result, _, _ = ner_model.eval_model(test_data)

  format_result(model_name1,model_name2,model_type,train_result,test_result,factor)
  return train_result, test_result


In [None]:
# Run the fusion experiment on the first five pairs only, blending the two
# checkpoints of each pair with equal weight (factor 0.5).
for first_checkpoint, second_checkpoint, checkpoint_type in model_list[:5]:
  TASK_2(
      test_data,
      first_checkpoint,
      second_checkpoint,
      checkpoint_type,
      custom_labels,
      factor=0.5,
  )

Some weights of the model checkpoint at law-ai/InLegalBERT were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from th

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1375 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/135 [00:00<?, ?it/s]



----------TASK_2 for MODEL : law-ai/InLegalBERT, nlpaueb/legal-bert-base-uncased, TYPE : bert, factor : 0.5--------------
RESULTS ON TRAINING SET
{'eval_loss': 0.056124442783861674, 'precision': 0.8255878614553543, 'recall': 0.8630927663815298, 'f1_score': 0.843923829631735}

RESULTS ON DEV SET
{'eval_loss': 0.16066905495331243, 'precision': 0.6593323216995448, 'recall': 0.6999597261377366, 'f1_score': 0.6790388747802305}




Some weights of the model checkpoint at law-ai/InLegalBERT were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from th

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1375 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/135 [00:00<?, ?it/s]



----------TASK_2 for MODEL : law-ai/InLegalBERT, nlpaueb/bert-base-uncased-contracts, TYPE : bert, factor : 0.5--------------
RESULTS ON TRAINING SET
{'eval_loss': 0.08907467955825003, 'precision': 0.7292056074766355, 'recall': 0.7698569314257523, 'f1_score': 0.7489800815934725}

RESULTS ON DEV SET
{'eval_loss': 0.19911999425126448, 'precision': 0.5621780721118469, 'recall': 0.6092503987240829, 'f1_score': 0.5847684653654802}




Some weights of the model checkpoint at law-ai/InLegalBERT were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from th

Downloading (…)lve/main/config.json:   0%|          | 0.00/861 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/534M [00:00<?, ?B/s]

Some weights of the model checkpoint at law-ai/InCaseLawBERT were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from 

Downloading (…)okenizer_config.json:   0%|          | 0.00/343 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

  0%|          | 0/3 [00:01<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1375 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/135 [00:00<?, ?it/s]



----------TASK_2 for MODEL : law-ai/InLegalBERT, law-ai/InCaseLawBERT, TYPE : bert, factor : 0.5--------------
RESULTS ON TRAINING SET
{'eval_loss': 0.08256763534688137, 'precision': 0.7448651993965884, 'recall': 0.7916872224963, 'f1_score': 0.7675628275903302}

RESULTS ON DEV SET
{'eval_loss': 0.2015319441134731, 'precision': 0.567006554989075, 'recall': 0.6208133971291866, 'f1_score': 0.5926912828321279}




Some weights of the model checkpoint at law-ai/InLegalBERT were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from th

Downloading (…)lve/main/config.json:   0%|          | 0.00/740 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/441M [00:00<?, ?B/s]

Some weights of the model checkpoint at zlucia/legalbert were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the 

Downloading (…)okenizer_config.json:   0%|          | 0.00/300 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1375 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/135 [00:00<?, ?it/s]



----------TASK_2 for MODEL : law-ai/InLegalBERT, zlucia/legalbert, TYPE : bert, factor : 0.5--------------
RESULTS ON TRAINING SET
{'eval_loss': 0.07917709525352852, 'precision': 0.751342307991596, 'recall': 0.7939072520966947, 'f1_score': 0.7720385399592213}

RESULTS ON DEV SET
{'eval_loss': 0.20154595811057974, 'precision': 0.5899925317401046, 'recall': 0.6299840510366826, 'f1_score': 0.6093328191284227}




Some weights of the model checkpoint at law-ai/InLegalBERT were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from th

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-u

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/1375 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/1375 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/135 [00:00<?, ?it/s]



----------TASK_2 for MODEL : law-ai/InLegalBERT, bert-base-uncased, TYPE : bert, factor : 0.5--------------
RESULTS ON TRAINING SET
{'eval_loss': 0.08457276039976966, 'precision': 0.7392270795148773, 'recall': 0.779312613057063, 'f1_score': 0.758740768907479}

RESULTS ON DEV SET
{'eval_loss': 0.20084865428507329, 'precision': 0.5734526472781506, 'recall': 0.613237639553429, 'f1_score': 0.5926782273603082}




In [None]:
# Load the stock bert-base-uncased checkpoint with a token-classification head.
# NOTE(review): num_labels is not passed here, so the head presumably has the
# library's default size rather than 29 -- confirm. That is harmless for the
# blend below, which copies every 'classifier.*' entry from model2 instead.
from transformers import AutoConfig , AutoModel, AutoModelForTokenClassification
model1 = AutoModelForTokenClassification.from_pretrained('bert-base-uncased')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-u

In [None]:

# Load the checkpoint stored under Trained_Model/bert-base-uncased on Drive,
# using the config.json saved next to it (presumably the fine-tuned NER
# model produced elsewhere in the project -- TODO confirm).
config = AutoConfig.from_pretrained("/content/drive/MyDrive/Colab Notebooks/NLP Term Project/Trained_Model/bert-base-uncased/config.json")
model2 = AutoModelForTokenClassification.from_pretrained("/content/drive/MyDrive/Colab Notebooks/NLP Term Project/Trained_Model/bert-base-uncased/pytorch_model.bin", config=config)

In [None]:
from collections import OrderedDict
import torch

In [None]:
def get_weighted_avg_model2(model1,model2,factor,base_model_name='bert-base-uncased',num_labels=29):
  """Blend two models and return the result as a loaded token-classification model.

  The blending itself is delegated to get_weighted_avg_model, so both entry
  points apply the same rule: embeddings and classifier entries come from
  model2, everything else is (1 - factor) * model1 + factor * model2.

  Args:
    model1: model contributing weight (1 - factor) to the blended entries.
    model2: model contributing weight factor; also the source of the
      embeddings and classifier entries.
    factor: blend weight given to model2.
    base_model_name: checkpoint used to instantiate the model that receives
      the blended weights. Defaults to the previously hard-coded value.
    num_labels: size of the classification head of that model. Defaults to
      the previously hard-coded value.

  Returns:
    The freshly instantiated model with the blended state dict loaded.
  """
  # Reuse the shared helper rather than keeping a second copy of the loop
  # (the copy here also printed every key, which flooded the cell output).
  avg_state_dict = get_weighted_avg_model(model1, model2, factor)
  model3=AutoModelForTokenClassification.from_pretrained(base_model_name,num_labels=num_labels,ignore_mismatched_sizes=True)
  model3.load_state_dict(avg_state_dict)
  return model3

In [None]:
# factor=1 gives model1 zero weight in the blend, so model3 ends up carrying
# model2's weights in a freshly instantiated model.
model3=get_weighted_avg_model2(model1,model2,1)

bert.embeddings.position_ids
bert.embeddings.word_embeddings.weight
bert.embeddings.position_embeddings.weight
bert.embeddings.token_type_embeddings.weight
bert.embeddings.LayerNorm.weight
bert.embeddings.LayerNorm.bias
bert.encoder.layer.0.attention.self.query.weight
bert.encoder.layer.0.attention.self.query.bias
bert.encoder.layer.0.attention.self.key.weight
bert.encoder.layer.0.attention.self.key.bias
bert.encoder.layer.0.attention.self.value.weight
bert.encoder.layer.0.attention.self.value.bias
bert.encoder.layer.0.attention.output.dense.weight
bert.encoder.layer.0.attention.output.dense.bias
bert.encoder.layer.0.attention.output.LayerNorm.weight
bert.encoder.layer.0.attention.output.LayerNorm.bias
bert.encoder.layer.0.intermediate.dense.weight
bert.encoder.layer.0.intermediate.dense.bias
bert.encoder.layer.0.output.dense.weight
bert.encoder.layer.0.output.dense.bias
bert.encoder.layer.0.output.LayerNorm.weight
bert.encoder.layer.0.output.LayerNorm.bias
bert.encoder.layer.1.attenti

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-u

In [None]:
# Save the bert-base-uncased tokenizer and model3 into the same
# Weighted_Trained_Model directory; NERModel is later pointed at that path.
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
tokenizer.save_pretrained("/content/drive/MyDrive/Colab Notebooks/NLP Term Project/Weighted_Trained_Model/bert-base-uncased")
model3.save_pretrained("/content/drive/MyDrive/Colab Notebooks/NLP Term Project/Weighted_Trained_Model/bert-base-uncased")

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [None]:
# Wrap the saved model in NERModel for evaluation; no train_model call
# follows, so it is evaluated exactly as saved.
model4 = NERModel("bert",'/content/drive/MyDrive/Colab Notebooks/NLP Term Project/Weighted_Trained_Model/bert-base-uncased',labels=custom_labels ,use_cuda= True)

In [None]:
# Evaluate on the dev frame; only `result` is inspected in the cells below.
result, _, predictions = model4.eval_model(test_data)

  0%|          | 0/2 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/135 [00:00<?, ?it/s]

In [None]:
# Show the metrics returned by the evaluation cell above.
result

{'eval_loss': 0.19275242939453435,
 'precision': 0.5957132298595713,
 'recall': 0.6427432216905901,
 'f1_score': 0.6183352512466436}

In [None]:
# NOTE(review): the saved output of this cell differs from the previous
# `result` cell although no visible cell in between reassigns `result`.
# Presumably it was produced after evaluating on another frame (e.g.
# train_data) in a cell that was edited or deleted -- confirm and re-run.
result

{'eval_loss': 0.09451309311790047,
 'precision': 0.8373033707865168,
 'recall': 0.8918142651986597,
 'f1_score': 0.8636995827538247}