In [1]:
import json
import nltk
import datasets
import joblib
from nltk.tokenize import word_tokenize, sent_tokenize
from transformers import AutoTokenizer

task = "srl"
model_checkpoint = "/home/ziggy/Desktop/Ad_ml/final/base_model"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [2]:
import data_process
from data_process import calculate_failure_rate, classify_data, extract_feature_and_label,preprocess_data_model, read_conll_file, write_conll_multiple, tokenize_conll, tokenize_, format_conll, format_conll, preprocess_data,create_word_sentlist,tokenize_and_align_labels 

[nltk_data] Downloading package wordnet to /home/ziggy/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
from feature import tokenize, lemmatization, named_entity_recognition, sub_tree, capitalization, syntactic_head, PoS_tag, Tag, dep_relations, dep_path, dep_dist_to_head, extract_morph_inform, is_predicate, extract_wordnet_class

# Read from json

In [4]:


# Open the JSON file
with open('data_5.json') as f:
    
    data = json.load(f)



## Extract tests and target information

In [5]:
#extract text, token, label, pred from json

test_sentence={}
t_target={}
capability_sets=list(data['capabilities'].keys())
# print(capability_sets)

for c in capability_sets:

    # c_name = data['capabilities'][c]['name']

    
    c_test = data['capabilities'][c]["tests"]
    # print(c_test)

    for t_name in list(c_test.keys()):
        # print(t_name)
        l_dict = c_test[t_name]
    
        if not isinstance(l_dict, list):

            l_dict = list(c_test[t_name].values())[0]
            t_name =list(c_test[t_name].keys())[0]
                
            # print(t_name, l_dict)
            
            
        for dic in l_dict:
            
            if not isinstance(dic["data"], list):
                # print(dic["data"])
                
                if t_name in test_sentence:
                    test_sentence[t_name].append(dic["data"])
                    t_target[t_name].append((dic["token"], dic["label"], dic["predicate"]))
                else:
                    test_sentence[t_name] = [dic["data"]]

                    t_target[t_name]= [(dic["token"], dic["label"], dic["predicate"])]
            else:
                # print(dic["data"])
                for i in range(len(dic["data"])):
                    text, token, label, pred = dic["data"][i], dic["token"][i], dic["label"][i], dic["predicate"][i]
                    # print(data, token, label, pred)
                    
                    if t_name in test_sentence:
                        test_sentence[t_name].append(text)
                        t_target[t_name].append((token, label, pred))
                    else:
                        test_sentence[t_name] = [text]

                        t_target[t_name]= [(token, label, pred)]

    

In [6]:
print(test_sentence.keys())
print(t_target.keys())

dict_keys(['passive', 'relative_cause', 'indirect_object', 'double_object', 'Location_modifier', 'Negation'])
dict_keys(['passive', 'relative_cause', 'indirect_object', 'double_object', 'Location_modifier', 'Negation'])


In [7]:
test_names=list(t_target.keys())
print(test_names)

['passive', 'relative_cause', 'indirect_object', 'double_object', 'Location_modifier', 'Negation']


In [8]:
print(t_target["passive"])

[('play', 'ARG1', 'directed'), ('project', 'ARG1', 'managed'), ('movie', 'ARG1', 'made'), ('painting', 'ARG1', 'created'), ('problem', 'ARG1', 'solved'), ('dance', 'ARG1', 'choreographed'), ('building', 'ARG1', 'designed'), ('surgery', 'ARG1', 'performed'), ('speech', 'ARG1', 'delivered')]


# 1. Tests for Passive

In [9]:
test_name = "passive"
caba_sents = test_sentence[test_name]

In [10]:
t_tokens = [tup[0] for tup in t_target[test_name]] 
t_label= [tup[1] for tup in t_target[test_name]] 
t_pred  = [tup[2] for tup in t_target[test_name]] 

## 1.1 Extract Features and data process

In [11]:
#create features and write into conll for inspection
output_file = f"{test_name}_f.conll"
write_conll_multiple(caba_sents, output_file)

In [12]:
#read conll
conll_output = read_conll_file(output_file)

In [13]:
#preprocess data
preprocessed_test=preprocess_data_model(conll_output)

## 1.2 Implement logistic regression

In [14]:
# load model and vectorizer
model_single = joblib.load('logistic_regression/logreg_single.pkl')
vec_single = joblib.load('logistic_regression/vectorizer.pkl')

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [15]:
using_test_set, test_gold = extract_feature_and_label(preprocessed_test,t_tokens,t_label)
single_predictions = classify_data(model_single, vec_single, using_test_set)

## 1.3 Evaluation

In [16]:
#target token
t_token_label = []
t_token_pred = []
for i,t in enumerate(test_gold):
    if t != "_":
        t_token_label.append(t)
        t_token_pred.append(single_predictions[i])
    else:
        continue

In [17]:
failure_rate, failure_count=calculate_failure_rate(t_token_label, t_token_pred)

In [18]:
input_json = {}
if test_name not in input_json:
    input_json[test_name] = ((failure_rate, failure_count))
    
print(input_json)

{'passive': (1.0, 9)}


# 2. Tests for Relative_cause

In [19]:
test_name = "relative_cause"
caba_sents = test_sentence[test_name]

t_tokens = [tup[0] for tup in t_target[test_name]] 
t_label= [tup[1] for tup in t_target[test_name]] 
t_pred  = [tup[2] for tup in t_target[test_name]] 

#create features and write into conll for inspection
output_file = f"{test_name}_f.conll"
write_conll_multiple(caba_sents, output_file)

#read conll
conll_output = read_conll_file(output_file)

#preprocess data
preprocessed_test=preprocess_data_model(conll_output)

using_test_set, test_gold = extract_feature_and_label(preprocessed_test,t_tokens,t_label)
single_predictions = classify_data(model_single, vec_single, using_test_set)

#target token
t_token_label = []
t_token_pred = []
for i,t in enumerate(test_gold):
    if t != "_":
        t_token_label.append(t)
        t_token_pred.append(single_predictions[i])
    else:
        continue
        
failure_rate, failure_count=calculate_failure_rate(t_token_label, t_token_pred)
if test_name not in input_json:
    input_json [test_name]= ((failure_rate, failure_count))

In [20]:
print(input_json)

{'passive': (1.0, 9), 'relative_cause': (1.0, 9)}


# 3. Tests for indirect_object

In [21]:
test_name = "indirect_object"
caba_sents = test_sentence[test_name]

t_tokens = [tup[0] for tup in t_target[test_name]] 
t_label= [tup[1] for tup in t_target[test_name]] 
t_pred  = [tup[2] for tup in t_target[test_name]] 

#create features and write into conll for inspection
output_file = f"{test_name}_f.conll"
write_conll_multiple(caba_sents, output_file)

#read conll
conll_output = read_conll_file(output_file)

#preprocess data
preprocessed_test=preprocess_data_model(conll_output)

using_test_set, test_gold = extract_feature_and_label(preprocessed_test,t_tokens,t_label)
single_predictions = classify_data(model_single, vec_single, using_test_set)

#target token
t_token_label = []
t_token_pred = []
for i,t in enumerate(test_gold):
    if t != "_":
        t_token_label.append(t)
        t_token_pred.append(single_predictions[i])
    else:
        continue
        
failure_rate, failure_count=calculate_failure_rate(t_token_label, t_token_pred)
if test_name not in input_json:
    input_json [test_name]= ((failure_rate, failure_count))

In [22]:
print(input_json)

{'passive': (1.0, 9), 'relative_cause': (1.0, 9), 'indirect_object': (1.0, 9)}


# 4. Tests for double_object

In [23]:
test_name = "double_object"
caba_sents = test_sentence[test_name]

t_tokens = [tup[0] for tup in t_target[test_name]] 
t_label= [tup[1] for tup in t_target[test_name]] 
t_pred  = [tup[2] for tup in t_target[test_name]] 

#create features and write into conll for inspection
output_file = f"{test_name}_f.conll"
write_conll_multiple(caba_sents, output_file)

#read conll
conll_output = read_conll_file(output_file)

#preprocess data
preprocessed_test=preprocess_data_model(conll_output)

using_test_set, test_gold = extract_feature_and_label(preprocessed_test,t_tokens,t_label)
single_predictions = classify_data(model_single, vec_single, using_test_set)

#target token
t_token_label = []
t_token_pred = []
for i,t in enumerate(test_gold):
    if t != "_":
        t_token_label.append(t)
        t_token_pred.append(single_predictions[i])
    else:
        continue
        
failure_rate, failure_count=calculate_failure_rate(t_token_label, t_token_pred)
if test_name not in input_json:
    input_json [test_name]= ((failure_rate, failure_count))

print(input_json)

{'passive': (1.0, 9), 'relative_cause': (1.0, 9), 'indirect_object': (1.0, 9), 'double_object': (1.0, 10)}


In [24]:
print(t_token_label)

['ARG2', 'ARG2', 'ARG2', 'ARG2', 'ARG2', 'ARG2', 'ARG2', 'ARG2', 'ARG2', 'ARG2']


# 5. Tests for Location_modifier

In [25]:
test_name = "Location_modifier"
caba_sents = test_sentence[test_name]

t_tokens = [tup[0] for tup in t_target[test_name]] 
t_label= [tup[1] for tup in t_target[test_name]] 
t_pred  = [tup[2] for tup in t_target[test_name]] 

#create features and write into conll for inspection
output_file = f"{test_name}_f.conll"
write_conll_multiple(caba_sents, output_file)

#read conll
conll_output = read_conll_file(output_file)

#preprocess data
preprocessed_test=preprocess_data_model(conll_output)

using_test_set, test_gold = extract_feature_and_label(preprocessed_test,t_tokens,t_label)
single_predictions = classify_data(model_single, vec_single, using_test_set)

#target token
t_token_label = []
t_token_pred = []
for i,t in enumerate(test_gold):
    if t != "_":
        t_token_label.append(t)
        t_token_pred.append(single_predictions[i])
    else:
        continue
        
failure_rate, failure_count=calculate_failure_rate(t_token_label, t_token_pred)
if test_name not in input_json:
    input_json [test_name]= ((failure_rate, failure_count))

print(input_json)

{'passive': (1.0, 9), 'relative_cause': (1.0, 9), 'indirect_object': (1.0, 9), 'double_object': (1.0, 10), 'Location_modifier': (1.0, 16)}


# 6. Tests for Negation

In [26]:
test_name = "Negation"
caba_sents = test_sentence[test_name]

t_tokens = [tup[0] for tup in t_target[test_name]] 
t_label= [tup[1] for tup in t_target[test_name]] 
t_pred  = [tup[2] for tup in t_target[test_name]] 

#create features and write into conll for inspection
output_file = f"{test_name}_f.conll"
write_conll_multiple(caba_sents, output_file)

#read conll
conll_output = read_conll_file(output_file)

#preprocess data
preprocessed_test=preprocess_data_model(conll_output)

using_test_set, test_gold = extract_feature_and_label(preprocessed_test,t_tokens,t_label)
single_predictions = classify_data(model_single, vec_single, using_test_set)

#target token
t_token_label = []
t_token_pred = []
for i,t in enumerate(test_gold):
    if t != "_":
        t_token_label.append(t)
        t_token_pred.append(single_predictions[i])
    else:
        continue
        
failure_rate, failure_count=calculate_failure_rate(t_token_label, t_token_pred)
if test_name not in input_json:
    input_json [test_name]= ((failure_rate, failure_count))

print(input_json)

{'passive': (1.0, 9), 'relative_cause': (1.0, 9), 'indirect_object': (1.0, 9), 'double_object': (1.0, 10), 'Location_modifier': (1.0, 16), 'Negation': (1.0, 8)}


In [27]:
import json

def update_data_with_test(input_json, input_file):
    # Load the JSON file
    with open(input_file, 'r') as file:
        data = json.load(file)

    # Update data with test information
    
    for k in input_json: 
        test_name = k
        failure_rate, failure_count = input_json[k]
        for capability in data['capabilities'].values():
        # print(capability)
            tests = capability.get("tests")
            # print(tests)
            if len(tests) == 1:
                data_name = [k for k in tests][0]

                if data_name == test_name:
                    # capability['failure_rate'] = capability.pop('pass')
                    capability['failure_rate'] = failure_rate
                    capability['fail'] = failure_count
                    # print(capability['fail'])
            else:
                for i in tests:
                    # print(tests[i])
                    # print(i)
                    data_name = [k for k in tests[i]][0]

                    if data_name == test_name:
                        # capability['failure_rate'] = capability.pop('pass')
                        capability['tests'][i]["failure_rate"] = failure_rate
                        capability['tests'][i]["fail"] = failure_count

    # Write the updated data back to the file
    with open('data_result.json', 'w') as file:
        json.dump(data, file, indent=4)




In [30]:
with open('data_5.json', 'r') as file:
    data = json.load(file)

for k in input_json: 
    test_name = k
    failure_rate, failure_count = input_json[k]
    for capability in data['capabilities'].values():
        # print(capability)
        tests = capability.get("tests")
            # print(tests)
        if len(tests) == 1:
            data_name = [k for k in tests][0]

            if data_name == test_name:
                    # capability['failure_rate'] = capability.pop('pass')
                
                capability['failure_rate'] = failure_rate
                
                capability['fail'] = failure_count
                    # print(capability['fail'])
        else:
            for i in tests:
                # print(tests[i])
                # print(i)
                data_name = [k for k in tests[i]][0]

                if data_name == test_name:
                    # print("right name")
                        # capability['failure_rate'] = capability.pop('pass')
                    
                    tests[i]["failure_rate"] = failure_rate
                    
                    tests[i]["fail"] = failure_count

with open('data_result.json', 'w') as file:
    json.dump(data, file, indent=4)  
    

# 2. Advance_model

In [33]:
label_dict = {'C-ARGM-EXT': 0, 'C-ARG4': 1, 'C-ARGM-DIR': 2, 'ARG5': 3, 'C-ARGM-CXN': 4, 'ARGA': 5, 'C-ARG2': 6, 'C-ARGM-GOL': 7, 'C-V': 8, 'ARGM-MNR': 9, 'R-ARGM-TMP': 10, 'ARGM-LOC': 11, 'ARGM-DIR': 12, 'C-ARGM-TMP': 13, 'C-ARG3': 14, 'C-ARGM-COM': 15, 'ARGM-ADV': 16, '_': 17, 'R-ARGM-GOL': 18, 'C-ARGM-ADV': 19, 'R-ARGM-ADV': 20, 'R-ARG1': 21, 'ARGM-CAU': 22, 'C-ARGM-PRR': 23, 'ARG3': 24, 'C-ARG1-DSP': 25, 'R-ARGM-CAU': 26, 'C-ARGM-LOC': 27, 'R-ARG0': 28, 'R-ARG3': 29, 'ARG1': 30, 'R-ARGM-LOC': 31, 'ARGM-GOL': 32, 'ARGM-DIS': 33, 'ARGM-PRD': 34, 'C-ARG1': 35, 'R-ARGM-MNR': 36, 'ARGM-EXT': 37, 'ARG2': 38, 'ARGM-TMP': 39, 'R-ARG2': 40, 'R-ARG4': 41, 'ARG0': 42, 'ARGM-PRR': 43, 'R-ARGM-DIR': 44, 'ARG1-DSP': 45, 'ARGM-CXN': 46, 'ARGM-PRP': 47, 'C-ARG0': 48, 'C-ARGM-PRP': 49, 'R-ARGM-COM': 50, 'ARGM-REC': 51, 'R-ARGM-ADJ': 52, 'C-ARGM-MNR': 53, 'ARGM-NEG': 54, 'ARGM-COM': 55, 'ARGM-ADJ': 56, 'ARGM-MOD': 57, 'ARG4': 58, 'ARGM-LVB': 59}



In [39]:
#load machine
import transformers
from transformers import AutoTokenizer, AutoModelForTokenClassification, TrainingArguments, Trainer, DataCollatorForTokenClassification
from datasets import load_metric

import torch

model_directory = 'advanced_model/advanced'
# model_directory = 'models/advanced_model/advanced'
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_directory)
model = AutoModelForTokenClassification.from_pretrained(model_directory,num_labels=len(label_dict))

# 2.1 passive

In [42]:
# test_name = "passive"
# test_sents = test_sentence[test_name]
# target_infor = t_target[test_name]
# tokenized_sentences = tokenize_conll(test_sents)

# passive_ds = preprocess_data(tokenized_sentences,target_infor)
# # print(passive_ds)
# p_testsent = create_word_sentlist(passive_ds)
# p_testds = datasets.Dataset.from_list(p_testsent)
# p_tokenized_test = p_testds.map(tokenize_and_align_labels, batched=True)
# # print(p_tokenized_test)
# p_testds = datasets.Dataset.from_list(p_testsent)
# p_tokenized_test = p_testds.map(tokenize_and_align_labels, batched=True)

In [43]:
test_name = "passive"
caba_sents = test_sentence[test_name]
t_tokens = [tup[0] for tup in t_target[test_name]] 
t_label= [tup[1] for tup in t_target[test_name]] 
t_pred  = [tup[2] for tup in t_target[test_name]] 

In [44]:
# Tokenize and prepare input
inputs = tokenizer(caba_sents, padding=True, truncation=True, return_tensors="pt")
# inputs = tokenizer.encode(sentence, return_tensors="pt")
# Perform inference
with torch.no_grad():
    outputs = model(**inputs)

# Get predictions
predictions = torch.argmax(outputs.logits, dim=2)

# Decode predictions using label_dict
decoded_predictions = []
for i, sentence_predictions in enumerate(predictions):
    decoded_labels = [list(label_dict.keys())[list(label_dict.values()).index(label_id)] for label_id in sentence_predictions]
    decoded_predictions.append(decoded_labels)

t_token_label=[]
t_token_pred=[]
# Print decoded predictions
for i, sentence in enumerate(caba_sents):
    tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[i])
    t_token_label.append(t_label[i])
    t_token = t_tokens[i]
    ind_pred = tokens.index(t_token)
    t_token_pred.append(decoded_predictions[ind_pred])
    # print("Sentence:", sentence)
    # print("Tokens:", tokens)
    # print("Predicted Labels:", decoded_predictions[i])
    print()












In [45]:
failure_rate, failure_count=calculate_failure_rate(t_token_label, t_token_pred)
# print(failure_rate, failure_count)
input_json_a = {}
if test_name not in input_json_a:
    input_json_a[test_name] = ((failure_rate, failure_count))
    
print(input_json_a)

{'passive': (1.0, 9)}


# 2.2 relative_cause

In [46]:
test_name = "relative_cause"
caba_sents = test_sentence[test_name]
t_tokens = [tup[0] for tup in t_target[test_name]] 
t_label= [tup[1] for tup in t_target[test_name]] 
t_pred  = [tup[2] for tup in t_target[test_name]] 

# Tokenize and prepare input
inputs = tokenizer(caba_sents, padding=True, truncation=True, return_tensors="pt")
# inputs = tokenizer.encode(sentence, return_tensors="pt")
# Perform inference
with torch.no_grad():
    outputs = model(**inputs)

# Get predictions
predictions = torch.argmax(outputs.logits, dim=2)

# Decode predictions using label_dict
decoded_predictions = []
for i, sentence_predictions in enumerate(predictions):
    decoded_labels = [list(label_dict.keys())[list(label_dict.values()).index(label_id)] for label_id in sentence_predictions]
    decoded_predictions.append(decoded_labels)

t_token_label=[]
t_token_pred=[]
# Print decoded predictions
for i, sentence in enumerate(caba_sents):
    tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[i])
    t_token_label.append(t_label[i])
    t_token = t_tokens[i]
    ind_pred = tokens.index(t_token)
    t_token_pred.append(decoded_predictions[ind_pred])
    # print("Sentence:", sentence)
    # print("Tokens:", tokens)
    # print("Predicted Labels:", decoded_predictions[i])
    print()






IndexError: list index out of range

In [47]:
failure_rate, failure_count=calculate_failure_rate(t_token_label, t_token_pred)
if test_name not in input_json_a:
    input_json_a[test_name] = ((failure_rate, failure_count))
    
print(input_json_a)

{'passive': (1.0, 9), 'relative_cause': (0.75, 3)}


# 2.3 indirect_object

In [48]:
test_name = "indirect_object"
caba_sents = test_sentence[test_name]
t_tokens = [tup[0] for tup in t_target[test_name]] 
t_label= [tup[1] for tup in t_target[test_name]] 
t_pred  = [tup[2] for tup in t_target[test_name]] 

# Tokenize and prepare input
inputs = tokenizer(caba_sents, padding=True, truncation=True, return_tensors="pt")
# inputs = tokenizer.encode(sentence, return_tensors="pt")
# Perform inference
with torch.no_grad():
    outputs = model(**inputs)

# Get predictions
predictions = torch.argmax(outputs.logits, dim=2)

# Decode predictions using label_dict
decoded_predictions = []
for i, sentence_predictions in enumerate(predictions):
    decoded_labels = [list(label_dict.keys())[list(label_dict.values()).index(label_id)] for label_id in sentence_predictions]
    decoded_predictions.append(decoded_labels)

t_token_label=[]
t_token_pred=[]
# Print decoded predictions
for i, sentence in enumerate(caba_sents):
    tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[i])
    t_token_label.append(t_label[i])
    t_token = t_tokens[i]
    ind_pred = tokens.index(t_token)
    t_token_pred.append(decoded_predictions[ind_pred])
    # print("Sentence:", sentence)
    # print("Tokens:", tokens)
    # print("Predicted Labels:", decoded_predictions[i])
    print()




ValueError: 'Mindy' is not in list

In [49]:
failure_rate, failure_count=calculate_failure_rate(t_token_label, t_token_pred)
if test_name not in input_json_a:
    input_json_a[test_name] = ((failure_rate, failure_count))
    
print(input_json_a)

{'passive': (1.0, 9), 'relative_cause': (0.75, 3), 'indirect_object': (0.5, 1)}


# 2.4 double_object

In [50]:
test_name = "double_object"
caba_sents = test_sentence[test_name]
t_tokens = [tup[0] for tup in t_target[test_name]] 
t_label= [tup[1] for tup in t_target[test_name]] 
t_pred  = [tup[2] for tup in t_target[test_name]] 

# Tokenize and prepare input
inputs = tokenizer(caba_sents, padding=True, truncation=True, return_tensors="pt")
# inputs = tokenizer.encode(sentence, return_tensors="pt")
# Perform inference
with torch.no_grad():
    outputs = model(**inputs)

# Get predictions
predictions = torch.argmax(outputs.logits, dim=2)

# Decode predictions using label_dict
decoded_predictions = []
for i, sentence_predictions in enumerate(predictions):
    decoded_labels = [list(label_dict.keys())[list(label_dict.values()).index(label_id)] for label_id in sentence_predictions]
    decoded_predictions.append(decoded_labels)

t_token_label=[]
t_token_pred=[]
# Print decoded predictions
for i, sentence in enumerate(caba_sents):
    tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[i])
    t_token_label.append(t_label[i])
    t_token = t_tokens[i]
    ind_pred = tokens.index(t_token)
    t_token_pred.append(decoded_predictions[ind_pred])
    # print("Sentence:", sentence)
    # print("Tokens:", tokens)
    # print("Predicted Labels:", decoded_predictions[i])
    print()




ValueError: 'Mindy' is not in list

In [51]:
failure_rate, failure_count=calculate_failure_rate(t_token_label, t_token_pred)
if test_name not in input_json_a:
    input_json_a[test_name] = ((failure_rate, failure_count))
    
print(input_json_a)

{'passive': (1.0, 9), 'relative_cause': (0.75, 3), 'indirect_object': (0.5, 1), 'double_object': (0.5, 1)}


# 2.5 Location_modifier

In [52]:
test_name = "Location_modifier"
caba_sents = test_sentence[test_name]
t_tokens = [tup[0] for tup in t_target[test_name]] 
t_label= [tup[1] for tup in t_target[test_name]] 
t_pred  = [tup[2] for tup in t_target[test_name]] 

# Tokenize and prepare input
inputs = tokenizer(caba_sents, padding=True, truncation=True, return_tensors="pt")
# inputs = tokenizer.encode(sentence, return_tensors="pt")
# Perform inference
with torch.no_grad():
    outputs = model(**inputs)

# Get predictions
predictions = torch.argmax(outputs.logits, dim=2)

# Decode predictions using label_dict
decoded_predictions = []
for i, sentence_predictions in enumerate(predictions):
    decoded_labels = [list(label_dict.keys())[list(label_dict.values()).index(label_id)] for label_id in sentence_predictions]
    decoded_predictions.append(decoded_labels)

t_token_label=[]
t_token_pred=[]
# Print decoded predictions
for i, sentence in enumerate(caba_sents):
    tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[i])
    t_token_label.append(t_label[i])
    t_token = t_tokens[i]
    ind_pred = tokens.index(t_token)
    t_token_pred.append(decoded_predictions[ind_pred])
    # print("Sentence:", sentence)
    # print("Tokens:", tokens)
    # print("Predicted Labels:", decoded_predictions[i])
    print()

ValueError: 'China' is not in list

In [53]:
failure_rate, failure_count=calculate_failure_rate(t_token_label, t_token_pred)
if test_name not in input_json_a:
    input_json_a[test_name] = ((failure_rate, failure_count))
    
print(input_json_a)

{'passive': (1.0, 9), 'relative_cause': (0.75, 3), 'indirect_object': (0.5, 1), 'double_object': (0.5, 1), 'Location_modifier': (0.0, 0)}


# 2.6 Negation

In [54]:
test_name = "Negation"
caba_sents = test_sentence[test_name]
t_tokens = [tup[0] for tup in t_target[test_name]] 
t_label= [tup[1] for tup in t_target[test_name]] 
t_pred  = [tup[2] for tup in t_target[test_name]] 

# Tokenize and prepare input
inputs = tokenizer(caba_sents, padding=True, truncation=True, return_tensors="pt")
# inputs = tokenizer.encode(sentence, return_tensors="pt")
# Perform inference
with torch.no_grad():
    outputs = model(**inputs)

# Get predictions
predictions = torch.argmax(outputs.logits, dim=2)

# Decode predictions using label_dict
decoded_predictions = []
for i, sentence_predictions in enumerate(predictions):
    decoded_labels = [list(label_dict.keys())[list(label_dict.values()).index(label_id)] for label_id in sentence_predictions]
    decoded_predictions.append(decoded_labels)

t_token_label=[]
t_token_pred=[]
# Print decoded predictions
for i, sentence in enumerate(caba_sents):
    tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[i])
    t_token_label.append(t_label[i])
    t_token = t_tokens[i]
    ind_pred = tokens.index(t_token)
    t_token_pred.append(decoded_predictions[ind_pred])
    # print("Sentence:", sentence)
    # print("Tokens:", tokens)
    # print("Predicted Labels:", decoded_predictions[i])
    print()









ValueError: 'p' is not in list

In [55]:
failure_rate, failure_count=calculate_failure_rate(t_token_label, t_token_pred)

rounded_number = round(failure_rate, 2)
print(rounded_number)

0.86


In [56]:
if test_name not in input_json_a:
    input_json_a[test_name] = (rounded_number, failure_count)
    
print(input_json_a)

{'passive': (1.0, 9), 'relative_cause': (0.75, 3), 'indirect_object': (0.5, 1), 'double_object': (0.5, 1), 'Location_modifier': (0.0, 0), 'Negation': (0.86, 6)}


In [57]:
if test_name not in input_json_a:
    
    input_json_a[test_name] = (rounded_number, failure_count)
    
print(input_json_a)

{'passive': (1.0, 9), 'relative_cause': (0.75, 3), 'indirect_object': (0.5, 1), 'double_object': (0.5, 1), 'Location_modifier': (0.0, 0), 'Negation': (0.86, 6)}


In [62]:
with open('data_result.json', 'r') as file:
    data = json.load(file)

for k in input_json_a: 
    test_name = k
    failure_rate, failure_count = input_json_a[k]
    for capability in data['capabilities'].values():
        # print(capability)
        tests = capability.get("tests")
            # print(tests)
        if len(tests) == 1:
            data_name = [k for k in tests][0]

            if data_name == test_name:
                    # capability['failure_rate'] = capability.pop('pass')
                lr_num =  capability['failure_rate']
                capability['failure_rate'] = {"lr": lr_num, "ad": failure_rate}
                lr_count = capability['fail']
                capability['fail'] = {"lr": lr_count, "ad": failure_count}
                    # print(capability['fail'])
        else:
            for i in tests:
                # print(tests[i])
                # print(i)
                data_name = [k for k in tests[i]][0]

                if data_name == test_name:
                    # print("right name")
                        # capability['failure_rate'] = capability.pop('pass')
                    lr_num =  capability['tests'][i]["failure_rate"]
                    tests[i]["failure_rate"] = {"lr":lr_num, "ad": failure_rate}
                    lr_count = capability['tests'][i]["fail"]
                    tests[i]["fail"] = {"lr":lr_count,"ad": failure_count}

with open('data_result.json', 'w') as file:
    json.dump(data, file, indent=4)  
    
    

