# PART 3. Claim verification

import libs

In [1]:
import pickle
from pathlib import Path
from typing import Dict, Tuple

import numpy as np
import pandas as pd
from pandarallel import pandarallel
from tqdm.auto import tqdm

import sys
import json
import torch
from sklearn.metrics import accuracy_score
from torch.optim import AdamW
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from transformers import (
    AutoModelForSequenceClassification,
    AutoConfig,
    AutoTokenizer,
    get_scheduler,
)

from dataset import BERTDataset
from utils import (
    generate_evidence_to_wiki_pages_mapping,
    jsonl_dir_to_df,
    load_json,
    load_model,
    save_checkpoint,
    set_lr_scheduler,
)

pandarallel.initialize(progress_bar=True, verbose=0, nb_workers=4)

Global variables

In [2]:
LABEL2ID: Dict[str, int] = {
    "supports": 0,
    "refutes": 1,
    "NOT ENOUGH INFO": 2,
}
ID2LABEL: Dict[int, str] = {v: k for k, v in LABEL2ID.items()}

TRAIN_DATA = load_json("data/train_doc5sent5.jsonl")
DEV_DATA = load_json("data/dev_doc5sent5.jsonl")

TRAIN_PKL_FILE = Path("data/train_doc5sent5.pkl")
DEV_PKL_FILE = Path("data/dev_doc5sent5.pkl")

In [3]:
pd.DataFrame(DEV_DATA)

Unnamed: 0,id,label,claim,evidence,predicted_pages,predicted_evidence
0,5475,supports,IIA的執行主席比爾 · 比紹普於2004年過世。,"[[[4549, 4635, 比爾·比紹普, 3], [4549, 4635, 比爾·比紹普...",[比爾·比紹普],"[[比爾·比紹普, 0], [比爾·比紹普, 3]]"
1,12722,refutes,由爾文·克許納製作的《星際大戰五部曲：帝國大反擊》是續集超越前作的典範，內容是發生在前作《星...,"[[[11729, 10428, 星際大戰五部曲：帝國大反擊, 0]]]","[星際大戰_(消歧義), 星際大戰五部曲：帝國大反擊, 爾文·克許納]","[[星際大戰五部曲：帝國大反擊, 18], [爾文·克許納, 0], [星際大戰五部曲：帝國..."
2,1546,supports,上夸克與下垮克是所有夸克質量最低的，質量較高的夸克藉由粒子衰變的自發過程變成上或下夸克。,"[[[1488, 1595, 夸克, 8], [1488, 1595, 粒子衰變, 0], ...","[粒子衰變, 上夸克, 夸克, 下夸克]","[[夸克, 7], [夸克, 8], [粒子衰變, 0], [下夸克, 1], [夸克, 9]]"
3,7916,refutes,建於1659年的基留希是位於北美洲的一座村落。,"[[[7131, 6794, 基留希, 0]]]",[基留希],"[[基留希, 0]]"
4,10960,supports,阿登多夫在2011年的人口數據中爲女生多於男生。,"[[[10201, 9240, 阿登多夫, 0], [10201, 9240, 阿登多夫, ...",[阿登多夫],"[[阿登多夫, 0], [阿登多夫, 1]]"
...,...,...,...,...,...,...
576,4947,refutes,藤子·F·不二雄是藤本弘單飛後的匿名。,"[[[6796, 6520, 藤子·F·不二雄, 6]]]","[藤子·F·不二雄, 藤子·F·不二雄博物館, 藤子·F·不二雄創作]","[[藤子·F·不二雄, 0], [藤子·F·不二雄博物館, 27], [藤子·F·不二雄創作..."
577,154,NOT ENOUGH INFO,大象出現於唐朝。,"[[313, None, None, None]]",[黔中郡_(唐朝)],"[[黔中郡_(唐朝), 3], [黔中郡_(唐朝), 0], [黔中郡_(唐朝), 4], ..."
578,12531,refutes,在拉脫維亞舉行的2003年歐洲歌唱大賽的冠軍從缺。,"[[[11900, 10569, 2003年歐洲歌唱大賽, 20]]]",[2003年歐洲歌唱大賽],"[[2003年歐洲歌唱大賽, 0], [2003年歐洲歌唱大賽, 4], [2003年歐洲歌..."
579,754,NOT ENOUGH INFO,剛果民主共和國城市爲金沙薩。,"[[871, None, None, None]]","[維維_(剛果民主共和國), 剛果民主共和國]","[[剛果民主共和國, 0], [剛果民主共和國, 9], [剛果民主共和國, 8], [剛果..."


In [4]:


mapping_path = Path(f"data/part2_mapping.json")
if mapping_path.exists():
    mapping = json.load( open( "data/part2_mapping.json" ) )
else:
    wiki_pages = jsonl_dir_to_df("data/wiki-pages")
    mapping = generate_evidence_to_wiki_pages_mapping(wiki_pages)
    json.dump( mapping, open( "data/part2_mapping.json", 'w' ) )
    del wiki_pages

Preload wiki database (same as part 2.)

### Helper function

AICUP dataset with top-k evidence sentences.

In [5]:
class AicupTopkEvidenceBERTDataset(BERTDataset):
    """AICUP dataset with top-k evidence sentences."""

    def __getitem__(
        self,
        idx: int,
        **kwargs,
    ) -> Tuple[Dict[str, torch.Tensor], int]:
        item = self.data.iloc[idx]
        claim = item["claim"]
        evidence = item["evidence_list"]

        # In case there are less than topk evidence sentences
        pad = ["[PAD]"] * (self.topk - len(evidence))
        evidence += pad
        concat_claim_evidence = " [SEP] ".join([*claim, *evidence])
#         print(concat_claim_evidence)
        concat = self.tokenizer(
            concat_claim_evidence,
            padding="max_length",
            max_length=self.max_length,
            truncation=True,
        )
        label = LABEL2ID[item["label"]] if "label" in item else -1
        concat_ten = {k: torch.tensor(v) for k, v in concat.items()}

        if "label" in item:
            concat_ten["labels"] = torch.tensor(label)

        return concat_ten

Evaluation function

In [6]:
def run_evaluation(model: torch.nn.Module, dataloader: DataLoader, device):
    model.eval()

    loss = 0
    y_true = []
    y_pred = []
    with torch.no_grad():
        for batch in tqdm(dataloader):
            y_true.extend(batch["labels"].tolist())

            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss += outputs.loss.item()
            logits = outputs.logits
            y_pred.extend(torch.argmax(logits, dim=1).tolist())

    acc = accuracy_score(y_true, y_pred)

    return {"val_loss": loss / len(dataloader), "val_acc": acc}

Prediction

### Main function

In [7]:
def join_with_topk_evidence(
    df: pd.DataFrame,
    mapping: dict,
    mode: str = "train",
    topk: int = 5,
) -> pd.DataFrame:
    """join_with_topk_evidence join the dataset with topk evidence.

    Note:
        After extraction, the dataset will be like this:
               id     label         claim                           evidence            evidence_list
        0    4604  supports       高行健...     [[[3393, 3552, 高行健, 0], [...  [高行健 （ ）江西赣州出...
        ..    ...       ...            ...                                ...                     ...
        945  2095  supports       美國總...  [[[1879, 2032, 吉米·卡特, 16], [...  [卸任后 ， 卡特積極參與...
        停各种战争及人質危機的斡旋工作 ， 反对美国小布什政府攻打伊拉克...

        [946 rows x 5 columns]

    Args:
        df (pd.DataFrame): The dataset with evidence.
        wiki_pages (pd.DataFrame): The wiki pages dataframe
        topk (int, optional): The topk evidence. Defaults to 5.
        cache(Union[Path, str], optional): The cache file path. Defaults to None.
            If cache is None, return the result directly.

    Returns:
        pd.DataFrame: The dataset with topk evidence_list.
            The `evidence_list` column will be: List[str]
    """

    # format evidence column to List[List[Tuple[str, str, str, str]]]
    if "evidence" in df.columns:
        df["evidence"] = df["evidence"].parallel_map(
            lambda x: [[x]] if not isinstance(x[0], list) else [x]
            if not isinstance(x[0][0], list) else x)

    print(f"Extracting evidence_list for the {mode} mode ...")
    if mode == "eval":
        # extract evidence
        df["evidence_list"] = df["predicted_evidence"].parallel_map(lambda x: [
            mapping.get(evi_id, {}).get(str(evi_idx), "")
            for evi_id, evi_idx in x  # for each evidence list
        ][:topk] if isinstance(x, list) else [])
        print(df["evidence_list"][:5])
    else:
        # extract evidence
        df["evidence_list"] = df["evidence"].parallel_map(lambda x: [
            " ".join([  # join evidence
                mapping.get(evi_id, {}).get(str(evi_idx), "")
                for _, _, evi_id, evi_idx in evi_list
            ]) if isinstance(evi_list, list) else ""
            for evi_list in x  # for each evidence list
        ][:1] if isinstance(x, list) else [])

    return df

### Step 1. Setup training environment

Hyperparams

In [12]:
#@title  { display-mode: "form" }

MODEL_NAME = "bert-base-chinese"  #@param {type:"string"}
TRAIN_BATCH_SIZE = 7  #@param {type:"integer"}
TEST_BATCH_SIZE = 7  #@param {type:"integer"}
SEED = 20  #@param {type:"integer"}
LR = 1e-4  #@param {type:"number"}
NUM_EPOCHS = 10  #@param {type:"integer"}
MAX_SEQ_LEN =  512  #@param {type:"integer"}
EVIDENCE_TOPK = 5  #@param {type:"integer"}
VALIDATION_STEP = 250  #@param {type:"integer"}

ACCUMULATION_STEP = 20


Experiment Directory

In [13]:
OUTPUT_FILENAME = "submission.jsonl"

EXP_DIR = f"claim_verification/e{NUM_EPOCHS}_bs{TRAIN_BATCH_SIZE}_" + f"{LR}_top{EVIDENCE_TOPK}"
LOG_DIR = "logs/" + EXP_DIR
CKPT_DIR = "checkpoints/" + EXP_DIR

if not Path(LOG_DIR).exists():
    Path(LOG_DIR).mkdir(parents=True)

if not Path(CKPT_DIR).exists():
    Path(CKPT_DIR).mkdir(parents=True)

### Step 2. Concat claim and evidences
join topk evidence

In [14]:
if not TRAIN_PKL_FILE.exists():
    train_df = join_with_topk_evidence(
        pd.DataFrame(TRAIN_DATA),
        mapping,
        topk=EVIDENCE_TOPK,
    )
    train_df.to_pickle(TRAIN_PKL_FILE, protocol=4)
else:
    with open(TRAIN_PKL_FILE, "rb") as f:
        train_df = pickle.load(f)

if not DEV_PKL_FILE.exists():
    dev_df = join_with_topk_evidence(
        pd.DataFrame(DEV_DATA),
        mapping,
        mode="eval",
        topk=EVIDENCE_TOPK,
    )
    dev_df.to_pickle(DEV_PKL_FILE, protocol=4)
else:
    with open(DEV_PKL_FILE, "rb") as f:
        dev_df = pickle.load(f)

### Step 3. Training

In [15]:
train_df["label"].value_counts()

supports           4744
refutes            3148
NOT ENOUGH INFO    3147
Name: label, dtype: int64

Prevent CUDA out of memory

In [16]:
want_index = train_df[train_df["label"]=="refutes"].index
copy_df = train_df.loc[want_index].sample(frac=0.5, replace=True, random_state=42)
train_df = pd.concat([train_df, copy_df] , ignore_index=True)


In [17]:
train_df["label"].value_counts()

supports           4744
refutes            4092
NOT ENOUGH INFO    3147
Name: label, dtype: int64

In [18]:
dev_df["label"].value_counts()

supports           250
NOT ENOUGH INFO    166
refutes            165
Name: label, dtype: int64

In [19]:
torch.cuda.empty_cache()

In [20]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

train_dataset = AicupTopkEvidenceBERTDataset(
    train_df,
    tokenizer=tokenizer,
    max_length=MAX_SEQ_LEN,
)
val_dataset = AicupTopkEvidenceBERTDataset(
    dev_df,
    tokenizer=tokenizer,
    max_length=MAX_SEQ_LEN,
)

train_dataloader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=TRAIN_BATCH_SIZE,
)
eval_dataloader = DataLoader(val_dataset, batch_size=TEST_BATCH_SIZE)

In [21]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device(
    "cpu")
config = AutoConfig.from_pretrained(MODEL_NAME, num_labels=len(LABEL2ID))
config.hidden_dropout_prob = 0.3
config.attention_probs_dropout_prob = 0.3
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, config=config)
model  = model.to(device)

optimizer = AdamW(model.parameters(), lr=LR)
num_training_steps = NUM_EPOCHS * len(train_dataloader)
lr_scheduler = set_lr_scheduler(optimizer, num_training_steps)

writer = SummaryWriter(LOG_DIR)

Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [22]:
model.config

BertConfig {
  "_name_or_path": "bert-base-chinese",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.3,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.3,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.27.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

In [23]:
model = load_model(model, "val_acc=0.4871_model.1200.pt", CKPT_DIR)

FileNotFoundError: [Errno 2] No such file or directory: 'checkpoints/claim_verification/e3_bs14_0.0003_top5/val_acc=0.4871_model.1200.pt'

Training (30 mins)

In [24]:
from IPython.display import clear_output
progress_bar = tqdm(range(num_training_steps))
current_steps = 0

train_info_list = {"acc":[] , "loss":[]}
val_info_list = {"acc":[] , "loss":[]}

y_pred = []
y_true = []
max_val_acc = 0
accuracy_info = 0
loss_info = 0
val_loss = 0
val_acc = 0
for epoch in range(NUM_EPOCHS):
    model.train()
    accuracy_info = 0
    loss_info = 0
    
    for i,batch in enumerate(train_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        logits = outputs.logits

        # 計算損失
        loss =  outputs.loss / ACCUMULATION_STEP
        loss.backward()
        
        
        if i % ACCUMULATION_STEP == 0 and  i > 0:
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            
        progress_bar.update(1)
        writer.add_scalar("training_loss", loss.item(), current_steps)

        y_pred.extend(torch.argmax(outputs.logits, dim=1).tolist())
        y_true.extend(batch["labels"].tolist())

        accuracy_info += accuracy_score(y_pred,y_true)
        loss_info += outputs.loss.item()
        
        current_steps += 1
            

        if current_steps % VALIDATION_STEP == 0 and current_steps > 0:
            print("Start validation")
            val_results = run_evaluation(model, eval_dataloader, device)
            val_loss = val_results["val_loss"]
            val_acc = val_results["val_acc"]
            max_val_acc = max(max_val_acc , val_acc)
            
            # log each metric separately to TensorBoard
            for metric_name, metric_value in val_results.items():
                writer.add_scalar(f"{metric_name}", metric_value, current_steps)
                
#             if(val_acc >= 0.4 and abs(val_acc-max_val_acc) <= 0.001):
            save_checkpoint(
                model,
                CKPT_DIR,
                current_steps,
                mark=f"val_acc={val_results['val_acc']:.4f}",
            )

            # 將資訊整理進List
            train_info_list["acc"].append(accuracy_info / VALIDATION_STEP)
            train_info_list["loss"].append(loss_info  / VALIDATION_STEP)
            val_info_list["acc"].append(val_acc)
            val_info_list["loss"].append(val_loss)
            print(f'train_acc:{accuracy_info / VALIDATION_STEP : .6f} , train_loss:{loss_info  / VALIDATION_STEP : .6f}')
            print(f'val_acc:{val_acc : .6f} , val_loss:{val_loss : .6f}')
            print()
            accuracy_info = 0
            loss_info = 0
    optimizer.step()
    lr_scheduler.step()
    optimizer.zero_grad()

print("Finished training!")

  0%|          | 0/2568 [00:00<?, ?it/s]

Start validation


  0%|          | 0/42 [00:00<?, ?it/s]

train_acc: 0.354030 , train_loss: 1.135963
val_acc: 0.481928 , val_loss: 1.068603

Start validation


  0%|          | 0/42 [00:00<?, ?it/s]

train_acc: 0.463531 , train_loss: 0.847310
val_acc: 0.485370 , val_loss: 1.118052

Start validation


  0%|          | 0/42 [00:00<?, ?it/s]

train_acc: 0.551509 , train_loss: 0.570238
val_acc: 0.485370 , val_loss: 1.388660

Start validation


  0%|          | 0/42 [00:00<?, ?it/s]

train_acc: 0.596420 , train_loss: 0.501218
val_acc: 0.487091 , val_loss: 1.622366

Start validation


  0%|          | 0/42 [00:00<?, ?it/s]

train_acc: 0.449664 , train_loss: 0.364168
val_acc: 0.483649 , val_loss: 1.745520

Start validation


  0%|          | 0/42 [00:00<?, ?it/s]

train_acc: 0.637806 , train_loss: 0.485194
val_acc: 0.487091 , val_loss: 1.609402

Start validation


  0%|          | 0/42 [00:00<?, ?it/s]

train_acc: 0.647791 , train_loss: 0.495083
val_acc: 0.487091 , val_loss: 1.776089

Start validation


  0%|          | 0/42 [00:00<?, ?it/s]

train_acc: 0.658976 , train_loss: 0.469379
val_acc: 0.488812 , val_loss: 1.956687




KeyboardInterrupt



In [25]:
model = load_model(model, "val_acc=0.4888_model.1600.pt", CKPT_DIR)

In [24]:
print("Start validation")
val_results = run_evaluation(model, eval_dataloader, device)
val_loss = val_results["val_loss"]
val_acc = val_results["val_acc"]
max_val_acc = max(max_val_acc , val_acc)

# log each metric separately to TensorBoard
for metric_name, metric_value in val_results.items():
    writer.add_scalar(f"{metric_name}", metric_value, current_steps)

#             if(val_acc >= 0.4 and abs(val_acc-max_val_acc) <= 0.001):
save_checkpoint(
    model,
    CKPT_DIR,
    current_steps,
    mark=f"val_acc={val_results['val_acc']:.4f}",
)

Start validation


  0%|          | 0/42 [00:00<?, ?it/s]

In [20]:

save_checkpoint(
    model,
    CKPT_DIR,
    current_steps,
    mark=f"val_acc={val_results['val_acc']:.4f}",
)

In [21]:
%load_ext tensorboard
%tensorboard --logdir logs
%reload_ext tensorboard

Reusing TensorBoard on port 6006 (pid 16660), started 6 days, 6:01:57 ago. (Use '!kill 16660' to kill it.)

### Step 4. Make your submission

In [26]:
TEST_DATA = load_json("data/test_doc5sent5.jsonl")
TEST_PKL_FILE = Path("data/test_doc5sent5.pkl")

if not TEST_PKL_FILE.exists():
    test_df = join_with_topk_evidence(
        pd.DataFrame(TEST_DATA),
        mapping,
        mode="eval",
        topk=EVIDENCE_TOPK,
    )
    test_df.to_pickle(TEST_PKL_FILE, protocol=4)
else:
    with open(TEST_PKL_FILE, "rb") as f:
        test_df = pickle.load(f)

test_dataset = AicupTopkEvidenceBERTDataset(
    test_df,
    tokenizer=tokenizer,
    max_length=MAX_SEQ_LEN,
)
test_dataloader = DataLoader(test_dataset, batch_size=TEST_BATCH_SIZE)

In [29]:

models = []
models.append(load_model(model, "val_acc=0.4888_model.1600.pt", CKPT_DIR))
# models.append(load_model(model, "val_acc=0.5611_model.16500.pt", CKPT_DIR))
# models.append(load_model(model, "val_acc=0.5577_model.17000.pt", CKPT_DIR))
# models.append(load_model(model, "val_acc=0.5047_model.46000(分數為0.451972).pt", CKPT_DIR))

In [30]:
len(models)

1

In [31]:
def run_predict(models: list, test_dl: DataLoader, device) -> list:
    for model in models:
        model.eval()

    preds = []
    for batch in tqdm(test_dl,
                      total=len(test_dl),
                      leave=False,
                      desc="Predicting"):
        batch = {k: v.to(device) for k, v in batch.items()}
        
        tmpList = []
            
        for model in models:
            pred = model(**batch).logits
            pred = torch.argmax(pred, dim=1)
            tmpList.append(pred.tolist())
        FinalPred = []
        
        for col in range(len(tmpList[0])):
            num = [0,0,0]
            for row in range(len(tmpList)):
                num[tmpList[row][col]] += 1
            ans = num.index(max(num))
            FinalPred.append(ans)
        
#         print(tmpList[0])
#         print(tmpList[1])
#         print(tmpList[2])
#         print(FinalPred)
#         print('-'*50)
        preds.extend(FinalPred)
    return preds

In [32]:
predicted_label = run_predict(models, test_dataloader, device)

Predicting:   0%|          | 0/646 [00:00<?, ?it/s]

Write files

In [33]:
predict_dataset = test_df.copy()
predict_dataset["predicted_label"] = list(map(ID2LABEL.get, predicted_label))
predict_dataset[["id", "predicted_label", "predicted_evidence"]].to_json(
    OUTPUT_FILENAME,
    orient="records",
    lines=True,
    force_ascii=False,
)

In [34]:
sub_df = pd.read_json(r'./submission.jsonl',orient="records",lines=True)

In [35]:
sub_df['predicted_label'].value_counts()

supports           8141
NOT ENOUGH INFO     867
refutes              30
Name: predicted_label, dtype: int64

# FINISH

# 檢查submission有沒有不符合格式

In [26]:
sub_df = pd.read_json(r'./submission.jsonl',orient="records",lines=True)
# 檢查supports的部分是否都含有predicted_evidence

In [27]:
# 檢查support或refutes的陳述句是否至少帶有一組證據句
check_df = sub_df.iloc[sub_df[sub_df['predicted_label'] != 'NOT ENOUGH INFO'].index]
for List in check_df['predicted_evidence']:
#     print(List)
    if(len(List) == 0):
        print('[Error] ENOUGH INFO has empty list', List)

In [28]:
# 檢查NOT ENOUGH INFO標籤的資料是否皆沒有證據句
check_df = sub_df.iloc[sub_df[sub_df['predicted_label'] == 'NOT ENOUGH INFO'].index]
for i,List in enumerate(check_df['predicted_evidence']):
    if(len(List) != 0):
        print('[Error] NOT ENOUGH INFO has evidence , index i', i)
        print(List)