In [49]:
from beir import LoggingHandler
from beir import util
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval import models

import pandas as pd

import dataclasses
import json
import logging
import os
import pathlib

logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO,
                    handlers=[LoggingHandler()])



In [58]:
@dataclasses.dataclass
class BeirTopKSimilarityMetadata:
    dataset_name: str
    num_queries: int
    num_documents: int
    num_positive_annotations: int
    model_name: str
    top_k: bool
    include_text: bool

    def __init__(self, dataset_name, queries, corpus, qrels, model_name, top_k, include_text):
        self.dataset_name = dataset_name
        self.num_queries = len(queries)
        self.num_documents = len(corpus)
        self.num_positive_annotations = sum(len(rel_docids) for qid, rel_docids in qrels.items())
        self.model_name = model_name
        self.top_k = top_k
        self.include_text = include_text
        

def load_data(dataset_name, datasets_path=".."):
    url = "https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip".format(dataset_name)
    out_dir = os.path.join("..", "datasets")
    data_path = util.download_and_unzip(url, out_dir)
    
    #### Provide the data_path where scifact has been downloaded and unzipped
    corpus, queries, qrels = GenericDataLoader(data_folder=data_path).load(split="test")
    return data_path, corpus, queries, qrels


def compute_similarity(model_name, batch_size, score_function=None) -> dict[str, dict[str, float]]:
    if score_function is None:
        score_function = "dot"
    if score_function not in ("dot", "cos_sim"):
        raise ValueError(f"score_function must be either 'dot' or 'cos_sim'. Received: '{score_function}'")

    model = DRES(models.SentenceBERT(model_name, trust_remote_code=True), batch_size=batch_size, trust_remote_code=True)
    retriever = EvaluateRetrieval(model, score_function=score_function)
    q_doc_sim = retriever.retrieve(corpus, queries)
    return q_doc_sim


def get_top_k_similar_docs(doc_sim: dict[str, float], k: int) -> dict[str, float]:
    return dict(sorted(doc_sim.items(), key=lambda x: x[1], reverse=True)[:k])


def get_labels_similarities(query, top_k_similar_docs, qrels):
    labels = {}
    similarity_scores = {}
    for doc, similarity_score in top_k_similar_docs.items():
        if doc in qrels[query] and qrels[query][doc] == 1:
            labels[doc] = 1
        else:
            labels[doc] = 0
        similarity_scores[doc] = similarity_score
    return labels, similarity_scores
        

def get_top_k_data_frame(q_doc_sim, top_k, include_text=False):
    qids = []
    docids = []
    rels = []
    sims = []
    if include_text:
        q_texts = []
        title_texts = []
        doc_texts = []
    
    for qid, doc_sim in q_doc_sim.items():
        top_k_doc_scores = get_top_k_similar_docs(q_doc_sim[qid], top_k)
        docid_labels, similarities = get_labels_similarities(qid, top_k_doc_scores, qrels)
        for docid, rel in docid_labels.items():
            qids.append(qid)
            docids.append(docid)
            rels.append(rel)
            sims.append(similarities[docid])
            if include_text:
                q_texts.append(queries[qid])
                title_texts.append(corpus[docid].get("title", ""))
                doc_texts.append(corpus[docid].get("text", ""))
    data = {
        "qid": qids, 
        "docid": docids, 
        "rel": rels,
        "sim": sims,
    }
    
    if include_text:
        data.update({
            "query": q_texts,
            "title": title_texts,
            "corpus": doc_texts,
        })
    return pd.DataFrame(data)    


def main(dataset_name=None):
    datasets = [
        "trec-covid",
        "nq",
        "hotpotqa",
        "arguana",
        "webis-touche2020",
        "cqadupstack",
        "dbpedia-entity",
        "scidocs",
        "fever",
    ]
    # dataset_name = "scifact"
    model_name = "msmarco-distilbert-base-tas-b"
    batch_size = 64
    top_k = 500
    include_text = False

    def process_dataset(dataset_name):
        # Load dataset.
        logging.info("## Load dataset.")
        data_path, corpus, queries, qrels = load_data(dataset_name)
        # Compute similarity scores.
        logging.info("## Compute similarity scores.")
        q_doc_sim = compute_similarity(model_name, batch_size)    
        # Get DF.
        logging.info("## Get the dataframe.")
        df = get_top_k_data_frame(q_doc_sim, top_k, include_text)
        metadata = BeirTopKSimilarityMetadata(
            dataset_name, queries, corpus, qrels, model_name, top_k, include_text)

        # Write to files
        logging.info("## Write to files.")
        df.to_csv(data_path + ".csv")
        with open(data_path + "_metadata.json", "w") as fout:
            json.dump(dataclasses.asdict(metadata), fout, indent=4)

    
    if dataset_name is None:
        for dataset_idx, dataset_name in enumerate(datasets):
            logging.info(f"#### DATASET {dataset_idx}: {dataset_name}")
            process_dataset(dataset_idx, dataset_name)
    else:
        # Only process the specified dataset.
        logging.info(f"#### DATASET: {dataset_name}")
        process_dataset(dataset_name)


In [59]:
main(dataset_name="scifact")

2024-09-02 16:12:49 - #### DATASET: scifact
2024-09-02 16:12:49 - ## Load dataset.
2024-09-02 16:12:49 - Loading Corpus...


  0%|          | 0/5183 [00:00<?, ?it/s]

2024-09-02 16:12:49 - Loaded 5183 TEST Documents.
2024-09-02 16:12:49 - Doc Example: {'text': 'Alterations of the architecture of cerebral white matter in the developing human brain can affect cortical development and result in functional disabilities. A line scan diffusion-weighted magnetic resonance imaging (MRI) sequence with diffusion tensor analysis was applied to measure the apparent diffusion coefficient, to calculate relative anisotropy, and to delineate three-dimensional fiber architecture in cerebral white matter in preterm (n = 17) and full-term infants (n = 7). To assess effects of prematurity on cerebral white matter development, early gestation preterm infants (n = 10) were studied a second time at term. In the central white matter the mean apparent diffusion coefficient at 28 wk was high, 1.8 microm2/ms, and decreased toward term to 1.2 microm2/ms. In the posterior limb of the internal capsule, the mean apparent diffusion coefficients at both times were similar (1.2 vers

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-09-02 16:12:50 - Sorting Corpus by document length (Longest first)...
2024-09-02 16:12:50 - Scoring Function: Dot Product (dot)
2024-09-02 16:12:50 - Encoding Batch 1/1...


Batches:   0%|          | 0/81 [00:00<?, ?it/s]

2024-09-02 16:13:05 - ## Get the dataframe.
2024-09-02 16:13:05 - ## Write to files.


In [55]:
#### Load the SBERT model and retrieve using cosine-similarity

model_name = "msmarco-distilbert-base-tas-b"
batch_size = 64


model = DRES(models.SentenceBERT(model_name, trust_remote_code=True), batch_size=batch_size, trust_remote_code=True)
retriever = EvaluateRetrieval(model, score_function="dot") # or "cos_sim" for cosine similarity
results = retriever.retrieve(corpus, queries)

2024-08-30 20:53:22 - Use pytorch device_name: cuda
2024-08-30 20:53:22 - Load pretrained SentenceTransformer: msmarco-distilbert-base-tas-b




2024-08-30 20:53:23 - Encoding Queries...


Batches:   0%|          | 0/54 [00:00<?, ?it/s]

2024-08-30 20:53:24 - Sorting Corpus by document length (Longest first)...
2024-08-30 20:53:26 - Scoring Function: Dot Product (dot)
2024-08-30 20:53:26 - Encoding Batch 1/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 20:56:19 - Encoding Batch 2/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 20:58:37 - Encoding Batch 3/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:00:36 - Encoding Batch 4/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:02:25 - Encoding Batch 5/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:04:06 - Encoding Batch 6/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:05:40 - Encoding Batch 7/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:07:09 - Encoding Batch 8/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:08:33 - Encoding Batch 9/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:09:53 - Encoding Batch 10/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:11:09 - Encoding Batch 11/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:12:23 - Encoding Batch 12/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:13:34 - Encoding Batch 13/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:14:42 - Encoding Batch 14/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:15:49 - Encoding Batch 15/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:16:53 - Encoding Batch 16/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:17:55 - Encoding Batch 17/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:18:56 - Encoding Batch 18/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:19:54 - Encoding Batch 19/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:20:51 - Encoding Batch 20/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:21:47 - Encoding Batch 21/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:22:41 - Encoding Batch 22/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:23:33 - Encoding Batch 23/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:24:24 - Encoding Batch 24/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:25:13 - Encoding Batch 25/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:26:00 - Encoding Batch 26/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:26:46 - Encoding Batch 27/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:27:30 - Encoding Batch 28/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:28:13 - Encoding Batch 29/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:28:55 - Encoding Batch 30/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:29:35 - Encoding Batch 31/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:30:14 - Encoding Batch 32/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:30:51 - Encoding Batch 33/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:31:28 - Encoding Batch 34/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:32:03 - Encoding Batch 35/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:32:37 - Encoding Batch 36/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:33:09 - Encoding Batch 37/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:33:41 - Encoding Batch 38/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:34:11 - Encoding Batch 39/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:34:39 - Encoding Batch 40/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:35:07 - Encoding Batch 41/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:35:33 - Encoding Batch 42/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:35:59 - Encoding Batch 43/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:36:23 - Encoding Batch 44/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:36:45 - Encoding Batch 45/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:37:07 - Encoding Batch 46/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:37:27 - Encoding Batch 47/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:37:46 - Encoding Batch 48/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:38:04 - Encoding Batch 49/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:38:20 - Encoding Batch 50/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:38:34 - Encoding Batch 51/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:38:47 - Encoding Batch 52/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:38:58 - Encoding Batch 53/54...


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

2024-08-30 21:39:07 - Encoding Batch 54/54...


Batches:   0%|          | 0/492 [00:00<?, ?it/s]

In [56]:
#### Evaluate your model with NDCG@k, MAP@K, Recall@K and Precision@K  where k = [1,3,5,10,100,1000] 
    ndcg, _map, recall, precision = retriever.evaluate(qrels, results, retriever.k_values)

print(ndcg)

2024-08-30 22:52:02 - For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2024-08-30 22:52:03 - 

2024-08-30 22:52:03 - NDCG@1: 0.2714
2024-08-30 22:52:03 - NDCG@3: 0.3813
2024-08-30 22:52:03 - NDCG@5: 0.4231
2024-08-30 22:52:03 - NDCG@10: 0.4629
2024-08-30 22:52:03 - NDCG@100: 0.5154
2024-08-30 22:52:03 - NDCG@1000: 0.5249
2024-08-30 22:52:03 - 

2024-08-30 22:52:03 - MAP@1: 0.2418
2024-08-30 22:52:03 - MAP@3: 0.3439
2024-08-30 22:52:03 - MAP@5: 0.3687
2024-08-30 22:52:03 - MAP@10: 0.3870
2024-08-30 22:52:03 - MAP@100: 0.3989
2024-08-30 22:52:03 - MAP@1000: 0.3994
2024-08-30 22:52:03 - 

2024-08-30 22:52:03 - Recall@1: 0.2418
2024-08-30 22:52:03 - Recall@3: 0.4611
2024-08-30 22:52:03 - Recall@5: 0.5570
2024-08-30 22:52:03 - Recall@10: 0.6726
2024-08-30 22:52:03 - Recall@100: 0.9030
2024-08-30 22:52:03 - Recall@1000: 0.9732
2024-08-30 22:52:03 - 

2024-08-30 22:52:03 - P@1: 0.2714
2024-08-30 22:52:03

In [60]:
ndcg

NameError: name 'ndcg' is not defined

In [4]:
results

{'1': {'xov992x7': 102.7401351928711,
  'zr0ylclx': 102.74263000488281,
  'rvgn12nj': 102.7409896850586,
  'ab70s980': 102.7465591430664,
  'm5mo9kfs': 102.75932312011719,
  'osh00y37': 102.74137115478516,
  '9jjhfpdl': 102.74415588378906,
  'uxz9uedr': 102.77181243896484,
  'efebiaer': 102.7500228881836,
  'tyd50s4m': 102.76490020751953,
  '40jyzfkp': 102.77094268798828,
  'b25m9pub': 102.74260711669922,
  'w4j81ftw': 102.75237274169922,
  'j1jlgdnf': 102.76032257080078,
  'i16a67rb': 102.75927734375,
  'khhiex2c': 102.80208587646484,
  'i969aqn9': 102.80107879638672,
  'lyrlrx8p': 102.77182006835938,
  '45njrehj': 102.75751495361328,
  'q7fabwio': 102.77234649658203,
  'dwxsuju8': 102.77678680419922,
  'bnsxw0bz': 102.77948760986328,
  'hrhnogmm': 102.78282165527344,
  'ck8l0c6k': 102.74260711669922,
  'sn4cpn7c': 102.74287414550781,
  'cq6v8rnp': 102.76831817626953,
  'cv4uyxmi': 102.79068756103516,
  '9yinvobl': 102.77776336669922,
  '04r16xxz': 102.82829284667969,
  '0lb60y12': 10

In [43]:
import pandas as pd

k = 500

qids = []
docids = []
rels = []
sims = []
# q_texts = []
# title_texts = []
# doc_texts = []

for qid, res in results.items():
    top_k = get_top_k(res, k)
    docid_labels, similarities = get_labels_similarities(qid, top_k, qrels)
    for docid, rel in docid_labels.items():
        qids.append(qid)
        docids.append(docid)
        rels.append(rel)
        sims.append(similarities[docid])
        # q_texts.append(queries[qid])
        # title_texts.append(corpus[docid].get('title', ''))
        # doc_texts.append(corpus[docid].get('text', ''))

# for doc_id in top_k:
#     print(corpus[doc_id]['text'])
    # print()

In [44]:
# df = pd.DataFrame({'qid': qids, 'docid': docids, 'rel': rels, 'sim': sims, 'query': q_texts, 'title': title_texts, 'corpus': doc_texts})
df = pd.DataFrame({'qid': qids, 'docid': docids, 'relevance': rels, 'similarity': sims})
df

Unnamed: 0,qid,docid,relevance,similarity
0,1,wy0y5ztd,0,116.447906
1,1,jb05x03a,0,116.447891
2,1,okqsvg8q,0,115.972672
3,1,qp0h50t3,0,115.140747
4,1,rzpbpxw2,0,114.927849
...,...,...,...,...
24995,50,tw2nls29,0,101.397453
24996,50,kstt07y1,0,101.395493
24997,50,tdocssfu,0,101.395218
24998,50,mxcfkyui,0,101.394890


In [49]:
data_relevance_path = data_path + "_rel.csv"
print(data_relevance_path)

data_metadata_path = data_path + "_metadata.md"
print(data_metadata_path)

../datasets/trec-covid_rel.csv
../datasets/trec-covid_metadata.md


In [52]:
metadata = f"""# {}"""

In [53]:
df.to_csv(data_relevance_path, index=False)

In [26]:
df[df["qid"]=="2"]

Unnamed: 0,qid,docid,rel,query,title,corpus
100,2,j1cdoxqs,0,how does the coronavirus respond to changes in...,Coronavirus,
101,2,gctnx6j1,0,how does the coronavirus respond to changes in...,What next for the coronavirus response?,
102,2,gvfooevu,0,how does the coronavirus respond to changes in...,Coronaviruses,
103,2,be0mr85h,0,how does the coronavirus respond to changes in...,Coronavirus.,
104,2,bp9xz9wk,0,how does the coronavirus respond to changes in...,Coronavirus?,
...,...,...,...,...,...,...
195,2,cf9e6rc7,0,how does the coronavirus respond to changes in...,Ethics in a time of coronavirus,
196,2,ber3dkh2,0,how does the coronavirus respond to changes in...,Coronaviruses that are always present.,
197,2,6ly81dbl,0,how does the coronavirus respond to changes in...,Coronaviruses: a group with unique features,
198,2,k1yknsgf,0,how does the coronavirus respond to changes in...,Coronavirus motif,


In [35]:
df[(df["qid"]=="2") & (df["rel"]==1)]


Unnamed: 0,qid,docid,rel,query,title,corpus
153,2,tjplc5j6,1,how does the coronavirus respond to changes in...,Effects of air temperature and relative humidi...,Assessment of the risks posed by severe acute ...


In [10]:
corpus[docids[0]]

{'text': '', 'title': 'Covid-19'}

In [79]:
df.to_csv('../datasets/scifact_top5_mxbai-embed-large-v1.csv', sep='\t')

In [31]:
top_k = get_top_k(results['13'], 10)
top_k

{'7662395': 193.5150146484375,
 '4791384': 189.52786254882812,
 '26611834': 185.7748260498047,
 '1263446': 184.20188903808594,
 '16204011': 178.81301879882812,
 '25641414': 176.0165252685547,
 '28633594': 174.34109497070312,
 '1606628': 173.04547119140625,
 '11748341': 172.444580078125,
 '13791044': 170.49440002441406}

In [55]:
query = '57'
k = 5
top_k = get_top_k(results[query], k)
get_labels(query, top_k, qrels)

{'4709641': 1,
 '4459491': 0,
 '11659421': 0,
 '10795063': 0,
 '3113630': 0,
 '13814480': 0,
 '4407385': 0,
 '19970015': 0,
 '30351165': 0,
 '14192687': 0,
 '13293033': 0,
 '1447990': 0,
 '9748934': 0,
 '57574395': 0,
 '39545358': 0,
 '8185080': 0,
 '15347087': 0,
 '22543403': 0,
 '16605494': 0,
 '665817': 0}

In [31]:
qrels

{'1': {'31715818': 1},
 '3': {'14717500': 1},
 '5': {'13734012': 1},
 '13': {'1606628': 1},
 '36': {'5152028': 1, '11705328': 1},
 '42': {'18174210': 1},
 '48': {'13734012': 1},
 '49': {'5953485': 1},
 '50': {'12580014': 1},
 '51': {'45638119': 1},
 '53': {'45638119': 1},
 '54': {'49556906': 1},
 '56': {'4709641': 1},
 '57': {'4709641': 1},
 '70': {'5956380': 1, '4414547': 1},
 '72': {'6076903': 1},
 '75': {'4387784': 1},
 '94': {'1215116': 1},
 '99': {'18810195': 1},
 '100': {'4381486': 1},
 '113': {'6157837': 1},
 '115': {'33872649': 1},
 '118': {'6372244': 1},
 '124': {'4883040': 1},
 '127': {'21598000': 1},
 '128': {'8290953': 1},
 '129': {'27768226': 1},
 '130': {'27768226': 1},
 '132': {'7975937': 1},
 '133': {'38485364': 1,
  '6969753': 1,
  '17934082': 1,
  '16280642': 1,
  '12640810': 1},
 '137': {'26016929': 1},
 '141': {'6955746': 1, '14437255': 1},
 '142': {'10582939': 1},
 '143': {'10582939': 1},
 '146': {'10582939': 1},
 '148': {'1084345': 1},
 '163': {'18872233': 1},
 '1

In [83]:
gt = [0,1,0,0,1,0,1,0,0,1,1,0,1,0]
pred = [0,1,0,0,0,0,1,0,0,1,1,1,1,1]


In [85]:
from sklearn.metrics import precision_recall_fscore_support

In [91]:
precision, recall, f1, support = precision_recall_fscore_support(gt, pred, beta=2)
print(precision)
print(recall)
print(f1)
print(support)



[0.85714286 0.71428571]
[0.75       0.83333333]
[0.76923077 0.80645161]
[8 6]
