In [1]:
import os
import json
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.express as px

In [2]:
# The path to the data, key, and result directories
path_result = "../result"
path_data = "../data"
path_key = "../key"

# The mapping between the platform abbreviation and its full name
platform_abbreviation_mapping = {
    "gh": "GitHub",
    "hf": "Hugging Face Spaces",
    "ip": "independent platform",
    "pwc": "Papers With Code",
    "ss": "Spreadsheet",
}

In [None]:
# Scrape the GitHub repositories whose markdown and html files contain "leaderboard" keyword via the SourceGraph API
import subprocess

# Load the environment variables from key.json
with open(f"{path_key}/.sourcegraph.json") as f:
    env_vars = json.load(f)

# Set the environment variables
os.environ.update(env_vars)

# Execute the shell script and pass path_data as an argument
try:
    subprocess.run(["sh", f"{path_data}/Dependents_Lookup.sh", path_data], check=True)
    print("Script executed successfully.")
except subprocess.CalledProcessError as e:
    print(f"Script execution failed with error: {e}")

In [None]:
# manually check retrieved repositories and curate leaderboard URLs 
import json
import webbrowser

with open(f"{path_data}/GitHub.json") as f:
    data = json.load(f)

for index, repo in enumerate(data["Results"]):
    webbrowser.open(f'https://{repo["name"]}')
    if index % 50 == 49:
        print(index)
        code = input("Press enter key to proceed.")
        if code.lower() == "esc":
            raise KeyboardInterrupt

In [184]:
# curated leaderboard URLs and their corresponding references
github_leaderboard_primary_mapping = [
    {
        "leaderboard": "https://github.com/ray-project/llmperf-leaderboard",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/vztu/BVQA_Benchmark",
        "reference": "https://github.com/vztu/RAPIQUE/blob/e637a3d943010940f67ef7b0c19e2f2c81d63b01/README.md?plain=1#L8",
    },
    {"leaderboard": "https://csbench.github.io/#leaderboard", "reference": ""},
    {
        "leaderboard": "https://crowdbenchmark.com/nwpucrowd.html",
        "reference": "https://github.com/gjy3035/NWPU-Crowd-Sample-Code/blob/fe8e986d23e88f7b9defabbd0b3f6f655803152d/README.md?plain=1#L92",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/Decentralised-AI/falcon-40b/blob/6e61c89591cc7a3328d4010ceaf701dfec03fd18/README.md?plain=1#L183",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/instance-segmentation-on-coco",
        "reference": "https://github.com/IDEA-Research/DINO/blob/d84a491d41898b3befd8294d1cf2614661fc0953/README.md?plain=1#L35",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/panoptic-segmentation-on-coco-test-dev",
        "reference": "https://github.com/IDEA-Research/DINO/blob/d84a491d41898b3befd8294d1cf2614661fc0953/README.md?plain=1#L35",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/semantic-segmentation-on-ade20k",
        "reference": "https://github.com/IDEA-Research/DINO/blob/d84a491d41898b3befd8294d1cf2614661fc0953/README.md?plain=1#L35",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/doantronghieu/El-El-Em/blob/3efadafc91b21dfa5ca3dc8491a0f4917993eec8/Apps/toolkit/others/survey/LLM/Outline.md?plain=1#L25",
    },
    {
        "leaderboard": "https://openrouter.ai/rankings",
        "reference": "https://github.com/doantronghieu/El-El-Em/blob/3efadafc91b21dfa5ca3dc8491a0f4917993eec8/Apps/toolkit/others/survey/LLM/Outline.md?plain=1#L27",
    },
    {"leaderboard": "https://github.com/kaz-Anova/ensemble_amazon", "reference": ""},
    {"leaderboard": "https://github.com/MLBazaar/BTB", "reference": ""},
    {"leaderboard": "https://github.com/Codium-ai/AlphaCodium", "reference": ""},
    {
        "leaderboard": "https://www.nuscenes.org/object-detection",
        "reference": "https://github.com/VDIGPKU/HENet/blob/3da133855f0bd7409b03ac59b90221881d6bf744/README.md?plain=1#L13",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard",
        "reference": "https://github.com/continuedev/what-llm-to-use/blob/3c7cb09e87ef7d9bf24b6fa9543c6da3469cfad6/README.md?plain=1#L39",
    },
    {
        "leaderboard": "https://wilds.stanford.edu/leaderboard",
        "reference": "https://github.com/LFhase/PAIR/blob/a9a434b97e284c7f71641107c542f15db521f9a9/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/dsdanielpark/open-llm-datasets/blob/5b0abd9915038f6800835ef1d6b533b62a35109f/README.md?plain=1#L228",
    },
    {
        "leaderboard": "https://github.com/niais/Awesome-Skeleton-based-Action-Recognition",
        "reference": "",
    },
    {
        "leaderboard": "https://nextplusplus.github.io/TAT-QA",
        "reference": "https://github.com/NExTplusplus/TAT-QA",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/2210/leaderboard",
        "reference": "https://github.com/yanconglin/ICP-Flow/blob/19da224472b6e17216873bddb361af5cd85faa45/README.md?plain=1#L90",
    },
    {
        "leaderboard": "https://rajpurkar.github.io/SQuAD-explorer",
        "reference": "https://github.com/zihangdai/xlnet/blob/bbaa3a6fa0b3a2ee694e8cf66167434f9eca9660/README.md?plain=1#L35",
    },
    {
        "leaderboard": "https://cmedbenchmark.llmzoo.com/static/leaderboard.html",
        "reference": "https://github.com/X-D-Lab/Sunsimiao/blob/a06f56836b55dd54618e223cacc37d6a733a2c29/README.md?plain=1#L82",
    },
    {
        "leaderboard": "https://ogb.stanford.edu/docs/lsc/leaderboards",
        "reference": "https://github.com/lmqfly/Geometry-Deep-Learning-for-Drug-Discovery/blob/b195e7ec8f4296c6b27ef602231e8c29558f52b4/README.md?plain=1#L104",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1832/leaderboard",
        "reference": "https://github.com/goel-shashank/CyCLIP/blob/707e7c7cc52ef66eb657d2b27fa5f026bc89e815/README.md?plain=1#L82",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_object.php",
        "reference": "https://github.com/fregu856/3DOD_thesis/blob/dcd23d17acf70700e74beb4191fe06a03c572a6f/README.md?plain=1#L616",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/ryoungj/ObsScaling/blob/c10551fe033d09cd2752f376f4c0beaed870ad7b/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/adithya-s-k/indic_eval/blob/0b3f84516e3ef80d4d7993a7012fcfedb03617cb/README.md?plain=1#L544",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/Cognitive-Lab/indic_llm_leaderboard",
        "reference": "https://github.com/adithya-s-k/indic_eval/blob/0b3f84516e3ef80d4d7993a7012fcfedb03617cb/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/arjunbansal/awesome-oss-llm-ift-rlhf/blob/c06a469eb22cbf3a52fd93bccad2e644b5d5e284/README.md?plain=1#L51",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
        "reference": "https://github.com/arjunbansal/awesome-oss-llm-ift-rlhf/blob/c06a469eb22cbf3a52fd93bccad2e644b5d5e284/README.md?plain=1#L52",
    },
    {
        "leaderboard": "https://orionlab.space.noa.gr/mesogeos",
        "reference": "https://github.com/Orion-AI-Lab/mesogeos/blob/87721c87f0bb938af88843b8a599828de758e28f/README.md?plain=1#L161",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/allstate-claims-severity/leaderboard",
        "reference": "https://github.com/adornes/spark_python_ml_examples/blob/0cb6bbfba65b0d6d502dc8333059bd420e8dee16/README.md?plain=1#L262",
    },
    {"leaderboard": "https://github.com/salesforce/WikiSQL", "reference": ""},
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/17640",
        "reference": "https://github.com/caiyuanhao1998/Retinexformer/blob/5c74b0d74316372b4a7b093c4a3b38601e14b621/README.md?plain=1#L111",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/356/leaderboard",
        "reference": "https://github.com/MCG-NJU/SparseBEV/blob/024778a5fb4eb98a1b72dd6e2e16b054fea87c94/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://github.com/longvideobench/LongVideoBench",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/crowdAI/crowdai-musical-genre-recognition-starter-kit",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/DetectionTeamUCAS/RRPN_Faster-RCNN_Tensorflow",
        "reference": "",
    },
    {"leaderboard": "https://github.com/tjunlp-lab/M3KE", "reference": ""},
    {"leaderboard": "https://github.com/OpenGVLab/MM-NIAH", "reference": ""},
    {
        "leaderboard": "https://leaderboard.allenai.org/nycc-matching",
        "reference": "https://github.com/jmhessel/caption_contest_corpus/blob/27a83aeaf889664cbf4d21bace6d473016836a13/README.md?plain=1#L109",
    },
    {
        "leaderboard": "https://microsoft.github.io/MSMARCO-Passage-Ranking-Submissions/leaderboard",
        "reference": "https://github.com/AlibabaResearch/HLATR/blob/52677dd2d22c5743f86190d67f1e18159feb159f/readme.md?plain=1#L12",
    },
    {
        "leaderboard": "https://dki-lab.github.io/GrailQA",
        "reference": "https://github.com/dki-lab/GrailQA/blob/bc15df916ca4101f773722151c90ba3f9eff9df5/README.md?plain=1#L11",
    },
    {"leaderboard": "https://github.com/SilongYong/SQA3D", "reference": ""},
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=6",
        "reference": "https://github.com/kjw0612/awesome-deep-vision/blob/9ebfacb00edcab273fd1d7049eaa7eccc69a29cd/README.md?plain=1#L171",
    },
    {"leaderboard": "https://github.com/cathyxl/MAgIC", "reference": ""},
    {
        "leaderboard": "https://github.com/JasonForJoy/Leaderboards-for-Multi-Turn-Response-Selection",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/openai/gym/wiki/Leaderboard",
        "reference": "https://github.com/pat-coady/trpo/blob/5332fb8fce862b082e828bb8acc1cd71af8c5223/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://paperswithcode.com/dataset/pascalvoc-sp",
        "reference": "https://github.com/vijaydwivedi75/lrgb/blob/2410e9ee8d9309dd2120809d119a46092788eb89/README.md?plain=1#L111",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=3",
        "reference": "https://github.com/SHI-Labs/GFR-DSOD/blob/c50512bf09abebee57ea929041ff551ee6895fc5/README.md?plain=1#L6",
    },
    {"leaderboard": "https://github.com/OpenMOSS/HalluQA", "reference": ""},
    {
        "leaderboard": "https://www.vellum.ai/llm-leaderboard",
        "reference": "https://github.com/rtahmasbi/LLM/blob/eddc6cd35d907dcdc4d91efe7f25be0e6aff8b41/README.md?plain=1#L471",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/rtahmasbi/LLM/blob/eddc6cd35d907dcdc4d91efe7f25be0e6aff8b41/README.md?plain=1#L473",
    },
    {
        "leaderboard": "https://klu.ai/llm-leaderboard",
        "reference": "https://github.com/rtahmasbi/LLM/blob/eddc6cd35d907dcdc4d91efe7f25be0e6aff8b41/README.md?plain=1#L477",
    },
    {"leaderboard": "https://github.com/casmlab/NPHardEval", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/Spico197/Humback/blob/43c0086f7699290a34df1450ec66c9048797dbb6/README.md?plain=1#L122",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/ikaijua/Awesome-AITools/blob/db03b978c94271e759e91a66dd597885a6d6c9b3/README.md?plain=1#L109",
    },
    {
        "leaderboard": "https://artificialanalysis.ai",
        "reference": "https://github.com/ikaijua/Awesome-AITools/blob/db03b978c94271e759e91a66dd597885a6d6c9b3/README.md?plain=1#L110",
    },
    {
        "leaderboard": "https://mcgill-nlp.github.io/bias-bench",
        "reference": "https://github.com/McGill-NLP/bias-bench/blob/b856f99aba01dcfd841290e6292c0bf90de66c4d/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/visual-question-answering-on-mm-vet",
        "reference": "https://github.com/yuweihao/MM-Vet/blob/b57b0b4e442e93375af4808012a4d191c646dabd/README.md?plain=1#L30",
    },
    {"leaderboard": "https://github.com/THUDM/AgentBench", "reference": ""},
    {
        "leaderboard": "https://leaderboard.allenai.org/winogrande",
        "reference": "https://github.com/allenai/winogrande/blob/727e837f77521ef38bcc56df3b275c8da43f45af/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://dynabench.org/tasks",
        "reference": "https://github.com/eugenesiow/practical-ml/blob/9147be88852546bc20160de169203229983868eb/README.md?plain=1#L78",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/9304",
        "reference": "https://github.com/huzeyann/MemoryEncodingModel/blob/224478873992ec8454513f446a4c90aa9376bf70/README.md?plain=1#L15",
    },
    {
        "leaderboard": "https://box.vicos.si/borja/viamaro/index.html#leaderboard",
        "reference": "https://github.com/bborja/modd/blob/875e1a5582316579efc34cdb7f2104054300cac3/README.md?plain=1#L150",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/ntut-ml-2018-computer-vision/leaderboard",
        "reference": "https://github.com/hsuRush/DeepANPR/blob/986ae17e7c35b5f88cf40c91a0d69f86aa2cd8f7/README.md?plain=1#L2",
    },
    {
        "leaderboard": "https://fanoutqa.com/leaderboard",
        "reference": "https://github.com/zhudotexe/fanoutqa/blob/ccf127bd0b1e1091e98ffb9aff7dc694eaf58d54/README.md?plain=1#L17",
    },
    {"leaderboard": "https://github.com/XuanwuAI/SecEval", "reference": ""},
    {
        "leaderboard": "https://few-shot.yyliu.net/miniimagenet.html",
        "reference": "https://github.com/indussky8/awesome-few-shot-learning/blob/81710c2b91541f1abcff2e644eb3f0a245db11c1/README.md?plain=1#L279",
    },
    {
        "leaderboard": "https://github.com/iarai/NeurIPS2022-traffic4cast",
        "reference": "",
    },
    {"leaderboard": "https://github.com/cvgroup-njust/CityPersons", "reference": ""},
    {
        "leaderboard": "https://ymcui.com/cmrc2019",
        "reference": "https://github.com/ymcui/cmrc2019/blob/f3808ee33f74a59aaf78f8635e6d5177c49f7cc2/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/fungi-challenge-fgvc-2018/leaderboard",
        "reference": "https://github.com/visipedia/fgvcx_fungi_comp/blob/f80bc703a73b8fd1f27cb820d9ce5b7e96e72d85/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1091/leaderboard",
        "reference": "https://github.com/Raldir/FEVEROUS/blob/32b68ce4e33c53f34ae2e6d88b51cd073ab85ab6/README.md?plain=1#L171",
    },
    {
        "leaderboard": "https://rajpurkar.github.io/SQuAD-explorer",
        "reference": "https://github.com/google-research/bert/blob/eedf5716ce1268e56f0a50264a88cafad334ac61/README.md?plain=1#L194",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/siim-isic-melanoma-classification/leaderboard",
        "reference": "https://github.com/haqishen/SIIM-ISIC-Melanoma-Classification-1st-Place-Solution/blob/2c4a5428c6d410e97d2a74aacd5f86b3750d32cf/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://cocodataset.org/#stuff-leaderboard",
        "reference": "https://github.com/nightrome/cocostuff/blob/876a7db61f4957888b609011df25e039e512fb48/README.md?plain=1#L102",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/WeOpenML/PandaLM/blob/3871806e72b4832f815ecafd24d3503c73403f48/README.md?plain=1#L52",
    },
    {"leaderboard": "https://github.com/tianyi-lab/HallusionBench", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/uclaml/SPIN/blob/a12ba808a51c23bb98c7cc892d261e9c2f71ff8e/README.md?plain=1#L247",
    },
    {"leaderboard": "https://github.com/mlfoundations/dclm", "reference": ""},
    {
        "leaderboard": "https://www.boreas.utias.utoronto.ca/#/leaderboard",
        "reference": "https://github.com/utiasASRL/hero_radar_odometry/blob/04bd6717db57d1f003066f5318be7fd213d50afc/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/880",
        "reference": "https://github.com/isaaccorley/dfc2022-baseline/blob/d01e78ee0efd2d286e67a28dff4b506a50b3e897/README.md?plain=1#L34",
    },
    {
        "leaderboard": "https://haitianliu22.github.io/code-scope-benchmark/leaderboard.html",
        "reference": "https://github.com/WeixiangYAN/CodeScope/blob/148e7e9b9a9a362953c321fce09497d7492116c0/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://github.com/databricks/databricks-ml-examples",
        "reference": "",
    },
    {"leaderboard": "https://github.com/OpenBMB/OlympiadBench", "reference": ""},
    {"leaderboard": "https://github.com/AIR-Bench/AIR-Bench", "reference": ""},
    {"leaderboard": "https://github.com/decis-bench/febench", "reference": ""},
    {"leaderboard": "https://github.com/decis-bench/febench", "reference": ""},
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_object.php",
        "reference": "https://github.com/dashidhy/awesome-point-cloud-deep-learning/blob/e39a2e7b40a5ba25667cdeee3fae04e3637253fb/README.md?plain=1#L60",
    },
    {
        "leaderboard": "https://www.nuscenes.org/object-detection",
        "reference": "https://github.com/dashidhy/awesome-point-cloud-deep-learning/blob/e39a2e7b40a5ba25667cdeee3fae04e3637253fb/README.md?plain=1#L61",
    },
    {
        "leaderboard": "https://github.com/DetectionTeamUCAS/R2CNN-Plus-Plus_Tensorflow",
        "reference": "",
    },
    {"leaderboard": "https://github.com/diffbot/knowledge-net", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/mlabonne/llm-course/blob/e47299840342fe20b5c12dc5c610763a8b24c422/README.md?plain=1#L254",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
        "reference": "https://github.com/mlabonne/llm-course/blob/e47299840342fe20b5c12dc5c610763a8b24c422/README.md?plain=1#L262",
    },
    {
        "leaderboard": "https://kgqa.github.io/leaderboard",
        "reference": "https://github.com/KGQA/leaderboard",
    },
    {"leaderboard": "https://github.com/open-compass/T-Eval", "reference": ""},
    {"leaderboard": "https://github.com/KevinLiao159/Quora", "reference": ""},
    {
        "leaderboard": "https://github.com/inspire-group/patch-defense-leaderboard",
        "reference": "https://github.com/inspire-group/adv-patch-paper-list/blob/88f729db04913c48e69c73690332a005c7111b12/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://github.com/dynamic-superb/dynamic-superb",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/yizhen20133868/Awesome-TOD-NLG-Survey",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/oshizo/japanese-llm-roleplay-benchmark",
        "reference": "",
    },
    {
        "leaderboard": "https://waymo.com/open/challenges/2023/motion-prediction",
        "reference": "https://github.com/zhejz/TrafficBots/blob/01a367db1ab7b353d50e98d9bfd1ac371d5f4848/README.md?plain=1#L51",
    },
    {
        "leaderboard": "https://paperswithcode.com/dataset/burst",
        "reference": "https://github.com/Ali2500/BURST-benchmark/blob/c1431986ea67a813ec17ad17b7105132f766c51d/README.md?plain=1#L112",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/video-super-resolution-on-vid4-4x-upscaling",
        "reference": "https://github.com/amanchadha/iSeeBetter/blob/ed35f5acd53f3b5e762eb6605361e424b88a3729/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://github.com/princeton-nlp/LM-Science-Tutor",
        "reference": "",
    },
    {
        "leaderboard": "https://gluebenchmark.com/leaderboard",
        "reference": "https://github.com/mwaskom/seaborn-data/blob/71e2436a092d714350de0fc409ca8a8714e7e78f/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/lm-sys/FastChat/blob/e208d5677c6837d590b81cb03847c0b9de100765/README.md?plain=1#L6",
    },
    {"leaderboard": "https://github.com/google-research/meta-dataset", "reference": ""},
    {
        "leaderboard": "https://stanfordmlgroup.github.io/competitions/chexpert",
        "reference": "https://github.com/stanfordmlgroup/MoCo-CXR/blob/d433acabe6518b332a1345a6a1fed49f0c23c253/README.md?plain=1#L71",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/relation-extraction-on-redocred",
        "reference": "https://github.com/tonytan48/KD-DocRE/blob/2714f6b0882410d59d777ed13ecfff9b5563d77f/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1881/leaderboard",
        "reference": "https://github.com/project-miracl/miracl/blob/fa3a57c89ad8f61f0a02d8c27167d8141cfd77ca/README.md?plain=1#L23",
    },
    {
        "leaderboard": "https://fudanselab-classeval.github.io/leaderboard.html",
        "reference": "https://github.com/FudanSELab/ClassEval/blob/e22643b9f1c9df889886e38b47d54d845d9b4658/README.md?plain=1#L5",
    },
    {
        "leaderboard": "http://noisylabels.com",
        "reference": "https://github.com/weijiaheng/Advances-in-Label-Noise-Learning/blob/668a999e9c7db34489f2c25f75cfbc8980039c61/README.md?plain=1#L63",
    },
    {
        "leaderboard": "https://paperswithcode.com/dataset/animal",
        "reference": "https://github.com/weijiaheng/Advances-in-Label-Noise-Learning/blob/668a999e9c7db34489f2c25f75cfbc8980039c61/README.md?plain=1#L67",
    },
    {
        "leaderboard": "https://paperswithcode.com/dataset/clothing1m",
        "reference": "https://github.com/weijiaheng/Advances-in-Label-Noise-Learning/blob/668a999e9c7db34489f2c25f75cfbc8980039c61/README.md?plain=1#L69",
    },
    {
        "leaderboard": "https://yale-lily.github.io/sparc",
        "reference": "https://github.com/taoyds/sparc/blob/60d4a2706df31fb10ec4e36bb6da538fa31e1746/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/machine-translation-on-wmt2014-german-english",
        "reference": "https://github.com/joeynmt/joeynmt/blob/cdc4d03d430a1b0f29793a0d95743c5e72ae2f6c/README.md?plain=1#L280",
    },
    {"leaderboard": "https://github.com/hendrycks/robustness", "reference": ""},
    {
        "leaderboard": "https://github.com/huguyuehuhu/Awesome-Group-Activity-Recognition",
        "reference": "",
    },
    {
        "leaderboard": "https://mmcheng.net/videosal",
        "reference": "https://github.com/MichiganCOG/TASED-Net/blob/0cf47b73cf055bfc64dc132a85276cca67308226/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://pmc-patients.github.io",
        "reference": "https://github.com/pmc-patients/pmc-patients/blob/baf069a180980b690c53c91649c9a427fa11f1c6/README.md?plain=1#L104",
    },
    {
        "leaderboard": "https://github.com/Xiangyu-CAS/AICity2020-VOC-ReID",
        "reference": "",
    },
    {
        "leaderboard": "https://agora-evaluation.is.tuebingen.mpg.de",
        "reference": "https://github.com/IDEA-Research/OSX/blob/14b1cb41a3d2b14c9303290f45fa8189f6f685b3/README.md?plain=1#L41",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/robust-camera-only-3d-object-detection-on",
        "reference": "https://github.com/Daniel-xsy/RoboBEV/blob/d4f819034aeb4db389652feb27a33211cda8069c/README.md?plain=1#L90",
    },
    {
        "leaderboard": "https://waymo.com/open/challenges",
        "reference": "https://github.com/Nightmare-n/GD-MAE/blob/abd05ce2c2c0a5d02736e3fb0ad6ddbea0bb2bba/README.md?plain=1#L12",
    },
    {"leaderboard": "https://github.com/GAIR-NLP/auto-j", "reference": ""},
    {
        "leaderboard": "https://zjysteven.github.io/OpenOOD/#leaderboard",
        "reference": "https://github.com/Jingkang50/OpenOOD/blob/be315e6e37e48bf0451576c0c43dcda06dcd8d08/README.md?plain=1#L57",
    },
    {
        "leaderboard": "https://webots.cloud/competition",
        "reference": "https://github.com/cyberbotics/wrestling/blob/7670ede02a79d1845cc77f5c925e8fa73d72670e/README.md?plain=1#L79",
    },
    {"leaderboard": "https://github.com/TellinaTool/nl2bash", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota",
        "reference": "https://github.com/paperswithcode/releasing-research-code/blob/a5b2c85490435108e306d38c64a0d2a558f110e6/README.md?plain=1#L94",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/list",
        "reference": "https://github.com/paperswithcode/releasing-research-code/blob/a5b2c85490435108e306d38c64a0d2a558f110e6/README.md?plain=1#L96",
    },
    {
        "leaderboard": "https://nlpprogress.com",
        "reference": "https://github.com/paperswithcode/releasing-research-code/blob/a5b2c85490435108e306d38c64a0d2a558f110e6/README.md?plain=1#L97",
    },
    {"leaderboard": "https://github.com/SUFE-AIFLM-Lab/FinEval", "reference": ""},
    {"leaderboard": "https://github.com/FuxiaoLiu/LRV-Instruction", "reference": ""},
    {
        "leaderboard": "https://scandeval.com",
        "reference": "https://github.com/BramVanroy/fietje-2/blob/c457cf9fd2b1a8b969577814cfb13982ca51a07a/README.md?plain=1#L32",
    },
    {
        "leaderboard": "https://github.com/JasonForJoy/Leaderboards-for-Multi-Turn-Response-Selection",
        "reference": "https://github.com/chauff/conversationalIR/blob/80e7bf83e61aca0cc62aae2cd7809f1bf3e75be8/README.md?plain=1#L75",
    },
    {
        "leaderboard": "https://paperswithcode.com/task/conversational-response-selection",
        "reference": "https://github.com/chauff/conversationalIR/blob/80e7bf83e61aca0cc62aae2cd7809f1bf3e75be8/README.md?plain=1#L77",
    },
    {
        "leaderboard": "https://multi-trust.github.io/#leaderboard",
        "reference": "https://github.com/thu-ml/MMTrustEval/blob/b38dec2eba3d3b3634c18f88824e928340f95e30/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://www.cluebenchmarks.com/nli.html",
        "reference": "https://github.com/CLUEbenchmark/OCNLI/blob/b53efdee17257a5c33993cf6fcf8ffff0497ea0e/README.md?plain=1#L59",
    },
    {"leaderboard": "https://github.com/princeton-nlp/CharXiv", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/TIGER-Lab/LongICL-Leaderboard",
        "reference": "https://github.com/TIGER-AI-Lab/LongICLBench/blob/9520ac2c120a0d67406ec6978130ebb43e25b74d/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://github.com/CLUEbenchmark/CLUE",
        "reference": "https://github.com/shibing624/pytextclassifier/blob/381042e53a8af816e0b5d34f3e1888a80c513401/README.md?plain=1#L549",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/drop",
        "reference": "https://github.com/llamazing/numnet_plus/blob/43928b2acd02f5a494688ffcd1d3da6e661da5d3/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/MLGroupJLU/LLM-eval-survey/blob/25e2bc10bb8ab11cfc3e23a53a4c359a2ec195ba/README.md?plain=1#L343",
    },
    {
        "leaderboard": "https://agora-evaluation.is.tuebingen.mpg.de",
        "reference": "https://github.com/ttxskk/AiOS/blob/f84947b6a1e59de8dbfdeda30aa473149e279b96/README.md?plain=1#L288",
    },
    {
        "leaderboard": "https://www.nuscenes.org/object-detection",
        "reference": "https://github.com/HorizonRobotics/Sparse4D/blob/c41df4bbf7bc82490f11ff55173abfcb3fb91425/README.md?plain=1#L59",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/163/leaderboard",
        "reference": "https://github.com/airsplay/lxmert/blob/0db1182b9030da3ce41f17717cc628e1cd0a95d5/README.md?plain=1#L132",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/225/leaderboard",
        "reference": "https://github.com/airsplay/lxmert/blob/0db1182b9030da3ce41f17717cc628e1cd0a95d5/README.md?plain=1#L216",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/225/leaderboard",
        "reference": "https://github.com/airsplay/lxmert/blob/0db1182b9030da3ce41f17717cc628e1cd0a95d5/README.md?plain=1#L216",
    },
    {
        "leaderboard": "https://lil.nlp.cornell.edu/nlvr",
        "reference": "https://github.com/airsplay/lxmert/blob/0db1182b9030da3ce41f17717cc628e1cd0a95d5/README.md?plain=1#L271",
    },
    {
        "leaderboard": "https://evalplus.github.io/leaderboard.html",
        "reference": "https://github.com/ise-uiuc/magicoder/blob/ee89a5a1d93b7012c4c3c57a161c9e4145801bc6/README.md?plain=1#L38",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/qdrant/fastembed/blob/e2e1f93685acc4b64ad30516c8750b4c693a1f14/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://leaderboard.tabbyml.com",
        "reference": "https://github.com/deepseek-ai/awesome-deepseek-coder/blob/e04a60041575221870b8429ef1b453875afde001/README.md?plain=1#L49",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/merveenoyan/awesome-osml-for-devs/blob/23b3d7522df8da761ce80b7bd7d6d25f34e01939/README.md?plain=1#L130",
    },
    {
        "leaderboard": "https://mixeval.github.io/#leaderboard",
        "reference": "https://github.com/Psycoy/MixEval/blob/8eed26b6efe7703a96abd428163728d4d9705bf3/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://github.com/BAAI-DCAI/Multimodal-Robustness-Benchmark",
        "reference": "",
    },
    {
        "leaderboard": "https://vlue-benchmark.github.io/leaderboard.html",
        "reference": "https://github.com/MichaelZhouwang/VLUE/blob/04b1d4298e13b1d3f727a0cb450409c213ca31c9/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://www.nuscenes.org/object-detection",
        "reference": "https://github.com/hht1996ok/EA-LSS/blob/193c30141da8625f442d10f0fa29c226694bc3c3/README.md?plain=1#L6",
    },
    {"leaderboard": "https://github.com/ZaloAI-Jaist/VMLU", "reference": ""},
    {
        "leaderboard": "https://github.com/hkust-nlp/ceval",
        "reference": "https://github.com/NicholasCao/Awesome-Chinese-ChatGPT/blob/5ae1cfa4623244e172598ee5e81bfe229d1af1d4/README.md?plain=1#L24",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/NicholasCao/Awesome-Chinese-ChatGPT/blob/5ae1cfa4623244e172598ee5e81bfe229d1af1d4/README.md?plain=1#L25",
    },
    {
        "leaderboard": "https://www.superclueai.com",
        "reference": "https://github.com/NicholasCao/Awesome-Chinese-ChatGPT/blob/5ae1cfa4623244e172598ee5e81bfe229d1af1d4/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/NicholasCao/Awesome-Chinese-ChatGPT/blob/5ae1cfa4623244e172598ee5e81bfe229d1af1d4/README.md?plain=1#L27",
    },
    {"leaderboard": "https://github.com/wangxw5/wikiDiverse", "reference": ""},
    {
        "leaderboard": "https://csgaobb.github.io/Projects/mscoco-fsod.html",
        "reference": "https://github.com/gaobb/Few-Shot-Object-Detection-Papers/blob/20ce4a4b3f042b246aa5451c139d54bb0d01f43f/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://csgaobb.github.io/Projects/pascalvoc-fsod.html",
        "reference": "https://github.com/gaobb/Few-Shot-Object-Detection-Papers/blob/20ce4a4b3f042b246aa5451c139d54bb0d01f43f/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://superbbenchmark.org/leaderboard",
        "reference": "https://github.com/sungnyun/ARMHuBERT/blob/e687addf71d826c202d73b61f6fc177a240c5027/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://github.com/THUDM/LongBench",
        "reference": "https://github.com/DAMO-NLP-SG/CLEX/blob/023709d923ea9492ba82a6e8341d8c02f77e34e8/README.md?plain=1#L162",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
        "reference": "https://github.com/leobeeson/llm_benchmarks/blob/53a8bcfeb7fb1f1435fef88abd45e16d202cee53/README.md?plain=1#L316",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/long-context-understanding-on-mmneedle",
        "reference": "https://github.com/Wang-ML-Lab/multimodal-needle-in-a-haystack/blob/3a92467610b7397568a226fc2d63a01a73192520/README.md?plain=1#L14",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/unsupervised-domain-adaptation-on-ucf-hmdb",
        "reference": "https://github.com/ldkong1205/TranSVAE/blob/5879721b9eaef4e9b4b1666e2c4d1d0c67cbe079/README.md?plain=1#L67",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/unsupervised-domain-adaptation-on-epic",
        "reference": "https://github.com/ldkong1205/TranSVAE/blob/5879721b9eaef4e9b4b1666e2c4d1d0c67cbe079/README.md?plain=1#L67",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/chenking2020/FindTheChatGPTer/blob/80ad5c197fdbb9e072fd25861596b893527fd5bb/README.md?plain=1#L539",
    },
    {
        "leaderboard": "https://cevalbenchmark.com/static/leaderboard.html",
        "reference": "https://github.com/chenking2020/FindTheChatGPTer/blob/80ad5c197fdbb9e072fd25861596b893527fd5bb/README.md?plain=1#L549",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/mslr-ms2",
        "reference": "https://github.com/allenai/ms2/blob/a03ab009e00c5e412b4c55f6ec4f9b49c2d8a7f6/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://github.com/insikk/CBIR_LeaderBoard",
        "reference": "https://github.com/keloli/CBIR_EXPLORE/blob/d3be14e0d06f2643b45ea8968c3f37b0489b500d/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://github.com/opstower-ai/devops-ai-open-leaderboard",
        "reference": "",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti",
        "reference": "https://github.com/SeasonDepth/SeasonDepth/blob/576e9f291c7d4ddce0eca1e97febb3d5ff41e262/README.md?plain=1#L2",
    },
    {
        "leaderboard": "https://github.com/haonan-li/CMMLU",
        "reference": "https://github.com/yangjianxin1/Firefly-LLaMA2-Chinese/blob/cbc2814c39f54f147ed7734dac13095bab16c40a/README.MD?plain=1#L112",
    },
    {
        "leaderboard": "https://open-compass.github.io/T-Eval/leaderboard.html",
        "reference": "https://github.com/tjtanaa/awesome-large-action-model/blob/34860f41d1c8f88b1277fae3b216c6b92c4f7e63/README.md?plain=1#L20",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard",
        "reference": "https://github.com/codefuse-ai/MFTCoder/blob/a2508af26df1aa8b9c3c8522ec0a34e5bedd52d0/README.md?plain=1#L55",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=4",
        "reference": "https://github.com/Smorodov/Deep-learning-object-detection-links./blob/365150b5623e1c1ba8eedb3a4e368c54dab143fc/readme.md?plain=1#L31",
    },
    {"leaderboard": "https://github.com/talk2car/Talk2Car", "reference": ""},
    {
        "leaderboard": "https://atlas-challenge.u-bourgogne.fr/leaderboard",
        "reference": "https://github.com/uni-medical/STU-Net/blob/0652eaf5cafda6bc269942238c3695994f2dc4f0/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://sppin.grand-challenge.org/evaluation/final-test-phase/leaderboard",
        "reference": "https://github.com/uni-medical/STU-Net/blob/0652eaf5cafda6bc269942238c3695994f2dc4f0/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://autopet-ii.grand-challenge.org/leaderboard",
        "reference": "https://github.com/uni-medical/STU-Net/blob/0652eaf5cafda6bc269942238c3695994f2dc4f0/README.md?plain=1#L13",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/12239",
        "reference": "https://github.com/uni-medical/STU-Net/blob/0652eaf5cafda6bc269942238c3695994f2dc4f0/README.md?plain=1#L14",
    },
    {"leaderboard": "https://github.com/hkust-nlp/ceval", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard",
        "reference": "https://github.com/ruanchaves/napolab/blob/5420e872f0bcd009692fbf5bd6103c4b6ff9aa4b/README.md?plain=1#L16",
    },
    {
        "leaderboard": "https://github.com/Instance-Search/Instance-Search",
        "reference": "",
    },
    {"leaderboard": "https://github.com/LudwigStumpp/llm-leaderboard", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard",
        "reference": "https://github.com/bigcode-project/bigcode-evaluation-harness/blob/7c02b2f678791ad618ad069a4d21a1cb70915949/leaderboard/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://github.com/awslabs/diagnostic-robustness-text-to-sql",
        "reference": "",
    },
    {
        "leaderboard": "https://openlamm.github.io/ch3ef/leaderboard.html",
        "reference": "https://github.com/OpenGVLab/LAMM/blob/ea571363883ceba58a0f724ef197ed7205e07465/README.md?plain=1#L15",
    },
    {
        "leaderboard": "https://visualcommonsense.com/leaderboard",
        "reference": "https://github.com/ict-bigdatalab/awesome-pretrained-models-for-information-retrieval/blob/89968eb02028f232e604595a2310fac37466ffc9/README.md?plain=1#L513",
    },
    {
        "leaderboard": "https://stanfordmlgroup.github.io/competitions/mrnet",
        "reference": "https://github.com/MisaOgura/MRNet/blob/df91d5b901c52551fa46ad4849530b5cbb5c26b2/README.md?plain=1#L278",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/GPT-Alternatives/gpt_alternatives/blob/edee437bb7ccc51b11bc36abab1922fb64560360/README.md?plain=1#L271",
    },
    {
        "leaderboard": "https://github.com/salesforce/localization-xml-mt",
        "reference": "",
    },
    {"leaderboard": "https://github.com/ldkong1205/PointCloud-C", "reference": ""},
    {
        "leaderboard": "https://github.com/AsuradaYuci/awesome_video_person_reid/blob/master/leaderboard.md",
        "reference": "",
    },
    {"leaderboard": "https://github.com/THUDM/ChatGLM-Math", "reference": ""},
    {
        "leaderboard": "https://github.com/LMMMEng/LLD-MMRI2023/blob/main/assets/test_leaderboard.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/jianguoz/Few-Shot-Intent-Detection",
        "reference": "",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1256/leaderboard",
        "reference": "https://github.com/neurallatents/nlb_tools/blob/1ddc15f45b56388ff093d1396b7b87b36fa32a68/README.md?plain=1#L35",
    },
    {
        "leaderboard": "https://www.aicrowd.com/challenges/music-demixing-challenge-ismir-2021/leaderboards",
        "reference": "https://github.com/AIcrowd/music-demixing-challenge-starter-kit/blob/ec6591457a482e91a542a01011d049b9d80038cd/README.md?plain=1#L194",
    },
    {
        "leaderboard": "https://yale-lily.github.io/spider",
        "reference": "https://github.com/taoyds/spider/blob/b7b5b8c890cd30e35427348bb9eb8c6d1350ca7c/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://www.nist.gov/programs-projects/face-technology-evaluations-frtefate",
        "reference": "https://github.com/becauseofAI/awesome-face/blob/640700c20b1b9d86ef255a43cd6c938afa899225/README.md?plain=1#L465",
    },
    {
        "leaderboard": "https://comma.ai/leaderboard",
        "reference": "https://github.com/commaai/controls_challenge/blob/b3923cee0742c2e3fe8aca48a2fa410b747fd6a3/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/sherlock",
        "reference": "https://github.com/allenai/sherlock/blob/6802669760582d533dbb815eef1adbd83065ba7b/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://github.com/michuanhaohao/AICITY2021_Track2_DMT",
        "reference": "",
    },
    {
        "leaderboard": "https://opendrivelab.com/challenge2023",
        "reference": "https://github.com/er-muyue/BeMapNet/blob/88e4f091310683eb1c8140e6ae4155fa1ae3404f/README.md?plain=1#L11",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk/leaderboard/displaylb.php?challengeid=11",
        "reference": "https://github.com/openaifab/hair/blob/5dc587d3a70d5c8a656ea280758d6e05ebea9d76/README.md?plain=1#L62",
    },
    {
        "leaderboard": "https://paperswithcode.com/task/image-super-resolution",
        "reference": "https://github.com/isaaccorley/pytorch-enhance/blob/16c0a354812fda7adb4c51f9e5025e8ca8fe317d/README.md?plain=1#L55",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=6",
        "reference": "https://github.com/autonomousdrivingkr/Awesome-Autonomous-Driving/blob/a3917dc7db6fa18e9d696150fd52e83df301e51d/README.md?plain=1#L143",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/instructkr/ko-chatbot-arena-leaderboard",
        "reference": "https://github.com/HeegyuKim/open-korean-instructions/blob/197ce44c2c8100b169149f55a43a38b14f756b9e/README.md?plain=1#L73",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/instructkr/LogicKor-leaderboard",
        "reference": "https://github.com/HeegyuKim/open-korean-instructions/blob/197ce44c2c8100b169149f55a43a38b14f756b9e/README.md?plain=1#L74",
    },
    {
        "leaderboard": "https://lil.nlp.cornell.edu/nlvr",
        "reference": "https://github.com/lil-lab/nlvr/blob/18924841aa96d519cfb8584f0945350f07a6d860/README.md?plain=1#L7",
    },
    {"leaderboard": "https://github.com/cardiffnlp/tweeteval", "reference": ""},
    {
        "leaderboard": "https://nealcly.github.io/MuTual-leaderboard",
        "reference": "https://github.com/Nealcly/MuTual/blob/b90cecada5b34226cd3d88e217844c80a6d8ee3f/README.md?plain=1#L65",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/imaterialist-challenge-furniture-2018/leaderboard",
        "reference": "https://github.com/skrypka/imaterialist-furniture-2018/blob/f7fce9ac4befd511fb5d95687e630ca11b2f197c/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://www.nuscenes.org/tracking",
        "reference": "https://github.com/dvlab-research/VoxelNeXt/blob/b5b7d393cd1d0ecbbaeaca365b453b488791035d/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/instance-segmentation-on-coco-minival",
        "reference": "https://github.com/IDEA-Research/DN-DETR/blob/ff3902a20d521ead052d1243ff249b19bc1ce531/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/panoptic-segmentation-on-coco-minival",
        "reference": "https://github.com/IDEA-Research/DN-DETR/blob/ff3902a20d521ead052d1243ff249b19bc1ce531/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/semantic-segmentation-on-ade20k",
        "reference": "https://github.com/IDEA-Research/DN-DETR/blob/ff3902a20d521ead052d1243ff249b19bc1ce531/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://few-shot.yyliu.net/miniimagenet.html",
        "reference": "https://github.com/icoz69/DeepEMD/blob/5f96d86bae19f783ffc00ee84f964e32b372be0a/README.md?plain=1#L13",
    },
    {
        "leaderboard": "https://insightface.ai/mfr21",
        "reference": "https://github.com/deepinsight/insightface/blob/4f8c54a09b7341a757657adbccfe1f8751f6548c/README.md?plain=1#L70",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/12599",
        "reference": "https://github.com/dongkyunk/CheXFusion/blob/6bbf469d346bff9c2aeb8b9299b5cd8882cfe66f/README.md?plain=1#L27",
    },
    {
        "leaderboard": "https://github.com/openai/gym/wiki/Leaderboard",
        "reference": "https://github.com/dgriff777/rl_a3c_pytorch/blob/4170f7d94852129dc70265648c219e6b412965be/README.MD?plain=1#L22",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/semi-supervised-semantic-segmentation-on-25",
        "reference": "https://github.com/ldkong1205/LaserMix/blob/2ae972006653965e0f68a8e0e9b55835f15d07d2/README.md?plain=1#L73",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/semi-supervised-semantic-segmentation-on-24",
        "reference": "https://github.com/ldkong1205/LaserMix/blob/2ae972006653965e0f68a8e0e9b55835f15d07d2/README.md?plain=1#L73",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/semi-supervised-semantic-segmentation-on-23",
        "reference": "https://github.com/ldkong1205/LaserMix/blob/2ae972006653965e0f68a8e0e9b55835f15d07d2/README.md?plain=1#L73",
    },
    {"leaderboard": "https://github.com/Coder-Yu/SELFRec", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota/visual-question-answering-vqa-on-core-mm",
        "reference": "https://github.com/InfiMM/InfiMM-Eval-Tool/blob/fa0e4a09b70529d304c94c38855e445126ec8f9c/README.md?plain=1#L51",
    },
    {
        "leaderboard": "https://infimm.github.io/InfiMM-Eval",
        "reference": "https://github.com/InfiMM/InfiMM-Eval-Tool/blob/fa0e4a09b70529d304c94c38855e445126ec8f9c/README.md?plain=1#L51",
    },
    {
        "leaderboard": "https://arctic-leaderboard.is.tuebingen.mpg.de/leaderboard",
        "reference": "https://github.com/zc-alexfan/arctic/blob/9f5770966350c66d8bf0ac3fd4cfde74434a109b/README.md?plain=1#L44",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/text-to-sql-on-kaggledbqa",
        "reference": "https://github.com/Chia-Hsuan-Lee/KaggleDBQA/blob/ab6325c9b5749f2f3509a1f64299bfa30396e6b0/README.md?plain=1#L4",
    },
    {
        "leaderboard": "http://ccl.pku.edu.cn:8084/SpaCE2021/rank",
        "reference": "https://github.com/2030NLP/SpaCE2021/blob/4bf18d84c6f5779473ba0da797354cdb5a5cd56b/README.md?plain=1#L98",
    },
    {
        "leaderboard": "http://ccl.pku.edu.cn:8084/SpaCE2021/rank",
        "reference": "https://github.com/z-fabian/HUMUS-Net/blob/d1495dc33f5641754448143962f4595b3efec1ce/README.md?plain=1#L22",
    },
    {"leaderboard": "https://github.com/MinghuiChen43/CIL-ReID", "reference": ""},
    {
        "leaderboard": "https://github.com/pddasig/Machine-Learning-Competition-2020",
        "reference": "",
    },
    {
        "leaderboard": "https://yale-lily.github.io/spider",
        "reference": "https://github.com/BeachWang/DAIL-SQL/blob/2965e8daeccd5e6dbf5e1697833d990eca84fde6/README.md?plain=1#L4",
    },
    {
        "leaderboard": "http://got-10k.aitestunion.com/leaderboard",
        "reference": "https://github.com/wangdongdut/Online-Visual-Tracking-SOTA/blob/9cb086c2ebe6d6f0c04b4c5d200fd3dbc9712a6c/README.md?plain=1#L116",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/Pan-ML/panml/blob/4caaeaa7f48fac5a87a68399ea7d8197c00b9d10/README.md?plain=1#L25",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/Slep/LRVSF-Leaderboard",
        "reference": "https://github.com/Simon-Lepage/CondViT-LRVSF/blob/130e9b4259bef2f1f4e70051adef7e4ad8344989/README.md?plain=1#L28",
    },
    {
        "leaderboard": "https://mmbench.opencompass.org.cn/leaderboard",
        "reference": "https://github.com/open-compass/MMBench/blob/20faed3d116ce3a76a4b50820dcc46c696fd8f43/README.md?plain=1#L26",
    },
    {"leaderboard": "https://github.com/THUDM/VisualAgentBench", "reference": ""},
    {
        "leaderboard": "https://www.kaggle.com/c/nyu-cv-fall-2018/leaderboard",
        "reference": "https://github.com/poojahira/gtsrb-pytorch/blob/494d4843bdf7f996f8cef5ec7e19d9928c215410/README.md?plain=1#L5",
    },
    {"leaderboard": "https://github.com/lupantech/PromptPG", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/vidore/vidore-leaderboard",
        "reference": "https://github.com/illuin-tech/vidore-benchmark/blob/da1c683337ed9cea9a0428d89b903a30a0a3326f/README.md?plain=1#L14",
    },
    {
        "leaderboard": "https://www.kaggle.com/competitions/mlsp-2013-birds/leaderboard",
        "reference": "https://github.com/gaborfodor/MLSP_2013/blob/ecd02f1185812d750d38cc16146a9176d2a14ffa/readme.md?plain=1#L42",
    },
    {
        "leaderboard": "https://github.com/shjung13/Standardized-max-logits",
        "reference": "",
    },
    {"leaderboard": "https://github.com/k2-fsa/libriheavy", "reference": ""},
    {
        "leaderboard": "http://got-10k.aitestunion.com/leaderboard",
        "reference": "https://github.com/huanglianghua/siamrpn-pytorch/blob/5b19570de55dcdbb163c2664ab9d4b1c6f819932/README.md?plain=1#L13",
    },
    {"leaderboard": "https://github.com/Xtra-Computing/NIID-Bench", "reference": ""},
    {
        "leaderboard": "https://github.com/brohrer/parameter_efficiency_leaderboard",
        "reference": "",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1247/leaderboard",
        "reference": "https://github.com/SoonminHwang/rgbt-ped-detection/blob/4ec3637724d009c0a64f862ae2aa0e32e61942a3/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://visualcommonsense.com/leaderboard",
        "reference": "https://github.com/zhegan27/VILLA/blob/1d5bc2bdbff301f5171385f1f47150450101a782/README.md?plain=1#L130",
    },
    {
        "leaderboard": "https://microsoft.github.io/AirSim-NeurIPS2019-Drone-Racing/leaderboard_final.html",
        "reference": "https://github.com/microsoft/AirSim-NeurIPS2019-Drone-Racing/blob/eb589692bac8a8f69179caca52977e4999f29b36/README.md?plain=1#L206",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/BestWishYsh/ChronoMagic-Bench",
        "reference": "https://github.com/yhZhai/mcm/blob/77cf4f528e5be26bf97665efa5f5acb4c6815b8f/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://github.com/hemingkx/Spec-Bench/blob/main/Leaderboard.md",
        "reference": "",
    },
    {
        "leaderboard": "https://open-xai.github.io/leaderboard",
        "reference": "https://github.com/AI4LIFE-GROUP/OpenXAI/blob/a18288620464250856b55234266a6d1dabb64656/README.md?plain=1#L20",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn/leaderboard-llm",
        "reference": "https://github.com/TommyZihao/ChatMarx/blob/35adc6d95bfa7df3977fa5dc11fcb30cdc9fe949/README.md?plain=1#L129",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/454/leaderboard",
        "reference": "https://github.com/ZikangZhou/QCNet/blob/55cacb418cbbce3753119c1f157360e66993d0d0/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1719/leaderboard",
        "reference": "https://github.com/ZikangZhou/QCNet/blob/55cacb418cbbce3753119c1f157360e66993d0d0/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/change-point-detection-on-tep",
        "reference": "https://github.com/YKatser/CPDE/blob/c8b25303e7a6b7e89939c28176fcde711962eafa/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/change-point-detection-on-skab",
        "reference": "https://github.com/YKatser/CPDE/blob/c8b25303e7a6b7e89939c28176fcde711962eafa/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://github.com/ai-forever/digital_peter_aij2020",
        "reference": "",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/echo840/ocrbench-leaderboard",
        "reference": "https://github.com/Yuliang-Liu/MultimodalOCR/blob/eb7ecc408f55dc01cdab2ee9bc773f0461228e52/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://leaderboard.sea-lion.ai",
        "reference": "https://github.com/aisingapore/sealion/blob/9d7df689c9fc1fc12865af1667906304cfbd959f/README.md?plain=1#L81",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/image-harmonization-on-iharmony4",
        "reference": "https://github.com/bcmi/Awesome-Image-Harmonization/blob/c586f046673219bfe630510cdc1c6dbdaa7a38da/README.md?plain=1#L35",
    },
    {
        "leaderboard": "https://challenge.isic-archive.com/landing/2018",
        "reference": "https://github.com/ngessert/isic2018/blob/f551ea244da038619762c7f2b66a652bf90c20a7/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/chujiezheng/LLM-Extrapolation/blob/765251a6d57e491cc7a903274db73db766175c3d/README.md?plain=1#L78",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/chujiezheng/LLM-Extrapolation/blob/765251a6d57e491cc7a903274db73db766175c3d/README.md?plain=1#L80",
    },
    {
        "leaderboard": "https://aider.chat/docs/leaderboards",
        "reference": "https://github.com/chujiezheng/LLM-Extrapolation/blob/765251a6d57e491cc7a903274db73db766175c3d/README.md?plain=1#L80",
    },
    {
        "leaderboard": "https://opening-up-chatgpt.github.io",
        "reference": "https://github.com/opening-up-chatgpt/opening-up-chatgpt.github.io/blob/d759b188f037400280992e088f7335c89a0266a0/readme.md?plain=1#L1",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/visualcomet",
        "reference": "https://github.com/jamespark3922/visual-comet/blob/30eff8a838c275d87c293311119f46016255d760/README.md?plain=1#L153",
    },
    {
        "leaderboard": "https://github.com/StarStyleSky/awesome-face-detection",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/junqiangchen/LiTS---Liver-Tumor-Segmentation-Challenge",
        "reference": "",
    },
    {
        "leaderboard": "https://tgb.complexdatalab.com/docs/leader_linkprop",
        "reference": "https://github.com/shenyangHuang/TGB/blob/170f60c5e8bfd430716f0e5c3014589b0801c1f4/README.md?plain=1#L27",
    },
    {"leaderboard": "https://github.com/vztu/BVQA_Benchmark", "reference": ""},
    {
        "leaderboard": "https://rrc.cvc.uab.es",
        "reference": "https://github.com/EriCongMa/awesome-transformer-ocr/blob/495ee4966061337f2274af980cb0913723c409b0/README.md?plain=1#L229",
    },
    {"leaderboard": "https://github.com/open-compass/BotChat", "reference": ""},
    {
        "leaderboard": "https://trustllmbenchmark.github.io/TrustLLM-Website/leaderboard.html",
        "reference": "https://github.com/jphall663/awesome-machine-learning-interpretability/blob/d552a0e2c179e5c77d9bf9733843b7161ec56a82/README.md?plain=1#L1158",
    },
    {
        "leaderboard": "https://www.aicrowd.com/challenges/music-demixing-challenge-ismir-2021/leaderboards",
        "reference": "https://github.com/yoyololicon/music-demixing-challenge-ismir-2021-entry/blob/2f08f9b8050b7b2fc360c625ec5a939825156585/README.md?plain=1#L1",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/a-okvqa",
        "reference": "https://github.com/allenai/aokvqa/blob/83786de0b24c4fef0bfe39420684f6f73d9f3581/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://www.aicrowd.com/challenges/seismic-facies-identification-challenge/leaderboards",
        "reference": "https://github.com/AIcrowd/seismic-facies-identification-starter-kit/blob/636c4a3748dd550a33d1bdbc97e295ef3cb414de/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://github.com/haoweiz23/Awesome-Fine-grained-Visual-Classification",
        "reference": "",
    },
    {
        "leaderboard": "https://hfl-rc.github.io/cmrc2017/leaderboard",
        "reference": "https://github.com/ymcui/Chinese-RC-Datasets/blob/3e0fd5abad48a38ff1299ba0ca1643a96b6ef8d3/README.md?plain=1#L63",
    },
    {
        "leaderboard": "https://aistudio.baidu.com/competition/detail/720/0/leaderboard",
        "reference": "https://github.com/ymcui/Chinese-RC-Datasets/blob/3e0fd5abad48a38ff1299ba0ca1643a96b6ef8d3/README.md?plain=1#L84",
    },
    {
        "leaderboard": "https://hfl-rc.github.io/cmrc2018/open_challenge",
        "reference": "https://github.com/ymcui/Chinese-RC-Datasets/blob/3e0fd5abad48a38ff1299ba0ca1643a96b6ef8d3/README.md?plain=1#L100",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/landmark-recognition-2020/leaderboard",
        "reference": "https://github.com/bestfitting/instance_level_recognition/blob/683f021b4e65876835f028797ec28b0d1071bb45/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/baichuan-inc/Baichuan-7B/blob/6f3ef4633a90c2d8a3e0763d0dec1b8dc11588f5/README.md?plain=1#L128",
    },
    {
        "leaderboard": "https://github.com/google-research-datasets/seahorse",
        "reference": "",
    },
    {"leaderboard": "https://github.com/netease-youdao/BCEmbedding", "reference": ""},
    {"leaderboard": "https://github.com/yxgeee/MMT-plus", "reference": ""},
    {"leaderboard": "https://github.com/infinigence/LVEval", "reference": ""},
    {
        "leaderboard": "https://stanfordnlp.github.io/coqa",
        "reference": "https://github.com/stevezheng23/xlnet_extension_tf/blob/3efa272c4dcd126a0d3c79fb912b8cffae5ee572/README.md?plain=1#L2",
    },
    {
        "leaderboard": "https://quac.ai",
        "reference": "https://github.com/stevezheng23/xlnet_extension_tf/blob/3efa272c4dcd126a0d3c79fb912b8cffae5ee572/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/CoIR-team/coir/blob/d997dae3fb7abab29c8242de7da66657d9bfe62b/README.md?plain=1#L24",
    },
    {
        "leaderboard": "https://archersama.github.io/coir",
        "reference": "https://github.com/CoIR-team/coir/blob/d997dae3fb7abab29c8242de7da66657d9bfe62b/README.md?plain=1#L24",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/Xwin-LM/Xwin-LM/blob/4587c1097bf4b8d6c462e7506663eecbe46492f9/README.md?plain=1#L83",
    },
    {
        "leaderboard": "https://www.aicrowd.com/challenges/food-recognition-benchmark-2022/leaderboards",
        "reference": "https://github.com/AIcrowd/food-recognition-benchmark-starter-kit/blob/a8f1eb7b34844f133b22ec888b60a9710d79a070/README.md?plain=1#L193",
    },
    {
        "leaderboard": "https://leaderboard.carla.org/leaderboard",
        "reference": "https://github.com/carla-simulator/scenario_runner/blob/7758d066080f180f8296887ed89b7c723a54706a/README.md?plain=1#L49",
    },
    {
        "leaderboard": "https://rajpurkar.github.io/SQuAD-explorer",
        "reference": "https://github.com/Nagakiran1/Extending-Google-BERT-as-Question-and-Answering-model-and-Chatbot/blob/ee151418fb63b8ba12fe04ad6941d7ec663cb45d/README.md?plain=1#L422",
    },
    {
        "leaderboard": "https://github.com/yanyanSann/Long-Tailed-Classification-Leaderboard",
        "reference": "https://github.com/Keke921/GCLLoss/blob/0ba279917c47890df4a9deb86d5d881d6babe078/README.md?plain=1#L65",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/yongzhuo/qwen2-sft/blob/91d4dd876d31f65911fe46216298c590233b67b0/README.md?plain=1#L106",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/103/leaderboard",
        "reference": "https://github.com/batra-mlp-lab/visdial/blob/4b1b89cf1f02591bac066bc59ed27404be341fca/README.md?plain=1#L293",
    },
    {
        "leaderboard": "https://github.com/FormulaMonks/llm-benchmarker-suite",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/inspire-group/patch-defense-leaderboard",
        "reference": "https://github.com/inspire-group/PatchGuard/blob/76e4d8f5083b544924e71b9154f0e35300e11469/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://github.com/open-mmlab/OpenUnReID/blob/master/docs/LEADERBOARD.md",
        "reference": "",
    },
    {"leaderboard": "https://github.com/declare-lab/instruct-eval", "reference": ""},
    {
        "leaderboard": "https://github.com/VITA-Group/Deep_GCN_Benchmarking",
        "reference": "",
    },
    {
        "leaderboard": "https://dki-lab.github.io/GrailQA",
        "reference": "https://github.com/microsoft/vert-papers/blob/0a121f718fe2a9adfee1d863938ff0105909f48d/README.md?plain=1#L16",
    },
    {
        "leaderboard": "https://mathverse-cuhk.github.io/#leaderboard",
        "reference": "https://github.com/ZrrSkywalker/MAVIS/blob/f72665c3be80bacfa582788490edaf759ebed021/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/244/leaderboard",
        "reference": "https://github.com/xinke-wang/Awesome-Text-VQA/blob/c98d15bcc284873b25aec808c138c53d44ee170e/README.md?plain=1#L108",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/551/leaderboard",
        "reference": "https://github.com/xinke-wang/Awesome-Text-VQA/blob/c98d15bcc284873b25aec808c138c53d44ee170e/README.md?plain=1#L109",
    },
    {
        "leaderboard": "https://rrc.cvc.uab.es/?ch=11&com=evaluation&task=1",
        "reference": "https://github.com/xinke-wang/Awesome-Text-VQA/blob/c98d15bcc284873b25aec808c138c53d44ee170e/README.md?plain=1#L129",
    },
    {
        "leaderboard": "https://paperswithcode.com/dataset/popgym",
        "reference": "https://github.com/proroklab/popgym/blob/d7b0b42766e91775d4723bf797dad53f24e24926/README.md?plain=1#L76",
    },
    {
        "leaderboard": "https://taolusi.github.io/CSpider-explorer",
        "reference": "https://github.com/taolusi/chisp/blob/36c1ea8d2dae61fa0f59308180ca4713b1e71c82/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/mlabonne/llm-autoeval/blob/eca29921dd3a2d8397289f3132ba1dce5837e9cf/README.md?plain=1#L40",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mlabonne/Yet_Another_LLM_Leaderboard",
        "reference": "https://github.com/mlabonne/llm-autoeval/blob/eca29921dd3a2d8397289f3132ba1dce5837e9cf/README.md?plain=1#L67",
    },
    {
        "leaderboard": "https://gluebenchmark.com/leaderboard",
        "reference": "https://github.com/charles9n/bert-sklearn/blob/9cb510ae16209c1cb26b078e0e5037e1344600af/README.md?plain=1#L105",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/MIBlue119/awesome-llama-resources/blob/86c93699bc8fa8179a2d911f9b819549db248f7d/README.md?plain=1#L106",
    },
    {
        "leaderboard": "https://ml.energy/leaderboard",
        "reference": "https://github.com/AmberLJC/LLMSys-PaperList/blob/e18fed6259d6f9344911464146089c716de64acb/README.md?plain=1#L159",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/optimum/llm-perf-leaderboard",
        "reference": "https://github.com/AmberLJC/LLMSys-PaperList/blob/e18fed6259d6f9344911464146089c716de64acb/README.md?plain=1#L160",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/AmberLJC/LLMSys-PaperList/blob/e18fed6259d6f9344911464146089c716de64acb/README.md?plain=1#L162",
    },
    {
        "leaderboard": "https://crfm.stanford.edu/helm",
        "reference": "https://github.com/AmberLJC/LLMSys-PaperList/blob/e18fed6259d6f9344911464146089c716de64acb/README.md?plain=1#L163",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/AmberLJC/LLMSys-PaperList/blob/e18fed6259d6f9344911464146089c716de64acb/README.md?plain=1#L164",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/openlifescienceai/open_medical_llm_leaderboard",
        "reference": "https://github.com/AI-in-Health/MedLLMsPracticalGuide/blob/38a481770f6f9d233ead282eb06362bae56cc7b6/README.md?plain=1#L269",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/ymcui/Chinese-LLaMA-Alpaca-3/blob/58a4b6c10d44d7350e9f198088cf1e9271b666f5/README.md?plain=1#L240",
    },
    {"leaderboard": "https://github.com/RPC-Dataset/RPC-Leaderboard", "reference": ""},
    {
        "leaderboard": "https://github.com/alibaba/AICITY2024_Track2_AliOpenTrek_CityLLaVA",
        "reference": "",
    },
    {"leaderboard": "https://github.com/THUDM/SciGLM", "reference": ""},
    {
        "leaderboard": "https://www.hilti-challenge.com",
        "reference": "https://github.com/Hilti-Research/hilti-slam-challenge-2021/blob/2fb87cb60845f957bf287c05597ecd99a3ab3c8e/README.md?plain=1#L18",
    },
    {"leaderboard": "https://github.com/thu-coai/Safety-Prompts", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota/visual-question-answering-vqa-on-core-mm",
        "reference": "https://github.com/core-mm/core-mm/blob/b9d961e00451e7cf5f35d451c3bbe5ae8d94c9de/README.md?plain=1#L52",
    },
    {
        "leaderboard": "https://core-mm.github.io",
        "reference": "https://github.com/core-mm/core-mm/blob/b9d961e00451e7cf5f35d451c3bbe5ae8d94c9de/README.md?plain=1#L52",
    },
    {
        "leaderboard": "https://opendrivelab.com/challenge2023",
        "reference": "https://github.com/wenjie710/PivotNet/blob/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://www.crowdbenchmark.com/nwpucrowdloc.html",
        "reference": "https://github.com/taohan10200/Awesome-Crowd-Localization/blob/260da666257b2a45690b342e5419c0e762fefd91/README.md?plain=1#L88",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/17672",
        "reference": "https://github.com/cage-challenge/cage-challenge-4/blob/313bf330e5229663606de08df0d0d5135b55961d/README.md?plain=1#L187",
    },
    {
        "leaderboard": "https://leaderboard.carla.org/leaderboard",
        "reference": "https://github.com/erdos-project/pylot/blob/a71ae927328388dc44acc784662bf32a99f273f0/README.md?plain=1#L266",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/NexaAIDev/domain_llm_leaderboard",
        "reference": "https://github.com/NexaAI/octopus-v4/blob/40523680cabea733df4d3ba124daa8013b825ae2/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://github.com/shikras/d-cube/blob/main/eval_sota/README.md",
        "reference": "https://github.com/Charles-Xie/awesome-described-object-detection/blob/2999a65ad2132b208b1ec91afe95ec9061f3f5a1/README.md?plain=1#L42",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/opencompass/open_vlm_leaderboard",
        "reference": "https://github.com/scenarios/WeMM/blob/9fc4777fc1d8f8c13681c90c7a654d32cabd02a1/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://opencompass.org.cn/leaderboard-multimodal",
        "reference": "https://github.com/scenarios/WeMM/blob/9fc4777fc1d8f8c13681c90c7a654d32cabd02a1/README.md?plain=1#L15",
    },
    {
        "leaderboard": "https://lab.kb.se/leaderboard/results",
        "reference": "https://github.com/YoungXinyu1802/HuggingFace-Dataset-Card-Analysis/blob/af88ea0afa57612e73718d6c85ef5b8480af12a2/Data/Dataset_Card/AI-Sweden_SuperLim.md?plain=1#L42",
    },
    {"leaderboard": "https://github.com/GAIR-NLP/OpenResearcher", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L132",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L133",
    },
    {
        "leaderboard": "https://github.com/isen-zhang/ACLUE",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L134",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L135",
    },
    {
        "leaderboard": "https://gair-nlp.github.io/BeHonest/#leaderboard",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L136",
    },
    {
        "leaderboard": "https://gorilla.cs.berkeley.edu/leaderboard.html",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L137",
    },
    {
        "leaderboard": "https://github.com/jeinlee1991/chinese-llm-benchmark",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L138",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L139",
    },
    {
        "leaderboard": "https://qa.mpi-inf.mpg.de/compmix",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L140",
    },
    {
        "leaderboard": "https://dreambenchplus.github.io/#leaderboard",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L141",
    },
    {
        "leaderboard": "https://hkust-nlp.github.io/felm",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L142",
    },
    {
        "leaderboard": "https://infi-coder.github.io/infibench",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L143",
    },
    {
        "leaderboard": "https://lawbench.opencompass.org.cn/leaderboard",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L144",
    },
    {
        "leaderboard": "http://llmeval.com",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L145",
    },
    {
        "leaderboard": "https://lightchen233.github.io/m3cot.github.io/leaderboard.html",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L146",
    },
    {
        "leaderboard": "https://matheval.ai",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L147",
    },
    {
        "leaderboard": "https://mixeval.github.io/#leaderboard",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L148",
    },
    {
        "leaderboard": "https://henrychur.github.io/MultilingualMedQA",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L149",
    },
    {
        "leaderboard": "https://chuanyangjin.com/mmtom-qa-leaderboard",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L150",
    },
    {
        "leaderboard": "https://gair-nlp.github.io/OlympicArena/#leaderboard",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L151",
    },
    {
        "leaderboard": "https://pubmedqa.github.io",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L152",
    },
    {
        "leaderboard": "https://scibench-ucla.github.io/#leaderboard",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L153",
    },
    {
        "leaderboard": "https://fm.ai.tsinghua.edu.cn/superbench/#/leaderboard",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L154",
    },
    {
        "leaderboard": "https://lab.kb.se/leaderboard/results",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L155",
    },
    {
        "leaderboard": "https://nextplusplus.github.io/TAT-DQA",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L156",
    },
    {
        "leaderboard": "https://nextplusplus.github.io/TAT-QA",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L157",
    },
    {
        "leaderboard": "https://jykoh.com/vwa",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L158",
    },
    {
        "leaderboard": "https://we-math.github.io/#leaderboard",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L159",
    },
    {
        "leaderboard": "https://whoops-benchmark.github.io",
        "reference": "https://github.com/Hannibal046/Awesome-LLM/blob/d0c73f03fe9da740ff16a1b5ca3f6e0086ad28ba/README.md?plain=1#L160",
    },
    {
        "leaderboard": "https://github.com/svilupp/Julia-LLM-Leaderboard",
        "reference": "https://github.com/svilupp/awesome-generative-ai-meets-julia-language/blob/fcd652bbf88deaed25c1dbd506a1018638a1196d/README.md?plain=1#L95",
    },
    {
        "leaderboard": "https://github.com/ethz-spylab/rlhf_trojan_competition",
        "reference": "",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/463/leaderboard",
        "reference": "https://github.com/zd11024/NaviLLM/blob/a221f42ad52893e861779921df336d63c0b4f3a6/README.md?plain=1#L80",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1715/leaderboard",
        "reference": "https://github.com/zd11024/NaviLLM/blob/a221f42ad52893e861779921df336d63c0b4f3a6/README.md?plain=1#L80",
    },
    {
        "leaderboard": "https://github.com/bcmi/Image-Harmonization-Dataset-iHarmony4",
        "reference": "",
    },
    {
        "leaderboard": "https://www.nuscenes.org/object-detection",
        "reference": "https://github.com/nv-tlabs/planning-centric-metrics/blob/f6865f2b473303f2ff01a477bf6de4dce7109742/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/global-wheat-detection/leaderboard",
        "reference": "https://github.com/liaopeiyuan/TransferDet/blob/65d6468f6c9a0f25c2ca6360d2a3a34b864cca45/README.md?plain=1#L7",
    },
    {"leaderboard": "https://github.com/adobe-research/deft_corpus", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/yongzhuo/LLM-SFT/blob/6f87b332b2b612dcf2dadf942151efc1cf3a2559/README.md?plain=1#L71",
    },
    {
        "leaderboard": "https://lv-mhp.github.io/human_parsing_lb",
        "reference": "https://github.com/ZhaoJ9014/Multi-Human-Parsing/blob/a24eae67e9b4e730c75bcd8aec3e2ed06cb4b046/README.md?plain=1#L71",
    },
    {
        "leaderboard": "https://bop.felk.cvut.cz/leaderboards/pose-estimation-unseen-bop23/core-datasets",
        "reference": "https://github.com/NVlabs/FoundationPose/blob/cd3ca4bc080529c53d5e5235212ca476d82bccf7/readme.md?plain=1#L16",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/seizure-prediction/leaderboard",
        "reference": "https://github.com/april-org/april-ann/blob/0f7b1e823b57cfeac413fc6d910fd9c256817601/README.md?plain=1#L299",
    },
    {"leaderboard": "https://github.com/romainloiseau/HelixNet", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota/visual-object-tracking-on-lasot",
        "reference": "https://github.com/Little-Podi/AiATrack/blob/cdb3a4ef08964e591ee519009c00b3b90ac1a046/README.md?plain=1#L25",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/visual-object-tracking-on-trackingnet",
        "reference": "https://github.com/Little-Podi/AiATrack/blob/cdb3a4ef08964e591ee519009c00b3b90ac1a046/README.md?plain=1#L27",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/visual-object-tracking-on-got-10k",
        "reference": "https://github.com/Little-Podi/AiATrack/blob/cdb3a4ef08964e591ee519009c00b3b90ac1a046/README.md?plain=1#L28",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/visual-object-tracking-on-needforspeed",
        "reference": "https://github.com/Little-Podi/AiATrack/blob/cdb3a4ef08964e591ee519009c00b3b90ac1a046/README.md?plain=1#L29",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/visual-object-tracking-on-otb-100",
        "reference": "https://github.com/Little-Podi/AiATrack/blob/cdb3a4ef08964e591ee519009c00b3b90ac1a046/README.md?plain=1#L30",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/visual-object-tracking-on-uav123",
        "reference": "https://github.com/Little-Podi/AiATrack/blob/cdb3a4ef08964e591ee519009c00b3b90ac1a046/README.md?plain=1#L31",
    },
    {"leaderboard": "https://microsoft.github.io/GLUECoS", "reference": ""},
    {
        "leaderboard": "https://jailbreakbench.github.io",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L112",
    },
    {
        "leaderboard": "https://hkust-nlp.github.io/agentboard",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L178",
    },
    {
        "leaderboard": "https://artificialanalysis.ai/text-to-image",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L180",
    },
    {
        "leaderboard": "https://scale.com/leaderboard",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L181",
    },
    {
        "leaderboard": "https://crfm.stanford.edu/helm",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L182",
    },
    {
        "leaderboard": "https://www.vals.ai",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L183",
    },
    {
        "leaderboard": "https://trustllmbenchmark.github.io/TrustLLM-Website/leaderboard.html",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L184",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L185",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L186",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/optimum/llm-perf-leaderboard",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L187",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/hallucinations-leaderboard/leaderboard",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L188",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/NPHardEval/NPHardEval-leaderboard",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L189",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AI-Secure/llm-trustworthy-leaderboard",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L190",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/openlifescienceai/open_medical_llm_leaderboard",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L191",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/gaia-benchmark/leaderboard",
        "reference": "https://github.com/PetroIvaniuk/llms-tools/blob/180e5189ba3c63c735713090ce96e99d60398765/README.md?plain=1#L193",
    },
    {
        "leaderboard": "https://superbbenchmark.org/leaderboard",
        "reference": "https://github.com/mechanicalsea/lighthubert/blob/d53e5450b0bf45c5f93ed95f2a5e2a453840f408/README.md?plain=1#L7",
    },
    {"leaderboard": "https://github.com/PengYu-Team/Co-LRIO", "reference": ""},
    {"leaderboard": "https://github.com/dvlab-research/DSGN", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota/3d-lane-detection-on-openlane",
        "reference": "https://github.com/OpenDriveLab/OpenLane/blob/8a0ce6b0057278729f4753a57be38c12929d7ad9/README.md?plain=1#L24",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/shure-dev/Awesome-LLM-Papers-Comprehensive-Topics/blob/faaa0253df89193a74c7f0ebe87e53e17244c093/README.md?plain=1#L239",
    },
    {
        "leaderboard": "https://github.com/LudwigStumpp/llm-leaderboard",
        "reference": "https://github.com/shure-dev/Awesome-LLM-Papers-Comprehensive-Topics/blob/faaa0253df89193a74c7f0ebe87e53e17244c093/README.md?plain=1#L413",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/Abdullahw72/langchain-chatbot-multiple-PDF/blob/911bdc36e7b9117feb7b392b6b01e441c562c916/README.md?plain=1#L56",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/propara",
        "reference": "https://github.com/allenai/propara/blob/51a3cccbedcbe5f92276f9925e0ce1e6ec4247bb/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/scifact",
        "reference": "https://github.com/allenai/scifact/blob/68b98a56d93e0f9da0d2aab4e6c3294699a0f72e/README.md?plain=1#L33",
    },
    {
        "leaderboard": "https://hkust-nlp.github.io/agentboard",
        "reference": "https://github.com/hkust-nlp/AgentBoard/blob/bb7255e2daf1989069a186dad9e53f70680961db/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://adeval.cstcloud.cn/content/leaderboard",
        "reference": "https://github.com/dawnvince/EasyTSAD/blob/55eff2c6d62f9c792bf6253c046dcc04636efe5a/README.md?plain=1#L27",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/GeneZC/MiniMA/blob/4842dc053c697f38e29ff7effbc3448313497ed1/README.md?plain=1#L58",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/GeneZC/MiniMA/blob/4842dc053c697f38e29ff7effbc3448313497ed1/README.md?plain=1#L62",
    },
    {"leaderboard": "https://github.com/deepchem/moleculenet", "reference": ""},
    {"leaderboard": "https://github.com/CLUEbenchmark/CLUE", "reference": ""},
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/454/leaderboard",
        "reference": "https://github.com/Henry1iu/TNT-Trajectory-Prediction/blob/bcbccdc1d35a717793e3caa1d599c1f700612227/README.md?plain=1#L25",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/thestriver/ai-for-javascript-course/blob/3459b21868b20b5d5a35af7d2765b39795964c38/README.md?plain=1#L411",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/thestriver/ai-for-javascript-course/blob/3459b21868b20b5d5a35af7d2765b39795964c38/README.md?plain=1#L413",
    },
    {
        "leaderboard": "https://scandeval.com",
        "reference": "https://github.com/ScandEval/ScandEval",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_scene_flow.php",
        "reference": "https://github.com/jiahaopang/crl/blob/eab40c4d32e9fbff6b0cceaf0ddfa3b756083722/README.md?plain=1#L28",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/visual-entailment-on-snli-ve-val",
        "reference": "https://github.com/necla-ml/SNLI-VE/blob/e6cf745d2e677a2ad878b3cc095cde75c0bbb144/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1938/leaderboard",
        "reference": "https://github.com/buildingnet/buildingnet_dataset/blob/4907bebd6990b1f80d5edd88d1366756b33dd77c/README.md?plain=1#L16",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1663/leaderboard",
        "reference": "https://github.com/alexa/alexa-with-dstc9-track1-dataset/blob/7ebb4c767b64826c1ac0f8bae409c0fab9cc0ae4/README.md?plain=1#L13",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/skeskinen/llama-lite/blob/80df7a021fe33e3743deca557555ade8c0051a29/README.md?plain=1#L67",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/optimum/llm-perf-leaderboard",
        "reference": "https://github.com/samuelrince/awesome-green-ai/blob/d9f3aa82c2a0267eb4b1b16d752ae20e918d0bca/README.md?plain=1#L87",
    },
    {
        "leaderboard": "https://ml.energy/leaderboard",
        "reference": "https://github.com/samuelrince/awesome-green-ai/blob/d9f3aa82c2a0267eb4b1b16d752ae20e918d0bca/README.md?plain=1#L88",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/EnergyStarAI/2024_Leaderboard",
        "reference": "https://github.com/samuelrince/awesome-green-ai/blob/d9f3aa82c2a0267eb4b1b16d752ae20e918d0bca/README.md?plain=1#L89",
    },
    {
        "leaderboard": "https://sites.google.com/view/pgdl2020/leaderboard",
        "reference": "https://github.com/parthnatekar/pgdl/blob/fadc4705c3dbf18546703c5d196e4cca661a2cfd/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://sites.google.com/view/pgdl2020/leaderboard",
        "reference": "https://github.com/z-x-yang/AOT/blob/d6128fe24b162938885ecb63b12341d4a1e049d6/README.md?plain=1#L29",
    },
    {
        "leaderboard": "https://github.com/ws-choi/Conditioned-Source-Separation-LaSAFT",
        "reference": "",
    },
    {
        "leaderboard": "https://eu.aihub.ml/competitions/201",
        "reference": "https://github.com/z-x-yang/AOT/blob/d6128fe24b162938885ecb63b12341d4a1e049d6/README.md?plain=1#L29",
    },
    {
        "leaderboard": "https://eu.aihub.ml/competitions/201",
        "reference": "https://github.com/z-x-yang/AOT/blob/d6128fe24b162938885ecb63b12341d4a1e049d6/README.md?plain=1#L31",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/606/leaderboard",
        "reference": "https://github.com/YuankaiQi/REVERIE/blob/ea14a7b9fabec4a6e77cd92c0da941b111086749/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/2049/leaderboard",
        "reference": "https://github.com/eric-ai-lab/Aerial-Vision-and-Dialog-Navigation/blob/44df9c1bda3b731c27334098176edae10eebfd0a/README.md?plain=1#L16",
    },
    {
        "leaderboard": "https://github.com/robot-pesg/BotanicGarden/blob/main/leaderboard/README.md",
        "reference": "",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d",
        "reference": "https://github.com/sshaoshuai/PointRCNN/blob/1d0dee91262b970f460135252049112d80259ca0/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://rrc.cvc.uab.es/?ch=17&com=evaluation&task=4",
        "reference": "https://github.com/rubenpt91/MP-DocVQA-Framework/blob/7939f521fe5a82a442f54831b1c0e96daab331d9/readme.md?plain=1#L39",
    },
    {
        "leaderboard": "https://rajpurkar.github.io/SQuAD-explorer",
        "reference": "https://github.com/allenai/document-qa/blob/2f9fa6878b60ed8a8a31bcf03f802cde292fe48b/README.md?plain=1#L167",
    },
    {"leaderboard": "https://github.com/pangeo-data/WeatherBench", "reference": ""},
    {
        "leaderboard": "https://cares-ai.github.io/#leaderboard",
        "reference": "https://github.com/richard-peng-xia/CARES/blob/c8ae2ad1e6d08ae3ba22a72495fd305496e1214b/README.md?plain=1#L12",
    },
    {"leaderboard": "https://github.com/GAIR-NLP/benbench", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard",
        "reference": "https://github.com/ajdavidl/Portuguese-NLP/blob/8d5797fb75ee49d682a5dfc20ad0439879bbbcfd/README.md?plain=1#L237",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/97/leaderboard",
        "reference": "https://github.com/peteanderson80/Matterport3DSimulator/blob/589d091b111333f9e9f9d6cfd021b2eb68435925/README.md?plain=1#L50",
    },
    {"leaderboard": "https://github.com/Master-PLC/FreDF", "reference": ""},
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/eugeneyan/open-llms/blob/96abc93762f5d93702f75d51128c0263ee92078d/README.md?plain=1#L109",
    },
    {
        "leaderboard": "https://twitter.com/jefrankle/status/1654631746506301441",
        "reference": "https://github.com/eugeneyan/open-llms/blob/96abc93762f5d93702f75d51128c0263ee92078d/README.md?plain=1#L110",
    },
    {
        "leaderboard": "https://crfm.stanford.edu/helm",
        "reference": "https://github.com/eugeneyan/open-llms/blob/96abc93762f5d93702f75d51128c0263ee92078d/README.md?plain=1#L111",
    },
    {
        "leaderboard": "https://github.com/LudwigStumpp/llm-leaderboard",
        "reference": "https://github.com/eugeneyan/open-llms/blob/96abc93762f5d93702f75d51128c0263ee92078d/README.md?plain=1#L112",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/eugeneyan/open-llms/blob/96abc93762f5d93702f75d51128c0263ee92078d/README.md?plain=1#L114",
    },
    {"leaderboard": "https://github.com/lfy79001/TableQAKit", "reference": ""},
    {
        "leaderboard": "https://leaderboard.allenai.org/open_book_qa",
        "reference": "https://github.com/allenai/OpenBookQA/blob/b51971646e9371a61508d9953fc706645e194a71/README.md?plain=1#L16",
    },
    {
        "leaderboard": "http://www.cvlibs.net/datasets/kitti/eval_tracking.php",
        "reference": "https://github.com/aleksandrkim61/EagerMOT/blob/2792a76a044250e55e7f25b3e75cd5961366bd33/README.md?plain=1#L27",
    },
    {
        "leaderboard": "https://www.nuscenes.org/tracking",
        "reference": "https://github.com/aleksandrkim61/EagerMOT/blob/2792a76a044250e55e7f25b3e75cd5961366bd33/README.md?plain=1#L28",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/thunlp/UltraChat/blob/1f613e1b8dfc6d1e3a02efb6905de608ed06645b/README.md?plain=1#L35",
    },
    {"leaderboard": "https://github.com/jiachens/ModelNet40-C", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/Anush008/fastembed-go/blob/98bb32d57bcbf964ee5a9c3c42fd33d001a832b9/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/830/leaderboard",
        "reference": "https://github.com/microsoft/unilm/blob/db5e102cddbea73605b10a2f972694865062bec8/README.md?plain=1#L139",
    },
    {
        "leaderboard": "https://superbbenchmark.org/leaderboard",
        "reference": "https://github.com/microsoft/unilm/blob/db5e102cddbea73605b10a2f972694865062bec8/README.md?plain=1#L143",
    },
    {
        "leaderboard": "https://sites.research.google/xtreme",
        "reference": "https://github.com/microsoft/unilm/blob/db5e102cddbea73605b10a2f972694865062bec8/README.md?plain=1#L146",
    },
    {
        "leaderboard": "https://rrc.cvc.uab.es/?ch=17&com=evaluation&task=1",
        "reference": "https://github.com/microsoft/unilm/blob/db5e102cddbea73605b10a2f972694865062bec8/README.md?plain=1#L158",
    },
    {
        "leaderboard": "https://rrc.cvc.uab.es/?ch=13&com=evaluation&task=3",
        "reference": "https://github.com/microsoft/unilm/blob/db5e102cddbea73605b10a2f972694865062bec8/README.md?plain=1#L158",
    },
    {
        "leaderboard": "https://rajpurkar.github.io/SQuAD-explorer",
        "reference": "https://github.com/duyunshu/bert-sentiment-analysis/blob/93d1e78064e43938e3bf34f7c09633a0b9b39d16/README.md?plain=1#L158",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/code-generation-on-humaneval",
        "reference": "https://github.com/alekst23/simple-coder/blob/1f940b10081cd8b328d5645271978ea36f9d47af/README.md?plain=1#L15",
    },
    {
        "leaderboard": "https://hotpotqa.github.io",
        "reference": "https://github.com/woshiyyya/DFGN-pytorch/blob/569bfdd67d8e54bb244339965a9268fb64806014/readme.md?plain=1#L15",
    },
    {
        "leaderboard": "https://www.swebench.com",
        "reference": "https://github.com/aorwall/moatless-tools/blob/a50e3ef9da4e73e916e71294649f42038b9df47b/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/HqWu-HITCS/Awesome-Chinese-LLM/blob/119dddee96cc3720f7a05c5ac6ca3593de7f9322/README.md?plain=1#L1067",
    },
    {
        "leaderboard": "https://lvbench.github.io/#leaderboard",
        "reference": "https://github.com/THUDM/LVBench/blob/f998d95541cd5f0953c52449b185c6467b9b099d/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://github.com/cbvrp-acmmm-2019/cbvrp-acmmm-2019",
        "reference": "",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification/leaderboard",
        "reference": "https://github.com/thuwyh/Jigsaw-Unintended-Bias-in-Toxicity-Classification/blob/83329b273f6d688d98caaef5a03dfb42ed4fc1ad/README.md?plain=1#L2",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/openlifescienceai/open_medical_llm_leaderboard",
        "reference": "https://github.com/TsinghuaC3I/UltraMedical/blob/e53eab0a5d0bf981c8cb9c55744d61dc0b4d56a9/README.md?plain=1#L129",
    },
    {
        "leaderboard": "https://leaderboard.carla.org/leaderboard",
        "reference": "https://github.com/autonomousvision/carla_garage/blob/1448bed63642bfe7c25e49e79e15713b20d104e6/README.md?plain=1#L176",
    },
    {"leaderboard": "https://github.com/yuh-zha/AlignScore", "reference": ""},
    {
        "leaderboard": "https://cocodataset.org/#captions-leaderboard",
        "reference": "https://github.com/peteanderson80/Up-Down-Captioner/blob/0b4e8e5e34953bac5f32368503a68be8a84b6866/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mike-ravkine/can-ai-code-results",
        "reference": "https://github.com/the-crypt-keeper/can-ai-code/blob/7782631fe9ce39532cf4da929e68165def7b12ca/README.md?plain=1#L37",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/allenai/WildBench",
        "reference": "https://github.com/allenai/WildBench/blob/d6b8dcaf377d173d031980f97c16e1a82618c03d/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://paperswithcode.com/task/node-classification-on-non-homophilic",
        "reference": "https://github.com/SitaoLuan/ACM-GNN/blob/2d4f213979f030762b31a0a694df94f36493c5c5/README.md?plain=1#L129",
    },
    {
        "leaderboard": "https://waymo.com/open/challenges/2020/3d-detection",
        "reference": "https://github.com/PJLab-ADG/LoGoNet/blob/a231f99682d6e8943f660867448fc00a1d3339a9/README.md?plain=1#L14",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/3d-depth-estimation-on-relative-human",
        "reference": "https://github.com/Arthur151/Relative_Human/blob/14cb1e0fb081e970ffa03151e0a0b225dcf358c0/README.md?plain=1#L27",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/infrawhispers/anansi/blob/35db8b28537696481973743d10a1d6859aef679d/README.md?plain=1#L30",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/google-quest-challenge/leaderboard",
        "reference": "https://github.com/oleg-yaroshevskiy/quest_qa_labeling/blob/730a9632314e54584f69f909d5e2ef74d843e02c/README.md?plain=1#L109",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/jincheng9/gpt-tutorial/blob/c8dafc0b5ea60ecd054a63f71b71e7c337f94af2/README.md?plain=1#L114",
    },
    {
        "leaderboard": "https://www.aicrowd.com/challenges/unity-obstacle-tower-challenge/leaderboards",
        "reference": "https://github.com/Unity-Technologies/obstacle-tower-source/blob/7dfbf7c5a290823d7d8f08b323dcd5b24d188a68/README.md?plain=1#L40",
    },
    {
        "leaderboard": "https://www.aicrowd.com/challenges/neurips-2021-minerl-diamond-competition/leaderboards",
        "reference": "https://github.com/minerllabs/competition_submission_template/blob/ed23a9159c21a45fb2c445b615017c32b55a4680/README.md?plain=1#L14",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb_dt.php?challengeid=6&compid=4",
        "reference": "https://github.com/rbgirshick/rcnn/blob/43b0334e96e9e910bc45c94902a093b5a6f35d0a/README.md?plain=1#L39",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb_dt.php?challengeid=11&compid=4",
        "reference": "https://github.com/rbgirshick/rcnn/blob/43b0334e96e9e910bc45c94902a093b5a6f35d0a/README.md?plain=1#40",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/robothor_objectnav",
        "reference": "https://github.com/allenai/robothor-challenge/blob/f3c4f35bc397aff4d5236e269efe3ff41f6d218e/README.md?plain=1#L105",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/math-word-problem-solving-on-svamp",
        "reference": "https://github.com/chuanyang-Zheng/Progressive-Hint/blob/aca5acc6bbdede3eee79d7e4f6b0445f5f9b4c43/README.md?plain=1#L32",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/arithmetic-reasoning-on-gsm8k",
        "reference": "https://github.com/chuanyang-Zheng/Progressive-Hint/blob/aca5acc6bbdede3eee79d7e4f6b0445f5f9b4c43/README.md?plain=1#L32",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/math-word-problem-solving-on-math",
        "reference": "https://github.com/chuanyang-Zheng/Progressive-Hint/blob/aca5acc6bbdede3eee79d7e4f6b0445f5f9b4c43/README.md?plain=1#L32",
    },
    {
        "leaderboard": "https://mmbench.opencompass.org.cn/leaderboard",
        "reference": "https://github.com/codefuse-ai/CodeFuse-MFT-VLM/blob/a195b90d89c5ffbd7cd63fb38825e77226306dd9/README.md?plain=1#L16",
    },
    {
        "leaderboard": "https://github.com/codefuse-ai/codefuse-devops-eval",
        "reference": "",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/feedback-prize-2021/leaderboard",
        "reference": "https://github.com/affjljoo3581/Feedback-Prize-Competition/blob/034427117cc8a3e1dd63401b3519fc28e3f18830/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://github.com/AI-secure/Certified-Robustness-SoK-Oldver",
        "reference": "",
    },
    {
        "leaderboard": "https://sokcertifiedrobustness.github.io/leaderboard",
        "reference": "https://github.com/AI-secure/Certified-Robustness-SoK-Oldver/blob/fcb1a85c7336ae23cfe18c6f87419469b38ad0a2/README.md?plain=1#L1",
    },
    {
        "leaderboard": "https://ml.energy/leaderboard",
        "reference": "https://github.com/Green-Software-Foundation/awesome-green-software/blob/daa4f9d143e9f990dbaabfd920df09a20b366e34/readme.md?plain=1#L125",
    },
    {
        "leaderboard": "https://waymo.com/open/challenges/2020/3d-detection",
        "reference": "https://github.com/ADLab-AutoDrive/INT/blob/988157ff131a0c027472bd0f00c0bda0e08cded0/README.md?plain=1#L37",
    },
    {
        "leaderboard": "https://waymo.com/open/challenges/2023/motion-prediction",
        "reference": "https://github.com/zhejz/HPTR/blob/646e07084da8049bc3d8f682b7378f640225b258/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1719/leaderboard",
        "reference": "https://github.com/zhejz/HPTR/blob/646e07084da8049bc3d8f682b7378f640225b258/README.md?plain=1#L69",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/yentinglin/open-tw-llm-leaderboard",
        "reference": "https://github.com/MiuLab/Taiwan-LLM/blob/26ee41dbe1fe7417a5beb7949c635c0685ac036d/README.md?plain=1#L77",
    },
    {
        "leaderboard": "https://llm-council.com",
        "reference": "https://github.com/llm-council/llm-council/blob/b6d968a2c0a027ac33d1690d3f6de4e0f5d1f182/README.md?plain=1#L13",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/home-credit-default-risk/leaderboard",
        "reference": "https://github.com/KazukiOnodera/Home-Credit-Default-Risk/blob/41eb0c0679dbf6cc156f8d3e699b40be731d6e9a/README.md?plain=1#L2",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/4318",
        "reference": "https://github.com/shreyashampali/kypt_transformer/blob/2abaf9e5323da4dfd330d6a3671a1c14f2c4f391/README.md?plain=1#L120",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/4897",
        "reference": "https://github.com/shreyashampali/kypt_transformer/blob/2abaf9e5323da4dfd330d6a3671a1c14f2c4f391/README.md?plain=1#L138",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/689/leaderboard",
        "reference": "https://github.com/martiansideofthemoon/hurdles-longform-qa/blob/805baf71bffff2f7c210714274f9c534ef9efcb1/README.md?plain=1#L115",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/6738",
        "reference": "https://github.com/psunlpgroup/MultiHiertt/blob/a0f59261650f120d2c83b38d2b79cf8e352fea5b/README.md?plain=1#L13",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/sci-m-wang/LangGPT-tools/blob/9e6e2c253a7dc9c63e577062cbb2b4ab3c3c3f04/README.md?plain=1#L29",
    },
    {
        "leaderboard": "https://skyworkai.github.io/agent-studio",
        "reference": "https://github.com/SkyworkAI/agent-studio/blob/35bc0670ea86fe77476f9ea971c2326cca79b665/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://flageval.baai.ac.cn/#/leaderboard/multimodal",
        "reference": "https://github.com/Kwai-Kolors/Kolors/blob/0fafa56a76b7acf1e147b153d1e7b8fd65f9055b/README.md?plain=1#L60",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/wel3kxial/AIGC_Resources/blob/01bef3156aa947986d39a334f253f05bc7dda8b4/README.md?plain=1#L758",
    },
    {"leaderboard": "https://github.com/THUDM/ReST-MCTS", "reference": ""},
    {
        "leaderboard": "https://github.com/CLUEbenchmark/CLUE",
        "reference": "https://github.com/aymericdamien/TopDeepLearning/blob/5c826a12b78e4dd38f73ea77aad169d3610b7a4d/README.md?plain=1#L733",
    },
    {"leaderboard": "https://github.com/lampts/my_dl_solutions", "reference": ""},
    {"leaderboard": "https://github.com/DengPingFan/DAVSOD", "reference": ""},
    {
        "leaderboard": "http://convai.io",
        "reference": "https://github.com/aliannejadi/ClariQ/blob/46885a544581a0af8aff0681d29e4971807e2912/README.md?plain=1#L475",
    },
    {
        "leaderboard": "http://got-10k.aitestunion.com/leaderboard",
        "reference": "https://github.com/got-10k/siamfc/blob/fd1c384e2fac7c5e88f1e3cb8da66d3632ee88ec/README.md?plain=1#L13",
    },
    {
        "leaderboard": "https://github.com/SiatMMLab/Awesome-Diffusion-Model-Based-Image-Editing-Methods/blob/main/Leaderboard.md",
        "reference": "",
    },
    {
        "leaderboard": "https://klue-benchmark.com/leaderboard",
        "reference": "https://github.com/KLUE-benchmark/KLUE/blob/3efd98708a40ff49251fddde35453f8fbb11f536/README.md?plain=1#L70",
    },
    {
        "leaderboard": "https://crux-eval.github.io/leaderboard.html",
        "reference": "https://github.com/facebookresearch/cruxeval/blob/ded6ba10b415d5625d8548656a763661a2ceb34f/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://github.com/inspire-group/patch-defense-leaderboard",
        "reference": "https://github.com/inspire-group/PatchCleanser/blob/6ec448862611386abb8f084645409256520ccad7/README.md?plain=1#L27",
    },
    {"leaderboard": "https://github.com/Magnetic2014/RoleEval", "reference": ""},
    {"leaderboard": "https://github.com/SoccerNet/sn-spotting", "reference": ""},
    {
        "leaderboard": "https://github.com/sejong-rcv/MLPD-Multi-Label-Pedestrian-Detection",
        "reference": "",
    },
    {
        "leaderboard": "https://www.aicrowd.com/challenges/imageclef-2019-vqa-med/leaderboards",
        "reference": "https://github.com/abachaa/VQA-Med-2019/blob/442dfa265af51a969e33c094e284f787710eca7a/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://ogb.stanford.edu/docs/leader_nodeprop/#ogbn-arxiv",
        "reference": "https://github.com/ytchx1999/PyG-OGB-Tricks/blob/5db909016ecf233653f9a44978300b0cbb8718a2/README.md?plain=1#L20",
    },
    {
        "leaderboard": "https://github.com/THUDM/AlignBench/blob/master/README-en.md",
        "reference": "",
    },
    {
        "leaderboard": "hhttps://ogb.stanford.edu/docs/leader_nodeprop/#ogbn-mag",
        "reference": "https://github.com/ytchx1999/PyG-OGB-Tricks/blob/5db909016ecf233653f9a44978300b0cbb8718a2/README.md?plain=1#L25",
    },
    {
        "leaderboard": "https://ogb.stanford.edu/docs/leader_nodeprop/#ogbn-products",
        "reference": "https://github.com/ytchx1999/PyG-OGB-Tricks/blob/5db909016ecf233653f9a44978300b0cbb8718a2/README.md?plain=1#L30",
    },
    {
        "leaderboard": "https://ogb.stanford.edu/docs/leader_nodeprop/#ogbn-proteins",
        "reference": "https://github.com/ytchx1999/PyG-OGB-Tricks/blob/5db909016ecf233653f9a44978300b0cbb8718a2/README.md?plain=1#L36",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/514/leaderboard",
        "reference": "https://github.com/pzzhang/VinVL/blob/39788c3e1939cc3c7f36d3c421d808dcba438bf9/README.md?plain=1#L31",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/355/leaderboard",
        "reference": "https://github.com/pzzhang/VinVL/blob/39788c3e1939cc3c7f36d3c421d808dcba438bf9/README.md?plain=1#L31",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/vcr",
        "reference": "https://github.com/pzzhang/VinVL/blob/39788c3e1939cc3c7f36d3c421d808dcba438bf9/README.md?plain=1#L31",
    },
    {"leaderboard": "https://github.com/layer6ai-labs/dgm-eval", "reference": ""},
    {"leaderboard": "https://github.com/salesforce/decaNLP", "reference": ""},
    {
        "leaderboard": "https://leaderboard.carla.org/leaderboard",
        "reference": "https://github.com/carla-simulator/carla/blob/d6f23ed84ef3f9c4344e2a758c87055b3b29be08/README.md?plain=1#L56",
    },
    {"leaderboard": "https://github.com/eugenesiow/super-image", "reference": ""},
    {"leaderboard": "https://github.com/liuye6666/EWR-PGD", "reference": ""},
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=6",
        "reference": "https://github.com/TheLegendAli/DeepLab-Context/blob/fb04e9e2fc2682490ad9f60533b9d6c4c0e0479c/README.md?plain=1#L39",
    },
    {"leaderboard": "https://github.com/SoccerNet/sn-tracking", "reference": ""},
    {
        "leaderboard": "https://leaderboard.allenai.org/mctaco",
        "reference": "https://github.com/CogComp/MCTACO/blob/0ba9b790ad5a87ab3eef0f0cb7819408192f9729/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://cl-detection2023.grand-challenge.org/evaluation/challenge/leaderboard",
        "reference": "https://github.com/5k5000/CLdetection2023/blob/d1a01536ad892134c4dd728c87dc9ac1d87b8e11/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://cl-detection2023.grand-challenge.org/evaluation/testing/leaderboard",
        "reference": "https://github.com/5k5000/CLdetection2023/blob/d1a01536ad892134c4dd728c87dc9ac1d87b8e11/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://sokcertifiedrobustness.github.io/leaderboard",
        "reference": "https://github.com/yinizhilian/ICLR2024-Papers-with-Code/blob/df7bfdc3452c53b248029c23e9ba375f4361fd0b/README.md?plain=1#L2853",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/7395",
        "reference": "https://github.com/thunlp/Knowledge-Plugin/blob/3b042d43f2cddefaa50ce08cd5feb89a41e7dba4/README.md?plain=1#L137",
    },
    {
        "leaderboard": "https://github.com/Kali-Hac/Awesome-Skeleton-Based-Models/blob/main/skeleton-based-action-recognition/README.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/Kali-Hac/Awesome-Skeleton-Based-Models/blob/main/3D-pose-estimation/README.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/Kali-Hac/Awesome-Skeleton-Based-Models/blob/main/skeleton-based-person-reID/README.md",
        "reference": "",
    },
    {
        "leaderboard": "https://nl2sql360.github.io/#leaderboard",
        "reference": "https://github.com/HKUSTDial/NL2SQL360/blob/ef8a588db0a3d6113d0a70c99725b1ee9407f415/README.md?plain=1#L15",
    },
    {
        "leaderboard": "https://github.com/yanyanSann/Long-Tailed-Classification-Leaderboard",
        "reference": "https://github.com/zhangyongshun/BagofTricks-LT/blob/ce7859a930649d10fb2a4fd4ed9242007eb25e2f/README.md?plain=1#L309",
    },
    {
        "leaderboard": "https://teddy-xionggz.github.io/MIRAGE",
        "reference": "https://github.com/Teddy-XiongGZ/MIRAGE/blob/ddf4a47cbe77eec47658075701a809a6a4901ac0/README.md?plain=1#L115",
    },
    {"leaderboard": "https://github.com/mkliegl/kaggle-Facebook-V", "reference": ""},
    {
        "leaderboard": "https://yuchen814.github.io/CodeTransOcean/#leadboard",
        "reference": "https://github.com/WeixiangYAN/CodeTransOcean/blob/42e2cd3b41b3a18a6dba3dfdf425f772360304ca/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://mmmu-benchmark.github.io/#leaderboard",
        "reference": "https://github.com/MMMU-Benchmark/MMMU/blob/646d158ee3420f1b2c9f6665048b1dafaa667c50/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://yale-lily.github.io/spider",
        "reference": "https://github.com/paulfitz/mlsql/blob/2f2f9cff35dce24580b06085072b44a8ce17fb54/README.md?plain=1#L173",
    },
    {
        "leaderboard": "https://github.com/salesforce/WikiSQL",
        "reference": "https://github.com/paulfitz/mlsql/blob/2f2f9cff35dce24580b06085072b44a8ce17fb54/README.md?plain=1#L174",
    },
    {
        "leaderboard": "https://yale-lily.github.io/sparc",
        "reference": "https://github.com/paulfitz/mlsql/blob/2f2f9cff35dce24580b06085072b44a8ce17fb54/README.md?plain=1#L175",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota",
        "reference": "https://github.com/Nativeatom/NaturalLanguageProcessing/blob/e1254d10432af0f38071d33df465c2abd197fbbf/README.md?plain=1#L322",
    },
    {"leaderboard": "https://github.com/hou-yz/MultiviewX", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard",
        "reference": "https://github.com/huybery/Awesome-Code-LLM/blob/9974cc5752ae36324801bd41f2d696ef0218b818/README.md?plain=1#L68",
    },
    {
        "leaderboard": "https://bird-bench.github.io",
        "reference": "https://github.com/huybery/Awesome-Code-LLM/blob/9974cc5752ae36324801bd41f2d696ef0218b818/README.md?plain=1#L69",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mike-ravkine/can-ai-code-results",
        "reference": "https://github.com/huybery/Awesome-Code-LLM/blob/9974cc5752ae36324801bd41f2d696ef0218b818/README.md?plain=1#L70",
    },
    {
        "leaderboard": "https://leaderboard.tabbyml.com",
        "reference": "https://github.com/huybery/Awesome-Code-LLM/blob/9974cc5752ae36324801bd41f2d696ef0218b818/README.md?plain=1#L71",
    },
    {
        "leaderboard": "https://crux-eval.github.io/leaderboard.html",
        "reference": "https://github.com/huybery/Awesome-Code-LLM/blob/9974cc5752ae36324801bd41f2d696ef0218b818/README.md?plain=1#L72",
    },
    {
        "leaderboard": "(https://evalplus.github.io/leaderboard.html",
        "reference": "https://github.com/huybery/Awesome-Code-LLM/blob/9974cc5752ae36324801bd41f2d696ef0218b818/README.md?plain=1#L73",
    },
    {
        "leaderboard": "https://intercode-benchmark.github.io",
        "reference": "https://github.com/huybery/Awesome-Code-LLM/blob/9974cc5752ae36324801bd41f2d696ef0218b818/README.md?plain=1#L76",
    },
    {
        "leaderboard": "https://accubits.com/open-source-program-synthesis-models-leaderboard",
        "reference": "https://github.com/huybery/Awesome-Code-LLM/blob/9974cc5752ae36324801bd41f2d696ef0218b818/README.md?plain=1#L77",
    },
    {
        "leaderboard": "https://yale-lily.github.io/spider",
        "reference": "https://github.com/huybery/Awesome-Code-LLM/blob/9974cc5752ae36324801bd41f2d696ef0218b818/README.md?plain=1#L78",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/Qihoo360/360zhinao/blob/1c5657686f98a4a2580c3e2e808344a010b79aee/README.md?plain=1#L41",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn/leaderboard-llm",
        "reference": "https://github.com/Qihoo360/360zhinao/blob/1c5657686f98a4a2580c3e2e808344a010b79aee/README.md?plain=1#L77",
    },
    {"leaderboard": "https://github.com/MadryLab/cifar10_challenge", "reference": ""},
    {
        "leaderboard": "https://datahack.analyticsvidhya.com/contest/ltfs-datascience-finhack-an-online-hackathon/#LeaderBoard",
        "reference": "https://github.com/rajat5ranjan/AV-LTFS-Data-Science-FinHack-ML-Hackathon/blob/bf8b964624c028f6a59a6675c0cf8f77bc228efc/README.md?plain=1#L49",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/Wang-Shuo/A-Guide-to-Retrieval-Augmented-LLM/blob/4acecb4f1dadd598ec459f02c2d6303843c4a44d/README.md?plain=1#L235",
    },
    {"leaderboard": "https://github.com/songlab-cal/tape", "reference": ""},
    {
        "leaderboard": "https://www.kaggle.com/c/nyu-cv-fall-2018/leaderboard",
        "reference": "https://github.com/soumith/traffic-sign-detection-homework/blob/8c223326d7f2bf2c52a97f4e41bd1a0e9bf59bae/README.md?plain=1#L31",
    },
    {
        "leaderboard": "https://github.com/wenhao728/awesome-diffusion-v2v/blob/main/doc/leaderboard.md",
        "reference": "",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/TheFinAI/FinBen",
        "reference": "https://github.com/adlnlp/FinLLMs/blob/8117033c5bee6fa330013e826b21af4b8735af10/README.md?plain=1#L61",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/10740",
        "reference": "https://github.com/osyvokon/awesome-ukrainian-nlp/blob/98c9e537e21fa9f93be4e17bd7f54a09282fafa5/README.md?plain=1#L150",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/674/leaderboard",
        "reference": "https://github.com/magnumresearchgroup/Magnum-NLC2CMD/blob/e63da427e7f3bbce6d8b16f176fe229f54b4c54f/README.md?plain=1#L16",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/princeton-nlp/SimPO/blob/ed54e415be88340a341273f4451adfdafe6934b7/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://www.crowdbenchmark.com/nwpucrowdloc.html",
        "reference": "https://github.com/taohan10200/IIM/blob/9d54b7c9e9409bb73759de33aa0bca74479fd902/README.md?plain=1#L116",
    },
    {
        "leaderboard": "https://github.com/bolgebrygg/Force-2020-Machine-Learning-competition",
        "reference": "",
    },
    {
        "leaderboard": "https://rowanzellers.com/hellaswag",
        "reference": "https://github.com/rowanz/hellaswag/blob/a29ff8e9a04bba4bd6588223785ce105328adc57/hellaswag_models/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/face-alignment-on-wflw",
        "reference": "https://github.com/starhiking/HeatmapInHeatmap/blob/7174ea28ebf5846d1406d06c665fc09f06630022/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/face-alignment-on-cofw",
        "reference": "https://github.com/starhiking/HeatmapInHeatmap/blob/7174ea28ebf5846d1406d06c665fc09f06630022/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/face-alignment-on-300w",
        "reference": "https://github.com/starhiking/HeatmapInHeatmap/blob/7174ea28ebf5846d1406d06c665fc09f06630022/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://matbench.materialsproject.org/Leaderboards%20Per-Task/matbench_v0.1_matbench_dielectric",
        "reference": "https://github.com/MasterAI-EAM/Darwin/blob/2096eeb99dd9393861844fbed9782865a4ed7834/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/hf-audio/open_asr_leaderboard",
        "reference": "https://github.com/huggingface/open_asr_leaderboard/blob/fe50cf007429be69c08038f7be25df01c6f2db4a/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://blumenstiel.github.io/mess-benchmark/leaderboard",
        "reference": "https://github.com/blumenstiel/MESS/blob/cc701b5cac56b4c871397c21dd7a11b5f1bcdbe2/README.md?plain=1#L44",
    },
    {"leaderboard": "https://github.com/aliyun/cflue", "reference": ""},
    {
        "leaderboard": "https://rajpurkar.github.io/SQuAD-explorer",
        "reference": "https://github.com/unilight/R-NET-in-Tensorflow/blob/f3a5dc6bb495f652a9b09b244fc13ee4ae297437/README.md?plain=1#L87",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/hf-audio/open_asr_leaderboard",
        "reference": "https://github.com/nyrahealth/CrisperWhisper/blob/4a24d3de6fa000075f3758cb2c4b2da6759759f3/README.md?plain=1#L37",
    },
    {
        "leaderboard": "https://github.com/Doragd/Awesome-Sentence-Embedding",
        "reference": "",
    },
    {
        "leaderboard": "https://ymcui.com/expmrc",
        "reference": "https://github.com/ymcui/expmrc/blob/9827fed6466f295252f9054c56ec7b233d46e636/README.md?plain=1#L23",
    },
    {
        "leaderboard": "https://www.aicrowd.com/challenges/www-2018-challenge-learning-to-recognize-musical-genre/leaderboards",
        "reference": "https://github.com/kristijanbartol/Deep-Music-Tagger/blob/f47295c3171e77733be5b80ddcec9790dfc3165b/README.md?plain=1#L84",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/musique_ans",
        "reference": "https://github.com/StonyBrookNLP/musique/blob/922ac98f19a201998dbdae6d7f2887a5258dbdeb/README.md?plain=1#L69",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/musique_full",
        "reference": "https://github.com/StonyBrookNLP/musique/blob/922ac98f19a201998dbdae6d7f2887a5258dbdeb/README.md?plain=1#L69",
    },
    {"leaderboard": "https://github.com/songlab-cal/tape-neurips2019", "reference": ""},
    {
        "leaderboard": "https://ogb.stanford.edu/docs/leader_linkprop",
        "reference": "https://github.com/facebookresearch/SEAL_OGB/blob/ea01ef509df1a90bc246e4e9828e192eeec4289c/README.md?plain=1#L11",
    },
    {"leaderboard": "https://github.com/coastalcph/lex-glue", "reference": ""},
    {
        "leaderboard": "https://leaderboard.allenai.org/arc",
        "reference": "https://github.com/allenai/macaw/blob/5a0b32b7879e39cb43551830833dcaed8e208773/README.md?plain=1#L142",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/arc_easy",
        "reference": "https://github.com/allenai/macaw/blob/5a0b32b7879e39cb43551830833dcaed8e208773/README.md?plain=1#L143",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/genie-arcda",
        "reference": "https://github.com/allenai/macaw/blob/5a0b32b7879e39cb43551830833dcaed8e208773/README.md?plain=1#L144",
    },
    {"leaderboard": "https://github.com/JUNJIE99/MLVU", "reference": ""},
    {
        "leaderboard": "http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d",
        "reference": "https://github.com/charlesq34/frustum-pointnets/blob/2ffdd345e1fce4775ecb508d207e0ad465bcca80/README.md?plain=1#L9",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=4",
        "reference": "https://github.com/DataXujing/CNN-paper2/blob/f039c9599ca189ea400b20bb0a03c2657b9e4805/README.md?plain=1#L930",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=6",
        "reference": "https://github.com/HymEric/Segmentation-Series-Chaos/blob/08675d0a8636ce703227917222d0244e6578d80e/README.md?plain=1#L50",
    },
    {
        "leaderboard": "https://paperswithcode.com/task/semantic-segmentation",
        "reference": "https://github.com/HymEric/Segmentation-Series-Chaos/blob/08675d0a8636ce703227917222d0244e6578d80e/README.md?plain=1#L50",
    },
    {
        "leaderboard": "https://klejbenchmark.com/leaderboard",
        "reference": "https://github.com/allegro/HerBERT/blob/0be17826a1f33d97218dfe46d2b21e6432eec547/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/break",
        "reference": "https://github.com/allenai/Break/blob/cef3ed359754e5b0942bb1f90e6b12586898d593/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/break_high_level",
        "reference": "https://github.com/allenai/Break/blob/cef3ed359754e5b0942bb1f90e6b12586898d593/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/korean-gender-bias-detection/leaderboard",
        "reference": "https://github.com/kocohub/korean-hate-speech/blob/f8d05dce2b22007bb149e5139c0060c68ad8f94b/README.md?plain=1#L75",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/korean-bias-detection/leaderboard",
        "reference": "https://github.com/kocohub/korean-hate-speech/blob/f8d05dce2b22007bb149e5139c0060c68ad8f94b/README.md?plain=1#L76",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/korean-hate-speech-detection/leaderboard",
        "reference": "https://github.com/kocohub/korean-hate-speech/blob/f8d05dce2b22007bb149e5139c0060c68ad8f94b/README.md?plain=1#L77",
    },
    {
        "leaderboard": "http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d",
        "reference": "https://github.com/chonepieceyb/reading-frustum-pointnets-code/blob/d389ac7215a1e4d8fee9c0db8e5b2b9e99f88c20/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://aistudio.baidu.com/competition/detail/157/0/leaderboard",
        "reference": "https://github.com/baidu/DuReader/blob/c625076b06da8f56d59f19c41c73bd580a98a347/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://aistudio.baidu.com/competition/detail/116/0/leaderboard",
        "reference": "https://github.com/baidu/DuReader/blob/c625076b06da8f56d59f19c41c73bd580a98a347/README.md?plain=1#L13",
    },
    {
        "leaderboard": "https://aistudio.baidu.com/competition/detail/66/0/leaderboard",
        "reference": "https://github.com/baidu/DuReader/blob/c625076b06da8f56d59f19c41c73bd580a98a347/README.md?plain=1#L15",
    },
    {
        "leaderboard": "https://aistudio.baidu.com/competition/detail/49/0/leaderboard",
        "reference": "https://github.com/baidu/DuReader/blob/c625076b06da8f56d59f19c41c73bd580a98a347/README.md?plain=1#L17",
    },
    {
        "leaderboard": "https://aistudio.baidu.com/competition/detail/720/0/leaderboard",
        "reference": "https://github.com/baidu/DuReader/blob/c625076b06da8f56d59f19c41c73bd580a98a347/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://github.com/vectara/hallucination-leaderboard",
        "reference": "https://github.com/Barnacle-ai/awesome-llm-list/blob/f3874e96ac94e8c516ad4a8c8c222fd411ebdde2/README.md?plain=1#L115",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/gaia-benchmark/leaderboard",
        "reference": "https://github.com/Barnacle-ai/awesome-llm-list/blob/f3874e96ac94e8c516ad4a8c8c222fd411ebdde2/README.md?plain=1#L116",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/Barnacle-ai/awesome-llm-list/blob/f3874e96ac94e8c516ad4a8c8c222fd411ebdde2/README.md?plain=1#L117",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
        "reference": "https://github.com/Barnacle-ai/awesome-llm-list/blob/f3874e96ac94e8c516ad4a8c8c222fd411ebdde2/README.md?plain=1#L118",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/Barnacle-ai/awesome-llm-list/blob/f3874e96ac94e8c516ad4a8c8c222fd411ebdde2/README.md?plain=1#L119",
    },
    {
        "leaderboard": "https://inklab.usc.edu/CommonGen/leaderboard.html",
        "reference": "https://github.com/Barnacle-ai/awesome-llm-list/blob/f3874e96ac94e8c516ad4a8c8c222fd411ebdde2/README.md?plain=1#L120",
    },
    {
        "leaderboard": "https://opencompass.org.cn/leaderboard-llm",
        "reference": "https://github.com/Barnacle-ai/awesome-llm-list/blob/f3874e96ac94e8c516ad4a8c8c222fd411ebdde2/README.md?plain=1#L121",
    },
    {"leaderboard": "https://github.com/RecList/evalRS-CIKM-2022", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/dataset/worldview-3-pancollection",
        "reference": "https://github.com/XiaoXiao-Woo/PanCollection/blob/43d367f95d45549aa65fff100e046b4446f3c808/README.md?plain=1#L184",
    },
    {
        "leaderboard": "https://www.nuscenes.org/object-detection",
        "reference": "https://github.com/fudan-zvg/PolarFormer/blob/dadd1bfd213e00ddf3e6c77c4733acc089131142/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://www.nuscenes.org/object-detection",
        "reference": "https://github.com/fudan-zvg/PolarFormer/blob/dadd1bfd213e00ddf3e6c77c4733acc089131142/README.md?plain=1#L22",
    },
    {"leaderboard": "https://github.com/pkunlp-icler/PCA-EVAL", "reference": ""},
    {
        "leaderboard": "https://stanfordmlgroup.github.io/competitions/mura",
        "reference": "https://github.com/beamandrew/medical-data/blob/a205fbb5664e4d35a456b0ef9c5fb2f99ff02611/README.md?plain=1#L422",
    },
    {"leaderboard": "https://github.com/Mercury7353/PyBench", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard",
        "reference": "https://github.com/Marker-Inc-Korea/KO-Platypus/blob/cc67a54f5fe9b75aa032e80c63b8c57922592798/README.md?plain=1#L77",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/qasc",
        "reference": "https://github.com/allenai/qasc/blob/c44a320c2bb682d614a675d2ba4be072150db4eb/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/open-images-2019-instance-segmentation/leaderboard",
        "reference": "https://github.com/ZFTurbo/Keras-Mask-RCNN-for-Open-Images-2019-Instance-Segmentation/blob/82b01e60cf734c1f0a9692163f1fb838215b5ea1/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://github.com/layumi/Person_reID_baseline_pytorch/blob/master/leaderboard/README.md",
        "reference": "https://github.com/bismex/Awesome-person-re-identification/blob/7185322f85d74fa4bdfeffd30e233ee3475ee86e/README.md?plain=1#L799",
    },
    {
        "leaderboard": "https://github.com/Mercury7353/PyBench",
        "reference": "https://github.com/zilliztech/VectorDBBench/blob/b364fe316f72c86809d3203dc2b75437e9eabc90/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://zilliz.com/vector-database-benchmark-tool",
        "reference": "https://github.com/zilliztech/VectorDBBench/blob/b364fe316f72c86809d3203dc2b75437e9eabc90/README.md?plain=1#L6",
    },
    {"leaderboard": "https://github.com/h2oai/h2o-LLM-eval", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota",
        "reference": "https://github.com/Robinwho/Deep-Learning/blob/fb27c53bb96b9c080bc3360030b5a2313c894c51/README.md?plain=1#L151",
    },
    {
        "leaderboard": "https://tenrec0.github.io/#leaderboard",
        "reference": "https://github.com/yuangh-x/2022-NIPS-Tenrec/blob/a6c242da0dab9c547410e8fdcde611436931dea6/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://github.com/cardiffnlp/tweetnlp/blob/main/FINETUNING_RESULT.md",
        "reference": "",
    },
    {
        "leaderboard": "https://rowanzellers.com/advice/#leaderboard",
        "reference": "https://github.com/rowanz/turingadvice/blob/c7328fb0f24879ae2815f8e241bfa7f698a89e2a/README.md?plain=1#L78",
    },
    {
        "leaderboard": "https://evalai.cloudcv.org/web/challenges/challenge-page/503/leaderboard",
        "reference": "https://github.com/yuweihao/reclor/blob/19b9d6c6025866ceafb4a4028819654b3817069b/README.md?plain=1#L27",
    },
    {
        "leaderboard": "https://github.com/JBoRu/Awesome-KBQA/blob/main/leaderboard/cfq.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/JBoRu/Awesome-KBQA/blob/main/leaderboard/complexquestions.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/JBoRu/Awesome-KBQA/blob/main/leaderboard/complexwebquestions.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/JBoRu/Awesome-KBQA/blob/main/leaderboard/grailqa.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/JBoRu/Awesome-KBQA/blob/main/leaderboard/kqapro.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/JBoRu/Awesome-KBQA/blob/main/leaderboard/lc-quad.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/JBoRu/Awesome-KBQA/blob/main/leaderboard/metaqa.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/JBoRu/Awesome-KBQA/blob/main/leaderboard/webquestions.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/JBoRu/Awesome-KBQA/blob/main/leaderboard/webquestionsp.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/JBoRu/Awesome-KBQA/blob/main/leaderboard/cfq.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/JBoRu/Awesome-KBQA/blob/main/leaderboard/cfq.md",
        "reference": "",
    },
    {
        "leaderboard": "https://dki-lab.github.io/GrailQA",
        "reference": "https://github.com/JBoRu/Awesome-KBQA/blob/847d92a2a46c70d0916f155eb01421ae140ed0a4/README.md?plain=1#L37",
    },
    {
        "leaderboard": "https://github.com/google-research/google-research/blob/master/cfq/README.md",
        "reference": "https://github.com/JBoRu/Awesome-KBQA/blob/847d92a2a46c70d0916f155eb01421ae140ed0a4/README.md?plain=1#L35",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/trajectory-prediction-on-stanford-drone",
        "reference": "https://github.com/karttikeya/awesome-human-pose-prediction/blob/3ae65d64af2c2fa94327f5f5b748a51063ea8198/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/osunlp/TravelPlannerLeaderboard",
        "reference": "https://github.com/OSU-NLP-Group/TravelPlanner/blob/f3c2a19fcca38d719dffee8a75efcc2d01320c5f/README.md?plain=1#L122",
    },
    {
        "leaderboard": "https://opendataval.github.io/leaderboards",
        "reference": "https://github.com/opendataval/opendataval/blob/747f1b8d2414e0fef193a5e4de20cd9cb4a5e8b8/README.md?plain=1#L24",
    },
    {
        "leaderboard": "https://research.nianticlabs.com/mapfree-reloc-benchmark/leaderboard",
        "reference": "https://github.com/nianticlabs/map-free-reloc/blob/b5182dc7b32231edb3b0f01f8cc97f289ec8a241/README.md?plain=1#L28",
    },
    {
        "leaderboard": "https://github.com/ocean-data-challenges/2020a_SSH_mapping_NATL60",
        "reference": "",
    },
    {
        "leaderboard": "https://microsoft.github.io/XGLUE",
        "reference": "https://github.com/microsoft/XGLUE/blob/e417c4c53088d9685c177085ada172fcc793e42e/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/Auto-Arena/Leaderboard",
        "reference": "https://github.com/DAMO-NLP-SG/Auto-Arena-LLMs/blob/66497d7ef4282904eab140b1660f1cecf6f3f886/readme.md?plain=1#L3",
    },
    {
        "leaderboard": "https://github.com/DetectionTeamUCAS/R2CNN_Faster-RCNN_Tensorflow",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/openai/gym/wiki/Leaderboard",
        "reference": "https://github.com/Datatouille/rl-workshop/blob/08aae1df7f4f1df77f7531df81c41837e5bbad88/README.md?plain=1#L93",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/1846",
        "reference": "https://github.com/czyssrs/FinQA/blob/0f16e2867befa6840783e58be38c9efb9229d742/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/4138",
        "reference": "https://github.com/czyssrs/FinQA/blob/0f16e2867befa6840783e58be38c9efb9229d742/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://cocodataset.org/#keypoints-leaderboard",
        "reference": "https://github.com/caiyuanhao1998/RSN/blob/ca3ba3a253ddc75c72dd2c1eff390aaecabda37d/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://ai.google.com/research/tydiqa",
        "reference": "https://github.com/google-research-datasets/tydiqa/blob/43cde6d598c1cf88c1a8b9ed32e89263ffb5e03b/README.md?plain=1#L163",
    },
    {"leaderboard": "https://github.com/primeqa/primeqa", "reference": ""},
    {
        "leaderboard": "https://leaderboard.allenai.org/anli",
        "reference": "https://github.com/allenai/rainbow/blob/ddc370727bb145bbc53c65f5bd39db4beca4115d/readme.md?plain=1#L409",
    },
    {
        "leaderboard": "https://www.tau-nlp.org/csqa-leaderboard",
        "reference": "https://github.com/allenai/rainbow/blob/ddc370727bb145bbc53c65f5bd39db4beca4115d/readme.md?plain=1#L419",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/cosmosqa",
        "reference": "https://github.com/allenai/rainbow/blob/ddc370727bb145bbc53c65f5bd39db4beca4115d/readme.md?plain=1#L422",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/hellaswag",
        "reference": "https://github.com/allenai/rainbow/blob/ddc370727bb145bbc53c65f5bd39db4beca4115d/readme.md?plain=1#L424",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/physicaliqa",
        "reference": "https://github.com/allenai/rainbow/blob/ddc370727bb145bbc53c65f5bd39db4beca4115d/readme.md?plain=1#L430",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/rainbow",
        "reference": "https://github.com/allenai/rainbow/blob/ddc370727bb145bbc53c65f5bd39db4beca4115d/readme.md?plain=1#L444",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/socialiqa",
        "reference": "https://github.com/allenai/rainbow/blob/ddc370727bb145bbc53c65f5bd39db4beca4115d/readme.md?plain=1#L449",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/winogrande",
        "reference": "https://github.com/allenai/rainbow/blob/ddc370727bb145bbc53c65f5bd39db4beca4115d/readme.md?plain=1#L458",
    },
    {
        "leaderboard": "https://github.com/MadryLab/backgrounds_challenge",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/BUPT-GAMMA/OpenHGNN/blob/main/openhgnn/dataset/ohgb.md",
        "reference": "",
    },
    {"leaderboard": "https://github.com/morecry/CharacterEval", "reference": ""},
    {
        "leaderboard": "https://docs.google.com/spreadsheets/d/1M801lEpBbKSNwP-vDBkC_pF7LdyGU1f_ufZb_NWNBZQ",
        "reference": "https://github.com/web-arena-x/webarena/blob/1469b7c9d8eaec3177855b3131569751f43a40d6/README.md?plain=1#L20",
    },
    {
        "leaderboard": "https://bedlam-leaderboard.is.tuebingen.mpg.de/leaderboard",
        "reference": "https://github.com/pixelite1201/BEDLAM/blob/ebf8bb14a43de46cc74dca4c00c13e571b325726/README.md?plain=1#L75",
    },
    {
        "leaderboard": "https://evalplus.github.io/leaderboard.html",
        "reference": "https://github.com/nlpxucan/WizardLM/blob/b4e47af70d816e341915e0d40cae51ae65aab21e/README.md?plain=1#L32",
    },
    {"leaderboard": "https://github.com/LLVM-AD/MAPLM", "reference": ""},
    {
        "leaderboard": "https://rajpurkar.github.io/SQuAD-explorer",
        "reference": "https://github.com/chiayewken/bert-qa/blob/2b690e0083b6cbd3d84088b767a5aecdba52f1e4/README.md?plain=1#L71",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/HaizeLabs/red-teaming-resistance-benchmark",
        "reference": "https://github.com/haizelabs/redteaming-resistance-benchmark/blob/26dc0592e2879c6cb4d1627500fb5d3cc097df5d/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard",
        "reference": "https://github.com/bigcode-project/starcoder2/blob/ce2ddd0867d6390f8e1eea7d045b8abf361def4b/README.md?plain=1#L168",
    },
    {
        "leaderboard": "https://aider.chat/docs/leaderboards",
        "reference": "https://github.com/SakanaAI/AI-Scientist/blob/8383c8a7d835a8b93a08dec50a93cd745242c1e1/README.md?plain=1#L271",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/dccuchile/CC6205/blob/696809d1646af7b198d6927432bc9c94e70b3325/README.md?plain=1#L102",
    },
    {
        "leaderboard": "https://eu.aihub.ml/competitions/201",
        "reference": "https://github.com/yoxu515/aot-benchmark/blob/6852c2d2284b1ebeb7e4dd0c0f05fdf4102bd34d/README.md?plain=1#L13",
    },
    {
        "leaderboard": "https://aesbench.github.io",
        "reference": "https://github.com/yipoh/AesBench/blob/576471e9bfcff51fab343eeae06ae4d1fabb2f77/README.md?plain=1#L44",
    },
    {
        "leaderboard": "https://github.com/OpenGenerativeAI/llm-colosseum",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/GaParmar/clean-fid#cleanfid-leaderboard-for-common-tasks",
        "reference": "",
    },
    {
        "leaderboard": "https://hotpotqa.github.io",
        "reference": "https://github.com/qipeng/golden-retriever/blob/c806574a373a4ee86b7e754f169bb2a54d3ba15f/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://waymo.com/open/challenges/2021/real-time-3d-prediction",
        "reference": "https://github.com/Nightmare-n/GraphRCNN/blob/d8f209556b399916780ffef8b8ba989ac8a5f838/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://github.com/gabrielhuang/awesome-few-shot-object-detection",
        "reference": "",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/liguodongiot/llm-resource/blob/fd3abd82d0c3c39b80ebe02f102f4ba50d95684b/README.md?plain=1#L210",
    },
    {
        "leaderboard": "https://comma.ai/leaderboard",
        "reference": "https://github.com/commaai/commavq/blob/048e825079949b86b8f6ccaeee5315d846c633dd/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/Anush008/fastembed-js/blob/06e425d1440df04ce3ac09b77e08ca6db57fb59d/README.md?plain=1#L14",
    },
    {
        "leaderboard": "https://video-mme.github.io/home_page.html#leaderboard",
        "reference": "https://github.com/BradyFU/Video-MME/blob/f95c420a2ce85a05c66beba5c26a48bf6c0ade86/README.md?plain=1#L144",
    },
    {
        "leaderboard": "https://rajpurkar.github.io/SQuAD-explorer",
        "reference": "https://github.com/stepthom/text_mining_resources/blob/31fb395f04a716f027d39ade01a5fd8cba10c5b9/README.md?plain=1#L438",
    },
    {
        "leaderboard": "https://gluebenchmark.com/leaderboard",
        "reference": "https://github.com/stepthom/text_mining_resources/blob/31fb395f04a716f027d39ade01a5fd8cba10c5b9/README.md?plain=1#L441",
    },
    {
        "leaderboard": "https://decathlon-10.grand-challenge.org/evaluation/challenge/leaderboard",
        "reference": "https://github.com/ljwztc/CLIP-Driven-Universal-Model/blob/459c8d3f644b4230f3c18ea3fed9aecb1e690279/README.md?plain=1#L14",
    },
    {"leaderboard": "https://github.com/IGNF/FLAIR-2", "reference": ""},
    {
        "leaderboard": "https://github.com/FranxYao/chain-of-thought-hub",
        "reference": "",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/Nexusflow/Nexus_Function_Calling_Leaderboard",
        "reference": "https://github.com/nexusflowai/NexusRaven-V2/blob/7794bc603a92856d075bb4a6607db550c96131c2/README.md?plain=1#L9",
    },
    {"leaderboard": "https://github.com/nyu-dl/dl4ir-doc2query", "reference": ""},
    {"leaderboard": "https://github.com/SoccerNet/sn-calibration", "reference": ""},
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/rodion-m/awesome_ai_for_programmers/blob/792781c92c78678340f9aebb4b531bbd7a55a675/README.md?plain=1#L410",
    },
    {
        "leaderboard": "https://github.com/lm-sys/arena-hard-auto",
        "reference": "https://github.com/rodion-m/awesome_ai_for_programmers/blob/792781c92c78678340f9aebb4b531bbd7a55a675/README.md?plain=1#L411",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard",
        "reference": "https://github.com/rodion-m/awesome_ai_for_programmers/blob/792781c92c78678340f9aebb4b531bbd7a55a675/README.md?plain=1#L413",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/gaia-benchmark/leaderboard",
        "reference": "https://github.com/rodion-m/awesome_ai_for_programmers/blob/792781c92c78678340f9aebb4b531bbd7a55a675/README.md?plain=1#L415",
    },
    {
        "leaderboard": "https://www.trustbit.tech/en/llm-benchmarks",
        "reference": "https://github.com/rodion-m/awesome_ai_for_programmers/blob/792781c92c78678340f9aebb4b531bbd7a55a675/README.md?plain=1#L416",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/rodion-m/awesome_ai_for_programmers/blob/792781c92c78678340f9aebb4b531bbd7a55a675/README.md?plain=1#L417",
    },
    {
        "leaderboard": "https://bird-bench.github.io",
        "reference": "https://github.com/rodion-m/awesome_ai_for_programmers/blob/792781c92c78678340f9aebb4b531bbd7a55a675/README.md?plain=1#L418",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/rodion-m/awesome_ai_for_programmers/blob/792781c92c78678340f9aebb4b531bbd7a55a675/README.md?plain=1#L419",
    },
    {
        "leaderboard": "https://github.com/vectara/hallucination-leaderboard",
        "reference": "https://github.com/jxzhangjhu/Awesome-LLM-Uncertainty-Reliability-Robustness/blob/bb0fc5a0d1d944b6c9df54d56ccbae9563cd3f7b/README.md?plain=1#L657",
    },
    {
        "leaderboard": "https://pages.nist.gov/jarvis_leaderboard",
        "reference": "https://github.com/usnistgov/chemnlp/blob/c60f99ffc02ab08871b22a78f803e258954a66c5/README.md?plain=1#L96",
    },
    {
        "leaderboard": "https://github.com/vectara/hallucination-leaderboard",
        "reference": "https://github.com/EdinburghNLP/awesome-hallucination-detection/blob/de02138cb0e0c4bf59a7329c9f141b0104e3fe23/README.md?plain=1#L398",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/454/leaderboard",
        "reference": "https://github.com/AIprogrammer/vehicle-trajectory-prediction/blob/281eb6914179239c98071aac81a146d7d39c736f/README.md?plain=1#L74",
    },
    {
        "leaderboard": "https://github.com/declare-lab/multimodal-deep-learning",
        "reference": "",
    },
    {
        "leaderboard": "https://aistudio.baidu.com/competition/detail/157/0/leaderboard",
        "reference": "https://github.com/PaddlePaddle/RocketQA/blob/e2bfcfcfa902ac6cef7f0d359606a9da05b795ac/README.md?plain=1#L20",
    },
    {"leaderboard": "https://github.com/seg/2016-ml-contest", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota/named-entity-recognition-on-few-nerd-sup",
        "reference": "https://github.com/tomaarsen/SpanMarkerNER/blob/507740da96be620472acd256dc14d267e70bbf27/README.md?plain=1#L148",
    },
    {
        "leaderboard": "https://videovista.github.io/#leaderboard",
        "reference": "https://github.com/KangarooGroup/Kangaroo/blob/ffd689f11e02c94c1f454f41700fd98a3d1796d4/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://video-mme.github.io/home_page.html#leaderboard",
        "reference": "https://github.com/KangarooGroup/Kangaroo/blob/ffd689f11e02c94c1f454f41700fd98a3d1796d4/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://github.com/Pranav-chib/End-to-End-Autonomous-Driving",
        "reference": "",
    },
    {
        "leaderboard": "https://nextplusplus.github.io/TAT-DQA",
        "reference": "https://github.com/NExTplusplus/TAT-DQA",
    },
    {
        "leaderboard": "https://leaderboard.carla.org/leaderboard",
        "reference": "https://github.com/Pranav-chib/End-to-End-Autonomous-Driving/blob/4fea8f57f337822b27dd6cb62161362608af3cb5/README.md?plain=1#L332",
    },
    {"leaderboard": "https://github.com/NREL/BuildingsBench", "reference": ""},
    {"leaderboard": "https://github.com/GAIR-NLP/MetaCritique", "reference": ""},
    {"leaderboard": "https://github.com/sociocom/JMED-LLM", "reference": ""},
    {
        "leaderboard": "https://gorilla.cs.berkeley.edu/leaderboard.html",
        "reference": "https://github.com/OpenBMB/MiniCPM/blob/c58c9a2827ac9529cfb1d2d2939bb5470d09c867/README.md?plain=1#L275",
    },
    {
        "leaderboard": "http://got-10k.aitestunion.com/leaderboard",
        "reference": "https://github.com/huanglianghua/siamfc-pytorch/blob/39572051d7c3d9850ca6100efa04091e18c2f706/README.md?plain=1#L20",
    },
    {
        "leaderboard": "https://hotpotqa.github.io",
        "reference": "https://github.com/shmsw25/DecompRC/blob/0d1c92635097e7f277ccef33bb08a12ba98597ef/README.md?plain=1#L31",
    },
    {
        "leaderboard": "https://super.gluebenchmark.com/leaderboard",
        "reference": "https://github.com/nlpfromscratch/nlp-llms-resources/blob/df11a9969205bc0d626cbc410f6e6bf1dbe4f3e0/README.md?plain=1#L509",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
        "reference": "https://github.com/nlpfromscratch/nlp-llms-resources/blob/df11a9969205bc0d626cbc410f6e6bf1dbe4f3e0/README.md?plain=1#L517",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/nlpfromscratch/nlp-llms-resources/blob/df11a9969205bc0d626cbc410f6e6bf1dbe4f3e0/README.md?plain=1#L518",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/nlpfromscratch/nlp-llms-resources/blob/df11a9969205bc0d626cbc410f6e6bf1dbe4f3e0/README.md?plain=1#L519",
    },
    {
        "leaderboard": "https://opencompass.org.cn/leaderboard-llm",
        "reference": "https://github.com/nlpfromscratch/nlp-llms-resources/blob/df11a9969205bc0d626cbc410f6e6bf1dbe4f3e0/README.md?plain=1#L520",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/uukuguy/multi_loras/blob/d6ec050f1a2da28f3c5e3fbbb1622dcd6ea68785/README.md?plain=1#L125",
    },
    {
        "leaderboard": "https://mmstar-benchmark.github.io/#Leaderboard%20Title",
        "reference": "https://github.com/MMStar-Benchmark/MMStar/blob/88f243ab4a39cb339530085c33aecb22819881a1/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn/leaderboard-multimodal",
        "reference": "https://github.com/MMStar-Benchmark/MMStar/blob/88f243ab4a39cb339530085c33aecb22819881a1/README.md?plain=1#L15",
    },
    {
        "leaderboard": "https://robustbench.github.io",
        "reference": "https://github.com/RobustBench/robustbench/blob/776bc95bb4167827fb102a32ac5aea62e46cfaab/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://github.com/fra31/auto-attack",
        "reference": "https://github.com/RobustBench/robustbench/blob/776bc95bb4167827fb102a32ac5aea62e46cfaab/README.md?plain=1#L50",
    },
    {
        "leaderboard": "https://github.com/brohrer/parameter_efficiency_leaderboard",
        "reference": "https://github.com/brohrer/sharpened-cosine-similarity/blob/8296581f14ad1be94101d8618576145c86562acc/README.md?plain=1#L49",
    },
    {
        "leaderboard": "https://routingchallenge.mit.edu/last-mile-routing-challenge-team-performance-and-leaderboard",
        "reference": "https://github.com/aws-samples/amazon-sagemaker-amazon-routing-challenge-sol/blob/774061efab12c8661b2cdfe294eab644f851430d/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1839/leaderboard",
        "reference": "https://github.com/microsoft/GLIP/blob/9dda9558c1ef59bb6cdc8e896e2bcab775a68ff0/README.md?plain=1#L342",
    },
    {"leaderboard": "https://github.com/HanxunH/RobustWRN", "reference": ""},
    {
        "leaderboard": "https://robustbench.github.io",
        "reference": "https://github.com/HanxunH/RobustWRN/blob/7c96fca4b57ccb038e8d510291fbaf1c1c45c7ed/README.md?plain=1#L54",
    },
    {
        "leaderboard": "https://paperswithcode.com/dataset/imagenet-s",
        "reference": "https://github.com/LUSSeg/ImageNet-S/blob/b2312ba7396151f883362bdc86d71469aa81066e/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://www.tau-nlp.org/csqa-leaderboard",
        "reference": "https://github.com/jonathanherzig/commonsenseqa/blob/6f5690bd539b67a4f965d8572aae3ed9195fd4a1/README.md?plain=1#L118",
    },
    {
        "leaderboard": "https://tbrain.trendmicro.com.tw/Competitions/Details/19",
        "reference": "https://github.com/yaoching0/Traditional-Chinese-Street-View-Text-Recognition/blob/2470e34a036c9f9fab473f56259a3cd50423b06e/README.md?plain=1#L2",
    },
    {"leaderboard": "https://github.com/cardiffnlp/xlm-t", "reference": ""},
    {
        "leaderboard": "https://www.aicrowd.com/challenges/trajnet-a-trajectory-forecasting-challenge/leaderboards",
        "reference": "https://github.com/theDebugger811/human-trajectory-forecasting-papers/blob/1378d1404049626c2338e52de8faabe0566d6d9e/README.md?plain=1#L124",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/multi-object-tracking-on-personpath22",
        "reference": "https://github.com/amazon-science/tracking-dataset/blob/2f9c757f847e172ce375372913bdbb3a47074e4f/README.md?plain=1#L28",
    },
    {"leaderboard": "https://github.com/Hritikbansal/videophy", "reference": ""},
    {
        "leaderboard": "https://allenai.github.io/re-align/just_eval.html#leaderboard",
        "reference": "https://github.com/Re-Align/just-eval/blob/3e1a1265e210be1d6ad71624c91da3efc36493ca/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/abhinand5/tamil-llama/blob/475e0a374a4ed8e5c2c65d4a4012bcb1ef119711/README.md?plain=1#L80",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/BramVanroy/open_dutch_llm_leaderboard",
        "reference": "https://github.com/Rijgersberg/GEITje/blob/209a30f6e9edf51e243c6ed7714b8184e6980109/README.md?plain=1#L202",
    },
    {"leaderboard": "https://github.com/darraghdog/avito-demand", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/Leeroo-AI/leeroo_orchestrator/blob/6f113d3b08f2b8b9a0fab00edf9a33d91534db33/README.md?plain=1#L146",
    },
    {
        "leaderboard": "https://waymo.com/open/challenges/2021/real-time-3d-prediction",
        "reference": "https://github.com/gwenzhang/Voxel-Mamba/blob/ad8172ae963a2067a8c7d938f890bce3b7e46642/README.md?plain=1#L29",
    },
    {
        "leaderboard": "https://www.nuscenes.org/object-detection",
        "reference": "https://github.com/gwenzhang/Voxel-Mamba/blob/ad8172ae963a2067a8c7d938f890bce3b7e46642/README.md?plain=1#L41",
    },
    {
        "leaderboard": "https://waymo.com/open/challenges/2020/3d-detection",
        "reference": "https://github.com/PJLab-ADG/DetZero/blob/051fad709be900f79a23f37f0c0507e0cf84c54e/README.md?plain=1#L30",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/lmmlzn/Awesome-LLMs-Datasets/blob/40aedad58ef85d40e65b800de30a9ff9bb8f41a5/README.md?plain=1#L81",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/lmmlzn/Awesome-LLMs-Datasets/blob/40aedad58ef85d40e65b800de30a9ff9bb8f41a5/README.md?plain=1#L81",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/lmmlzn/Awesome-LLMs-Datasets/blob/40aedad58ef85d40e65b800de30a9ff9bb8f41a5/README.md?plain=1#L81",
    },
    {
        "leaderboard": "https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md",
        "reference": "https://github.com/lmmlzn/Awesome-LLMs-Datasets/blob/40aedad58ef85d40e65b800de30a9ff9bb8f41a5/README.md?plain=1#L81",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
        "reference": "https://github.com/lmmlzn/Awesome-LLMs-Datasets/blob/40aedad58ef85d40e65b800de30a9ff9bb8f41a5/README.md?plain=1#L1648",
    },
    {
        "leaderboard": "https://domaineval.github.io/leaderboard.html",
        "reference": "https://github.com/lmmlzn/Awesome-LLMs-Datasets/blob/40aedad58ef85d40e65b800de30a9ff9bb8f41a5/README.md?plain=1#L2707",
    },
    {
        "leaderboard": "https://multi-trust.github.io/#leaderboard",
        "reference": "https://github.com/lmmlzn/Awesome-LLMs-Datasets/blob/40aedad58ef85d40e65b800de30a9ff9bb8f41a5/README.md?plain=1#L4339",
    },
    {
        "leaderboard": "https://github.com/google-research-datasets/ToTTo",
        "reference": "",
    },
    {"leaderboard": "https://github.com/krystalan/Multi-hopRC", "reference": ""},
    {"leaderboard": "https://github.com/google/BIG-bench", "reference": ""},
    {"leaderboard": "https://github.com/nitsaick/kits19-challenge", "reference": ""},
    {
        "leaderboard": "https://av.superbbenchmark.org/leaderboard",
        "reference": "https://github.com/roger-tseng/av-superb/blob/696e269abc9d7bf7ebe4ecae6800b39a528f78ad/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/IS2Lab/S-Eval",
        "reference": "https://github.com/IS2Lab/S-Eval/blob/1673270e4290520186a5ffdce12ab1ab788354ad/README.md?plain=1#L267",
    },
    {
        "leaderboard": "https://eurocity-dataset.tudelft.nl/eval/benchmarks/detection",
        "reference": "https://github.com/hasanirtiza/Pedestron/blob/a2ac3cf0f081d6233f04b5c3995f7b0e7235dfed/README.md?plain=1#L29",
    },
    {
        "leaderboard": "https://github.com/JUNJIE99/MLVU",
        "reference": "https://github.com/NVlabs/VILA/blob/355fda9114c966cf46af3057e9b977064af08751/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://mmmu-benchmark.github.io/#leaderboard",
        "reference": "https://github.com/NVlabs/VILA/blob/355fda9114c966cf46af3057e9b977064af08751/README.md?plain=1#L20",
    },
    {
        "leaderboard": "https://video-mme.github.io/home_page.html#leaderboard",
        "reference": "https://github.com/NVlabs/VILA/blob/355fda9114c966cf46af3057e9b977064af08751/README.md?plain=1#L20",
    },
    {
        "leaderboard": "https://opencompass.org.cn/leaderboard-multimodal",
        "reference": "https://github.com/NVlabs/VILA/blob/355fda9114c966cf46af3057e9b977064af08751/README.md?plain=1#L171",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/leaderboard",
        "reference": "https://github.com/RodolfoFerro/psychopathology-fer-assistant/blob/1447a52a830c84bed0860258d23f36e5253d41a7/README.md?plain=1#L161",
    },
    {
        "leaderboard": "http://cuge.baai.ac.cn/#/leaderboard",
        "reference": "https://github.com/TsinghuaAI/CUGE/blob/1a6e530e2de22c034a5e07efd7027d9c21cb7579/README.md?plain=1#L25",
    },
    {
        "leaderboard": "https://gorilla.cs.berkeley.edu/leaderboard",
        "reference": "https://github.com/ShishirPatil/gorilla/blob/37f61bf6ebb8beb412f02a19968e8b6f05cfb030/README.md?plain=1#L8",
    },
    {"leaderboard": "https://github.com/bytedance/MTVQA", "reference": ""},
    {
        "leaderboard": "https://livecodebench.github.io/leaderboard.html",
        "reference": "https://github.com/LiveCodeBench/LiveCodeBench/blob/3b208295bf649db39503a0083bf97f77b4d7d23b/README.md?plain=1#L7",
    },
    {"leaderboard": "https://github.com/DeepPavlovAdmin/convai", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota/image-classification-on-dtd",
        "reference": "https://github.com/ZhangYuanhan-AI/Bamboo/blob/f6337a76b3e48a1c5eff2bbd5b9dd044e42f0654/README.md?plain=1#L43",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/image-classification-on-food-101-1",
        "reference": "https://github.com/ZhangYuanhan-AI/Bamboo/blob/f6337a76b3e48a1c5eff2bbd5b9dd044e42f0654/README.md?plain=1#L44",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/fine-grained-image-classification-on-sun397",
        "reference": "https://github.com/ZhangYuanhan-AI/Bamboo/blob/f6337a76b3e48a1c5eff2bbd5b9dd044e42f0654/README.md?plain=1#L45",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/image-classification-on-flowers-102",
        "reference": "https://github.com/ZhangYuanhan-AI/Bamboo/blob/f6337a76b3e48a1c5eff2bbd5b9dd044e42f0654/README.md?plain=1#L46",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/fine-grained-image-classification-on-caltech",
        "reference": "https://github.com/ZhangYuanhan-AI/Bamboo/blob/f6337a76b3e48a1c5eff2bbd5b9dd044e42f0654/README.md?plain=1#L47",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/fine-grained-image-classification-on-oxford-1",
        "reference": "https://github.com/ZhangYuanhan-AI/Bamboo/blob/f6337a76b3e48a1c5eff2bbd5b9dd044e42f0654/README.md?plain=1#L48",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/image-classification-on-cifar-100",
        "reference": "https://github.com/ZhangYuanhan-AI/Bamboo/blob/f6337a76b3e48a1c5eff2bbd5b9dd044e42f0654/README.md?plain=1#L49",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/fine-grained-image-classification-on-stanford",
        "reference": "https://github.com/ZhangYuanhan-AI/Bamboo/blob/f6337a76b3e48a1c5eff2bbd5b9dd044e42f0654/README.md?plain=1#L50",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/image-classification-on-cifar-10",
        "reference": "https://github.com/ZhangYuanhan-AI/Bamboo/blob/f6337a76b3e48a1c5eff2bbd5b9dd044e42f0654/README.md?plain=1#L51",
    },
    {
        "leaderboard": "https://klejbenchmark.com/leaderboard",
        "reference": "https://github.com/Ermlab/PoLitBert/blob/6efb5abbc694226b15c8dc49a2b80d85ef1ce50f/README.md?plain=1#L145",
    },
    {
        "leaderboard": "https://few-shot.yyliu.net/miniimagenet.html",
        "reference": "https://github.com/yaoyao-liu/few-shot-classification-leaderboard/blob/451a97af777f5835ef2afa416a5728678dfc0ac9/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://tianchi.aliyun.com/competition",
        "reference": "https://github.com/gathierry/FashionAI-KeyPointsDetectionOfApparel/blob/2e0942b42b4a9cd974cdddc151675738dc8a8cb4/README.md?plain=1#L125",
    },
    {
        "leaderboard": "https://www.tanksandtemples.org/leaderboard",
        "reference": "https://github.com/yhw-yhw/PVAMVSNet/blob/d50ae05bb9b5e6edd2dc034c1bec016232d6e6a0/README.md?plain=1#L56",
    },
    {
        "leaderboard": "https://github.com/Tsinghua-MARS-Lab/Online-HD-Map-Construction-CVPR2023",
        "reference": "",
    },
    {"leaderboard": "https://github.com/jingtaozhan/RepBERT-Index", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/llm-jp/awesome-japanese-llm/blob/5d1d3e47327ab0e6ef83aed2a85a9eacd5d9b8e7/README.md?plain=1#L385",
    },
    {"leaderboard": "https://github.com/GammaTauAI/leetcode-hard-gym", "reference": ""},
    {
        "leaderboard": "https://lamp-benchmark.github.io/leaderboard",
        "reference": "https://github.com/Longyichen/Alpaca-family-library/blob/751511e7d56afb2c3afc09e055487f0af74d908b/README.md?plain=1#L1287",
    },
    {"leaderboard": "https://github.com/kaiko-ai/eva", "reference": ""},
    {
        "leaderboard": "http://ednet-leaderboard.s3-website-ap-northeast-1.amazonaws.com",
        "reference": "https://github.com/riiid/ednet/blob/27db572eeaf4455a1f6c029ba27e34f78bb49d73/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/allenai/reward-bench",
        "reference": "https://github.com/RLHFlow/RLHF-Reward-Modeling/blob/3a79367908a3c677fb5728d82c496155d74529aa/README.md?plain=1#L65",
    },
    {
        "leaderboard": "https://superbbenchmark.org/leaderboard",
        "reference": "https://github.com/pyf98/DPHuBERT/blob/c18093fe4b56a0027a80bf9b9b1a23f932cbf14c/README.md?plain=1#L27",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/ghimiresunil/LLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing/blob/1a4504d5a56fc1dfe5c71d51d67084f9bfdb17b6/README.md?plain=1#L464",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
        "reference": "https://github.com/ghimiresunil/LLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing/blob/1a4504d5a56fc1dfe5c71d51d67084f9bfdb17b6/README.md?plain=1#L329",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/ghimiresunil/LLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing/blob/1a4504d5a56fc1dfe5c71d51d67084f9bfdb17b6/README.md?plain=1#L318",
    },
    {"leaderboard": "https://github.com/yushundong/PyGDebias", "reference": ""},
    {"leaderboard": "https://github.com/MadryLab/mnist_challenge", "reference": ""},
    {
        "leaderboard": "https://marble-bm.shef.ac.uk/leaderboard",
        "reference": "https://github.com/yizhilll/MERT/blob/0c142bdf88ae5f62adab677983a2b54bcdb18f93/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://yale-lily.github.io/spider",
        "reference": "https://github.com/AlibabaResearch/DAMO-ConvAI/blob/273988c587c3f27066cf472fb072ad62db020232/README.md?plain=1#L24",
    },
    {
        "leaderboard": "https://yale-lily.github.io/sparc",
        "reference": "https://github.com/AlibabaResearch/DAMO-ConvAI/blob/273988c587c3f27066cf472fb072ad62db020232/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://yale-lily.github.io/cosql",
        "reference": "https://github.com/AlibabaResearch/DAMO-ConvAI/blob/273988c587c3f27066cf472fb072ad62db020232/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/sentiment-analysis-on-sst-2-binary",
        "reference": "https://github.com/dsindex/iclassifier/blob/66ef45fda26944e88a233e3465720590f0ac83d3/README.md?plain=1#L430",
    },
    {
        "leaderboard": "https://github.com/DiligentPanda/Tencent_Ads_Algo_2018",
        "reference": "",
    },
    {
        "leaderboard": "https://paperswithcode.com/task/rgb-d-salient-object-detection",
        "reference": "https://github.com/zwbx/BTS-Net/blob/6c937249e93c0e8dcb4092152c31d8a7123313ab/README.md?plain=1#L13",
    },
    {
        "leaderboard": "https://paperswithcode.com/dataset/medmcqa",
        "reference": "https://github.com/medmcqa/medmcqa/blob/c59ef14ca1990266c4107c7864b45a20fd93e5e0/README.md?plain=1#L45",
    },
    {
        "leaderboard": "https://waymo.com/open/challenges/2023/motion-prediction",
        "reference": "https://github.com/sshaoshuai/MTR/blob/a5ba7bdafa09a1a355cc34f8a895499a2b14ddb3/README.md?plain=1#L16",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/freesound-audio-tagging-2019/leaderboard",
        "reference": "https://github.com/ebouteillon/freesound-audio-tagging-2019/blob/7b0f634cdbecbd42ffca161cfcd21d1a6e9948b9/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://github.com/fastai/imagenette",
        "reference": "https://github.com/lessw2020/Ranger-Mish-ImageWoof-5/blob/b0aa73508870de072329d058f0add165da462d6d/README.md?plain=1#L50",
    },
    {
        "leaderboard": "https://fewnlu.github.io",
        "reference": "https://github.com/THUDM/FewNLU/blob/669fc05ea3df9e3fb388d96b8859001177cd9fcc/README.md?plain=1#L16",
    },
    {
        "leaderboard": "https://korquad.github.io",
        "reference": "https://github.com/lyeoni/KorQuAD/blob/95b26efefaa14c31066ba8921b463636fbf2bec1/README.md?plain=1#L9",
    },
    {"leaderboard": "https://github.com/helme/ecg_ptbxl_benchmarking", "reference": ""},
    {"leaderboard": "https://github.com/HaohanWang/ImageNet-Sketch", "reference": ""},
    {
        "leaderboard": "https://rrc.cvc.uab.es/?ch=17&com=evaluation&task=1",
        "reference": "https://github.com/anisha2102/docvqa/blob/4d79af6d063482e94304766b3e3875c47b4b09d2/README.md?plain=1#L2",
    },
    {
        "leaderboard": "https://cevalbenchmark.com/static/leaderboard.html",
        "reference": "https://github.com/InternLM/InternLM-techreport/blob/56efc23bb8c84a9f13874f746aef589f76a059ad/README.md?plain=1#L61",
    },
    {
        "leaderboard": "https://github.com/pfnet-research/japanese-lm-fin-harness",
        "reference": "",
    },
    {
        "leaderboard": "https://www.tanksandtemples.org/leaderboard",
        "reference": "https://github.com/FangjinhuaWang/IterMVS/blob/afb870c2d3e15b15e15f244f8d6822c4acb207d5/README.md?plain=1#L97",
    },
    {
        "leaderboard": "https://stanfordmlgroup.github.io/competitions/mrnet",
        "reference": "https://github.com/yashbhalgat/MRNet-Competition/blob/9cd9d70b7e0f384cba99187c6f05a6aea5b156c8/README.md?plain=1#L2",
    },
    {
        "leaderboard": "https://www.aicrowd.com/challenges/music-demixing-challenge-ismir-2021/leaderboards",
        "reference": "https://github.com/ws-choi/LASAFT-Net-v2/blob/56ec6cc6908ab0ffc9dcdff97fcd1ed7c43a46d2/README.md?plain=1#L25",
    },
    {"leaderboard": "https://github.com/rajasagashe/JuICe", "reference": ""},
    {
        "leaderboard": "https://github.com/FDU-VTS/Awesome-Person-Re-Identification",
        "reference": "",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/allenai/WildBench",
        "reference": "https://github.com/magpie-align/magpie/blob/9823e93f9a75b8c5e5c598d455cb849b19efacad/README.md?plain=1#L24",
    },
    {"leaderboard": "https://github.com/taohan10200/WEATHER-5K", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota/conversational-web-navigation-on-weblinx",
        "reference": "https://github.com/McGill-NLP/weblinx/blob/a6e2c6324eae220db166f8beededb1b9b68dab63/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/felipemaiapolo/tinyBenchmarks/blob/9c7e20302301ad531bfdfd9a7288e6e916bf22e9/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://github.com/JiaxinZhuang/Skin-Lesion-Recognition.Pytorch/blob/master/Leaderboard.png",
        "reference": "",
    },
    {
        "leaderboard": "https://www.drivendata.org/competitions/70/hateful-memes-phase-2/leaderboard",
        "reference": "https://github.com/rizavelioglu/hateful_memes-hate_detectron/blob/e55d7c95930c4ca47bbb550a291290133cc11e15/README.md?plain=1#L34",
    },
    {
        "leaderboard": "http://got-10k.aitestunion.com/leaderboard",
        "reference": "https://github.com/got-10k/toolkit/blob/956e7286fdf209cbb125adac9a46376bd8297ffb/README.md?plain=1#L165",
    },
    {"leaderboard": "https://github.com/SmartLi8/stella", "reference": ""},
    {
        "leaderboard": "https://gluebenchmark.com/leaderboard",
        "reference": "https://github.com/fastnlp/ElasticBERT/blob/ffb827b4b1f017d81b42c540b989ff9bb98dcabe/README.md?plain=1#L41",
    },
    {
        "leaderboard": "https://l-eval.github.io",
        "reference": "https://github.com/OpenLMLab/LEval/blob/cd34b050269148aed75acbbe4a599873ad0f37e9/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/google-ai-open-images-object-detection-track/leaderboard",
        "reference": "https://github.com/ZFTurbo/Keras-RetinaNet-for-Open-Images-Challenge-2018/blob/af744232e13aef039b01c929cadbe5f3a03d9487/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://github.com/bethgelab/robust-detection-benchmark",
        "reference": "",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/161/leaderboard",
        "reference": "https://github.com/salesforce/VD-BERT/blob/72f982ad95ca54037aa36d49fef7a2a6f0da11ac/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://www.ai4climatecoop.org",
        "reference": "https://github.com/mila-iqia/climate-cooperation-competition/blob/148ed8576e77a0f5ece6cdcd6016a25e54a73332/README.md?plain=1#L22",
    },
    {"leaderboard": "https://github.com/cambridgeltl/xcopa", "reference": ""},
    {"leaderboard": "https://github.com/GPT-Fathom/GPT-Fathom", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/GPT-Fathom/GPT-Fathom/blob/4b7e78bba947cf54813030e83788c510ec772854/README.md?plain=1#L94",
    },
    {
        "leaderboard": "https://github.com/ecker-lab/object-centric-representation-benchmark",
        "reference": "",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AIR-Bench/leaderboard",
        "reference": "https://github.com/FlagOpen/FlagEmbedding/blob/43d4154fcc6b049a475a0f4fb3fc0051851c79f8/README.md?plain=1#L57",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/FlagOpen/FlagEmbedding/blob/43d4154fcc6b049a475a0f4fb3fc0051851c79f8/README.md?plain=1#L74",
    },
    {"leaderboard": "https://github.com/bcollazo/catanatron", "reference": ""},
    {
        "leaderboard": "https://nextplusplus.github.io/TAT-HQA",
        "reference": "https://github.com/NExTplusplus/TAT-HQA/blob/8b821d89f0fbfeceea07e4f5faf46b33150fee07/README.md?plain=1#L95",
    },
    {
        "leaderboard": "https://evalai.cloudcv.org/featured-challenges/80/leaderboard",
        "reference": "https://github.com/jokieleung/awesome-visual-question-answering/blob/b889431acb1a0910825324a71b88f70a78350c06/README.md?plain=1#L439",
    },
    {
        "leaderboard": "https://evalai.cloudcv.org/featured-challenges/1/leaderboard",
        "reference": "https://github.com/jokieleung/awesome-visual-question-answering/blob/b889431acb1a0910825324a71b88f70a78350c06/README.md?plain=1#L441",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/244/leaderboard",
        "reference": "https://github.com/jokieleung/awesome-visual-question-answering/blob/b889431acb1a0910825324a71b88f70a78350c06/README.md?plain=1#L445",
    },
    {
        "leaderboard": "https://github.com/cdancette/vqa-cp-leaderboard",
        "reference": "https://github.com/jokieleung/awesome-visual-question-answering/blob/b889431acb1a0910825324a71b88f70a78350c06/README.md?plain=1#L449",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=3",
        "reference": "https://github.com/kuangliu/torchcv/blob/6291f3e1e4bbf6467fd6b1e79001d34a59481bb6/README.md?plain=1#L21",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=6",
        "reference": "https://github.com/TheLegendAli/DeepLab-Context2/blob/4aa33272b3d30a066cfdebe0b2d31caf499b5091/README.md?plain=1#L61",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn",
        "reference": "https://github.com/open-compass/opencompass/blob/22a4e7651180f0940ea7173e58e8121abe46ca11/README.md?plain=1#L78",
    },
    {
        "leaderboard": "https://github.com/ku21fan/COO-Comic-Onomatopoeia",
        "reference": "",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/InstaDeepAI/nucleotide_transformer_benchmark",
        "reference": "https://github.com/kuleshov-group/caduceus/blob/6b451a8b1a8e5d55b76b4e68565ed551fc53de64/README.md?plain=1#L273",
    },
    {
        "leaderboard": "https://chartmimic.github.io",
        "reference": "https://github.com/ChartMimic/ChartMimic/blob/7d232b3bba2fe59d6ff5cb7a011209a9bb11b879/README.md?plain=1#L17",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/uonlp/open_multilingual_llm_leaderboard",
        "reference": "https://github.com/nlp-uoregon/mlmm-evaluation/blob/0590a08356140243523b2befbb8817361aed2487/README.md?plain=1#L59",
    },
    {
        "leaderboard": "https://microsoft.github.io/MSMARCO-Document-Ranking-Submissions/leaderboard",
        "reference": "https://github.com/thunlp/BERT-KPE/blob/0f21ca0f9a5017b0f134d559734988437134e4c4/README.md?plain=1#L168",
    },
    {"leaderboard": "https://github.com/PJLab-ADG/DriveArena", "reference": ""},
    {"leaderboard": "https://github.com/xeneta/LeadQualifier", "reference": ""},
    {"leaderboard": "https://github.com/thuml/Time-Series-Library", "reference": ""},
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1897/leaderboard",
        "reference": "https://github.com/mddunlap924/LangChain-SynData-RAG-Eval/blob/57364fc29d7ed19195adfb14e5b480c3d40a77e2/README.md?plain=1#L169",
    },
    {
        "leaderboard": "https://dataset.org/dream",
        "reference": "https://github.com/nlpdata/dream/blob/bb64644c209cb6497bb9e13244fbf220c900a740/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://dfki-ric-underactuated-lab.github.io/real_ai_gym_leaderboard",
        "reference": "https://github.com/dfki-ric-underactuated-lab/double_pendulum/blob/15f8edcf3d2cd25bbad51ce44338cc28dfffd206/README.md?plain=1#L180",
    },
    {"leaderboard": "https://github.com/SpeechColab/GigaSpeech", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/yangjianxin1/Firefly/blob/56d20b03ac574186918a32b537c9539da34e746b/README.md?plain=1#L88",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/landmark-retrieval-2021/leaderboard",
        "reference": "https://github.com/WesleyZhang1991/Google_Landmark_Retrieval_2021_2nd_Place_Solution/blob/684278e75428efc618b7e7384943627b5d7956a1/README.md?plain=1#L77",
    },
    {
        "leaderboard": "https://github.com/scottcha/OpenAvalancheProject",
        "reference": "",
    },
    {"leaderboard": "https://github.com/salesforce/QAConv", "reference": ""},
    {
        "leaderboard": "https://www.kaggle.com/competitions/restaurant-revenue-prediction/leaderboard",
        "reference": "https://github.com/justmarkham/DAT8/blob/ff04af83e8ac1409cd86f3cb3524e4141644c5a1/README.md?plain=1#L477",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/uclaml/SPPO/blob/b124cdaa43853b2bc997ce69c6e4587e9dff1934/README.md?plain=1#L147",
    },
    {
        "leaderboard": "https://apolloscape.auto/leader_board.html",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L68",
    },
    {
        "leaderboard": "https://motional.com/news/2023-nuplan-challenge",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L107",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/3d-multi-object-tracking-on-nuscenes",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L84",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/3d-object-detection-on-kitti-cars-moderate",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L84",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/semantic-segmentation-on-cityscapes",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L84",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/3d-semantic-segmentation-on-semantickitti",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L84",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/video-panoptic-segmentation-on-cityscapes-vps",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L84",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/lane-detection-on-culane",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L84",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/point-cloud-retrieval-on-oxford-robotcar",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L84",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/prediction-of-occupancy-grid-maps-on-occ3d",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L84",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/3d-object-tracking-on-argoverse-cvpr-2020",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L85",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/traffic-object-detection-on-bdd100k-val",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L85",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/multi-object-tracking-and-segmentation-on-3",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L85",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/lidar-semantic-segmentation-on-nuscenes",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L85",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/lane-detection-on-tusimple",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L85",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/multi-object-tracking-on-mot17",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L86",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/3d-object-detection-on-waymo-pedestrian",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L86",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/semantic-segmentation-on-kitti-360",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L86",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/3d-lane-detection-on-openlane",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L86",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/3d-object-detection-on-waymo-vehicle",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L87",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/lane-detection-on-curvelanes",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L87",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/trajectory-prediction-on-apolloscape-1",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L96",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/trajectory-prediction-on-nuscenes",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L97",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/motion-forecasting-on-argoverse-cvpr-2020",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L98",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/trajectory-prediction-on-lyft-level-5",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L100",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/autonomous-driving-on-carla-leaderboard",
        "reference": "https://github.com/LincanLi98/Awesome-Data-Centric-Autonomous-Driving/blob/be4d4ab6cfefdbf05aa02835889a4757e6e3cf0e/README.md?plain=1#L108",
    },
    {
        "leaderboard": "https://ogb.stanford.edu/docs/leader_linkprop/#ogbl-wikikg2",
        "reference": "https://github.com/migalkin/NodePiece/blob/9adc57efe302919d017d74fc648f853308cf75fd/readme.md?plain=1#L138",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/themanyone/whisper_dictation/blob/bb3da145d65b8f54284ac6c0592a8789a3f8abaf/README.md?plain=1#L82",
    },
    {
        "leaderboard": "https://ai-sandbox.list.lu/llm-leaderboard",
        "reference": "https://github.com/based2/KB/blob/cfef6b3a5f22c3f4887071f5f6cbd5ac62d087d9/machine_learning/test.md?plain=1#L6",
    },
    {"leaderboard": "https://github.com/zeyofu/BLINK_Benchmark", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/thinknimble/embeddings-search-demo/blob/d53cec9e90f2858e0f396a63e18ae6344159355f/README.md?plain=1#L17",
    },
    {
        "leaderboard": "https://github.com/vztu/BVQA_Benchmark",
        "reference": "https://github.com/vztu/VIDEVAL/blob/8a86166bb9a9c8fc5e5eac5db7a77771cf576947/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://github.com/BinWang28/EvalRank-Embedding-Evaluation",
        "reference": "",
    },
    {
        "leaderboard": "https://blackboxbench.github.io",
        "reference": "https://github.com/SCLBD/BlackboxBench/blob/7767ab009f9f88e29857de02ae9eecab406c3205/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/xfactlab/orpo/blob/208d2d648068e700cbb1ae0e8320b1dc99b014ec/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/xfactlab/orpo/blob/208d2d648068e700cbb1ae0e8320b1dc99b014ec/README.md?plain=1#L8",
    },
    {"leaderboard": "https://github.com/QQ-MM/Video-CCAM", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota/facial-expression-recognition-on-fer2013",
        "reference": "https://github.com/LetheSec/Fer2013-Facial-Emotion-Recognition-Pytorch/blob/f1f6cfd40337fc42f3542d3953e8f4dd2f16f3a7/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://espere-1119-song.github.io/LOVEU-CVPR-24-Track-1-Leaderboard",
        "reference": "https://github.com/rese1f/MovieChat/blob/583c4484086fc09d44c05144efbe4bbf19941afb/README.md?plain=1#L24",
    },
    {"leaderboard": "https://github.com/csdongxian/AWP", "reference": ""},
    {
        "leaderboard": "https://visualqa.org/roe.html",
        "reference": "https://github.com/CCYChongyanChen/VQA_AlgorithmDatasets/blob/8e4f29736000873511ccd498a693465c276eb98d/readme.md?plain=1#L26",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1560/leaderboard",
        "reference": "https://github.com/CCYChongyanChen/VQA_AlgorithmDatasets/blob/8e4f29736000873511ccd498a693465c276eb98d/readme.md?plain=1#L51",
    },
    {"leaderboard": "https://github.com/neulab/REALSumm", "reference": ""},
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/503/leaderboard",
        "reference": "https://github.com/SiyuanWangw/LReasoner/blob/f196adb38d2f52c2653950d03b05f1fa15320755/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://robustbench.github.io",
        "reference": "https://github.com/fra31/auto-attack/blob/a39220048b3c9f2cca9a4d3a54604793c68eca7e/README.md?plain=1#L139",
    },
    {
        "leaderboard": "https://ai.google.com/research/NaturalQuestions",
        "reference": "https://github.com/google-research-datasets/natural-questions/blob/fb26a3073b1fe636c97302890a27b491d6530130/README.md?plain=1#L9",
    },
    {"leaderboard": "https://github.com/sintel-dev/Orion", "reference": ""},
    {
        "leaderboard": "https://github.com/ParallelDots/generic-sku-detection-benchmark",
        "reference": "",
    },
    {
        "leaderboard": "https://ogb.stanford.edu/docs/lsc/leaderboards",
        "reference": "https://github.com/PaddlePaddle/PaddleHelix/blob/82ee80ae44d4df61c2f649443e5d9f4930a90700/README.md?plain=1#L31",
    },
    {"leaderboard": "https://github.com/reka-ai/reka-vibe-eval", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/mallahyari/llm-hub/blob/0656015728a7c7a886b1fa30ef7f95b008061db2/README.md?plain=1#L462",
    },
    {
        "leaderboard": "https://russiansuperglue.com/leaderboard/2",
        "reference": "https://github.com/RussianNLP/RussianSuperGLUE/blob/1aff0918f226252eb304bdcdadf9eef5926cc924/README.md?plain=1#L28",
    },
    {
        "leaderboard": "https://microsoft.github.io/CodeXGLUE",
        "reference": "https://github.com/microsoft/CodeXGLUE/blob/ac74a62802a0dd159b3258c78a2df8ad36cdf2b9/README.md?plain=1#L33",
    },
    {"leaderboard": "https://github.com/fastai/imagenette", "reference": ""},
    {
        "leaderboard": "https://ai.google.com/research/rxr/competition",
        "reference": "https://github.com/google-research-datasets/RxR/blob/d22df5940a0486a42b703cd445937210ce21e9f5/README.md?plain=1#L128",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/locuslab/tofu_leaderboard",
        "reference": "https://github.com/locuslab/tofu/blob/80159d8ea39edf147fb09cd82aefa08e506e6718/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
        "reference": "https://github.com/mshojaei77/Awesome-AI/blob/5322abd2f02d7dfa95719939ed040827c49d0940/README.md?plain=1#L69",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/mshojaei77/Awesome-AI/blob/5322abd2f02d7dfa95719939ed040827c49d0940/README.md?plain=1#L69",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/CLUEbenchmark/SuperCLUE-Open/blob/142d4df89cfe523b0ef323a87290ec8417a6ccd5/README.md?plain=1#L269",
    },
    {
        "leaderboard": "https://microsoft.github.io/XGLUE",
        "reference": "https://github.com/microsoft/Unicoder/blob/b3d4afdecb6ca5bda73708fc4d0a01e4beca2eb3/README.md?plain=1#L2",
    },
    {
        "leaderboard": "https://github.com/gjy3035/Awesome-Crowd-Counting",
        "reference": "",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/453/leaderboard",
        "reference": "https://github.com/johnwlambert/argoverse_cbgs_kf_tracker/blob/9268cb6dd9844f80eb107a0cc5e77e880d3b3e76/README.md?plain=1#L15",
    },
    {
        "leaderboard": "https://github.com/zwq2018/Multi-modal-Self-instruct",
        "reference": "",
    },
    {
        "leaderboard": "https://www.tanksandtemples.org/leaderboard",
        "reference": "https://github.com/XYZ-qiyh/Awesome-Learning-MVS/blob/8f1c8b9f85df6d5b55bcdf002e65245846ecb360/README.md?plain=1#L132",
    },
    {
        "leaderboard": "https://proteinshake.ai/#leaderboard",
        "reference": "https://github.com/BorgwardtLab/proteinshake/blob/82f1acc5556664a3d46f6fc0a0b8a603d219d78d/README.md?plain=1#L24",
    },
    {
        "leaderboard": "https://stanfordnlp.github.io/coqa",
        "reference": "https://github.com/microsoft/SDNet/blob/3cf7d36ea561994992d2343b282bf8209939d411/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://kobiso.github.io/Computer-Vision-Leaderboard/imagenet",
        "reference": "https://github.com/kobiso/Computer-Vision-Leaderboard/blob/189abbdc7a27bc2d69c495829bb0e61b074c3e5a/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://kobiso.github.io/Computer-Vision-Leaderboard/sop",
        "reference": "https://github.com/kobiso/Computer-Vision-Leaderboard/blob/189abbdc7a27bc2d69c495829bb0e61b074c3e5a/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://kobiso.github.io/Computer-Vision-Leaderboard/cars",
        "reference": "https://github.com/kobiso/Computer-Vision-Leaderboard/blob/189abbdc7a27bc2d69c495829bb0e61b074c3e5a/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://kobiso.github.io/Computer-Vision-Leaderboard/cub",
        "reference": "https://github.com/kobiso/Computer-Vision-Leaderboard/blob/189abbdc7a27bc2d69c495829bb0e61b074c3e5a/README.md?plain=1#L13",
    },
    {
        "leaderboard": "https://kobiso.github.io/Computer-Vision-Leaderboard/in-shop",
        "reference": "https://github.com/kobiso/Computer-Vision-Leaderboard/blob/189abbdc7a27bc2d69c495829bb0e61b074c3e5a/README.md?plain=1#L14",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/relbench/leaderboard",
        "reference": "https://github.com/snap-stanford/relbench/blob/0e1dc97ec68ce8c28e9aa719ce8ae5b6c0cbc63d/README.md?plain=1#L27",
    },
    {
        "leaderboard": "https://boson.ai/rpbench",
        "reference": "https://github.com/Neph0s/awesome-llm-role-playing-with-persona/blob/fcc1fa48abe7e4c6e0accb46acb87d6a347568dd/README.md?plain=1#L23",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard",
        "reference": "https://github.com/OpenCodeInterpreter/OpenCodeInterpreter/blob/9674d8a521df7f92becd8a7a8e60a8792e71869a/README.md?plain=1#L137",
    },
    {"leaderboard": "https://github.com/OpenGVLab/MMT-Bench", "reference": ""},
    {
        "leaderboard": "https://open-compass.github.io/MathBench",
        "reference": "https://github.com/open-compass/MathBench/blob/7846202f5a699a766c531d6275397a5dd681f688/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/teticio/llama-squad/blob/27e17cb07c7e1f8a2b5fdc0334f7b9c2993e0ed6/README.md?plain=1#L264",
    },
    {
        "leaderboard": "https://paperswithcode.com/task/entity-alignment",
        "reference": "https://github.com/THU-KEG/Entity_Alignment_Papers/blob/5eed75a9e5fa0d218bedf2718a6fb44ab63973aa/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/flex",
        "reference": "https://github.com/allenai/flex/blob/75d6d1cea66df2c8a7e3d429c6af5008ccf1544b/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/flex_meta",
        "reference": "https://github.com/allenai/flex/blob/75d6d1cea66df2c8a7e3d429c6af5008ccf1544b/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/9121",
        "reference": "https://github.com/bao18/open_earth_map/blob/f16da336f8abf549e94a206ef67ab869654a557e/README.md?plain=1#L55",
    },
    {
        "leaderboard": "https://cl-detection2023.grand-challenge.org/evaluation/challenge/leaderboard",
        "reference": "https://github.com/szuboy/CL-Detection2023/blob/dc1ce2bd0a3f317de4160cde17e4a6f60371e67c/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://leaderboard.carla.org/leaderboard",
        "reference": "https://github.com/OpenDriveLab/End-to-end-Autonomous-Driving/blob/99acb2cf1df04d9c5e6e95f08500b918a3ca9490/README.md?plain=1#L134",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1856/leaderboard",
        "reference": "https://github.com/OpenDriveLab/End-to-end-Autonomous-Driving/blob/99acb2cf1df04d9c5e6e95f08500b918a3ca9490/README.md?plain=1#L138",
    },
    {
        "leaderboard": "https://lamalab-org.github.io/chem-bench/leaderboard",
        "reference": "https://github.com/materials-data-facility/matchem-llm/blob/4a9efc880b1cba1b70287ac9ae2b2cf1fa73b651/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://hf.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/neuml/txtai/blob/ad2d288d6607295e793f9904d715a41d8cb68dc1/README.md?plain=1#L197",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/neuml/txtai/blob/ad2d288d6607295e793f9904d715a41d8cb68dc1/README.md?plain=1#L198",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/neuml/txtai/blob/ad2d288d6607295e793f9904d715a41d8cb68dc1/README.md?plain=1#L199",
    },
    {
        "leaderboard": "https://ogb.stanford.edu/docs/lsc/leaderboards/#pcqm4mv2",
        "reference": "https://github.com/graphcore/ogb-lsc-pcqm4mv2/blob/9c206603eab62f09d61e649a778ac8efe251dede/README.md?plain=1#L42",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=stereo",
        "reference": "https://github.com/JiaRenChang/PSMNet/blob/87ac9093afbf6545c093bd9d26c5ffd66e49a7b8/README.md?plain=1#L113",
    },
    {
        "leaderboard": "https://rucola-benchmark.com/leaderboard",
        "reference": "https://github.com/RussianNLP/RuCoLA/blob/d86f3dc9bca7365e057fb66bc871ff8b066cae8e/README.md?plain=1#L7",
    },
    {"leaderboard": "https://github.com/Kinkelin/WordleCompetition", "reference": ""},
    {"leaderboard": "https://github.com/scicode-bench/SciCode", "reference": ""},
    {
        "leaderboard": "https://github.com/VideoNIAH/videoniah.github.io",
        "reference": "",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=flow",
        "reference": "https://github.com/gengshan-y/VCN/blob/00c4befdbdf4e42050867996a6f686f52086e01a/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://waymo.com/open/challenges/2021/real-time-3d-prediction",
        "reference": "https://github.com/tusen-ai/RangeDet/blob/1df87b2d9aa9ef3f77ad634c2656b96867c7eac8/README.md?plain=1#L79",
    },
    {
        "leaderboard": "https://www.tau-nlp.org/csqa-leaderboard",
        "reference": "https://github.com/microsoft/KEAR/blob/7376a3d190e5c04d5da9b99873abe621ae562edf/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/01-ai/Yi/blob/292dc819604e87810178288b321eebf9107ae5e3/README.md?plain=1#L136",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/01-ai/Yi/blob/292dc819604e87810178288b321eebf9107ae5e3/README.md?plain=1#L1263",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn/leaderboard-llm",
        "reference": "https://github.com/01-ai/Yi/blob/292dc819604e87810178288b321eebf9107ae5e3/README.md?plain=1#L1265",
    },
    {
        "leaderboard": "https://dki-lab.github.io/GrailQA",
        "reference": "https://github.com/microsoft/KC/blob/84cfbb6a6cddfd004e6d088687109a3be6fc5f41/README.md?plain=1#L16",
    },
    {"leaderboard": "https://github.com/ChineseGLUE/ChineseGLUE", "reference": ""},
    {
        "leaderboard": "https://leaderboard.allenai.org/alfred",
        "reference": "https://github.com/askforalfred/alfred/blob/39e4dfb13e9018d82da940ef43de53bddf59d111/README.md?plain=1#L141",
    },
    {"leaderboard": "https://github.com/MozerWang/Loong", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota/object-detection-on-coco",
        "reference": "https://github.com/digantamisra98/Mish/blob/5b9771c5914caff244fd938cbe5c479c51e181ac/README.md?plain=1#L47",
    },
    {"leaderboard": "https://github.com/SpeechColab/Leaderboard", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/optimum/llm-perf-leaderboard",
        "reference": "https://github.com/horseee/Awesome-Efficient-LLM/blob/f213be55c2edc958f9bf8b3db3533c05d34ce3e2/leaderboard.md?plain=1#L5",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AI-Secure/llm-trustworthy-leaderboard",
        "reference": "https://github.com/horseee/Awesome-Efficient-LLM/blob/f213be55c2edc958f9bf8b3db3533c05d34ce3e2/leaderboard.md?plain=1#L6",
    },
    {
        "leaderboard": "https://github.com/ray-project/llmperf-leaderboard",
        "reference": "https://github.com/horseee/Awesome-Efficient-LLM/blob/f213be55c2edc958f9bf8b3db3533c05d34ce3e2/leaderboard.md?plain=1#L7",
    },
    {
        "leaderboard": "https://artificialanalysis.ai/leaderboards/providers",
        "reference": "https://github.com/horseee/Awesome-Efficient-LLM/blob/f213be55c2edc958f9bf8b3db3533c05d34ce3e2/leaderboard.md?plain=1#L8",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/ml-energy/leaderboard",
        "reference": "https://github.com/horseee/Awesome-Efficient-LLM/blob/f213be55c2edc958f9bf8b3db3533c05d34ce3e2/leaderboard.md?plain=1#L9",
    },
    {
        "leaderboard": "https://artificialanalysis.ai/leaderboards/models",
        "reference": "https://github.com/horseee/Awesome-Efficient-LLM/blob/f213be55c2edc958f9bf8b3db3533c05d34ce3e2/leaderboard.md?plain=1#L10",
    },
    {
        "leaderboard": "https://leaderboard.withmartian.com",
        "reference": "https://github.com/horseee/Awesome-Efficient-LLM/blob/f213be55c2edc958f9bf8b3db3533c05d34ce3e2/leaderboard.md?plain=1#L11",
    },
    {
        "leaderboard": "https://bop.felk.cvut.cz/leaderboards",
        "reference": "https://github.com/ylabbe/cosypose/blob/c90a04f434b1e89f02341cc03899eb63ea8facba/README.md?plain=1#L51",
    },
    {"leaderboard": "https://github.com/hendrycks/ethics", "reference": ""},
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/11784",
        "reference": "https://github.com/tanfiona/CausalNewsCorpus/blob/4796c7e89d9b346f8e1280dac441dacde2be6b86/README.md?plain=1#L36",
    },
    {
        "leaderboard": "https://www.creatis.insa-lyon.fr/Challenge/acdc/results.html",
        "reference": "https://github.com/MIC-DKFZ/ACDC2017/blob/4c0feea30805c9b6b68b1d2a5204fff5928ee8b3/README.md?plain=1#L9",
    },
    {"leaderboard": "https://github.com/cs-chan/Total-Text-Dataset", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/ntunlp/OpenSource-LLMs-better-than-OpenAI/blob/ece38fe7da9049f898c2748d4896ba580dc20a68/README.md?plain=1#L13",
    },
    {"leaderboard": "https://github.com/THUDM/NaturalCodeBench", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/mlfoundations/VisIT-Bench-Leaderboard",
        "reference": "https://github.com/mlfoundations/VisIT-Bench/blob/45b9bf8d080be86bb13e05697bc8db468698c634/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AudioLLMs/AudioBench-Leaderboard",
        "reference": "https://github.com/AudioLLMs/AudioLLM/blob/dfb3f2c7b40539e19804cfc2f3fbb0d63defe6ab/README.md?plain=1#L66",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/rsna-intracranial-hemorrhage-detection/leaderboard",
        "reference": "https://github.com/darraghdog/rsna/blob/521a95781ce35ec0ea66e2ee3043451af37f542c/README.md?plain=1#L87",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/EleutherAI/lm-evaluation-harness/blob/15ffb0dafa9c869c7436ba9a3cf3067ac4c9d846/README.md?plain=1#L50",
    },
    {
        "leaderboard": "https://microsoft.github.io/MSMARCO-Document-Ranking-Submissions/leaderboard",
        "reference": "https://github.com/castorini/docTTTTTquery/blob/581539a7737a473739b1e3465df7d7e4e7203ede/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org",
        "reference": "https://github.com/allenai/aristo-leaderboard/blob/60d5be31b5a7d36f29d13223ece29a8d2bfa8b5f/README.md?plain=1#L1",
    },
    {
        "leaderboard": "https://junjie-chu.github.io/Public_Comprehensive_Assessment_Jailbreak/leaderboard",
        "reference": "https://github.com/TrustAIRLab/Comprehensive_Jailbreak_Assessment/blob/c268845cbb54960cdbc9e8957322d2f10cdd8873/README.md?plain=1#L80",
    },
    {
        "leaderboard": "https://github.com/LuoweiZhou/YouCook2-Leaderboard",
        "reference": "",
    },
    {
        "leaderboard": "https://pages.nist.gov/jarvis_leaderboard",
        "reference": "https://github.com/usnistgov/jarvis_leaderboard/blob/7b672310c106fdaad80cf33519323ba16238a0bb/README.md?plain=1#L14",
    },
    {
        "leaderboard": "https://opus.nlpl.eu/dashboard",
        "reference": "https://github.com/Helsinki-NLP/Tatoeba-Challenge/blob/d34a89ac102fd236503a1911dd1050564bf4e682/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://www.kaggle.com/competitions/2023-gwdata-bootcamp/leaderboard",
        "reference": "https://github.com/iphysresearch/GWData-Bootcamp/blob/6c6ed9627ab9fef13e5f00cab3874134133ce6a9/README.md?plain=1#L623",
    },
    {
        "leaderboard": "https://github.com/bcmi/Awesome-Generative-Image-Composition",
        "reference": "",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org",
        "reference": "https://github.com/allenai/ai2thor-rearrangement/blob/7fcf881b09ddd88be37ee87bbeea319c01e62bc4/README.md?plain=1#L352",
    },
    {
        "leaderboard": "https://github.com/AINativeLab/gptstore-data-backup",
        "reference": "",
    },
    {
        "leaderboard": "https://www.scrolls-benchmark.com/leaderboard",
        "reference": "https://github.com/tau-nlp/scrolls/blob/bfc0da0747976418cd0c4b8837db023ea567ba84/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://github.com/Helsinki-NLP/OPUS-MT-leaderboard",
        "reference": "https://github.com/Helsinki-NLP/OPUS/blob/f6a38fe0eaa7c4efb025dfb7b415206378be1186/README.md?plain=1#L44",
    },
    {
        "leaderboard": "https://paperswithcode.com/dataset/foodseg103",
        "reference": "https://github.com/LARC-CMU-SMU/FoodSeg103-Benchmark-v1/blob/65e9fd6ff7bca7df23dc421216cd77a4259c22eb/README.md?plain=1#L35",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1697/leaderboard",
        "reference": "https://github.com/alexa/massive/blob/f966f21846043aabef9b0f974fa7970027f43738/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://github.com/shikras/d-cube/blob/main/eval_sota/README.md",
        "reference": "https://github.com/shikras/d-cube/blob/fa0ccd6358b2bb958e8dcf810fc758717f18e4ec/README.md?plain=1#L30",
    },
    {
        "leaderboard": "https://codetlingua.github.io/leaderboard.html",
        "reference": "https://github.com/Intelligent-CAT-Lab/PLTranslationEmpirical/blob/024e43bf184af5d4460c3b3e4cfac8a8d5783a1f/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://github.com/eosphoros-ai/Awesome-Text2SQL",
        "reference": "",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AILab-CVC/SEED-Bench_Leaderboard",
        "reference": "https://github.com/AILab-CVC/SEED-Bench/blob/38582011e20bd1c3b153f839055b2f704cd83732/README.md?plain=1#L31",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AGC2024-P/e2e-driving-navsim",
        "reference": "https://github.com/autonomousvision/navsim/blob/2623b40b587d93ac4d66b54594cc0f33a9e0051a/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/optimum/llm-perf-leaderboard",
        "reference": "https://github.com/huggingface/optimum-benchmark/blob/d070104543478b722db56ba828611524c5fbdbb5/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://appworld.dev/leaderboard",
        "reference": "https://github.com/open-thought/system-2-research/blob/e6169ac081cc130550f9959507353c3224dba5b3/README.md?plain=1#L334",
    },
    {
        "leaderboard": "https://docs.google.com/spreadsheets/d/1M801lEpBbKSNwP-vDBkC_pF7LdyGU1f_ufZb_NWNBZQ",
        "reference": "https://github.com/open-thought/system-2-research/blob/e6169ac081cc130550f9959507353c3224dba5b3/README.md?plain=1#L336",
    },
    {
        "leaderboard": "https://arcprize.org/leaderboard",
        "reference": "https://github.com/open-thought/system-2-research/blob/e6169ac081cc130550f9959507353c3224dba5b3/README.md?plain=1#L337",
    },
    {
        "leaderboard": "https://gaia-benchmark-leaderboard.hf.space",
        "reference": "https://github.com/open-thought/system-2-research/blob/e6169ac081cc130550f9959507353c3224dba5b3/README.md?plain=1#L339",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/allenai/ZebraLogic",
        "reference": "https://github.com/open-thought/system-2-research/blob/e6169ac081cc130550f9959507353c3224dba5b3/README.md?plain=1#L342",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcodebench-leaderboard",
        "reference": "https://github.com/open-thought/system-2-research/blob/e6169ac081cc130550f9959507353c3224dba5b3/README.md?plain=1#L348",
    },
    {
        "leaderboard": "https://commit-0.github.io/analysis",
        "reference": "https://github.com/open-thought/system-2-research/blob/e6169ac081cc130550f9959507353c3224dba5b3/README.md?plain=1#L350",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/4318",
        "reference": "https://github.com/amathislab/HOISDF/blob/666e5b7fb1140905fcc81048360aacf2fe9c2c00/README.md?plain=1#L74",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/vidore/vidore-leaderboard",
        "reference": "https://github.com/illuin-tech/colpali/blob/782edcd50108d1842d154730ad3ce72476a2d17d/README.md?plain=1#L14",
    },
    {
        "leaderboard": "https://ultraeval.openbmb.cn/rank",
        "reference": "https://github.com/OpenBMB/UltraEval/blob/5d967b4ea5725ab1252904520bcaa87b40165b4b/README.md?plain=1#L227",
    },
    {
        "leaderboard": "https://good.readthedocs.io/en/latest/leaderboard.html",
        "reference": "https://github.com/divelab/GOOD/blob/fff0e2dc311e1d0e0a23052f6c601db4f185e12e/README.md?plain=1#L54",
    },
    {"leaderboard": "https://github.com/hendrycks/test", "reference": ""},
    {
        "leaderboard": "https://github.com/google-research/long-range-arena",
        "reference": "",
    },
    {
        "leaderboard": "https://pages.cs.huji.ac.il/adiyoss-lab/salmon",
        "reference": "https://github.com/slp-rl/salmon/blob/85e2daf7e73f15bcfa30ba2ad6da17f3c8a5edcd/README.md?plain=1#L62",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/strategyqa",
        "reference": "https://github.com/eladsegal/strategyqa/blob/1ba1e97452e189569357876f2854b01357ffbe37/README.md?plain=1#L146",
    },
    {
        "leaderboard": "https://gluebenchmark.com/leaderboard",
        "reference": "https://github.com/yoshitomo-matsubara/torchdistill/blob/36d42b1d125b465c4c65494d1d190f6a9045e057/README.md?plain=1#L81",
    },
    {
        "leaderboard": "https://github.com/hendrycks/test",
        "reference": "https://github.com/openseg-group/openseg.pytorch/blob/aefc75517b09068d7131a69420bc5f66cb41f0ee/README.md?plain=1#L37",
    },
    {
        "leaderboard": "https://github.com/JokerJohn/PRCV-VSLAM-Challenge-2022",
        "reference": "",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/santander-customer-transaction-prediction/leaderboard",
        "reference": "https://github.com/KazukiOnodera/Santander-Customer-Transaction-Prediction/blob/f30e800aedacd66d744389a61bf4439934525a2c/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://github.com/JokerJohn/PRCV-VSLAM-Challenge-2022",
        "reference": "https://github.com/openseg-group/openseg.pytorch/blob/aefc75517b09068d7131a69420bc5f66cb41f0ee/README.md?plain=1#L40",
    },
    {
        "leaderboard": "https://github.com/MadryLab/mnist_challenge",
        "reference": "https://github.com/tianzheng4/Distributionally-Adversarial-Attack/blob/1d0ae8c0ac176c87fccba187f07f6f2ca3a61992/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://github.com/MadryLab/cifar10_challenge",
        "reference": "https://github.com/tianzheng4/Distributionally-Adversarial-Attack/blob/1d0ae8c0ac176c87fccba187f07f6f2ca3a61992/README.md?plain=1#L28",
    },
    {
        "leaderboard": "https://www.kaggle.com/competitions/llm-merging-competition/leaderboard",
        "reference": "https://github.com/llm-merging/LLM-Merging/blob/c260fc5edb6bab9794cf2ac383948b5de0ed08fc/README.md?plain=1#L91",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/margsli/merging_competition",
        "reference": "https://github.com/llm-merging/LLM-Merging/blob/c260fc5edb6bab9794cf2ac383948b5de0ed08fc/README.md?plain=1#L94",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1333",
    },
    {
        "leaderboard": "http://www.lavicleva.com",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1334",
    },
    {
        "leaderboard": "https://flageval.baai.ac.cn/#/leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1335",
    },
    {
        "leaderboard": "https://crfm.stanford.edu/helm",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1336",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1337",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1340",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1341",
    },
    {
        "leaderboard": "https://www.superclueai.com",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1342",
    },
    {
        "leaderboard": "https://toloka.ai/llm-leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1344",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/uonlp/open_multilingual_llm_leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1345",
    },
    {
        "leaderboard": "http://openeval.org.cn/rank",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1346",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AngoHF/ANGO-Leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1347",
    },
    {
        "leaderboard": "https://cevalbenchmark.com/static/leaderboard.html",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1348",
    },
    {
        "leaderboard": "http://cgeval.besteasy.com",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1349",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/CoreyMorris/MMLU-by-task-Leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1350",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AILab-CVC/SEED-Bench_Leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1352",
    },
    {
        "leaderboard": "https://super.gluebenchmark.com/leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1353",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/qiantong-xu/toolbench-leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1354",
    },
    {
        "leaderboard": "https://github.com/vectara/hallucination-leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1355",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1356",
    },
    {
        "leaderboard": "https://llmbench.ai/agent/data",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1357",
    },
    {
        "leaderboard": "https://intercode-benchmark.github.io",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1358",
    },
    {
        "leaderboard": "https://llmbench.ai/safety/data",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1359",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/InstaDeepAI/nucleotide_transformer_benchmark",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1360",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1362",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/optimum/llm-perf-leaderboard",
        "reference": "https://github.com/tjunlp-lab/Awesome-LLMs-Evaluation-Papers/blob/a4895bc1a7dee9e7de90f52c4c30900391dc3933/README.md?plain=1#L1363",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/xtekky/gpt4local/blob/9925b90fc307e11c73c185514c496d4ac8f661d3/README.md?plain=1#L110",
    },
    {
        "leaderboard": "https://www.nuscenes.org/object-detection",
        "reference": "https://github.com/Sense-X/HoP/blob/9f6e8823c26a7b126091d6dffaf7eb277a14b5d5/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/loveu-tgve/loveu-tgve-leaderboard",
        "reference": "https://github.com/showlab/loveu-tgve-2023/blob/d75589f29fc240df9b6ea36d6143913300a5ccfa/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://acdc.vision.ee.ethz.ch/benchmarks",
        "reference": "https://github.com/brdav/refign/blob/4efc6a8db31a8624068612e85b21fd841ba8bfd0/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/3783",
        "reference": "https://github.com/brdav/refign/blob/4efc6a8db31a8624068612e85b21fd841ba8bfd0/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://ml.energy/leaderboard",
        "reference": "https://github.com/ml-energy/zeus/blob/4be7f4993941e6feb67e001225f1b627bdc0a57f/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://adamszq.github.io/smp2019ecdt_task1",
        "reference": "https://github.com/OnionWang/SMP2019-ECDT-NLU/blob/8a014cc516484257af71d5957e656fc51236501a/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=stereo",
        "reference": "https://github.com/gangweiX/ACVNet/blob/dbaf6c8ed9d168bef6fa06da6afa63b8c26f2c16/README.md?plain=1#L85",
    },
    {
        "leaderboard": "https://appworld.dev/leaderboard",
        "reference": "https://github.com/StonyBrookNLP/appworld/blob/f726cd110e8e33df1acb4436f65f9c8f1aca16d6/README.md?plain=1#L1135",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/Intel/low_bit_open_llm_leaderboard",
        "reference": "https://github.com/intel/auto-round/blob/3a70be84a64556c694a75b4670b152cb68e3ccc3/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_stereo_flow.php?benchmark=stereo",
        "reference": "https://github.com/aeolusguan/NMRF/blob/bdd4059c356d509d86ed2454bff2b61db8a1ddbf/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=stereo",
        "reference": "https://github.com/aeolusguan/NMRF/blob/bdd4059c356d509d86ed2454bff2b61db8a1ddbf/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://www.swebench.com",
        "reference": "https://github.com/nus-apr/auto-code-rover/blob/0cc64b67f6c934fdd317561290e6236f59156624/README.md?plain=1#L29",
    },
    {"leaderboard": "https://github.com/onejune2018/Awesome-LLM-Eval", "reference": ""},
    {
        "leaderboard": "https://hf.co/spaces/allenai/reward-bench",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L117",
    },
    {
        "leaderboard": "http://lucyeval.besteasy.com",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L140",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L152",
    },
    {
        "leaderboard": "https://mceval.github.io/leaderboard.html",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L238",
    },
    {
        "leaderboard": "https://llmbench.ai/agent/data",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L288",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L289",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AngoHF/ANGO-Leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L290",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L291",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L292",
    },
    {
        "leaderboard": "http://www.lavicleva.com",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L293",
    },
    {
        "leaderboard": "https://cevalbenchmark.com/static/leaderboard.html",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L294",
    },
    {
        "leaderboard": "https://flageval.baai.ac.cn/#/leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L295",
    },
    {
        "leaderboard": "https://github.com/vectara/hallucination-leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L296",
    },
    {
        "leaderboard": "https://crfm.stanford.edu/helm",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L297",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L298",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/optimum/llm-perf-leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L299",
    },
    {
        "leaderboard": "https://intercode-benchmark.github.io",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L301",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/CoreyMorris/MMLU-by-task-Leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L305",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L306",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/uonlp/open_multilingual_llm_leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L309",
    },
    {
        "leaderboard": "https://llmbench.ai/safety",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L310",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AILab-CVC/SEED-Bench_Leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L311",
    },
    {
        "leaderboard": "https://www.superclueai.com",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L312",
    },
    {
        "leaderboard": "https://super.gluebenchmark.com/leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L313",
    },
    {
        "leaderboard": "https://toloka.ai/llm-leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L315",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/qiantong-xu/toolbench-leaderboard",
        "reference": "https://github.com/onejune2018/Awesome-LLM-Eval/blob/25e3dcce16fad0f4e4fc5dfc9d870425f9f8e24e/README.md?plain=1#L316",
    },
    {"leaderboard": "https://github.com/VRU-NExT/VideoQA", "reference": ""},
    {
        "leaderboard": "https://whoiswho.biendata.xyz",
        "reference": "https://github.com/napsternxg/awesome-scholarly-data-analysis/blob/8a897bb76beeb1f3a6fa8f586fcd57e31dc1d0ac/README.md?plain=1#L222",
    },
    {
        "leaderboard": "https://bop.felk.cvut.cz/leaderboards",
        "reference": "https://github.com/yuecideng/Misc3D/blob/acc083d1f35bb4c9bcf78d4618be3b509d0f16a2/README.md?plain=1#L30",
    },
    {
        "leaderboard": "https://modelscope.cn/leaderboard/58/ranking?type=free",
        "reference": "https://github.com/modelscope/evalscope/blob/5cd27891989298c21616037f2c5f07789af44143/README.md?plain=1#L270",
    },
    {
        "leaderboard": "https://www.nuscenes.org/object-detection",
        "reference": "https://github.com/chaytonmin/Awesome-BEV-Perception-Multi-Cameras/blob/22e10bca08082249275ab40e3f7a9b97f343bdfe/README.md?plain=1#L104",
    },
    {"leaderboard": "https://github.com/OFA-Sys/AIR-Bench", "reference": ""},
    {"leaderboard": "https://github.com/zehuichen123/Graph-DETR4D", "reference": ""},
    {
        "leaderboard": "https://leaderboard.carla.org/leaderboard",
        "reference": "https://github.com/Kin-Zhang/carla-expert/blob/b3a78159e0996b78294168843b76074ba16bf3ac/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://github.com/bcmi/Object-Shadow-Generation-Dataset-DESOBA",
        "reference": "",
    },
    {
        "leaderboard": "https://good.readthedocs.io/en/latest/leaderboard.html",
        "reference": "https://github.com/Graph-COM/GSAT/blob/6812f5ca6c4c93149cbd13c44e9f022d18861908/README.md?plain=1#L20",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/embeddings-benchmark/mteb/blob/647c295c4dc178f902ab4633d4e1d6e8213487eb/README.md?plain=1#L22",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=6",
        "reference": "https://github.com/SharifAmit/DilatedFCNSegmentation/blob/28481a28c70faafce37949ec2381cd4a1417cc27/README.md?plain=1#L46",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/orionw/FollowIR/blob/61ad33e0af1a39bfa08ffb70ffe457d2c0501e6c/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://paperswithcode.com/task/object-detection",
        "reference": "https://github.com/ddlee-cn/Obj_Det_Progress_Tracker/blob/befd4f6636887a8ac6beb7b7f229aa19d95bd1ce/README.md?plain=1#L10",
    },
    {"leaderboard": "https://github.com/thuml/TimesNet", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/Intel/powered_by_intel_llm_leaderboard",
        "reference": "https://github.com/huggingface/optimum-habana/blob/a7fe09e8fc47afd19257813134bdbe3e3e03568a/README.md?plain=1#L277",
    },
    {
        "leaderboard": "https://github.com/openphilanthropy/unrestricted-adversarial-examples",
        "reference": "",
    },
    {"leaderboard": "https://github.com/thu-coai/SafetyBench", "reference": ""},
    {
        "leaderboard": "https://fm.ai.tsinghua.edu.cn/superbench/#/leaderboard",
        "reference": "https://github.com/thu-coai/SafetyBench/blob/8ef657b5bbc4961bb21c142e19d810b4fac1fb51/README.md?plain=1#L14",
    },
    {"leaderboard": "https://github.com/fixie-ai/ai-benchmarks", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/mrdbourke/rag-resources/blob/20004ab7de5297819ac8d4c721e56d9f9a51d10f/README.md?plain=1#L80",
    },
    {"leaderboard": "https://github.com/casmlab/NPHardEval", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/GTBench/GTBench",
        "reference": "https://github.com/jinhaoduan/GTBench/blob/b75e1d7068d6935208810affa78b436072d08051/README.md?plain=1#L5",
    },
    {"leaderboard": "https://github.com/1x-technologies/1xgpt", "reference": ""},
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_depth.php?benchmark=depth_completion",
        "reference": "https://github.com/JUGGHM/PENet_ICRA2021/blob/ee4318aaa82f72aa39fa97770196b167722e9515/README.md?plain=1#L27",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/a-okvqa",
        "reference": "https://github.com/MILVLG/prophet/blob/5188cae3533680a664252b2e1b60a97f37130254/README.md?plain=1#L288",
    },
    {
        "leaderboard": "https://yale-lily.github.io//spider",
        "reference": "https://github.com/X-LANCE/text2sql-lgesql/blob/841582cedbba280247cc5fe6a710970c881c1320/README.md?plain=1#L83",
    },
    {
        "leaderboard": "http://backdoorbench.com/leader_cifar10",
        "reference": "https://github.com/SCLBD/BackdoorBench/blob/1651ce53fc459602c8eb159fdda8ac3160d46fe9/README.md?plain=1#L79",
    },
    {
        "leaderboard": "https://cmedbenchmark.llmzoo.com/static/leaderboard.html",
        "reference": "https://github.com/FreedomIntelligence/CMB/blob/ebcffb27b87e2b894acf0b73e520eae5f3ad1ce8/README.md?plain=1#L43",
    },
    {"leaderboard": "https://github.com/yaodongyu/TRADES", "reference": ""},
    {
        "leaderboard": "http://www.swebench.com",
        "reference": "https://github.com/princeton-nlp/SWE-bench/blob/58d3d81b482c8165b5f6af0ea796dbb2dca7f144/README.md?plain=1#L30",
    },
    {
        "leaderboard": "https://junjie-chu.github.io/Public_Comprehensive_Assessment_Jailbreak/leaderboard",
        "reference": "https://github.com/Junjie-Chu/CJA_Comprehensive_Jailbreak_Assessment/blob/05469dc8e18907546e95bae43aecc2f6177d9d20/README.md?plain=1#L80",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/zhengzangw/awesome-huge-models/blob/e9e855843d1b2e2f85b3e10a2d74586132556d2b/README.md?plain=1#L55",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge/leaderboard",
        "reference": "https://github.com/predibase/llm_distillation_playbook/blob/6ccc4707eebbb60bce336e4711b742f9f9342fc8/README.md?plain=1#L114",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/predibase/llm_distillation_playbook/blob/6ccc4707eebbb60bce336e4711b742f9f9342fc8/README.md?plain=1#L204",
    },
    {
        "leaderboard": "https://crfm.stanford.edu/helm/lite/latest/#/leaderboard",
        "reference": "https://github.com/predibase/llm_distillation_playbook/blob/6ccc4707eebbb60bce336e4711b742f9f9342fc8/README.md?plain=1#L204",
    },
    {
        "leaderboard": "https://cogdl.ai/grb/leaderboard",
        "reference": "https://github.com/THUDM/grb/blob/904f47b07cf82710abfb023beae3ade41d58c3a8/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://boson.ai/rpbench",
        "reference": "https://github.com/boson-ai/RPBench-Auto/blob/01a257926b196076b3e7829b4a4ec194b37dd110/README.md?plain=1#L2",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/siim-acr-pneumothorax-segmentation/leaderboard",
        "reference": "https://github.com/earhian/SIIM-ACR-Pneumothorax-Segmentation-5th/blob/9d698bdc2bbec53f8d52b8b2c54f8380d50f1d3a/README.md?plain=1#L2",
    },
    {
        "leaderboard": "https://visualcommonsense.com/leaderboard",
        "reference": "https://github.com/jackroos/VL-BERT/blob/4373674cbf2bcd6c09a2c26abfdb6705b870e3be/README.md?plain=1#L154",
    },
    {
        "leaderboard": "https://nlp.cs.unc.edu/data/jielei/tvqa/tvqa_public_html/leaderboard.html",
        "reference": "https://github.com/jayleicn/TVQA/blob/dfb0e5fe4582efca574dfddfeafd1008db3b33ef/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/named-entity-recognition-ner-on-conll-2003",
        "reference": "https://github.com/dsindex/ntagger/blob/ccaf2ddb3b4a36953c79e84b8d85cc3d8de2de9c/README.md?plain=1#L539",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/named-entity-recognition-on-conll",
        "reference": "https://github.com/dsindex/ntagger/blob/ccaf2ddb3b4a36953c79e84b8d85cc3d8de2de9c/README.md?plain=1#L556",
    },
    {
        "leaderboard": "http://deephack.me/leaderboard_hack",
        "reference": "https://github.com/sld/convai-bot-1337/blob/58bcec4873f40a4992cce0bce9d0df244e77b2e0/README.md?plain=1#L100",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/outbrain-click-prediction/leaderboard",
        "reference": "https://github.com/gabrielspmoreira/kaggle_outbrain_click_prediction_google_cloud_ml_engine/blob/d0eb5d3feb0bdc40c935f214cd245fef2f18ecfb/README.md?plain=1#L27",
    },
    {
        "leaderboard": "https://www.tanksandtemples.org/leaderboard",
        "reference": "https://github.com/whubaichuan/M3VSNet/blob/ecbd30aecc9afd4e58f63a949747296e5a1368da/README.md?plain=1#L52",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d",
        "reference": "https://github.com/hlesmqh/WS3D/blob/6816eeb135923a59de34ee5d94be2d0fd3ec83f9/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/homesite-quote-conversion/leaderboard",
        "reference": "https://github.com/Far0n/kaggle-homesite/blob/038d4367c99a617b338bff3c79e11d23ac7287e4/README.md?plain=1#L2",
    },
    {"leaderboard": "https://github.com/lm-sys/arena-hard-auto", "reference": ""},
    {
        "leaderboard": "https://github.com/lyuqin/HydraNet-WikiSQL/releases/tag/20200207_105347",
        "reference": "",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/RMT-team/babilong",
        "reference": "https://github.com/booydar/babilong/blob/23f0303d456061cb71bc6a9cd6d3850575201943/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://ai.google.com/research/rxr/competition?active_tab=leaderboard",
        "reference": "https://github.com/jacobkrantz/VLN-CE/blob/5344526b26aa59212117c8ac864deaaf58e930ef/README.md?plain=1#L135",
    },
    {"leaderboard": "https://github.com/Lordog/R-Judge", "reference": ""},
    {
        "leaderboard": "https://nlp.cs.washington.edu/xorqa",
        "reference": "https://github.com/AkariAsai/XORQA/blob/c894f529b3adb046dc5fec871b2be0e8535f526c/README.md?plain=1#L5",
    },
    {"leaderboard": "https://github.com/SpeechColab/GigaSpeech2", "reference": ""},
    {
        "leaderboard": "https://www.kaggle.com/c/walmart-recruiting-store-sales-forecasting/leaderboard",
        "reference": "https://github.com/davidthaler/Walmart_competition_code/blob/9e68180f0668cef4bd0b1562311d310ff17e0b6b/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://microsoft.github.io/task_oriented_dialogue_as_dataflow_synthesis",
        "reference": "https://github.com/microsoft/task_oriented_dialogue_as_dataflow_synthesis/blob/bbd16fe69687b1052a7b5937ee7d20b641f2e642/README.md?plain=1#L52",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn/leaderboard-llm",
        "reference": "https://github.com/OrionStarAI/OrionStar-Yi-34B-Chat/blob/6fee2fa5711690d1af0c200b340d899ca95ffdf0/README.MD?plain=1#L62",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/UpstageAI/evalverse/blob/06f85eefd4b82385c549ea3bc29ff3e177f3a84b/README.md?plain=1#L88",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard",
        "reference": "https://github.com/UpstageAI/evalverse/blob/06f85eefd4b82385c549ea3bc29ff3e177f3a84b/README.md?plain=1#L154",
    },
    {
        "leaderboard": "https://gluebenchmark.com/leaderboard",
        "reference": "https://github.com/zphang/bert_on_stilts/blob/2d9c8c6ba47e06c0f171a7452a916dd3b7a09a6a/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/visual-question-answering-on-vip-bench",
        "reference": "https://github.com/WisconsinAIVision/ViP-LLaVA/blob/77079b1e1990472598f1f8a6bb3f7f470100d191/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://github.com/Xiangyu-CAS/Yet-Another-reid-baseline",
        "reference": "",
    },
    {
        "leaderboard": "https://gorilla.cs.berkeley.edu/leaderboard.html",
        "reference": "https://github.com/SalesforceAIResearch/xLAM/blob/dee27c4e3f47eea8266d0e504081ed07ed239181/README.md?plain=1#L34",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard",
        "reference": "https://github.com/davidkim205/nox/blob/d3d7e79acb5b066b21fe753d2f1844d06d199255/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://github.com/vectara/hallucination-leaderboard",
        "reference": "",
    },
    {
        "leaderboard": "https://hilti-challenge.com/leader-board-2022.html",
        "reference": "https://github.com/MAVIS-SLAM/OpenMAVIS/blob/7f2de61a003041983448f76e68b865da5bc6b9a1/README.md?plain=1#L29",
    },
    {
        "leaderboard": "https://multi-swe-bench.github.io",
        "reference": "https://github.com/NL2Code/CodeR/blob/d63468344bc215bbabb8f0c1ba4af24897a946df/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/lyx97/TempCompass",
        "reference": "https://github.com/llyx97/TempCompass/blob/4d74832495991b2951cc32e37cea04aa7f2392f1/README.md?plain=1#L40",
    },
    {
        "leaderboard": "https://mathvision-cuhk.github.io/#leaderboard",
        "reference": "https://github.com/mathvision-cuhk/MATH-V/blob/0e4e691a72e14ca8e7b5f40095484e0b0460a115/README.md?plain=1#L15",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/out-of-distribution-detection-on-cifar-100-vs",
        "reference": "https://github.com/stanislavfort/exploring_the_limits_of_OOD_detection/blob/0cbe0026737b661a0de8f8d554e57d39bf1b13ae/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://waymo.com/open/challenges/2020/3d-detection",
        "reference": "https://github.com/MaxLeo-LMZ/BEVFusion_Learningnote/blob/09a190b2ec3d90bd36e8006ffc57f17a9c4882ff/README.md?plain=1#L54",
    },
    {
        "leaderboard": "https://llm-eval.github.io/pages/leaderboard.html",
        "reference": "https://github.com/microsoft/promptbench/blob/fcda538bd779ad11612818e0645a387a462b5c3b/README.md?plain=1#L48",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/SunLemuria/OpenGPTAndBeyond/blob/2d9e9bc2a16d16a68bdd583ba29e6e54058ab075/README.md?plain=1#L212",
    },
    {
        "leaderboard": "https://opencompass.org.cn/leaderboard-llm",
        "reference": "https://github.com/SunLemuria/OpenGPTAndBeyond/blob/2d9e9bc2a16d16a68bdd583ba29e6e54058ab075/README.md?plain=1#L327",
    },
    {"leaderboard": "https://github.com/findalexli/SciGraphQA", "reference": ""},
    {"leaderboard": "https://github.com/duncanwp/ClimateBench", "reference": ""},
    {
        "leaderboard": "https://pages.nist.gov/jarvis_leaderboard",
        "reference": "https://github.com/usnistgov/alignn/blob/983ca4f9891fbcf61636c48970a1afdd7bf6378c/README.md?plain=1#L264",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/ParsBench/leaderboard",
        "reference": "https://github.com/ParsBench/ParsBench/blob/a7dc376f6473fac3e1926eea42001e6560a61475/README.md?plain=1#L150",
    },
    {"leaderboard": "https://github.com/mtbench101/mt-bench-101", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota/spoken-language-understanding-on-spoken-squad",
        "reference": "https://github.com/Chia-Hsuan-Lee/Spoken-SQuAD/blob/834be9a52253a2563ba87d1f22c7fb95243769ca/README.md?plain=1#L28",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1897/leaderboard",
        "reference": "https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/README.md?plain=1#L57",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/magdalenakuhn17/awesome-cheap-llms/blob/907f20265d058fce9dc558b53ee4c42445d3c963/README.md?plain=1#L33",
    },
    {
        "leaderboard": "https://cmedbenchmark.llmzoo.com/static/leaderboard.html",
        "reference": "https://github.com/WangRongsheng/CareGPT/blob/73700012a8bbfe71456dd1da08b58cb847e7828d/README.md?plain=1#L80",
    },
    {
        "leaderboard": "https://medbench.opencompass.org.cn/leaderboard",
        "reference": "https://github.com/WangRongsheng/CareGPT/blob/73700012a8bbfe71456dd1da08b58cb847e7828d/README.md?plain=1#L839",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/openlifescienceai/open_medical_llm_leaderboard",
        "reference": "https://github.com/WangRongsheng/CareGPT/blob/73700012a8bbfe71456dd1da08b58cb847e7828d/README.md?plain=1#L840",
    },
    {"leaderboard": "https://github.com/ZhuiyiTechnology/AutoIE", "reference": ""},
    {
        "leaderboard": "https://ymcui.com/cmrc2018",
        "reference": "https://github.com/ymcui/cmrc2018/blob/c0eb1b6ba219847457e6af3180da722bbeb656af/README.md?plain=1#L23",
    },
    {
        "leaderboard": "https://jailbreakbench.github.io/#leaderboard",
        "reference": "https://github.com/JailbreakBench/jailbreakbench/blob/04f9f94844f09bd2168d3d83e5dd870cded9fc57/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn/leaderboard-multimodal",
        "reference": "https://github.com/shell-nlp/gpt_server/blob/66740eb6ebfe543c6ccda1ae47cdaaa846af386c/README.md?plain=1#L226",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/shell-nlp/gpt_server/blob/66740eb6ebfe543c6ccda1ae47cdaaa846af386c/README.md?plain=1#L258",
    },
    {"leaderboard": "https://github.com/zehuichen123/AutoAlignV2", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota/image-classification-on-omnibenchmark",
        "reference": "https://github.com/ZhangYuanhan-AI/OmniBenchmark/blob/8f2cce58fa858a905096b7373cb4deff02fc0df2/README.md?plain=1#L25",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/multi-frame-super-resolution-on-proba-v",
        "reference": "https://github.com/Suanmd/TR-MISR/blob/6fd987aeb59e1bed643464efe45c151941684ad4/README.md?plain=1#L82",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn/leaderboard-multimodal",
        "reference": "https://github.com/open-compass/VLMEvalKit/blob/9726f414bdc3cbc77f33375110ddbb820f36c7bc/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/opencompass/open_vlm_leaderboard",
        "reference": "https://github.com/open-compass/VLMEvalKit/blob/9726f414bdc3cbc77f33375110ddbb820f36c7bc/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://github.com/cage-challenge/cage-challenge-3",
        "reference": "",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/epfml-text/leaderboard",
        "reference": "https://github.com/Wronskia/Sentiment-Analysis-on-Twitter-data/blob/b44a45c04fe6cba4d2ac4556b41dd979a8df5858/README.md?plain=1#L38",
    },
    {
        "leaderboard": "https://leaderboard.carla.org/leaderboard",
        "reference": "https://github.com/opendilab/InterFuser/blob/4145d6ca58c19ce2a29d319c47980cea05586b06/README.md?plain=1#L10",
    },
    {"leaderboard": "https://github.com/matsui528/annbench", "reference": ""},
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/7088",
        "reference": "https://github.com/SCNU-RISLAB/MF-MOS/blob/0c702445a39b978efc107cf7d0a2a33246f857ba/README.md?plain=1#L10",
    },
    {"leaderboard": "https://github.com/awsaf49/artifact", "reference": ""},
    {
        "leaderboard": "https://asoca.grand-challenge.org/evaluation/challenge/leaderboard",
        "reference": "https://github.com/JunMa11/SOTA-MedSeg/blob/0cf735f3bb7270061f18fe352f16cdb0c6f134e5/README.md?plain=1#L27",
    },
    {
        "leaderboard": "https://video-mme.github.io/home_page.html#leaderboard",
        "reference": "https://github.com/BradyFU/Awesome-Multimodal-Large-Language-Models/blob/876a1194665d386b1aab12a4b6b7505875a13b51/README.md?plain=1#L41",
    },
    {
        "leaderboard": "https://github.com/BradyFU/Awesome-Multimodal-Large-Language-Models/blob/Evaluation/README.md",
        "reference": "https://github.com/BradyFU/Awesome-Multimodal-Large-Language-Models/blob/876a1194665d386b1aab12a4b6b7505875a13b51/README.md?plain=1#L54",
    },
    {
        "leaderboard": "https://tgb.complexdatalab.com",
        "reference": "https://github.com/yule-BUAA/DyGLib/blob/3aacc36b94b8d2d8293d70a74fdf6d39089b4163/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://matbench.materialsproject.org",
        "reference": "https://github.com/ppdebreuck/modnet/blob/e14188d3b8a036bba0a1d9c0a3f538dc58c3cd29/README.md?plain=1#L18",
    },
    {"leaderboard": "https://github.com/THUDM/LongBench", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/facebook/CyberSecEval",
        "reference": "https://github.com/meta-llama/PurpleLlama/blob/7aef978f57f6bfef6c3be498d3dc6578449f47b0/README.md?plain=1#L90",
    },
    {
        "leaderboard": "https://matbench-discovery.materialsproject.org",
        "reference": "https://github.com/janosh/matbench-discovery/blob/51c3559ec3457b80e82b3f39d76a9f6cbfdb459b/readme.md?plain=1#L18",
    },
    {
        "leaderboard": "https://github.com/fastai/imagenette",
        "reference": "https://github.com/KeremTurgutlu/self_supervised/blob/360df7f1fa06b0ab1e2abe2e1ea6e2230b073a12/README.md?plain=1#L179",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=6",
        "reference": "https://github.com/xmyqsh/deeplab-v2/blob/5c10b1ef7cc33ced425c98601e8340e040b67919/README.md?plain=1#L70",
    },
    {
        "leaderboard": "https://pubmedqa.github.io",
        "reference": "https://github.com/pubmedqa/pubmedqa",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/filipecalegario/awesome-generative-ai/blob/cd45dd14c8b201009a3fd785dc104262ed82e9b5/README.md?plain=1#L514",
    },
    {
        "leaderboard": "https://llm.extractum.io/static/llm-leaderboards",
        "reference": "https://github.com/filipecalegario/awesome-generative-ai/blob/cd45dd14c8b201009a3fd785dc104262ed82e9b5/README.md?plain=1#L743",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/allenai/reward-bench",
        "reference": "https://github.com/filipecalegario/awesome-generative-ai/blob/cd45dd14c8b201009a3fd785dc104262ed82e9b5/README.md?plain=1#L744",
    },
    {
        "leaderboard": "https://github.com/vectara/hallucination-leaderboard",
        "reference": "https://github.com/filipecalegario/awesome-generative-ai/blob/cd45dd14c8b201009a3fd785dc104262ed82e9b5/README.md?plain=1#L781",
    },
    {
        "leaderboard": "https://indicodata.ai/llm",
        "reference": "https://github.com/filipecalegario/awesome-generative-ai/blob/cd45dd14c8b201009a3fd785dc104262ed82e9b5/README.md?plain=1#L798",
    },
    {
        "leaderboard": "https://openrouter.ai/rankings",
        "reference": "https://github.com/filipecalegario/awesome-generative-ai/blob/cd45dd14c8b201009a3fd785dc104262ed82e9b5/README.md?plain=1#L799",
    },
    {
        "leaderboard": "https://llmleaderboard.goml.io",
        "reference": "https://github.com/filipecalegario/awesome-generative-ai/blob/cd45dd14c8b201009a3fd785dc104262ed82e9b5/README.md?plain=1#L800",
    },
    {
        "leaderboard": "https://lmexam.com",
        "reference": "https://github.com/filipecalegario/awesome-generative-ai/blob/cd45dd14c8b201009a3fd785dc104262ed82e9b5/README.md?plain=1#L801",
    },
    {
        "leaderboard": "https://pile.eleuther.ai",
        "reference": "https://github.com/filipecalegario/awesome-generative-ai/blob/cd45dd14c8b201009a3fd785dc104262ed82e9b5/README.md?plain=1#L802",
    },
    {
        "leaderboard": "https://gair-nlp.github.io/OlympicArena/#leaderboard",
        "reference": "https://github.com/GAIR-NLP/OlympicArena",
    },
    {"leaderboard": "https://github.com/GAIR-NLP/factool", "reference": ""},
    {
        "leaderboard": "https://github.com/GAIR-NLP/factool/blob/main/datasets/chinese/README.md",
        "reference": "",
    },
    {
        "leaderboard": "https://ogb.stanford.edu/docs/leader_linkprop",
        "reference": "https://github.com/melifluos/subgraph-sketching/blob/3562d94a07d1166faa0949030824bf75ad9bb2c4/README.md?plain=1#L100",
    },
    {"leaderboard": "https://github.com/TIGER-AI-Lab/MMLU-Pro", "reference": ""},
    {
        "leaderboard": "https://destwang.github.io/CTC2021-explorer",
        "reference": "https://github.com/destwang/CTC2021/blob/de655747820e1a254dc9698d44daaa296d2031ef/README.md?plain=1#L33",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/830/leaderboard",
        "reference": "https://github.com/WillDreamer/Aurora/blob/1b189aeb75b3471a494277dc2ff9d1d2a0396383/README.md?plain=1#L45",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn/leaderboard-multimodal",
        "reference": "https://github.com/YunxinLi/LingCloud/blob/542ce558e26c6c9419ad1ae9f866bf936ccd46d6/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/Kannada-MNIST/leaderboard",
        "reference": "https://github.com/vinayprabhu/Kannada_MNIST/blob/f8390ece0c6990551e6f35189ccc42757bb29c43/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://www.cs.ubc.ca/research/image-matching-challenge/2021/leaderboard",
        "reference": "https://github.com/ubc-vision/image-matching-benchmark/blob/e3c1cbd6bf5cfea5f272e310e8b70a34ded572d5/README.md?plain=1#L14",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/q-future/Q-Bench-Leaderboard",
        "reference": "https://github.com/Q-Future/Q-Bench/blob/f19783ea1890212794ab220c90c7f71342a6289b/README.md?plain=1#L134",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/openlifescienceai/open_medical_llm_leaderboard",
        "reference": "https://github.com/burglarhobbit/Awesome-Medical-Large-Language-Models/blob/dadef2e55e994687c8fa3c42dc44699e5a5f131e/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcodebench-leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-bigcodebench.md?plain=1#L104",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/hf-vision/object_detection_leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/object-detection-leaderboard.md?plain=1#L16",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/open-llm-leaderboard-drop.md?plain=1#L15",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/open-llm-leaderboard-mmlu.md?plain=1#L14",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/open-llm-leaderboard-rlhf.md?plain=1#L40",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/open-llm-leaderboard-rlhf.md?plain=1#L42",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/OALL/Open-Arabic-LLM-Leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-arabic.md?plain=1#L105",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/ArtificialAnalysis/LLM-Performance-Leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-artificial-analysis.md?plain=1#L22",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/ArtificialAnalysis/Text-to-Image-Leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-artificial-analysis2.md?plain=1#L21",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/OpenGenAI/parti-prompts-leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-artificial-analysis2.md?plain=1#L53",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/ucla-contextual/contextual_leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-contextual.md?plain=1#L29",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/logikon/open_cot_leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-cot.md?plain=1#L176",
    },
    {
        "leaderboard": "https://decodingtrust.github.io/leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-decodingtrust.md?plain=1#L20",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AI-Secure/llm-trustworthy-leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-decodingtrust.md?plain=1#L24",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/HaizeLabs/red-teaming-resistance-benchmark",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-haizelab.md?plain=1#L23",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/hallucinations-leaderboard/leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-hallucinations.md?plain=1#L23",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-hebrew.md?plain=1#L50",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/hebrew-llm-leaderboard/leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-hebrew.md?plain=1#L55",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/livecodebench/leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-livecodebench.md?plain=1#L22",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/facebook/CyberSecEval",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-llamaguard.md?plain=1#L56",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/openlifescienceai/open_medical_llm_leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-medicalllm.md?plain=1#L162",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/NPHardEval/NPHardEval-leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-nphardeval.md?plain=1#L16",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/PatronusAI/leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-patronus.md?plain=1#L14",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-upstage.md?plain=1#L15",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard",
        "reference": "https://github.com/huggingface/blog/blob/6f63a7065716ce2d4830c010a11b9239d6487921/leaderboard-vectara.md?plain=1#L13",
    },
    {
        "leaderboard": "https://epic-kitchens.github.io/2024",
        "reference": "https://github.com/fpv-iplab/rulstm/blob/e648c7ee40a1a71381bfaa7563f2e8a6935275ab/README.md?plain=1#L178",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/vidsitu-verbs",
        "reference": "https://github.com/TheShadow29/VidSitu/blob/b794988a2de9fd5ce453ea732b27566947b3a2da/README.md?plain=1#L126",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/vidsitu-semantic-roles",
        "reference": "https://github.com/TheShadow29/VidSitu/blob/b794988a2de9fd5ce453ea732b27566947b3a2da/README.md?plain=1#L127",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/vidsitu-event-relations",
        "reference": "https://github.com/TheShadow29/VidSitu/blob/b794988a2de9fd5ce453ea732b27566947b3a2da/README.md?plain=1#L128",
    },
    {
        "leaderboard": "https://github.com/LuoweiZhou/YouCook2-Leaderboard",
        "reference": "https://github.com/salesforce/densecap/blob/5d08369ffdcb7db946ae11a8e9c8a056e47d28c2/README.md?plain=1#L108",
    },
    {
        "leaderboard": "https://3dmatch.cs.princeton.edu/#keypoint-matching-benchmark",
        "reference": "https://github.com/andyzeng/3dmatch-toolbox/blob/f6496fd5aaa36c971de36b456843266e6d8a196a/README.md?plain=1#L249",
    },
    {"leaderboard": "https://github.com/APEXLAB/CodeApex", "reference": ""},
    {"leaderboard": "https://github.com/bboylyg/ABL", "reference": ""},
    {
        "leaderboard": "https://microsoft.github.io/glge",
        "reference": "https://github.com/microsoft/glge/blob/291e3270803ab6b9876a866f3660f4872a53e257/README.md?plain=1#L58",
    },
    {
        "leaderboard": "https://gluebenchmark.com/leaderboard",
        "reference": "https://github.com/airaria/TextBrewer/blob/2c105fa664bbc10669cd2888bd846d2c9b8afca8/README.md?plain=1#L75",
    },
    {
        "leaderboard": "https://github.com/openai/gym/wiki/Leaderboard#pendulum-v0",
        "reference": "https://github.com/msinto93/D4PG/blob/538d2f25d0abe0542f88f9d8b749bf66d953f16e/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://ogb.stanford.edu/kddcup2021/results",
        "reference": "https://github.com/PaddlePaddle/PGL/blob/6dbb47c4559352ea1b1e327ee0039c47095583af/README.md?plain=1#L29",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/grit-ablation-restricted",
        "reference": "https://github.com/allenai/grit_official/blob/f1860b39e9b3b86f2a279c0a71c20752c1dfb3ed/README.md?plain=1#L93",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/grit-ablation-unrestricted",
        "reference": "https://github.com/allenai/grit_official/blob/f1860b39e9b3b86f2a279c0a71c20752c1dfb3ed/README.md?plain=1#L94",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/grit-test-restricted",
        "reference": "https://github.com/allenai/grit_official/blob/f1860b39e9b3b86f2a279c0a71c20752c1dfb3ed/README.md?plain=1#L95",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/grit-test-unrestricted",
        "reference": "https://github.com/allenai/grit_official/blob/f1860b39e9b3b86f2a279c0a71c20752c1dfb3ed/README.md?plain=1#L96",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/dogs-vs-cats/leaderboard",
        "reference": "https://github.com/amitrajitbose/cat-v-dog-classifier-pytorch/blob/a99d44f1caec03a2ee4e2c30a749613ff714c9d3/README.md?plain=1#L32",
    },
    {
        "leaderboard": "https://hfl-rc.github.io/cmrc2022/results",
        "reference": "https://github.com/iflytek/HFL-Anthology/blob/ad837862bded30fe265c1579f6f2303ba0951133/README.md?plain=1#L76",
    },
    {
        "leaderboard": "https://destwang.github.io/CTC2021-explorer",
        "reference": "https://github.com/iflytek/HFL-Anthology/blob/ad837862bded30fe265c1579f6f2303ba0951133/README.md?plain=1#L77",
    },
    {
        "leaderboard": "https://hfl-rc.github.io/cmrc2019/leaderboard",
        "reference": "https://github.com/iflytek/HFL-Anthology/blob/ad837862bded30fe265c1579f6f2303ba0951133/README.md?plain=1#L79",
    },
    {
        "leaderboard": "https://hfl-rc.github.io/cmrc2018/leaderboard",
        "reference": "https://github.com/iflytek/HFL-Anthology/blob/ad837862bded30fe265c1579f6f2303ba0951133/README.md?plain=1#L81",
    },
    {
        "leaderboard": "https://hfl-rc.github.io/cmrc2017/leaderboard",
        "reference": "https://github.com/iflytek/HFL-Anthology/blob/ad837862bded30fe265c1579f6f2303ba0951133/README.md?plain=1#L82",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/deepfake-detection-challenge/leaderboard",
        "reference": "https://github.com/polimi-ispl/icpr2020dfdc/blob/bbd64115e612e50416fb64fa8f60393fe4642dc0/README.md?plain=1#L18",
    },
    {"leaderboard": "https://github.com/AIFlames/Flames", "reference": ""},
    {"leaderboard": "https://github.com/apple/ml-mkqa", "reference": ""},
    {
        "leaderboard": "https://www.nuscenes.org/lidar-segmentation",
        "reference": "https://github.com/wzzheng/TPVFormer/blob/459bc060901c9c4920f802252f04b290a449e4a1/README.md?plain=1#L2",
    },
    {
        "leaderboard": "https://ibug.doc.ic.ac.uk/resources/fg-2020-competition-affective-behavior-analysis",
        "reference": "https://github.com/wtomin/Multitask-Emotion-Recognition-with-Incomplete-Labels/blob/b8089ee916c0d679c33522d009915745b28c7ef1/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/landmark-recognition-2020/leaderboard",
        "reference": "https://github.com/psinger/kaggle-landmark-recognition-2020-1st-place/blob/4e39676daf8b8d06f3a0586388fa78c370becb51/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://github.com/JUNJIE99/MLVU",
        "reference": "https://github.com/DAMO-NLP-SG/VideoLLaMA2/blob/99bce703036a498f8e76a2adb9fd3f50c969beb0/README.md?plain=1#L47",
    },
    {
        "leaderboard": "https://video-mme.github.io/home_page.html#leaderboard",
        "reference": "https://github.com/DAMO-NLP-SG/VideoLLaMA2/blob/99bce703036a498f8e76a2adb9fd3f50c969beb0/README.md?plain=1#L48",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/homebrewltd/awesome-local-ai/blob/b166e9f243c8e6587b58b50751ec411c1cf0572b/README.md?plain=1#L112",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
        "reference": "https://github.com/homebrewltd/awesome-local-ai/blob/b166e9f243c8e6587b58b50751ec411c1cf0572b/README.md?plain=1#L113",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/homebrewltd/awesome-local-ai/blob/b166e9f243c8e6587b58b50751ec411c1cf0572b/README.md?plain=1#L114",
    },
    {
        "leaderboard": "https://llm-leaderboard.streamlit.app",
        "reference": "https://github.com/homebrewltd/awesome-local-ai/blob/b166e9f243c8e6587b58b50751ec411c1cf0572b/README.md?plain=1#L115",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/homebrewltd/awesome-local-ai/blob/b166e9f243c8e6587b58b50751ec411c1cf0572b/README.md?plain=1#L116",
    },
    {
        "leaderboard": "https://bop.felk.cvut.cz/leaderboards/segmentation-unseen-bop23/core-datasets",
        "reference": "https://github.com/YoungSean/NIDS-Net/blob/c7685a442157a1f28f2d7771e10dd9c7afdd7154/README.md?plain=1#L23",
    },
    {
        "leaderboard": "https://gluebenchmark.com/leaderboard",
        "reference": "https://github.com/Y1ran/NLP-BERT--ChineseVersion/blob/57d13e86260c93b43bbc066130ecf4c94c35b64a/README.md?plain=1#L201",
    },
    {
        "leaderboard": "https://github.com/JialeCao001/PedSurvey/blob/master/Comparison.md",
        "reference": "",
    },
    {
        "leaderboard": "https://opendrivelab.com/challenge2023",
        "reference": "https://github.com/CVPR2023-3D-Occupancy-Prediction/CVPR2023-3D-Occupancy-Prediction/blob/4781df91900246029ec34d1b0ed0831ef069c7d9/README.md?plain=1#L45",
    },
    {
        "leaderboard": "https://visualcommonsense.com/leaderboard",
        "reference": "https://github.com/yuweijiang/HGL-pytorch/blob/80238500b96edf051d750670de7300168e456424/README.md?plain=1#L78",
    },
    {
        "leaderboard": "https://mathverse-cuhk.github.io/#leaderboard",
        "reference": "https://github.com/ZrrSkywalker/MathVerse/blob/c7daf391deda2eaf03e5798594a7d56fcd6d8b8c/README.md?plain=1#L17",
    },
    {
        "leaderboard": "http://nlpprogress.com/english/sentiment_analysis.html",
        "reference": "https://github.com/xwzhong/papernote/blob/8bc1e82ff52af2e7ce7304038094e7e90757a8f6/leaderboard.md?plain=1#L2",
    },
    {
        "leaderboard": "https://nlp.stanford.edu/projects/snli",
        "reference": "https://github.com/xwzhong/papernote/blob/8bc1e82ff52af2e7ce7304038094e7e90757a8f6/leaderboard.md?plain=1#L8",
    },
    {
        "leaderboard": "https://stanfordnlp.github.io/coqa",
        "reference": "https://github.com/xwzhong/papernote/blob/8bc1e82ff52af2e7ce7304038094e7e90757a8f6/leaderboard.md?plain=1#L11",
    },
    {
        "leaderboard": "https://rajpurkar.github.io/SQuAD-explorer",
        "reference": "https://github.com/xwzhong/papernote/blob/8bc1e82ff52af2e7ce7304038094e7e90757a8f6/leaderboard.md?plain=1#L12",
    },
    {
        "leaderboard": "http://quac.ai",
        "reference": "https://github.com/xwzhong/papernote/blob/8bc1e82ff52af2e7ce7304038094e7e90757a8f6/leaderboard.md?plain=1#L13",
    },
    {
        "leaderboard": "https://aclweb.org/aclwiki/Question_Answering_(State_of_the_art)",
        "reference": "https://github.com/xwzhong/papernote/blob/8bc1e82ff52af2e7ce7304038094e7e90757a8f6/leaderboard.md?plain=1#L16",
    },
    {
        "leaderboard": "https://gluebenchmark.com/leaderboard",
        "reference": "https://github.com/xwzhong/papernote/blob/8bc1e82ff52af2e7ce7304038094e7e90757a8f6/leaderboard.md?plain=1#L19",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/swag",
        "reference": "https://github.com/xwzhong/papernote/blob/8bc1e82ff52af2e7ce7304038094e7e90757a8f6/leaderboard.md?plain=1#L20",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/hkust-nlp/deita/blob/b8465c3c2f13e71054ca9443e89f69b005d579a9/README.md?plain=1#L312",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota",
        "reference": "https://github.com/eastmountyxz/AI-Security-Paper/blob/4e6343afeaa68bb3252468594870e61116db3ecb/README.md?plain=1#L110",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota",
        "reference": "https://github.com/FangjinhuaWang/PatchmatchNet/blob/c2e13b1bd77177d9646322959bd36cef1dab904e/README.md?plain=1#L113",
    },
    {
        "leaderboard": "https://www.tanksandtemples.org/leaderboard",
        "reference": "https://github.com/FangjinhuaWang/PatchmatchNet/blob/c2e13b1bd77177d9646322959bd36cef1dab904e/README.md?plain=1#L113",
    },
    {"leaderboard": "https://github.com/Alab-NII/2wikimultihop", "reference": ""},
    {
        "leaderboard": "https://github.com/svilupp/Julia-LLM-Leaderboard",
        "reference": "",
    },
    {
        "leaderboard": "https://www.cluebenchmarks.com",
        "reference": "https://github.com/CLUEbenchmark/PyCLUE/blob/c16af32dd7dc195e77f352b6b3d2d5b963e193ba/README.md?plain=1#L11",
    },
    {"leaderboard": "https://github.com/MikeGu721/XiezhiBenchmark", "reference": ""},
    {"leaderboard": "https://github.com/allenai/CommonGen-Eval", "reference": ""},
    {
        "leaderboard": "https://paperswithcode.com/sota/person-re-identification-on-sysu-30k",
        "reference": "https://github.com/wanggrun/triplet/blob/c1bbaa941f1b5867145deb2bbf7f1c0ff0bf238a/README.md?plain=1#L16",
    },
    {
        "leaderboard": "http://www.robustvision.net/leaderboard.php",
        "reference": "https://github.com/princeton-vl/RAFT-Stereo/blob/6068c1a26f84f8132de10f60b2bc0ce61568e085/README.md?plain=1#L77",
    },
    {"leaderboard": "https://github.com/ymcui/Eval-on-NN-of-RC", "reference": ""},
    {
        "leaderboard": "https://wilds.stanford.edu/leaderboard",
        "reference": "https://github.com/facebookresearch/DomainBed/blob/dad3ca34803aa6dc62dfebe9ccfb57452f0bb821/README.md?plain=1#L65",
    },
    {"leaderboard": "https://github.com/layumi/AICIty-reID-2020", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L144",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L731",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L732",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L733",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mike-ravkine/can-ai-code-results",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L734",
    },
    {
        "leaderboard": "https://cevalbenchmark.com/static/leaderboard.html",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L735",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L736",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/vectara/leaderboard",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L737",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L738",
    },
    {
        "leaderboard": "https://evalplus.github.io/leaderboard.html",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L739",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/PatronusAI/enterprise_scenarios_leaderboard",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L740",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/NPHardEval/NPHardEval-leaderboard",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L741",
    },
    {
        "leaderboard": "https://prollm.toqan.ai/leaderboard",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L742",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L743",
    },
    {
        "leaderboard": "https://scale.com/leaderboard",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L745",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AIR-Bench/leaderboard",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L746",
    },
    {
        "leaderboard": "https://huggingface.co/collections/clefourrier/leaderboards-and-benchmarks-64f99d2e11e92ca5568a7cce",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L747",
    },
    {
        "leaderboard": "https://gorilla.cs.berkeley.edu/leaderboard.html",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L748",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/WildVision/vision-arena",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L749",
    },
    {
        "leaderboard": "https://aider.chat/docs/leaderboards",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L750",
    },
    {
        "leaderboard": "https://evalplus.github.io/repoqa.html",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L751",
    },
    {
        "leaderboard": "https://bigcode-bench.github.io",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L752",
    },
    {
        "leaderboard": "https://www.vellum.ai/llm-leaderboard",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L753",
    },
    {
        "leaderboard": "https://eqbench.com",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L754",
    },
    {
        "leaderboard": "https://oobabooga.github.io/benchmark.html",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L755",
    },
    {
        "leaderboard": "https://livebench.ai",
        "reference": "https://github.com/underlines/awesome-ml/blob/30ba30fdebd817f12131ef67c073c565eab56233/llm-tools.md?plain=1#L756",
    },
    {
        "leaderboard": "https://llm-aggrefact.github.io",
        "reference": "https://github.com/Liyan06/MiniCheck/blob/1e837a31d9d37182eaa97aa7bb590e3fd5dadfd5/README.md?plain=1#L20",
    },
    {
        "leaderboard": "https://sites.google.com/view/englishasrchallenge/leaderboard",
        "reference": "https://github.com/AI4Bharat/NPTEL2020-Indian-English-Speech-Dataset/blob/64f8d5ef7c2346563779c0bd7e8487f7739be482/README.md?plain=1#L79",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/MM-UPD/MM-UPD_Leaderboard",
        "reference": "https://github.com/AtsuMiyai/UPD/blob/086e569f328422f105dbd5c1e84617f03b9409da/README.md?plain=1#L2",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions",
        "reference": "https://github.com/guyfe/LongSumm/blob/844b652269072ae35ea77d18ac717e2801cf80cf/README.md?plain=1#L205",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/jax-diffusers-event/leaderboard",
        "reference": "https://github.com/fast-codi/CoDi/blob/67fc0edabd9dfc41da882277111ff116c8bb60c5/README.md?plain=1#L60",
    },
    {
        "leaderboard": "https://dc-bench.github.io",
        "reference": "https://github.com/justincui03/dc_benchmark/blob/2a57711961670859ea4c3102b766d1b16f7dea85/README.md?plain=1#L13",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/8582",
        "reference": "https://github.com/czyssrs/ConvFinQA/blob/cf3eed2d5984960bf06bb8145bcea5e80b0222a6/README.md?plain=1#L64",
    },
    {
        "leaderboard": "https://predibase.com/fine-tuning-index",
        "reference": "https://github.com/keigohtr/state-of-mlops/blob/1d4208edf5280f96bc9f71f0648e2f3fd5474553/README.md?plain=1#L315",
    },
    {"leaderboard": "https://github.com/YJiangcm/FollowBench", "reference": ""},
    {"leaderboard": "https://github.com/tatsu-lab/alpaca_eval", "reference": ""},
    {
        "leaderboard": "https://gorilla.cs.berkeley.edu/leaderboard.html",
        "reference": "https://github.com/MeetKai/functionary/blob/9afb17be2cbec6ca4ad11d9e8e4ab528b5f55cf0/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://www.kaggle.com/c/allstate-claims-severity/leaderboard",
        "reference": "https://github.com/adornes/spark_scala_ml_examples/blob/49337f641f176824a1a58975bc2b9992c7c321f4/README.md?plain=1#L320",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/gaia-benchmark/leaderboard",
        "reference": "https://github.com/aymeric-roucher/GAIA/blob/4ae5bafa436ee91523fda0e435edcefa565df187/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/EvolvingLMMs-Lab/lmms-eval/blob/7c2d91cb9f9b5583a8d81d5994d0091271198f6e/README.md?plain=1#L43",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti",
        "reference": "https://github.com/philbort/awesome-self-driving-cars/blob/152f976282c659166613f4d5b5ae42d0e305d5ce/README.md?plain=1#L23",
    },
    {"leaderboard": "https://github.com/YLab-Open/METS-CoV", "reference": ""},
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=stereo",
        "reference": "https://github.com/XiandaGuo/OpenStereo/blob/9b0c9e6a1f6b91e0b3d9c008956e260576d997d2/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/Intel/powered_by_intel_llm_leaderboard",
        "reference": "https://github.com/huggingface/optimum-intel/blob/9cc0ee41b5e9ef114afeaeb11ba715d4165b7d74/README.md?plain=1#L194",
    },
    {
        "leaderboard": "https://github.com/xliu1231/Mementos-bench.github.io",
        "reference": "",
    },
    {
        "leaderboard": "https://rank.opencompass.org.cn",
        "reference": "https://github.com/InternLM/InternLM/blob/10b97b7a41ffbd2cb0f62cb506dda4d5df0d6dc3/README.md?plain=1#L128",
    },
    {
        "leaderboard": "https://yuzuai.jp/benchmark",
        "reference": "https://github.com/yuzu-ai/japanese-llm-ranking/blob/fae1ee79c69b0c3acf8e8a1994e43c7952e7bd9c/readme.md?plain=1#L10",
    },
    {"leaderboard": "https://github.com/wanggrun/SYSU-30k", "reference": ""},
    {
        "leaderboard": "https://github.com/AI-secure/Certified-Robustness-SoK-Oldver",
        "reference": "https://github.com/AI-secure/VeriGauge/blob/a76f6d93416cfd5b9272eae085dd2ee64736d76f/readme.md?plain=1#L7",
    },
    {"leaderboard": "https://github.com/TianxingChen/RoboTwin", "reference": ""},
    {
        "leaderboard": "https://www.kaggle.com/c/sentiment-analysis-on-movie-reviews/leaderboard",
        "reference": "https://github.com/rafacarrascosa/samr/blob/ebab09d1e48727bf133a31fd7912a2d1edd6b404/README.md?plain=1#L42",
    },
    {
        "leaderboard": "https://microsoft.github.io/MSMARCO-Document-Ranking-Submissions/leaderboard",
        "reference": "https://github.com/Albert-Ma/PROP/blob/8f0248f2cd262ea9a7ce8ffd8ca494028449ebd8/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://www.nuscenes.org/object-detection",
        "reference": "https://github.com/Sense-X/Co-DETR/blob/28a60116d9bcdd22a876a358b160a27efee785ec/README.md?plain=1#L23",
    },
    {"leaderboard": "https://github.com/incognite-lab/myGym", "reference": ""},
    {
        "leaderboard": "https://www.kaggle.com/c/pycon-2015-tutorial/leaderboard",
        "reference": "https://github.com/justmarkham/kaggle-pycon-2015/blob/d3eb029a6a7953d2184c2babf34790962e26b60f/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://yale-lily.github.io/spider",
        "reference": "https://github.com/taoyds/test-suite-sql-eval/blob/e97acc546ecbee8fa27fa8dbf025ef61493a876c/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://yale-lily.github.io/sparc",
        "reference": "https://github.com/taoyds/test-suite-sql-eval/blob/e97acc546ecbee8fa27fa8dbf025ef61493a876c/README.md?plain=1#L21",
    },
    {
        "leaderboard": "https://yale-lily.github.io/cosql",
        "reference": "https://github.com/taoyds/test-suite-sql-eval/blob/e97acc546ecbee8fa27fa8dbf025ef61493a876c/README.md?plain=1#L21",
    },
    {"leaderboard": "https://github.com/Yale-LILY/dart", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/michaelfeil/infinity/blob/bb39cbdf792c866f91d0332e523ef17d2efa80c1/README.md?plain=1#L303",
    },
    {
        "leaderboard": "https://spokenwoz.github.io",
        "reference": "https://github.com/bansky-cl/tods-arxiv-daily-paper/blob/fc62ccde301349192b8fad136ced0763af08e2ca/README.md?plain=1#L73",
    },
    {
        "leaderboard": "https://decisionintelligence.github.io/OpenTS",
        "reference": "https://github.com/decisionintelligence/TFB/blob/fbed8df59c919dd172021c01489b8bc8f279bca4/README.md?plain=1#L18",
    },
    {
        "leaderboard": "https://henrychur.github.io/MultilingualMedQA",
        "reference": "https://github.com/MAGIC-AI4Med/MMedLM/blob/f8653ee59fe7919ab7a36a917c69b22fd9c3280e/README.md?plain=1#L16",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/Vchitect/VBench_Leaderboard",
        "reference": "https://github.com/harpreetsahota204/awesome-cvpr-2024/blob/7ab7bbf04473e9954084485982c703fb707fbc26/README.md?plain=1#L78",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/code-generation-on-humaneval",
        "reference": "https://github.com/samholt/L2MAC/blob/b3445db4f7d70229552fd2d4f1e1ed8402d584b6/README.md?plain=1#L143",
    },
    {
        "leaderboard": "https://waymo.com/open/challenges/2021/real-time-3d-prediction",
        "reference": "https://github.com/happinesslz/SEED/blob/ddca047866f58d09e33706b0596a177db0b21b9a/README.md?plain=1#L50",
    },
    {
        "leaderboard": "https://datahack.analyticsvidhya.com/contest/amexpert-2019-machine-learning-hackathon/#LeaderBoard",
        "reference": "https://github.com/rajat5ranjan/AV-AmExpert-2019-ML-Hackathon/blob/deeabf45abc2ca0eb9bc0f80ddd1e67ec967bbaa/README.md?plain=1#L153",
    },
    {
        "leaderboard": "https://matbench.materialsproject.org",
        "reference": "https://github.com/hackingmaterials/automatminer/blob/860a19ef4078029f0063a4d8d7a9d69b75455b24/README.md?plain=1#L15",
    },
    {
        "leaderboard": "http://got-10k.aitestunion.com/leaderboard",
        "reference": "https://github.com/Giveupfree/SOTDrawRect/blob/e6c5ad93c064039f792b1f07b37960868aa0bf2e/README.md?plain=1#L211",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/hollobit/GenAI_LLM_timeline/blob/4bafe9eb79aa5f78aff7ddaf520708bd0b00176d/README.md?plain=1#L2669",
    },
    {
        "leaderboard": "https://lamp-benchmark.github.io/leaderboard",
        "reference": "https://github.com/hollobit/GenAI_LLM_timeline/blob/4bafe9eb79aa5f78aff7ddaf520708bd0b00176d/README.md?plain=1#L3609",
    },
    {"leaderboard": "https://github.com/lupantech/ScienceQA", "reference": ""},
    {
        "leaderboard": "https://leaderboard.allenai.org/scifact",
        "reference": "https://github.com/dwadden/multivers/blob/a6ce033f0e17ae38c1f102eae1ee4ca213fbbe2e/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://www.datafountain.cn/competitions/533/ranking?sch=1844",
        "reference": "https://github.com/chaineypung/CCFBDCI-2021-Ultrasonic-Tumor-Segmentation-Rank1st/blob/a0e32cf006ffc006dfbf238d0b57d6ce09708d12/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://www.kaggle.com/competitions",
        "reference": "https://github.com/GovLab/toolsforcollaboration/blob/5bcd36a9ebd660286802e572a4cbf8a5f14946a9/README.md?plain=1#L56",
    },
    {
        "leaderboard": "https://opus.nlpl.eu/dashboard",
        "reference": "https://github.com/Helsinki-NLP/Opus-MT/blob/a44ab31d7c987f997129e28659a10a6c29b5a47c/README.md?plain=1#L8",
    },
    {
        "leaderboard": "http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=stereo",
        "reference": "https://github.com/cogsys-tuebingen/mobilestereonet/blob/3c2931ab87561535469206bc366b942af67a2230/README.md?plain=1#L50",
    },
    {
        "leaderboard": "https://ogb.stanford.edu/docs/leader_linkprop/#ogbl-wikikg2",
        "reference": "https://github.com/facebookresearch/ssl-relation-prediction/blob/a050286473933b6b7b743db43db3529bd305359a/README.md?plain=1#L125",
    },
    {
        "leaderboard": "https://ogb.stanford.edu/docs/leader_linkprop/#ogbl-biokg",
        "reference": "https://github.com/facebookresearch/ssl-relation-prediction/blob/a050286473933b6b7b743db43db3529bd305359a/README.md?plain=1#L126",
    },
    {"leaderboard": "https://github.com/lupantech/ScienceQA", "reference": ""},
    {
        "leaderboard": "https://opendrivelab.com/challenge2024/#mapless_driving",
        "reference": "https://github.com/OpenDriveLab/OpenLane-V2/blob/f085b3dad6bf12e88282e990cbc3704cff226821/README.md?plain=1#L25",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AGC2024/mapless-driving-2024",
        "reference": "https://github.com/OpenDriveLab/OpenLane-V2/blob/f085b3dad6bf12e88282e990cbc3704cff226821/README.md?plain=1#L25",
    },
    {
        "leaderboard": "https://opendrivelab.com/challenge2023/#openlane_topology",
        "reference": "https://github.com/OpenDriveLab/OpenLane-V2/blob/f085b3dad6bf12e88282e990cbc3704cff226821/README.md?plain=1#L33",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1925/leaderboard",
        "reference": "https://github.com/OpenDriveLab/OpenLane-V2/blob/f085b3dad6bf12e88282e990cbc3704cff226821/README.md?plain=1#L33",
    },
    {"leaderboard": "https://github.com/ml-energy/leaderboard", "reference": ""},
    {
        "leaderboard": "https://github.com/opstower-ai/devops-ai-open-leaderboard",
        "reference": "https://github.com/opstower-ai/llm-opstower/blob/a8e657810f08d985744567ad0a03c65b277f71ff/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/EffiBench/effibench-leaderboard",
        "reference": "https://github.com/huangd1999/EffiBench/blob/dfa232bf8410ecb9deeef4ba113bd2fcdb1a04b8/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/3d-semantic-segmentation-on-scribblekitti",
        "reference": "https://github.com/ouenal/scribblekitti/blob/6a46b097f834b3278d8d3960cfbf864b89e0a9c5/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://leaderboard.carla.org",
        "reference": "https://github.com/zhejz/carla-roach/blob/00d6f5528296900161bcb53b62197f9d5745330c/README.md?plain=1#L19",
    },
    {
        "leaderboard": "https://leaderboard.carla.org/leaderboard",
        "reference": "https://github.com/shuishida/LangProp/blob/5ae6a513461bd4e5c93cb0f93d2fd908833084cc/README.md?plain=1#L300",
    },
    {
        "leaderboard": "https://github.com/pddasig/Machine-Learning-Competition-2021",
        "reference": "",
    },
    {
        "leaderboard": "https://livebench.ai",
        "reference": "https://github.com/LiveBench/LiveBench/blob/174d07c7126cc91b9dc4bb0c0521e05d57173e6e/README.md?plain=1#L6",
    },
    {
        "leaderboard": "https://yuzuai.jp/benchmark",
        "reference": "https://github.com/taishi-i/awesome-japanese-nlp-resources/blob/53345357ebbee61e49081e71257b19f909f51019/README.md?plain=1#L253",
    },
    {
        "leaderboard": "https://open-xai.github.io/leaderboard",
        "reference": "https://github.com/hbaniecki/adversarial-explainable-ai/blob/200c3e9330247e4b0da5d80cfb0f6dcd07d40005/README.md?plain=1#L737",
    },
    {"leaderboard": "https://github.com/SHI-Labs/Agriculture-Vision", "reference": ""},
    {"leaderboard": "https://github.com/VSainteuf/pastis-benchmark", "reference": ""},
    {"leaderboard": "https://github.com/lupantech/MathVista", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/LanguageBind/Video-Bench",
        "reference": "https://github.com/PKU-YuanGroup/Video-Bench/blob/8b2101d1f80370121c754caefa3f1cf8b6b979ca/README.md?plain=1#L48",
    },
    {
        "leaderboard": "https://evo-eval.github.io/leaderboard.html",
        "reference": "https://github.com/evo-eval/evoeval/blob/d5ca3104ec30b99f1076f51d4476eb4c3f29effa/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/SUSTech-IDEA/SUS-Chat/blob/ebb7884fdb910114a76f111c27624b5307ed3f62/README.md?plain=1#L90",
    },
    {
        "leaderboard": "https://novelchallenge.github.io/index.html",
        "reference": "https://github.com/marzenakrp/nocha/blob/64946c134424a33dd6279442c2651804253256f0/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://breakend.github.io/RL-Energy-Leaderboard/reinforcement_learning_energy_leaderboard",
        "reference": "https://github.com/Breakend/experiment-impact-tracker/blob/c1b0fcb4e75f04511805a1e57b2d8a97266d2977/README.md?plain=1#L160",
    },
    {
        "leaderboard": "https://www.cbica.upenn.edu/BraTS18/lboardValidation.html",
        "reference": "https://github.com/sacmehta/3D-ESPNet/blob/edcc3b916abb0e97063ff0d52e56664733418cd8/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/allenai/reward-bench",
        "reference": "https://github.com/allenai/reward-bench/blob/e42d40fb73616eee7de4713801c51f81e2c4b969/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://github.com/Visual-Attention-Network/SegNeXt",
        "reference": "",
    },
    {"leaderboard": "https://github.com/crux-eval/eval-arena", "reference": ""},
    {
        "leaderboard": "http://calvin.cs.uni-freiburg.de",
        "reference": "https://github.com/mees/calvin/blob/23ad18da15c8734a61e303e7c1ff22a241bf06a0/README.md?plain=1#L169",
    },
    {
        "leaderboard": "https://nyu-mll.github.io/quality",
        "reference": "https://github.com/nyu-mll/quality/blob/4d31d2f99d9f8f94488f1ca35eefc101c6d467ff/README.md?plain=1#L29",
    },
    {"leaderboard": "https://github.com/CUHK-ARISE/GAMABench", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/prometheus-eval/BiGGen-Bench-Leaderboard",
        "reference": "https://github.com/prometheus-eval/prometheus-eval/blob/b7a431a553b320e0a7cc49c6c5d3c54b1b840d39/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://github.com/yizhen20133868/Awesome-SLU-Survey",
        "reference": "",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/mikeroyal/Machine-Learning-Guide/blob/20cec9bf95a1e42086553146682e5a82cec200f5/README.md?plain=1#L479",
    },
    {
        "leaderboard": "https://github.com/LudwigStumpp/llm-leaderboard",
        "reference": "https://github.com/mikeroyal/Machine-Learning-Guide/blob/20cec9bf95a1e42086553146682e5a82cec200f5/README.md?plain=1#L480",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/mikeroyal/Machine-Learning-Guide/blob/20cec9bf95a1e42086553146682e5a82cec200f5/README.md?plain=1#L481",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1663/leaderboard",
        "reference": "https://github.com/alexa/alexa-with-dstc10-track2-dataset/blob/4882acc6226d8d0826508a240a4a6bd66e651c12/README.md?plain=1#L15",
    },
    {"leaderboard": "https://github.com/ruixiangcui/AGIEval", "reference": ""},
    {
        "leaderboard": "https://www.aicrowd.com/challenges/mediqa-2019-natural-language-inference-nli/leaderboards",
        "reference": "https://github.com/abachaa/MEDIQA2019/blob/ae5d3826d6c841923b9db831159fef0f1b893593/README.md?plain=1#L46",
    },
    {
        "leaderboard": "https://www.aicrowd.com/challenges/mediqa-2019-recognizing-question-entailment-rqe/leaderboards",
        "reference": "https://github.com/abachaa/MEDIQA2019/blob/ae5d3826d6c841923b9db831159fef0f1b893593/README.md?plain=1#L47",
    },
    {
        "leaderboard": "https://www.aicrowd.com/challenges/mediqa-2019-question-answering-qa/leaderboards",
        "reference": "https://github.com/abachaa/MEDIQA2019/blob/ae5d3826d6c841923b9db831159fef0f1b893593/README.md?plain=1#L48",
    },
    {"leaderboard": "https://github.com/declare-lab/MELD", "reference": ""},
    {
        "leaderboard": "https://www.conceptualcaptions.com/winners-and-data",
        "reference": "https://github.com/google-research-datasets/Image-Caption-Quality-Dataset/blob/9435387ed56001416778f4946819e582a82428ed/README.md?plain=1#L93",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/sudarshan-koirala/llm-resources/blob/f5ae6094ae243a97b06cb27f54df16cef3492ee5/README.md?plain=1#L79",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/sudarshan-koirala/llm-resources/blob/f5ae6094ae243a97b06cb27f54df16cef3492ee5/README.md?plain=1#L82",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/sudarshan-koirala/llm-resources/blob/f5ae6094ae243a97b06cb27f54df16cef3492ee5/README.md?plain=1#L83",
    },
    {
        "leaderboard": "https://gorilla.cs.berkeley.edu/leaderboard.html",
        "reference": "https://github.com/sudarshan-koirala/llm-resources/blob/f5ae6094ae243a97b06cb27f54df16cef3492ee5/README.md?plain=1#L92",
    },
    {"leaderboard": "https://github.com/declare-lab/MELD", "reference": ""},
    {
        "leaderboard": "https://github.com/vectara/hallucination-leaderboard",
        "reference": "https://github.com/kimtth/awesome-azure-openai-llm/blob/9bab76e9b2d3411dbc4766510a5eb25d2ef77af2/README.md?plain=1#L1502",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
        "reference": "https://github.com/kimtth/awesome-azure-openai-llm/blob/9bab76e9b2d3411dbc4766510a5eb25d2ef77af2/README.md?plain=1#L1581",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/kimtth/awesome-azure-openai-llm/blob/9bab76e9b2d3411dbc4766510a5eb25d2ef77af2/README.md?plain=1#L1581",
    },
    {
        "leaderboard": "https://github.com/ray-project/llmperf-leaderboard",
        "reference": "https://github.com/kimtth/awesome-azure-openai-llm/blob/9bab76e9b2d3411dbc4766510a5eb25d2ef77af2/README.md?plain=1#L2252",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/ArtificialAnalysis/LLM-Performance-Leaderboard",
        "reference": "https://github.com/kimtth/awesome-azure-openai-llm/blob/9bab76e9b2d3411dbc4766510a5eb25d2ef77af2/README.md?plain=1#L2253",
    },
    {
        "leaderboard": "https://gluebenchmark.com/leaderboard",
        "reference": "https://github.com/kimtth/awesome-azure-openai-llm/blob/9bab76e9b2d3411dbc4766510a5eb25d2ef77af2/README.md?plain=1#L2269",
    },
    {
        "leaderboard": "https://super.gluebenchmark.com/leaderboard",
        "reference": "https://github.com/kimtth/awesome-azure-openai-llm/blob/9bab76e9b2d3411dbc4766510a5eb25d2ef77af2/README.md?plain=1#L2269",
    },
    {"leaderboard": "https://github.com/facebookresearch/CompilerGym", "reference": ""},
    {
        "leaderboard": "https://github.com/RUCAIBox/RecBole-GNN/blob/main/results/general/ml-1m.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/RUCAIBox/RecBole-GNN/blob/main/results/sequential/diginetica.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/RUCAIBox/RecBole-GNN/blob/main/results/social/lastfm.md",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/abenhamadou/3DTeethSeg22_challenge",
        "reference": "",
    },
    {
        "leaderboard": "https://github.com/Arthur151/SOTA-on-monocular-3D-pose-and-shape-estimation",
        "reference": "",
    },
    {
        "leaderboard": "https://paperswithcode.com/dataset/sun-seg-easy",
        "reference": "https://github.com/GewelsJI/VPS/blob/0dd9c7fcdda339289a28aea062a390e20d8bb64b/README.md?plain=1#L130",
    },
    {
        "leaderboard": "https://paperswithcode.com/dataset/sun-seg-hard",
        "reference": "https://github.com/GewelsJI/VPS/blob/0dd9c7fcdda339289a28aea062a390e20d8bb64b/README.md?plain=1#L130",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti",
        "reference": "https://github.com/daohu527/awesome-self-driving-car/blob/95b3fc55a4b9d50a622862e962291d9a3c584669/README.md?plain=1#L243",
    },
    {"leaderboard": "https://github.com/JusperLee/SonicSim", "reference": ""},
    {
        "leaderboard": "https://github.com/bethgelab/InDomainGeneralizationBenchmark",
        "reference": "",
    },
    {
        "leaderboard": "https://matbench.materialsproject.org",
        "reference": "https://github.com/materialsproject/matbench/blob/936176db18ca4cd7b38cbd957c017a5bac770c6b/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/shm007g/LLaMA-Cult-and-More/blob/546c9a1bfbd3651a44921ed1bc42549106a91b46/README.md?plain=1#L95",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/Vchitect/VBench_Leaderboard",
        "reference": "https://github.com/modelscope/data-juicer/blob/1aaad212604e01e1cd6e1c997ebc1481153565e2/README.md?plain=1#L42",
    },
    {
        "leaderboard": "https://github.com/OpenGVLab/Multi-Modality-Arena",
        "reference": "",
    },
    {"leaderboard": "https://github.com/HowieHwong/TrustLLM", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/huggingface/text-embeddings-inference/blob/416efe19f2ad687a3461ae453c32003be0892749/README.md?plain=1#L87",
    },
    {
        "leaderboard": "https://mcgill-nlp.github.io/imagecode",
        "reference": "https://github.com/McGill-NLP/imagecode/blob/609f07611aed2599f946c30d730f40a41af1079b/README.md?plain=1#L82",
    },
    {
        "leaderboard": "https://www.nuscenes.org/tracking",
        "reference": "https://github.com/lixiaoyu2000/Rock-Track/blob/35a1620f89a8dfe085e833c3366828ccf4ed744b/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://www.scrolls-benchmark.com/leaderboard",
        "reference": "https://github.com/huankoh/long-doc-summarization/blob/d3776e1945eb6832b987f6376f047c4108c9bcee/README.md?plain=1#L72",
    },
    {"leaderboard": "https://github.com/SoccerNet/sn-reid", "reference": ""},
    {
        "leaderboard": "https://dreambenchplus.github.io/#leaderboard",
        "reference": "https://github.com/dreambenchplus/dreambenchplus.github.io",
    },
    {
        "leaderboard": "https://paperswithcode.com/task/cross-domain-few-shot-object-detection",
        "reference": "https://github.com/lovelyqian/CDFSOD-benchmark/blob/fdaebeb1946a25fb72ba11421d171ea548a698cd/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/ChenYi99/EgoPlan-Bench_Leaderboard",
        "reference": "https://github.com/ChenYi99/EgoPlan/blob/8e5b2ead2561502589c9e93380736e3f8714b391/README.md?plain=1#L241",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/HenryHZY/Awesome-Multimodal-LLM/blob/1a0a9dafd56a16acd855ffe3236063eef596f653/README.md?plain=1#L283",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/421",
        "reference": "https://github.com/Junjue-Wang/LoveDA/blob/da1cdd083747c01c1cd36f6fcbab917077b906fc/README.md?plain=1#L9",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/424",
        "reference": "https://github.com/Junjue-Wang/LoveDA/blob/da1cdd083747c01c1cd36f6fcbab917077b906fc/README.md?plain=1#L10",
    },
    {
        "leaderboard": "https://www.kaggle.com/competitions/house-prices-advanced-regression-techniques/leaderboard",
        "reference": "https://github.com/veb-101/Data-Science-Projects/blob/a78346b424e4e34b6acfa14446dcec5dae621ffe/README.md?plain=1#L22",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/opencompass/open_vlm_leaderboard",
        "reference": "https://github.com/dylanhogg/awesome-python/blob/8aacb170a7b90b43bdb0d62117d13df0c5515636/README.md?plain=1#L749",
    },
    {
        "leaderboard": "https://torchprotein.ai/benchmark",
        "reference": "https://github.com/DeepGraphLearning/PEER_Benchmark/blob/b40eec914273f204c50acca690df2a892e36c4a5/README.md?plain=1#L135",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/Exorust/LLM-Cookbook/blob/8ecec656320c672c80a801b3dff54f171d90fc0d/README.md?plain=1#L166",
    },
    {
        "leaderboard": "https://henrychur.github.io/MedS-Bench",
        "reference": "https://github.com/MAGIC-AI4Med/MedS-Ins/blob/d68f3ca170812ffd61df783af8c92d41a04bd4ab/README.md?plain=1#L6",
    },
    {"leaderboard": "https://github.com/waico/SKAB", "reference": ""},
    {"leaderboard": "https://github.com/GAIR-NLP/abel", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/BestWishYsh/ChronoMagic-Bench",
        "reference": "https://github.com/PKU-YuanGroup/ChronoMagic-Bench/blob/4a8e83f401918986927754a205c945e02dd59e20/README.md?plain=1#L206",
    },
    {
        "leaderboard": "https://github.com/facebookresearch/anli",
        "reference": "https://github.com/thunlp/TAADpapers/blob/8bcd10a41d69cef2c6dab219d330770445308b3e/README.md?plain=1#L207",
    },
    {
        "leaderboard": "https://www.llm-reasoners.net/leaderboard",
        "reference": "https://github.com/maitrix-org/llm-reasoners/blob/3334c4e85f15a6411240ad2d9f505bd51ba664c2/README.md?plain=1#L52",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/463/leaderboard",
        "reference": "https://github.com/mmurray/cvdn/blob/97a4ae94702d6f7b90bc92683e6d02e4437ac605/README.md?plain=1#L64",
    },
    {
        "leaderboard": "https://opendrivelab.com/challenge2023",
        "reference": "https://github.com/OpenDriveLab/TopoNet/blob/02246f663a89d820a8735ff34ce5a51ecc5f45b2/README.md?plain=1#L41",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AGC2024/driving-with-language-official",
        "reference": "https://github.com/OpenDriveLab/DriveLM/blob/89b2fc218ffe23803343076f7d70de23ae266840/README.md?plain=1#L88",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/csbl-br/awesome-compbio-chatgpt/blob/ee52917584722698e657195f711885d9ae4aafff/README.md?plain=1#L80",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/Anush008/fastembed-rs/blob/474d4e62c87666781b580ffc076b8475b693fc34/README.md?plain=1#L16",
    },
    {
        "leaderboard": "https://evalplus.github.io/leaderboard.html",
        "reference": "https://github.com/evalplus/evalplus/blob/8ae676234c6b1f4dfdbba3b262c1e0463ee22f6e/README.md?plain=1#L31",
    },
    {
        "leaderboard": "https://github.com/google-research-datasets/ToTTo",
        "reference": "",
    },
    {
        "leaderboard": "http://103.238.162.37:31622",
        "reference": "https://github.com/THU-KEG/KoLA/blob/daf1cf4f36e9d20f6def3e406cabbd5474d3f9c6/README.md?plain=1#L19",
    },
    {"leaderboard": "https://github.com/ASSERT-KTH/CodRep", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/peggy1502/Amazing-Resources/blob/5b5106b3d8a3583fe32678d3169a28f07924912e/README.md?plain=1#L1183",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/peggy1502/Amazing-Resources/blob/5b5106b3d8a3583fe32678d3169a28f07924912e/README.md?plain=1#L1184",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/vectara/leaderboard",
        "reference": "https://github.com/peggy1502/Amazing-Resources/blob/5b5106b3d8a3583fe32678d3169a28f07924912e/README.md?plain=1#L1185",
    },
    {
        "leaderboard": "https://www.cvlibs.net/datasets/kitti/eval_scene_flow.php",
        "reference": "https://github.com/MCG-NJU/CamLiFlow/blob/3bf1974f77a8b3b3323cd6e99b6d04eb55473d24/README.md?plain=1#L20",
    },
    {
        "leaderboard": "https://github.com/ortec/euro-neurips-vrp-2022-quickstart",
        "reference": "",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/huggingface/lighteval/blob/994fe73ce501ca44a55f274eabfa9c6b5150b7db/README.md?plain=1#L95",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/medical-code-prediction-on-mimic-iii",
        "reference": "https://github.com/acadTags/Awesome-medical-coding-NLP/blob/727681260dd3cf49895fd65b0b6269eed11c5f6b/README.md?plain=1#L104",
    },
    {
        "leaderboard": "https://eqbench.com",
        "reference": "https://github.com/EQ-bench/EQ-Bench/blob/337df8abdb8f71dfd3bdcc2cc361d40e3cbb245a/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/HaizeLabs/red-teaming-resistance-benchmark",
        "reference": "https://github.com/ThuCCSLab/Awesome-LM-SSP/blob/cf235f0aa899225f63326eb7cd397444a20d67a1/collection/leaderboard.md?plain=1#L2",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/AI-Secure/llm-trustworthy-leaderboard",
        "reference": "https://github.com/ThuCCSLab/Awesome-LM-SSP/blob/cf235f0aa899225f63326eb7cd397444a20d67a1/collection/leaderboard.md?plain=1#L3",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/hallucinations-leaderboard/leaderboard",
        "reference": "https://github.com/ThuCCSLab/Awesome-LM-SSP/blob/cf235f0aa899225f63326eb7cd397444a20d67a1/collection/leaderboard.md?plain=1#L4",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1/leaderboard",
        "reference": "https://github.com/yuzcccc/vqa-mfb/blob/7fab8dddca5924ed1023149795cdaeacf029ff34/README.md?plain=1#L14",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/Intel/powered_by_intel_llm_leaderboard",
        "reference": "https://github.com/oneapi-src/oneAPI-samples/blob/6be606bd5532dfd7f5f5acf3e27c29f30caa7028/README.md?plain=1#L116",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/daniel-furman/sft-demos/blob/05f9453b6cf064608a9759aff0f21320546f7373/README.md?plain=1#L27",
    },
    {
        "leaderboard": "hhttps://huggingface.co/spaces/AudioLLMs/AudioBench-Leaderboard",
        "reference": "https://github.com/AudioLLMs/AudioBench/blob/0a045da5c288b2ce2fc547a2b0e88450c606cac8/README.md?plain=1#L23",
    },
    {"leaderboard": "https://github.com/sierra-research/tau-bench", "reference": ""},
    {"leaderboard": "https://github.com/OpenDFM/MULTI-Benchmark", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/opencompass/open_vlm_leaderboard",
        "reference": "https://github.com/matatonic/openedai-vision/blob/0fdf8390d37d87a44a68427940875388b4cff0ae/README.md?plain=1#L12",
    },
    {
        "leaderboard": "https://pages.nist.gov/jarvis_leaderboard",
        "reference": "https://github.com/JuDFTteam/best-of-atomistic-machine-learning/blob/62b7eca545273760dae16c7ab32cee97ddbad18c/README.md?plain=1#L337",
    },
    {
        "leaderboard": "https://github.com/deepchem/moleculenet",
        "reference": "https://github.com/JuDFTteam/best-of-atomistic-machine-learning/blob/62b7eca545273760dae16c7ab32cee97ddbad18c/README.md?plain=1#L476",
    },
    {
        "leaderboard": "https://stanfordmlgroup.github.io/competitions/chexpert",
        "reference": "https://github.com/LengerichLab/CompBioDatasetsForMachineLearning/blob/65398d69579b7eafd780cac426f32c19922314c2/README.md?plain=1#L85",
    },
    {
        "leaderboard": "https://wilds.stanford.edu/leaderboard",
        "reference": "https://github.com/p-lambda/wilds/blob/472677590de351857197a9bf24958838c39c272b/README.md?plain=1#L338",
    },
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/8020",
        "reference": "https://github.com/xlxwalex/FCGEC/blob/822490e0db7c336366f447ceb9873c84ee2d93d7/README.md?plain=1#L146",
    },
    {
        "leaderboard": "https://github.com/TILOS-AI-Institute/HypergraphPartitioning",
        "reference": "",
    },
    {"leaderboard": "https://github.com/isen-zhang/ACLUE", "reference": ""},
    {
        "leaderboard": "https://huggingface.co/spaces/Vchitect/VBench_Leaderboard",
        "reference": "https://github.com/Vchitect/VBench/blob/377f18f322a1baa1ac77909b2cc2cc9758c50ba0/README.md?plain=1#L8",
    },
    {
        "leaderboard": "http://host.robots.ox.ac.uk:8080/leaderboard/main_bootstrap.php",
        "reference": "https://github.com/idrl-lab/Adversarial-Attacks-on-Object-Detectors-Paperlist/blob/1594662bc5fc46376d7cdf195b9624890031e4bd/README.md?plain=1#L97",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboar",
        "reference": "https://github.com/SeanLee97/AnglE/blob/b04eae166d8596b47293c75b4664d3ad820d7331/README.md?plain=1#L61",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/instance-segmentation-on-coco",
        "reference": "https://github.com/IDEA-Research/MaskDINO/blob/3831d8514a3728535ace8d4ecc7d28044c42dd14/README.md?plain=1#L46",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/panoptic-segmentation-on-coco-test-dev",
        "reference": "https://github.com/IDEA-Research/MaskDINO/blob/3831d8514a3728535ace8d4ecc7d28044c42dd14/README.md?plain=1#L46",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/qiantong-xu/toolbench-leaderboard",
        "reference": "https://github.com/sambanova/toolbench/blob/40a7a96c5123c05be023b98800b36606ad56792b/README.md?plain=1#L11",
    },
    {
        "leaderboard": "https://open-compass.github.io/CriticBench/leaderboard_subjective.html",
        "reference": "https://github.com/open-compass/CriticBench/blob/d8f0394b7925719fb6ad41986026831cfdc9dc72/README.md?plain=1#L25",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/semantic-segmentation-on-ade20k",
        "reference": "https://github.com/IDEA-Research/MaskDINO/blob/3831d8514a3728535ace8d4ecc7d28044c42dd14/README.md?plain=1#L46",
    },
    {
        "leaderboard": "https://github.com/google-research-datasets/totto",
        "reference": "https://github.com/luka-group/Lattice/blob/9f64c9a469e622a7421458c4aa800acb15d89683/README.md?plain=1#L40",
    },
    {
        "leaderboard": "https://ogb.stanford.edu/docs/lsc/leaderboards",
        "reference": "https://github.com/lenscloth/GRPE/blob/05f31e990f99f98ff848d8971c2ea2a323d96e89/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://research.nianticlabs.com/mapfree-reloc-benchmark/leaderboard",
        "reference": "https://github.com/nianticlabs/mickey/blob/8fec03e535e5a125bcf9fc740645e38f0ffffe36/README.md?plain=1#L58",
    },
    {
        "leaderboard": "https://research.nianticlabs.com/mapfree-reloc-benchmark/leaderboard",
        "reference": "https://github.com/neheller/kits19/blob/57eac1b36e2568c08fe43a7d0b50713ae76a834c/README.md?plain=1#L103",
    },
    {
        "leaderboard": "https://adwardlee.github.io/salad_bench/leaderboard.html",
        "reference": "https://github.com/OpenSafetyLab/SALAD-BENCH/blob/45209195717ae4e22075c3229d3d37b0844d19a5/README.md?plain=1#L29",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/shizhediao/Post-Training-Data-Flywheel/blob/1cfc1e96053d681732b7b461ed4eb02183d44e7a/README.md?plain=1#L76",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/bigcode/bigcodebench-leaderboard",
        "reference": "https://github.com/bigcode-project/bigcodebench/blob/8ffe6d3d69e6d38a7f4ca626e93064374746af3b/README.md?plain=1#L36",
    },
    {
        "leaderboard": "https://yale-lily.github.io/spider",
        "reference": "https://github.com/aws-samples/text-to-sql-bedrock-workshop/blob/d4bad3e55895e0ee4a12166e690ae38e81711949/README.md?plain=1#L16",
    },
    {
        "leaderboard": "https://bird-bench.github.io",
        "reference": "https://github.com/aws-samples/text-to-sql-bedrock-workshop/blob/d4bad3e55895e0ee4a12166e690ae38e81711949/README.md?plain=1#L16",
    },
    {
        "leaderboard": "https://www.kaggle.com/competitions/otto-recommender-system/leaderboard",
        "reference": "https://github.com/cdeotte/Kaggle-OTTO-Comp/blob/2bb896260c329806ee1d2170fa1e581351485440/README.md?plain=1#L69",
    },
    {
        "leaderboard": "https://trishullab.github.io/PutnamBench/leaderboard.html",
        "reference": "https://github.com/trishullab/PutnamBench/blob/737065dd78481f0fe63748653cb42607de687dee/README.md?plain=1#L15",
    },
    {
        "leaderboard": "https://test.leaderboard.librai.tech/LeaderBoard",
        "reference": "https://github.com/Libr-AI/OpenRedTeaming/blob/65b11f97d1c7a3ec9793817c709b1ebf95514323/leaderboard/README.md?plain=1#L4",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/Libr-AI/OpenRedTeaming/blob/65b11f97d1c7a3ec9793817c709b1ebf95514323/leaderboard/README.md?plain=1#L47",
    },
    {
        "leaderboard": "https://eval.ai/web/challenges/challenge-page/1832/leaderboard",
        "reference": "https://github.com/Computer-Vision-in-the-Wild/Elevater_Toolkit_IC/blob/00d0af78559d5f3d800ae4668210e6bd1f2f84b9/README.md?plain=1#L118",
    },
    {
        "leaderboard": "https://github.com/marqo-ai/marqo-FashionCLIP/blob/main/LEADERBOARD.md",
        "reference": "",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/cosmosqa",
        "reference": "https://github.com/wilburOne/cosmosqa/blob/b6eb99cca4e2a51dd28a9a6f562534872d851639/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://scir-sp.github.io",
        "reference": "https://github.com/Waste-Wood/e-CARE/blob/c3b52648adcc919303a297a377811e24d703e197/README.md?plain=1#L138",
    },
    {"leaderboard": "https://github.com/patrick-tssn/VideoHallucer", "reference": ""},
    {
        "leaderboard": "http://leaderboard.roboflow.com",
        "reference": "https://github.com/ahkarami/Great-Deep-Learning-Tutorials/blob/302f8643b0815e10e01283c6a3a33f192612c857/ComputerVision.md?plain=1#L128",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/ahkarami/Great-Deep-Learning-Tutorials/blob/302f8643b0815e10e01283c6a3a33f192612c857/NLP.md?plain=1#L447",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/ahkarami/Great-Deep-Learning-Tutorials/blob/302f8643b0815e10e01283c6a3a33f192612c857/NLP.md?plain=1#L448",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/ahkarami/Great-Deep-Learning-Tutorials/blob/302f8643b0815e10e01283c6a3a33f192612c857/NLP.md?plain=1#L449",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mike-ravkine/can-ai-code-results",
        "reference": "https://github.com/ahkarami/Great-Deep-Learning-Tutorials/blob/302f8643b0815e10e01283c6a3a33f192612c857/NLP.md?plain=1#L450",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/allenai/WildBench",
        "reference": "https://github.com/ahkarami/Great-Deep-Learning-Tutorials/blob/302f8643b0815e10e01283c6a3a33f192612c857/NLP.md?plain=1#L454",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/ahkarami/Great-Deep-Learning-Tutorials/blob/302f8643b0815e10e01283c6a3a33f192612c857/NLP.md?plain=1#L497",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/hf-audio/open_asr_leaderboard",
        "reference": "https://github.com/ahkarami/Great-Deep-Learning-Tutorials/blob/302f8643b0815e10e01283c6a3a33f192612c857/Speech.md?plain=1#L126",
    },
    {
        "leaderboard": "https://paperswithcode.com/dataset/storybench",
        "reference": "https://github.com/google/storybench/blob/e67b803d69d6d9e650b4d73560f0db7c082b7381/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/carla-map-leaderboard-on-carla",
        "reference": "https://github.com/Kin-Zhang/mmfn/blob/759ed049a2a7890e3df80c9e6d35f4ed524399be/README.md?plain=1#L5",
    },
    {
        "leaderboard": "https://fasteval.github.io/FastEval",
        "reference": "https://github.com/FastEval/FastEval/blob/9f7cc4e034216d72640fe359e0ee58667559dff1/README.md?plain=1#L7",
    },
    {"leaderboard": "https://github.com/CMMMU-Benchmark/CMMMU", "reference": ""},
    {"leaderboard": "https://github.com/ababier/open-kbp", "reference": ""},
    {
        "leaderboard": "https://gorilla.cs.berkeley.edu/leaderboard.html",
        "reference": "https://github.com/raphaelmansuy/digital_palace/blob/8c2a86672ada32fbffefa6a87579d699bd259611/README.md?plain=1#L276",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/raphaelmansuy/digital_palace/blob/8c2a86672ada32fbffefa6a87579d699bd259611/README.md?plain=1#L277",
    },
    {
        "leaderboard": "https://evalplus.github.io/leaderboard.html",
        "reference": "https://github.com/raphaelmansuy/digital_palace/blob/8c2a86672ada32fbffefa6a87579d699bd259611/README.md?plain=1#L278",
    },
    {
        "leaderboard": "https://leaderboard.carla.org",
        "reference": "https://github.com/autonomousvision/king/blob/6acd2154cd689b3121664e60760378ba484659c6/README.md?plain=1#L101",
    },
    {"leaderboard": "https://github.com/seketeam/EvoCodeBench", "reference": ""},
    {
        "leaderboard": "https://github.com/layumi/Person_reID_baseline_pytorch/blob/master/leaderboard/README.md",
        "reference": "https://github.com/Yimin-Liu/Awesome-Unsupervised-Person-Re-identification/blob/ac26d61b4786f0dae0d9199a02ff06766ff3fb4c/README.md?plain=1#L34",
    },
    {
        "leaderboard": "https://mllm-judge.github.io/leaderboard.html",
        "reference": "https://github.com/Dongping-Chen/MLLM-Judge/blob/1bf9f6f8673fd0c9002c81a16687ff8fe5fecdc0/README.md?plain=1#L8",
    },
    {
        "leaderboard": "https://github.com/hkust-nlp/llm-compression-intelligence",
        "reference": "",
    },
    {"leaderboard": "https://github.com/CyberOrigin2077/CyberGPT", "reference": ""},
    {
        "leaderboard": "https://comma.ai/leaderboard",
        "reference": "https://github.com/CyberOrigin2077/CyberGPT/blob/4296e2e3372efe8eb484d794263057fd423a3aec/Readme.md?plain=1#L270",
    },
    {
        "leaderboard": "http://www.lavicleva.com",
        "reference": "https://github.com/lyy1994/awesome-data-contamination/blob/fff6cc7d0633a5efd97fa3289fe02ce11b3b1071/README.md?plain=1#L111",
    },
    {
        "leaderboard": "https://evo-eval.github.io/leaderboard.html",
        "reference": "https://github.com/lyy1994/awesome-data-contamination/blob/fff6cc7d0633a5efd97fa3289fe02ce11b3b1071/README.md?plain=1#L319",
    },
    {
        "leaderboard": "https://gair-nlp.github.io/benbench",
        "reference": "https://github.com/lyy1994/awesome-data-contamination/blob/fff6cc7d0633a5efd97fa3289fe02ce11b3b1071/README.md?plain=1#L349",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/physicaliqa",
        "reference": "https://github.com/lyy1994/awesome-data-contamination/blob/fff6cc7d0633a5efd97fa3289fe02ce11b3b1071/README.md?plain=1#L495",
    },
    {
        "leaderboard": "https://xwang.dev/mint-bench",
        "reference": "https://github.com/xingyaoww/mint-bench/blob/3f7f12c10bf763be1e6dbdeb42feb57624121f61/README.md?plain=1#L7",
    },
    {
        "leaderboard": "https://leaderboard.edustudio.ai",
        "reference": "https://github.com/HFUT-LEC/EduStudio/blob/c4c7413fd86ecb3f8ba55edda3242c6b477e1946/README.md?plain=1#L26",
    },
    {
        "leaderboard": "https://github.com/IllinoisGraphBenchmark/IGB-Datasets/blob/main/results/README.md",
        "reference": "",
    },
    {"leaderboard": "https://github.com/LauraRuis/groundedSCAN", "reference": ""},
    {
        "leaderboard": "https://mme-realworld.github.io/home_page.html#leaderboard",
        "reference": "https://github.com/yfzhang114/MME-RealWorld/blob/7a47315ee3b12d65bc938888211adaf1b9deca0a/README.md?plain=1#L14",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L63",
    },
    {
        "leaderboard": "https://lmarena.ai/?leaderboard",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L64",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/Salesforce/ContextualBench-Leaderboard",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L65",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/mteb/leaderboard",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L67",
    },
    {
        "leaderboard": "https://www.swebench.com",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L68",
    },
    {
        "leaderboard": "https://tatsu-lab.github.io/alpaca_eval",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L69",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/openlifescienceai/open_medical_llm_leaderboard",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L70",
    },
    {
        "leaderboard": "https://gorilla.cs.berkeley.edu/leaderboard.html",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L71",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/allenai/WildBench",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L72",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/PatronusAI/enterprise_scenarios_leaderboard",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L73",
    },
    {
        "leaderboard": "https://github.com/vectara/hallucination-leaderboard",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L74",
    },
    {
        "leaderboard": "https://github.com/ray-project/llmperf-leaderboard",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L75",
    },
    {
        "leaderboard": "ttps://huggingface.co/spaces/ArtificialAnalysis/LLM-Performance-Leaderboard",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L76",
    },
    {
        "leaderboard": "https://paperswithcode.com/sota/multi-task-language-understanding-on-mmlu",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L77",
    },
    {
        "leaderboard": "https://leaderboard.allenai.org/arc_easy",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L205",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard",
        "reference": "https://github.com/alopatenko/LLMEvaluation/blob/beca4f0115aa78907f787452ef9270218a15b360/README.md?plain=1#L235",
    },
    {"leaderboard": "https://github.com/cybermetric/CyberMetric", "reference": ""},
    {
        "leaderboard": "https://mmsearch.github.io/#leaderboard",
        "reference": "https://github.com/CaraJ7/MMSearch/blob/e96c140746adf69026a8ab17e96b00df06fa5d9d/README.md?plain=1#L15",
    },
    {
        "leaderboard": "http://www.cvlibs.net/datasets/kitti/eval_mots.php",
        "reference": "https://github.com/detectRecog/PointTrack/blob/921189316a869f77e5c02a50fdd989007e99895c/README.md?plain=1#L30",
    },
    {
        "leaderboard": "https://enigma-agent.github.io/#results",
        "reference": "https://github.com/princeton-nlp/SWE-agent/blob/e6d7cb8277cbf79c97e363e1fd89d22e6be10bd7/README.md?plain=1#L60",
    },
    {
        "leaderboard": "https://video-mme.github.io/home_page.html#leaderboard",
        "reference": "https://github.com/Oryx-mllm/Oryx/blob/1ad4fa6dfbca39e8732e160dece104c1a7f5713f/README.md?plain=1#L39",
    },
    {
        "leaderboard": "https://github.com/JUNJIE99/MLVU",
        "reference": "https://github.com/Oryx-mllm/Oryx/blob/1ad4fa6dfbca39e8732e160dece104c1a7f5713f/README.md?plain=1#L41",
    },
    {"leaderboard": "https://github.com/stefp/FOR-species", "reference": ""},
    {
        "leaderboard": "https://github.com/zhxlia/Awesome-TableReasoning-LLM-Survey",
        "reference": "",
    },
    {
        "leaderboard": "https://lab.kb.se/leaderboard",
        "reference": "https://github.com/kb-labb/superlim-submissions/blob/1bbddc2a6b09798f0a6853a784bd4bed9b3d6ecf/README.md?plain=1#L3",
    },
    {
        "leaderboard": "https://docs.google.com/spreadsheets/d/1M801lEpBbKSNwP-vDBkC_pF7LdyGU1f_ufZb_NWNBZQ",
        "reference": "https://github.com/Agent-Tools/awesome-autonomous-web/blob/d49cfba22d20c90f5c3b8815251bbf4962bbfae7/README.md?plain=1#L48",
    },
    {
        "leaderboard": "https://github.com/Delay-Xili/Wireframe",
        "reference": "https://github.com/Delay-Xili/F-Clip/blob/e30d307e728aa530b5601e4581510bcd6093b620/README.md?plain=1#L31",
    },
    {
        "leaderboard": "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
        "reference": "https://github.com/disler/elm-itv-benchmark/blob/79a1b9e51c36678b921ecfa2e671e3cf3cdf8851/README.md?plain=1#L47",
    },
    {
        "leaderboard": "https://tarsier-vlm.github.io/#leaderboard",
        "reference": "https://github.com/bytedance/tarsier/blob/9ff5567a8882cbcc81060f392bead76afb16e19d/README.md?plain=1#L34",
    },
    {"leaderboard": "https://github.com/DeepSE/SageMakerActions", "reference": ""},
    {
        "leaderboard": "https://codalab.lisn.upsaclay.fr/competitions/12672",
        "reference": "https://github.com/layumi/University1652-Baseline/blob/cf8cd3f342a20493a50ccd8a8265af78b4c959b3/README.md?plain=1#L55",
    },
    {"leaderboard": "https://github.com/keisks/jfleg", "reference": ""},
    {
        "leaderboard": "https://github.com/cage-challenge/cage-challenge-2",
        "reference": "",
    },
    {
        "leaderboard": "http://rrc.cvc.uab.es",
        "reference": "https://github.com/hwalsuklee/awesome-deep-text-detection-recognition/blob/0958b3875043676c61bdf64df95ea81a43ad3c92/README.md?plain=1#L13",
    },
]

github_leaderboard_primary_mapping_unique = set()
for lb in github_leaderboard_primary_mapping:
    github_leaderboard_primary_mapping_unique.add(lb["leaderboard"])
len(github_leaderboard_primary_mapping_unique), len(github_leaderboard_primary_mapping)

(1090, 1652)

In [35]:
# extra leaderboards recognized via examination of leaderboard websites
github_leaderboard_secondary_mapping = [
    {
        "provenance": "https://www.superclueai.com",
        "leaderboards": [
            "https://github.com/CLUEbenchmark/SuperCLUEgkzw",
            "https://www.langyb.com",
        ],
    },
    {
        "provenance": "https://lmarena.ai/?leaderboard",
        "leaderboards": ["https://redarena.ai/leaderboard"],
    },
    {
        "provenance": "https://accubits.com/open-source-program-synthesis-models-leaderboard",
        "leaderboards": [
            "https://accubits.com/large-language-models-leaderboard",
            "https://accubits.com/text-to-image-models-leaderboard",
            "https://accubits.com/instructeval-leaderboard",
            "https://accubits.com/business-friendly-llms-leaderboard",
        ],
    },
]

count_secondary = 0
for leaderboard in github_leaderboard_secondary_mapping:
    count_secondary += len(leaderboard["leaderboards"])
count_secondary

7

In [9]:
# scrape Hugging Face spaces that contain the "leaderboard" keyword in their content 
import json
from huggingface_hub import list_spaces

# Perform individual searches for different variations of the keyword
spaces_leaderboard = list(list_spaces(search="leaderboard"))

# Extract unique space IDs and combine them into a set to remove duplicates
space_links = set(
    f"https://huggingface.co/spaces/{space.id}" for space in spaces_leaderboard
)

# Write the spaces to a JSON file
with open(f"{path_data}/HuggingFace.json", "w") as json_file:
    json.dump(space_links, json_file, indent="\t")

In [None]:
# manually check retrieved spaces and curate leaderboard URLs
import webbrowser

# Read the spaces to a JSON file
with open(f"{path_data}/HuggingFace.json", "r") as json_file:
    space_links = json.load(json_file)

huggingface_leaderboard_primary = []
for index, link in enumerate(space_links):
    print(index)
    webbrowser.open(link)
    match input("Press enter key to proceed."):
        case "esc":
            raise KeyboardInterrupt

In [7]:
# leaderboard spaces and their status
huggingface_leaderboard_primary = [
    "https://huggingface.co/spaces/mteb/leaderboard",
    "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard",
    "https://huggingface.co/spaces/la-leaderboard/la-leaderboard",
    "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard",
    "https://huggingface.co/spaces/ArtificialAnalysis/Text-to-Image-Leaderboard",
    "https://huggingface.co/spaces/DontPlanToEnd/UGI-Leaderboard",
    "https://huggingface.co/spaces/opencompass/openvlm_video_leaderboard",
    "https://huggingface.co/spaces/opencompass/open_vlm_leaderboard",
    "https://huggingface.co/spaces/Salesforce/ContextualBench-Leaderboard",
    "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard",
    "https://huggingface.co/spaces/hf-audio/open_asr_leaderboard",
    "https://huggingface.co/spaces/echo840/ocrbench-leaderboard",
    "https://huggingface.co/spaces/vidore/vidore-leaderboard",
    "https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard",
    "https://huggingface.co/spaces/Vchitect/VBench_Leaderboard",
    "https://huggingface.co/spaces/EnergyStarAI/2024_Leaderboard",  # duplicate
    "https://huggingface.co/spaces/JMMMU/JMMMU_Leaderboard",
    "https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard",
    "https://huggingface.co/spaces/openlifescienceai/open_medical_llm_leaderboard",
    "https://huggingface.co/spaces/ArtificialAnalysis/LLM-Performance-Leaderboard",
    "https://huggingface.co/spaces/TheFinAI/Open-Financial-LLM-Leaderboard",
    "https://huggingface.co/spaces/optimum/llm-perf-leaderboard",
    "https://huggingface.co/spaces/BAAI/open_cn_llm_leaderboard",  # duplicate
    "https://huggingface.co/spaces/open-llm-leaderboard-old/open_llm_leaderboard",  # duplicate
    "https://huggingface.co/spaces/meg/leaderboard",
    "https://huggingface.co/spaces/openGPT-X/european-llm-leaderboard",
    "https://huggingface.co/spaces/ParsBench/leaderboard",
    "https://huggingface.co/spaces/TTS-AGI/TTS-Arena",
    "https://huggingface.co/spaces/MIMIC-CDM/leaderboard",  # error
    "https://huggingface.co/spaces/qiantong-xu/toolbench-leaderboard",
    "https://huggingface.co/spaces/uonlp/open_multilingual_llm_leaderboard",
    "https://huggingface.co/spaces/allenai/URIAL-Bench",
    "https://huggingface.co/spaces/hf-vision/object_detection_leaderboard",
    "https://huggingface.co/spaces/opencompass/opencompass-llm-leaderboard",
    "https://huggingface.co/spaces/hallucinations-leaderboard/leaderboard",
    "https://huggingface.co/spaces/vectara/leaderboard",
    "https://huggingface.co/spaces/mlabonne/Yet_Another_LLM_Leaderboard",
    "https://huggingface.co/spaces/mii-llm/open_ita_llm_leaderboard",
    "https://huggingface.co/spaces/CultriX/Alt_LLM_LeaderBoard",  # duplicate
    "https://huggingface.co/spaces/logikon/open_cot_leaderboard",
    "https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard",  # duplicate
    "https://huggingface.co/spaces/gorilla-llm/berkeley-function-calling-leaderboard",
    "https://huggingface.co/spaces/sparse-generative-ai/open-moe-llm-leaderboard",
    "https://huggingface.co/spaces/hebrew-llm-leaderboard/leaderboard",
    "https://huggingface.co/spaces/instructkr/LogicKor-leaderboard",
    "https://huggingface.co/spaces/open-rl-leaderboard/leaderboard",  # duplicate
    "https://huggingface.co/spaces/meval/multilingual-chatbot-arena-leaderboard",  # duplicate
    "https://huggingface.co/spaces/occiglot/euro-llm-leaderboard",
    "https://huggingface.co/spaces/SeaLLMs/SeaExam_leaderboard",
    "https://huggingface.co/spaces/OALL/Open-Arabic-LLM-Leaderboard",
    "https://huggingface.co/spaces/sartifyllc/Swahili_LLM_Leaderboard",
    "https://huggingface.co/spaces/yentinglin/open-tw-llm-leaderboard",
    "https://huggingface.co/spaces/Weyaxi/compute-power-leaderboard",  # duplicate
    "https://huggingface.co/spaces/bigcode/bigcodebench-leaderboard",
    "https://huggingface.co/spaces/Salesforce/crm_llm_leaderboard",
    "https://huggingface.co/spaces/relbench/leaderboard",
    "https://huggingface.co/spaces/AudioLLMs/AudioBench-Leaderboard",
    "https://huggingface.co/spaces/ThaiLLM-Leaderboard/leaderboard"
    "https://huggingface.co/spaces/colab-potsdam/multimodal-clem-leaderboard"  # duplicate
    "https://huggingface.co/spaces/timm/leaderboard",
    "https://huggingface.co/spaces/speakleash/polish_medical_leaderboard",  # duplicate
    "https://huggingface.co/spaces/CIIRC-NLP/czechbench_leaderboard",
    "https://huggingface.co/spaces/ricdomolm/caselawqa_leaderboard",
    "https://huggingface.co/spaces/Shitqq/Openness-leaderboard",
    "https://huggingface.co/spaces/evilfreelancer/msnp-leaderboard",
    "https://huggingface.co/spaces/crynux-ai/genki-dama-leaderboard",
    "https://huggingface.co/spaces/leukas/cute_leaderboard",
    "https://huggingface.co/spaces/neelalex/leaderboard",  # error
    "https://huggingface.co/spaces/ought/raft-leaderboard",  # error
    "https://huggingface.co/spaces/speech-recognition-community-v2/Leaderboard",  # error
    "https://huggingface.co/spaces/osanseviero/llama-leaderboard",  # error
    "https://huggingface.co/spaces/autoevaluate/leaderboards",  # archived
    "https://huggingface.co/spaces/Classroom-workshop/assignments-leaderboard",  # error
    "https://huggingface.co/spaces/CVPR/Leaderboard",  # error
    "https://huggingface.co/spaces/hugginglearners/Hearts_Leaderboard",  # error
    "https://huggingface.co/spaces/CALM/Leaderboard",  # empty
    "https://huggingface.co/spaces/ICML2022/Leaderboard",  # error
    "https://huggingface.co/spaces/reach-vb/leaderboards",  # duplicate # error
    "https://huggingface.co/spaces/sanchit-gandhi/leaderboards",  # duplicate # error
    "https://huggingface.co/spaces/ThomasSimonini/Deep-Reinforcement-Learning-Leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/dreambooth-hackathon/leaderboard"
    "https://huggingface.co/spaces/ThomasSimonini/Deep-Reinforcement-Learning-Leaderboard-test",  # duplicate # error
    "https://huggingface.co/spaces/xtreme-s/leaderboard",  # error
    "https://huggingface.co/spaces/esb/leaderboard",  # error
    "https://huggingface.co/spaces/EuroPython2022/Leaderboard",  # error
    "https://huggingface.co/spaces/ECCV2022/Leaderboard",  # error
    "https://huggingface.co/spaces/NAACL2022/Spaces-Leaderboard",  # error
    "https://huggingface.co/spaces/platzi/platzi-leaderboard",  # error
    "https://huggingface.co/spaces/SIGGRAPH2022/Leaderboard",  # error
    "https://huggingface.co/spaces/EuroSciPy2022/Leaderboard",  # error
    "https://huggingface.co/spaces/nouamanetazi/mteb-leaderboard-old",  # error
    "https://huggingface.co/spaces/whisper-event/leaderboard",  # error
    "https://huggingface.co/spaces/Short-Answer-Feedback/Leaderboard",
    "https://huggingface.co/spaces/deprem-ml/intent-leaderboard",  # error
    "https://huggingface.co/spaces/keras-dreambooth/leaderboard",
    "https://huggingface.co/spaces/deprem-ml/intent-leaderboard-v13",  # error
    "https://huggingface.co/spaces/demo-crafters/leaderboard",  # error
    "https://huggingface.co/spaces/somosnlp-hackathon-2023/leaderboard",  # duplicate
    "https://huggingface.co/spaces/jax-diffusers-event/leaderboard",
    "https://huggingface.co/spaces/ludwigstumpp/llm-leaderboard",
    "https://huggingface.co/spaces/Alfasign/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/OpenGenAI/parti-prompts-leaderboard",
    "https://huggingface.co/spaces/loveu-tgve/loveu-tgve-leaderboard",
    "https://huggingface.co/spaces/gaia-benchmark/leaderboard",
    "https://huggingface.co/spaces/starmorph/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/DeepBrainz/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/seikwan/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/ml-energy/leaderboard",
    "https://huggingface.co/spaces/JetBrains-Research/long-code-arena",
    "https://huggingface.co/spaces/HuggingFaceH4/human_eval_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/mlfoundations/VisIT-Bench-Leaderboard",
    "https://huggingface.co/spaces/nlphuji/WHOOPS-Leaderboard",  # error
    "https://huggingface.co/spaces/nlphuji/WHOOPS-Leaderboard-Full",
    "https://huggingface.co/spaces/CoreyMorris/MMLU-by-task-Leaderboard",
    "https://huggingface.co/spaces/gsaivinay/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/b1sheng/kg_llm_leaderboard_test",  # duplicate # error
    "https://huggingface.co/spaces/vsd-benchmark/vsd_leaderboard",
    "https://huggingface.co/spaces/AILab-CVC/SEED-Bench_Leaderboard",
    "https://huggingface.co/spaces/felixz/meta_open_llm_leaderboard",
    "https://huggingface.co/spaces/Weyaxi/huggingface-leaderboard",
    "https://huggingface.co/spaces/pngwn/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/pngwn/open_llm_leaderboard_two",  # duplicate # error
    "https://huggingface.co/spaces/toloka/open-llm-leaderboard",
    "https://huggingface.co/spaces/wasertech/open_asr_leaderboard",  # duplicate
    "https://huggingface.co/spaces/choco9966/LeaderboardTest",  # deleted
    "https://huggingface.co/spaces/wishwork/Persian-LLM-Leaderboard",  # error
    "https://huggingface.co/spaces/clibrain/Spanish-Embeddings-Leaderboard",
    "https://huggingface.co/spaces/Docfile/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/AngoHF/ANGO-Leaderboard",
    "https://huggingface.co/spaces/hjzhou/chatbot-arena-leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/choco9966/open-ko-llm-leaderboard",  # deleted
    "https://huggingface.co/spaces/abidlabs/mteb-leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/awacke1/CanAICode-Leaderboard-Customized",  # duplicate # error
    "https://huggingface.co/spaces/vlsp-2023-vllm/VLLMs-Leaderboard",
    "https://huggingface.co/spaces/nesticot/nhl_leaderboards",  # duplicate
    "https://huggingface.co/spaces/patrickvonplaten/parti-prompts-leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/ema-arte/LRS-leaderboard",
    "https://huggingface.co/spaces/mesolitica/malay-llm-leaderboard",
    "https://huggingface.co/spaces/BramVanroy/open_dutch_llm_leaderboard",  # duplicate
    "https://huggingface.co/spaces/felixz/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/vectara/Hallucination-evaluation-leaderboard",
    "https://huggingface.co/spaces/openskyml/diffusion-models-leaderboard-template",  # error
    "https://huggingface.co/spaces/glitchbench/Leaderboard",
    "https://huggingface.co/spaces/daishen/SCULAiW",
    "https://huggingface.co/spaces/Koshti10/leaderboard",  # error
    "https://huggingface.co/spaces/khhuiyh/AutoEval-Video_LeaderBoard",
    "https://huggingface.co/spaces/mesolitica/malaysian-embedding-leaderboard",
    "https://huggingface.co/spaces/3B-Group/ConvRe-Leaderboard",
    "https://huggingface.co/spaces/vietgpt/VLLMs-Leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/Nexusflow/Nexus_Function_Calling_Leaderboard",
    "https://huggingface.co/spaces/locuslab/tofu_leaderboard",
    "https://huggingface.co/spaces/smothiki/open_llm_leaderboard",  # duplicate
    "https://huggingface.co/spaces/gar1t/llm-leaderboard",  # empty
    "https://huggingface.co/spaces/homunculus/Deep-Reinforcement-Learning-Leaderboard",  # duplicate
    "https://huggingface.co/spaces/PatronusAI/enterprise_scenarios_leaderboard",
    "https://huggingface.co/spaces/Wwwduojin/MLLM_leaderboard",
    "https://huggingface.co/spaces/TogetherAI/chatbot-arena-leaderboard",  # duplicate
    "https://huggingface.co/spaces/colab-potsdam/clem-leaderboard",
    "https://huggingface.co/spaces/OpenGVLab/MVBench_Leaderboard",
    "https://huggingface.co/spaces/SeaEval/SeaEval_Leaderboard",
    "https://huggingface.co/spaces/oslook/chatbot-arena-leaderboard",  # duplicate
    "https://huggingface.co/spaces/Weyaxi/followers-leaderboard",
    "https://huggingface.co/spaces/med-llm-leaderboard/medicalworkflow-llm-leaderboard-backend",  # empty
    "https://huggingface.co/spaces/ramiroluo/LLMHallucination_Leaderboard",
    "https://huggingface.co/spaces/DavidVivancos/MindBigData-Leaderboard",
    "https://huggingface.co/spaces/mesolitica/malaysian-stt-leaderboard",  # error
    "https://huggingface.co/spaces/21world/bigcode-models-leaderboard",  # duplicate
    "https://huggingface.co/spaces/21world/chatbot-arena-leaderboard",  # duplicate
    "https://huggingface.co/spaces/gblazex/leaderboard",  # error
    "https://huggingface.co/spaces/CathieDaDa/LLM_leaderboard",
    "https://huggingface.co/spaces/AI-Secure/llm-trustworthy-leaderboard",
    "https://huggingface.co/spaces/luisrguerra/guerra-llm-ai-leaderboard",  # duplicate
    "https://huggingface.co/spaces/yutohub/japanese-chatbot-arena-leaderboard",
    "https://huggingface.co/spaces/NPHardEval/NPHardEval-leaderboard",
    "https://huggingface.co/spaces/Bias-Leaderboard/leaderboard",
    "https://huggingface.co/spaces/PeepDaSlan9/B2BMGMT_chatbot-arena-leaderboard",  # duplicate
    "https://huggingface.co/spaces/devingulliver/subquadratic-llm-leaderboard",
    "https://huggingface.co/spaces/ucla-contextual/contextual_leaderboard",
    "https://huggingface.co/spaces/CDT-BMAI-GP/biomed_probing_leaderboard",  # error
    "https://huggingface.co/spaces/Nymbo/followers-leaderboard",  # duplicate
    "https://huggingface.co/spaces/rusticluftig/9-leaderboard",  # error
    "https://huggingface.co/spaces/openreviewer/reviewer-arena",
    "https://huggingface.co/spaces/RaoFoundation/pretraining-leaderboard",  # error
    "https://huggingface.co/spaces/instructkr/ko-chatbot-arena-leaderboard",
    "https://huggingface.co/spaces/LuxOAI/chatbot-arena-leaderboard",  # duplicate
    "https://huggingface.co/spaces/abidlabs/chatbot-arena-leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/hmb/chatbot-arena-leaderboard",  # duplicate
    "https://huggingface.co/spaces/q-future/Q-Bench-Leaderboard",
    "https://huggingface.co/spaces/PCA-Bench/PCA-Bench-Leaderboard",
    "https://huggingface.co/spaces/kevinwang676/ocrbench-leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/rodrigomasini/data-only-mteb-leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/Nymbo/leaderboard_main",  # duplicate # error
    "https://huggingface.co/spaces/rodrigomasini/data_only_llm_perf_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/rodrigomasini/data_only_hallucination_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/rodrigomasini/data_only_enterprise_scenarios_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/rodrigomasini/data_only_open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/paulml/Yet_Another_LLM_Leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/stemdataset/stem-leaderboard",  # error
    "https://huggingface.co/spaces/bittensor-dataset/leaderboard",  # error
    "https://huggingface.co/spaces/CathieDaDa/LLM_leaderboard_en",
    "https://huggingface.co/spaces/xioio/chatbot-arena-leaderboard",  # duplicate
    "https://huggingface.co/spaces/winglian/finetuning_subnet_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/sam-paech/EQ-Bench-Leaderboard",
    "https://huggingface.co/spaces/speakleash/open_pl_llm_leaderboard",  # duplicate
    "https://huggingface.co/spaces/Weyaxi/data-leaderboard",  # duplicate
    "https://huggingface.co/spaces/yzeng58/CoBSAT_Leaderboard",  # error
    "https://huggingface.co/spaces/Lihuchen/pearl_leaderboard",
    "https://huggingface.co/spaces/malhajar/OpenLLMTurkishLeaderboard",
    "https://huggingface.co/spaces/Intel/powered_by_intel_llm_leaderboard",
    "https://huggingface.co/spaces/automerger/Yet_Another_LLM_Leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/dimbyTa/open-llm-leaderboard-viz",
    "https://huggingface.co/spaces/OpenSafetyLab/Salad-Bench-Leaderboard",
    "https://huggingface.co/spaces/zweifisch/chatbot-arena-leaderboard",  # duplicate
    "https://huggingface.co/spaces/kevinpro/Open-Multilingual-Reasoning-Leaderboard",
    "https://huggingface.co/spaces/climateset/Leaderboard",  # empty
    "https://huggingface.co/spaces/0x9/finetuning_subnet_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/somosnlp/likes_leaderboard",  # error
    "https://huggingface.co/spaces/PatrickHaller/pecc-leaderboard",
    "https://huggingface.co/spaces/0x9/pretraining-leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/lovodkin93/FuseReviews-Leaderboard",
    "https://huggingface.co/spaces/asir0z/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/Aarifkhan/leaderboard-results-to-modelcard",  # duplicate # error
    "https://huggingface.co/spaces/AI-Vietnam/prompt-translation-vie-leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/mrfakename/open-leaderboards-leaderboard",
    "https://huggingface.co/spaces/arshy/leaderboard",  # error
    "https://huggingface.co/spaces/facebook/CyberSecEval",
    "https://huggingface.co/spaces/valory/olas-prediction-leaderboard",
    "https://huggingface.co/spaces/myshell-test/tts-subnet-leaderboard",
    "https://huggingface.co/spaces/Slep/LRVSF-Leaderboard",
    "https://huggingface.co/spaces/CZLC/BenCzechMark",
    "https://huggingface.co/spaces/malhajar/OpenLLMTurkishLeaderboard_v0.2",  # duplicate
    "https://huggingface.co/spaces/autogenCTF/agent_ctf_leaderboard",  # error
    "https://huggingface.co/spaces/AdityaNaidu/Open_LLM_Leaderboard",  # empty
    "https://huggingface.co/spaces/must-ai/open_vn_llm_leaderboard",  # empty
    "https://huggingface.co/spaces/must-ai/open_pk_llm_leaderboard",  # empty
    "https://huggingface.co/spaces/must-ai/open_in-llm_leaderboard",  # empty
    "https://huggingface.co/spaces/Cognitive-Lab/indic_llm_leaderboard",
    "https://huggingface.co/spaces/ichigoberry/Yet_Another_LLM_Leaderboard",  # duplicate
    "https://huggingface.co/spaces/kbmlcoding/open_llm_leaderboard_free",  # duplicate # error
    "https://huggingface.co/spaces/livecodebench/leaderboard",
    "https://huggingface.co/spaces/arshy/leaderboard-docker",  # error
    "https://huggingface.co/spaces/agicommies/synthia_subnet_leaderboard",
    "https://huggingface.co/spaces/wenhu/Science-Leaderboard",
    "https://huggingface.co/spaces/arshy/leaderboard-gradio",  # error
    "https://huggingface.co/spaces/FSMBench/Leaderboard",
    "https://huggingface.co/spaces/JarvisKi/Stable_Tool_Bench_Leaderboard",
    "https://huggingface.co/spaces/stabletoolbench/Stable_Tool_Bench_Leaderboard",
    "https://huggingface.co/spaces/PlixAI/pixel-subnet-leaderboard",  # error
    "https://huggingface.co/spaces/pngwn/open_llm_leaderboard-check",  # duplicate # error
    "https://huggingface.co/spaces/MohamedRashad/arabic-tokenizers-leaderboard",
    "https://huggingface.co/spaces/0x1668/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/ameerazam08/Paper-LeaderBoard",
    "https://huggingface.co/spaces/GPTEval3D/Leaderboard_dev",
    "https://huggingface.co/spaces/Infin/ai-detection-leaderboard",
    "https://huggingface.co/spaces/NexaAIDev/domain_llm_leaderboard",
    "https://huggingface.co/spaces/Adeco/open_medical_llm_leaderboard",  # duplicate
    "https://huggingface.co/spaces/Mollel/swahili-llm-leaderboard",  # error
    "https://huggingface.co/spaces/nan/leaderboard",  # error
    "https://huggingface.co/spaces/mteb/arena",
    "https://huggingface.co/spaces/Intel/low_bit_open_llm_leaderboard",
    "https://huggingface.co/spaces/TemryL/LLM-Disease-Risk-Leaderboard",
    "https://huggingface.co/spaces/ZhangYuhan/3DGen-Arena",
    "https://huggingface.co/spaces/yanolja/arena",
    "https://huggingface.co/spaces/k8si/mteb_leaderboard_mtr",  # duplicate # error
    "https://huggingface.co/spaces/rstless-research/italian_open_llm_leaderboard",
    "https://huggingface.co/spaces/AIR-Bench/leaderboard",  # duplicate
    "https://huggingface.co/spaces/Manavshah/dippy-leaderboard",  # error
    "https://huggingface.co/spaces/ybjeong/leaderboard_kr",  # error
    "https://huggingface.co/spaces/TIGER-Lab/LongICL-Leaderboard",
    "https://huggingface.co/spaces/Auto-Arena/Leaderboard",
    "https://huggingface.co/spaces/philipp-zettl/whisper-leaderboard",  # duplicate
    "https://huggingface.co/spaces/cot-leaderboard/open-cot-dashboard",
    "https://huggingface.co/spaces/EffiBench/effibench-leaderboard",
    "https://huggingface.co/spaces/MohamedRashad/timm-leaderboard",
    "https://huggingface.co/spaces/hmb/chatbot-arena-leaderboard2",  # duplicate
    "https://huggingface.co/spaces/hackathonM/Roleplay_leaderboard",  # duplicate
    "https://huggingface.co/spaces/ashikshaffi08/Synthia_Leaderboard",
    "https://huggingface.co/spaces/Exploration-Lab/IL-TUR-Leaderboard",
    "https://huggingface.co/spaces/dbasu/multilingual-chatbot-arena-leaderboard",  # duplicate
    "https://huggingface.co/spaces/verilog-llm/leaderboard",  # empty
    "https://huggingface.co/spaces/bdx33/GLUE-leaderboard",
    "https://huggingface.co/spaces/ashikshaffi08/Zangief-Leaderboard",  # error
    "https://huggingface.co/spaces/Open-Style/OSQ-Leaderboard",
    "https://huggingface.co/spaces/Exploration-Lab/BookSQL-Leaderboard",
    "https://huggingface.co/spaces/scb10x/multimodal-hallucination-leaderboard",
    "https://huggingface.co/spaces/MM-UPD/MM-UPD_Leaderboard",
    "https://huggingface.co/spaces/prometheus-eval/BiGGen-Bench-Leaderboard",
    "https://huggingface.co/spaces/eisenzopf/cc_leaderboard",  # empty
    "https://huggingface.co/spaces/Krisseck/IFEval-Leaderboard",
    "https://huggingface.co/spaces/ybjeong/leaderboard_kr_static",
    "https://huggingface.co/spaces/ChenYi99/EgoPlan-Bench_Leaderboard",
    "https://huggingface.co/spaces/LuxOAI/zen-chatbot-leaderboard",  # duplicate
    "https://huggingface.co/spaces/zhaorui-nb/de-identification-leaderboard",
    "https://huggingface.co/spaces/diluyedi/streamlit_leaderboard",
    "https://huggingface.co/spaces/snoopydoopy/Leaderboard",  # empty
    "https://huggingface.co/spaces/macrocosm-os/pretraining-leaderboard",  # error
    "https://huggingface.co/spaces/LuxOAI/ZEN_Hosted_Text-to-Image-Leaderboard",  # duplicate
    "https://huggingface.co/spaces/sled-umich/3D-POPE-leaderboard",
    "https://huggingface.co/spaces/AIM-Harvard/rabbits-leaderboard",
    "https://huggingface.co/spaces/GenSEC-LLM/task2_speaker_tagging_leaderboard",  # duplicate
    "https://huggingface.co/spaces/PL-MTEB/leaderboard",
    "https://huggingface.co/spaces/abhijeethp/embedding_arena",
    "https://huggingface.co/spaces/zhangzw16/ProbTS_leaderboard",  # error
    "https://huggingface.co/spaces/PersianLLM/Persian_Tokenizer_Leaderboard",  # empty
    "https://huggingface.co/spaces/yanirmr/hebrew-ASR-leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/aichampions/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/Randolphzeng/Meta-Reasoning-Leaderboard",
    "https://huggingface.co/spaces/macrocosm-os/finetuning-leaderboard",
    "https://huggingface.co/spaces/Korean-AI-Malpyeong-Leaderboard/Korean-AI-Malpyeong-Leaderboard",
    "https://huggingface.co/spaces/ziemke/hf-rl-leaderboard",  # empty
    "https://huggingface.co/spaces/bench-llms/or-bench-leaderboard",
    "https://huggingface.co/spaces/Adeco/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/samNakamoto/nya-leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/yajuniverse/BlinkCode_leaderboard",
    "https://huggingface.co/spaces/MJ-Bench/MJ-Bench-Leaderboard",
    "https://huggingface.co/spaces/PKU-Alignment/ProgressGym-LeaderBoard",
    "https://huggingface.co/spaces/emwebaze/SB_ASR_Leaderboard",
    "https://huggingface.co/spaces/hubistrauss/princeton_benchmarks_leaderboard",  # error
    "https://huggingface.co/spaces/3DGen/3DGen-Leaderboard",  # empty
    "https://huggingface.co/spaces/chan030609/muse_leaderboard",
    "https://huggingface.co/spaces/muse-bench/MUSE-Leaderboard",
    "https://huggingface.co/spaces/amu-cai/pl-asr-leaderboard",
    "https://huggingface.co/spaces/dylanebert/3d-arena",
    "https://huggingface.co/spaces/Aiera/aiera-finance-leaderboard",
    "https://huggingface.co/spaces/samNakamoto/yama-leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/chentong00/CopyBench-leaderboard",
    "https://huggingface.co/spaces/deepcode-ai/deepcode-models-leaderboard",  # error
    "https://huggingface.co/spaces/sartifyllc/Swahili-Text-Embeddings-Leaderboard",
    "https://huggingface.co/spaces/jszheng/RACE_leaderboard",
    "https://huggingface.co/spaces/meghsn/WebAgent-Leaderboard",
    "https://huggingface.co/spaces/opencompass/medbench_llm_leaderboard",
    "https://huggingface.co/spaces/seawolf2357/leaderboard_llm_price",
    "https://huggingface.co/spaces/Infin/sn32-testnet-leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/kunato-lab/leaderboard",  # deleted
    "https://huggingface.co/spaces/TwinDoc/leaderboard",
    "https://huggingface.co/spaces/onekq-ai/WebApp1K-models-leaderboard",
    "https://huggingface.co/spaces/Bowieee/StructEval_leaderboard",
    "https://huggingface.co/spaces/ai-forever/LIBRA-Leaderboard",
    "https://huggingface.co/spaces/SeaEval/SeaEval_Leaderboard_v1",  # duplicate
    "https://huggingface.co/spaces/Booking-com/rectour24-review-ranking-leaderboard-test",
    "https://huggingface.co/spaces/wombo/edge-maxxing-leaderboard",
    "https://huggingface.co/spaces/braindao/soliditybench-leaderboard",
    "https://huggingface.co/spaces/anirudh937/open_llm_leaderboard",  # duplicate # error
    "https://huggingface.co/spaces/smothiki/open_llm_leaderboard2",  # duplicate # error
    "https://huggingface.co/spaces/agent-evals/leaderboard",  # deleted
    "https://huggingface.co/spaces/junkim100/self-improving-leaderboard",
    "https://huggingface.co/spaces/choco9966/open-ko-llm-leaderboard-old",  # duplicate
    "https://huggingface.co/spaces/mii-llm/pinocchio-ita-leaderboard",
    "https://huggingface.co/spaces/aminabbasi/psycholex_leaderboard",
    "https://huggingface.co/spaces/smothiki/open_llm_leaderboard_old",  # duplicate
    "https://huggingface.co/spaces/awacke1/lmsys-chatbot-arena-leaderboard",  # duplicate
    "https://huggingface.co/spaces/BioMed-VITAL/leaderboard",  # error
    "https://huggingface.co/spaces/TIGER-Lab/VideoScore-Leaderboard",
    "https://huggingface.co/spaces/MLSB/leaderboard2024",
    "https://huggingface.co/spaces/HishamYahya/ZeroSumEval_Leaderboard",
    "https://huggingface.co/spaces/astroBench/astrobench-leaderboard",  # empty
    "https://huggingface.co/spaces/MatinaAI/persian_llm_leaderboard",  # duplicate
    "https://huggingface.co/spaces/cambioml/parser-leaderboard",
    "https://huggingface.co/spaces/Koshti10/multimodal-clem-leaderboard",  # duplicate
    "https://huggingface.co/spaces/wzxii/Memorization-or-Generation-of-Big-Code-Models-Leaderboard",
    "https://huggingface.co/spaces/yeqingzhao/open-ko-llm-leaderboard-old",
    "https://huggingface.co/spaces/hivex-research/hivex-leaderboard",
    "https://huggingface.co/spaces/taresco/open_african_languages_eval_leaderboard",
    "https://huggingface.co/spaces/pngwn/2024_Leaderboard",  # duplicate
    "https://huggingface.co/spaces/justus-tobias/Open_ASR_Leaderboard",
    "https://huggingface.co/spaces/LLMArena/llmarena-leaderboard",
    "https://huggingface.co/spaces/thinszx/open_vlm_leaderboard",  # duplicate
    "https://huggingface.co/spaces/lmarena/chatbot-arena-leaderboard",
    "https://huggingface.co/spaces/bitmind/dfd-arena-leaderboard",  # duplicate
    "https://huggingface.co/spaces/anonymous-birder/Leaderboard",
    "https://huggingface.co/spaces/caliangandrew/dfd-leaderboard",
    "https://huggingface.co/spaces/anglesplato/Deep-Reinforcement-Learning-Leaderboard-V2",  # duplicate # error
    "https://huggingface.co/spaces/Koshti10/clem-leaderboard",  # duplicate
    "https://huggingface.co/spaces/bhaskartripathi/llm_leaderboards360",
    "https://huggingface.co/spaces/EmbodiedAgentInterface/leaderboard",  # template
    "https://huggingface.co/spaces/AlyxTeam/leaderboard",  # template
    "https://huggingface.co/spaces/Atharv756/leaderboard",  # template
    "https://huggingface.co/spaces/kevinconka/leaderboard-test",  # template
    "https://huggingface.co/spaces/caliangandrew/leaderboard",  # template
    "https://huggingface.co/spaces/SisinfLab/multimodal-recommendation-leaderboard",  # template
    "https://huggingface.co/spaces/sebakc/leaderboard",  # template
    "https://huggingface.co/spaces/ariakang/test_leaderboard",  # template
    "https://huggingface.co/spaces/alvations/test-leaderboard",  # template
    "https://huggingface.co/spaces/sai9949/Gradio-Leaderboard",  # template
    "https://huggingface.co/spaces/lasamao/leaderboard",  # template
    "https://huggingface.co/spaces/KG0101/Diagnosis_Please-Leaderboard",  # template
    "https://huggingface.co/spaces/1tbfree/LeaderboardTest",  # template
    "https://huggingface.co/spaces/ivrit-ai/hebrew-transcription-leaderboard",  # template
    "https://huggingface.co/spaces/pidipirmansyah/Leaderboard",  # template
    "https://huggingface.co/spaces/not-lain/solidity-leaderboard",  # template
    "https://huggingface.co/spaces/sureshimprint/leaderboard",  # template
    "https://huggingface.co/spaces/Fadil369/LEADERBOARD",  # template
    "https://huggingface.co/spaces/RUCKBReasoning/spreadsheetbench-leaderboard",  # template
    "https://huggingface.co/spaces/innerspeech/open-speechbci-leaderboard",  # template
    "https://huggingface.co/spaces/albertvillanova/tmp-ds-leaderboard",  # template
    "https://huggingface.co/spaces/lczerolens/lichess-puzzles-leaderboard",  # template
    "https://huggingface.co/spaces/enochsj/leaderboard",  # template
    "https://huggingface.co/spaces/dbenayou/LeaderBoardTest_Gradio",  # template
    "https://huggingface.co/spaces/giux78/leaderboard",  # template
    "https://huggingface.co/spaces/orionai/model-leaderboard",  # template
    "https://huggingface.co/spaces/sronger/leaderboard",  # template
    "https://huggingface.co/spaces/freddyaboulton/gradio_leaderboard",  # template
    "https://huggingface.co/spaces/demo-leaderboard-backend/leaderboard",  # template
]

len(huggingface_leaderboard_primary)

In [185]:
# download the leaderboard archive from the Papers with Code website
import os
import requests
import gzip
import shutil

# # The Papers with Code leaderboards are conveniently accessible via the following URL.
url = "https://production-media.paperswithcode.com/about/evaluation-tables.json.gz"
download_path = f'{os.getcwd()}/{url.split("/")[-1]}'  # Path where you want to download the .gz file
extracted_path = f"{path_data}/.PapersWithCode.json"  # Path to save the extracted file

# Download the .gz file
response = requests.get(url, stream=True)
with open(download_path, "wb") as f:
    shutil.copyfileobj(response.raw, f)
print(f"Downloaded file saved to {download_path}")

# Unzip the .gz file
with gzip.open(download_path, "rb") as f_in:
    with open(extracted_path, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)
print(f"Extracted file saved to {extracted_path}")

# Remove the downloaded .gz file
os.remove(download_path)
print(f"Deleted the downloaded .gz file from {download_path}")

with open(f"{path_data}/.PapersWithCode.json", "r") as file:
    evaluation_data = json.load(file)

# extrac leaderboard URLs from the Papers with Code leaderboard archive
link_list = set()
for task in evaluation_data:
    for subtask in task["subtasks"]:
        for subsubtask in subtask["subtasks"]:
            for dataset in task["datasets"]:
                for dataset_link in dataset["dataset_links"]:
                    link_list.add(dataset_link["url"])
    for dataset in task["datasets"]:
        for dataset_link in dataset["dataset_links"]:
            link_list.add(dataset_link["url"])

link_list = list(link_list)
with open(f"{path_data}/PapersWithCode.json", "w") as json_file:
    json.dump(link_list, json_file, indent="\t")

len(link_list)

7538

In [None]:
# manually curate FM leaderboards hosted on Papers with Code
import webbrowser

# Read the spaces to a JSON file
with open(f"{path_data}/PapersWithCode.json", "r") as json_file:
    link_list = json.load(json_file)

paperswithcode_leaderboard_primary = []

for index, link in enumerate(link_list):
    print(index)
    webbrowser.open(link)
    match input("Press enter key to proceed."):
        case "esc":
            raise KeyboardInterrupt
        case "":
            continue
        case _:
            paperswithcode_leaderboard_primary.append(link)

In [148]:
# FM leaderboards hosted on Papers with Code
paperswithcode_leaderboard_primary = [
    "https://paperswithcode.com/sota/question-answering-on-quac",
    "https://paperswithcode.com/sota/aspect-based-sentiment-analysis-absa-on-asqp",
    "https://paperswithcode.com/sota/code-generation-on-turbulence",
    "https://paperswithcode.com/sota/speech-recognition-on-aishell-2-test-mic-1",
    "https://paperswithcode.com/sota/part-of-speech-tagging-on-xglue",
    "https://paperswithcode.com/sota/logical-reasoning-on-big-bench-logic-grid",
    "https://paperswithcode.com/sota/language-modelling-on-philpapers",
    "https://paperswithcode.com/sota/named-entity-recognition-on-ace-2005",
    "https://paperswithcode.com/sota/semantic-textual-similarity-on-sts13",
    "https://paperswithcode.com/sota/generative-visual-question-answering-on-pmc",
    "https://paperswithcode.com/sota/visual-object-tracking-on-tnl2k",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-big-bench-winowhy",
    "https://paperswithcode.com/sota/question-answering-on-social-iqa",
    "https://paperswithcode.com/sota/question-answering-on-boolq",
    "https://paperswithcode.com/sota/econometrics-on-big-bench",
    "https://paperswithcode.com/sota/mathematical-reasoning-on-lila-ood",
    "https://paperswithcode.com/sota/fact-checking-on-scifact-beir",
    "https://paperswithcode.com/sota/video-captioning-on-msr-vtt-1",
    "https://paperswithcode.com/sota/protein-structure-prediction-on-paenseq",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-8-0-dutch",
    "https://paperswithcode.com/sota/text-summarization-on-reddit-tifu",
    "https://paperswithcode.com/sota/audio-classification-on-vggsound",
    "https://paperswithcode.com/sota/visual-question-answering-vqa-on-ai2d",
    "https://paperswithcode.com/sota/question-answering-on-hotpotqa-beir",
    "https://paperswithcode.com/sota/machine-translation-on-frmt-portuguese",
    "https://paperswithcode.com/sota/professional-law-on-big-bench",
    "https://paperswithcode.com/sota/visual-question-answering-on-mm-vet",
    "https://paperswithcode.com/sota/clinical-knowledge-on-big-bench",
    "https://paperswithcode.com/sota/image-captioning-on-chebi-20",
    "https://paperswithcode.com/sota/factual-inconsistency-detection-in-chart",
    "https://paperswithcode.com/sota/object-counting-on-tallyqa-simple",
    "https://paperswithcode.com/sota/named-entity-recognition-on-bc2gm",
    "https://paperswithcode.com/sota/prehistory-on-big-bench",
    "https://paperswithcode.com/sota/video-question-answering-on-dramaqa",
    "https://paperswithcode.com/sota/logical-reasoning-on-big-bench-logical",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-german",
    "https://paperswithcode.com/sota/code-generation-on-bigcodebench-complete",
    "https://paperswithcode.com/sota/aspect-based-sentiment-analysis-absa-on-tasd",
    "https://paperswithcode.com/sota/question-answering-on-uniprotqa",
    "https://paperswithcode.com/sota/question-answering-on-websrc",
    "https://paperswithcode.com/sota/multi-label-text-classification-on-cc3m",
    "https://paperswithcode.com/sota/emotional-intelligence-on-emotional",
    "https://paperswithcode.com/sota/coreference-resolution-on-xwinograd-fr",
    "https://paperswithcode.com/sota/text-to-image-generation-on-ms-coco",
    "https://paperswithcode.com/sota/machine-translation-on-flores95-devtest-x-eng",
    "https://paperswithcode.com/sota/language-modelling-on-enwiki8-1",
    "https://paperswithcode.com/sota/visual-question-answering-vqa-on-illusionvqa",
    "https://paperswithcode.com/sota/zero-shot-transfer-image-classification-on-4",
    "https://paperswithcode.com/sota/language-modelling-on-fewclue-cluewsc-fc",
    "https://paperswithcode.com/sota/named-entity-recognition-ner-on-ncbi-disease",
    "https://paperswithcode.com/sota/language-modelling-on-dm-mathematics",
    "https://paperswithcode.com/sota/speech-to-text-translation-on-covost-2-eng-x",
    "https://paperswithcode.com/sota/language-modelling-on-hackernews",
    "https://paperswithcode.com/sota/visual-question-answering-vqa-on-coco",
    "https://paperswithcode.com/sota/video-based-generative-performance-1",
    "https://paperswithcode.com/sota/molecular-property-prediction-on-sider-1",
    "https://paperswithcode.com/sota/question-answering-on-docvqa",
    "https://paperswithcode.com/sota/visual-question-answering-on-msvd-qa-1",
    "https://paperswithcode.com/sota/automated-theorem-proving-on-minif2f-test",
    "https://paperswithcode.com/sota/protein-function-prediction-on-paenseq",
    "https://paperswithcode.com/sota/coreference-resolution-on-winograd-schema",
    "https://paperswithcode.com/sota/change-detection-on-levir-cd",
    "https://paperswithcode.com/sota/code-generation-on-bigcodebench-instruct",
    "https://paperswithcode.com/sota/question-answering-on-danetqa",
    "https://paperswithcode.com/sota/few-shot-text-classification-on-average-on",
    "https://paperswithcode.com/sota/visual-question-answering-on-vcr-qa-r-test",
    "https://paperswithcode.com/sota/image-captioning-on-flickr30k-captions-test",
    "https://paperswithcode.com/sota/video-question-answering-on-activitynet-qa",
    "https://paperswithcode.com/sota/molecular-property-prediction-on-bace-1",
    "https://paperswithcode.com/sota/legal-reasoning-on-legalbench-rule-recall",
    "https://paperswithcode.com/sota/machine-translation-on-wmt2014-english-german",
    "https://paperswithcode.com/sota/us-foreign-policy-on-big-bench",
    "https://paperswithcode.com/sota/visual-question-answering-on-vcr-q-a-test",
    "https://paperswithcode.com/sota/high-school-world-history-on-big-bench",
    "https://paperswithcode.com/sota/machine-translation-on-flores95-devtest-eng-x",
    "https://paperswithcode.com/sota/image-captioning-on-coco",
    "https://paperswithcode.com/sota/few-shot-learning-on-casehold",
    "https://paperswithcode.com/sota/language-modelling-on-bookcorpus2",
    "https://paperswithcode.com/sota/video-question-answering-on-situated",
    "https://paperswithcode.com/sota/zero-shot-learning-on-medconceptsqa",
    "https://paperswithcode.com/sota/speech-recognition-on-ted-lium",
    "https://paperswithcode.com/sota/personality-trait-recognition-on-synthpai",
    "https://paperswithcode.com/sota/visual-question-answering-on-vizwiz-2020-vqa",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-8-0-6",
    "https://paperswithcode.com/sota/reading-comprehension-on-muserc",
    "https://paperswithcode.com/sota/action-recognition-on-rareact",
    "https://paperswithcode.com/sota/language-modelling-on-c4",
    "https://paperswithcode.com/sota/speech-recognition-on-aishell-1",
    "https://paperswithcode.com/sota/world-religions-on-big-bench",
    "https://paperswithcode.com/sota/question-answering-on-copa",
    "https://paperswithcode.com/sota/relation-extraction-on-ace-2005",
    "https://paperswithcode.com/sota/few-shot-image-classification-on-cub-200-5",
    "https://paperswithcode.com/sota/semantic-segmentation-on-ade20k",
    "https://paperswithcode.com/sota/translation-on-iwslt-2017",
    "https://paperswithcode.com/sota/language-modelling-on-clue-ocnli-50k",
    "https://paperswithcode.com/sota/math-word-problem-solving-on-mawps",
    "https://paperswithcode.com/sota/math-word-problem-solving-on-asdiv-a",
    "https://paperswithcode.com/sota/analogical-similarity-on-big-bench",
    "https://paperswithcode.com/sota/few-shot-image-classification-on-cifar-fs-5-1",
    "https://paperswithcode.com/sota/recipe-generation-on-now-youre-cooking",
    "https://paperswithcode.com/sota/fact-checking-on-climate-fever-beir",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-commonsenseqa",
    "https://paperswithcode.com/sota/named-entity-recognition-on-ncbi-disease",
    "https://paperswithcode.com/sota/high-school-us-history-on-big-bench",
    "https://paperswithcode.com/sota/marketing-on-big-bench",
    "https://paperswithcode.com/sota/image-classification-on-imagenet",
    "https://paperswithcode.com/sota/question-answering-on-coco-visual-question",
    "https://paperswithcode.com/sota/on-big-bench-snarks",
    "https://paperswithcode.com/sota/stereotypical-bias-analysis-on-crows-pairs",
    "https://paperswithcode.com/sota/video-question-answering-on-ivqa",
    "https://paperswithcode.com/sota/text-classification-on-mteb",
    "https://paperswithcode.com/sota/language-modelling-on-arxiv",
    "https://paperswithcode.com/sota/biomedical-information-retrieval-on-nfcorpus-1",
    "https://paperswithcode.com/sota/human-sexuality-on-big-bench",
    "https://paperswithcode.com/sota/fact-checking-on-fever-beir",
    "https://paperswithcode.com/sota/natural-language-inference-on-rcb",
    "https://paperswithcode.com/sota/scene-text-recognition-on-icdar2013",
    "https://paperswithcode.com/sota/language-modelling-on-wikitext-2",
    "https://paperswithcode.com/sota/memorization-on-big-bench-hindu-knowledge",
    "https://paperswithcode.com/sota/parameter-efficient-fine-tuning-on-winogrande",
    "https://paperswithcode.com/sota/visual-question-answering-on-mmhal-bench",
    "https://paperswithcode.com/sota/image-to-text-retrieval-on-whoops",
    "https://paperswithcode.com/sota/few-shot-image-classification-on-imagenet-1-1",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-8-0-37",
    "https://paperswithcode.com/sota/international-law-on-big-bench",
    "https://paperswithcode.com/sota/nutrition-on-big-bench",
    "https://paperswithcode.com/sota/hate-speech-detection-on-ethos-binary",
    "https://paperswithcode.com/sota/attribute-value-extraction-on-oa-mine",
    "https://paperswithcode.com/sota/zero-shot-transfer-image-classification-on-9",
    "https://paperswithcode.com/sota/molecular-property-prediction-on-clintox-1",
    "https://paperswithcode.com/sota/few-shot-image-classification-on-tiered-1",
    "https://paperswithcode.com/sota/word-sense-disambiguation-on-russe",
    "https://paperswithcode.com/sota/visual-question-answering-vqa-on-core-mm",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-8-0-22",
    "https://paperswithcode.com/sota/music-question-answering-on-musicqa",
    "https://paperswithcode.com/sota/video-based-generative-performance",
    "https://paperswithcode.com/sota/language-modelling-on-clue-wsc1-1",
    "https://paperswithcode.com/sota/image-to-text-retrieval-on-flickr30k",
    "https://paperswithcode.com/sota/text-classification-on-this-is-not-a-dataset",
    "https://paperswithcode.com/sota/question-answering-on-convfinqa",
    "https://paperswithcode.com/sota/language-modelling-on-clue-drcd",
    "https://paperswithcode.com/sota/speech-recognition-on-tedlium",
    "https://paperswithcode.com/sota/logical-fallacies-on-big-bench",
    "https://paperswithcode.com/sota/on-big-bench-ruin-names",
    "https://paperswithcode.com/sota/logical-reasoning-on-lingoly",
    "https://paperswithcode.com/sota/speech-recognition-on-chime-6-eval",
    "https://paperswithcode.com/sota/code-generation-on-apps",
    "https://paperswithcode.com/sota/language-modelling-on-salmon",
    "https://paperswithcode.com/sota/molecular-property-prediction-on-moleculenet",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-8-0-19",
    "https://paperswithcode.com/sota/action-recognition-in-videos-on-something-1",
    "https://paperswithcode.com/sota/visual-question-answering-on-vqa-v2-test-dev-1",
    "https://paperswithcode.com/sota/mmr-total-on-mrr-benchmark",
    "https://paperswithcode.com/sota/legal-reasoning-on-legalbench-issue-spotting",
    "https://paperswithcode.com/sota/scene-text-recognition-on-iiit5k",
    "https://paperswithcode.com/sota/arithmetic-reasoning-on-multiarith",
    "https://paperswithcode.com/sota/audio-classification-on-epic-sounds",
    "https://paperswithcode.com/sota/semantic-textual-similarity-on-sts16",
    "https://paperswithcode.com/sota/question-answering-on-truthfulqa",
    "https://paperswithcode.com/sota/open-information-extraction-on-carb",
    "https://paperswithcode.com/sota/language-modelling-on-wikitext-103",
    "https://paperswithcode.com/sota/language-modelling-on-openwebtext2",
    "https://paperswithcode.com/sota/speech-recognition-on-aishell-2-test-ios",
    "https://paperswithcode.com/sota/visual-question-answering-on-gqa-test-dev",
    "https://paperswithcode.com/sota/medical-genetics-on-big-bench",
    "https://paperswithcode.com/sota/speech-recognition-on-open-slr",
    "https://paperswithcode.com/sota/image-captioning-on-nocaps-out-of-domain",
    "https://paperswithcode.com/sota/math-word-problem-solving-on-svamp",
    "https://paperswithcode.com/sota/text-to-image-generation-on-coco",
    "https://paperswithcode.com/sota/speech-recognition-on-robust-speech-event",
    "https://paperswithcode.com/sota/visual-question-answering-on-vcr-q-ar-test",
    "https://paperswithcode.com/sota/semantic-textual-similarity-on-mteb",
    "https://paperswithcode.com/sota/discourse-parsing-on-rst-dt",
    "https://paperswithcode.com/sota/professional-medicine-on-big-bench",
    "https://paperswithcode.com/sota/entity-resolution-on-abt-buy",
    "https://paperswithcode.com/sota/semantic-parsing-on-webquestionssp",
    "https://paperswithcode.com/sota/language-modelling-on-big-bench-lite",
    "https://paperswithcode.com/sota/security-studies-on-big-bench",
    "https://paperswithcode.com/sota/question-answering-on-triviaqa",
    "https://paperswithcode.com/sota/code-generation-on-pecc",
    "https://paperswithcode.com/sota/language-modelling-on-the-pile",
    "https://paperswithcode.com/sota/future-hand-prediction-on-ego4d",
    "https://paperswithcode.com/sota/action-recognition-in-videos-on-something",
    "https://paperswithcode.com/sota/question-answering-on-popqa",
    "https://paperswithcode.com/sota/natural-language-inference-on-qnli",
    "https://paperswithcode.com/sota/bias-detection-on-rt-inod-bias",
    "https://paperswithcode.com/sota/video-retrieval-on-vatex",
    "https://paperswithcode.com/sota/auto-debugging-on-big-bench-lite",
    "https://paperswithcode.com/sota/intent-recognition-on-big-bench",
    "https://paperswithcode.com/sota/high-school-european-history-on-big-bench",
    "https://paperswithcode.com/sota/entity-resolution-on-wdc-products-80-cc-seen",
    "https://paperswithcode.com/sota/question-answering-on-bioasq",
    "https://paperswithcode.com/sota/image-captioning-on-nocaps-val-overall",
    "https://paperswithcode.com/sota/visual-question-answering-on-vqa-v2-val-1",
    "https://paperswithcode.com/sota/zero-shot-learning-on-tvqa",
    "https://paperswithcode.com/sota/grammatical-error-correction-on-ua-gec",
    "https://paperswithcode.com/sota/speech-recognition-on-librispeech-test-clean",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-record",
    "https://paperswithcode.com/sota/speaker-attribution-in-german-parliamentary",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-big-bench-causal",
    "https://paperswithcode.com/sota/aspect-based-sentiment-analysis-on-semeval-6",
    "https://paperswithcode.com/sota/question-answering-on-vnhsge-physics",
    "https://paperswithcode.com/sota/visual-question-answering-vqa-on-whoops",
    "https://paperswithcode.com/sota/question-answering-on-vnhsge-civic",
    "https://paperswithcode.com/sota/natural-language-queries-on-ego4d",
    "https://paperswithcode.com/sota/code-generation-on-verified-smart-contract",
    "https://paperswithcode.com/sota/visual-question-answering-on-ok-vqa",
    "https://paperswithcode.com/sota/action-recognition-in-videos-on-ucf101",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-japanese",
    "https://paperswithcode.com/sota/short-term-object-interaction-anticipation-on",
    "https://paperswithcode.com/sota/question-answering-on-strategyqa",
    "https://paperswithcode.com/sota/natural-language-inference-on-wnli",
    "https://paperswithcode.com/sota/action-recognition-in-videos-on-hmdb-51",
    "https://paperswithcode.com/sota/semantic-parsing-on-cfq",
    "https://paperswithcode.com/sota/machine-translation-on-frmt-portuguese-brazil",
    "https://paperswithcode.com/sota/video-captioning-on-shot2story20k",
    "https://paperswithcode.com/sota/question-answering-on-vnhsge-literature",
    "https://paperswithcode.com/sota/visual-question-answering-on-vip-bench",
    "https://paperswithcode.com/sota/video-retrieval-on-msvd",
    "https://paperswithcode.com/sota/information-retrieval-on-cqadupstack",
    "https://paperswithcode.com/sota/few-shot-image-classification-on-inaturalist-1",
    "https://paperswithcode.com/sota/natural-language-inference-on-multinli",
    "https://paperswithcode.com/sota/video-question-answering-on-intentqa",
    "https://paperswithcode.com/sota/speech-recognition-on-robust-speech-event-dev",
    "https://paperswithcode.com/sota/language-modelling-on-pubmed-abstracts",
    "https://paperswithcode.com/sota/molecular-property-prediction-on-bbbp-1",
    "https://paperswithcode.com/sota/identify-odd-metapor-on-big-bench",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-arc-challenge",
    "https://paperswithcode.com/sota/question-answering-on-story-cloze",
    "https://paperswithcode.com/sota/on-gpqa",
    "https://paperswithcode.com/sota/protein-structure-prediction-on-uniprotseq",
    "https://paperswithcode.com/sota/few-shot-image-classification-on-inaturalist-2",
    "https://paperswithcode.com/sota/language-modelling-on-freelaw",
    "https://paperswithcode.com/sota/odd-one-out-on-big-bench",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-8-0-french",
    "https://paperswithcode.com/sota/language-modelling-on-opensubtitles",
    "https://paperswithcode.com/sota/video-retrieval-on-didemo",
    "https://paperswithcode.com/sota/sentiment-analysis-on-mr",
    "https://paperswithcode.com/sota/video-captioning-on-activitynet-captions",
    "https://paperswithcode.com/sota/logical-reasoning-on-big-bench-penguins-in-a",
    "https://paperswithcode.com/sota/few-shot-learning-on-medconceptsqa",
    "https://paperswithcode.com/sota/high-school-macroeconomics-on-big-bench",
    "https://paperswithcode.com/sota/few-shot-image-classification-on-imagenet-5",
    "https://paperswithcode.com/sota/protein-structure-prediction-on-caspseq",
    "https://paperswithcode.com/sota/human-organs-senses-multiple-choice-on-big",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-big-bench-logical",
    "https://paperswithcode.com/sota/language-modelling-on-gutenberg-pg-19",
    "https://paperswithcode.com/sota/text-to-video-generation-on-ucf-101",
    "https://paperswithcode.com/sota/language-modelling-on-ubuntu-irc",
    "https://paperswithcode.com/sota/question-answering-on-graphquestions",
    "https://paperswithcode.com/sota/zero-shot-transfer-image-classification-on-1",
    "https://paperswithcode.com/sota/visual-question-answering-on-tgif-qa",
    "https://paperswithcode.com/sota/image-captioning-on-iu-x-ray",
    "https://paperswithcode.com/sota/explanation-generation-on-whoops",
    "https://paperswithcode.com/sota/video-question-answering-on-vlep",
    "https://paperswithcode.com/sota/robot-task-planning-on-sheetcopilot",
    "https://paperswithcode.com/sota/linguistic-acceptability-on-cola",
    "https://paperswithcode.com/sota/factual-inconsistency-detection-in-chart-3",
    "https://paperswithcode.com/sota/semantic-parsing-on-spider",
    "https://paperswithcode.com/sota/moment-retrieval-on-charades-sta",
    "https://paperswithcode.com/sota/text-clustering-on-mteb",
    "https://paperswithcode.com/sota/visual-question-answering-on-mm-vet-w-o",
    "https://paperswithcode.com/sota/question-answering-on-vnhsge-english",
    "https://paperswithcode.com/sota/motion-planning-on-nuscenes",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-big-bench",
    "https://paperswithcode.com/sota/question-answering-on-fiqa-2018-beir",
    "https://paperswithcode.com/sota/high-school-microeconomics-on-big-bench",
    "https://paperswithcode.com/sota/language-modelling-on-curation-corpus",
    "https://paperswithcode.com/sota/natural-language-inference-on-terra",
    "https://paperswithcode.com/sota/meme-classification-on-hateful-memes",
    "https://paperswithcode.com/sota/data-to-text-generation-on-totto",
    "https://paperswithcode.com/sota/text-summarization-on-x-sum",
    "https://paperswithcode.com/sota/few-shot-image-classification-on-imagenet-10",
    "https://paperswithcode.com/sota/translation-on-phomt",
    "https://paperswithcode.com/sota/code-generation-on-codecontests",
    "https://paperswithcode.com/sota/question-answering-on-bamboogle",
    "https://paperswithcode.com/sota/question-answering-on-quora-question-pairs",
    "https://paperswithcode.com/sota/openapi-code-completion-on-openapi-code",
    "https://paperswithcode.com/sota/semantic-textual-similarity-on-sts12",
    "https://paperswithcode.com/sota/multimodal-reasoning-on-rebus",
    "https://paperswithcode.com/sota/high-school-psychology-on-big-bench",
    "https://paperswithcode.com/sota/public-relations-on-big-bench",
    "https://paperswithcode.com/sota/question-answering-on-nq-beir",
    "https://paperswithcode.com/sota/general-knowledge-on-big-bench",
    "https://paperswithcode.com/sota/question-answering-on-wikitablequestions",
    "https://paperswithcode.com/sota/visual-question-answering-on-v-bench",
    "https://paperswithcode.com/sota/question-answering-on-quality",
    "https://paperswithcode.com/sota/language-modelling-on-clue-c3",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-frisian",
    "https://paperswithcode.com/sota/machine-translation-on-frmt-chinese-taiwan",
    "https://paperswithcode.com/sota/parameter-efficient-fine-tuning-on-boolq",
    "https://paperswithcode.com/sota/language-modelling-on-lambada",
    "https://paperswithcode.com/sota/professional-psychology-on-big-bench",
    "https://paperswithcode.com/sota/arabic-text-diacritization-on-catt-dataset",
    "https://paperswithcode.com/sota/molecule-captioning-on-chebi-20",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-8-0-hindi",
    "https://paperswithcode.com/sota/long-context-understanding-on-ada-leval-tsort",
    "https://paperswithcode.com/sota/anatomy-on-big-bench",
    "https://paperswithcode.com/sota/text-to-audio-retrieval-on-clotho",
    "https://paperswithcode.com/sota/question-answering-on-vnhsge-geography",
    "https://paperswithcode.com/sota/question-answering-on-natural-questions",
    "https://paperswithcode.com/sota/relation-extraction-on-semeval-2010-task-8-1",
    "https://paperswithcode.com/sota/question-answering-on-drop",
    "https://paperswithcode.com/sota/visual-question-answering-on-vqa-v2-test-dev",
    "https://paperswithcode.com/sota/scene-text-recognition-on-icdar2015",
    "https://paperswithcode.com/sota/scene-text-recognition-on-svt",
    "https://paperswithcode.com/sota/aspect-based-sentiment-analysis-absa-on-aste",
    "https://paperswithcode.com/sota/question-answering-on-squad11-dev",
    "https://paperswithcode.com/sota/open-information-extraction-on-oie2016",
    "https://paperswithcode.com/sota/image-classification-on-imagenet-v2",
    "https://paperswithcode.com/sota/video-question-answering-on-next-qa",
    "https://paperswithcode.com/sota/image-captioning-on-nocaps-val-out-domain",
    "https://paperswithcode.com/sota/biomedical-information-retrieval-on-bioasq-1",
    "https://paperswithcode.com/sota/college-medicine-on-big-bench",
    "https://paperswithcode.com/sota/language-modelling-on-fewclue-ocnli-fc",
    "https://paperswithcode.com/sota/object-localization-on-illusionvqa",
    "https://paperswithcode.com/sota/speech-recognition-on-aishell-2-test-android-1",
    "https://paperswithcode.com/sota/video-prediction-on-kinetics-600-12-frames",
    "https://paperswithcode.com/sota/zero-shot-transfer-image-classification-on-3",
    "https://paperswithcode.com/sota/computer-security-on-big-bench",
    "https://paperswithcode.com/sota/image-captioning-on-nocaps-val-near-domain",
    "https://paperswithcode.com/sota/visual-reasoning-on-bongard-openworld",
    "https://paperswithcode.com/sota/visual-grounding-on-refcoco-testa",
    "https://paperswithcode.com/sota/video-grounding-on-qvhighlights",
    "https://paperswithcode.com/sota/generative-3d-object-classification-on-1",
    "https://paperswithcode.com/sota/action-recognition-in-videos-on-activitynet",
    "https://paperswithcode.com/sota/speech-recognition-on-projecte-aina-parlament",
    "https://paperswithcode.com/sota/question-answering-on-blurb",
    "https://paperswithcode.com/sota/attribute-value-extraction-on-wdc-pave",
    "https://paperswithcode.com/sota/code-completion-on-safim",
    "https://paperswithcode.com/sota/question-answering-on-drop-test",
    "https://paperswithcode.com/sota/biomedical-information-retrieval-on-trec-1",
    "https://paperswithcode.com/sota/semantic-textual-similarity-on-sts-benchmark",
    "https://paperswithcode.com/sota/temporal-action-localization-on-activitynet",
    "https://paperswithcode.com/sota/mathematical-reasoning-on-mmlu-mathematics",
    "https://paperswithcode.com/sota/protein-structure-prediction-on-caspsimseq",
    "https://paperswithcode.com/sota/3d-object-captioning-on-objaverse-1",
    "https://paperswithcode.com/sota/language-modelling-on-clue-cmnli",
    "https://paperswithcode.com/sota/scene-text-recognition-on-uber-text",
    "https://paperswithcode.com/sota/action-recognition-on-ava-v2-2",
    "https://paperswithcode.com/sota/question-answering-on-naturalqa",
    "https://paperswithcode.com/sota/math-word-problem-solving-on-paramawps",
    "https://paperswithcode.com/sota/protein-secondary-structure-prediction-on-6",
    "https://paperswithcode.com/sota/coreference-resolution-on-xwinograd-en",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-8-0-21",
    "https://paperswithcode.com/sota/few-shot-image-classification-on-mini-2",
    "https://paperswithcode.com/sota/on-big-bench-hard",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-big-bench-sports",
    "https://paperswithcode.com/sota/long-context-understanding-on-mmneedle",
    "https://paperswithcode.com/sota/sentiment-analysis-on-sst-2-binary",
    "https://paperswithcode.com/sota/named-entity-recognition-on-conll03",
    "https://paperswithcode.com/sota/zero-shot-action-recognition-on-kinetics",
    "https://paperswithcode.com/sota/few-shot-image-classification-on-mini-3",
    "https://paperswithcode.com/sota/sentiment-analysis-on-cr",
    "https://paperswithcode.com/sota/chart-question-answering-on-chartqa",
    "https://paperswithcode.com/sota/question-answering-on-medqa-usmle",
    "https://paperswithcode.com/sota/moment-retrieval-on-qvhighlights",
    "https://paperswithcode.com/sota/question-answering-on-infographicvqa",
    "https://paperswithcode.com/sota/state-change-object-detection-on-ego4d",
    "https://paperswithcode.com/sota/language-modelling-on-clue-cmrc2018",
    "https://paperswithcode.com/sota/few-shot-learning-on-pubmedqa",
    "https://paperswithcode.com/sota/image-classification-on-id-pattern-dataset",
    "https://paperswithcode.com/sota/semantic-textual-similarity-on-mrpc",
    "https://paperswithcode.com/sota/protein-function-prediction-on-uniprotseq",
    "https://paperswithcode.com/sota/text-generation-on-harmfulqa",
    "https://paperswithcode.com/sota/visual-question-answering-vqa-on",
    "https://paperswithcode.com/sota/image-captioning-on-peir-gross",
    "https://paperswithcode.com/sota/code-generation-on-mbpp",
    "https://paperswithcode.com/sota/image-retrieval-on-coco",
    "https://paperswithcode.com/sota/generative-3d-object-classification-on-2",
    "https://paperswithcode.com/sota/machine-translation-on-frmt-chinese-mainland",
    "https://paperswithcode.com/sota/text-to-sql-on-spider",
    "https://paperswithcode.com/sota/text-based-de-novo-molecule-generation-on",
    "https://paperswithcode.com/sota/video-question-answering-on-tvqa",
    "https://paperswithcode.com/sota/text-simplification-on-turkcorpus",
    "https://paperswithcode.com/sota/image-classification-on-jft-300m",
    "https://paperswithcode.com/sota/language-modelling-on-clue-afqmc",
    "https://paperswithcode.com/sota/named-entity-recognition-ner-on-conll-2003",
    "https://paperswithcode.com/sota/visual-reasoning-on-winoground",
    "https://paperswithcode.com/sota/question-answering-on-storycloze",
    "https://paperswithcode.com/sota/code-generation-on-res-q",
    "https://paperswithcode.com/sota/image-registration-on-unpaired-abdomen-ct",
    "https://paperswithcode.com/sota/text-to-sql-on-spider-1",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-rucos",
    "https://paperswithcode.com/sota/high-school-government-and-politics-on-big",
    "https://paperswithcode.com/sota/question-answering-on-vnhsge-chemistry",
    "https://paperswithcode.com/sota/high-school-geography-on-big-bench",
    "https://paperswithcode.com/sota/natural-language-inference-on-tabfact",
    "https://paperswithcode.com/sota/question-answering-on-kqa-pro",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-8-0-4",
    "https://paperswithcode.com/sota/logical-reasoning-on-big-bench-temporal",
    "https://paperswithcode.com/sota/probing-language-models-on-kamel",
    "https://paperswithcode.com/sota/on-big-bench-navigate",
    "https://paperswithcode.com/sota/image-captioning-on-nocaps-in-domain",
    "https://paperswithcode.com/sota/entity-resolution-on-amazon-google",
    "https://paperswithcode.com/sota/question-answering-on-vnhsge-mathematics-1",
    "https://paperswithcode.com/sota/question-answering-on-agi-eval",
    "https://paperswithcode.com/sota/vision-and-language-navigation-on-touchdown",
    "https://paperswithcode.com/sota/language-modelling-on-nih-exporter",
    "https://paperswithcode.com/sota/audio-captioning-on-clotho",
    "https://paperswithcode.com/sota/speech-recognition-on-chime-6-dev-gss12",
    "https://paperswithcode.com/sota/attribute-value-extraction-on-ae-110k",
    "https://paperswithcode.com/sota/sentence-ordering-on-econlogicqa",
    "https://paperswithcode.com/sota/language-modelling-on-github",
    "https://paperswithcode.com/sota/classification-on-medsecid",
    "https://paperswithcode.com/sota/visual-question-answering-on-a-okvqa",
    "https://paperswithcode.com/sota/few-shot-image-classification-on-cub-200-5-1",
    "https://paperswithcode.com/sota/named-entity-recognition-on-bc4chemd",
    "https://paperswithcode.com/sota/code-generation-on-taco-code",
    "https://paperswithcode.com/sota/reading-comprehension-on-race",
    "https://paperswithcode.com/sota/text-summarization-on-samsum-corpus",
    "https://paperswithcode.com/sota/word-sense-disambiguation-on-words-in-context",
    "https://paperswithcode.com/sota/language-modelling-on-pile-cc",
    "https://paperswithcode.com/sota/recipe-generation-on-allrecipescom",
    "https://paperswithcode.com/sota/text-to-audio-retrieval-on-audiocaps",
    "https://paperswithcode.com/sota/machine-translation-on-flores-200",
    "https://paperswithcode.com/sota/video-retrieval-on-lsmdc",
    "https://paperswithcode.com/sota/molecular-property-prediction-on-tox21-1",
    "https://paperswithcode.com/sota/question-answering-on-piqa",
    "https://paperswithcode.com/sota/language-modelling-on-fewclue-chid-fc",
    "https://paperswithcode.com/sota/age-estimation-on-imdb-clean",
    "https://paperswithcode.com/sota/virology-on-big-bench",
    "https://paperswithcode.com/sota/sociology-on-big-bench",
    "https://paperswithcode.com/sota/visual-question-answering-vqa-on-imagenet",
    "https://paperswithcode.com/sota/math-word-problem-solving-on-math-minival",
    "https://paperswithcode.com/sota/visual-question-answering-on-amber",
    "https://paperswithcode.com/sota/semantic-textual-similarity-on-sts14",
    "https://paperswithcode.com/sota/jurisprudence-on-big-bench",
    "https://paperswithcode.com/sota/speech-recognition-on-librispeech-test-other",
    "https://paperswithcode.com/sota/visual-question-answering-on-vqa-v2-val",
    "https://paperswithcode.com/sota/question-answering-on-next-qa-open-ended",
    "https://paperswithcode.com/sota/language-modelling-on-pubmed-central",
    "https://paperswithcode.com/sota/image-captioning-on-nocaps-val-in-domain",
    "https://paperswithcode.com/sota/question-answering-on-multirc",
    "https://paperswithcode.com/sota/video-retrieval-on-msr-vtt-1ka",
    "https://paperswithcode.com/sota/speaker-attribution-in-german-parliamentary-1",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-big-bench-known",
    "https://paperswithcode.com/sota/question-answering-on-pubmedqa",
    "https://paperswithcode.com/sota/parameter-efficient-fine-tuning-on-hellaswag",
    "https://paperswithcode.com/sota/sentence-completion-on-hellaswag",
    "https://paperswithcode.com/sota/few-shot-learning-on-mednli",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-8-0-13",
    "https://paperswithcode.com/sota/human-aging-on-big-bench",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-arc-easy",
    "https://paperswithcode.com/sota/cross-lingual-transfer-on-xcopa",
    "https://paperswithcode.com/sota/visual-question-answering-vqa-on-3",
    "https://paperswithcode.com/sota/time-series-forecasting-on-etth1-48-3",
    "https://paperswithcode.com/sota/scene-text-recognition-on-wost",
    "https://paperswithcode.com/sota/management-on-big-bench",
    "https://paperswithcode.com/sota/logical-reasoning-on-big-bench-strategyqa",
    "https://paperswithcode.com/sota/instruction-following-on-ifeval",
    "https://paperswithcode.com/sota/language-modelling-on-fewclue-eprstmt",
    "https://paperswithcode.com/sota/arithmetic-reasoning-on-gsm8k",
    "https://paperswithcode.com/sota/video-question-answering-on-mvbench",
    "https://paperswithcode.com/sota/question-answering-on-finqa",
    "https://paperswithcode.com/sota/acoustic-scene-classification-on-tut-acoustic",
    "https://paperswithcode.com/sota/question-answering-on-vnhsge-biology",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-winogrande",
    "https://paperswithcode.com/sota/video-retrieval-on-youcook2",
    "https://paperswithcode.com/sota/machine-translation-on-wmt2016-english-1",
    "https://paperswithcode.com/sota/protein-function-prediction-on-caspsimseq",
    "https://paperswithcode.com/sota/audio-classification-on-fsd50k",
    "https://paperswithcode.com/sota/recipe-generation-on-foodcom",
    "https://paperswithcode.com/sota/temporal-action-localization-on-fineaction",
    "https://paperswithcode.com/sota/zero-shot-transfer-image-classification-on-cn",
    "https://paperswithcode.com/sota/video-summarization-on-shot2story20k",
    "https://paperswithcode.com/sota/math-word-problem-solving-on-math",
    "https://paperswithcode.com/sota/zero-shot-transfer-image-classification-on-5",
    "https://paperswithcode.com/sota/question-answering-on-race",
    "https://paperswithcode.com/sota/zero-shot-transfer-image-classification-on-17",
    "https://paperswithcode.com/sota/visual-question-answering-vqa-on-core-mm-1",
    "https://paperswithcode.com/sota/semantic-textual-similarity-on-sick-r-1",
    "https://paperswithcode.com/sota/visual-grounding-on-refcoco-test-b",
    "https://paperswithcode.com/sota/mathematical-reasoning-on-lila-iid",
    "https://paperswithcode.com/sota/visual-question-answering-vqa-on-activitynet-1",
    "https://paperswithcode.com/sota/information-retrieval-on-mteb",
    "https://paperswithcode.com/sota/image-retrieval-on-flickr30k",
    "https://paperswithcode.com/sota/video-question-answering-on-msrvtt-qa",
    "https://paperswithcode.com/sota/question-answering-on-webquestionssp",
    "https://paperswithcode.com/sota/logical-reasoning-on-big-bench-reasoning",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-7-0-hindi",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-english",
    "https://paperswithcode.com/sota/philosophy-on-big-bench",
    "https://paperswithcode.com/sota/video-captioning-on-vatex-1",
    "https://paperswithcode.com/sota/language-modelling-on-books3",
    "https://paperswithcode.com/sota/visual-question-answering-on-mmbench",
    "https://paperswithcode.com/sota/semantic-textual-similarity-on-sick",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-big-bench-date",
    "https://paperswithcode.com/sota/question-answering-on-pubchemqa",
    "https://paperswithcode.com/sota/machine-translation-on-wmt2014-french-english",
    "https://paperswithcode.com/sota/video-retrieval-on-msr-vtt",
    "https://paperswithcode.com/sota/few-shot-image-classification-on-inaturalist-3",
    "https://paperswithcode.com/sota/long-context-understanding-on-ada-leval",
    "https://paperswithcode.com/sota/named-entity-recognition-on-anatem",
    "https://paperswithcode.com/sota/visual-question-answering-vqa-on-infoseek",
    "https://paperswithcode.com/sota/acoustic-scene-classification-on-cochlscene",
    "https://paperswithcode.com/sota/chatbot-on-alpacaeval",
    "https://paperswithcode.com/sota/question-answering-on-webquestions",
    "https://paperswithcode.com/sota/question-answering-on-vnhsge-history",
    "https://paperswithcode.com/sota/visual-grounding-on-refcoco-val",
    "https://paperswithcode.com/sota/visual-question-answering-on-docvqa-test",
    "https://paperswithcode.com/sota/audio-classification-on-vocalsound",
    "https://paperswithcode.com/sota/vehicle-re-identification-on-veri-wild-small",
    "https://paperswithcode.com/sota/image-captioning-on-object-halbench",
    "https://paperswithcode.com/sota/visual-question-answering-on-vqa-v2-1",
    "https://paperswithcode.com/sota/image-captioning-on-coco-captions",
    "https://paperswithcode.com/sota/natural-language-inference-on-anli-test",
    "https://paperswithcode.com/sota/table-based-fact-verification-on-tabfact",
    "https://paperswithcode.com/sota/machine-learning-on-big-bench",
    "https://paperswithcode.com/sota/visual-question-answering-on-mm-vet-v2",
    "https://paperswithcode.com/sota/logical-reasoning-on-big-bench-formal",
    "https://paperswithcode.com/sota/zero-shot-transfer-image-classification-on-6",
    "https://paperswithcode.com/sota/semantic-textual-similarity-on-sts15",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-7-0-13",
    "https://paperswithcode.com/sota/action-recognition-on-hacs",
    "https://paperswithcode.com/sota/language-modelling-on-penn-treebank-word",
    "https://paperswithcode.com/sota/speech-recognition-on-common-voice-7-0-5",
    "https://paperswithcode.com/sota/automated-theorem-proving-on-minif2f-1",
    "https://paperswithcode.com/sota/common-sense-reasoning-on-parus",
    "https://paperswithcode.com/sota/video-question-answering-on-perception-test",
    "https://paperswithcode.com/sota/task-oriented-dialogue-systems-on-kvret",
    "https://paperswithcode.com/sota/language-modelling-on-fewclue-bustm",
    "https://paperswithcode.com/sota/question-answering-on-openbookqa",
    "https://paperswithcode.com/sota/entity-resolution-on-wdc-products",
    "https://paperswithcode.com/sota/referring-expression-segmentation-on-refcoco-5",
    "https://paperswithcode.com/sota/image-captioning-on-whoops",
    "https://paperswithcode.com/sota/video-generation-on-ucf-101",
    "https://paperswithcode.com/sota/sarcasm-detection-on-big-bench-snarks",
    "https://paperswithcode.com/sota/image-classification-on-clevr-count",
    "https://paperswithcode.com/sota/natural-language-inference-on-commitmentbank",
    "https://paperswithcode.com/sota/on-big-bench-hyperbaton",
    "https://paperswithcode.com/sota/visual-question-answering-on-msrvtt-qa-1",
    "https://paperswithcode.com/sota/molecular-property-prediction-on-hiv-dataset",
    "https://paperswithcode.com/sota/bias-detection-on-stereoset-1",
    "https://paperswithcode.com/sota/grammatical-error-correction-on-conll-2014",
    "https://paperswithcode.com/sota/scene-text-recognition-on-ic19-art",
    "https://paperswithcode.com/sota/visual-question-answering-on-benchlmm",
    "https://paperswithcode.com/sota/chart-question-answering-on-plotqa",
    "https://paperswithcode.com/sota/question-answering-on-coqa",
    "https://paperswithcode.com/sota/named-entity-recognition-on-findvehicle",
    "https://paperswithcode.com/sota/visual-question-answering-on-plotqa-d1-1",
    "https://paperswithcode.com/sota/visual-question-answering-on-plotqa-d2-1",
    "https://paperswithcode.com/sota/natural-language-inference-on-rte",
    "https://paperswithcode.com/sota/video-retrieval-on-activitynet",
    "https://paperswithcode.com/sota/language-modelling-on-stackexchange",
    "https://paperswithcode.com/sota/machine-translation-on-wmt2016-english-german",
    "https://paperswithcode.com/sota/factual-inconsistency-detection-in-chart-1",
    "https://paperswithcode.com/sota/speech-recognition-on-spgispeech",
    "https://paperswithcode.com/sota/machine-translation-on-wmt2016-romanian",
    "https://paperswithcode.com/sota/conversational-web-navigation-on-weblinx",
    "https://paperswithcode.com/sota/vcgbench-diverse-on-videoinstruct",
    "https://paperswithcode.com/sota/multimodal-reasoning-on-algopuzzlevqa",
    "https://paperswithcode.com/sota/2d-human-pose-estimation-on-coco-wholebody-1",
    "https://paperswithcode.com/sota/language-modelling-on-uspto-backgrounds",
    "https://paperswithcode.com/sota/text-generation-on-sciq",
    "https://paperswithcode.com/sota/word-sense-disambiguation-on-big-bench",
    "https://paperswithcode.com/sota/temporal-action-localization-on-thumos14",
    "https://paperswithcode.com/sota/visual-question-answering-on-coco-visual-5",
    "https://paperswithcode.com/sota/object-counting-on-tallyqa-complex",
    "https://paperswithcode.com/sota/text-to-sql-on-bird-big-bench-for-large-scale",
    "https://paperswithcode.com/sota/extreme-summarization-on-gem-xsum",
    "https://paperswithcode.com/sota/question-answering-on-obqa",
    "https://paperswithcode.com/sota/temporal-action-localization-on-hacs",
    "https://paperswithcode.com/sota/code-generation-on-humaneval",
    "https://paperswithcode.com/sota/machine-translation-on-wmt2016-german-english",
    "https://paperswithcode.com/sota/zero-shot-transfer-image-classification-on-2",
]

len(paperswithcode_leaderboard_primary)

566

In [66]:
# leaderboards hosted by Papers With Code but maintained by other organizations
paperswithcode_leaderboards_hosted_by_github_mapping = [
    {
        "provenance": ["InfiMM-Eval"],  # https://infimm.github.io/InfiMM-Eval
        "leaderboards": [
            "https://paperswithcode.com/sota/visual-question-answering-vqa-on-core-mm",
        ],
    },
    {
        "provenance": ["CORE-MM"],  # https://core-mm.github.io
        "leaderboards": [
            "https://paperswithcode.com/sota/visual-question-answering-vqa-on-core-mm-1"
        ],
    },
]

paperswithcode_leaderboards_hosted_by_github = []
for leaderboard in paperswithcode_leaderboards_hosted_by_github_mapping:
    paperswithcode_leaderboards_hosted_by_github.extend(leaderboard["leaderboards"])

len(paperswithcode_leaderboards_hosted_by_github)

2

In [132]:
# leaderboards hosted on Papers With Code with leaderboard smells
paperswithcode_leaderboard_with_smell = {
    "https://paperswithcode.com/sota/factual-inconsistency-detection-in-chart": "https://github.com/paperswithcode/sota-extractor/issues/80",
    "https://paperswithcode.com/sota/machine-translation-on-wmt2014-english-german": "https://github.com/paperswithcode/sota-extractor/issues/27",
    "https://paperswithcode.com/sota/image-classification-on-imagenet": "https://github.com/paperswithcode/sota-extractor/issues/25",
    "https://paperswithcode.com/sota/visual-question-answering-vqa-on-3": "https://github.com/paperswithcode/sota-extractor/issues/40",
}
len(paperswithcode_leaderboard_with_smell)

4

In [183]:
# load the data of FM leaderboards that are not hosted on Papers With Code
with pd.ExcelFile(f"{path_result}/Foundation Model Leaderboards.xlsx") as excel_file:
    df_leaderboard = pd.read_excel(excel_file, sheet_name="Leaderboard")
    df_leaderboard['Host platforms'] = df_leaderboard['Host platforms'].apply(lambda x: str(x).upper())

# append the data of FM leaderboards that are hosted on Papers With Code
for leaderboard in paperswithcode_leaderboard_primary:
    if leaderboard in paperswithcode_leaderboards_hosted_by_github:
        continue
    leaderboard_entry = {
        "Name": leaderboard,
        "Host platforms": "PWC",
        "Workflow patterns": "1",
        "Have many major releases?": np.nan,
        "Have a GitHub repository?": np.nan,
        "Have a submission channel/protocol?": "y",  # https://github.com/paperswithcode/axcell
        "Other types of submission": np.nan,
        "Attach model provenance?": "y",
        "Smell cases": paperswithcode_leaderboard_with_smell[leaderboard] if leaderboard in paperswithcode_leaderboard_with_smell else np.nan,
    }
    df_leaderboard = pd.concat(
        [df_leaderboard, pd.Series(leaderboard_entry).to_frame().T], ignore_index=True
    )

print(
    f"There are {len(df_leaderboard[df_leaderboard['Host platforms'].str.contains(',')])} ({len(df_leaderboard[df_leaderboard['Host platforms'].str.contains(',')])/len(df_leaderboard)*100:.2f}%) FM leaderboards hosted on multiple platforms."
)
leaderboard_distribution = df_leaderboard["Host platforms"].value_counts().to_dict()

# draw the distribution of FM leaderboards hosted on different platforms
fig = px.bar(
    x=leaderboard_distribution.keys(),
    y=leaderboard_distribution.values(),
    text_auto=True,
    labels={"x": "Host Platforms", "y": "Number of Leaderboards"},
)
# Update the layout for a tighter look
fig.update_layout(
    autosize=True,
    margin=dict(
        l=10,  # Left margin
        r=10,  # Right margin
        b=10,  # Bottom margin
        t=10,  # Top margin
        pad=4,  # Padding between the plot and the margin
    ),
    xaxis=dict(
        title_font=dict(size=18, family="Arial, bold", color="black"),
        tickfont=dict(color="black"),
    ),
    yaxis=dict(
        title_font=dict(size=18, family="Arial, bold", color="black"),
        tickfont=dict(color="black"),
    ),
)
pio.write_image(fig, f"{path_result}/Leaderboard-Distribution.pdf")
fig.update_layout(
    width=1200,  # Set the width of the figure
    height=800,  # Set the height of the figure
)
fig.show()

df_leaderboard["Host platforms"] = df_leaderboard["Host platforms"].apply(
    lambda x: str(x).split(",")
)
df_leaderboard["Workflow patterns"] = df_leaderboard["Workflow patterns"].apply(
    lambda x: str(x).split(",")
)
aggregated_df = (
    df_leaderboard.explode("Host platforms")
    .groupby("Host platforms")
    .apply(lambda x: x.notna().sum(), include_groups=False)
)

for index, row in aggregated_df.iterrows():
    aggregated_df.at[index, "Percentage Share"] = (
        f"{row['Name']/len(df_leaderboard)*100:.2f}% ({row['Name']}/{len(df_leaderboard)})"
    )
    aggregated_df.at[index, "Have many major releases?"] = (
        f"{row['Have many major releases?']/row['Name']*100:.2f}% ({row['Have many major releases?']}/{row['Name']})"
    )
    aggregated_df.at[index, "Have a GitHub repository?"] = (
        f"{row['Have a GitHub repository?']/row['Name']*100:.2f}% ({row['Have a GitHub repository?']}/{row['Name']})"
    )
    aggregated_df.at[index, "Have a submission channel/protocol?"] = (
        f"{row['Have a submission channel/protocol?']/row['Name']*100:.2f}% ({row['Have a submission channel/protocol?']}/{row['Name']})"
    )
    aggregated_df.at[index, "Other types of submission"] = (
        f"{row['Other types of submission']/row['Name']*100:.2f}% ({row['Other types of submission']}/{row['Name']})"
    )
    aggregated_df.at[index, "Attach model provenance?"] = (
        f"{row['Attach model provenance?']/row['Name']*100:.2f}% ({row['Attach model provenance?']}/{row['Name']})"
    )

columns_to_aggregate = [
    "Percentage Share",
    "Have a GitHub repository?",
    "Have many major releases?",
    "Attach model provenance?",
    "Have a submission channel/protocol?",
    "Other types of submission",
]
aggregated_df = aggregated_df[columns_to_aggregate].reset_index()
aggregated_df["Host platforms"] = aggregated_df["Host platforms"].map(
    platform_abbreviation_mapping
)
aggregated_df.rename(columns={"Host platforms": "Host platform"}, inplace=True)

# print the detailed statistics on several key aspects of FM leaderboards across host platforms
print(aggregated_df.to_latex(escape=True, index=False))

There are 79 (7.52%) FM leaderboards hosted on multiple platforms.


\begin{tabular}{lllllll}
\toprule
Host platform & Percentage Share & Have a GitHub repository? & Have many major releases? & Attach model provenance? & Have a submission channel/protocol? & Other types of submission \\
\midrule
NaN & 21.90\% (230/1050) & 100.00\% (230/230) & 1.30\% (3/230) & 51.74\% (119/230) & 44.78\% (103/230) & 2.17\% (5/230) \\
NaN & 17.14\% (180/1050) & 66.67\% (120/180) & 3.33\% (6/180) & 66.67\% (120/180) & 55.00\% (99/180) & 4.44\% (8/180) \\
NaN & 14.38\% (151/1050) & 58.94\% (89/151) & 1.32\% (2/151) & 63.58\% (96/151) & 53.64\% (81/151) & 4.64\% (7/151) \\
NaN & 53.90\% (566/1050) & 0.35\% (2/566) & 0.00\% (0/566) & 100.00\% (566/566) & 100.00\% (566/566) & 0.00\% (0/566) \\
NaN & 0.38\% (4/1050) & 100.00\% (4/4) & 0.00\% (0/4) & 75.00\% (3/4) & 25.00\% (1/4) & 0.00\% (0/4) \\
\bottomrule
\end{tabular}




Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '1.30% (3/230)' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '100.00% (230/230)' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '44.78% (103/230)' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '2.17% (5/230)' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '51.74%

In [180]:
# Split the host platforms into multiple rows
df_exploded = (
    df_leaderboard.explode("Host platforms")
    .assign(**{"Workflow patterns": df_leaderboard["Workflow patterns"]})
    .explode("Workflow patterns")
)

# Create a cross-tabulation of Host platforms and Workflow Patterns
platform_workflow = pd.crosstab(
    df_exploded["Host platforms"], df_exploded["Workflow patterns"]
)

# Calculate the column sums once
column_sums = platform_workflow.sum(axis=0)

# Apply the formatting function to each element, using precomputed column sums
for column in platform_workflow.columns:
    platform_workflow[column] = platform_workflow[column].apply(
        lambda x, total=column_sums[column]: f"{x/total*100:.2f}% ({x}/{total})"
    )

platform_workflow.index = platform_workflow.index.map(platform_abbreviation_mapping)
platform_workflow.columns = [
    f"Workflow Pattern {col}" if col != "Host platforms" else "Host platform"
    for col in platform_workflow.columns
]
platform_workflow.reset_index(inplace=True)

# print the distribution of workflow patterns across host platforms
print(platform_workflow.to_latex(index=False, escape=True))

df_workflow = (
    df_leaderboard.explode("Workflow patterns")
    .groupby("Workflow patterns")["Name"]
    .size()
    .reset_index(name="Percentage Share")
)
df_workflow["Percentage Share"] = df_workflow["Percentage Share"].apply(
    lambda x: f"{x/len(df_leaderboard)*100:.2f}% ({x}/{len(df_leaderboard)})"
)
df_workflow["Workflow patterns"] = df_workflow["Workflow patterns"].map(lambda x: f"Workflow Pattern {x}")
# print the distribution of workflow patterns
df_workflow

\begin{tabular}{llllll}
\toprule
Host platforms & Workflow Pattern 1 & Workflow Pattern 2 & Workflow Pattern 3 & Workflow Pattern 4 & Workflow Pattern 5 \\
\midrule
GitHub & 6.15\% (40/650) & 36.36\% (44/121) & 0.00\% (0/2) & 43.61\% (222/509) & 21.43\% (6/28) \\
Hugging Face Spaces & 4.77\% (31/650) & 22.31\% (27/121) & 50.00\% (1/2) & 31.63\% (161/509) & 60.71\% (17/28) \\
independent platform & 2.15\% (14/650) & 38.84\% (47/121) & 50.00\% (1/2) & 23.58\% (120/509) & 17.86\% (5/28) \\
Papers With Code & 86.77\% (564/650) & 1.65\% (2/121) & 0.00\% (0/2) & 0.39\% (2/509) & 0.00\% (0/28) \\
Spreadsheet & 0.15\% (1/650) & 0.83\% (1/121) & 0.00\% (0/2) & 0.79\% (4/509) & 0.00\% (0/28) \\
\bottomrule
\end{tabular}



Unnamed: 0,Workflow patterns,Percentage Share
0,Workflow Pattern 1,60.48% (635/1050)
1,Workflow Pattern 2,9.43% (99/1050)
2,Workflow Pattern 3,0.19% (2/1050)
3,Workflow Pattern 4,41.43% (435/1050)
4,Workflow Pattern 5,2.19% (23/1050)


In [140]:
# import webbrowser

# count = 0

# # Load the Excel file
# with pd.ExcelFile(f"{path_result}/Foundation Model Leaderboards.xlsx") as xls:

#     # Initialize a dictionary to store unique counts
#     unique_counts = {}

#     # Iterate over each sheet
#     for sheet_name in xls.sheet_names:
#         if sheet_name in ['Leaderboard']:
#             continue

#         df = pd.read_excel(xls, sheet_name=sheet_name)

#         # Iterate over each column
#         for column in df.columns:
#             for url in df[column].dropna().to_list():
#                 if 'paperswithcode/sota-extractor' not in url:
#                     continue
#                 webbrowser.open(url)
#                 if count % 20 == 19:
#                     code = input("Press enter key to proceed.")
#                     if code.lower() == "esc":
#                         raise KeyboardInterrupt
#                 count += 1

In [142]:
import openpyxl
import pandas as pd
from collections import defaultdict

# red cell represents the smell cases resolved by leaderboard operators or our team
# green cell represents the smell cases admitted or verfied by leaderboard operators
# yellow cell represents the smell cases verified by independent users
# cells with bold font represent SATD smell cases
wb = openpyxl.load_workbook(f"{path_result}/Foundation Model Leaderboards.xlsx")

# Define the green color you are looking for (based on RGB code)
red_color = "FFFF0000"  # Example color code, adjust as necessary
green_color = "FF00FF00"
yellow_color = "FFFFFF00"

# Initialize counters
smell_workflow = defaultdict(set)
link_workflows = defaultdict(list)
total_smell_confirmed_by_leaderboard_operator = 0
total_smell_confirmed_by_independent_user = 0
total_smell_resolved = 0
total_satd_count = 0
smell_examples = []

# Initialize a dictionary to store unique counts
unique_counts = {}

# Iterate through all sheets in the workbook
for sheet in wb.worksheets:
    if sheet.title == "Leaderboard":
        # Read the sheet into a pandas dataframe
        data = sheet.values
        columns = next(data)
        df = pd.DataFrame(data, columns=columns)

        # Clean up dataframe, handle any None values
        df = df.dropna(how="all", axis=0)  # Drop rows where all values are NaN
        df = df.dropna(how="all", axis=1)  # Drop columns where all values are NaN

        df["Workflow patterns"] = df["Workflow patterns"].apply(
            lambda x: x.split(",") if isinstance(x, str) else [str(int(x))]
        )
        df["Smell cases"] = df["Smell cases"].apply(
            lambda x: x.split(",") if isinstance(x, str) else []
        )

        # # Clean up dataframe, handle any None values
        # df = df.dropna()

        for index, row in df.iterrows():
            for smell in row["Smell cases"]:
                link_workflows[smell].extend(row["Workflow patterns"])
        continue

    # Initialize a dictionary to keep track of unique values for each column
    unique_values = {}

    # Iterate through the cells in the current sheet to count green cells
    for row_idx, row in enumerate(sheet.iter_rows(values_only=False), start=1):
        for col_idx, cell in enumerate(row, start=1):
            fill = cell.fill

            # Add links if cell contains 'https'
            if cell.value and isinstance(cell.value, str) and "https" in cell.value:
                smell_examples.append(cell.value)
                if cell.value in link_workflows:
                    smell_workflow[sheet.title] = smell_workflow[sheet.title].union(
                        link_workflows[cell.value]
                    )

            # Add to the total count of red cells
            if (
                isinstance(fill.fgColor, openpyxl.styles.colors.Color)
                and fill.fgColor.rgb == red_color
            ):
                total_smell_resolved += 1

            # Add to the total count of green cells
            if (
                isinstance(fill.fgColor, openpyxl.styles.colors.Color)
                and fill.fgColor.rgb == green_color
            ):
                total_smell_confirmed_by_leaderboard_operator += 1
                if cell.font.bold:  # Check if the font is bold
                    total_satd_count += 1

            # Add to the total count of yellow cells
            if (
                isinstance(fill.fgColor, openpyxl.styles.colors.Color)
                and fill.fgColor.rgb == yellow_color
            ):
                total_smell_confirmed_by_independent_user += 1

            # Track unique values for each column
            if row_idx == 1:
                # Create a set for unique values if it's the first row (headers)
                header = sheet.cell(row=row_idx, column=col_idx).value
                if header:
                    unique_values[header] = set()
            else:
                # Add cell value to the respective column's unique set
                header = sheet.cell(row=1, column=col_idx).value
                if header and cell.value is not None:
                    unique_values[header].add(cell.value)

    # Store unique counts for the current sheet
    unique_counts[sheet.title] = {
        col: len(unique_set) for col, unique_set in unique_values.items()
    }

# Ensure the column order is the same as the original Excel sheet
df_smell_component = pd.DataFrame(unique_counts).transpose()
df_smell_component.drop(columns=["Uncategorized"], index="Others", inplace=True)
df_smell_component = df_smell_component.map(
    lambda x: int(x) if isinstance(x, (float, int)) else x
)

# print the number of smell cases and their associated leaderboard components
print(df_smell_component.to_latex(index=True, escape=True))

# Extract unique column names from the values in the defaultdict
columns = sorted(set().union(*smell_workflow.values()))

# Create the dataframe with dynamic columns
df_workflow_smell = pd.DataFrame(index=smell_workflow.keys(), columns=columns)
df_workflow_smell.drop(index="Others", inplace=True)

# Populate the dataframe dynamically
for entity, items in smell_workflow.items():
    for col in columns:
        df_workflow_smell.loc[entity, col] = "√" if col in items else ""

df_workflow_smell.columns = [f"Workflow Pattern {index}" for index in df_workflow_smell.columns]

# print the existence of smell cases and their associated workflow patterns
print(df_workflow_smell.to_latex(index=True, escape=True))

# print the key statistics of smell cases
print(f"Total number of smell cases: {len(smell_examples)}")
print(
    f"Total number of smell cases hosted in GitHub: {len([example for example in smell_examples if 'github' in example])}"
)
print(
    f"Total number of smell cases hosted in Hugging Face: {len([example for example in smell_examples if 'huggingface' in example])}"
)
print(f"Total number of smell cases identified as SATDs: {total_satd_count}")
print(f"Total number of resolved smell cases: {total_smell_resolved}")
print(
    f"Total number of unresolved smell cases confirmed by leaderboard operators: {total_smell_confirmed_by_leaderboard_operator}"
)
print(
    f"Total number of unresolved smell cases confirmed by independent users: {total_smell_confirmed_by_independent_user}"
)

\begin{tabular}{lrrrrrrrr}
\toprule
 & Benchmark Metric & Benchmark Protocol & Benchmark Raw Dataset & Benchmark Task & Evaluation Score & Model (Information) & Ranking Dataframe & Submission Channel/Protocol \\
\midrule
Confusing Entity & 1 & 3 & 12 & 1 & 3 & 2 & 3 & 20 \\
Deprecated Entity & 0 & 0 & 0 & 1 & 0 & 0 & 2 & 0 \\
Inaccessible Entity & 0 & 0 & 2 & 0 & 1 & 2 & 15 & 7 \\
Misdisplayed Entity & 5 & 1 & 0 & 0 & 2 & 0 & 4 & 0 \\
Mismatched Entity & 2 & 2 & 2 & 5 & 5 & 5 & 1 & 0 \\
Missing Entity & 2 & 4 & 1 & 0 & 6 & 51 & 90 & 9 \\
Redundant Entity & 0 & 0 & 0 & 2 & 2 & 2 & 2 & 0 \\
Unresponsive Entity & 0 & 0 & 0 & 0 & 0 & 0 & 132 & 5 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
 & Workflow pattern 1 & Workflow pattern 2 & Workflow pattern 3 & Workflow pattern 4 & Workflow pattern 5 \\
\midrule
Confusing Entity & √ & √ & √ & √ &  \\
Deprecated Entity &  & √ &  & √ &  \\
Inaccessible Entity & √ & √ &  & √ & √ \\
Misdisplayed Entity &  &  &  & √ &  \\
Mismatched 