In [34]:
from brx import BRX, sftoq, uif
from superdocs_python.utils.diff_utils import parse_diff
from datasets import load_dataset, IterableDataset
from dotenv import load_dotenv
import os
import json
from superdocs_python.utils.model import create_model
import tiktoken
from datasets import Dataset
from functools import partial

# the prompt for generating is "Suggest rewrites that..."
#
# The output should first be a brief plan and then an execution based on the results
#

DELIM = "-----"

load_dotenv("../../.env")

dataset = load_dataset("princeton-nlp/SWE-bench_oracle", split="train")
dataset = dataset.to_iterable_dataset()
brx_client = BRX(os.environ.get("BRX_ACCESS_TOKEN"))
model = create_model(os.environ.get("OPENAI_API_KEY"), "gpt-3.5-turbo")
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")





In [35]:
PATCH_FORMATTING_INST = """I need you to solve this issue by generating a single patch file that I can apply directly to this repository using git apply. Please respond with a single patch file in the following format.
<patch>
--- a/file.py
+++ b/file.py
@@ -1,27 +1,35 @@
 def euclidean(a, b):
-    while b:
-        a, b = b, a % b
-    return a
+    if b == 0:
+        return a
+    return euclidean(b, a % b)
 
 
 def bresenham(x0, y0, x1, y1):
     points = []
     dx = abs(x1 - x0)
     dy = abs(y1 - y0)
-    sx = 1 if x0 < x1 else -1
-    sy = 1 if y0 < y1 else -1
-    err = dx - dy
+    x, y = x0, y0
+    sx = -1 if x0 > x1 else 1
+    sy = -1 if y0 > y1 else 1
 
-    while True:
-        points.append((x0, y0))
-        if x0 == x1 and y0 == y1:
-            break
-        e2 = 2 * err
-        if e2 > -dy:
+    if dx > dy:
+        err = dx / 2.0
+        while x != x1:
+            points.append((x, y))
             err -= dy
-            x0 += sx
-        if e2 < dx:
-            err += dx
-            y0 += sy
+            if err < 0:
+                y += sy
+                err += dx
+            x += sx
+    else:
+        err = dy / 2.0
+        while y != y1:
+            points.append((x, y))
+            err -= dx
+            if err < 0:
+                x += sx
+                err += dy
+            y += sy
 
+    points.append((x, y))
     return points
</patch>"""
SEARCH_REPLACE_INST = """
Please respond with a series of search-replace and new-file blocks. Here's an example:

First, in order to add an import for sympy at the top of the program, we must copy over the first couple lines for context.

<edit>
<filepath>mathweb/flask/app.py</filepath>
<search>
from flask import Flask

app = Flask(__name__)
</search>
<replace>
import sympy
from flask import Flask

app = Flask(__name__)
</replace>
</edit>

Second, let's remove the is_prime function completely.
<edit>
<filepath>mathweb/flask/app.py</filepath>
<search>
    def is_prime(x):
        if x < 2:
            return False
        for i in range(2, int(math.sqrt(x)) + 1):
            if x % i == 0:
                return False
        return True
</search>
<replace>
</replace>
</edit>

Third, let's rewrite nth_prime to use sympy.isprime() instead of is_prime. We need to copy over is_prime completely to be able to properly make the replacement.
<edit>
<filepath>mathweb/flask/app.py</filepath>
<search>
    @app.route('/prime/<int:n>')
    def nth_prime(n):
        count = 0
        num = 1
        while count < n:
            num += 1
            if is_prime(num):
                count += 1
        return str(num)
</search>
<replace>
    @app.route('/prime/<int:n>')
    def nth_prime(n):
        count = 0
        num = 1
        while count < n:
            num += 1
            if sympy.isprime(num):
                count += 1
        return str(num)
</replace>
</edit>

Here's a quick example of generating a new file:
<newfile>
<filepath>mathweb/custom_fibonacci.py</filepath>
<content>
def nth_fibonacci(n):
    if n <= 1:
        return n
    else:
        fib = [0, 1]
        for i in range(2, n+1):
            fib.append(fib[i-1] + fib[i-2])
        return fib[n]
</content>
</newfile>
"""

In [41]:
import re

first_inst = next(iter(dataset))['text']
print(first_inst)
print(PATCH_FORMATTING_INST in first_inst)

test_patch = next(iter(dataset))['patch']
test_patch = test_patch.replace("<patch>", "").replace("</patch>", "")
test_patch = re.sub(r'^diff.*\n?', '', test_patch, flags=re.MULTILINE)

parsed_test_diff = parse_diff(test_patch)


print(test_patch)
print("LENGTH OF THE PARSED DIFF: ", len(parsed_test_diff))

for block in parsed_test_diff:
    print("FILEPATH")
    print(block.filepath)
    print("SEARCH")
    print(block.search_block)
    print("REPLACE")
    print(block.replace_block)

You will be provided with a partial code base and an issue statement explaining a problem to resolve.
<issue>
`initialize` and `Statevector` don't play nicely
<!-- ⚠️ If you do not respect this template, your issue will be closed -->
<!-- ⚠️ Make sure to browse the opened and closed issues -->

### Informations

- **Qiskit Aer version**: 0.5.1
- **Python version**: 3.7.3
- **Operating system**: OSX

### What is the current behavior?

Using `initialize` in a circuit and then running with `Statevector` results in the error "Cannot apply Instruction: reset"

### Steps to reproduce the problem

```
import qiskit as qk
import qiskit.quantum_info as qi
from numpy import sqrt

n = 2

ket0 = [1/sqrt(2),0,0,1/sqrt(2)]

qc = qk.QuantumCircuit(n)
qc.initialize(ket0,range(n))
    
ket_qi = qi.Statevector.from_instruction(qc)
```




</issue>
<code>
[start of README.md]
1 # Qiskit Terra
2 
3 [![License](https://img.shields.io/github/license/Qiskit/qiskit-terra.svg?style=popout-square)](https://open

In [37]:


def process_next_row():
    for row in dataset:
        try:
            input_text = row["text"]
            input_text = input_text.replace(PATCH_FORMATTING_INST, SEARCH_REPLACE_INST)
            row_id = row["instance_id"]

            input_text += f"\n\n{DELIM}\n\n"

            diff_string_trimmed = row["patch"].replace("<patch>", "").replace("</patch>", "")
            # Maybe clean up some more strings at the top
            parsed_diff = parse_diff(diff_string_trimmed)
            fmtd_search_replaces = ""
            # Need to remove the a/ and the b/ at the start
            for block in parsed_diff:
                if len(block.search_block.strip()) == 0 and len(block.replace_block.strip()) == 0:
                    continue

                if "dev/null" in block.previous_filepath:
                    fmtd_search_replaces += f"\n<newfile>\n<filepath>{block.filepath[2:]}</filepath>\n<content>{block.contents}</content>\n</newfile>"
                else:
                    fmtd_search_replaces += f"\n<edit>\n<filepath>{block.filepath[2:]}</filepath>\n<search>{block.search_block}</search>\n<replace>{block.replace_block}</replace>\n</edit>\n"

            token_count = len(encoding.encode(input_text)) + len(encoding.encode(fmtd_search_replaces))
            # print(f"Was able to calculate token count")
            if token_count > 8000:
                print(f"Token count is too large: {row_id}")
            else:
                yield {"input": input_text, "output": fmtd_search_replaces}
        except Exception as e:
            print("There was an error processing")

ds = Dataset.from_generator(process_next_row)
ds.save_to_disk("./search_replace_dataset/")
    


Generating train split: 0 examples [00:00, ? examples/s]

Token count is too large: Qiskit__qiskit-4465
Token count is too large: Qiskit__qiskit-1295
Token count is too large: docker__compose-6410
Token count is too large: ytdl-org__youtube-dl-1591
Token count is too large: numpy__numpy-13703


Generating train split: 3 examples [00:00, 10.73 examples/s]

Token count is too large: pandas-dev__pandas-27237
Token count is too large: googleapis__google-cloud-python-3712
Token count is too large: Qiskit__qiskit-6240
Token count is too large: numpy__numpy-11280
Token count is too large: numpy__numpy-21187
Token count is too large: pandas-dev__pandas-26343
Token count is too large: googleapis__google-cloud-python-8438
Token count is too large: numpy__numpy-23700
Token count is too large: mesonbuild__meson-11181
Token count is too large: huggingface__transformers-15625
Token count is too large: apache__airflow-33043
Token count is too large: pantsbuild__pants-18947
Token count is too large: googleapis__google-cloud-python-5020
Token count is too large: numpy__numpy-7258
Token count is too large: pandas-dev__pandas-19230
Token count is too large: ray-project__ray-8231
Token count is too large: huggingface__transformers-8016
Token count is too large: mesonbuild__meson-3894
Token count is too large: numpy__numpy-23600


Generating train split: 9 examples [00:00, 10.45 examples/s]

Token count is too large: Qiskit__qiskit-10300
Token count is too large: pandas-dev__pandas-8134
Token count is too large: docker__compose-5653
Token count is too large: pandas-dev__pandas-19730
Token count is too large: pandas-dev__pandas-19112
Token count is too large: pandas-dev__pandas-28297
Token count is too large: pypa__pip-4992
There was an error processing


Generating train split: 13 examples [00:01, 12.67 examples/s]

Token count is too large: huggingface__transformers-21969
Token count is too large: conan-io__conan-2916
Token count is too large: google__jax-1388
Token count is too large: pandas-dev__pandas-6122
Token count is too large: apache__airflow-8230
Token count is too large: pandas-dev__pandas-33857


Generating train split: 15 examples [00:01, 10.34 examples/s]

Token count is too large: pandas-dev__pandas-5701
Token count is too large: Qiskit__qiskit-1276
Token count is too large: pandas-dev__pandas-37546
Token count is too large: Qiskit__qiskit-9823
Token count is too large: wagtail__wagtail-9922
Token count is too large: pandas-dev__pandas-30494
Token count is too large: PrefectHQ__prefect-312
Token count is too large: pandas-dev__pandas-22804
Token count is too large: docker__compose-6221
Token count is too large: pandas-dev__pandas-10841
Token count is too large: pandas-dev__pandas-23021
Token count is too large: Qiskit__qiskit-5601
Token count is too large: explosion__spaCy-3434
Token count is too large: Lightning-AI__lightning-1748
Token count is too large: ray-project__ray-8781


Generating train split: 22 examples [00:01, 17.36 examples/s]

Token count is too large: pandas-dev__pandas-6044
Token count is too large: apache__airflow-22710
Token count is too large: ytdl-org__youtube-dl-14833
Token count is too large: pandas-dev__pandas-6937
Token count is too large: apache__airflow-10864
Token count is too large: mesonbuild__meson-5396


Generating train split: 26 examples [00:01, 17.97 examples/s]

Token count is too large: pandas-dev__pandas-5713
Token count is too large: Qiskit__qiskit-1856
Token count is too large: pandas-dev__pandas-39217
Token count is too large: pandas-dev__pandas-10108
Token count is too large: pandas-dev__pandas-16295
Token count is too large: Qiskit__qiskit-6001
Token count is too large: pantsbuild__pants-16220
Token count is too large: celery__celery-6629
Token count is too large: celery__celery-6288
Token count is too large: Lightning-AI__lightning-1387


Generating train split: 30 examples [00:02, 16.16 examples/s]

Token count is too large: pandas-dev__pandas-7456
Token count is too large: mesonbuild__meson-327
Token count is too large: huggingface__transformers-18803
Token count is too large: mesonbuild__meson-2849
Token count is too large: mesonbuild__meson-5553
Token count is too large: pandas-dev__pandas-20098
Token count is too large: pandas-dev__pandas-20891
Token count is too large: apache__airflow-18228
Token count is too large: googleapis__google-cloud-python-7441


Generating train split: 32 examples [00:02, 15.49 examples/s]

Token count is too large: numpy__numpy-10679
Token count is too large: Lightning-AI__lightning-2865
Token count is too large: pandas-dev__pandas-26134
Token count is too large: docker__compose-1939
Token count is too large: pandas-dev__pandas-19889
Token count is too large: ytdl-org__youtube-dl-21658
Token count is too large: pandas-dev__pandas-22266


Generating train split: 43 examples [00:02, 22.64 examples/s]

Token count is too large: googleapis__google-cloud-python-5036
Token count is too large: jupyterlab__jupyterlab-8486
Token count is too large: numpy__numpy-7635
Token count is too large: Lightning-AI__lightning-1667
Token count is too large: docker__compose-3670
Token count is too large: numpy__numpy-18339
Token count is too large: huggingface__transformers-6648
Token count is too large: conan-io__conan-4495
Token count is too large: mesonbuild__meson-5073
Token count is too large: pandas-dev__pandas-31262
Token count is too large: pypa__pip-7489
Token count is too large: mesonbuild__meson-2496
Token count is too large: pantsbuild__pants-5971
Token count is too large: Lightning-AI__lightning-1377
Token count is too large: googleapis__google-cloud-python-6010
Token count is too large: googleapis__google-cloud-python-2151
Token count is too large: ipython__ipython-2232
Token count is too large: pandas-dev__pandas-19401
Token count is too large: googleapis__google-cloud-python-814


Generating train split: 49 examples [00:02, 25.40 examples/s]

Token count is too large: conan-io__conan-2600
Token count is too large: pantsbuild__pants-14169
Token count is too large: pandas-dev__pandas-21507
Token count is too large: pandas-dev__pandas-38142
Token count is too large: pandas-dev__pandas-17751
Token count is too large: Qiskit__qiskit-9040
Token count is too large: googleapis__google-cloud-python-603
Token count is too large: pandas-dev__pandas-7076
Token count is too large: pandas-dev__pandas-7657
Token count is too large: pandas-dev__pandas-25434
Token count is too large: google__jax-509
Token count is too large: Qiskit__qiskit-10622
Token count is too large: pandas-dev__pandas-23402


Generating train split: 53 examples [00:02, 26.36 examples/s]

Token count is too large: googleapis__google-cloud-python-1172
Token count is too large: googleapis__google-cloud-python-10015
Token count is too large: docker__compose-2720
Token count is too large: googleapis__google-cloud-python-4498
Token count is too large: pandas-dev__pandas-27068
Token count is too large: googleapis__google-cloud-python-2989
Token count is too large: google__jax-2561
Token count is too large: pandas-dev__pandas-23837
Token count is too large: ipython__ipython-14014
Token count is too large: numpy__numpy-3266
Token count is too large: huggingface__transformers-21727
Token count is too large: googleapis__google-cloud-python-276
Token count is too large: pantsbuild__pants-5386


Generating train split: 57 examples [00:03, 24.26 examples/s]

Token count is too large: pandas-dev__pandas-31918
Token count is too large: pypa__pip-6818
Token count is too large: Qiskit__qiskit-1615
Token count is too large: Qiskit__qiskit-8759
Token count is too large: ipython__ipython-3683
Token count is too large: Lightning-AI__lightning-2185
Token count is too large: pandas-dev__pandas-6661
Token count is too large: pandas-dev__pandas-5345


Generating train split: 60 examples [00:03, 19.83 examples/s]

Token count is too large: huggingface__transformers-17836
Token count is too large: pandas-dev__pandas-3585
Token count is too large: pandas-dev__pandas-30995
Token count is too large: conda__conda-11666
Token count is too large: docker__compose-4333
Token count is too large: numpy__numpy-11219
Token count is too large: pantsbuild__pants-12281


Generating train split: 63 examples [00:03, 15.75 examples/s]

Token count is too large: huggingface__transformers-20735
Token count is too large: conan-io__conan-9360
Token count is too large: pandas-dev__pandas-19973
Token count is too large: Lightning-AI__lightning-2842
Token count is too large: Lightning-AI__lightning-1104
Token count is too large: pandas-dev__pandas-22695
Token count is too large: mesonbuild__meson-9369
Token count is too large: pandas-dev__pandas-26298
Token count is too large: conan-io__conan-9596


Generating train split: 66 examples [00:03, 16.93 examples/s]

Token count is too large: googleapis__google-cloud-python-9033
Token count is too large: Lightning-AI__lightning-1797
Token count is too large: Qiskit__qiskit-2978
Token count is too large: docker__compose-2051
Token count is too large: huggingface__transformers-15085
Token count is too large: pandas-dev__pandas-38057
Token count is too large: pandas-dev__pandas-25275
Token count is too large: pandas-dev__pandas-11006


Generating train split: 68 examples [00:03, 16.08 examples/s]

Token count is too large: pandas-dev__pandas-21216
Token count is too large: pandas-dev__pandas-11957
Token count is too large: apache__airflow-19418
Token count is too large: google__jax-1972
Token count is too large: pandas-dev__pandas-14344
Token count is too large: pandas-dev__pandas-6495


Generating train split: 73 examples [00:04, 18.89 examples/s]

Token count is too large: pandas-dev__pandas-7485
Token count is too large: numpy__numpy-3244
Token count is too large: pandas-dev__pandas-14629
Token count is too large: pandas-dev__pandas-25729
Token count is too large: conda__conda-12378
Token count is too large: pypa__pip-11502
Token count is too large: pantsbuild__pants-4773
Token count is too large: numpy__numpy-6543


Generating train split: 75 examples [00:04, 13.75 examples/s]

Token count is too large: Qiskit__qiskit-1373
Token count is too large: pandas-dev__pandas-11079
Token count is too large: pandas-dev__pandas-4313
Token count is too large: pandas-dev__pandas-5849
Token count is too large: pantsbuild__pants-4686


Generating train split: 80 examples [00:04, 15.08 examples/s]

Token count is too large: pantsbuild__pants-7115
Token count is too large: googleapis__google-cloud-python-2390
Token count is too large: Lightning-AI__lightning-1724
Token count is too large: numpy__numpy-14345
Token count is too large: Qiskit__qiskit-989
Token count is too large: apache__airflow-19142
Token count is too large: pandas-dev__pandas-4846
Token count is too large: ipython__ipython-11650
Token count is too large: pandas-dev__pandas-6553
Token count is too large: pandas-dev__pandas-4267
Token count is too large: pantsbuild__pants-5605
Token count is too large: pantsbuild__pants-16808
Token count is too large: huggingface__transformers-7456
Token count is too large: numpy__numpy-11850


Generating train split: 82 examples [00:05, 11.69 examples/s]

Token count is too large: huggingface__transformers-18018
Token count is too large: mesonbuild__meson-4354
Token count is too large: mesonbuild__meson-11951
Token count is too large: ipython__ipython-11330
Token count is too large: numpy__numpy-20934
Token count is too large: pandas-dev__pandas-23657
Token count is too large: open-mmlab__mmdetection-6279
Token count is too large: Qiskit__qiskit-943
Token count is too large: pandas-dev__pandas-22037
Token count is too large: pandas-dev__pandas-27083
Token count is too large: conda__conda-2355
Token count is too large: ray-project__ray-4469
Token count is too large: apache__airflow-19193
Token count is too large: Qiskit__qiskit-6675
Token count is too large: gitpython-developers__GitPython-1224
Token count is too large: pandas-dev__pandas-24547


Generating train split: 85 examples [00:05,  8.80 examples/s]

Token count is too large: pandas-dev__pandas-33513
Token count is too large: mesonbuild__meson-9295
Token count is too large: conda__conda-3051
Token count is too large: Lightning-AI__lightning-2565
Token count is too large: pandas-dev__pandas-31679
Token count is too large: conan-io__conan-4045
Token count is too large: pypa__pip-10962
Token count is too large: ipython__ipython-2820
Token count is too large: Qiskit__qiskit-10284
Token count is too large: jupyterlab__jupyterlab-5196
Token count is too large: mesonbuild__meson-5571
Token count is too large: scipy__scipy-4385
Token count is too large: ray-project__ray-3731
Token count is too large: pypa__pip-1311
Token count is too large: docker__compose-7121


Generating train split: 89 examples [00:05, 11.79 examples/s]

Token count is too large: pandas-dev__pandas-16658
Token count is too large: pandas-dev__pandas-31215
Token count is too large: dagster-io__dagster-9792
Token count is too large: pyca__cryptography-377
Token count is too large: tiangolo__fastapi-1534
Token count is too large: jupyterlab__jupyterlab-7055
Token count is too large: pantsbuild__pants-7304
Token count is too large: conan-io__conan-9230
Token count is too large: apache__airflow-18224
Token count is too large: docker__compose-2585
Token count is too large: pandas-dev__pandas-22640
Token count is too large: scipy__scipy-3717


Generating train split: 92 examples [00:05, 12.35 examples/s]

Token count is too large: numpy__numpy-20386
Token count is too large: Qiskit__qiskit-7618
Token count is too large: pandas-dev__pandas-26158
Token count is too large: ytdl-org__youtube-dl-15112
Token count is too large: numpy__numpy-20246
Token count is too large: pandas-dev__pandas-19307
Token count is too large: apache__airflow-17850
Token count is too large: pandas-dev__pandas-35751
Token count is too large: Qiskit__qiskit-6918


Generating train split: 94 examples [00:06, 11.33 examples/s]

Token count is too large: pandas-dev__pandas-6986
Token count is too large: Qiskit__qiskit-2463
Token count is too large: ray-project__ray-3793
Token count is too large: pandas-dev__pandas-35966
Token count is too large: ray-project__ray-836
Token count is too large: celery__celery-5631
Token count is too large: apache__airflow-9740
Token count is too large: apache__airflow-16491
Token count is too large: conan-io__conan-11123
Token count is too large: PrefectHQ__prefect-793
Token count is too large: pyca__cryptography-8617
Token count is too large: apache__airflow-15112
Token count is too large: mesonbuild__meson-7527
Token count is too large: ytdl-org__youtube-dl-4389
Token count is too large: numpy__numpy-14536


Generating train split: 99 examples [00:06, 12.38 examples/s]

Token count is too large: pandas-dev__pandas-6438
Token count is too large: ipython__ipython-3484
Token count is too large: conan-io__conan-5864
Token count is too large: docker__compose-1261
Token count is too large: huggingface__transformers-7767
There was an error processing
Token count is too large: pandas-dev__pandas-18707
Token count is too large: pandas-dev__pandas-24277
Token count is too large: pandas-dev__pandas-37390
Token count is too large: conda__conda-6436
Token count is too large: scipy__scipy-4425
Token count is too large: PrefectHQ__prefect-1338


Generating train split: 106 examples [00:06, 18.59 examples/s]

Token count is too large: conda__conda-7157
Token count is too large: ray-project__ray-3661
Token count is too large: mesonbuild__meson-779
Token count is too large: pantsbuild__pants-16922
Token count is too large: ipython__ipython-5924
Token count is too large: numpy__numpy-13648
Token count is too large: apache__airflow-21006
Token count is too large: numpy__numpy-21566
Token count is too large: pandas-dev__pandas-37787
Token count is too large: huggingface__transformers-11962
Token count is too large: Qiskit__qiskit-858
Token count is too large: pandas-dev__pandas-22654
Token count is too large: conda__conda-6525


Generating train split: 112 examples [00:07, 16.25 examples/s]

Token count is too large: apache__airflow-18163
Token count is too large: pandas-dev__pandas-36179
Token count is too large: Lightning-AI__lightning-1488
Token count is too large: PrefectHQ__prefect-177
Token count is too large: PrefectHQ__prefect-2492
Token count is too large: googleapis__google-cloud-python-6453
Token count is too large: apache__airflow-12336
Token count is too large: numpy__numpy-12358
Token count is too large: pyca__cryptography-7034
Token count is too large: google__jax-3328
Token count is too large: conan-io__conan-8218
Token count is too large: jupyterlab__jupyterlab-7361
Token count is too large: numpy__numpy-10539


Generating train split: 118 examples [00:07, 21.96 examples/s]

Token count is too large: pandas-dev__pandas-4470
Token count is too large: wagtail__wagtail-7922
Token count is too large: pandas-dev__pandas-16090
Token count is too large: conan-io__conan-5583
Token count is too large: pypa__pip-443
Token count is too large: Qiskit__qiskit-5458
Token count is too large: apache__airflow-17003
Token count is too large: pandas-dev__pandas-25967
Token count is too large: ytdl-org__youtube-dl-21077
Token count is too large: Lightning-AI__lightning-1015


Generating train split: 123 examples [00:07, 22.77 examples/s]

Token count is too large: pandas-dev__pandas-38021
Token count is too large: mesonbuild__meson-7716
Token count is too large: mesonbuild__meson-5523
Token count is too large: conda__conda-5054
Token count is too large: docker__compose-4213
Token count is too large: pyca__cryptography-1398
Token count is too large: pandas-dev__pandas-14428
Token count is too large: gitpython-developers__GitPython-918
Token count is too large: google__jax-3152
Token count is too large: Qiskit__qiskit-5537
Token count is too large: pandas-dev__pandas-11257


Generating train split: 130 examples [00:07, 22.68 examples/s]

Token count is too large: Qiskit__qiskit-4616
Token count is too large: google__jax-2712
Token count is too large: pandas-dev__pandas-5375
Token count is too large: wagtail__wagtail-1133
Token count is too large: pypa__pip-2796
Token count is too large: conan-io__conan-6048
Token count is too large: pypa__pip-7285
Token count is too large: conda__conda-9284
Token count is too large: googleapis__google-cloud-python-3177
Token count is too large: mesonbuild__meson-1443
Token count is too large: wagtail__wagtail-1412
Token count is too large: huggingface__transformers-500
Token count is too large: conan-io__conan-4308
Token count is too large: pandas-dev__pandas-21957
Token count is too large: googleapis__google-cloud-python-11288
Token count is too large: pandas-dev__pandas-17482
Token count is too large: pantsbuild__pants-4661
Token count is too large: pypa__pip-7095
Token count is too large: Lightning-AI__lightning-782
Token count is too large: pandas-dev__pandas-18876
Token count is t

Generating train split: 135 examples [00:08, 14.01 examples/s]

Token count is too large: pandas-dev__pandas-37639
Token count is too large: conan-io__conan-7051
Token count is too large: pandas-dev__pandas-36118
Token count is too large: numpy__numpy-21027
Token count is too large: pandas-dev__pandas-7375
Token count is too large: ytdl-org__youtube-dl-30664
Token count is too large: ray-project__ray-7111
Token count is too large: numpy__numpy-3059
Token count is too large: PrefectHQ__prefect-1510
Token count is too large: numpy__numpy-7414
Token count is too large: conan-io__conan-5786
Token count is too large: pandas-dev__pandas-24759
Token count is too large: pandas-dev__pandas-17632
Token count is too large: huggingface__transformers-4385
Token count is too large: conan-io__conan-2845
Token count is too large: pandas-dev__pandas-23517
Token count is too large: wagtail__wagtail-10355
Token count is too large: pandas-dev__pandas-17465
Token count is too large: mesonbuild__meson-8905


Generating train split: 137 examples [00:08, 10.09 examples/s]

Token count is too large: pandas-dev__pandas-36478
Token count is too large: wagtail__wagtail-1606
Token count is too large: numpy__numpy-24077
Token count is too large: huggingface__transformers-17356
Token count is too large: pandas-dev__pandas-7560
Token count is too large: pantsbuild__pants-6619
Token count is too large: pandas-dev__pandas-35590
Token count is too large: huggingface__transformers-11620


Generating train split: 144 examples [00:09, 14.22 examples/s]

Token count is too large: mesonbuild__meson-3404
Token count is too large: pandas-dev__pandas-20992
Token count is too large: huggingface__transformers-4450
Token count is too large: pandas-dev__pandas-21523
Token count is too large: pandas-dev__pandas-28289
Token count is too large: pantsbuild__pants-13559
Token count is too large: tiangolo__fastapi-1549
Token count is too large: Qiskit__qiskit-6522
Token count is too large: conda__conda-8289
Token count is too large: pantsbuild__pants-16931
Token count is too large: Qiskit__qiskit-4584
Token count is too large: google__jax-374
Token count is too large: numpy__numpy-22421
Token count is too large: jupyterlab__jupyterlab-8806
Token count is too large: google__jax-2257
Token count is too large: apache__airflow-27323
Token count is too large: ipython__ipython-7210


Generating train split: 148 examples [00:09, 16.61 examples/s]

Token count is too large: pandas-dev__pandas-19191
Token count is too large: conan-io__conan-8355
Token count is too large: Lightning-AI__lightning-278
Token count is too large: pandas-dev__pandas-36452
Token count is too large: huggingface__transformers-17318
Token count is too large: pandas-dev__pandas-8183
Token count is too large: pandas-dev__pandas-8966
Token count is too large: ipython__ipython-4285
Token count is too large: ray-project__ray-9227
Token count is too large: pandas-dev__pandas-10609
Token count is too large: conda__conda-7415
Token count is too large: pandas-dev__pandas-8407
Token count is too large: pandas-dev__pandas-38919
Token count is too large: celery__celery-6774
Token count is too large: mesonbuild__meson-3523
Token count is too large: mesonbuild__meson-1483
Token count is too large: huggingface__transformers-1966
Token count is too large: pandas-dev__pandas-21259
Token count is too large: conda__conda-2685
Token count is too large: pandas-dev__pandas-17789


Generating train split: 156 examples [00:09, 17.17 examples/s]

Token count is too large: pandas-dev__pandas-34594
Token count is too large: ray-project__ray-5844
Token count is too large: gitpython-developers__GitPython-1263
Token count is too large: numpy__numpy-20446
Token count is too large: Qiskit__qiskit-5074
Token count is too large: pypa__pip-7643
Token count is too large: googleapis__google-cloud-python-5890
Token count is too large: pandas-dev__pandas-31278
Token count is too large: googleapis__google-cloud-python-11461
Token count is too large: mesonbuild__meson-1650
Token count is too large: Qiskit__qiskit-1391


Generating train split: 160 examples [00:10, 17.30 examples/s]

Token count is too large: ipython__ipython-14108
Token count is too large: huggingface__transformers-4773
Token count is too large: huggingface__transformers-4916
Token count is too large: pyca__cryptography-3658
Token count is too large: pandas-dev__pandas-36139
Token count is too large: conda__conda-6370
Token count is too large: conda__conda-7515
Token count is too large: pandas-dev__pandas-23755
Token count is too large: docker__compose-1714
Token count is too large: pypa__pip-5836
Token count is too large: pandas-dev__pandas-22380
Token count is too large: pypa__pip-1201
Token count is too large: pandas-dev__pandas-4969
Token count is too large: pandas-dev__pandas-26300
Token count is too large: pandas-dev__pandas-4154
Token count is too large: googleapis__google-cloud-python-2730
Token count is too large: Qiskit__qiskit-8640
Token count is too large: numpy__numpy-17748
Token count is too large: googleapis__google-cloud-python-11326
Token count is too large: pandas-dev__pandas-418

Generating train split: 166 examples [00:10, 18.78 examples/s]

Token count is too large: pandas-dev__pandas-8434
Token count is too large: numpy__numpy-13397
Token count is too large: pandas-dev__pandas-36266
Token count is too large: pandas-dev__pandas-34595
Token count is too large: google__jax-1269
Token count is too large: huggingface__transformers-15835
Token count is too large: conan-io__conan-4714


Generating train split: 171 examples [00:10, 19.83 examples/s]

Token count is too large: ytdl-org__youtube-dl-1932
Token count is too large: DataDog__integrations-core-1019
Token count is too large: numpy__numpy-317
Token count is too large: pandas-dev__pandas-23963
Token count is too large: Qiskit__qiskit-3069
Token count is too large: docker__compose-1658
Token count is too large: pandas-dev__pandas-31036
Token count is too large: pandas-dev__pandas-17940
Token count is too large: docker__compose-2142
Token count is too large: pandas-dev__pandas-13761
Token count is too large: explosion__spaCy-2478
Token count is too large: wagtail__wagtail-1395
Token count is too large: celery__celery-3669


Generating train split: 178 examples [00:10, 23.83 examples/s]

Token count is too large: googleapis__google-cloud-python-1334
Token count is too large: celery__celery-5297
Token count is too large: conda__conda-5316
Token count is too large: huggingface__transformers-9554
Token count is too large: Qiskit__qiskit-2531
Token count is too large: ipython__ipython-8620
Token count is too large: pandas-dev__pandas-28945
Token count is too large: pandas-dev__pandas-23524
Token count is too large: pandas-dev__pandas-4585
Token count is too large: conda__conda-7289
Token count is too large: pypa__pip-5623
Token count is too large: Qiskit__qiskit-2928
Token count is too large: google__jax-337


Generating train split: 183 examples [00:11, 20.64 examples/s]

Token count is too large: pypa__pip-3196
Token count is too large: mesonbuild__meson-52
Token count is too large: ipython__ipython-2218
Token count is too large: pandas-dev__pandas-31820
Token count is too large: PrefectHQ__prefect-2832
Token count is too large: pandas-dev__pandas-4459
Token count is too large: google__jax-452
Token count is too large: pandas-dev__pandas-15443
Token count is too large: pandas-dev__pandas-5187
Token count is too large: wagtail__wagtail-9194
Token count is too large: pandas-dev__pandas-31071
Token count is too large: pandas-dev__pandas-36691
Token count is too large: pandas-dev__pandas-30638


Generating train split: 186 examples [00:11, 17.26 examples/s]

Token count is too large: pandas-dev__pandas-27304
Token count is too large: huggingface__transformers-18280
Token count is too large: mesonbuild__meson-4666
Token count is too large: pandas-dev__pandas-26078
Token count is too large: googleapis__google-cloud-python-5594
Token count is too large: pandas-dev__pandas-33250
Token count is too large: Lightning-AI__lightning-2055
Token count is too large: pandas-dev__pandas-27291
Token count is too large: Lightning-AI__lightning-1654


Generating train split: 191 examples [00:11, 19.00 examples/s]

Token count is too large: pandas-dev__pandas-37180
Token count is too large: pandas-dev__pandas-33291
Token count is too large: docker__compose-3922
Token count is too large: numpy__numpy-23392
Token count is too large: pandas-dev__pandas-20702
Token count is too large: pantsbuild__pants-6789
Token count is too large: mesonbuild__meson-11063
Token count is too large: mesonbuild__meson-1669
Token count is too large: numpy__numpy-18146
Token count is too large: Qiskit__qiskit-4808
Token count is too large: pandas-dev__pandas-8278
Token count is too large: huggingface__transformers-19481
Token count is too large: ytdl-org__youtube-dl-718
Token count is too large: pandas-dev__pandas-35588
Token count is too large: pandas-dev__pandas-7966
Token count is too large: pypa__pip-8291


Generating train split: 197 examples [00:12, 17.41 examples/s]

Token count is too large: mesonbuild__meson-6838
Token count is too large: conda__conda-12670
Token count is too large: numpy__numpy-20182
Token count is too large: mesonbuild__meson-7978
Token count is too large: mesonbuild__meson-7475
Token count is too large: google__jax-834
Token count is too large: googleapis__google-cloud-python-5245
Token count is too large: pandas-dev__pandas-6004
Token count is too large: apache__airflow-20902
Token count is too large: pandas-dev__pandas-5222
Token count is too large: huggingface__transformers-12548
Token count is too large: conan-io__conan-3239
Token count is too large: numpy__numpy-4406
Token count is too large: googleapis__google-cloud-python-576
Token count is too large: wagtail__wagtail-10189


Generating train split: 200 examples [00:12, 14.15 examples/s]

Token count is too large: huggingface__transformers-20681
Token count is too large: Lightning-AI__lightning-3145
Token count is too large: Qiskit__qiskit-3577
Token count is too large: conan-io__conan-2933
Token count is too large: numpy__numpy-16878
Token count is too large: apache__airflow-18804
Token count is too large: PrefectHQ__prefect-956
Token count is too large: mesonbuild__meson-1024
Token count is too large: huggingface__transformers-21398
Token count is too large: pandas-dev__pandas-15185


Generating train split: 206 examples [00:12, 16.20 examples/s]

Token count is too large: googleapis__google-cloud-python-6281
Token count is too large: pandas-dev__pandas-11330
Token count is too large: google__jax-741
Token count is too large: pandas-dev__pandas-6954
Token count is too large: huggingface__transformers-10027
Token count is too large: Qiskit__qiskit-3668
Token count is too large: ipython__ipython-5376
Token count is too large: conan-io__conan-4656
Token count is too large: jupyterlab__jupyterlab-7079
Token count is too large: pandas-dev__pandas-22963
Token count is too large: conan-io__conan-5348


Generating train split: 209 examples [00:12, 18.09 examples/s]

Token count is too large: ipython__ipython-3662
Token count is too large: docker__compose-246
Token count is too large: pandas-dev__pandas-16157
Token count is too large: ipython__ipython-6536
Token count is too large: pandas-dev__pandas-11937
Token count is too large: mesonbuild__meson-7116
Token count is too large: PrefectHQ__prefect-2599
Token count is too large: scipy__scipy-3932


Generating train split: 215 examples [00:12, 23.27 examples/s]

Token count is too large: googleapis__google-cloud-python-1925
Token count is too large: docker__compose-2020
Token count is too large: google__jax-443
Token count is too large: pandas-dev__pandas-11146
Token count is too large: Lightning-AI__lightning-2787
Token count is too large: huggingface__transformers-24238
Token count is too large: pandas-dev__pandas-7564
Token count is too large: pandas-dev__pandas-30560
Token count is too large: pandas-dev__pandas-39796


Generating train split: 218 examples [00:13, 17.01 examples/s]

Token count is too large: googleapis__google-cloud-python-6184
Token count is too large: pandas-dev__pandas-28970
Token count is too large: ytdl-org__youtube-dl-7069
Token count is too large: pandas-dev__pandas-5157
Token count is too large: huggingface__transformers-22938
Token count is too large: pandas-dev__pandas-7108
Token count is too large: mesonbuild__meson-2630
Token count is too large: huggingface__transformers-17294
Token count is too large: mesonbuild__meson-8742
Token count is too large: numpy__numpy-5500
Token count is too large: conda__conda-4361
Token count is too large: Lightning-AI__lightning-155
Token count is too large: numpy__numpy-12265
Token count is too large: wagtail__wagtail-964
Token count is too large: huggingface__transformers-14636
Token count is too large: pyca__cryptography-7697
Token count is too large: apache__airflow-33673
Token count is too large: pandas-dev__pandas-10188
Token count is too large: pandas-dev__pandas-11371
Token count is too large: Pr

Generating train split: 223 examples [00:13, 13.08 examples/s]

Token count is too large: mesonbuild__meson-11654
Token count is too large: pandas-dev__pandas-6408
Token count is too large: tensorflow__models-4554
Token count is too large: pantsbuild__pants-11661
Token count is too large: pandas-dev__pandas-15934
Token count is too large: tiangolo__fastapi-918
Token count is too large: mesonbuild__meson-7083
Token count is too large: ipython__ipython-1332
Token count is too large: pandas-dev__pandas-22479
Token count is too large: pypa__pip-5664
Token count is too large: huggingface__transformers-7054
Token count is too large: PrefectHQ__prefect-1059


Generating train split: 231 examples [00:14, 16.83 examples/s]

Token count is too large: huggingface__transformers-13225
Token count is too large: pandas-dev__pandas-27682
Token count is too large: conan-io__conan-4546
Token count is too large: tensorflow__models-7330
Token count is too large: pandas-dev__pandas-4791
Token count is too large: pandas-dev__pandas-38546
Token count is too large: ipython__ipython-4270
Token count is too large: Lightning-AI__lightning-3163
Token count is too large: Qiskit__qiskit-6322
Token count is too large: pantsbuild__pants-5040
Token count is too large: Qiskit__qiskit-8134


Generating train split: 235 examples [00:14, 18.59 examples/s]

Token count is too large: pandas-dev__pandas-18629
Token count is too large: Lightning-AI__lightning-409
Token count is too large: docker__compose-6390
Token count is too large: Qiskit__qiskit-7970
Token count is too large: pantsbuild__pants-18940
Token count is too large: huggingface__transformers-20064
Token count is too large: pantsbuild__pants-16155
Token count is too large: pandas-dev__pandas-27511


Generating train split: 238 examples [00:14, 17.60 examples/s]

Token count is too large: pandas-dev__pandas-26419
Token count is too large: ray-project__ray-9547
Token count is too large: jupyterlab__jupyterlab-1954
Token count is too large: pypa__pip-4047
Token count is too large: docker__compose-6973
Token count is too large: PrefectHQ__prefect-2546
Token count is too large: pandas-dev__pandas-39726
Token count is too large: conan-io__conan-6653
Token count is too large: Qiskit__qiskit-4263
Token count is too large: Lightning-AI__lightning-2986
Token count is too large: huggingface__transformers-21752
Token count is too large: Qiskit__qiskit-6199
Token count is too large: ipython__ipython-2904
Token count is too large: pypa__pip-9708
Token count is too large: googleapis__google-cloud-python-11352


Generating train split: 244 examples [00:15, 14.49 examples/s]

Token count is too large: pandas-dev__pandas-20844
Token count is too large: Qiskit__qiskit-3211
Token count is too large: pandas-dev__pandas-20632
Token count is too large: conan-io__conan-6720
Token count is too large: pandas-dev__pandas-36316
Token count is too large: google__jax-1262
Token count is too large: pandas-dev__pandas-20677
Token count is too large: pandas-dev__pandas-7112
Token count is too large: googleapis__google-cloud-python-771
Token count is too large: pandas-dev__pandas-6516
Token count is too large: pandas-dev__pandas-11796
Token count is too large: googleapis__google-cloud-python-3051
Token count is too large: pypa__pip-10202
Token count is too large: pantsbuild__pants-18165
There was an error processing
Token count is too large: Lightning-AI__lightning-2960
Token count is too large: ytdl-org__youtube-dl-18371
Token count is too large: huggingface__transformers-19477
Token count is too large: pandas-dev__pandas-17844


Generating train split: 246 examples [00:15, 10.70 examples/s]

Token count is too large: mesonbuild__meson-9546
Token count is too large: docker__compose-4528
Token count is too large: pandas-dev__pandas-39194
Token count is too large: numpy__numpy-7152
Token count is too large: pantsbuild__pants-10475
Token count is too large: docker__compose-1931
Token count is too large: PrefectHQ__prefect-2521
Token count is too large: apache__airflow-8775
Token count is too large: pandas-dev__pandas-21457
Token count is too large: pypa__pip-8483
Token count is too large: jupyterlab__jupyterlab-12844
Token count is too large: apache__airflow-10317
Token count is too large: numpy__numpy-5981
Token count is too large: pandas-dev__pandas-34816
Token count is too large: pandas-dev__pandas-32548
Token count is too large: numpy__numpy-5713
Token count is too large: googleapis__google-cloud-python-2962


Generating train split: 250 examples [00:15, 10.51 examples/s]

Token count is too large: numpy__numpy-3216
Token count is too large: numpy__numpy-21654
Token count is too large: dagster-io__dagster-8858
Token count is too large: googleapis__google-cloud-python-3130
Token count is too large: pandas-dev__pandas-8676
Token count is too large: pandas-dev__pandas-31729
Token count is too large: pandas-dev__pandas-7852
Token count is too large: googleapis__google-cloud-python-4199
Token count is too large: ipython__ipython-4338
Token count is too large: pandas-dev__pandas-18388
Token count is too large: huggingface__transformers-8423
Token count is too large: ytdl-org__youtube-dl-8372
Token count is too large: pantsbuild__pants-10779
Token count is too large: docker__compose-5541
Token count is too large: conda__conda-6923
Token count is too large: ipython__ipython-1502
Token count is too large: huggingface__transformers-12359
Token count is too large: googleapis__google-cloud-python-930


Generating train split: 254 examples [00:16, 11.10 examples/s]

Token count is too large: pandas-dev__pandas-20046
Token count is too large: google__jax-692
Token count is too large: googleapis__google-cloud-python-3927
Token count is too large: pantsbuild__pants-5959
Token count is too large: Lightning-AI__lightning-933
Token count is too large: pandas-dev__pandas-25667


Generating train split: 256 examples [00:16,  9.76 examples/s]

Token count is too large: celery__celery-5686
Token count is too large: Lightning-AI__lightning-1441
Token count is too large: dagster-io__dagster-6818
Token count is too large: huggingface__transformers-14408
Token count is too large: Qiskit__qiskit-4017
Token count is too large: Qiskit__qiskit-10008
Token count is too large: pandas-dev__pandas-14540
Token count is too large: docker__compose-6598
Token count is too large: pandas-dev__pandas-38443
Token count is too large: celery__celery-5154


Generating train split: 258 examples [00:16,  9.11 examples/s]

Token count is too large: pandas-dev__pandas-33761
Token count is too large: pandas-dev__pandas-21477
Token count is too large: pandas-dev__pandas-21224
Token count is too large: pantsbuild__pants-17716
Token count is too large: docker__compose-2743
Token count is too large: pandas-dev__pandas-38567


Generating train split: 262 examples [00:17, 10.05 examples/s]

Token count is too large: pandas-dev__pandas-11653
Token count is too large: Qiskit__qiskit-3526
Token count is too large: Qiskit__qiskit-1278
Token count is too large: docker__compose-4565
Token count is too large: google__jax-3082
Token count is too large: ray-project__ray-6915
Token count is too large: Lightning-AI__lightning-3404
Token count is too large: mesonbuild__meson-2109
Token count is too large: huggingface__transformers-21071
Token count is too large: celery__celery-4540
Token count is too large: ray-project__ray-8302


Generating train split: 266 examples [00:17, 12.19 examples/s]

Token count is too large: numpy__numpy-10741
Token count is too large: pandas-dev__pandas-17822
Token count is too large: celery__celery-6103
Token count is too large: apache__airflow-15212
Token count is too large: pypa__pip-8666
Token count is too large: pandas-dev__pandas-5935
Token count is too large: pypa__pip-1896
Token count is too large: numpy__numpy-14129
Token count is too large: mesonbuild__meson-2952
Token count is too large: apache__airflow-11529


Generating train split: 271 examples [00:17, 14.55 examples/s]

Token count is too large: pantsbuild__pants-9722
Token count is too large: scipy__scipy-3679
Token count is too large: conan-io__conan-133
Token count is too large: pandas-dev__pandas-35999
Token count is too large: pandas-dev__pandas-27700
Token count is too large: googleapis__google-cloud-python-7753
Token count is too large: ray-project__ray-5863
Token count is too large: pantsbuild__pants-13998
Token count is too large: pandas-dev__pandas-15964
Token count is too large: googleapis__google-cloud-python-3787
Token count is too large: huggingface__transformers-24049
Token count is too large: docker__compose-5362
Token count is too large: scipy__scipy-2659
Token count is too large: huggingface__transformers-3041
Token count is too large: pandas-dev__pandas-17628
Token count is too large: conan-io__conan-4716
Token count is too large: pandas-dev__pandas-10857
Token count is too large: pandas-dev__pandas-9936
Token count is too large: twisted__twisted-11589


Generating train split: 280 examples [00:17, 19.60 examples/s]

Token count is too large: pandas-dev__pandas-3378
Token count is too large: pypa__pip-8932
Token count is too large: pypa__pip-6236
Token count is too large: huggingface__transformers-15482
Token count is too large: pyca__cryptography-5072
Token count is too large: pandas-dev__pandas-31017
Token count is too large: wagtail__wagtail-10501
Token count is too large: pantsbuild__pants-16455
Token count is too large: pandas-dev__pandas-19908
Token count is too large: conda__conda-4651
Token count is too large: docker__compose-2392
Token count is too large: docker__compose-6984


Generating train split: 283 examples [00:18, 17.71 examples/s]

Token count is too large: docker__compose-2564
Token count is too large: ray-project__ray-541
Token count is too large: ytdl-org__youtube-dl-24883
Token count is too large: pandas-dev__pandas-10055
Token count is too large: Qiskit__qiskit-6027
Token count is too large: pandas-dev__pandas-10305
Token count is too large: huggingface__transformers-911
Token count is too large: pandas-dev__pandas-16220
Token count is too large: pandas-dev__pandas-5995
Token count is too large: conda__conda-4776


Generating train split: 291 examples [00:18, 18.09 examples/s]

Token count is too large: huggingface__transformers-24654
Token count is too large: twisted__twisted-11825
Token count is too large: pandas-dev__pandas-4765
Token count is too large: pandas-dev__pandas-33480
Token count is too large: huggingface__transformers-13478
Token count is too large: pypa__pip-9198
Token count is too large: Qiskit__qiskit-3087
Token count is too large: googleapis__google-cloud-python-4381
Token count is too large: pandas-dev__pandas-19441
Token count is too large: pandas-dev__pandas-10110
Token count is too large: pandas-dev__pandas-21406


Generating train split: 294 examples [00:19, 13.01 examples/s]

Token count is too large: Qiskit__qiskit-1210
Token count is too large: Qiskit__qiskit-6803
Token count is too large: pandas-dev__pandas-17194
Token count is too large: pandas-dev__pandas-24056
Token count is too large: pandas-dev__pandas-7086
Token count is too large: google__jax-3207


Generating train split: 300 examples [00:19, 18.08 examples/s]

Token count is too large: pandas-dev__pandas-34943
Token count is too large: pantsbuild__pants-18262
Token count is too large: pandas-dev__pandas-28151
Token count is too large: pandas-dev__pandas-32055
Token count is too large: pandas-dev__pandas-29431
Token count is too large: googleapis__google-cloud-python-192
Token count is too large: mesonbuild__meson-5387
Token count is too large: huggingface__transformers-12558
Token count is too large: pandas-dev__pandas-39008
Token count is too large: ray-project__ray-7434
Token count is too large: PrefectHQ__prefect-2409
Token count is too large: ytdl-org__youtube-dl-5954
Token count is too large: scipy__scipy-4881
Token count is too large: huggingface__transformers-15702
Token count is too large: pandas-dev__pandas-17469
Token count is too large: pyca__cryptography-7532
Token count is too large: huggingface__transformers-25239
Token count is too large: huggingface__transformers-15527
Token count is too large: docker__compose-3137
Token coun

Generating train split: 303 examples [00:19, 14.91 examples/s]

Token count is too large: pandas-dev__pandas-17402
Token count is too large: docker__compose-7965
Token count is too large: pandas-dev__pandas-24754
Token count is too large: conan-io__conan-4249
Token count is too large: pandas-dev__pandas-3013
Token count is too large: huggingface__transformers-24510
Token count is too large: googleapis__google-cloud-python-5870
Token count is too large: googleapis__google-cloud-python-10196
Token count is too large: huggingface__transformers-17092
Token count is too large: conda__conda-5314
Token count is too large: PrefectHQ__prefect-1375
Token count is too large: ipython__ipython-12150
Token count is too large: pandas-dev__pandas-15535
Token count is too large: google__jax-1299


Generating train split: 312 examples [00:20, 14.84 examples/s]

Token count is too large: googleapis__google-cloud-python-11323
Token count is too large: pandas-dev__pandas-18412
Token count is too large: pantsbuild__pants-15367
Token count is too large: Qiskit__qiskit-5851
Token count is too large: jupyterlab__jupyterlab-12852
Token count is too large: mesonbuild__meson-5128
Token count is too large: huggingface__transformers-19880
Token count is too large: mesonbuild__meson-75
Token count is too large: Qiskit__qiskit-5016
Token count is too large: docker__compose-7071
Token count is too large: pandas-dev__pandas-19675
Token count is too large: conda__conda-6524
Token count is too large: PrefectHQ__prefect-779
Token count is too large: celery__celery-5773
Token count is too large: pandas-dev__pandas-33540
Token count is too large: pypa__pip-4051
Token count is too large: pandas-dev__pandas-17897
Token count is too large: pypa__pip-5773
Token count is too large: Qiskit__qiskit-8937
Token count is too large: pandas-dev__pandas-5802


Generating train split: 318 examples [00:20, 16.29 examples/s]

Token count is too large: pandas-dev__pandas-9566
Token count is too large: PrefectHQ__prefect-907
Token count is too large: celery__celery-5942
Token count is too large: ray-project__ray-10513
Token count is too large: PrefectHQ__prefect-1005
Token count is too large: mesonbuild__meson-5234
Token count is too large: google__jax-1958
Token count is too large: ipython__ipython-10561
Token count is too large: conan-io__conan-3505
Token count is too large: pandas-dev__pandas-37834
Token count is too large: celery__celery-5997
Token count is too large: pandas-dev__pandas-38536
Token count is too large: pandas-dev__pandas-18645
Token count is too large: pandas-dev__pandas-37263
Token count is too large: Qiskit__qiskit-2265
Token count is too large: pandas-dev__pandas-30377
Token count is too large: pypa__pip-2291
Token count is too large: pyca__cryptography-5594
Token count is too large: ipython__ipython-9596
Token count is too large: Qiskit__qiskit-5836
Token count is too large: mesonbuild

Generating train split: 323 examples [00:21, 12.92 examples/s]

Token count is too large: gitpython-developers__GitPython-1124
Token count is too large: pandas-dev__pandas-5564
Token count is too large: conda__conda-7131
Token count is too large: conan-io__conan-3182
Token count is too large: conda__conda-5580
Token count is too large: pandas-dev__pandas-6659
Token count is too large: scipy__scipy-4415
Token count is too large: pantsbuild__pants-4333
Token count is too large: pandas-dev__pandas-22539
Token count is too large: Qiskit__qiskit-3057
Token count is too large: mesonbuild__meson-2819
Token count is too large: mesonbuild__meson-6356
Token count is too large: pandas-dev__pandas-22261
Token count is too large: pandas-dev__pandas-8331
Token count is too large: pandas-dev__pandas-23866
Token count is too large: pandas-dev__pandas-3900
Token count is too large: ipython__ipython-3744
Token count is too large: pandas-dev__pandas-4417
Token count is too large: ipython__ipython-11718


Generating train split: 328 examples [00:21, 11.75 examples/s]

Token count is too large: apache__airflow-17719
Token count is too large: conan-io__conan-4131
Token count is too large: pandas-dev__pandas-17971
Token count is too large: huggingface__transformers-25427
Token count is too large: Lightning-AI__lightning-1577
Token count is too large: googleapis__google-cloud-python-6176
Token count is too large: pypa__pip-10083
Token count is too large: pyca__cryptography-7292
Token count is too large: googleapis__google-cloud-python-2313
Token count is too large: Qiskit__qiskit-8927
Token count is too large: huggingface__transformers-17188
Token count is too large: pandas-dev__pandas-4713
Token count is too large: Qiskit__qiskit-10287
Token count is too large: pandas-dev__pandas-18889
Token count is too large: pandas-dev__pandas-32510
Token count is too large: numpy__numpy-16835


Generating train split: 332 examples [00:21, 12.49 examples/s]

Token count is too large: pandas-dev__pandas-8417
Token count is too large: pantsbuild__pants-5535
Token count is too large: pandas-dev__pandas-8003
Token count is too large: pandas-dev__pandas-38560
Token count is too large: pyca__cryptography-3609
Token count is too large: pantsbuild__pants-17416
Token count is too large: ytdl-org__youtube-dl-29810


Generating train split: 335 examples [00:22, 12.02 examples/s]

Token count is too large: pandas-dev__pandas-11173
Token count is too large: mesonbuild__meson-4259
Token count is too large: pandas-dev__pandas-7322
Token count is too large: pandas-dev__pandas-34976
Token count is too large: Qiskit__qiskit-1915
Token count is too large: numpy__numpy-10392
Token count is too large: Qiskit__qiskit-2061
Token count is too large: huggingface__transformers-14071
Token count is too large: pandas-dev__pandas-36934
Token count is too large: ytdl-org__youtube-dl-17097
Token count is too large: googleapis__google-cloud-python-1787
Token count is too large: pandas-dev__pandas-27631
Token count is too large: huggingface__transformers-11075
Token count is too large: huggingface__transformers-6686
Token count is too large: huggingface__transformers-9294


Generating train split: 339 examples [00:22,  9.84 examples/s]

Token count is too large: pandas-dev__pandas-5439
Token count is too large: mesonbuild__meson-9452
Token count is too large: pandas-dev__pandas-38173
Token count is too large: pandas-dev__pandas-5554
Token count is too large: celery__celery-5373
Token count is too large: ytdl-org__youtube-dl-21536
Token count is too large: apache__airflow-19907
Token count is too large: pyca__cryptography-6603
Token count is too large: mesonbuild__meson-9162
Token count is too large: Qiskit__qiskit-2573
Token count is too large: huggingface__transformers-9038
Token count is too large: apache__airflow-963
Token count is too large: ray-project__ray-8770


Generating train split: 341 examples [00:22,  9.72 examples/s]

Token count is too large: googleapis__google-cloud-python-11168
Token count is too large: pandas-dev__pandas-31053
Token count is too large: huggingface__transformers-13939
Token count is too large: apache__airflow-13073
Token count is too large: celery__celery-4402
Token count is too large: pandas-dev__pandas-30151
Token count is too large: pandas-dev__pandas-34737
Token count is too large: Qiskit__qiskit-5492
Token count is too large: pantsbuild__pants-5521
Token count is too large: numpy__numpy-10031
Token count is too large: mesonbuild__meson-6887


Generating train split: 345 examples [00:23, 11.94 examples/s]

Token count is too large: pandas-dev__pandas-8275
Token count is too large: docker__compose-4469
Token count is too large: apache__airflow-18742
Token count is too large: pandas-dev__pandas-14101
Token count is too large: pandas-dev__pandas-25246
Token count is too large: Qiskit__qiskit-3683
Token count is too large: open-mmlab__mmdetection-9358
There was an error processing
Token count is too large: mesonbuild__meson-7108
Token count is too large: celery__celery-5232
Token count is too large: pandas-dev__pandas-37874


Generating train split: 348 examples [00:23, 11.10 examples/s]

Token count is too large: huggingface__transformers-12803
Token count is too large: conda__conda-6775
Token count is too large: apache__airflow-1948
Token count is too large: googleapis__google-cloud-python-352
Token count is too large: ray-project__ray-6258
Token count is too large: apache__airflow-27898
Token count is too large: docker__compose-3980
Token count is too large: huggingface__transformers-23897
Token count is too large: pandas-dev__pandas-5482


Generating train split: 352 examples [00:23, 14.21 examples/s]

Token count is too large: pandas-dev__pandas-16126
Token count is too large: googleapis__google-cloud-python-6577
Token count is too large: huggingface__transformers-7075
Token count is too large: numpy__numpy-9952
Token count is too large: mesonbuild__meson-1516
Token count is too large: mesonbuild__meson-7791
Token count is too large: pandas-dev__pandas-16610
Token count is too large: Lightning-AI__lightning-1353
Token count is too large: numpy__numpy-13564
Token count is too large: pandas-dev__pandas-5600
Token count is too large: googleapis__google-cloud-python-9995
Token count is too large: huggingface__transformers-7340
Token count is too large: pantsbuild__pants-15408
Token count is too large: numpy__numpy-21722
Token count is too large: numpy__numpy-13218
Token count is too large: docker__compose-4930
Token count is too large: google__jax-1015
Token count is too large: pandas-dev__pandas-27341
Token count is too large: Qiskit__qiskit-1314
Token count is too large: numpy__numpy-

Generating train split: 354 examples [00:24,  8.79 examples/s]

Token count is too large: pandas-dev__pandas-37568
Token count is too large: pandas-dev__pandas-35078
Token count is too large: googleapis__google-cloud-python-6633
Token count is too large: apache__airflow-28008
Token count is too large: huggingface__transformers-11248
Token count is too large: PrefectHQ__prefect-1442
Token count is too large: pandas-dev__pandas-28933
Token count is too large: pypa__pip-2270
Token count is too large: Qiskit__qiskit-9762
Token count is too large: apache__airflow-21155
Token count is too large: Qiskit__qiskit-948
Token count is too large: pandas-dev__pandas-16563


Generating train split: 358 examples [00:24,  9.64 examples/s]

Token count is too large: pandas-dev__pandas-20705
Token count is too large: docker__compose-6535
Token count is too large: ray-project__ray-7238
Token count is too large: huggingface__transformers-21869
Token count is too large: pandas-dev__pandas-38146
Token count is too large: huggingface__transformers-5082
Token count is too large: google__jax-179
Token count is too large: pandas-dev__pandas-21261
Token count is too large: pandas-dev__pandas-20893
Token count is too large: pantsbuild__pants-12023
Token count is too large: docker__compose-7294
Token count is too large: pandas-dev__pandas-6380
Token count is too large: pandas-dev__pandas-38426
Token count is too large: numpy__numpy-5886
Token count is too large: pandas-dev__pandas-20782
Token count is too large: numpy__numpy-10588


Generating train split: 361 examples [00:24,  9.18 examples/s]

Token count is too large: pandas-dev__pandas-23592
Token count is too large: pandas-dev__pandas-18232
Token count is too large: numpy__numpy-5455
Token count is too large: googleapis__google-cloud-python-8980
Token count is too large: numpy__numpy-3120
Token count is too large: mesonbuild__meson-3322


Generating train split: 364 examples [00:25,  9.53 examples/s]

Token count is too large: huggingface__transformers-17316
Token count is too large: pypa__pip-1750
Token count is too large: pandas-dev__pandas-3722
Token count is too large: celery__celery-3903
Token count is too large: pandas-dev__pandas-5417
Token count is too large: pandas-dev__pandas-20404
Token count is too large: conda__conda-5221
Token count is too large: celery__celery-4545
Token count is too large: huggingface__transformers-8633
Token count is too large: pandas-dev__pandas-19054
Token count is too large: ytdl-org__youtube-dl-353
Token count is too large: pandas-dev__pandas-18269
Token count is too large: huggingface__transformers-21008
Token count is too large: apache__airflow-9473


Generating train split: 368 examples [00:25,  9.28 examples/s]

Token count is too large: Qiskit__qiskit-2216
Token count is too large: numpy__numpy-11849
Token count is too large: googleapis__google-cloud-python-6655
Token count is too large: pandas-dev__pandas-16060
Token count is too large: Qiskit__qiskit-2533
Token count is too large: pandas-dev__pandas-37657


Generating train split: 371 examples [00:25,  9.89 examples/s]

Token count is too large: huggingface__transformers-19239
Token count is too large: apache__airflow-15843
Token count is too large: pandas-dev__pandas-20510
Token count is too large: PrefectHQ__prefect-2608
Token count is too large: pantsbuild__pants-12392
Token count is too large: pandas-dev__pandas-38293
Token count is too large: Qiskit__qiskit-9316
Token count is too large: huggingface__transformers-22440
Token count is too large: twisted__twisted-11734
Token count is too large: pandas-dev__pandas-19943
Token count is too large: huggingface__transformers-22857
Token count is too large: numpy__numpy-12257
Token count is too large: pandas-dev__pandas-31477
Token count is too large: pandas-dev__pandas-17343


Generating train split: 381 examples [00:26, 17.69 examples/s]

Token count is too large: Qiskit__qiskit-6581
Token count is too large: PrefectHQ__prefect-2570
Token count is too large: pandas-dev__pandas-27921
Token count is too large: PrefectHQ__prefect-382
Token count is too large: jupyterlab__jupyterlab-9568
Token count is too large: conda__conda-8053
Token count is too large: PrefectHQ__prefect-1437
Token count is too large: ytdl-org__youtube-dl-23199
Token count is too large: conda__conda-10638
Token count is too large: numpy__numpy-9332
Token count is too large: googleapis__google-cloud-python-7444
Token count is too large: wagtail__wagtail-139
Token count is too large: Qiskit__qiskit-6020
Token count is too large: scipy__scipy-3926
Token count is too large: open-mmlab__mmdetection-7891


Generating train split: 385 examples [00:26, 18.47 examples/s]

Token count is too large: pandas-dev__pandas-29186
Token count is too large: Qiskit__qiskit-7028
Token count is too large: PrefectHQ__prefect-2354
Token count is too large: ipython__ipython-3377
Token count is too large: conan-io__conan-5319
Token count is too large: huggingface__transformers-17629
Token count is too large: mesonbuild__meson-6170
Token count is too large: wagtail__wagtail-356


Generating train split: 389 examples [00:26, 17.59 examples/s]

Token count is too large: googleapis__google-cloud-python-11337
Token count is too large: pypa__pip-3088
Token count is too large: pypa__pip-4666
Token count is too large: pandas-dev__pandas-37118
Token count is too large: pandas-dev__pandas-27826
Token count is too large: jupyterlab__jupyterlab-14273
Token count is too large: pandas-dev__pandas-18127
Token count is too large: pandas-dev__pandas-6977
Token count is too large: pandas-dev__pandas-24882
Token count is too large: pypa__pip-6613
Token count is too large: huggingface__transformers-9907
Token count is too large: pandas-dev__pandas-37329
Token count is too large: pandas-dev__pandas-5192
Token count is too large: Qiskit__qiskit-1668
Token count is too large: PrefectHQ__prefect-2491
Token count is too large: pandas-dev__pandas-25863
Token count is too large: apache__airflow-9067


Generating train split: 392 examples [00:27, 12.74 examples/s]

Token count is too large: mesonbuild__meson-3985
Token count is too large: ytdl-org__youtube-dl-15728
Token count is too large: PrefectHQ__prefect-3109
Token count is too large: docker__compose-1711
Token count is too large: pyca__cryptography-6515
Token count is too large: pandas-dev__pandas-16557
Token count is too large: conan-io__conan-8208
Token count is too large: pandas-dev__pandas-16141
Token count is too large: ipython__ipython-9884
Token count is too large: pandas-dev__pandas-24309
Token count is too large: twisted__twisted-11598
Token count is too large: pandas-dev__pandas-38681
Token count is too large: Qiskit__qiskit-5332
Token count is too large: ray-project__ray-7982
Token count is too large: pypa__pip-5163


Generating train split: 396 examples [00:27, 14.30 examples/s]

Token count is too large: pandas-dev__pandas-8075
Token count is too large: explosion__spaCy-2985
Token count is too large: Qiskit__qiskit-8590
Token count is too large: conda__conda-11849
Token count is too large: pandas-dev__pandas-25926
Token count is too large: mesonbuild__meson-9603
Token count is too large: numpy__numpy-14763
Token count is too large: apache__airflow-454
Token count is too large: pandas-dev__pandas-16092


Generating train split: 400 examples [00:27, 13.73 examples/s]

Token count is too large: googleapis__google-cloud-python-6296
Token count is too large: pandas-dev__pandas-15569
Token count is too large: Qiskit__qiskit-781
Token count is too large: jupyterlab__jupyterlab-3107
Token count is too large: huggingface__transformers-24226
Token count is too large: ytdl-org__youtube-dl-1811
Token count is too large: Qiskit__qiskit-893
Token count is too large: mesonbuild__meson-11667
Token count is too large: huggingface__transformers-9233
Token count is too large: pandas-dev__pandas-14645
Token count is too large: huggingface__transformers-15567
Token count is too large: pandas-dev__pandas-21580
Token count is too large: Qiskit__qiskit-4019


Generating train split: 404 examples [00:27, 12.42 examples/s]

Token count is too large: pandas-dev__pandas-3683
Token count is too large: googleapis__google-cloud-python-6285
Token count is too large: mesonbuild__meson-5133
Token count is too large: conan-io__conan-3290
Token count is too large: googleapis__google-cloud-python-2623
Token count is too large: pandas-dev__pandas-14864
Token count is too large: pantsbuild__pants-6871
Token count is too large: pandas-dev__pandas-28677
Token count is too large: conan-io__conan-3804
Token count is too large: apache__airflow-1230
Token count is too large: ipython__ipython-2370
Token count is too large: pandas-dev__pandas-3555


Generating train split: 408 examples [00:28, 15.24 examples/s]

Token count is too large: conda__conda-8892
Token count is too large: numpy__numpy-8774
Token count is too large: ytdl-org__youtube-dl-13773
Token count is too large: Lightning-AI__lightning-1632
Token count is too large: mesonbuild__meson-351
Token count is too large: numpy__numpy-5186
Token count is too large: pandas-dev__pandas-4007
Token count is too large: pandas-dev__pandas-20672


Generating train split: 415 examples [00:28, 21.34 examples/s]

Token count is too large: huggingface__transformers-13720
Token count is too large: huggingface__transformers-6293
Token count is too large: googleapis__google-cloud-python-3626
Token count is too large: ytdl-org__youtube-dl-14455
Token count is too large: pandas-dev__pandas-36161
Token count is too large: numpy__numpy-3097
Token count is too large: pandas-dev__pandas-36771
Token count is too large: ipython__ipython-6936
Token count is too large: google__jax-733
Token count is too large: ray-project__ray-5651
Token count is too large: celery__celery-4696
Token count is too large: conda__conda-12874
Token count is too large: numpy__numpy-9469
Token count is too large: pyca__cryptography-1651
Token count is too large: pandas-dev__pandas-26014


Generating train split: 419 examples [00:28, 19.89 examples/s]

Token count is too large: pandas-dev__pandas-22810
Token count is too large: pantsbuild__pants-12197
Token count is too large: mesonbuild__meson-3716
Token count is too large: dagster-io__dagster-899
Token count is too large: pandas-dev__pandas-35951
Token count is too large: googleapis__google-cloud-python-6343
Token count is too large: pandas-dev__pandas-36551
Token count is too large: pandas-dev__pandas-22941
Token count is too large: apache__airflow-27256
Token count is too large: apache__airflow-24362
Token count is too large: googleapis__google-cloud-python-10205
Token count is too large: pantsbuild__pants-18150
Token count is too large: googleapis__google-cloud-python-328
Token count is too large: pandas-dev__pandas-14918


Generating train split: 423 examples [00:28, 16.19 examples/s]

Token count is too large: pandas-dev__pandas-22854
Token count is too large: pyca__cryptography-3132
Token count is too large: huggingface__transformers-10727
Token count is too large: google__jax-3176
Token count is too large: gitpython-developers__GitPython-677
Token count is too large: pantsbuild__pants-5211
Token count is too large: pandas-dev__pandas-20067
Token count is too large: pandas-dev__pandas-4909
Token count is too large: jupyterlab__jupyterlab-6585
Token count is too large: pandas-dev__pandas-31416
Token count is too large: pantsbuild__pants-19120
Token count is too large: pandas-dev__pandas-4970
Token count is too large: numpy__numpy-22324
Token count is too large: pandas-dev__pandas-17253
Token count is too large: conan-io__conan-2468
Token count is too large: Qiskit__qiskit-91
Token count is too large: Lightning-AI__lightning-2962
Token count is too large: PrefectHQ__prefect-2054


Generating train split: 431 examples [00:29, 19.10 examples/s]

Token count is too large: pandas-dev__pandas-11988
Token count is too large: pyca__cryptography-3328
Token count is too large: apache__airflow-12595
Token count is too large: numpy__numpy-23949
Token count is too large: google__jax-2828
Token count is too large: pandas-dev__pandas-4983
Token count is too large: ipython__ipython-7326
Token count is too large: googleapis__google-cloud-python-11567
Token count is too large: conda__conda-5239
Token count is too large: conan-io__conan-3021
Token count is too large: conan-io__conan-11666
Token count is too large: gitpython-developers__GitPython-1240


Generating train split: 439 examples [00:29, 21.87 examples/s]

Token count is too large: pantsbuild__pants-16206
Token count is too large: huggingface__transformers-3955
Token count is too large: huggingface__transformers-16139
Token count is too large: pypa__pip-2629
Token count is too large: gitpython-developers__GitPython-744
Token count is too large: pypa__pip-9467
Token count is too large: pandas-dev__pandas-36649
Token count is too large: conan-io__conan-7695
Token count is too large: pandas-dev__pandas-21223
Token count is too large: huggingface__transformers-11566
Token count is too large: Qiskit__qiskit-7887
Token count is too large: Qiskit__qiskit-5135
Token count is too large: PrefectHQ__prefect-751
Token count is too large: wagtail__wagtail-10051
Token count is too large: pantsbuild__pants-18463
Token count is too large: pandas-dev__pandas-35519
Token count is too large: Qiskit__qiskit-8671
Token count is too large: pandas-dev__pandas-28606
Token count is too large: pandas-dev__pandas-36458
Token count is too large: Lightning-AI__light

Generating train split: 449 examples [00:29, 23.08 examples/s]

Token count is too large: pandas-dev__pandas-4945
Token count is too large: pandas-dev__pandas-33337
Token count is too large: pandas-dev__pandas-37502
Token count is too large: pandas-dev__pandas-39074
Token count is too large: pantsbuild__pants-4399
Token count is too large: conan-io__conan-2920
Token count is too large: ipython__ipython-1019
Token count is too large: ray-project__ray-1783
Token count is too large: pandas-dev__pandas-38499
Token count is too large: mesonbuild__meson-5687
Token count is too large: ytdl-org__youtube-dl-3691
Token count is too large: apache__airflow-12069
Token count is too large: Qiskit__qiskit-1229
Token count is too large: pandas-dev__pandas-13397
Token count is too large: pandas-dev__pandas-30485
Token count is too large: apache__airflow-33231


Generating train split: 455 examples [00:30, 25.00 examples/s]

Token count is too large: Qiskit__qiskit-6922
Token count is too large: conda__conda-5328
Token count is too large: numpy__numpy-17974
Token count is too large: ytdl-org__youtube-dl-30506
Token count is too large: Lightning-AI__lightning-2831
Token count is too large: dagster-io__dagster-6588
Token count is too large: pandas-dev__pandas-4590
Token count is too large: celery__celery-6583
Token count is too large: conda__conda-7512
Token count is too large: pandas-dev__pandas-7356
Token count is too large: pandas-dev__pandas-33134
Token count is too large: Qiskit__qiskit-4173
Token count is too large: numpy__numpy-3322
Token count is too large: googleapis__google-cloud-python-9873
Token count is too large: ipython__ipython-6717
Token count is too large: ray-project__ray-10775
Token count is too large: apache__airflow-27808
Token count is too large: pypa__pip-6691
Token count is too large: pandas-dev__pandas-7789
Token count is too large: docker__compose-5405
Token count is too large: pan

Generating train split: 464 examples [00:30, 23.23 examples/s]

Token count is too large: conda__conda-4789
Token count is too large: numpy__numpy-23787
Token count is too large: numpy__numpy-12408
Token count is too large: mesonbuild__meson-10887
Token count is too large: Lightning-AI__lightning-1097
Token count is too large: pandas-dev__pandas-25918
Token count is too large: pypa__pip-2853
Token count is too large: pantsbuild__pants-18861
Token count is too large: Qiskit__qiskit-6870
Token count is too large: pandas-dev__pandas-14232
Token count is too large: celery__celery-5527
Token count is too large: ytdl-org__youtube-dl-31152
Token count is too large: apache__airflow-9330
Token count is too large: Qiskit__qiskit-1101
Token count is too large: conan-io__conan-2486
Token count is too large: pantsbuild__pants-9416
Token count is too large: mesonbuild__meson-10503
Token count is too large: wagtail__wagtail-7063
Token count is too large: huggingface__transformers-21049
Token count is too large: conda__conda-4329


Generating train split: 467 examples [00:30, 16.66 examples/s]

Token count is too large: huggingface__transformers-25102
Token count is too large: mesonbuild__meson-5407
Token count is too large: pyca__cryptography-2112
Token count is too large: conda__conda-7310
Token count is too large: ipython__ipython-10699
Token count is too large: pandas-dev__pandas-24725
Token count is too large: Lightning-AI__lightning-1568
Token count is too large: pypa__pip-10495
Token count is too large: ray-project__ray-8225
Token count is too large: conan-io__conan-7183
Token count is too large: ipython__ipython-2278
Token count is too large: ray-project__ray-10715
Token count is too large: Qiskit__qiskit-4351


Generating train split: 471 examples [00:31, 19.34 examples/s]

Token count is too large: pandas-dev__pandas-29469
Token count is too large: pandas-dev__pandas-17755
Token count is too large: pandas-dev__pandas-6158
Token count is too large: docker__compose-4292
Token count is too large: conan-io__conan-8985
Token count is too large: pandas-dev__pandas-5097
Token count is too large: pandas-dev__pandas-23439
Token count is too large: mesonbuild__meson-8134
Token count is too large: conda__conda-6782


Generating train split: 474 examples [00:31, 16.82 examples/s]

Token count is too large: mesonbuild__meson-10800
Token count is too large: huggingface__transformers-16661
Token count is too large: pandas-dev__pandas-11641
Token count is too large: scipy__scipy-402
Token count is too large: huggingface__transformers-9427
Token count is too large: huggingface__transformers-16668
Token count is too large: pandas-dev__pandas-35769
Token count is too large: huggingface__transformers-11207
Token count is too large: huggingface__transformers-15951
Token count is too large: ipython__ipython-10539


Generating train split: 476 examples [00:31, 15.63 examples/s]

Token count is too large: pandas-dev__pandas-21318
Token count is too large: pandas-dev__pandas-37030
Token count is too large: pantsbuild__pants-19123
Token count is too large: pandas-dev__pandas-23650
Token count is too large: dagster-io__dagster-14886
Token count is too large: Lightning-AI__lightning-2819
Token count is too large: huggingface__transformers-8989
Token count is too large: Qiskit__qiskit-2835
Token count is too large: pandas-dev__pandas-30202
Token count is too large: pandas-dev__pandas-33247
Token count is too large: numpy__numpy-23061


Generating train split: 490 examples [00:31, 26.92 examples/s]

Token count is too large: docker__compose-2550
Token count is too large: pandas-dev__pandas-19890
Token count is too large: huggingface__transformers-8397
Token count is too large: pandas-dev__pandas-17887
Token count is too large: ytdl-org__youtube-dl-18217
Token count is too large: googleapis__google-cloud-python-323
Token count is too large: pandas-dev__pandas-9896
Token count is too large: pandas-dev__pandas-21584
Token count is too large: Qiskit__qiskit-3566
Token count is too large: Qiskit__qiskit-10007
Token count is too large: ytdl-org__youtube-dl-5556
Token count is too large: celery__celery-6264
Token count is too large: pandas-dev__pandas-35492
Token count is too large: Qiskit__qiskit-9617
Token count is too large: Qiskit__qiskit-2507
Token count is too large: pantsbuild__pants-17360
Token count is too large: conan-io__conan-4239
Token count is too large: numpy__numpy-22991
Token count is too large: pandas-dev__pandas-8982
Token count is too large: pandas-dev__pandas-37073
T

Generating train split: 497 examples [00:32, 21.67 examples/s]

Token count is too large: pandas-dev__pandas-37469
Token count is too large: pandas-dev__pandas-26707
Token count is too large: google__jax-1704
Token count is too large: huggingface__transformers-17203
Token count is too large: Lightning-AI__lightning-575
Token count is too large: ipython__ipython-6866
Token count is too large: docker__compose-5309
Token count is too large: ipython__ipython-14055
Token count is too large: pandas-dev__pandas-4394
Token count is too large: apache__airflow-19985
Token count is too large: conda__conda-12016
Token count is too large: google__jax-1057
Token count is too large: huggingface__transformers-7680
Token count is too large: ipython__ipython-9097
Token count is too large: ray-project__ray-9561
Token count is too large: Qiskit__qiskit-6831
Token count is too large: pandas-dev__pandas-24538
Token count is too large: pantsbuild__pants-13541


Generating train split: 500 examples [00:32, 15.90 examples/s]

Token count is too large: huggingface__transformers-20424
Token count is too large: pypa__pip-4493
Token count is too large: pyca__cryptography-3236
Token count is too large: pandas-dev__pandas-7587
Token count is too large: Qiskit__qiskit-2045
Token count is too large: pandas-dev__pandas-21674
Token count is too large: pandas-dev__pandas-33645
Token count is too large: pandas-dev__pandas-32734
Token count is too large: ipython__ipython-10841
Token count is too large: pandas-dev__pandas-20691
Token count is too large: pypa__pip-1874
Token count is too large: pandas-dev__pandas-22277
Token count is too large: pandas-dev__pandas-20780
Token count is too large: googleapis__google-cloud-python-4851
Token count is too large: pandas-dev__pandas-18507
Token count is too large: wagtail__wagtail-3191


Generating train split: 503 examples [00:33, 11.85 examples/s]

Token count is too large: huggingface__transformers-24947
Token count is too large: pandas-dev__pandas-31088
Token count is too large: pandas-dev__pandas-8492
Token count is too large: ray-project__ray-10368
Token count is too large: conan-io__conan-8130


Generating train split: 507 examples [00:33, 12.69 examples/s]

Token count is too large: pandas-dev__pandas-21623
Token count is too large: pandas-dev__pandas-16504
Token count is too large: pypa__pip-2445
Token count is too large: pandas-dev__pandas-21361
Token count is too large: docker__compose-2830
Token count is too large: googleapis__google-cloud-python-9332
Token count is too large: pypa__pip-6331
Token count is too large: Qiskit__qiskit-6443


Generating train split: 509 examples [00:33, 12.67 examples/s]

Token count is too large: pandas-dev__pandas-20522
Token count is too large: pandas-dev__pandas-17006
Token count is too large: pandas-dev__pandas-39604
Token count is too large: pantsbuild__pants-10789
Token count is too large: conda__conda-6494
Token count is too large: numpy__numpy-4565
Token count is too large: numpy__numpy-4372
Token count is too large: pandas-dev__pandas-27801
Token count is too large: Lightning-AI__lightning-749
Token count is too large: pandas-dev__pandas-27367
Token count is too large: numpy__numpy-6406


Generating train split: 511 examples [00:33, 11.78 examples/s]

Token count is too large: huggingface__transformers-3439
Token count is too large: numpy__numpy-22436
Token count is too large: conan-io__conan-4346
Token count is too large: pandas-dev__pandas-23044
Token count is too large: Qiskit__qiskit-9089
Token count is too large: pyca__cryptography-2957
Token count is too large: google__jax-1268
Token count is too large: ipython__ipython-10263
Token count is too large: ytdl-org__youtube-dl-3855
Token count is too large: mesonbuild__meson-7757
Token count is too large: pandas-dev__pandas-30335
Token count is too large: conda__conda-11304
Token count is too large: numpy__numpy-16675
Token count is too large: pantsbuild__pants-12782
Token count is too large: ytdl-org__youtube-dl-24968
Token count is too large: pandas-dev__pandas-29888
Token count is too large: wagtail__wagtail-1070
Token count is too large: Lightning-AI__lightning-2528
Token count is too large: numpy__numpy-5546


Generating train split: 518 examples [00:34, 13.16 examples/s]

Token count is too large: pandas-dev__pandas-28128
Token count is too large: ipython__ipython-6945
Token count is too large: conda__conda-7476
Token count is too large: pandas-dev__pandas-14650
Token count is too large: docker__compose-411
Token count is too large: google__jax-2626
Token count is too large: pandas-dev__pandas-39253
Token count is too large: pandas-dev__pandas-7440
Token count is too large: pandas-dev__pandas-23353
Token count is too large: googleapis__google-cloud-python-6086
Token count is too large: pandas-dev__pandas-37461
Token count is too large: pandas-dev__pandas-6363
Token count is too large: pandas-dev__pandas-13765
Token count is too large: pandas-dev__pandas-21558
Token count is too large: pypa__pip-11663
Token count is too large: Qiskit__qiskit-6242
Token count is too large: pantsbuild__pants-15098
Token count is too large: open-mmlab__mmdetection-2921
Token count is too large: pandas-dev__pandas-23855
Token count is too large: Qiskit__qiskit-7095


Generating train split: 522 examples [00:34, 13.78 examples/s]

Token count is too large: huggingface__transformers-14697
Token count is too large: numpy__numpy-3191
Token count is too large: pandas-dev__pandas-19250
Token count is too large: huggingface__transformers-9131


Generating train split: 525 examples [00:34, 12.08 examples/s]

Token count is too large: pandas-dev__pandas-6889
Token count is too large: huggingface__transformers-13800
Token count is too large: pandas-dev__pandas-16244
Token count is too large: conan-io__conan-2581
Token count is too large: pandas-dev__pandas-34137
Token count is too large: pypa__pip-612


Generating train split: 527 examples [00:35,  7.91 examples/s]

Token count is too large: huggingface__transformers-24856
Token count is too large: huggingface__transformers-9626
Token count is too large: google__jax-586
Token count is too large: Qiskit__qiskit-1977
Token count is too large: pypa__pip-6339
Token count is too large: numpy__numpy-10030
Token count is too large: pypa__pip-8026
Token count is too large: apache__airflow-20172
Token count is too large: pandas-dev__pandas-9134
Token count is too large: googleapis__google-cloud-python-8472
Token count is too large: googleapis__google-cloud-python-11205
Token count is too large: pantsbuild__pants-12398
Token count is too large: ytdl-org__youtube-dl-9597
Token count is too large: Qiskit__qiskit-4335
Token count is too large: mesonbuild__meson-10617
Token count is too large: Qiskit__qiskit-9155


Generating train split: 530 examples [00:35,  8.29 examples/s]

Token count is too large: pandas-dev__pandas-13510
Token count is too large: conda__conda-6588
Token count is too large: mesonbuild__meson-4719
Token count is too large: apache__airflow-8944
Token count is too large: pandas-dev__pandas-22359
Token count is too large: pandas-dev__pandas-11286
Token count is too large: conda__conda-7049
Token count is too large: mesonbuild__meson-4826
Token count is too large: google__jax-870
Token count is too large: ytdl-org__youtube-dl-17076


Generating train split: 534 examples [00:36, 10.24 examples/s]

Token count is too large: pandas-dev__pandas-11325
Token count is too large: Qiskit__qiskit-8197
Token count is too large: pandas-dev__pandas-24105
Token count is too large: pandas-dev__pandas-38504
Token count is too large: numpy__numpy-3121
Token count is too large: conan-io__conan-4556
Token count is too large: pyca__cryptography-2739
Token count is too large: Qiskit__qiskit-575


Generating train split: 542 examples [00:36, 16.95 examples/s]

Token count is too large: pandas-dev__pandas-6690
Token count is too large: pandas-dev__pandas-5753
Token count is too large: huggingface__transformers-15017
Token count is too large: Qiskit__qiskit-2084
Token count is too large: Qiskit__qiskit-2939
Token count is too large: pandas-dev__pandas-24450
Token count is too large: pandas-dev__pandas-23433
Token count is too large: ytdl-org__youtube-dl-30366
Token count is too large: pandas-dev__pandas-36971


Generating train split: 547 examples [00:36, 18.65 examples/s]

Token count is too large: googleapis__google-cloud-python-9321
Token count is too large: dagster-io__dagster-4833
Token count is too large: docker__compose-3299
Token count is too large: Qiskit__qiskit-5257
Token count is too large: pandas-dev__pandas-37406
Token count is too large: Qiskit__qiskit-2880
Token count is too large: docker__compose-6100
Token count is too large: conda__conda-7826
Token count is too large: ipython__ipython-1870
Token count is too large: pandas-dev__pandas-17640
Token count is too large: mesonbuild__meson-7254
Token count is too large: Qiskit__qiskit-3319
Token count is too large: pandas-dev__pandas-30960
Token count is too large: twisted__twisted-649
Token count is too large: numpy__numpy-20759


Generating train split: 551 examples [00:36, 20.48 examples/s]

Token count is too large: huggingface__transformers-17656
Token count is too large: pandas-dev__pandas-25247
Token count is too large: pandas-dev__pandas-14105
Token count is too large: pandas-dev__pandas-9701
Token count is too large: conan-io__conan-290
Token count is too large: conda__conda-2950
Token count is too large: numpy__numpy-11200
Token count is too large: pandas-dev__pandas-37864
Token count is too large: docker__compose-4541
Token count is too large: ytdl-org__youtube-dl-1464


Generating train split: 554 examples [00:37, 15.94 examples/s]

Token count is too large: googleapis__google-cloud-python-1008
Token count is too large: pandas-dev__pandas-3864
Token count is too large: numpy__numpy-17053
Token count is too large: pandas-dev__pandas-25419
Token count is too large: conda__conda-5190
Token count is too large: pantsbuild__pants-10052
Token count is too large: pandas-dev__pandas-4768
Token count is too large: googleapis__google-cloud-python-8666
Token count is too large: huggingface__transformers-14894
Token count is too large: pandas-dev__pandas-37958


Generating train split: 557 examples [00:37, 17.61 examples/s]

Token count is too large: Qiskit__qiskit-6822
Token count is too large: numpy__numpy-7654
Token count is too large: docker__compose-4730
Token count is too large: JohnSnowLabs__spark-nlp-13873
Token count is too large: huggingface__transformers-19531
Token count is too large: pantsbuild__pants-16183
Token count is too large: pandas-dev__pandas-34863
Token count is too large: celery__celery-5718


Generating train split: 561 examples [00:37, 17.96 examples/s]

Token count is too large: pandas-dev__pandas-6038
Token count is too large: jupyterlab__jupyterlab-6372
Token count is too large: Qiskit__qiskit-6730
Token count is too large: pandas-dev__pandas-5772
Token count is too large: pandas-dev__pandas-37009
Token count is too large: Qiskit__qiskit-7484
Token count is too large: googleapis__google-cloud-python-1804
Token count is too large: apache__airflow-32397
Token count is too large: pandas-dev__pandas-3618
Token count is too large: ipython__ipython-3555
Token count is too large: Qiskit__qiskit-3513
Token count is too large: ipython__ipython-13534
Token count is too large: conan-io__conan-12353


Generating train split: 569 examples [00:37, 22.06 examples/s]

Token count is too large: pandas-dev__pandas-38379
Token count is too large: Qiskit__qiskit-4893
Token count is too large: celery__celery-5486
Token count is too large: pandas-dev__pandas-35776
Token count is too large: googleapis__google-cloud-python-1843
Token count is too large: docker__compose-4861
Token count is too large: pandas-dev__pandas-14977
Token count is too large: ipython__ipython-4960
Token count is too large: wagtail__wagtail-5479
Token count is too large: pandas-dev__pandas-37564
Token count is too large: numpy__numpy-6596
Token count is too large: huggingface__transformers-20630
Token count is too large: Qiskit__qiskit-7584
Token count is too large: celery__celery-4403
Token count is too large: googleapis__google-cloud-python-373
Token count is too large: huggingface__transformers-9726
Token count is too large: docker__compose-5277
Token count is too large: mesonbuild__meson-4746
Token count is too large: pandas-dev__pandas-37367
Token count is too large: numpy__numpy

Generating train split: 576 examples [00:38, 18.56 examples/s]

Token count is too large: pandas-dev__pandas-23004
Token count is too large: pantsbuild__pants-18218
Token count is too large: numpy__numpy-22372
Token count is too large: mesonbuild__meson-11050
Token count is too large: Qiskit__qiskit-4166
Token count is too large: huggingface__transformers-18419
Token count is too large: numpy__numpy-5999
Token count is too large: numpy__numpy-3049
Token count is too large: google__jax-3098
Token count is too large: Qiskit__qiskit-7411
Token count is too large: scipy__scipy-3309
Token count is too large: pypa__pip-5405
Token count is too large: mesonbuild__meson-9615
Token count is too large: pandas-dev__pandas-27101
Token count is too large: ytdl-org__youtube-dl-12085
Token count is too large: PrefectHQ__prefect-1226
Token count is too large: dagster-io__dagster-1029
Token count is too large: conan-io__conan-4767
Token count is too large: open-mmlab__mmdetection-8136
Token count is too large: googleapis__google-cloud-python-1469


Generating train split: 579 examples [00:38, 14.16 examples/s]

Token count is too large: pandas-dev__pandas-14780
Token count is too large: pandas-dev__pandas-22030
Token count is too large: pandas-dev__pandas-21092
Token count is too large: numpy__numpy-21015
Token count is too large: pandas-dev__pandas-29567
Token count is too large: pandas-dev__pandas-14330
Token count is too large: ipython__ipython-13778
Token count is too large: Qiskit__qiskit-1860
Token count is too large: pandas-dev__pandas-30743
Token count is too large: apache__airflow-8430


Generating train split: 581 examples [00:38, 12.26 examples/s]

Token count is too large: huggingface__transformers-11466
Token count is too large: pandas-dev__pandas-21879
Token count is too large: pantsbuild__pants-6594
Token count is too large: google__jax-1524
Token count is too large: pandas-dev__pandas-9629
Token count is too large: pandas-dev__pandas-15515


Generating train split: 586 examples [00:39, 13.53 examples/s]

Token count is too large: numpy__numpy-3122
Token count is too large: googleapis__google-cloud-python-788
Token count is too large: huggingface__transformers-22190
Token count is too large: Qiskit__qiskit-760
Token count is too large: dagster-io__dagster-8618
Token count is too large: pandas-dev__pandas-30495
Token count is too large: numpy__numpy-10603
Token count is too large: googleapis__google-cloud-python-11320
Token count is too large: docker__compose-8644


Generating train split: 591 examples [00:39, 17.44 examples/s]

Token count is too large: PrefectHQ__prefect-624
Token count is too large: conan-io__conan-7610
Token count is too large: pandas-dev__pandas-20681
Token count is too large: Qiskit__qiskit-7765
Token count is too large: pandas-dev__pandas-13592
Token count is too large: pandas-dev__pandas-23432
Token count is too large: numpy__numpy-6730
Token count is too large: ytdl-org__youtube-dl-888
Token count is too large: docker__compose-5833
Token count is too large: pandas-dev__pandas-18829


Generating train split: 598 examples [00:39, 21.42 examples/s]

Token count is too large: pandas-dev__pandas-4714
Token count is too large: huggingface__transformers-19725
Token count is too large: pandas-dev__pandas-26947
Token count is too large: googleapis__google-cloud-python-10021
Token count is too large: conda__conda-8925
Token count is too large: mesonbuild__meson-1028
Token count is too large: mesonbuild__meson-5011
Token count is too large: pandas-dev__pandas-35585
Token count is too large: conda__conda-7499
Token count is too large: pypa__pip-299
Token count is too large: pyca__cryptography-6744
Token count is too large: Qiskit__qiskit-6508


Generating train split: 602 examples [00:39, 22.66 examples/s]

Token count is too large: huggingface__transformers-12367
Token count is too large: pandas-dev__pandas-9699
Token count is too large: numpy__numpy-2747
Token count is too large: ipython__ipython-9647
Token count is too large: pandas-dev__pandas-18805
Token count is too large: pandas-dev__pandas-26839
Token count is too large: pandas-dev__pandas-18685
Token count is too large: jupyterlab__jupyterlab-13589
Token count is too large: pypa__pip-5515
Token count is too large: pandas-dev__pandas-14749
Token count is too large: ipython__ipython-13411
Token count is too large: pypa__pip-2133
Token count is too large: conan-io__conan-8468
Token count is too large: Qiskit__qiskit-3243


Generating train split: 606 examples [00:39, 18.11 examples/s]

Token count is too large: mesonbuild__meson-2926
Token count is too large: conda__conda-6442
Token count is too large: open-mmlab__mmdetection-3528
Token count is too large: pandas-dev__pandas-18100
Token count is too large: mesonbuild__meson-9106
Token count is too large: pandas-dev__pandas-35688
Token count is too large: wagtail__wagtail-426


Generating train split: 612 examples [00:40, 21.25 examples/s]

Token count is too large: pandas-dev__pandas-23718
Token count is too large: ytdl-org__youtube-dl-11901
Token count is too large: mesonbuild__meson-1183
Token count is too large: mesonbuild__meson-1091
Token count is too large: pandas-dev__pandas-16663
Token count is too large: pandas-dev__pandas-19635
Token count is too large: pandas-dev__pandas-4614
Token count is too large: pandas-dev__pandas-8126
Token count is too large: pandas-dev__pandas-26004
Token count is too large: pandas-dev__pandas-26795
Token count is too large: docker__compose-2350
Token count is too large: docker__compose-588
Token count is too large: pandas-dev__pandas-6530
Token count is too large: mesonbuild__meson-11826
Token count is too large: pandas-dev__pandas-7009
Token count is too large: conan-io__conan-4251
Token count is too large: dagster-io__dagster-1114


Generating train split: 615 examples [00:40, 13.92 examples/s]

Token count is too large: huggingface__transformers-21870
Token count is too large: google__jax-3320
Token count is too large: pandas-dev__pandas-26404
Token count is too large: numpy__numpy-7297
Token count is too large: numpy__numpy-24526
Token count is too large: open-mmlab__mmdetection-7797
Token count is too large: pandas-dev__pandas-7532
Token count is too large: pypa__pip-2451
Token count is too large: celery__celery-6765
Token count is too large: ray-project__ray-1693
Token count is too large: pantsbuild__pants-5952
Token count is too large: pandas-dev__pandas-22357
Token count is too large: pandas-dev__pandas-11426
Token count is too large: ipython__ipython-10496
Token count is too large: pandas-dev__pandas-25280
Token count is too large: pandas-dev__pandas-24500


Generating train split: 618 examples [00:40, 14.58 examples/s]

Token count is too large: pandas-dev__pandas-7533
Token count is too large: googleapis__google-cloud-python-6651
Token count is too large: pandas-dev__pandas-27702
Token count is too large: PrefectHQ__prefect-1556
Token count is too large: pandas-dev__pandas-5960
Token count is too large: pandas-dev__pandas-5604
Token count is too large: pandas-dev__pandas-19884
Token count is too large: mesonbuild__meson-10790


Generating train split: 627 examples [00:41, 21.25 examples/s]

Token count is too large: docker__compose-1388
Token count is too large: huggingface__transformers-12953
Token count is too large: pandas-dev__pandas-32389
Token count is too large: google__jax-236
Token count is too large: pandas-dev__pandas-24275
Token count is too large: googleapis__google-cloud-python-6110
Token count is too large: pandas-dev__pandas-35111
Token count is too large: apache__airflow-24676
Token count is too large: Lightning-AI__lightning-2510
Token count is too large: pandas-dev__pandas-26297
Token count is too large: Qiskit__qiskit-7671
Token count is too large: conan-io__conan-5260
Token count is too large: mesonbuild__meson-6412
Token count is too large: pandas-dev__pandas-22696
Token count is too large: pandas-dev__pandas-30434
Token count is too large: pandas-dev__pandas-6614
Token count is too large: pandas-dev__pandas-7871


Generating train split: 632 examples [00:41, 14.82 examples/s]

Token count is too large: Qiskit__qiskit-7185
Token count is too large: huggingface__transformers-19648
Token count is too large: mesonbuild__meson-5568
Token count is too large: pandas-dev__pandas-23692
Token count is too large: pandas-dev__pandas-24188
Token count is too large: pandas-dev__pandas-22170
Token count is too large: pandas-dev__pandas-22207
Token count is too large: mesonbuild__meson-6690


Generating train split: 637 examples [00:42, 10.70 examples/s]

Token count is too large: celery__celery-6833
Token count is too large: pandas-dev__pandas-11997
Token count is too large: google__jax-2220
Token count is too large: pantsbuild__pants-7504
Token count is too large: celery__celery-5345
Token count is too large: pandas-dev__pandas-4716
Token count is too large: pandas-dev__pandas-27006
Token count is too large: Lightning-AI__lightning-2289
Token count is too large: conda__conda-8775


Generating train split: 641 examples [00:42, 11.71 examples/s]

Token count is too large: pandas-dev__pandas-7953
Token count is too large: mesonbuild__meson-5577
Token count is too large: Qiskit__qiskit-7350
Token count is too large: celery__celery-5114
Token count is too large: numpy__numpy-5358
Token count is too large: huggingface__transformers-22649
Token count is too large: conan-io__conan-8490
Token count is too large: mesonbuild__meson-8974
Token count is too large: googleapis__google-cloud-python-5803


Generating train split: 643 examples [00:42, 10.30 examples/s]

Token count is too large: pandas-dev__pandas-10638
Token count is too large: pandas-dev__pandas-21867
Token count is too large: wagtail__wagtail-5694
Token count is too large: Qiskit__qiskit-9002
Token count is too large: mesonbuild__meson-5942
Token count is too large: pantsbuild__pants-7447
Token count is too large: Qiskit__qiskit-8335
Token count is too large: Lightning-AI__lightning-1824
Token count is too large: pandas-dev__pandas-18674
Token count is too large: pandas-dev__pandas-28907
Token count is too large: pandas-dev__pandas-35360
Token count is too large: pandas-dev__pandas-38803
Token count is too large: google__jax-2788


Generating train split: 648 examples [00:43, 12.60 examples/s]

Token count is too large: ipython__ipython-5938
Token count is too large: pantsbuild__pants-19004
Token count is too large: huggingface__transformers-10551
Token count is too large: pandas-dev__pandas-5948
Token count is too large: pandas-dev__pandas-7458
Token count is too large: pandas-dev__pandas-10069
Token count is too large: google__jax-1905


Generating train split: 651 examples [00:43, 13.25 examples/s]

Token count is too large: Qiskit__qiskit-2213
Token count is too large: pandas-dev__pandas-36843
Token count is too large: apache__airflow-18979
Token count is too large: PrefectHQ__prefect-2507
Token count is too large: pandas-dev__pandas-16586


Generating train split: 653 examples [00:43, 11.67 examples/s]

Token count is too large: pandas-dev__pandas-4419
Token count is too large: conda__conda-6555
Token count is too large: google__jax-661
Token count is too large: pandas-dev__pandas-36694
Token count is too large: pandas-dev__pandas-5429
Token count is too large: docker__compose-1702
Token count is too large: numpy__numpy-13435


Generating train split: 655 examples [00:43, 10.98 examples/s]

Token count is too large: huggingface__transformers-25585
Token count is too large: Qiskit__qiskit-2052
Token count is too large: Qiskit__qiskit-4141
Token count is too large: mesonbuild__meson-9017
Token count is too large: pypa__pip-1882
Token count is too large: google__jax-3334
Token count is too large: pyca__cryptography-4822
Token count is too large: pandas-dev__pandas-23422
Token count is too large: pandas-dev__pandas-25743
Token count is too large: Lightning-AI__lightning-2723
Token count is too large: wagtail__wagtail-562
Token count is too large: conda__conda-10086
Token count is too large: pandas-dev__pandas-15142
Token count is too large: ipython__ipython-2432
Token count is too large: pandas-dev__pandas-10490


Generating train split: 661 examples [00:44, 13.86 examples/s]

Token count is too large: wagtail__wagtail-8861
Token count is too large: pandas-dev__pandas-36647
Token count is too large: google__jax-2414
Token count is too large: ipython__ipython-5522
Token count is too large: pandas-dev__pandas-10953
Token count is too large: pypa__pip-9320
Token count is too large: conda__conda-6189
Token count is too large: huggingface__transformers-10517
Token count is too large: open-mmlab__mmdetection-5486
Token count is too large: pandas-dev__pandas-7386
Token count is too large: apache__airflow-25856


Generating train split: 664 examples [00:44, 12.75 examples/s]

Token count is too large: huggingface__transformers-21929
Token count is too large: PrefectHQ__prefect-3007
Token count is too large: Qiskit__qiskit-8576
Token count is too large: huggingface__transformers-15928
Token count is too large: huggingface__transformers-24101
Token count is too large: open-mmlab__mmdetection-2626
Token count is too large: pantsbuild__pants-19302
Token count is too large: pandas-dev__pandas-31667


Generating train split: 667 examples [00:44, 12.87 examples/s]

Token count is too large: mesonbuild__meson-11104
Token count is too large: pandas-dev__pandas-25953
Token count is too large: pandas-dev__pandas-34338
Token count is too large: pandas-dev__pandas-14668
Token count is too large: pandas-dev__pandas-4881
Token count is too large: googleapis__google-cloud-python-3758
Token count is too large: Qiskit__qiskit-3354
Token count is too large: pandas-dev__pandas-31388
Token count is too large: Qiskit__qiskit-5223
Token count is too large: conda__conda-7146
Token count is too large: pandas-dev__pandas-25556


Generating train split: 670 examples [00:45, 11.40 examples/s]

Token count is too large: numpy__numpy-20762
Token count is too large: pandas-dev__pandas-36800
Token count is too large: mesonbuild__meson-3556
Token count is too large: Lightning-AI__lightning-2047
Token count is too large: Qiskit__qiskit-530
Token count is too large: ipython__ipython-2007
Token count is too large: open-mmlab__mmdetection-2349
Token count is too large: huggingface__transformers-13686
Token count is too large: ytdl-org__youtube-dl-386


Generating train split: 677 examples [00:45, 15.22 examples/s]

Token count is too large: pypa__pip-5312
Token count is too large: google__jax-3003
Token count is too large: pandas-dev__pandas-4406
Token count is too large: dagster-io__dagster-4615
Token count is too large: open-mmlab__mmdetection-9319
Token count is too large: conan-io__conan-3554
Token count is too large: pandas-dev__pandas-34293
Token count is too large: huggingface__transformers-3103
Token count is too large: huggingface__transformers-8852
Token count is too large: google__jax-3235
Token count is too large: mesonbuild__meson-10709


Generating train split: 683 examples [00:45, 19.10 examples/s]

Token count is too large: pandas-dev__pandas-7580
Token count is too large: pandas-dev__pandas-26810
Token count is too large: pandas-dev__pandas-13585
Token count is too large: conda__conda-3457
Token count is too large: pandas-dev__pandas-39407
Token count is too large: huggingface__transformers-10531
Token count is too large: Qiskit__qiskit-8571
Token count is too large: numpy__numpy-7211
Token count is too large: pandas-dev__pandas-38621


Generating train split: 688 examples [00:45, 20.28 examples/s]

Token count is too large: pandas-dev__pandas-19074
Token count is too large: pandas-dev__pandas-29836
Token count is too large: pandas-dev__pandas-30305
Token count is too large: google__jax-231
Token count is too large: pandas-dev__pandas-34408


Generating train split: 695 examples [00:46, 23.53 examples/s]

Token count is too large: huggingface__transformers-23367
Token count is too large: pandas-dev__pandas-16233
Token count is too large: conda__conda-2715
Token count is too large: huggingface__transformers-9105
Token count is too large: huggingface__transformers-7542
Token count is too large: pandas-dev__pandas-26891
Token count is too large: pantsbuild__pants-12022
Token count is too large: DataDog__integrations-core-11210
Token count is too large: scipy__scipy-4162
Token count is too large: huggingface__transformers-11061
Token count is too large: pandas-dev__pandas-34377
Token count is too large: ytdl-org__youtube-dl-30340
Token count is too large: apache__airflow-13745
Token count is too large: Lightning-AI__lightning-439


Generating train split: 699 examples [00:46, 20.28 examples/s]

Token count is too large: googleapis__google-cloud-python-9574
Token count is too large: pandas-dev__pandas-24503
Token count is too large: wagtail__wagtail-1978
Token count is too large: DataDog__integrations-core-727
Token count is too large: apache__airflow-23134
Token count is too large: numpy__numpy-17446
Token count is too large: dagster-io__dagster-9138
Token count is too large: googleapis__google-cloud-python-3425
Token count is too large: apache__airflow-25524
Token count is too large: wagtail__wagtail-9628


Generating train split: 702 examples [00:46, 21.25 examples/s]

Token count is too large: pandas-dev__pandas-22969
Token count is too large: googleapis__google-cloud-python-11329
Token count is too large: ipython__ipython-4314
Token count is too large: pantsbuild__pants-4500
Token count is too large: pandas-dev__pandas-35427
Token count is too large: mesonbuild__meson-7979
Token count is too large: Qiskit__qiskit-8830
Token count is too large: ipython__ipython-910
Token count is too large: numpy__numpy-8236
Token count is too large: pandas-dev__pandas-21570
Token count is too large: pandas-dev__pandas-30285
Token count is too large: mesonbuild__meson-1279
Token count is too large: apache__airflow-19307
Token count is too large: pypa__pip-5370
Token count is too large: pandas-dev__pandas-22697
Token count is too large: pandas-dev__pandas-37707
Token count is too large: numpy__numpy-7260
Token count is too large: pandas-dev__pandas-23221
Token count is too large: docker__compose-1990
Token count is too large: conda__conda-2772
Token count is too larg

Generating train split: 709 examples [00:47, 15.86 examples/s]

Token count is too large: docker__compose-6600
Token count is too large: Lightning-AI__lightning-1018
Token count is too large: wagtail__wagtail-1682
Token count is too large: conan-io__conan-2679
Token count is too large: pandas-dev__pandas-7285
Token count is too large: pandas-dev__pandas-20356
Token count is too large: pandas-dev__pandas-35686
Token count is too large: huggingface__transformers-18618


Generating train split: 726 examples [00:47, 25.80 examples/s]

Token count is too large: huggingface__transformers-21698
Token count is too large: Qiskit__qiskit-6550
Token count is too large: pandas-dev__pandas-15581
Token count is too large: Qiskit__qiskit-3575
Token count is too large: gitpython-developers__GitPython-479
Token count is too large: conan-io__conan-3059
Token count is too large: Qiskit__qiskit-1975
Token count is too large: dagster-io__dagster-8406
Token count is too large: pandas-dev__pandas-36552
Token count is too large: ipython__ipython-5592
Token count is too large: conan-io__conan-4359
Token count is too large: pandas-dev__pandas-20345
Token count is too large: conan-io__conan-4644
Token count is too large: mesonbuild__meson-1338
Token count is too large: googleapis__google-cloud-python-487
Token count is too large: pypa__pip-2542
Token count is too large: pandas-dev__pandas-8631
Token count is too large: pantsbuild__pants-6912
Token count is too large: pandas-dev__pandas-22365
Token count is too large: pandas-dev__pandas-18

Generating train split: 730 examples [00:47, 16.73 examples/s]

Token count is too large: Qiskit__qiskit-7282
Token count is too large: apache__airflow-22123
Token count is too large: pandas-dev__pandas-28233
Token count is too large: numpy__numpy-23911
Token count is too large: Qiskit__qiskit-1737
Token count is too large: pandas-dev__pandas-18116
Token count is too large: mesonbuild__meson-3289
Token count is too large: pantsbuild__pants-11872
Token count is too large: pyca__cryptography-7038
Token count is too large: pandas-dev__pandas-7404
Token count is too large: pandas-dev__pandas-29922
Token count is too large: Qiskit__qiskit-5688
Token count is too large: pandas-dev__pandas-4492
Token count is too large: pandas-dev__pandas-22862


Generating train split: 737 examples [00:48, 14.85 examples/s]

Token count is too large: ytdl-org__youtube-dl-16985
Token count is too large: ipython__ipython-12144
Token count is too large: numpy__numpy-12679
Token count is too large: ipython__ipython-4789
Token count is too large: scipy__scipy-287
Token count is too large: apache__airflow-19668
Token count is too large: googleapis__google-cloud-python-5372
Token count is too large: pandas-dev__pandas-25602
Token count is too large: pandas-dev__pandas-23847
Token count is too large: pandas-dev__pandas-5354
Token count is too large: googleapis__google-cloud-python-2270


Generating train split: 741 examples [00:48, 17.23 examples/s]

Token count is too large: mesonbuild__meson-9613
Token count is too large: numpy__numpy-18354
Token count is too large: pandas-dev__pandas-8743
Token count is too large: huggingface__transformers-9151
Token count is too large: huggingface__transformers-18714
Token count is too large: Qiskit__qiskit-8666
Token count is too large: numpy__numpy-24299
Token count is too large: twisted__twisted-11711
Token count is too large: ray-project__ray-7324
Token count is too large: wagtail__wagtail-9862


Generating train split: 745 examples [00:48, 17.12 examples/s]

Token count is too large: pantsbuild__pants-10511
Token count is too large: pandas-dev__pandas-27871
Token count is too large: pandas-dev__pandas-9036
Token count is too large: Qiskit__qiskit-9441
Token count is too large: numpy__numpy-16855
Token count is too large: pandas-dev__pandas-35280
Token count is too large: ipython__ipython-13868
Token count is too large: mesonbuild__meson-3721
Token count is too large: google__jax-1756
Token count is too large: pandas-dev__pandas-28908


Generating train split: 750 examples [00:49, 19.48 examples/s]

Token count is too large: Qiskit__qiskit-6213
Token count is too large: pandas-dev__pandas-39014
Token count is too large: pandas-dev__pandas-26463
Token count is too large: Qiskit__qiskit-8444
Token count is too large: Qiskit__qiskit-10410
Token count is too large: pantsbuild__pants-17093
Token count is too large: Qiskit__qiskit-6325
Token count is too large: apache__airflow-16108
Token count is too large: pandas-dev__pandas-17857
Token count is too large: docker__compose-6134
Token count is too large: numpy__numpy-21634
Token count is too large: mesonbuild__meson-9608
Token count is too large: pandas-dev__pandas-3972
Token count is too large: google__jax-107
Token count is too large: pandas-dev__pandas-37495
Token count is too large: pandas-dev__pandas-21214
Token count is too large: pantsbuild__pants-14448
Token count is too large: Lightning-AI__lightning-2846
Token count is too large: pandas-dev__pandas-21027
Token count is too large: conda__conda-6313
Token count is too large: tia

Generating train split: 754 examples [00:49, 16.85 examples/s]

Token count is too large: pandas-dev__pandas-5492
Token count is too large: pandas-dev__pandas-18091
Token count is too large: pandas-dev__pandas-8122
Token count is too large: pandas-dev__pandas-21935
Token count is too large: googleapis__google-cloud-python-289
Token count is too large: numpy__numpy-11843
Token count is too large: mesonbuild__meson-10580
Token count is too large: pandas-dev__pandas-30501
Token count is too large: numpy__numpy-23310


Generating train split: 757 examples [00:49, 12.90 examples/s]

Token count is too large: pandas-dev__pandas-10557
Token count is too large: google__jax-745
Token count is too large: ray-project__ray-833
Token count is too large: pandas-dev__pandas-6347
Token count is too large: ipython__ipython-4832
Token count is too large: Qiskit__qiskit-5546
Token count is too large: ytdl-org__youtube-dl-14358
There was an error processing
Token count is too large: Lightning-AI__lightning-1623


Generating train split: 759 examples [00:50, 11.59 examples/s]

Token count is too large: mesonbuild__meson-9344
Token count is too large: pypa__pip-5148
Token count is too large: pandas-dev__pandas-36311
Token count is too large: pandas-dev__pandas-16460
Token count is too large: explosion__spaCy-2880
Token count is too large: pandas-dev__pandas-25092
Token count is too large: mesonbuild__meson-7460
Token count is too large: pypa__pip-4656
Token count is too large: pandas-dev__pandas-25983
Token count is too large: apache__airflow-19886


Generating train split: 767 examples [00:50, 19.63 examples/s]

Token count is too large: google__jax-485
Token count is too large: docker__compose-7199
Token count is too large: numpy__numpy-4872
Token count is too large: docker__compose-23
Token count is too large: pandas-dev__pandas-8049
Token count is too large: Qiskit__qiskit-8021
Token count is too large: Qiskit__qiskit-2169
Token count is too large: pandas-dev__pandas-19737
Token count is too large: ytdl-org__youtube-dl-4598
Token count is too large: conan-io__conan-13450
Token count is too large: numpy__numpy-11086
Token count is too large: googleapis__google-cloud-python-4040
Token count is too large: google__jax-685
Token count is too large: pantsbuild__pants-5011
Token count is too large: conan-io__conan-5898
Token count is too large: ytdl-org__youtube-dl-1239
Token count is too large: twisted__twisted-11818
Token count is too large: conda__conda-6076


Generating train split: 771 examples [00:50, 18.68 examples/s]

Token count is too large: huggingface__transformers-11846
Token count is too large: docker__compose-5558
Token count is too large: pandas-dev__pandas-18618
Token count is too large: pandas-dev__pandas-10469
Token count is too large: huggingface__transformers-21207
Token count is too large: pypa__pip-3485
Token count is too large: dagster-io__dagster-15406
Token count is too large: dagster-io__dagster-15384
Token count is too large: explosion__spaCy-3253
Token count is too large: pandas-dev__pandas-17355
Token count is too large: celery__celery-3779
Token count is too large: pandas-dev__pandas-30295
Token count is too large: Lightning-AI__lightning-310
Token count is too large: Qiskit__qiskit-4931
Token count is too large: google__jax-1872
Token count is too large: conda__conda-6724
Token count is too large: Qiskit__qiskit-4326
Token count is too large: pandas-dev__pandas-18020
Token count is too large: conan-io__conan-4805
Token count is too large: pypa__pip-1787
Token count is too lar

Generating train split: 775 examples [00:51, 13.02 examples/s]

Token count is too large: pandas-dev__pandas-36937
Token count is too large: pandas-dev__pandas-15924
Token count is too large: wagtail__wagtail-10421
Token count is too large: pypa__pip-9189
Token count is too large: google__jax-257
Token count is too large: pandas-dev__pandas-7892
Token count is too large: twisted__twisted-1141
Token count is too large: celery__celery-4908
Token count is too large: pandas-dev__pandas-21238
Token count is too large: wagtail__wagtail-9217
Token count is too large: apache__airflow-12890
Token count is too large: conda__conda-3521
Token count is too large: mesonbuild__meson-4657
Token count is too large: ray-project__ray-1245
Token count is too large: open-mmlab__mmdetection-6104
Token count is too large: google__jax-860
Token count is too large: googleapis__google-cloud-python-1976
Token count is too large: DataDog__integrations-core-1731
Token count is too large: conda__conda-4190
Token count is too large: huggingface__transformers-17496
Token count is

Generating train split: 782 examples [00:51, 12.47 examples/s]

Token count is too large: huggingface__transformers-7891
Token count is too large: pantsbuild__pants-7145
Token count is too large: google__jax-2807
Token count is too large: pandas-dev__pandas-28336
Token count is too large: pandas-dev__pandas-3887
Token count is too large: open-mmlab__mmdetection-5884
Token count is too large: pypa__pip-8166
Token count is too large: docker__compose-1960
Token count is too large: ipython__ipython-9182
Token count is too large: gitpython-developers__GitPython-755
Token count is too large: conan-io__conan-6395
Token count is too large: ipython__ipython-1081
Token count is too large: ipython__ipython-4209
Token count is too large: mesonbuild__meson-9244
Token count is too large: Qiskit__qiskit-4818
Token count is too large: pandas-dev__pandas-22149
Token count is too large: pandas-dev__pandas-16324
Token count is too large: pandas-dev__pandas-33644


Generating train split: 784 examples [00:52, 10.17 examples/s]

Token count is too large: googleapis__google-cloud-python-11331
Token count is too large: huggingface__transformers-8239
Token count is too large: ytdl-org__youtube-dl-5641
Token count is too large: pantsbuild__pants-16423
Token count is too large: pyca__cryptography-3899
Token count is too large: ipython__ipython-13619
Token count is too large: numpy__numpy-5307
Token count is too large: mesonbuild__meson-6419
Token count is too large: apache__airflow-15822
Token count is too large: mesonbuild__meson-9150


Generating train split: 786 examples [00:52,  9.80 examples/s]

Token count is too large: pandas-dev__pandas-29452
Token count is too large: mesonbuild__meson-1953
Token count is too large: pantsbuild__pants-5089
Token count is too large: pypa__pip-6709
Token count is too large: numpy__numpy-6488
Token count is too large: huggingface__transformers-12963
Token count is too large: Qiskit__qiskit-7140
Token count is too large: googleapis__google-cloud-python-11169


Generating train split: 788 examples [00:52,  8.38 examples/s]

Token count is too large: ipython__ipython-4497
Token count is too large: pandas-dev__pandas-21254
Token count is too large: google__jax-197
Token count is too large: pantsbuild__pants-18406
Token count is too large: conan-io__conan-5971
Token count is too large: pandas-dev__pandas-39358
Token count is too large: apache__airflow-19481
Token count is too large: mesonbuild__meson-2883
Token count is too large: google__jax-1106
Token count is too large: pyca__cryptography-5295
Token count is too large: pypa__pip-12188
Token count is too large: jupyterlab__jupyterlab-8950
Token count is too large: google__jax-755


Generating train split: 796 examples [00:53, 12.19 examples/s]

Token count is too large: mesonbuild__meson-3612
Token count is too large: conda__conda-6895
Token count is too large: ray-project__ray-4775
Token count is too large: pandas-dev__pandas-5447
Token count is too large: conan-io__conan-3050
Token count is too large: google__jax-807
Token count is too large: pandas-dev__pandas-39022
Token count is too large: apache__airflow-25196
Token count is too large: Qiskit__qiskit-4713
Token count is too large: pandas-dev__pandas-18831
Token count is too large: pandas-dev__pandas-7631


Generating train split: 800 examples [00:53, 15.24 examples/s]

Token count is too large: googleapis__google-cloud-python-7622
Token count is too large: Lightning-AI__lightning-2665
Token count is too large: mesonbuild__meson-11982
Token count is too large: pandas-dev__pandas-11087
Token count is too large: pantsbuild__pants-15403
Token count is too large: pandas-dev__pandas-16826
Token count is too large: pandas-dev__pandas-23062
Token count is too large: PrefectHQ__prefect-2725


Generating train split: 802 examples [00:53, 13.82 examples/s]

Token count is too large: apache__airflow-28693
Token count is too large: pandas-dev__pandas-15984
Token count is too large: pandas-dev__pandas-39260
Token count is too large: ipython__ipython-7853
Token count is too large: mesonbuild__meson-5069
Token count is too large: numpy__numpy-10558
Token count is too large: pandas-dev__pandas-4388
Token count is too large: pandas-dev__pandas-6875
Token count is too large: mesonbuild__meson-7470
Token count is too large: numpy__numpy-23932


Generating train split: 807 examples [00:53, 15.31 examples/s]

Token count is too large: huggingface__transformers-22024
Token count is too large: apache__airflow-364
Token count is too large: pandas-dev__pandas-6639
Token count is too large: huggingface__transformers-25267
Token count is too large: pandas-dev__pandas-17846
Token count is too large: pandas-dev__pandas-7971
Token count is too large: numpy__numpy-18911
Token count is too large: pandas-dev__pandas-22075
Token count is too large: Qiskit__qiskit-6890


Generating train split: 809 examples [00:54, 10.94 examples/s]

Token count is too large: ipython__ipython-13385
Token count is too large: mesonbuild__meson-9631
Token count is too large: numpy__numpy-6500
Token count is too large: pypa__pip-9822
Token count is too large: pandas-dev__pandas-23769
Token count is too large: celery__celery-5737
Token count is too large: pandas-dev__pandas-37034
Token count is too large: mesonbuild__meson-1692
Token count is too large: conda__conda-6352
Token count is too large: huggingface__transformers-11382
Token count is too large: pandas-dev__pandas-10723
Token count is too large: pandas-dev__pandas-37776


Generating train split: 815 examples [00:54, 13.26 examples/s]

Token count is too large: pandas-dev__pandas-4823
Token count is too large: pandas-dev__pandas-30270
Token count is too large: conan-io__conan-5837
Token count is too large: pyca__cryptography-2071
Token count is too large: googleapis__google-cloud-python-2490
Token count is too large: pandas-dev__pandas-21799
Token count is too large: Qiskit__qiskit-3493
Token count is too large: apache__airflow-19148
Token count is too large: googleapis__google-cloud-python-6632
Token count is too large: pandas-dev__pandas-6879
Token count is too large: celery__celery-5613
Token count is too large: numpy__numpy-21201
Token count is too large: pandas-dev__pandas-27221
Token count is too large: docker__compose-393
Token count is too large: pyca__cryptography-6246
Token count is too large: Qiskit__qiskit-8199
Token count is too large: pantsbuild__pants-16186
Token count is too large: ipython__ipython-4890
Token count is too large: pandas-dev__pandas-30329


Generating train split: 821 examples [00:55, 12.46 examples/s]

Token count is too large: pandas-dev__pandas-35736
Token count is too large: wagtail__wagtail-4397
Token count is too large: numpy__numpy-14310
Token count is too large: pandas-dev__pandas-17960
Token count is too large: conda__conda-7135
Token count is too large: Qiskit__qiskit-2070
Token count is too large: pandas-dev__pandas-17738
Token count is too large: pandas-dev__pandas-39702
Token count is too large: jupyterlab__jupyterlab-5400
Token count is too large: pandas-dev__pandas-38982
Token count is too large: pandas-dev__pandas-35441
Token count is too large: conan-io__conan-3634
Token count is too large: docker__compose-3991
Token count is too large: Qiskit__qiskit-3888
Token count is too large: pypa__pip-6299


Generating train split: 823 examples [00:55, 11.82 examples/s]

Token count is too large: pandas-dev__pandas-30588
Token count is too large: pandas-dev__pandas-2224
Token count is too large: pandas-dev__pandas-17023
Token count is too large: pandas-dev__pandas-21917
Token count is too large: huggingface__transformers-6717
Token count is too large: pandas-dev__pandas-19650
Token count is too large: google__jax-77
Token count is too large: pandas-dev__pandas-22104


Generating train split: 825 examples [00:55, 11.01 examples/s]

Token count is too large: pandas-dev__pandas-6042
Token count is too large: pandas-dev__pandas-30903
Token count is too large: mesonbuild__meson-2852
Token count is too large: pandas-dev__pandas-9222
Token count is too large: numpy__numpy-15928


Generating train split: 827 examples [00:55, 11.61 examples/s]

Token count is too large: pandas-dev__pandas-29470
Token count is too large: pypa__pip-4393
Token count is too large: pypa__pip-6909
Token count is too large: pandas-dev__pandas-29955
Token count is too large: numpy__numpy-7675
Token count is too large: numpy__numpy-23881
Token count is too large: pandas-dev__pandas-26054
Token count is too large: pantsbuild__pants-4648
Token count is too large: pandas-dev__pandas-3998


Generating train split: 829 examples [00:56,  8.62 examples/s]

Token count is too large: google__jax-3174
Token count is too large: pypa__pip-5215
Token count is too large: huggingface__transformers-18545
Token count is too large: pandas-dev__pandas-10199


Generating train split: 833 examples [00:56, 12.62 examples/s]

Token count is too large: huggingface__transformers-15941
Token count is too large: pandas-dev__pandas-4658
Token count is too large: conda__conda-9385
Token count is too large: googleapis__google-cloud-python-11333
Token count is too large: numpy__numpy-21855
Token count is too large: numpy__numpy-19893
Token count is too large: docker__compose-4761
Token count is too large: pandas-dev__pandas-19224
Token count is too large: huggingface__transformers-25226
Token count is too large: huggingface__transformers-1057
Token count is too large: pandas-dev__pandas-10212
Token count is too large: numpy__numpy-12831
Token count is too large: Qiskit__qiskit-9612
Token count is too large: google__jax-754
Token count is too large: mesonbuild__meson-5990
Token count is too large: celery__celery-4448
Token count is too large: mesonbuild__meson-8940


Generating train split: 839 examples [00:56, 13.26 examples/s]

Token count is too large: Qiskit__qiskit-2781
Token count is too large: ray-project__ray-5270
Token count is too large: ytdl-org__youtube-dl-1563
Token count is too large: conan-io__conan-6138
Token count is too large: mesonbuild__meson-1150
Token count is too large: PrefectHQ__prefect-192
Token count is too large: pandas-dev__pandas-20043
Token count is too large: apache__airflow-20121
Token count is too large: celery__celery-6330
Token count is too large: pandas-dev__pandas-31114
Token count is too large: google__jax-1698
Token count is too large: scipy__scipy-4302


Generating train split: 842 examples [00:56, 15.20 examples/s]

Token count is too large: Qiskit__qiskit-7299
Token count is too large: pandas-dev__pandas-17730
Token count is too large: pandas-dev__pandas-26402
Token count is too large: Qiskit__qiskit-6109
Token count is too large: numpy__numpy-24468
Token count is too large: pandas-dev__pandas-24815
Token count is too large: pypa__pip-9442
Token count is too large: googleapis__google-cloud-python-8837
Token count is too large: pypa__pip-8594
Token count is too large: ipython__ipython-2855
Token count is too large: pandas-dev__pandas-4841
Token count is too large: apache__airflow-30608


Generating train split: 846 examples [00:57, 14.08 examples/s]

Token count is too large: googleapis__google-cloud-python-6175
Token count is too large: wagtail__wagtail-4136
Token count is too large: pandas-dev__pandas-25124
Token count is too large: huggingface__transformers-12063
Token count is too large: huggingface__transformers-9820
Token count is too large: pypa__pip-8343
Token count is too large: pantsbuild__pants-18112
Token count is too large: ytdl-org__youtube-dl-12512
Token count is too large: pypa__pip-6810
Token count is too large: numpy__numpy-10946
Token count is too large: conan-io__conan-7600
Token count is too large: googleapis__google-cloud-python-4257
Token count is too large: huggingface__transformers-3143
Token count is too large: pantsbuild__pants-18622
Token count is too large: huggingface__transformers-4751
Token count is too large: pandas-dev__pandas-5978
Token count is too large: huggingface__transformers-18044
Token count is too large: pandas-dev__pandas-37152
Token count is too large: celery__celery-4173
Token count is

Generating train split: 850 examples [00:57, 11.72 examples/s]

Token count is too large: huggingface__transformers-25049
Token count is too large: ray-project__ray-10979
Token count is too large: Qiskit__qiskit-4223
Token count is too large: Lightning-AI__lightning-2594
Token count is too large: pantsbuild__pants-16935
Token count is too large: pandas-dev__pandas-3736
Token count is too large: pandas-dev__pandas-31773
Token count is too large: numpy__numpy-21712
Token count is too large: huggingface__transformers-18402
Token count is too large: Lightning-AI__lightning-52
Token count is too large: apache__airflow-11487
Token count is too large: conan-io__conan-5112
Token count is too large: pandas-dev__pandas-19145
Token count is too large: open-mmlab__mmdetection-7147
Token count is too large: pandas-dev__pandas-23463
Token count is too large: conda__conda-3685
Token count is too large: apache__airflow-18772
Token count is too large: ray-project__ray-7752
Token count is too large: wagtail__wagtail-33
Token count is too large: google__jax-1390
Toke

Generating train split: 856 examples [00:57, 13.68 examples/s]

Token count is too large: scipy__scipy-5392
Token count is too large: docker__compose-5472
Token count is too large: pypa__pip-2205
Token count is too large: Qiskit__qiskit-4322
Token count is too large: Lightning-AI__lightning-3261
Token count is too large: pandas-dev__pandas-7720
Token count is too large: huggingface__transformers-13179
Token count is too large: apache__airflow-30641
Token count is too large: apache__airflow-25673
Token count is too large: pandas-dev__pandas-15538
Token count is too large: googleapis__google-cloud-python-783
Token count is too large: Qiskit__qiskit-1718
Token count is too large: pantsbuild__pants-10764


Generating train split: 858 examples [00:58, 11.86 examples/s]

Token count is too large: pandas-dev__pandas-4206
Token count is too large: googleapis__google-cloud-python-5295
Token count is too large: googleapis__google-cloud-python-8436
Token count is too large: ipython__ipython-7389
Token count is too large: Qiskit__qiskit-2997
Token count is too large: Qiskit__qiskit-5946
Token count is too large: googleapis__google-cloud-python-6935
Token count is too large: apache__airflow-9505


Generating train split: 860 examples [00:58, 13.10 examples/s]

Token count is too large: pandas-dev__pandas-9283
Token count is too large: pandas-dev__pandas-26071
Token count is too large: pandas-dev__pandas-16465
Token count is too large: Lightning-AI__lightning-2335
Token count is too large: wagtail__wagtail-9920
Token count is too large: google__jax-484
Token count is too large: pandas-dev__pandas-38579
Token count is too large: pandas-dev__pandas-21590


Generating train split: 867 examples [00:58, 15.80 examples/s]

Token count is too large: apache__airflow-17732
Token count is too large: mesonbuild__meson-4300
Token count is too large: pandas-dev__pandas-23652
Token count is too large: googleapis__google-cloud-python-11302
Token count is too large: pandas-dev__pandas-6373
Token count is too large: ipython__ipython-13501
Token count is too large: pandas-dev__pandas-29553
Token count is too large: Lightning-AI__lightning-2379
Token count is too large: pandas-dev__pandas-36730
Token count is too large: Qiskit__qiskit-3771


Generating train split: 872 examples [00:58, 18.08 examples/s]

Token count is too large: pandas-dev__pandas-9929
Token count is too large: googleapis__google-cloud-python-509
Token count is too large: pantsbuild__pants-12699
Token count is too large: mesonbuild__meson-10306
Token count is too large: conan-io__conan-5841
Token count is too large: pandas-dev__pandas-22667
Token count is too large: apache__airflow-22964
Token count is too large: pandas-dev__pandas-39353
Token count is too large: conda__conda-1808
Token count is too large: conda__conda-6928
Token count is too large: pypa__pip-5798
Token count is too large: numpy__numpy-12898
Token count is too large: pandas-dev__pandas-26306
Token count is too large: pandas-dev__pandas-19790
Token count is too large: google__jax-910
Token count is too large: pandas-dev__pandas-7674
Token count is too large: google__jax-638
Token count is too large: wagtail__wagtail-10661
Token count is too large: pandas-dev__pandas-20017
Token count is too large: googleapis__google-cloud-python-6438
Token count is too

Generating train split: 875 examples [00:59, 14.17 examples/s]

Token count is too large: apache__airflow-25995
Token count is too large: pandas-dev__pandas-5638
Token count is too large: docker__compose-8178
Token count is too large: Qiskit__qiskit-4568
Token count is too large: pandas-dev__pandas-36224
Token count is too large: google__jax-495
Token count is too large: googleapis__google-cloud-python-4724


Generating train split: 878 examples [00:59, 12.91 examples/s]

Token count is too large: huggingface__transformers-21062
Token count is too large: ytdl-org__youtube-dl-31360
Token count is too large: huggingface__transformers-23641
Token count is too large: pandas-dev__pandas-27077
Token count is too large: ray-project__ray-1760
Token count is too large: pandas-dev__pandas-18718


Generating train split: 881 examples [00:59, 14.41 examples/s]

Token count is too large: huggingface__transformers-12551
Token count is too large: pandas-dev__pandas-36655
Token count is too large: google__jax-960
Token count is too large: huggingface__transformers-873
Token count is too large: googleapis__google-cloud-python-9875
Token count is too large: pandas-dev__pandas-27773
Token count is too large: Qiskit__qiskit-5073
Token count is too large: pandas-dev__pandas-8671
Token count is too large: pandas-dev__pandas-35664


Generating train split: 883 examples [01:00, 11.16 examples/s]

Token count is too large: googleapis__google-cloud-python-11346
Token count is too large: pandas-dev__pandas-7323
Token count is too large: pandas-dev__pandas-37320
Token count is too large: pandas-dev__pandas-37249
Token count is too large: pandas-dev__pandas-6506
Token count is too large: pandas-dev__pandas-36348
Token count is too large: Qiskit__qiskit-9100
Token count is too large: apache__airflow-9843
Token count is too large: pandas-dev__pandas-6579
Token count is too large: Qiskit__qiskit-2890
Token count is too large: pandas-dev__pandas-29257
Token count is too large: mesonbuild__meson-1171
Token count is too large: pandas-dev__pandas-24968
Token count is too large: jupyterlab__jupyterlab-3168
Token count is too large: scipy__scipy-5288
Token count is too large: pandas-dev__pandas-6481
Token count is too large: pandas-dev__pandas-26465
Token count is too large: pandas-dev__pandas-3597
Token count is too large: huggingface__transformers-16496


Generating train split: 885 examples [01:00,  7.29 examples/s]

Token count is too large: pandas-dev__pandas-5752
Token count is too large: pandas-dev__pandas-19251
Token count is too large: pandas-dev__pandas-32959
Token count is too large: numpy__numpy-19775
Token count is too large: huggingface__transformers-18650
Token count is too large: ray-project__ray-1445
Token count is too large: jupyterlab__jupyterlab-2697
Token count is too large: pantsbuild__pants-15610
Token count is too large: google__jax-965
Token count is too large: Qiskit__qiskit-7211
Token count is too large: apache__airflow-15105
Token count is too large: pandas-dev__pandas-38571
Token count is too large: Qiskit__qiskit-8173
Token count is too large: google__jax-199
Token count is too large: pandas-dev__pandas-39737
Token count is too large: docker__compose-5926
Token count is too large: google__jax-3061
Token count is too large: conan-io__conan-4494
Token count is too large: mesonbuild__meson-7976
Token count is too large: wagtail__wagtail-8382
Token count is too large: pantsbu

Generating train split: 896 examples [01:01, 13.28 examples/s]

Token count is too large: pandas-dev__pandas-32477
Token count is too large: apache__airflow-27591
Token count is too large: pandas-dev__pandas-23167
Token count is too large: mesonbuild__meson-6005
Token count is too large: pandas-dev__pandas-11865
Token count is too large: conan-io__conan-622
Token count is too large: pandas-dev__pandas-6814
Token count is too large: pandas-dev__pandas-17341
Token count is too large: googleapis__google-cloud-python-1557
Token count is too large: open-mmlab__mmdetection-9734
Token count is too large: Qiskit__qiskit-4940
Token count is too large: conda__conda-12487
Token count is too large: pypa__pip-1272


Generating train split: 898 examples [01:01, 12.90 examples/s]

Token count is too large: pandas-dev__pandas-22647
Token count is too large: numpy__numpy-9845
Token count is too large: googleapis__google-cloud-python-5654
Token count is too large: conan-io__conan-4122
Token count is too large: pandas-dev__pandas-22511
Token count is too large: docker__compose-4997
Token count is too large: Qiskit__qiskit-4885
Token count is too large: google__jax-1697
Token count is too large: pandas-dev__pandas-7862
Token count is too large: googleapis__google-cloud-python-5966


Generating train split: 902 examples [01:01, 14.46 examples/s]

Token count is too large: pandas-dev__pandas-18729
Token count is too large: pandas-dev__pandas-26744
Token count is too large: open-mmlab__mmdetection-3522
Token count is too large: numpy__numpy-11733
Token count is too large: pandas-dev__pandas-17624
Token count is too large: huggingface__transformers-18046
Token count is too large: pandas-dev__pandas-23237
Token count is too large: Lightning-AI__lightning-2832


Generating train split: 905 examples [01:01, 14.01 examples/s]

Token count is too large: apache__airflow-25795
Token count is too large: ray-project__ray-4323
Token count is too large: conda__conda-3210
Token count is too large: pantsbuild__pants-6880
Token count is too large: jupyterlab__jupyterlab-9232
Token count is too large: pandas-dev__pandas-23739
Token count is too large: pyca__cryptography-4864
Token count is too large: pandas-dev__pandas-7798
Token count is too large: pandas-dev__pandas-34983
Token count is too large: huggingface__transformers-9703


Generating train split: 907 examples [01:01, 12.02 examples/s]

Token count is too large: Qiskit__qiskit-8370
Token count is too large: pandas-dev__pandas-30585
Token count is too large: pandas-dev__pandas-3874
Token count is too large: mesonbuild__meson-3481
Token count is too large: Lightning-AI__lightning-481
Token count is too large: mesonbuild__meson-9708
Token count is too large: huggingface__transformers-14746
Token count is too large: huggingface__transformers-10856
Token count is too large: numpy__numpy-11859
Token count is too large: huggingface__transformers-4448
Token count is too large: pantsbuild__pants-18877
Token count is too large: mesonbuild__meson-6829
Token count is too large: PrefectHQ__prefect-437
Token count is too large: numpy__numpy-11382
Token count is too large: numpy__numpy-10544
Token count is too large: gitpython-developers__GitPython-1440


Generating train split: 913 examples [01:02, 14.96 examples/s]

Token count is too large: pandas-dev__pandas-39482
Token count is too large: apache__airflow-23674
Token count is too large: Qiskit__qiskit-5630
Token count is too large: wagtail__wagtail-9161
Token count is too large: pandas-dev__pandas-7665
Token count is too large: ytdl-org__youtube-dl-31181
Token count is too large: mesonbuild__meson-10783
Token count is too large: pandas-dev__pandas-33118
Token count is too large: pandas-dev__pandas-19923
Token count is too large: numpy__numpy-21807
Token count is too large: huggingface__transformers-13275
Token count is too large: pantsbuild__pants-5045
Token count is too large: pypa__pip-10044
Token count is too large: pantsbuild__pants-13319


Generating train split: 926 examples [01:02, 24.19 examples/s]

Token count is too large: Qiskit__qiskit-7285
Token count is too large: pyca__cryptography-2828
Token count is too large: apache__airflow-15074
Token count is too large: jupyterlab__jupyterlab-3270
Token count is too large: pandas-dev__pandas-21711
Token count is too large: numpy__numpy-10947
Token count is too large: jupyterlab__jupyterlab-7979
Token count is too large: celery__celery-5500
Token count is too large: huggingface__transformers-3973
Token count is too large: googleapis__google-cloud-python-364
Token count is too large: huggingface__transformers-18443
Token count is too large: ytdl-org__youtube-dl-5328


Generating train split: 930 examples [01:02, 25.21 examples/s]

Token count is too large: pandas-dev__pandas-23034
Token count is too large: pypa__pip-9264
Token count is too large: pandas-dev__pandas-22535
Token count is too large: apache__airflow-29225
Token count is too large: scipy__scipy-456
Token count is too large: pandas-dev__pandas-23921
Token count is too large: jupyterlab__jupyterlab-7165


Generating train split: 935 examples [01:03, 26.07 examples/s]

Token count is too large: Lightning-AI__lightning-2917
Token count is too large: google__jax-720
Token count is too large: Qiskit__qiskit-6580
Token count is too large: pandas-dev__pandas-14708
Token count is too large: jupyterlab__jupyterlab-6058
Token count is too large: pandas-dev__pandas-39083
Token count is too large: Qiskit__qiskit-2933
Token count is too large: google__jax-868
Token count is too large: pandas-dev__pandas-34718
Token count is too large: google__jax-1955
Token count is too large: ipython__ipython-11528
Token count is too large: Lightning-AI__lightning-848
Token count is too large: pandas-dev__pandas-21686
Token count is too large: ytdl-org__youtube-dl-30532
Token count is too large: pandas-dev__pandas-38861


Generating train split: 943 examples [01:03, 25.07 examples/s]

Token count is too large: conda__conda-12627
Token count is too large: Qiskit__qiskit-8299
Token count is too large: pandas-dev__pandas-36299
Token count is too large: numpy__numpy-3608
Token count is too large: pandas-dev__pandas-32124
Token count is too large: numpy__numpy-8200
Token count is too large: mesonbuild__meson-4207
Token count is too large: docker__compose-7714
Token count is too large: Qiskit__qiskit-7431
Token count is too large: docker__compose-5329
Token count is too large: mesonbuild__meson-4084
Token count is too large: pandas-dev__pandas-31897
Token count is too large: pantsbuild__pants-5267
Token count is too large: Qiskit__qiskit-2812
Token count is too large: celery__celery-6447
Token count is too large: pandas-dev__pandas-5819
Token count is too large: pandas-dev__pandas-18550
Token count is too large: googleapis__google-cloud-python-11332
Token count is too large: huggingface__transformers-19684


Generating train split: 947 examples [01:03, 19.26 examples/s]

Token count is too large: pandas-dev__pandas-23618
Token count is too large: conan-io__conan-1065
Token count is too large: pandas-dev__pandas-37181
Token count is too large: googleapis__google-cloud-python-9176
Token count is too large: googleapis__google-cloud-python-581
Token count is too large: ray-project__ray-6756


Generating train split: 951 examples [01:04, 14.82 examples/s]

Token count is too large: pandas-dev__pandas-21401
Token count is too large: numpy__numpy-4666
Token count is too large: huggingface__transformers-8868
Token count is too large: huggingface__transformers-11964
Token count is too large: ray-project__ray-3937
Token count is too large: ray-project__ray-7662
Token count is too large: mesonbuild__meson-1396
Token count is too large: pantsbuild__pants-17461
Token count is too large: mesonbuild__meson-5477
Token count is too large: pandas-dev__pandas-16153
Token count is too large: PrefectHQ__prefect-583
Token count is too large: Lightning-AI__lightning-2925
Token count is too large: pandas-dev__pandas-14308


Generating train split: 953 examples [01:04, 13.79 examples/s]

Token count is too large: huggingface__transformers-23223
Token count is too large: jupyterlab__jupyterlab-7461
Token count is too large: pandas-dev__pandas-3166
Token count is too large: pandas-dev__pandas-7362
Token count is too large: conan-io__conan-2770
Token count is too large: conan-io__conan-5875
Token count is too large: ray-project__ray-8511
Token count is too large: mesonbuild__meson-6156
Token count is too large: pandas-dev__pandas-4729
Token count is too large: Qiskit__qiskit-5954
Token count is too large: mesonbuild__meson-4649
Token count is too large: Qiskit__qiskit-3100
Token count is too large: googleapis__google-cloud-python-1750
Token count is too large: google__jax-1298
Token count is too large: pandas-dev__pandas-36911
Token count is too large: Lightning-AI__lightning-903
Token count is too large: pypa__pip-1748


Generating train split: 958 examples [01:04, 12.88 examples/s]

Token count is too large: pandas-dev__pandas-23611
Token count is too large: pandas-dev__pandas-22982
Token count is too large: pandas-dev__pandas-29179
Token count is too large: DataDog__integrations-core-5694
Token count is too large: pandas-dev__pandas-16196
Token count is too large: Qiskit__qiskit-7450
Token count is too large: celery__celery-5095
Token count is too large: ray-project__ray-3130
Token count is too large: Lightning-AI__lightning-3043
Token count is too large: Lightning-AI__lightning-3274
Token count is too large: pandas-dev__pandas-31841


Generating train split: 962 examples [01:04, 15.04 examples/s]

Token count is too large: conda__conda-7195
Token count is too large: pandas-dev__pandas-3675
Token count is too large: pandas-dev__pandas-7416
Token count is too large: Qiskit__qiskit-6530
Token count is too large: pandas-dev__pandas-27279
Token count is too large: ipython__ipython-2776
Token count is too large: PrefectHQ__prefect-610
Token count is too large: docker__compose-5476
Token count is too large: google__jax-742
Token count is too large: conda__conda-3633
Token count is too large: pypa__pip-5053
Token count is too large: pandas-dev__pandas-22786
Token count is too large: google__jax-800
Token count is too large: mesonbuild__meson-9152
Token count is too large: google__jax-2810
Token count is too large: Lightning-AI__lightning-2298


Generating train split: 965 examples [01:05, 12.49 examples/s]

Token count is too large: huggingface__transformers-23871
Token count is too large: google__jax-3149
Token count is too large: Qiskit__qiskit-6348
Token count is too large: pandas-dev__pandas-17298
Token count is too large: ipython__ipython-4489
Token count is too large: pyca__cryptography-3473
Token count is too large: conda__conda-5382
Token count is too large: pandas-dev__pandas-26228
Token count is too large: pypa__pip-2937
Token count is too large: mesonbuild__meson-8489
Token count is too large: conda__conda-8999
Token count is too large: huggingface__transformers-24407
Token count is too large: mesonbuild__meson-3724
Token count is too large: pyca__cryptography-2186
Token count is too large: gitpython-developers__GitPython-537
Token count is too large: huggingface__transformers-15623
Token count is too large: huggingface__transformers-11492
Token count is too large: googleapis__google-cloud-python-7206
Token count is too large: pandas-dev__pandas-21531


Generating train split: 971 examples [01:05, 11.97 examples/s]

Token count is too large: huggingface__transformers-25375
Token count is too large: pyca__cryptography-4592
Token count is too large: numpy__numpy-22657
Token count is too large: huggingface__transformers-6915
Token count is too large: mesonbuild__meson-5971
Token count is too large: pyca__cryptography-2072
Token count is too large: pandas-dev__pandas-39605
Token count is too large: huggingface__transformers-20662
Token count is too large: pantsbuild__pants-13539
Token count is too large: huggingface__transformers-5331
Token count is too large: Qiskit__qiskit-2266
Token count is too large: pandas-dev__pandas-3286
Token count is too large: jupyterlab__jupyterlab-3914
Token count is too large: conda__conda-7562
Token count is too large: pandas-dev__pandas-36716
Token count is too large: numpy__numpy-3237
Token count is too large: pandas-dev__pandas-4153
Token count is too large: celery__celery-5684
Token count is too large: numpy__numpy-6763
Token count is too large: conan-io__conan-4128

Generating train split: 975 examples [01:06,  8.74 examples/s]

Token count is too large: pandas-dev__pandas-20839
Token count is too large: pandas-dev__pandas-38010
Token count is too large: conda__conda-11044
Token count is too large: Qiskit__qiskit-2103
Token count is too large: ipython__ipython-1284
Token count is too large: numpy__numpy-12413
Token count is too large: pandas-dev__pandas-4039
Token count is too large: pantsbuild__pants-7126


Generating train split: 977 examples [01:06,  8.17 examples/s]

Token count is too large: pandas-dev__pandas-10812
Token count is too large: tensorflow__models-2727
Token count is too large: huggingface__transformers-23468
Token count is too large: Qiskit__qiskit-7682
Token count is too large: huggingface__transformers-14294
Token count is too large: ipython__ipython-2092


Generating train split: 986 examples [01:07, 13.61 examples/s]

Token count is too large: pandas-dev__pandas-22653
Token count is too large: conda__conda-8163
Token count is too large: conan-io__conan-242
Token count is too large: numpy__numpy-14540
Token count is too large: numpy__numpy-5092
Token count is too large: PrefectHQ__prefect-568
Token count is too large: google__jax-3155
Token count is too large: docker__compose-5822
Token count is too large: numpy__numpy-10371
Token count is too large: huggingface__transformers-21410
Token count is too large: ipython__ipython-10907
Token count is too large: docker__compose-2734


Generating train split: 993 examples [01:07, 19.96 examples/s]

Token count is too large: pandas-dev__pandas-11765
Token count is too large: googleapis__google-cloud-python-10010
Token count is too large: pandas-dev__pandas-10105
Token count is too large: pandas-dev__pandas-25427
Token count is too large: ytdl-org__youtube-dl-23885
Token count is too large: mesonbuild__meson-4129
Token count is too large: ipython__ipython-6961
Token count is too large: Lightning-AI__lightning-2269
Token count is too large: PrefectHQ__prefect-670
Token count is too large: pantsbuild__pants-6686
Token count is too large: huggingface__transformers-4538
Token count is too large: numpy__numpy-13499
Token count is too large: conda__conda-3832
Token count is too large: Qiskit__qiskit-10392
Token count is too large: pandas-dev__pandas-6968
Token count is too large: pandas-dev__pandas-21162
Token count is too large: conda__conda-620
Token count is too large: mesonbuild__meson-4954
Token count is too large: Qiskit__qiskit-10400
Token count is too large: mesonbuild__meson-128

Generating train split: 997 examples [01:07, 13.24 examples/s]

Token count is too large: mesonbuild__meson-6818
Token count is too large: Qiskit__qiskit-7229
Token count is too large: pandas-dev__pandas-31482
Token count is too large: conda__conda-5250
Token count is too large: huggingface__transformers-9486
Token count is too large: ray-project__ray-10866
Token count is too large: ipython__ipython-10638
Token count is too large: numpy__numpy-16291
Token count is too large: Qiskit__qiskit-8648
Token count is too large: conan-io__conan-10984
Token count is too large: google__jax-1658
Token count is too large: pypa__pip-4384
Token count is too large: ray-project__ray-3951


Generating train split: 1000 examples [01:08, 11.83 examples/s]

Token count is too large: conda__conda-11854
Token count is too large: Qiskit__qiskit-4297
Token count is too large: pandas-dev__pandas-14737
Token count is too large: numpy__numpy-7608
Token count is too large: pandas-dev__pandas-4410
Token count is too large: pyca__cryptography-4681
Token count is too large: pandas-dev__pandas-36015
Token count is too large: pandas-dev__pandas-29808
Token count is too large: pypa__pip-5090
Token count is too large: pandas-dev__pandas-8668
Token count is too large: ipython__ipython-8046
Token count is too large: pandas-dev__pandas-12058
Token count is too large: pandas-dev__pandas-22600
Token count is too large: pandas-dev__pandas-18883


Generating train split: 1004 examples [01:08, 11.14 examples/s]

Token count is too large: huggingface__transformers-11223
Token count is too large: Qiskit__qiskit-2559
Token count is too large: celery__celery-6758
Token count is too large: huggingface__transformers-17513
Token count is too large: docker__compose-5583
Token count is too large: pandas-dev__pandas-36094
Token count is too large: Qiskit__qiskit-1955


Generating train split: 1006 examples [01:08, 11.54 examples/s]

Token count is too large: huggingface__transformers-21288
Token count is too large: ytdl-org__youtube-dl-764
Token count is too large: pandas-dev__pandas-37744
Token count is too large: mesonbuild__meson-428
Token count is too large: pandas-dev__pandas-7016


Generating train split: 1010 examples [01:09, 11.39 examples/s]

Token count is too large: scipy__scipy-2730
Token count is too large: twisted__twisted-971
Token count is too large: pandas-dev__pandas-28444
Token count is too large: google__jax-1429
Token count is too large: googleapis__google-cloud-python-11327
Token count is too large: PrefectHQ__prefect-2934
Token count is too large: pandas-dev__pandas-11102
Token count is too large: pyca__cryptography-6272
Token count is too large: apache__airflow-11195


Generating train split: 1012 examples [01:09, 11.68 examples/s]

Token count is too large: apache__airflow-9566
Token count is too large: mesonbuild__meson-7084
Token count is too large: Qiskit__qiskit-5827
Token count is too large: pantsbuild__pants-7924
Token count is too large: docker__compose-7435
Token count is too large: Qiskit__qiskit-5548
Token count is too large: Qiskit__qiskit-4678
Token count is too large: huggingface__transformers-2400
Token count is too large: huggingface__transformers-11945
Token count is too large: conda__conda-7180
Token count is too large: mesonbuild__meson-9165
Token count is too large: conan-io__conan-7779


Generating train split: 1019 examples [01:09, 19.43 examples/s]

Token count is too large: pandas-dev__pandas-32701
Token count is too large: Qiskit__qiskit-8250
Token count is too large: celery__celery-6059
Token count is too large: numpy__numpy-9013
Token count is too large: ytdl-org__youtube-dl-5533
Token count is too large: pandas-dev__pandas-28935
Token count is too large: numpy__numpy-14392
Token count is too large: Lightning-AI__lightning-2981
Token count is too large: ipython__ipython-4624
Token count is too large: ray-project__ray-10953
Token count is too large: apache__airflow-15599
Token count is too large: google__jax-1664
Token count is too large: Qiskit__qiskit-10148


Generating train split: 1027 examples [01:09, 20.99 examples/s]

Token count is too large: pandas-dev__pandas-20912
Token count is too large: numpy__numpy-8617
Token count is too large: conan-io__conan-6700
Token count is too large: scipy__scipy-148
Token count is too large: pandas-dev__pandas-26816
Token count is too large: pandas-dev__pandas-34767
Token count is too large: pandas-dev__pandas-9818
Token count is too large: google__jax-2400
Token count is too large: pandas-dev__pandas-34416
Token count is too large: apache__airflow-10643
Token count is too large: pandas-dev__pandas-35498
Token count is too large: mesonbuild__meson-3744
Token count is too large: pantsbuild__pants-17649
Token count is too large: pypa__pip-6225
Token count is too large: mesonbuild__meson-8833
Token count is too large: ytdl-org__youtube-dl-14107
Token count is too large: Qiskit__qiskit-8944


Generating train split: 1033 examples [01:10, 17.79 examples/s]

Token count is too large: pandas-dev__pandas-21861
Token count is too large: huggingface__transformers-13813
Token count is too large: pandas-dev__pandas-25469
Token count is too large: mesonbuild__meson-10803
Token count is too large: googleapis__google-cloud-python-2039
Token count is too large: pantsbuild__pants-14229
Token count is too large: pandas-dev__pandas-36364
Token count is too large: pandas-dev__pandas-25810
Token count is too large: apache__airflow-23053
Token count is too large: mesonbuild__meson-10049


Generating train split: 1035 examples [01:10, 16.33 examples/s]

Token count is too large: pantsbuild__pants-18554
Token count is too large: conda__conda-12097
Token count is too large: Qiskit__qiskit-6040
Token count is too large: wagtail__wagtail-7850
Token count is too large: gitpython-developers__GitPython-1314
Token count is too large: pandas-dev__pandas-9109
Token count is too large: ipython__ipython-3529
Token count is too large: celery__celery-4690
Token count is too large: docker__compose-3762
Token count is too large: Qiskit__qiskit-3777
Token count is too large: Qiskit__qiskit-5808
Token count is too large: huggingface__transformers-7610
Token count is too large: pandas-dev__pandas-10794
Token count is too large: mesonbuild__meson-4414
Token count is too large: pandas-dev__pandas-25853
Token count is too large: Lightning-AI__lightning-1773
Token count is too large: huggingface__transformers-13859
Token count is too large: google__jax-3390
Token count is too large: numpy__numpy-22046
Token count is too large: pandas-dev__pandas-9143


Generating train split: 1038 examples [01:10, 12.89 examples/s]

Token count is too large: huggingface__transformers-9128
Token count is too large: pandas-dev__pandas-31875
Token count is too large: pandas-dev__pandas-33962
Token count is too large: pandas-dev__pandas-25431
Token count is too large: pandas-dev__pandas-19772
Token count is too large: pandas-dev__pandas-37132
Token count is too large: pantsbuild__pants-11620
Token count is too large: conan-io__conan-2884
Token count is too large: mesonbuild__meson-4696


Generating train split: 1046 examples [01:11, 16.22 examples/s]

Token count is too large: pandas-dev__pandas-9258
Token count is too large: googleapis__google-cloud-python-7793
Token count is too large: ytdl-org__youtube-dl-1204
Token count is too large: scipy__scipy-2917
Token count is too large: conda__conda-1807
Token count is too large: wagtail__wagtail-7590
Token count is too large: apache__airflow-33408
Token count is too large: numpy__numpy-10674
Token count is too large: pandas-dev__pandas-11923
Token count is too large: pandas-dev__pandas-13894
Token count is too large: pantsbuild__pants-13078
Token count is too large: DataDog__integrations-core-8362
Token count is too large: ytdl-org__youtube-dl-30122
Token count is too large: docker__compose-3898
Token count is too large: pandas-dev__pandas-10889
Token count is too large: conan-io__conan-2967
Token count is too large: ipython__ipython-374
Token count is too large: mesonbuild__meson-10250
Token count is too large: pandas-dev__pandas-27467
Token count is too large: apache__airflow-13057


Generating train split: 1053 examples [01:11, 18.80 examples/s]

Token count is too large: pandas-dev__pandas-33884
Token count is too large: pandas-dev__pandas-39141
Token count is too large: pandas-dev__pandas-22754
Token count is too large: googleapis__google-cloud-python-5756
Token count is too large: Lightning-AI__lightning-3345
Token count is too large: huggingface__transformers-11825
Token count is too large: pandas-dev__pandas-13858
Token count is too large: conan-io__conan-2653
Token count is too large: pyca__cryptography-2813
Token count is too large: huggingface__transformers-5025
Token count is too large: pandas-dev__pandas-22987
Token count is too large: celery__celery-5954
Token count is too large: pandas-dev__pandas-20971
Token count is too large: scipy__scipy-188
Token count is too large: google__jax-298
Token count is too large: mesonbuild__meson-9636
Token count is too large: huggingface__transformers-21766
Token count is too large: numpy__numpy-5824
Token count is too large: mesonbuild__meson-6199
Token count is too large: pandas-

Generating train split: 1060 examples [01:12, 13.53 examples/s]

Token count is too large: pypa__pip-8556
Token count is too large: pandas-dev__pandas-30978
There was an error processing
Token count is too large: pypa__pip-2076
Token count is too large: Qiskit__qiskit-7856
Token count is too large: pandas-dev__pandas-28428
Token count is too large: googleapis__google-cloud-python-2806
Token count is too large: pandas-dev__pandas-23114


Generating train split: 1066 examples [01:12, 15.10 examples/s]

Token count is too large: conda__conda-7599
Token count is too large: celery__celery-8374
Token count is too large: pandas-dev__pandas-16752
Token count is too large: pantsbuild__pants-16481
Token count is too large: ipython__ipython-11182
Token count is too large: numpy__numpy-12353
Token count is too large: pandas-dev__pandas-2962
Token count is too large: open-mmlab__mmdetection-7808
Token count is too large: apache__airflow-22658
Token count is too large: pandas-dev__pandas-22131


Generating train split: 1072 examples [01:13, 14.80 examples/s]

Token count is too large: pypa__pip-6146
Token count is too large: mesonbuild__meson-997
Token count is too large: mesonbuild__meson-2624
Token count is too large: Qiskit__qiskit-2043
Token count is too large: huggingface__transformers-13989
Token count is too large: DataDog__integrations-core-9468
Token count is too large: ytdl-org__youtube-dl-4009
Token count is too large: wagtail__wagtail-7591
Token count is too large: pandas-dev__pandas-22106
Token count is too large: google__jax-3463
Token count is too large: docker__compose-2722
Token count is too large: pantsbuild__pants-5170
Token count is too large: Lightning-AI__lightning-3042
Token count is too large: apache__airflow-26191


Generating train split: 1074 examples [01:13, 12.97 examples/s]

Token count is too large: ipython__ipython-13768
Token count is too large: conda__conda-5230
Token count is too large: conan-io__conan-6780
Token count is too large: pandas-dev__pandas-22762
Token count is too large: wagtail__wagtail-621
Token count is too large: huggingface__transformers-3517
Token count is too large: huggingface__transformers-8518
Token count is too large: pandas-dev__pandas-8812
Token count is too large: PrefectHQ__prefect-266
Token count is too large: PrefectHQ__prefect-2413
Token count is too large: pandas-dev__pandas-28181
Token count is too large: pandas-dev__pandas-19024


Generating train split: 1076 examples [01:13, 13.00 examples/s]

Token count is too large: huggingface__transformers-24927
Token count is too large: pandas-dev__pandas-20698
Token count is too large: pandas-dev__pandas-21923
Token count is too large: conda__conda-12880
Token count is too large: ipython__ipython-3787
Token count is too large: pandas-dev__pandas-23321


Generating train split: 1082 examples [01:13, 14.19 examples/s]

Token count is too large: apache__airflow-28003
Token count is too large: apache__airflow-24142
Token count is too large: pandas-dev__pandas-5640
Token count is too large: Lightning-AI__lightning-2388
Token count is too large: pandas-dev__pandas-21780
Token count is too large: Qiskit__qiskit-4764
Token count is too large: pandas-dev__pandas-17932
Token count is too large: pandas-dev__pandas-7639
Token count is too large: pandas-dev__pandas-34193
Token count is too large: docker__compose-5725
Token count is too large: Lightning-AI__lightning-453
Token count is too large: mesonbuild__meson-3962
Token count is too large: open-mmlab__mmdetection-10056
Token count is too large: pandas-dev__pandas-38126
Token count is too large: Qiskit__qiskit-2823
Token count is too large: wagtail__wagtail-6442
Token count is too large: pandas-dev__pandas-28226
Token count is too large: pandas-dev__pandas-9812
Token count is too large: googleapis__google-cloud-python-1318
Token count is too large: ipython__

Generating train split: 1089 examples [01:14, 15.10 examples/s]

Token count is too large: pandas-dev__pandas-22318
Token count is too large: pandas-dev__pandas-16978
Token count is too large: conan-io__conan-9463
Token count is too large: pandas-dev__pandas-27669
Token count is too large: pandas-dev__pandas-30833
Token count is too large: Qiskit__qiskit-1080
Token count is too large: ray-project__ray-9108
Token count is too large: ipython__ipython-10304
Token count is too large: pandas-dev__pandas-11049
Token count is too large: Qiskit__qiskit-5662
Token count is too large: docker__compose-3291


Generating train split: 1091 examples [01:14, 11.80 examples/s]

Token count is too large: googleapis__google-cloud-python-11305
Token count is too large: pandas-dev__pandas-36004
Token count is too large: pandas-dev__pandas-32721
Token count is too large: huggingface__transformers-7872
Token count is too large: pandas-dev__pandas-30546
Token count is too large: numpy__numpy-18180
Token count is too large: googleapis__google-cloud-python-6103
Token count is too large: pypa__pip-984
Token count is too large: numpy__numpy-10524
Token count is too large: pandas-dev__pandas-4018
Token count is too large: apache__airflow-9759
Token count is too large: pypa__pip-6273
Token count is too large: pandas-dev__pandas-23893
Token count is too large: numpy__numpy-16821
Token count is too large: ray-project__ray-8953
Token count is too large: mesonbuild__meson-2874
Token count is too large: tensorflow__models-4181
Token count is too large: huggingface__transformers-13564
Token count is too large: ytdl-org__youtube-dl-4025
Token count is too large: pandas-dev__pand

Generating train split: 1099 examples [01:15, 18.36 examples/s]

Token count is too large: pandas-dev__pandas-34194
Token count is too large: pandas-dev__pandas-31456
Token count is too large: conda__conda-7269
Token count is too large: huggingface__transformers-1764
Token count is too large: apache__airflow-25757
Token count is too large: pantsbuild__pants-13418
Token count is too large: pandas-dev__pandas-36437
Token count is too large: ytdl-org__youtube-dl-3042
Token count is too large: pandas-dev__pandas-26316
Token count is too large: apache__airflow-29279
Token count is too large: pandas-dev__pandas-6275
Token count is too large: mesonbuild__meson-9016
Token count is too large: pandas-dev__pandas-4962
Token count is too large: apache__airflow-22772


Generating train split: 1104 examples [01:15, 23.45 examples/s]

Token count is too large: Qiskit__qiskit-774
Token count is too large: mesonbuild__meson-756
Token count is too large: Qiskit__qiskit-5059
Token count is too large: huggingface__transformers-6984
Token count is too large: pandas-dev__pandas-24863
Token count is too large: huggingface__transformers-24255
Token count is too large: celery__celery-5759
Token count is too large: wagtail__wagtail-9018
Token count is too large: huggingface__transformers-14586
Token count is too large: Qiskit__qiskit-5182
Token count is too large: pandas-dev__pandas-21515
Token count is too large: pandas-dev__pandas-36753
Token count is too large: apache__airflow-30375
Token count is too large: numpy__numpy-11348
Token count is too large: mesonbuild__meson-7232
Token count is too large: pandas-dev__pandas-6222
Token count is too large: conan-io__conan-13757
Token count is too large: googleapis__google-cloud-python-8105
Token count is too large: googleapis__google-cloud-python-9627
Token count is too large: pan

Generating train split: 1108 examples [01:15, 11.24 examples/s]

Token count is too large: mesonbuild__meson-8135
Token count is too large: pandas-dev__pandas-6256
Token count is too large: Qiskit__qiskit-5156
Token count is too large: pandas-dev__pandas-6761
Token count is too large: pandas-dev__pandas-3580
Token count is too large: mesonbuild__meson-4540
Token count is too large: pyca__cryptography-604
Token count is too large: pandas-dev__pandas-24027


Generating train split: 1114 examples [01:16, 14.87 examples/s]

Token count is too large: apache__airflow-17990
Token count is too large: pandas-dev__pandas-39069
Token count is too large: pantsbuild__pants-7299
Token count is too large: Qiskit__qiskit-9095
Token count is too large: Qiskit__qiskit-6153
Token count is too large: pandas-dev__pandas-16523
Token count is too large: Qiskit__qiskit-5139
Token count is too large: pypa__pip-1869
Token count is too large: PrefectHQ__prefect-2388
Token count is too large: pandas-dev__pandas-18099
Token count is too large: PrefectHQ__prefect-2867
Token count is too large: conda__conda-8259
Token count is too large: conan-io__conan-5702


Generating train split: 1118 examples [01:16, 17.15 examples/s]

Token count is too large: pandas-dev__pandas-18925
Token count is too large: conan-io__conan-9218
Token count is too large: Lightning-AI__lightning-2970
Token count is too large: conan-io__conan-5293
Token count is too large: pantsbuild__pants-17941
Token count is too large: Qiskit__qiskit-9403
Token count is too large: ytdl-org__youtube-dl-31043


Generating train split: 1121 examples [01:16, 15.42 examples/s]

Token count is too large: pandas-dev__pandas-20457
Token count is too large: pandas-dev__pandas-24340
Token count is too large: conda__conda-5237
Token count is too large: pantsbuild__pants-14715
Token count is too large: ipython__ipython-4389
Token count is too large: twisted__twisted-1142
Token count is too large: mesonbuild__meson-5065
Token count is too large: Qiskit__qiskit-1998
Token count is too large: pandas-dev__pandas-16351
Token count is too large: googleapis__google-cloud-python-597
Token count is too large: pandas-dev__pandas-21728
Token count is too large: pandas-dev__pandas-22988
Token count is too large: mesonbuild__meson-592
Token count is too large: pyca__cryptography-3361
Token count is too large: numpy__numpy-23559
Token count is too large: huggingface__transformers-18861
Token count is too large: huggingface__transformers-10095
Token count is too large: pandas-dev__pandas-24850
Token count is too large: googleapis__google-cloud-python-5674
Token count is too large:

Generating train split: 1124 examples [01:16, 13.05 examples/s]

Token count is too large: docker__compose-1763
Token count is too large: huggingface__transformers-17987
Token count is too large: numpy__numpy-2816
Token count is too large: celery__celery-1899
Token count is too large: ray-project__ray-9525
Token count is too large: wagtail__wagtail-6872
Token count is too large: conan-io__conan-4233
Token count is too large: pypa__pip-3401
Token count is too large: conda__conda-7725
Token count is too large: pandas-dev__pandas-36872
Token count is too large: jupyterlab__jupyterlab-13336
Token count is too large: numpy__numpy-21448
Token count is too large: googleapis__google-cloud-python-629


Generating train split: 1134 examples [01:17, 20.99 examples/s]

Token count is too large: huggingface__transformers-23749
Token count is too large: apache__airflow-9879
Token count is too large: Qiskit__qiskit-1542
Token count is too large: huggingface__transformers-22942
Token count is too large: PrefectHQ__prefect-2942
Token count is too large: pantsbuild__pants-6000
Token count is too large: pantsbuild__pants-17516
Token count is too large: huggingface__transformers-18272
Token count is too large: Qiskit__qiskit-5505
Token count is too large: numpy__numpy-5178
Token count is too large: pandas-dev__pandas-4820
Token count is too large: wagtail__wagtail-8949
Token count is too large: huggingface__transformers-21263
Token count is too large: PrefectHQ__prefect-2805
Token count is too large: Qiskit__qiskit-4135


Generating train split: 1137 examples [01:17, 15.12 examples/s]

Token count is too large: pandas-dev__pandas-5217
Token count is too large: Qiskit__qiskit-977
Token count is too large: pandas-dev__pandas-22919
Token count is too large: Qiskit__qiskit-4763
Token count is too large: celery__celery-2349
Token count is too large: pandas-dev__pandas-34056
Token count is too large: ipython__ipython-10533
Token count is too large: Qiskit__qiskit-1280
Token count is too large: pypa__pip-7557
Token count is too large: Qiskit__qiskit-6500
Token count is too large: PrefectHQ__prefect-749
Token count is too large: ytdl-org__youtube-dl-2722


Generating train split: 1146 examples [01:17, 19.15 examples/s]

Token count is too large: huggingface__transformers-23944
Token count is too large: Qiskit__qiskit-10153
Token count is too large: numpy__numpy-3830
Token count is too large: apache__airflow-17989
Token count is too large: huggingface__transformers-16018
Token count is too large: googleapis__google-cloud-python-3776
Token count is too large: googleapis__google-cloud-python-6837
Token count is too large: docker__compose-6914
Token count is too large: PrefectHQ__prefect-1899
Token count is too large: docker__compose-3139
Token count is too large: ytdl-org__youtube-dl-7599
Token count is too large: conan-io__conan-7200
Token count is too large: celery__celery-5682
Token count is too large: ipython__ipython-4158
Token count is too large: huggingface__transformers-16673
Token count is too large: Qiskit__qiskit-773
Token count is too large: ipython__ipython-6616
Token count is too large: pandas-dev__pandas-13516
Token count is too large: pandas-dev__pandas-36814
Token count is too large: hug

Generating train split: 1150 examples [01:18, 14.55 examples/s]

Token count is too large: pandas-dev__pandas-10236
Token count is too large: pandas-dev__pandas-31238
Token count is too large: Qiskit__qiskit-9537
Token count is too large: Lightning-AI__lightning-1865
Token count is too large: conan-io__conan-4810
Token count is too large: pandas-dev__pandas-20422
Token count is too large: Lightning-AI__lightning-936
Token count is too large: Lightning-AI__lightning-2959


Generating train split: 1155 examples [01:18, 16.75 examples/s]

Token count is too large: google__jax-383
Token count is too large: pypa__pip-11117
Token count is too large: pandas-dev__pandas-5870
Token count is too large: numpy__numpy-4633
Token count is too large: Qiskit__qiskit-8404
Token count is too large: Qiskit__qiskit-688
Token count is too large: apache__airflow-15848
Token count is too large: Qiskit__qiskit-5980
Token count is too large: pandas-dev__pandas-3152
Token count is too large: ytdl-org__youtube-dl-7057
Token count is too large: conan-io__conan-5492
Token count is too large: googleapis__google-cloud-python-8721
Token count is too large: pandas-dev__pandas-13812
Token count is too large: Qiskit__qiskit-7409
Token count is too large: Qiskit__qiskit-1060
Token count is too large: pandas-dev__pandas-25474
Token count is too large: wagtail__wagtail-10113
Token count is too large: pandas-dev__pandas-28951


Generating train split: 1162 examples [01:19, 16.73 examples/s]

Token count is too large: pandas-dev__pandas-7994
Token count is too large: google__jax-168
Token count is too large: conan-io__conan-3361
Token count is too large: pandas-dev__pandas-8036
Token count is too large: pandas-dev__pandas-19553
Token count is too large: Qiskit__qiskit-1334
Token count is too large: Qiskit__qiskit-6064
Token count is too large: docker__compose-6597
Token count is too large: ray-project__ray-11084
Token count is too large: huggingface__transformers-25429
Token count is too large: mesonbuild__meson-3571
Token count is too large: celery__celery-6713
Token count is too large: huggingface__transformers-3948
Token count is too large: pandas-dev__pandas-25943
Token count is too large: conan-io__conan-2633
Token count is too large: pandas-dev__pandas-23118
Token count is too large: ipython__ipython-1155
Token count is too large: pypa__pip-2162


Generating train split: 1169 examples [01:19, 15.58 examples/s]

Token count is too large: pandas-dev__pandas-10472
Token count is too large: pandas-dev__pandas-19980
Token count is too large: conan-io__conan-13661
Token count is too large: huggingface__transformers-13109
Token count is too large: huggingface__transformers-24893
Token count is too large: PrefectHQ__prefect-1610
Token count is too large: google__jax-332
Token count is too large: pandas-dev__pandas-21198
Token count is too large: pandas-dev__pandas-24486
Token count is too large: pandas-dev__pandas-19148
Token count is too large: huggingface__transformers-22828
Token count is too large: numpy__numpy-6363
Token count is too large: conda__conda-8924
Token count is too large: pypa__pip-11710
Token count is too large: pandas-dev__pandas-34939
Token count is too large: pandas-dev__pandas-10825


Generating train split: 1171 examples [01:19, 13.43 examples/s]

Token count is too large: pandas-dev__pandas-29313
Token count is too large: pandas-dev__pandas-28569
Token count is too large: ipython__ipython-13888
Token count is too large: numpy__numpy-11698
Token count is too large: apache__airflow-16388
Token count is too large: pandas-dev__pandas-37803
Token count is too large: docker__compose-6937
Token count is too large: pandas-dev__pandas-16079
Token count is too large: pandas-dev__pandas-22015
Token count is too large: dagster-io__dagster-9405


Generating train split: 1173 examples [01:20, 11.33 examples/s]

Token count is too large: pandas-dev__pandas-10272
Token count is too large: pandas-dev__pandas-5894
Token count is too large: conan-io__conan-10213
Token count is too large: pandas-dev__pandas-5359
Token count is too large: celery__celery-2651
Token count is too large: googleapis__google-cloud-python-6099
Token count is too large: huggingface__transformers-12350
Token count is too large: numpy__numpy-5031
Token count is too large: huggingface__transformers-17898


Generating train split: 1179 examples [01:20, 15.57 examples/s]

Token count is too large: Lightning-AI__lightning-1905
Token count is too large: mesonbuild__meson-2348
Token count is too large: ytdl-org__youtube-dl-4543
Token count is too large: Qiskit__qiskit-1000
Token count is too large: apache__airflow-17236
Token count is too large: numpy__numpy-9020
Token count is too large: pypa__pip-6171


Generating train split: 1181 examples [01:20, 10.49 examples/s]

Token count is too large: pypa__pip-3387
Token count is too large: mesonbuild__meson-8355
Token count is too large: Qiskit__qiskit-4711
Token count is too large: pantsbuild__pants-5808
Token count is too large: numpy__numpy-17344
Token count is too large: pandas-dev__pandas-3670
Token count is too large: conan-io__conan-7627


Generating train split: 1186 examples [01:21, 13.02 examples/s]

Token count is too large: huggingface__transformers-19794
Token count is too large: huggingface__transformers-12328
Token count is too large: pandas-dev__pandas-22280
Token count is too large: pandas-dev__pandas-36693
Token count is too large: pandas-dev__pandas-36532
Token count is too large: numpy__numpy-13574
Token count is too large: mesonbuild__meson-5212
Token count is too large: apache__airflow-21289
Token count is too large: apache__airflow-13512
Token count is too large: conan-io__conan-4748
Token count is too large: mesonbuild__meson-3739
Token count is too large: googleapis__google-cloud-python-494
Token count is too large: pandas-dev__pandas-22232
Token count is too large: Lightning-AI__lightning-619
Token count is too large: wagtail__wagtail-4689
Token count is too large: pypa__pip-2347


Generating train split: 1189 examples [01:21,  9.77 examples/s]

Token count is too large: huggingface__transformers-24629
Token count is too large: google__jax-1930
Token count is too large: ray-project__ray-11104
Token count is too large: numpy__numpy-9487
Token count is too large: pantsbuild__pants-18412
Token count is too large: mesonbuild__meson-5475
Token count is too large: numpy__numpy-13182
Token count is too large: pandas-dev__pandas-10951


Generating train split: 1194 examples [01:21, 12.24 examples/s]

Token count is too large: numpy__numpy-21977
Token count is too large: celery__celery-6360
Token count is too large: mesonbuild__meson-3314
Token count is too large: pandas-dev__pandas-7044
Token count is too large: conan-io__conan-3846
Token count is too large: huggingface__transformers-11537
Token count is too large: huggingface__transformers-4243
Token count is too large: celery__celery-3721
Token count is too large: Lightning-AI__lightning-1453
Token count is too large: celery__celery-7246


Generating train split: 1197 examples [01:21, 13.58 examples/s]

Token count is too large: pandas-dev__pandas-11893
Token count is too large: pandas-dev__pandas-18623
Token count is too large: huggingface__transformers-12770
Token count is too large: Qiskit__qiskit-7942
Token count is too large: apache__airflow-31033


Generating train split: 1200 examples [01:22, 14.77 examples/s]

Token count is too large: numpy__numpy-11427
Token count is too large: pantsbuild__pants-13578
Token count is too large: pandas-dev__pandas-25502
Token count is too large: ipython__ipython-10529
Token count is too large: explosion__spaCy-692
Token count is too large: numpy__numpy-19656
Token count is too large: pandas-dev__pandas-2056


Generating train split: 1211 examples [01:22, 24.73 examples/s]

Token count is too large: pandas-dev__pandas-35941
Token count is too large: pandas-dev__pandas-17142
Token count is too large: huggingface__transformers-17589
Token count is too large: pantsbuild__pants-12868
Token count is too large: dagster-io__dagster-8635
Token count is too large: gitpython-developers__GitPython-1340
Token count is too large: pandas-dev__pandas-16208
Token count is too large: huggingface__transformers-4747
Token count is too large: pantsbuild__pants-5407
Token count is too large: pandas-dev__pandas-9105
Token count is too large: pandas-dev__pandas-24303
Token count is too large: pandas-dev__pandas-16426
Token count is too large: huggingface__transformers-21047
Token count is too large: pandas-dev__pandas-32439
Token count is too large: pandas-dev__pandas-6848
Token count is too large: pandas-dev__pandas-19176
Token count is too large: pandas-dev__pandas-6447
Token count is too large: pandas-dev__pandas-27424
Token count is too large: ytdl-org__youtube-dl-31398
Tok

Generating train split: 1215 examples [01:23, 14.59 examples/s]

Token count is too large: huggingface__transformers-7410
Token count is too large: googleapis__google-cloud-python-9113
Token count is too large: ray-project__ray-1088
Token count is too large: PrefectHQ__prefect-292
Token count is too large: googleapis__google-cloud-python-11354
Token count is too large: huggingface__transformers-16368
Token count is too large: pandas-dev__pandas-8399
Token count is too large: pantsbuild__pants-15375
Token count is too large: pandas-dev__pandas-26651
Token count is too large: conan-io__conan-4673
Token count is too large: huggingface__transformers-4477
Token count is too large: pandas-dev__pandas-5970
Token count is too large: wagtail__wagtail-10175
Token count is too large: conda__conda-6922
Token count is too large: huggingface__transformers-6213
Token count is too large: ipython__ipython-12280
Token count is too large: pandas-dev__pandas-16960
Token count is too large: pandas-dev__pandas-33436
Token count is too large: Lightning-AI__lightning-2014


Generating train split: 1226 examples [01:23, 16.70 examples/s]

Token count is too large: conan-io__conan-9194
Token count is too large: pantsbuild__pants-13953
Token count is too large: conan-io__conan-5178
Token count is too large: pandas-dev__pandas-21508
Token count is too large: pandas-dev__pandas-18826
Token count is too large: pandas-dev__pandas-39439
Token count is too large: open-mmlab__mmdetection-4056
Token count is too large: pandas-dev__pandas-5704
Token count is too large: pandas-dev__pandas-38094


Generating train split: 1231 examples [01:23, 19.64 examples/s]

Token count is too large: mesonbuild__meson-4680
Token count is too large: google__jax-1002
Token count is too large: pandas-dev__pandas-6375
Token count is too large: jupyterlab__jupyterlab-6779
Token count is too large: apache__airflow-1132
Token count is too large: googleapis__google-cloud-python-3661
Token count is too large: dagster-io__dagster-5624
Token count is too large: pandas-dev__pandas-19579
Token count is too large: pandas-dev__pandas-36514
Token count is too large: Qiskit__qiskit-5597
Token count is too large: pandas-dev__pandas-10206
Token count is too large: ipython__ipython-10555
Token count is too large: Qiskit__qiskit-2173


Generating train split: 1241 examples [01:24, 24.85 examples/s]

Token count is too large: docker__compose-7457
Token count is too large: huggingface__transformers-14355
Token count is too large: conan-io__conan-10812
Token count is too large: ray-project__ray-9110
Token count is too large: conda__conda-7735
Token count is too large: pantsbuild__pants-15096
Token count is too large: pandas-dev__pandas-6905
Token count is too large: conan-io__conan-3613
Token count is too large: pandas-dev__pandas-16509
Token count is too large: mesonbuild__meson-3369
Token count is too large: ipython__ipython-13140
Token count is too large: ray-project__ray-4734
Token count is too large: apache__airflow-8220
Token count is too large: huggingface__transformers-22470
Token count is too large: pandas-dev__pandas-3744
Token count is too large: pandas-dev__pandas-28632
Token count is too large: conda__conda-8562
Token count is too large: pandas-dev__pandas-33102
Token count is too large: apache__airflow-33277
Token count is too large: celery__celery-6020
Token count is t

Generating train split: 1245 examples [01:24, 13.96 examples/s]

Token count is too large: pandas-dev__pandas-17925
Token count is too large: pypa__pip-10084
Token count is too large: pandas-dev__pandas-26374
Token count is too large: pypa__pip-1601
Token count is too large: numpy__numpy-24161
Token count is too large: Qiskit__qiskit-10659
Token count is too large: pandas-dev__pandas-30507
Token count is too large: mesonbuild__meson-10303
Token count is too large: mesonbuild__meson-7840
Token count is too large: pandas-dev__pandas-39280
Token count is too large: pantsbuild__pants-15755
Token count is too large: docker__compose-7720


Generating train split: 1249 examples [01:25, 15.04 examples/s]

Token count is too large: pandas-dev__pandas-23888
Token count is too large: pandas-dev__pandas-26876
Token count is too large: conda__conda-6447
Token count is too large: Qiskit__qiskit-3138
Token count is too large: huggingface__transformers-4098
Token count is too large: mesonbuild__meson-9899
Token count is too large: pandas-dev__pandas-16565
Token count is too large: jupyterlab__jupyterlab-5099
Token count is too large: ray-project__ray-9680
Token count is too large: googleapis__google-cloud-python-1997
Token count is too large: docker__compose-8122
Token count is too large: googleapis__google-cloud-python-8748
Token count is too large: pandas-dev__pandas-26721
Token count is too large: mesonbuild__meson-7170


Generating train split: 1255 examples [01:25, 19.30 examples/s]

Token count is too large: pandas-dev__pandas-33502
Token count is too large: apache__airflow-23860
Token count is too large: huggingface__transformers-11672
Token count is too large: docker__compose-5234
Token count is too large: googleapis__google-cloud-python-5687
Token count is too large: pantsbuild__pants-11660
Token count is too large: docker__compose-3466
Token count is too large: pandas-dev__pandas-16430
Token count is too large: numpy__numpy-6432
Token count is too large: docker__compose-3418
Token count is too large: pypa__pip-4987
Token count is too large: mesonbuild__meson-11548
Token count is too large: pandas-dev__pandas-33292


Generating train split: 1258 examples [01:25, 18.59 examples/s]

Token count is too large: numpy__numpy-8647
Token count is too large: pandas-dev__pandas-18082
Token count is too large: pandas-dev__pandas-16897
Token count is too large: pandas-dev__pandas-23318
Token count is too large: pandas-dev__pandas-20965
Token count is too large: huggingface__transformers-20645
Token count is too large: apache__airflow-32382


Generating train split: 1263 examples [01:25, 17.25 examples/s]

Token count is too large: huggingface__transformers-14401
Token count is too large: conda__conda-10413
Token count is too large: conan-io__conan-4204
Token count is too large: huggingface__transformers-17751
Token count is too large: google__jax-415
Token count is too large: pandas-dev__pandas-20571
Token count is too large: pandas-dev__pandas-13477


Generating train split: 1266 examples [01:26, 14.74 examples/s]

Token count is too large: huggingface__transformers-19590
Token count is too large: huggingface__transformers-11906
Token count is too large: huggingface__transformers-24666
Token count is too large: Qiskit__qiskit-2735
Token count is too large: apache__airflow-396
Token count is too large: ytdl-org__youtube-dl-30531
Token count is too large: google__jax-761
Token count is too large: ray-project__ray-4104
Token count is too large: numpy__numpy-7729
Token count is too large: gitpython-developers__GitPython-1399
Token count is too large: apache__airflow-19994
Token count is too large: pyca__cryptography-3897
Token count is too large: pandas-dev__pandas-13660
Token count is too large: numpy__numpy-9505
Token count is too large: PrefectHQ__prefect-3136
Token count is too large: pandas-dev__pandas-11627
Token count is too large: numpy__numpy-3248
Token count is too large: Qiskit__qiskit-936
Token count is too large: pandas-dev__pandas-21160


Generating train split: 1271 examples [01:26, 13.41 examples/s]

Token count is too large: google__jax-595
Token count is too large: docker__compose-3400
Token count is too large: pandas-dev__pandas-16181
Token count is too large: pandas-dev__pandas-28267
Token count is too large: scipy__scipy-3348
Token count is too large: conan-io__conan-2659
Token count is too large: huggingface__transformers-21542
Token count is too large: ytdl-org__youtube-dl-1248
Token count is too large: Qiskit__qiskit-4638


Generating train split: 1276 examples [01:26, 14.26 examples/s]

Token count is too large: pandas-dev__pandas-7006
Token count is too large: celery__celery-7873
Token count is too large: google__jax-3140
Token count is too large: ray-project__ray-1225
Token count is too large: docker__compose-6140
Token count is too large: mesonbuild__meson-1060
Token count is too large: celery__celery-6598
Token count is too large: Qiskit__qiskit-7655
Token count is too large: mesonbuild__meson-10742
Token count is too large: googleapis__google-cloud-python-4584
Token count is too large: pandas-dev__pandas-7902
Token count is too large: docker__compose-1461


Generating train split: 1282 examples [01:27, 15.50 examples/s]

Token count is too large: pandas-dev__pandas-10396
Token count is too large: huggingface__transformers-9401
Token count is too large: ipython__ipython-12095
Token count is too large: google__jax-1622
Token count is too large: pandas-dev__pandas-10931
Token count is too large: pantsbuild__pants-7186
Token count is too large: pandas-dev__pandas-22464
Token count is too large: pandas-dev__pandas-25521
Token count is too large: numpy__numpy-18415
Token count is too large: pandas-dev__pandas-24426
Token count is too large: conan-io__conan-10981
Token count is too large: google__jax-285
Token count is too large: pandas-dev__pandas-23550
Token count is too large: numpy__numpy-14621
Token count is too large: docker__compose-6313
Token count is too large: google__jax-351
Token count is too large: pandas-dev__pandas-11913


Generating train split: 1286 examples [01:27, 15.57 examples/s]

Token count is too large: pandas-dev__pandas-9345
Token count is too large: googleapis__google-cloud-python-5181
Token count is too large: pandas-dev__pandas-22109
Token count is too large: pandas-dev__pandas-18651
Token count is too large: pandas-dev__pandas-34473
Token count is too large: apache__airflow-11578
Token count is too large: docker__compose-6077
Token count is too large: tiangolo__fastapi-538
Token count is too large: pandas-dev__pandas-23143
Token count is too large: numpy__numpy-13348
Token count is too large: pandas-dev__pandas-36580
Token count is too large: pandas-dev__pandas-32370
Token count is too large: ipython__ipython-8483
Token count is too large: Lightning-AI__lightning-1349


Generating train split: 1294 examples [01:27, 18.98 examples/s]

Token count is too large: pandas-dev__pandas-36114
Token count is too large: mesonbuild__meson-8596
Token count is too large: ipython__ipython-8096
Token count is too large: googleapis__google-cloud-python-7843
Token count is too large: conan-io__conan-4293
Token count is too large: pyca__cryptography-3738
Token count is too large: pandas-dev__pandas-25738
Token count is too large: conda__conda-5372
Token count is too large: wagtail__wagtail-1444
Token count is too large: open-mmlab__mmdetection-4621
Token count is too large: wagtail__wagtail-10209
Token count is too large: numpy__numpy-16644
Token count is too large: huggingface__transformers-7016
Token count is too large: pandas-dev__pandas-37508
Token count is too large: docker__compose-6466
Token count is too large: pandas-dev__pandas-21098
Token count is too large: mesonbuild__meson-2840
Token count is too large: huggingface__transformers-9150
Token count is too large: pandas-dev__pandas-17683
Token count is too large: pypa__pip-8

Generating train split: 1297 examples [01:28, 10.34 examples/s]

Token count is too large: Qiskit__qiskit-10479
Token count is too large: ipython__ipython-3500
Token count is too large: celery__celery-8446
Token count is too large: googleapis__google-cloud-python-4616
Token count is too large: celery__celery-8143
Token count is too large: pandas-dev__pandas-5716
Token count is too large: pandas-dev__pandas-16443
Token count is too large: pandas-dev__pandas-27890
Token count is too large: conda__conda-9835
Token count is too large: pandas-dev__pandas-30858
Token count is too large: googleapis__google-cloud-python-73
Token count is too large: jupyterlab__jupyterlab-6509
Token count is too large: Qiskit__qiskit-1767
Token count is too large: open-mmlab__mmdetection-2030


Generating train split: 1299 examples [01:28, 10.74 examples/s]

Token count is too large: pandas-dev__pandas-38098
Token count is too large: ray-project__ray-7597
Token count is too large: pandas-dev__pandas-10558
Token count is too large: pandas-dev__pandas-20079
Token count is too large: numpy__numpy-19613
Token count is too large: pandas-dev__pandas-23255
Token count is too large: numpy__numpy-23357


Generating train split: 1310 examples [01:28, 19.35 examples/s]

Token count is too large: ytdl-org__youtube-dl-357
Token count is too large: conan-io__conan-3680
Token count is too large: pandas-dev__pandas-21175
Token count is too large: ytdl-org__youtube-dl-31515
Token count is too large: pandas-dev__pandas-37069
Token count is too large: pantsbuild__pants-13931
Token count is too large: ipython__ipython-4552
Token count is too large: conan-io__conan-9685
Token count is too large: pandas-dev__pandas-34223
Token count is too large: pantsbuild__pants-6308
Token count is too large: pandas-dev__pandas-33138
Token count is too large: pantsbuild__pants-7502
Token count is too large: mesonbuild__meson-1580
Token count is too large: dagster-io__dagster-12633
Token count is too large: PrefectHQ__prefect-2406
Token count is too large: pandas-dev__pandas-3720
Token count is too large: numpy__numpy-8349
Token count is too large: pandas-dev__pandas-7696
Token count is too large: numpy__numpy-7373
Token count is too large: pandas-dev__pandas-8472
Token count i

Generating train split: 1314 examples [01:29, 14.15 examples/s]

Token count is too large: pandas-dev__pandas-22034
Token count is too large: docker__compose-2023
Token count is too large: PrefectHQ__prefect-2744
Token count is too large: pandas-dev__pandas-30569
Token count is too large: mesonbuild__meson-8154
Token count is too large: jupyterlab__jupyterlab-10444
Token count is too large: huggingface__transformers-8049
Token count is too large: Qiskit__qiskit-10389


Generating train split: 1319 examples [01:29, 15.77 examples/s]

Token count is too large: pandas-dev__pandas-35704
Token count is too large: Qiskit__qiskit-5026
Token count is too large: numpy__numpy-12869
Token count is too large: mesonbuild__meson-9211
Token count is too large: Lightning-AI__lightning-1126
Token count is too large: pandas-dev__pandas-8041
Token count is too large: pandas-dev__pandas-7961
Token count is too large: conan-io__conan-278
Token count is too large: Lightning-AI__lightning-1670
Token count is too large: pandas-dev__pandas-5659
Token count is too large: mesonbuild__meson-3963
Token count is too large: huggingface__transformers-8877
Token count is too large: Qiskit__qiskit-4663
Token count is too large: pandas-dev__pandas-5918
Token count is too large: Qiskit__qiskit-4276
Token count is too large: googleapis__google-cloud-python-9504


Generating train split: 1324 examples [01:29, 18.01 examples/s]

Token count is too large: pandas-dev__pandas-28681
Token count is too large: googleapis__google-cloud-python-5187
Token count is too large: pantsbuild__pants-6278
Token count is too large: pandas-dev__pandas-28717
Token count is too large: apache__airflow-19878
Token count is too large: pypa__pip-1335
Token count is too large: conan-io__conan-3438
Token count is too large: pyca__cryptography-3279
Token count is too large: wagtail__wagtail-5999
Token count is too large: pandas-dev__pandas-14665
Token count is too large: huggingface__transformers-8237
Token count is too large: google__jax-2034
Token count is too large: ray-project__ray-5091
Token count is too large: pandas-dev__pandas-18893
Token count is too large: celery__celery-5141
Token count is too large: Qiskit__qiskit-4803
Token count is too large: wagtail__wagtail-5644


Generating train split: 1327 examples [01:30, 14.87 examples/s]

Token count is too large: conan-io__conan-5582
Token count is too large: pandas-dev__pandas-10418
Token count is too large: Qiskit__qiskit-3051
Token count is too large: wagtail__wagtail-6344
Token count is too large: pandas-dev__pandas-27876
Token count is too large: conda__conda-10759
Token count is too large: googleapis__google-cloud-python-4635
Token count is too large: Lightning-AI__lightning-743
Token count is too large: pandas-dev__pandas-35654
Token count is too large: Qiskit__qiskit-10382
Token count is too large: pyca__cryptography-714
Token count is too large: googleapis__google-cloud-python-3484


Generating train split: 1331 examples [01:30, 17.95 examples/s]

Token count is too large: huggingface__transformers-17324
Token count is too large: pantsbuild__pants-4226
Token count is too large: mesonbuild__meson-5872
Token count is too large: Qiskit__qiskit-5593
Token count is too large: pandas-dev__pandas-23839
Token count is too large: huggingface__transformers-7334
Token count is too large: pandas-dev__pandas-4684
Token count is too large: pandas-dev__pandas-28354
Token count is too large: pandas-dev__pandas-17934


Generating train split: 1335 examples [01:30, 17.57 examples/s]

Token count is too large: numpy__numpy-21630
Token count is too large: mesonbuild__meson-4907
Token count is too large: pandas-dev__pandas-38332
Token count is too large: pandas-dev__pandas-7531
Token count is too large: pantsbuild__pants-12067
Token count is too large: googleapis__google-cloud-python-4977
Token count is too large: Qiskit__qiskit-3230
Token count is too large: pandas-dev__pandas-31552
Token count is too large: Qiskit__qiskit-7230
Token count is too large: huggingface__transformers-15603
Token count is too large: pyca__cryptography-3553
Token count is too large: Qiskit__qiskit-8679
Token count is too large: pandas-dev__pandas-21487
Token count is too large: pandas-dev__pandas-35408


Generating train split: 1338 examples [01:30, 17.57 examples/s]

Token count is too large: pandas-dev__pandas-17903
Token count is too large: pantsbuild__pants-13908
Token count is too large: pandas-dev__pandas-20703
Token count is too large: pandas-dev__pandas-21176
Token count is too large: conda__conda-5469
Token count is too large: conan-io__conan-7781
Token count is too large: pandas-dev__pandas-38728
Token count is too large: open-mmlab__mmdetection-854
Token count is too large: mesonbuild__meson-8158
Token count is too large: numpy__numpy-15993
Token count is too large: apache__airflow-32756


Generating train split: 1343 examples [01:30, 18.85 examples/s]

Token count is too large: pandas-dev__pandas-31529
Token count is too large: apache__airflow-33481
Token count is too large: pandas-dev__pandas-14743
Token count is too large: googleapis__google-cloud-python-8939
Token count is too large: conda__conda-6723
Token count is too large: ytdl-org__youtube-dl-27618
Token count is too large: ipython__ipython-10403
Token count is too large: mesonbuild__meson-4255
Token count is too large: gitpython-developers__GitPython-156


Generating train split: 1346 examples [01:31, 15.98 examples/s]

Token count is too large: huggingface__transformers-24785
Token count is too large: huggingface__transformers-19626
Token count is too large: explosion__spaCy-3471
Token count is too large: huggingface__transformers-20969


Generating train split: 1350 examples [01:31, 18.26 examples/s]

Token count is too large: mesonbuild__meson-6018
Token count is too large: conan-io__conan-4103
Token count is too large: dagster-io__dagster-7673
Token count is too large: pandas-dev__pandas-16275
Token count is too large: wagtail__wagtail-8189
Token count is too large: pandas-dev__pandas-35741
Token count is too large: Lightning-AI__lightning-2360
Token count is too large: pandas-dev__pandas-18577
Token count is too large: pandas-dev__pandas-8384
Token count is too large: huggingface__transformers-18684
Token count is too large: pandas-dev__pandas-19048
Token count is too large: pandas-dev__pandas-20000


Generating train split: 1354 examples [01:31, 13.92 examples/s]

Token count is too large: pandas-dev__pandas-9877
Token count is too large: pantsbuild__pants-8226
Token count is too large: conan-io__conan-5571
Token count is too large: pandas-dev__pandas-15456
Token count is too large: pandas-dev__pandas-24909
Token count is too large: pandas-dev__pandas-18496
Token count is too large: mesonbuild__meson-1735


Generating train split: 1356 examples [01:32, 10.91 examples/s]

Token count is too large: pandas-dev__pandas-20770
Token count is too large: huggingface__transformers-24532
Token count is too large: ytdl-org__youtube-dl-2696
Token count is too large: pandas-dev__pandas-9875


Generating train split: 1359 examples [01:32, 11.28 examples/s]

Token count is too large: huggingface__transformers-24749
Token count is too large: ipython__ipython-12033
Token count is too large: Qiskit__qiskit-2790
Token count is too large: mesonbuild__meson-5230
Token count is too large: pandas-dev__pandas-6790
Token count is too large: huggingface__transformers-18585
Token count is too large: huggingface__transformers-14477
Token count is too large: tiangolo__fastapi-856
Token count is too large: pandas-dev__pandas-23289
Token count is too large: pandas-dev__pandas-34844


Generating train split: 1365 examples [01:32, 16.37 examples/s]

Token count is too large: mesonbuild__meson-5372
Token count is too large: ytdl-org__youtube-dl-316
Token count is too large: Lightning-AI__lightning-1596
Token count is too large: pantsbuild__pants-17471
Token count is too large: Lightning-AI__lightning-2073
Token count is too large: pandas-dev__pandas-24529
Token count is too large: pandas-dev__pandas-15081
Token count is too large: pandas-dev__pandas-37965
Token count is too large: pyca__cryptography-1988


Generating train split: 1368 examples [01:32, 16.53 examples/s]

Token count is too large: pandas-dev__pandas-38641
Token count is too large: pandas-dev__pandas-11155
Token count is too large: pypa__pip-1901
Token count is too large: Qiskit__qiskit-1285
Token count is too large: ytdl-org__youtube-dl-31453
Token count is too large: mesonbuild__meson-2094
Token count is too large: celery__celery-4131
Token count is too large: Qiskit__qiskit-8321
Token count is too large: pandas-dev__pandas-17766
Token count is too large: pantsbuild__pants-14603
Token count is too large: pandas-dev__pandas-30885
Token count is too large: Lightning-AI__lightning-516
Token count is too large: apache__airflow-9097
Token count is too large: conda__conda-7162
Token count is too large: googleapis__google-cloud-python-9550
Token count is too large: PrefectHQ__prefect-254
Token count is too large: pandas-dev__pandas-8370
Token count is too large: pandas-dev__pandas-23639


Generating train split: 1375 examples [01:33, 13.61 examples/s]

Token count is too large: pandas-dev__pandas-5962
Token count is too large: apache__airflow-12108
Token count is too large: pandas-dev__pandas-38317
Token count is too large: conan-io__conan-4721
Token count is too large: Qiskit__qiskit-6403
Token count is too large: conan-io__conan-4894


Generating train split: 1381 examples [01:33, 18.40 examples/s]

Token count is too large: pandas-dev__pandas-39777
Token count is too large: googleapis__google-cloud-python-248
Token count is too large: Lightning-AI__lightning-2910
Token count is too large: pandas-dev__pandas-26167
Token count is too large: pandas-dev__pandas-29690
Token count is too large: docker__compose-6599
Token count is too large: pandas-dev__pandas-5524
Token count is too large: pandas-dev__pandas-4513


Generating train split: 1386 examples [01:33, 17.42 examples/s]

Token count is too large: pandas-dev__pandas-29646
Token count is too large: ytdl-org__youtube-dl-15929
Token count is too large: pantsbuild__pants-15112
Token count is too large: ipython__ipython-11365
Token count is too large: ray-project__ray-11001
Token count is too large: pandas-dev__pandas-28398
Token count is too large: pandas-dev__pandas-17654
Token count is too large: pantsbuild__pants-19149
Token count is too large: pandas-dev__pandas-8634
Token count is too large: ytdl-org__youtube-dl-20740


Generating train split: 1389 examples [01:33, 17.48 examples/s]

Token count is too large: mesonbuild__meson-9604
Token count is too large: wagtail__wagtail-6440
Token count is too large: pandas-dev__pandas-16403
Token count is too large: pantsbuild__pants-16936
Token count is too large: ipython__ipython-878
Token count is too large: open-mmlab__mmdetection-5249
Token count is too large: pandas-dev__pandas-23874
Token count is too large: celery__celery-5462
Token count is too large: ray-project__ray-9960
Token count is too large: ytdl-org__youtube-dl-27396
Token count is too large: mesonbuild__meson-5819


Generating train split: 1394 examples [01:34, 14.87 examples/s]

Token count is too large: dagster-io__dagster-7134
Token count is too large: mesonbuild__meson-5835
Token count is too large: googleapis__google-cloud-python-2423
Token count is too large: PrefectHQ__prefect-2641
There was an error processing
Token count is too large: googleapis__google-cloud-python-4472


Generating train split: 1398 examples [01:34, 16.64 examples/s]

Token count is too large: mesonbuild__meson-3816
Token count is too large: ytdl-org__youtube-dl-30577
Token count is too large: pandas-dev__pandas-36152
Token count is too large: Lightning-AI__lightning-3020
Token count is too large: pandas-dev__pandas-39432
Token count is too large: Lightning-AI__lightning-1029
Token count is too large: docker__compose-6547
Token count is too large: docker__compose-4414
Token count is too large: pandas-dev__pandas-24621
Token count is too large: wagtail__wagtail-8812
Token count is too large: mesonbuild__meson-6444
Token count is too large: pandas-dev__pandas-8981
Token count is too large: docker__compose-7093
Token count is too large: pandas-dev__pandas-32914
Token count is too large: numpy__numpy-22588
Token count is too large: pandas-dev__pandas-36350
Token count is too large: apache__airflow-28379
Token count is too large: pandas-dev__pandas-30338
Token count is too large: ray-project__ray-7705


Generating train split: 1406 examples [01:35, 13.33 examples/s]

Token count is too large: conda__conda-12554
Token count is too large: PrefectHQ__prefect-1862
Token count is too large: Lightning-AI__lightning-2467
Token count is too large: pypa__pip-8083
Token count is too large: pandas-dev__pandas-3107
Token count is too large: pandas-dev__pandas-29944
Token count is too large: pandas-dev__pandas-28230
Token count is too large: huggingface__transformers-12619
Token count is too large: scipy__scipy-2756
Token count is too large: scipy__scipy-3109
Token count is too large: pandas-dev__pandas-39326
Token count is too large: numpy__numpy-3881
Token count is too large: pandas-dev__pandas-37096


Generating train split: 1415 examples [01:35, 19.41 examples/s]

Token count is too large: googleapis__google-cloud-python-2094
Token count is too large: ytdl-org__youtube-dl-3089
Token count is too large: googleapis__google-cloud-python-8718
Token count is too large: pandas-dev__pandas-35152
Token count is too large: conan-io__conan-4042
Token count is too large: ray-project__ray-7080
Token count is too large: conda__conda-12923
Token count is too large: pandas-dev__pandas-25540
Token count is too large: googleapis__google-cloud-python-6050
Token count is too large: ray-project__ray-3779
Token count is too large: Qiskit__qiskit-8938
Token count is too large: pandas-dev__pandas-11219


Generating train split: 1419 examples [01:35, 20.08 examples/s]

Token count is too large: pandas-dev__pandas-23503
Token count is too large: pantsbuild__pants-4624
Token count is too large: Qiskit__qiskit-3663
Token count is too large: pypa__pip-2153
Token count is too large: ytdl-org__youtube-dl-4599
Token count is too large: conda__conda-5095
Token count is too large: mesonbuild__meson-6743
Token count is too large: pandas-dev__pandas-36249
Token count is too large: docker__compose-1835
Token count is too large: Qiskit__qiskit-7433


Generating train split: 1422 examples [01:35, 21.48 examples/s]

Token count is too large: pyca__cryptography-1043
Token count is too large: googleapis__google-cloud-python-1992
Token count is too large: googleapis__google-cloud-python-2051
Token count is too large: apache__airflow-22619
Token count is too large: pypa__pip-4563
Token count is too large: pantsbuild__pants-14296
Token count is too large: pandas-dev__pandas-3936
Token count is too large: numpy__numpy-21377
Token count is too large: pandas-dev__pandas-19722


Generating train split: 1431 examples [01:36, 29.13 examples/s]

Token count is too large: pandas-dev__pandas-3884
Token count is too large: Qiskit__qiskit-1815
Token count is too large: pandas-dev__pandas-19975
Token count is too large: numpy__numpy-10361
Token count is too large: pandas-dev__pandas-3048
Token count is too large: ipython__ipython-2015
Token count is too large: mesonbuild__meson-1523
Token count is too large: ray-project__ray-5678
Token count is too large: pandas-dev__pandas-36175
Token count is too large: googleapis__google-cloud-python-5007
Token count is too large: pandas-dev__pandas-14967


Generating train split: 1435 examples [01:36, 26.34 examples/s]

Token count is too large: pantsbuild__pants-12858
Token count is too large: apache__airflow-8671
Token count is too large: ray-project__ray-4924
Token count is too large: conan-io__conan-5244
Token count is too large: numpy__numpy-8168
Token count is too large: conda__conda-10356
Token count is too large: pandas-dev__pandas-7368
Token count is too large: numpy__numpy-5398


Generating train split: 1439 examples [01:36, 25.20 examples/s]

Token count is too large: pandas-dev__pandas-25371
Token count is too large: huggingface__transformers-22743
Token count is too large: ipython__ipython-6036
Token count is too large: conan-io__conan-2818
Token count is too large: pandas-dev__pandas-6611
Token count is too large: mesonbuild__meson-2141
Token count is too large: mesonbuild__meson-2718
Token count is too large: numpy__numpy-8939
Token count is too large: ytdl-org__youtube-dl-7208
Token count is too large: pandas-dev__pandas-7232
Token count is too large: conan-io__conan-4766
Token count is too large: huggingface__transformers-8664
Token count is too large: Lightning-AI__lightning-2246
Token count is too large: pandas-dev__pandas-11398
Token count is too large: ytdl-org__youtube-dl-31175
Token count is too large: conda__conda-5831
Token count is too large: ytdl-org__youtube-dl-2997
Token count is too large: numpy__numpy-5496
Token count is too large: mesonbuild__meson-11058


Generating train split: 1445 examples [01:37, 16.14 examples/s]

Token count is too large: pandas-dev__pandas-5298
Token count is too large: pandas-dev__pandas-25266
Token count is too large: Qiskit__qiskit-4446
Token count is too large: wagtail__wagtail-10638
Token count is too large: numpy__numpy-16650
Token count is too large: ray-project__ray-4694
Token count is too large: conda__conda-8528
Token count is too large: pandas-dev__pandas-33784
Token count is too large: pandas-dev__pandas-5325
Token count is too large: apache__airflow-28394


Generating train split: 1450 examples [01:37, 18.62 examples/s]

Token count is too large: pandas-dev__pandas-31794
Token count is too large: pantsbuild__pants-17666
Token count is too large: apache__airflow-21815
Token count is too large: ipython__ipython-9804
Token count is too large: pandas-dev__pandas-18982
Token count is too large: conda__conda-2226
Token count is too large: pantsbuild__pants-15571
Token count is too large: ray-project__ray-10507
Token count is too large: conda__conda-4799
Token count is too large: pandas-dev__pandas-26185
Token count is too large: pandas-dev__pandas-11366
Token count is too large: apache__airflow-2128


Generating train split: 1453 examples [01:37, 19.14 examples/s]

Token count is too large: pandas-dev__pandas-11309
Token count is too large: pandas-dev__pandas-17238
Token count is too large: Lightning-AI__lightning-808
Token count is too large: pandas-dev__pandas-3199


Generating train split: 1458 examples [01:37, 18.31 examples/s]

Token count is too large: pandas-dev__pandas-21519
Token count is too large: conda__conda-8328
Token count is too large: pantsbuild__pants-11315
Token count is too large: pandas-dev__pandas-31167
Token count is too large: numpy__numpy-22952
Token count is too large: wagtail__wagtail-1660
Token count is too large: pandas-dev__pandas-3731
Token count is too large: conan-io__conan-5650
Token count is too large: docker__compose-5580
Token count is too large: numpy__numpy-22539
Token count is too large: huggingface__transformers-16148
Token count is too large: huggingface__transformers-13493
Token count is too large: mesonbuild__meson-6528
Token count is too large: huggingface__transformers-11927
Token count is too large: mesonbuild__meson-5572
Token count is too large: pyca__cryptography-2641
Token count is too large: conan-io__conan-3426
Token count is too large: Qiskit__qiskit-5421
Token count is too large: pypa__pip-4819
Token count is too large: tiangolo__fastapi-347
Token count is too

Generating train split: 1462 examples [01:38, 10.65 examples/s]

Token count is too large: googleapis__google-cloud-python-8182
Token count is too large: ray-project__ray-10921
Token count is too large: pandas-dev__pandas-35338
Token count is too large: pandas-dev__pandas-5211
Token count is too large: pandas-dev__pandas-9291
Token count is too large: google__jax-1514
Token count is too large: pandas-dev__pandas-36121
There was an error processing
Token count is too large: pandas-dev__pandas-17295
Token count is too large: mesonbuild__meson-3055
Token count is too large: gitpython-developers__GitPython-841


Generating train split: 1464 examples [01:38, 10.76 examples/s]

Token count is too large: pandas-dev__pandas-21279
Token count is too large: conan-io__conan-4902
Token count is too large: PrefectHQ__prefect-2629
Token count is too large: mesonbuild__meson-934
Token count is too large: google__jax-3162
Token count is too large: pandas-dev__pandas-36051
Token count is too large: pandas-dev__pandas-16080
Token count is too large: pandas-dev__pandas-31591
Token count is too large: mesonbuild__meson-1255
Token count is too large: pandas-dev__pandas-25260
Token count is too large: pandas-dev__pandas-34220
Token count is too large: jupyterlab__jupyterlab-7790
Token count is too large: pandas-dev__pandas-25768
Token count is too large: mesonbuild__meson-4593
Token count is too large: huggingface__transformers-23724
Token count is too large: ipython__ipython-3066


Generating train split: 1467 examples [01:39,  8.67 examples/s]

Token count is too large: pantsbuild__pants-15000
Token count is too large: pandas-dev__pandas-8227
Token count is too large: pandas-dev__pandas-5037
Token count is too large: ipython__ipython-4195


Generating train split: 1470 examples [01:39,  9.30 examples/s]

Token count is too large: pandas-dev__pandas-3708
Token count is too large: google__jax-874
Token count is too large: pandas-dev__pandas-4002
Token count is too large: Qiskit__qiskit-9941
Token count is too large: pandas-dev__pandas-4166
Token count is too large: pandas-dev__pandas-18486
Token count is too large: huggingface__transformers-7991
Token count is too large: conan-io__conan-5940
Token count is too large: pantsbuild__pants-10827
Token count is too large: huggingface__transformers-11449
Token count is too large: Qiskit__qiskit-8913


Generating train split: 1473 examples [01:39, 10.12 examples/s]

Token count is too large: pandas-dev__pandas-36177
Token count is too large: pandas-dev__pandas-38649
Token count is too large: wagtail__wagtail-1232
Token count is too large: pandas-dev__pandas-35936
Token count is too large: apache__airflow-19130
Token count is too large: pandas-dev__pandas-27070
Token count is too large: pantsbuild__pants-17435
Token count is too large: pyca__cryptography-905
Token count is too large: celery__celery-6259
Token count is too large: Qiskit__qiskit-870
Token count is too large: pantsbuild__pants-10287
Token count is too large: jupyterlab__jupyterlab-8944
Token count is too large: mesonbuild__meson-692
Token count is too large: pandas-dev__pandas-23162


Generating train split: 1477 examples [01:39, 11.24 examples/s]

Token count is too large: pandas-dev__pandas-11895
Token count is too large: googleapis__google-cloud-python-9533
Token count is too large: twisted__twisted-11712
Token count is too large: numpy__numpy-6859
Token count is too large: google__jax-2206
Token count is too large: numpy__numpy-18357
Token count is too large: ytdl-org__youtube-dl-2138
Token count is too large: pandas-dev__pandas-10513
Token count is too large: pandas-dev__pandas-19066
Token count is too large: pandas-dev__pandas-30562
Token count is too large: mesonbuild__meson-1039
Token count is too large: mesonbuild__meson-1666
Token count is too large: huggingface__transformers-8437
Token count is too large: celery__celery-6401
Token count is too large: numpy__numpy-13823
Token count is too large: pantsbuild__pants-18216
Token count is too large: Qiskit__qiskit-3657
Token count is too large: ray-project__ray-7669
Token count is too large: ytdl-org__youtube-dl-25804
Token count is too large: pandas-dev__pandas-39420
Token 

Generating train split: 1479 examples [01:40,  8.47 examples/s]

Token count is too large: pandas-dev__pandas-4830
Token count is too large: ray-project__ray-3691
Token count is too large: pandas-dev__pandas-39507
Token count is too large: pantsbuild__pants-18251
Token count is too large: pandas-dev__pandas-34991
Token count is too large: pandas-dev__pandas-14184
Token count is too large: pandas-dev__pandas-19472
Token count is too large: huggingface__transformers-15158
Token count is too large: mesonbuild__meson-1215


Generating train split: 1483 examples [01:40, 10.63 examples/s]

Token count is too large: pandas-dev__pandas-20062
Token count is too large: pypa__pip-4764
Token count is too large: numpy__numpy-9773
Token count is too large: pandas-dev__pandas-5985
Token count is too large: conda__conda-9738
Token count is too large: Qiskit__qiskit-6570
Token count is too large: google__jax-1749
Token count is too large: pandas-dev__pandas-16505
Token count is too large: wagtail__wagtail-3608
Token count is too large: Lightning-AI__lightning-950


Generating train split: 1487 examples [01:40, 13.85 examples/s]

Token count is too large: pandas-dev__pandas-32223
Token count is too large: pypa__pip-10675
Token count is too large: pandas-dev__pandas-25908
Token count is too large: pandas-dev__pandas-35229
Token count is too large: celery__celery-6804
Token count is too large: ipython__ipython-6029
Token count is too large: ipython__ipython-1361
Token count is too large: pandas-dev__pandas-37728
Token count is too large: google__jax-384
Token count is too large: Lightning-AI__lightning-932
Token count is too large: Lightning-AI__lightning-1804
Token count is too large: pandas-dev__pandas-27645
Token count is too large: Lightning-AI__lightning-521
Token count is too large: pypa__pip-11359


Generating train split: 1490 examples [01:41, 14.81 examples/s]

Token count is too large: kubeflow__pipelines-1032
Token count is too large: ipython__ipython-12453
Token count is too large: open-mmlab__mmdetection-6795
Token count is too large: huggingface__transformers-17416
Token count is too large: pandas-dev__pandas-10937
Token count is too large: pandas-dev__pandas-16930
Token count is too large: pantsbuild__pants-16232
Token count is too large: pandas-dev__pandas-3548
Token count is too large: huggingface__transformers-13897
Token count is too large: pandas-dev__pandas-14063
Token count is too large: numpy__numpy-3460
Token count is too large: google__jax-1694
Token count is too large: conan-io__conan-9624
Token count is too large: wagtail__wagtail-8623
Token count is too large: mesonbuild__meson-5319
Token count is too large: huggingface__transformers-19766
Token count is too large: pandas-dev__pandas-25263
Token count is too large: ytdl-org__youtube-dl-13415


Generating train split: 1497 examples [01:41, 14.37 examples/s]

Token count is too large: numpy__numpy-3772
Token count is too large: pandas-dev__pandas-26417
Token count is too large: conan-io__conan-3647
Token count is too large: pandas-dev__pandas-18354
Token count is too large: numpy__numpy-14993
Token count is too large: numpy__numpy-19905
Token count is too large: pandas-dev__pandas-11322
Token count is too large: huggingface__transformers-5122
Token count is too large: docker__compose-4956


Generating train split: 1502 examples [01:41, 17.69 examples/s]

Token count is too large: pandas-dev__pandas-33070
Token count is too large: pandas-dev__pandas-18402
Token count is too large: wagtail__wagtail-2441
Token count is too large: googleapis__google-cloud-python-8838
Token count is too large: pantsbuild__pants-10815
Token count is too large: Qiskit__qiskit-8799
Token count is too large: conda__conda-4627
Token count is too large: pandas-dev__pandas-7905
Token count is too large: PrefectHQ__prefect-59
Token count is too large: pandas-dev__pandas-27888
Token count is too large: pandas-dev__pandas-35140
Token count is too large: ray-project__ray-4605
Token count is too large: pantsbuild__pants-15071


Generating train split: 1505 examples [01:41, 18.74 examples/s]

Token count is too large: huggingface__transformers-15612
Token count is too large: pandas-dev__pandas-31756
Token count is too large: googleapis__google-cloud-python-5784
Token count is too large: Lightning-AI__lightning-270
Token count is too large: pandas-dev__pandas-34641
Token count is too large: pandas-dev__pandas-7582
Token count is too large: pandas-dev__pandas-23507
Token count is too large: pandas-dev__pandas-4772


Generating train split: 1511 examples [01:42, 20.91 examples/s]

Token count is too large: ytdl-org__youtube-dl-10934
Token count is too large: pandas-dev__pandas-2965
Token count is too large: pandas-dev__pandas-5169
Token count is too large: pandas-dev__pandas-19021
Token count is too large: ytdl-org__youtube-dl-4973
Token count is too large: mesonbuild__meson-634
Token count is too large: numpy__numpy-6568


Generating train split: 1519 examples [01:42, 22.90 examples/s]

Token count is too large: numpy__numpy-20914
Token count is too large: pypa__pip-3231
Token count is too large: huggingface__transformers-9683
Token count is too large: pandas-dev__pandas-24854
Token count is too large: pandas-dev__pandas-26324
Token count is too large: ipython__ipython-5712
Token count is too large: pandas-dev__pandas-7802
Token count is too large: pandas-dev__pandas-37208
Token count is too large: pandas-dev__pandas-18017
Token count is too large: ray-project__ray-2837
Token count is too large: pandas-dev__pandas-19355
Token count is too large: pandas-dev__pandas-37534
Token count is too large: conan-io__conan-6098
Token count is too large: huggingface__transformers-19218
Token count is too large: ytdl-org__youtube-dl-6537
Token count is too large: pandas-dev__pandas-28459
Token count is too large: Lightning-AI__lightning-1572
Token count is too large: pandas-dev__pandas-18517
Token count is too large: Qiskit__qiskit-7880
Token count is too large: dagster-io__dagster

Generating train split: 1525 examples [01:42, 21.18 examples/s]

Token count is too large: huggingface__transformers-13865
Token count is too large: Lightning-AI__lightning-413
Token count is too large: pandas-dev__pandas-26875
Token count is too large: pandas-dev__pandas-22169
Token count is too large: numpy__numpy-24185
Token count is too large: pantsbuild__pants-11317
Token count is too large: huggingface__transformers-24550
Token count is too large: pandas-dev__pandas-30301
Token count is too large: google__jax-2114
Token count is too large: pandas-dev__pandas-22394
Token count is too large: pantsbuild__pants-12585
Token count is too large: apache__airflow-15989
Token count is too large: ytdl-org__youtube-dl-1869
Token count is too large: huggingface__transformers-6998
Token count is too large: huggingface__transformers-9681
Token count is too large: huggingface__transformers-15913


Generating train split: 1529 examples [01:43, 13.92 examples/s]

Token count is too large: pandas-dev__pandas-29496
Token count is too large: pandas-dev__pandas-6813
Token count is too large: docker__compose-4589
Token count is too large: numpy__numpy-11038
Token count is too large: mesonbuild__meson-1066
Token count is too large: pypa__pip-4352
Token count is too large: dagster-io__dagster-13855
Token count is too large: pantsbuild__pants-19023
Token count is too large: ray-project__ray-6320
Token count is too large: mesonbuild__meson-10299
Token count is too large: Qiskit__qiskit-8568
Token count is too large: ipython__ipython-7777
Token count is too large: docker__compose-6455
Token count is too large: PrefectHQ__prefect-1532
Token count is too large: numpy__numpy-15124


Generating train split: 1535 examples [01:43, 14.60 examples/s]

Token count is too large: numpy__numpy-11347
Token count is too large: huggingface__transformers-598
Token count is too large: apache__airflow-19258
Token count is too large: pandas-dev__pandas-19236
Token count is too large: Lightning-AI__lightning-2417
Token count is too large: pandas-dev__pandas-37453
Token count is too large: docker__compose-5982
Token count is too large: conda__conda-1133
Token count is too large: numpy__numpy-10164
Token count is too large: numpy__numpy-11986
Token count is too large: conda__conda-909
Token count is too large: Qiskit__qiskit-9300
Token count is too large: pandas-dev__pandas-7599
Token count is too large: pandas-dev__pandas-34944
Token count is too large: pantsbuild__pants-14594
Token count is too large: pandas-dev__pandas-34266
Token count is too large: conan-io__conan-4285
Token count is too large: docker__compose-2405


Generating train split: 1539 examples [01:44, 11.89 examples/s]

Token count is too large: pandas-dev__pandas-33470
Token count is too large: pandas-dev__pandas-5070
Token count is too large: google__jax-494
Token count is too large: mesonbuild__meson-8656
Token count is too large: mesonbuild__meson-6288
Token count is too large: mesonbuild__meson-2516
Token count is too large: pandas-dev__pandas-4645
Token count is too large: numpy__numpy-15949
Token count is too large: mesonbuild__meson-8119
Token count is too large: ytdl-org__youtube-dl-5975
Token count is too large: ray-project__ray-7392
Token count is too large: pandas-dev__pandas-19554
Token count is too large: numpy__numpy-13698


Generating train split: 1546 examples [01:44, 14.90 examples/s]

Token count is too large: google__jax-233
Token count is too large: PrefectHQ__prefect-887
Token count is too large: conda__conda-6776
Token count is too large: mesonbuild__meson-2884
Token count is too large: pandas-dev__pandas-8264
Token count is too large: pandas-dev__pandas-36793
Token count is too large: Qiskit__qiskit-5887
Token count is too large: pandas-dev__pandas-35885
Token count is too large: Qiskit__qiskit-2840
Token count is too large: conda__conda-8846
Token count is too large: celery__celery-6668


Generating train split: 1549 examples [01:44, 15.55 examples/s]

Token count is too large: pandas-dev__pandas-6465
Token count is too large: conda__conda-8229
Token count is too large: pandas-dev__pandas-33821
Token count is too large: docker__compose-3292
Token count is too large: Qiskit__qiskit-5101
Token count is too large: googleapis__google-cloud-python-7863
Token count is too large: pandas-dev__pandas-18426
Token count is too large: mesonbuild__meson-11405
Token count is too large: celery__celery-8427
Token count is too large: pandas-dev__pandas-34429
Token count is too large: mesonbuild__meson-11613
Token count is too large: ipython__ipython-10369
Token count is too large: conda__conda-3538
Token count is too large: googleapis__google-cloud-python-1703


Generating train split: 1554 examples [01:44, 19.78 examples/s]

Token count is too large: conda__conda-2734
Token count is too large: pyca__cryptography-2219
Token count is too large: conan-io__conan-5298
Token count is too large: ray-project__ray-10573
Token count is too large: pandas-dev__pandas-36238
Token count is too large: numpy__numpy-8054
Token count is too large: pandas-dev__pandas-37433
Token count is too large: ytdl-org__youtube-dl-554
Token count is too large: ytdl-org__youtube-dl-3375
Token count is too large: pandas-dev__pandas-16932
Token count is too large: mesonbuild__meson-11672


Generating train split: 1560 examples [01:44, 24.72 examples/s]

Token count is too large: pandas-dev__pandas-7672
Token count is too large: pypa__pip-3136
Token count is too large: pandas-dev__pandas-4850
Token count is too large: pandas-dev__pandas-30336
Token count is too large: mesonbuild__meson-1027


Generating train split: 1563 examples [01:45, 21.42 examples/s]

Token count is too large: pandas-dev__pandas-26584
Token count is too large: Lightning-AI__lightning-1357
Token count is too large: numpy__numpy-8678
Token count is too large: Qiskit__qiskit-9063
Token count is too large: pandas-dev__pandas-23688
Token count is too large: Lightning-AI__lightning-3258
Token count is too large: pandas-dev__pandas-5848
Token count is too large: pantsbuild__pants-15587
Token count is too large: Qiskit__qiskit-5479
Token count is too large: huggingface__transformers-7858
Token count is too large: pandas-dev__pandas-24274
Token count is too large: ytdl-org__youtube-dl-4394
Token count is too large: conan-io__conan-4626
Token count is too large: wagtail__wagtail-8697
Token count is too large: conda__conda-662
Token count is too large: dagster-io__dagster-15082
Token count is too large: pandas-dev__pandas-3509
Token count is too large: numpy__numpy-20420


Generating train split: 1566 examples [01:45, 13.70 examples/s]

Token count is too large: wagtail__wagtail-767
Token count is too large: celery__celery-6138
Token count is too large: pandas-dev__pandas-25925
Token count is too large: ytdl-org__youtube-dl-2859
Token count is too large: pandas-dev__pandas-29444
Token count is too large: googleapis__google-cloud-python-355
Token count is too large: pandas-dev__pandas-39355
Token count is too large: Lightning-AI__lightning-2721
Token count is too large: wagtail__wagtail-4184
Token count is too large: pandas-dev__pandas-8958


Generating train split: 1572 examples [01:45, 15.88 examples/s]

Token count is too large: pantsbuild__pants-11760
Token count is too large: ytdl-org__youtube-dl-8348
Token count is too large: celery__celery-6770
Token count is too large: pandas-dev__pandas-13575
Token count is too large: pandas-dev__pandas-22601
Token count is too large: apache__airflow-29136
Token count is too large: pyca__cryptography-6348
Token count is too large: pandas-dev__pandas-6204
Token count is too large: celery__celery-7481


Generating train split: 1579 examples [01:46, 21.64 examples/s]

Token count is too large: huggingface__transformers-17105
Token count is too large: googleapis__google-cloud-python-2805
Token count is too large: pandas-dev__pandas-7015
Token count is too large: pandas-dev__pandas-4269
Token count is too large: huggingface__transformers-23872
Token count is too large: docker__compose-7689
Token count is too large: pandas-dev__pandas-38582
Token count is too large: pypa__pip-10943
Token count is too large: conan-io__conan-4172
Token count is too large: google__jax-1605
Token count is too large: conda__conda-4433
Token count is too large: pandas-dev__pandas-14520
Token count is too large: wagtail__wagtail-6422
Token count is too large: huggingface__transformers-22536


Generating train split: 1582 examples [01:46, 15.12 examples/s]

Token count is too large: pandas-dev__pandas-7616
Token count is too large: Qiskit__qiskit-6070
Token count is too large: conan-io__conan-5444
Token count is too large: DataDog__integrations-core-1620
Token count is too large: conan-io__conan-5461
Token count is too large: pandas-dev__pandas-20826
Token count is too large: Qiskit__qiskit-4517
Token count is too large: conan-io__conan-8125
Token count is too large: huggingface__transformers-5972
Token count is too large: celery__celery-8383
Token count is too large: numpy__numpy-20497


Generating train split: 1585 examples [01:46, 12.87 examples/s]

Token count is too large: mesonbuild__meson-6806
Token count is too large: pandas-dev__pandas-30905
Token count is too large: numpy__numpy-3854
Token count is too large: Lightning-AI__lightning-1192
Token count is too large: numpy__numpy-13163
Token count is too large: PrefectHQ__prefect-2482
Token count is too large: mesonbuild__meson-4601
Token count is too large: Qiskit__qiskit-2301
Token count is too large: celery__celery-6488
Token count is too large: Qiskit__qiskit-10322
Token count is too large: pandas-dev__pandas-29654
Token count is too large: pandas-dev__pandas-39500
Token count is too large: pandas-dev__pandas-35195
Token count is too large: pandas-dev__pandas-16895
Token count is too large: wagtail__wagtail-6402
Token count is too large: ipython__ipython-1836
Token count is too large: huggingface__transformers-13489
Token count is too large: googleapis__google-cloud-python-8416
Token count is too large: pandas-dev__pandas-37185
Token count is too large: huggingface__transfo

Generating train split: 1587 examples [01:47,  9.36 examples/s]

Token count is too large: ipython__ipython-6123
Token count is too large: Qiskit__qiskit-4811
Token count is too large: mesonbuild__meson-3135
Token count is too large: pandas-dev__pandas-32478
Token count is too large: ytdl-org__youtube-dl-16427
Token count is too large: Qiskit__qiskit-5268
Token count is too large: Qiskit__qiskit-10545
Token count is too large: ipython__ipython-7496
Token count is too large: numpy__numpy-3448
Token count is too large: pandas-dev__pandas-17879


Generating train split: 1589 examples [01:47,  9.35 examples/s]

Token count is too large: pandas-dev__pandas-35607
Token count is too large: mesonbuild__meson-6182
Token count is too large: mesonbuild__meson-11125
Token count is too large: mesonbuild__meson-1948
Token count is too large: pandas-dev__pandas-31232
Token count is too large: pandas-dev__pandas-29447
Token count is too large: pandas-dev__pandas-11345
Token count is too large: tiangolo__fastapi-437
Token count is too large: Lightning-AI__lightning-1492
Token count is too large: mesonbuild__meson-1879
Token count is too large: pantsbuild__pants-6614
Token count is too large: mesonbuild__meson-11863
Token count is too large: googleapis__google-cloud-python-9525
Token count is too large: numpy__numpy-12439


Generating train split: 1596 examples [01:48, 12.08 examples/s]

Token count is too large: pandas-dev__pandas-36862
Token count is too large: pandas-dev__pandas-37251
Token count is too large: pandas-dev__pandas-36675
Token count is too large: huggingface__transformers-15554
Token count is too large: mesonbuild__meson-4725
Token count is too large: pandas-dev__pandas-36709
Token count is too large: celery__celery-7945
Token count is too large: huggingface__transformers-19056
Token count is too large: huggingface__transformers-24960
Token count is too large: Lightning-AI__lightning-2169
Token count is too large: googleapis__google-cloud-python-506
Token count is too large: conan-io__conan-2885
Token count is too large: pandas-dev__pandas-14225
Token count is too large: pandas-dev__pandas-22293
Token count is too large: pypa__pip-9241


Generating train split: 1608 examples [01:48, 15.20 examples/s]

Token count is too large: pandas-dev__pandas-2708
Token count is too large: kubeflow__pipelines-1886
Token count is too large: google__jax-1329
Token count is too large: pantsbuild__pants-13560
Token count is too large: googleapis__google-cloud-python-6513
Token count is too large: mesonbuild__meson-2760
Token count is too large: mesonbuild__meson-5767
Token count is too large: pandas-dev__pandas-8699
Token count is too large: ray-project__ray-4379
Token count is too large: ipython__ipython-4977
Token count is too large: PrefectHQ__prefect-2853
Token count is too large: conda__conda-4778
Token count is too large: pypa__pip-3204
Token count is too large: Qiskit__qiskit-6324
Token count is too large: mesonbuild__meson-5824
Token count is too large: pandas-dev__pandas-38150
Token count is too large: mesonbuild__meson-3932
Token count is too large: numpy__numpy-12842
Token count is too large: Qiskit__qiskit-2480
Token count is too large: pandas-dev__pandas-2328
Token count is too large: pa

Generating train split: 1612 examples [01:49, 12.71 examples/s]

Token count is too large: pandas-dev__pandas-24447
Token count is too large: pandas-dev__pandas-18181
Token count is too large: open-mmlab__mmdetection-6767
Token count is too large: pyca__cryptography-2558
Token count is too large: pandas-dev__pandas-10026
Token count is too large: pandas-dev__pandas-37999
Token count is too large: pandas-dev__pandas-28982
Token count is too large: conda__conda-5839
Token count is too large: Qiskit__qiskit-1404
Token count is too large: ytdl-org__youtube-dl-5953


Generating train split: 1614 examples [01:49, 12.94 examples/s]

Token count is too large: pandas-dev__pandas-21397
Token count is too large: pandas-dev__pandas-27827
Token count is too large: pandas-dev__pandas-4622
Token count is too large: conan-io__conan-4737
Token count is too large: ipython__ipython-6643
Token count is too large: pypa__pip-8522
Token count is too large: pandas-dev__pandas-34756
Token count is too large: gitpython-developers__GitPython-681
Token count is too large: googleapis__google-cloud-python-8176
Token count is too large: pantsbuild__pants-4412


Generating train split: 1618 examples [01:49, 14.25 examples/s]

Token count is too large: Qiskit__qiskit-394
Token count is too large: tensorflow__models-2146
Token count is too large: pandas-dev__pandas-18624
Token count is too large: numpy__numpy-24191
Token count is too large: googleapis__google-cloud-python-10011


Generating train split: 1621 examples [01:50,  8.64 examples/s]

Token count is too large: pypa__pip-4038
Token count is too large: Lightning-AI__lightning-617
Token count is too large: wagtail__wagtail-10303
Token count is too large: Qiskit__qiskit-2185
Token count is too large: jupyterlab__jupyterlab-6040
Token count is too large: pandas-dev__pandas-22072
Token count is too large: google__jax-2111
Token count is too large: numpy__numpy-3249
Token count is too large: pandas-dev__pandas-17017
Token count is too large: pandas-dev__pandas-30515
Token count is too large: pandas-dev__pandas-18209
Token count is too large: docker__compose-5819
Token count is too large: huggingface__transformers-22653
Token count is too large: pandas-dev__pandas-7342


Generating train split: 1623 examples [01:50,  7.97 examples/s]

Token count is too large: docker__compose-5684
Token count is too large: pandas-dev__pandas-27105
Token count is too large: Lightning-AI__lightning-252
Token count is too large: pandas-dev__pandas-28229
Token count is too large: mesonbuild__meson-6582
Token count is too large: pandas-dev__pandas-23100
Token count is too large: huggingface__transformers-8568
Token count is too large: pandas-dev__pandas-6330


Generating train split: 1625 examples [01:50,  7.79 examples/s]

Token count is too large: huggingface__transformers-16829
Token count is too large: pandas-dev__pandas-7728
Token count is too large: conan-io__conan-3377
Token count is too large: googleapis__google-cloud-python-2590
Token count is too large: pandas-dev__pandas-26188
Token count is too large: pantsbuild__pants-16250
Token count is too large: ipython__ipython-1691
Token count is too large: PrefectHQ__prefect-557


Generating train split: 1633 examples [01:51, 13.77 examples/s]

Token count is too large: googleapis__google-cloud-python-4916
Token count is too large: pandas-dev__pandas-21954
Token count is too large: numpy__numpy-11522
Token count is too large: pandas-dev__pandas-5634
Token count is too large: pandas-dev__pandas-38892


Generating train split: 1635 examples [01:51, 12.36 examples/s]

Token count is too large: pandas-dev__pandas-25802
Token count is too large: ytdl-org__youtube-dl-20731
Token count is too large: pandas-dev__pandas-34508
Token count is too large: pandas-dev__pandas-6601
Token count is too large: pandas-dev__pandas-8782


Generating train split: 1641 examples [01:51, 14.62 examples/s]

Token count is too large: pandas-dev__pandas-6440
Token count is too large: pantsbuild__pants-13061
Token count is too large: docker__compose-4669
Token count is too large: apache__airflow-13286
Token count is too large: twisted__twisted-11796
Token count is too large: huggingface__transformers-7552
Token count is too large: celery__celery-5638
Token count is too large: pandas-dev__pandas-29680
Token count is too large: huggingface__transformers-13338
Token count is too large: conda__conda-5099
Token count is too large: ytdl-org__youtube-dl-25239
Token count is too large: Qiskit__qiskit-3867
Token count is too large: apache__airflow-8962


Generating train split: 1644 examples [01:51, 16.39 examples/s]

Token count is too large: huggingface__transformers-23751
Token count is too large: pandas-dev__pandas-31097
Token count is too large: pandas-dev__pandas-8752
Token count is too large: pandas-dev__pandas-18525
Token count is too large: pandas-dev__pandas-6433
Token count is too large: pandas-dev__pandas-25844
Token count is too large: numpy__numpy-6644
Token count is too large: Qiskit__qiskit-2705
Token count is too large: numpy__numpy-9133
Token count is too large: googleapis__google-cloud-python-9495
Token count is too large: ray-project__ray-2254
Token count is too large: numpy__numpy-21141
Token count is too large: wagtail__wagtail-1411
Token count is too large: Lightning-AI__lightning-1913


Generating train split: 1649 examples [01:52, 18.96 examples/s]

Token count is too large: huggingface__transformers-9350
Token count is too large: pandas-dev__pandas-10171
Token count is too large: googleapis__google-cloud-python-3180
Token count is too large: huggingface__transformers-14783
Token count is too large: mesonbuild__meson-8912
Token count is too large: pandas-dev__pandas-4942
Token count is too large: ytdl-org__youtube-dl-11122
Token count is too large: numpy__numpy-94
Token count is too large: Qiskit__qiskit-2650
Token count is too large: pandas-dev__pandas-38679
Token count is too large: docker__compose-5858
Token count is too large: twisted__twisted-11706
Token count is too large: numpy__numpy-12307
Token count is too large: Qiskit__qiskit-2442
Token count is too large: Qiskit__qiskit-9206


Generating train split: 1652 examples [01:52, 10.90 examples/s]

Token count is too large: numpy__numpy-12586
Token count is too large: PrefectHQ__prefect-2898
Token count is too large: pandas-dev__pandas-11110
Token count is too large: pantsbuild__pants-7179
Token count is too large: apache__airflow-26885
Token count is too large: pandas-dev__pandas-24114
Token count is too large: huggingface__transformers-7431
Token count is too large: apache__airflow-27067
Token count is too large: pandas-dev__pandas-30797
Token count is too large: ipython__ipython-1852
Token count is too large: conda__conda-8290


Generating train split: 1654 examples [01:52, 10.78 examples/s]

Token count is too large: pandas-dev__pandas-5930
Token count is too large: Qiskit__qiskit-7517
Token count is too large: huggingface__transformers-15416
Token count is too large: Qiskit__qiskit-1135
Token count is too large: pandas-dev__pandas-5723
Token count is too large: tensorflow__models-4084
Token count is too large: conan-io__conan-4298


Generating train split: 1656 examples [01:53,  9.93 examples/s]

Token count is too large: Qiskit__qiskit-4076
Token count is too large: conda__conda-7274
Token count is too large: pandas-dev__pandas-6477
Token count is too large: pandas-dev__pandas-16305
Token count is too large: pandas-dev__pandas-23479
Token count is too large: huggingface__transformers-14661
Token count is too large: Qiskit__qiskit-2783
Token count is too large: numpy__numpy-16311
Token count is too large: google__jax-640
Token count is too large: pandas-dev__pandas-8157


Generating train split: 1663 examples [01:53, 16.97 examples/s]

Token count is too large: Qiskit__qiskit-3760
Token count is too large: pandas-dev__pandas-16191
Token count is too large: pantsbuild__pants-13675
Token count is too large: huggingface__transformers-1315
There was an error processing
Token count is too large: mesonbuild__meson-2885
Token count is too large: pandas-dev__pandas-13814
Token count is too large: celery__celery-5565
Token count is too large: mesonbuild__meson-7309
Token count is too large: pandas-dev__pandas-7951
Token count is too large: pandas-dev__pandas-4837
Token count is too large: pandas-dev__pandas-16431


Generating train split: 1668 examples [01:53, 19.60 examples/s]

Token count is too large: dagster-io__dagster-2890
Token count is too large: numpy__numpy-8955
Token count is too large: pandas-dev__pandas-9814


Generating train split: 1671 examples [01:53, 14.41 examples/s]

Token count is too large: numpy__numpy-12382
Token count is too large: apache__airflow-15311
Token count is too large: huggingface__transformers-25636
Token count is too large: pantsbuild__pants-11223
Token count is too large: pandas-dev__pandas-38701
Token count is too large: apache__airflow-25370
Token count is too large: dagster-io__dagster-14060
Token count is too large: pandas-dev__pandas-20372
Token count is too large: ytdl-org__youtube-dl-18336
Token count is too large: pantsbuild__pants-16586
Token count is too large: pandas-dev__pandas-16992
Token count is too large: pypa__pip-5831
Token count is too large: pandas-dev__pandas-27311
Token count is too large: Qiskit__qiskit-9999
Token count is too large: pandas-dev__pandas-39747
Token count is too large: pantsbuild__pants-19366


Generating train split: 1675 examples [01:54, 12.39 examples/s]

Token count is too large: pandas-dev__pandas-24657
Token count is too large: numpy__numpy-10375
Token count is too large: mesonbuild__meson-6025
Token count is too large: pantsbuild__pants-15368
Token count is too large: pandas-dev__pandas-8026
Token count is too large: pandas-dev__pandas-17272
Token count is too large: pandas-dev__pandas-17871
Token count is too large: ipython__ipython-5916
Token count is too large: huggingface__transformers-11680
Token count is too large: pandas-dev__pandas-9845
Token count is too large: Lightning-AI__lightning-3394
Token count is too large: Lightning-AI__lightning-1017
Token count is too large: pandas-dev__pandas-5633
Token count is too large: conan-io__conan-5350
Token count is too large: pandas-dev__pandas-11750
Token count is too large: apache__airflow-15425


Generating train split: 1680 examples [01:54, 11.89 examples/s]

Token count is too large: pandas-dev__pandas-27349
Token count is too large: pantsbuild__pants-16251
Token count is too large: pandas-dev__pandas-38099
Token count is too large: numpy__numpy-13242
Token count is too large: pandas-dev__pandas-30580
Token count is too large: huggingface__transformers-11071
Token count is too large: conan-io__conan-8483
Token count is too large: Qiskit__qiskit-1360
Token count is too large: dagster-io__dagster-14717


Generating train split: 1684 examples [01:54, 13.59 examples/s]

Token count is too large: pandas-dev__pandas-29118
Token count is too large: pypa__pip-9522
Token count is too large: huggingface__transformers-23914
Token count is too large: pandas-dev__pandas-10379
Token count is too large: pandas-dev__pandas-4430
Token count is too large: pantsbuild__pants-14270
Token count is too large: huggingface__transformers-13573


Generating train split: 1686 examples [01:55, 12.44 examples/s]

Token count is too large: mesonbuild__meson-3225
Token count is too large: Lightning-AI__lightning-803
Token count is too large: pandas-dev__pandas-39352
Token count is too large: docker__compose-2467
Token count is too large: Qiskit__qiskit-7190
Token count is too large: mesonbuild__meson-3501
Token count is too large: pandas-dev__pandas-16484
Token count is too large: pandas-dev__pandas-23805
Token count is too large: pandas-dev__pandas-20730
Token count is too large: mesonbuild__meson-4193


Generating train split: 1690 examples [01:55, 13.74 examples/s]

Token count is too large: mesonbuild__meson-6067
Token count is too large: googleapis__google-cloud-python-4931
Token count is too large: mesonbuild__meson-6589
Token count is too large: googleapis__google-cloud-python-502
Token count is too large: pypa__pip-9835
Token count is too large: apache__airflow-12240
Token count is too large: huggingface__transformers-24526
Token count is too large: docker__compose-2665
Token count is too large: pyca__cryptography-5900
Token count is too large: Qiskit__qiskit-579
Token count is too large: numpy__numpy-16789


Generating train split: 1693 examples [01:55, 13.12 examples/s]

Token count is too large: pandas-dev__pandas-11870
Token count is too large: pandas-dev__pandas-23621
Token count is too large: wagtail__wagtail-8948
Token count is too large: google__jax-1668
Token count is too large: conan-io__conan-3352
Token count is too large: pandas-dev__pandas-32782
Token count is too large: pandas-dev__pandas-26665


Generating train split: 1697 examples [01:55, 17.55 examples/s]

Token count is too large: pandas-dev__pandas-38373
Token count is too large: pypa__pip-10906
Token count is too large: google__jax-752
Token count is too large: ray-project__ray-4915
Token count is too large: pandas-dev__pandas-37288
Token count is too large: huggingface__transformers-19846
Token count is too large: pandas-dev__pandas-10431
Token count is too large: conda__conda-6956
Token count is too large: apache__airflow-22536
Token count is too large: pandas-dev__pandas-17156
Token count is too large: celery__celery-4864
Token count is too large: ytdl-org__youtube-dl-7382
Token count is too large: pandas-dev__pandas-13662
Token count is too large: pandas-dev__pandas-6473
Token count is too large: pandas-dev__pandas-21740
Token count is too large: pandas-dev__pandas-22058
Token count is too large: huggingface__transformers-21490
Token count is too large: conan-io__conan-7625


Generating train split: 1704 examples [01:56, 17.83 examples/s]

Token count is too large: googleapis__google-cloud-python-9735
Token count is too large: numpy__numpy-7425
Token count is too large: pandas-dev__pandas-38014
Token count is too large: googleapis__google-cloud-python-1250
Token count is too large: huggingface__transformers-20353
Token count is too large: docker__compose-3459


Generating train split: 1711 examples [01:56, 21.75 examples/s]

Token count is too large: pandas-dev__pandas-6560
Token count is too large: google__jax-512
Token count is too large: conda__conda-6602
Token count is too large: dagster-io__dagster-9828
Token count is too large: pypa__pip-3073
Token count is too large: googleapis__google-cloud-python-9360
Token count is too large: conan-io__conan-14362
Token count is too large: numpy__numpy-10739
Token count is too large: conda__conda-7243
Token count is too large: huggingface__transformers-13436
Token count is too large: wagtail__wagtail-10255
Token count is too large: numpy__numpy-11717
Token count is too large: pandas-dev__pandas-7392
Token count is too large: conan-io__conan-4202
Token count is too large: googleapis__google-cloud-python-6650


Generating train split: 1714 examples [01:56, 17.34 examples/s]

Token count is too large: pandas-dev__pandas-4352
Token count is too large: pandas-dev__pandas-3148
Token count is too large: pandas-dev__pandas-21981
Token count is too large: conan-io__conan-99
Token count is too large: huggingface__transformers-22658
Token count is too large: Qiskit__qiskit-8120
Token count is too large: pandas-dev__pandas-19823


Generating train split: 1716 examples [01:56, 15.97 examples/s]

Token count is too large: conda__conda-4774
Token count is too large: pypa__pip-7542
Token count is too large: Qiskit__qiskit-2716
Token count is too large: pandas-dev__pandas-3961
Token count is too large: pandas-dev__pandas-19628
Token count is too large: huggingface__transformers-7724
Token count is too large: pandas-dev__pandas-6396
Token count is too large: numpy__numpy-18630
Token count is too large: huggingface__transformers-14744
Token count is too large: ytdl-org__youtube-dl-21208
Token count is too large: conan-io__conan-3212
Token count is too large: numpy__numpy-8121
Token count is too large: pandas-dev__pandas-21000
Token count is too large: ipython__ipython-9823


Generating train split: 1718 examples [01:57, 11.55 examples/s]

Token count is too large: mesonbuild__meson-5123
Token count is too large: pandas-dev__pandas-10727
Token count is too large: Lightning-AI__lightning-2273
Token count is too large: pandas-dev__pandas-5219
Token count is too large: Lightning-AI__lightning-2213
Token count is too large: pandas-dev__pandas-39372
Token count is too large: numpy__numpy-23073
Token count is too large: docker__compose-2851
Token count is too large: apache__airflow-29518


Generating train split: 1724 examples [01:57, 15.99 examples/s]

Token count is too large: Qiskit__qiskit-3635
Token count is too large: jupyterlab__jupyterlab-2038
Token count is too large: numpy__numpy-5504
Token count is too large: pandas-dev__pandas-8753
Token count is too large: pandas-dev__pandas-24034
Token count is too large: conan-io__conan-6380
Token count is too large: Qiskit__qiskit-3123
Token count is too large: Qiskit__qiskit-3547
Token count is too large: pypa__pip-3505


Generating train split: 1730 examples [01:57, 15.28 examples/s]

Token count is too large: googleapis__google-cloud-python-6081
Token count is too large: pandas-dev__pandas-38997
Token count is too large: conan-io__conan-2812
Token count is too large: jupyterlab__jupyterlab-5351
Token count is too large: pandas-dev__pandas-35763
Token count is too large: pandas-dev__pandas-7874
Token count is too large: pantsbuild__pants-9551
Token count is too large: conan-io__conan-4824
Token count is too large: Lightning-AI__lightning-2121
Token count is too large: huggingface__transformers-17712
Token count is too large: pandas-dev__pandas-19067
Token count is too large: celery__celery-4280


Generating train split: 1733 examples [01:58, 15.11 examples/s]

Token count is too large: docker__compose-1399
Token count is too large: pandas-dev__pandas-3125
Token count is too large: huggingface__transformers-24448
Token count is too large: ray-project__ray-8445
Token count is too large: pandas-dev__pandas-29743
Token count is too large: huggingface__transformers-18297
Token count is too large: tensorflow__models-3622
Token count is too large: pandas-dev__pandas-26518
Token count is too large: pandas-dev__pandas-38909
Token count is too large: numpy__numpy-4677
Token count is too large: pandas-dev__pandas-4104
Token count is too large: dagster-io__dagster-2640
Token count is too large: google__jax-760
Token count is too large: pandas-dev__pandas-35604
Token count is too large: mesonbuild__meson-5665
Token count is too large: ytdl-org__youtube-dl-5942
Token count is too large: numpy__numpy-20499
Token count is too large: huggingface__transformers-6168
Token count is too large: wagtail__wagtail-4229
Token count is too large: pandas-dev__pandas-55

Generating train split: 1738 examples [01:58, 12.41 examples/s]

Token count is too large: pandas-dev__pandas-3625
Token count is too large: PrefectHQ__prefect-2625
Token count is too large: pandas-dev__pandas-23991
Token count is too large: Lightning-AI__lightning-1251
Token count is too large: pandas-dev__pandas-34450
Token count is too large: numpy__numpy-20724
Token count is too large: pandas-dev__pandas-29508
Token count is too large: numpy__numpy-11721
Token count is too large: Lightning-AI__lightning-1512
Token count is too large: googleapis__google-cloud-python-9577
Token count is too large: Qiskit__qiskit-10581
Token count is too large: jupyterlab__jupyterlab-4410
Token count is too large: mesonbuild__meson-5338


Generating train split: 1743 examples [01:59, 10.94 examples/s]

Token count is too large: numpy__numpy-6567
Token count is too large: google__jax-169
Token count is too large: numpy__numpy-9930
Token count is too large: Lightning-AI__lightning-360
Token count is too large: numpy__numpy-9299
Token count is too large: pandas-dev__pandas-3564
Token count is too large: pypa__pip-3070
Token count is too large: jupyterlab__jupyterlab-9424


Generating train split: 1746 examples [01:59, 11.36 examples/s]

Token count is too large: pandas-dev__pandas-7120
Token count is too large: ipython__ipython-11716
Token count is too large: pandas-dev__pandas-21203
Token count is too large: Qiskit__qiskit-1197
Token count is too large: pandas-dev__pandas-39212
Token count is too large: huggingface__transformers-5629
Token count is too large: pandas-dev__pandas-14234
Token count is too large: huggingface__transformers-14713


Generating train split: 1749 examples [01:59, 11.67 examples/s]

Token count is too large: mesonbuild__meson-7103
Token count is too large: mesonbuild__meson-3272
Token count is too large: googleapis__google-cloud-python-11343
Token count is too large: pandas-dev__pandas-26024
Token count is too large: huggingface__transformers-21111
Token count is too large: pypa__pip-8056
Token count is too large: pandas-dev__pandas-38740
Token count is too large: Qiskit__qiskit-169
Token count is too large: ipython__ipython-876
Token count is too large: pandas-dev__pandas-36228
Token count is too large: pandas-dev__pandas-7303
Token count is too large: conda__conda-5373
Token count is too large: conda__conda-4843
Token count is too large: conan-io__conan-3567


Generating train split: 1751 examples [02:00,  5.80 examples/s]

Token count is too large: apache__airflow-17669
Token count is too large: pandas-dev__pandas-24159
Token count is too large: huggingface__transformers-16913
Token count is too large: wagtail__wagtail-5093
Token count is too large: pantsbuild__pants-11811
Token count is too large: pandas-dev__pandas-21069
Token count is too large: conda__conda-6764
Token count is too large: ytdl-org__youtube-dl-31414
Token count is too large: Qiskit__qiskit-4650


Generating train split: 1754 examples [02:00,  6.91 examples/s]

Token count is too large: pandas-dev__pandas-16487
Token count is too large: huggingface__transformers-12314
Token count is too large: pandas-dev__pandas-23327
Token count is too large: Qiskit__qiskit-1909


Generating train split: 1762 examples [02:01, 13.00 examples/s]

Token count is too large: huggingface__transformers-24067
Token count is too large: numpy__numpy-9996
Token count is too large: Qiskit__qiskit-2247
Token count is too large: pandas-dev__pandas-19525
Token count is too large: pandas-dev__pandas-32142
Token count is too large: google__jax-914
Token count is too large: conda__conda-6916
Token count is too large: huggingface__transformers-15751
Token count is too large: docker__compose-2019
Token count is too large: Qiskit__qiskit-363
Token count is too large: pypa__pip-5952
Token count is too large: conda__conda-2821
Token count is too large: mesonbuild__meson-10407
Token count is too large: pandas-dev__pandas-22564
Token count is too large: pandas-dev__pandas-29393
Token count is too large: Lightning-AI__lightning-2473
Token count is too large: pandas-dev__pandas-37321
Token count is too large: pandas-dev__pandas-13849
Token count is too large: pandas-dev__pandas-38074
Token count is too large: Qiskit__qiskit-867
Token count is too large

Generating train split: 1766 examples [02:01,  9.53 examples/s]

Token count is too large: numpy__numpy-3281
Token count is too large: pandas-dev__pandas-8975
Token count is too large: Qiskit__qiskit-5298
Token count is too large: pypa__pip-7023
Token count is too large: twisted__twisted-11654
Token count is too large: apache__airflow-30596
Token count is too large: celery__celery-3746
Token count is too large: pandas-dev__pandas-3818
Token count is too large: pandas-dev__pandas-7931
Token count is too large: PrefectHQ__prefect-598
Token count is too large: huggingface__transformers-10602
Token count is too large: pandas-dev__pandas-4751


Generating train split: 1771 examples [02:02, 12.12 examples/s]

Token count is too large: numpy__numpy-8030
Token count is too large: mesonbuild__meson-9221
Token count is too large: pandas-dev__pandas-22345
Token count is too large: pypa__pip-5339
There was an error processing
Token count is too large: ray-project__ray-8771
Token count is too large: huggingface__transformers-25344
Token count is too large: Qiskit__qiskit-9211


Generating train split: 1777 examples [02:02, 15.21 examples/s]

Token count is too large: huggingface__transformers-9516
Token count is too large: python__typeshed-8775
Token count is too large: numpy__numpy-5943
Token count is too large: conda__conda-6044
Token count is too large: numpy__numpy-6199
Token count is too large: pandas-dev__pandas-5034
Token count is too large: huggingface__transformers-1116


Generating train split: 1781 examples [02:02, 17.54 examples/s]

Token count is too large: pandas-dev__pandas-4571
Token count is too large: apache__airflow-23106
Token count is too large: gitpython-developers__GitPython-1618
Token count is too large: conda__conda-7498
Token count is too large: googleapis__google-cloud-python-407
Token count is too large: huggingface__transformers-9379
Token count is too large: twisted__twisted-11884
Token count is too large: pypa__pip-5483
Token count is too large: conda__conda-4100
Token count is too large: mesonbuild__meson-155


Generating train split: 1785 examples [02:02, 18.16 examples/s]

Token count is too large: pandas-dev__pandas-18238
Token count is too large: huggingface__transformers-9474
Token count is too large: google__jax-1718
Token count is too large: googleapis__google-cloud-python-8100
Token count is too large: pandas-dev__pandas-14917
There was an error processing
Token count is too large: pandas-dev__pandas-17956


Generating train split: 1793 examples [02:02, 24.22 examples/s]

Token count is too large: mesonbuild__meson-11458
Token count is too large: ray-project__ray-8107
Token count is too large: jupyterlab__jupyterlab-3207
Token count is too large: pandas-dev__pandas-35778
Token count is too large: pandas-dev__pandas-5787
Token count is too large: apache__airflow-15411
Token count is too large: ipython__ipython-10806
Token count is too large: pandas-dev__pandas-24635
Token count is too large: huggingface__transformers-6463
Token count is too large: googleapis__google-cloud-python-9164
Token count is too large: googleapis__google-cloud-python-6349
Token count is too large: numpy__numpy-9986
Token count is too large: pyca__cryptography-4114
Token count is too large: pypa__pip-6029
Token count is too large: PrefectHQ__prefect-1994
Token count is too large: docker__compose-5726
Token count is too large: pandas-dev__pandas-38145
Token count is too large: googleapis__google-cloud-python-6578
Token count is too large: pandas-dev__pandas-14768


Generating train split: 1797 examples [02:03, 19.72 examples/s]

Token count is too large: pandas-dev__pandas-5251
Token count is too large: pandas-dev__pandas-5941
Token count is too large: ytdl-org__youtube-dl-28849
Token count is too large: pandas-dev__pandas-14479
Token count is too large: Qiskit__qiskit-9391
Token count is too large: Qiskit__qiskit-8055
Token count is too large: googleapis__google-cloud-python-1479
Token count is too large: Qiskit__qiskit-447
Token count is too large: pandas-dev__pandas-4031


Generating train split: 1801 examples [02:03, 19.71 examples/s]

Token count is too large: pandas-dev__pandas-27156
There was an error processing
Token count is too large: numpy__numpy-11805
Token count is too large: pandas-dev__pandas-11291
Token count is too large: conda__conda-7156
Token count is too large: mesonbuild__meson-11874
Token count is too large: googleapis__google-cloud-python-2107
Token count is too large: Qiskit__qiskit-4797
Token count is too large: pantsbuild__pants-16108
Token count is too large: pandas-dev__pandas-3226


Generating train split: 1807 examples [02:03, 17.53 examples/s]

Token count is too large: dagster-io__dagster-974
Token count is too large: pandas-dev__pandas-39378
Token count is too large: googleapis__google-cloud-python-11334
Token count is too large: ray-project__ray-10504
Token count is too large: celery__celery-4549
Token count is too large: pantsbuild__pants-6538
Token count is too large: pandas-dev__pandas-17906
Token count is too large: pandas-dev__pandas-26015
Token count is too large: PrefectHQ__prefect-473
Token count is too large: pandas-dev__pandas-3236
Token count is too large: Qiskit__qiskit-1086
Token count is too large: pandas-dev__pandas-20841
Token count is too large: pandas-dev__pandas-22213
Token count is too large: pandas-dev__pandas-32242
Token count is too large: dagster-io__dagster-6237
Token count is too large: google__jax-1144


Generating train split: 1809 examples [02:04, 13.67 examples/s]

Token count is too large: huggingface__transformers-8586
Token count is too large: pantsbuild__pants-17451
Token count is too large: pandas-dev__pandas-30219
Token count is too large: pandas-dev__pandas-18385
Token count is too large: Lightning-AI__lightning-2020
Token count is too large: ipython__ipython-12860
Token count is too large: huggingface__transformers-16165


Generating train split: 1814 examples [02:04, 16.05 examples/s]

Token count is too large: pandas-dev__pandas-17361
Token count is too large: pandas-dev__pandas-7974
Token count is too large: numpy__numpy-18629
Token count is too large: mesonbuild__meson-10140
Token count is too large: dagster-io__dagster-14025
Token count is too large: huggingface__transformers-18414
Token count is too large: pandas-dev__pandas-22074
Token count is too large: celery__celery-4473
Token count is too large: pandas-dev__pandas-18934


Generating train split: 1816 examples [02:04, 11.02 examples/s]

Token count is too large: pandas-dev__pandas-36231
Token count is too large: huggingface__transformers-20966
Token count is too large: pandas-dev__pandas-4863


Generating train split: 1819 examples [02:05,  9.94 examples/s]

Token count is too large: pandas-dev__pandas-19558
Token count is too large: PrefectHQ__prefect-3005
Token count is too large: conda__conda-3629
Token count is too large: pantsbuild__pants-13027
Token count is too large: huggingface__transformers-21612
Token count is too large: pandas-dev__pandas-36595
Token count is too large: ipython__ipython-7564
Token count is too large: pandas-dev__pandas-28993
Token count is too large: pandas-dev__pandas-34049
Token count is too large: pantsbuild__pants-12982
Token count is too large: apache__airflow-15285
Token count is too large: pandas-dev__pandas-39409


Generating train split: 1821 examples [02:05, 10.83 examples/s]

Token count is too large: google__jax-1626
Token count is too large: google__jax-110
Token count is too large: pandas-dev__pandas-25693
Token count is too large: numpy__numpy-5584
Token count is too large: numpy__numpy-20974
Token count is too large: docker__compose-1159
Token count is too large: pandas-dev__pandas-11329
Token count is too large: pandas-dev__pandas-6551
Token count is too large: pandas-dev__pandas-6902
Token count is too large: pandas-dev__pandas-11850
Token count is too large: pandas-dev__pandas-22132
Token count is too large: pandas-dev__pandas-11892
Token count is too large: pantsbuild__pants-12808


Generating train split: 1826 examples [02:05, 10.64 examples/s]

Token count is too large: dagster-io__dagster-4830
Token count is too large: pandas-dev__pandas-38408
Token count is too large: conda__conda-6856
Token count is too large: pandas-dev__pandas-6164
Token count is too large: huggingface__transformers-7384
Token count is too large: celery__celery-5915
Token count is too large: Qiskit__qiskit-8741
Token count is too large: apache__airflow-21793


Generating train split: 1831 examples [02:05, 16.08 examples/s]

Token count is too large: google__jax-1549
Token count is too large: mesonbuild__meson-3860
Token count is too large: googleapis__google-cloud-python-8303
Token count is too large: pandas-dev__pandas-10887
Token count is too large: docker__compose-7754
Token count is too large: pandas-dev__pandas-14318
Token count is too large: mesonbuild__meson-1956
Token count is too large: pandas-dev__pandas-25157
Token count is too large: mesonbuild__meson-4926
Token count is too large: docker__compose-2355


Generating train split: 1836 examples [02:06, 14.00 examples/s]

Token count is too large: pandas-dev__pandas-19177
Token count is too large: conan-io__conan-5849
Token count is too large: pandas-dev__pandas-38931
Token count is too large: open-mmlab__mmdetection-2032
Token count is too large: ray-project__ray-5751
Token count is too large: docker__compose-2334
Token count is too large: conda__conda-3886
Token count is too large: pandas-dev__pandas-17374
Token count is too large: pantsbuild__pants-12717
Token count is too large: Qiskit__qiskit-2451
Token count is too large: googleapis__google-cloud-python-330
Token count is too large: numpy__numpy-6630


Generating train split: 1840 examples [02:06, 15.22 examples/s]

Token count is too large: pandas-dev__pandas-19669
Token count is too large: PrefectHQ__prefect-2502
Token count is too large: ipython__ipython-13943
Token count is too large: pandas-dev__pandas-27677
Token count is too large: Qiskit__qiskit-7733
Token count is too large: pandas-dev__pandas-30526
Token count is too large: ipython__ipython-7748
Token count is too large: ipython__ipython-9644
Token count is too large: conda__conda-7919
Token count is too large: Qiskit__qiskit-4555
Token count is too large: huggingface__transformers-18486
Token count is too large: huggingface__transformers-15622


Generating train split: 1844 examples [02:07, 10.83 examples/s]

Token count is too large: huggingface__transformers-21768
Token count is too large: ipython__ipython-1662
Token count is too large: conda__conda-3683
Token count is too large: jupyterlab__jupyterlab-2610
Token count is too large: pandas-dev__pandas-36141
Token count is too large: pantsbuild__pants-14606
Token count is too large: pandas-dev__pandas-14527
Token count is too large: docker__compose-5142
Token count is too large: pandas-dev__pandas-4829
Token count is too large: pandas-dev__pandas-27144


Generating train split: 1848 examples [02:07, 13.31 examples/s]

Token count is too large: pypa__pip-6418
Token count is too large: pandas-dev__pandas-5268
Token count is too large: mesonbuild__meson-11553
Token count is too large: pandas-dev__pandas-6304
Token count is too large: Qiskit__qiskit-3779
Token count is too large: ipython__ipython-12659
Token count is too large: celery__celery-7785
Token count is too large: pantsbuild__pants-4914
Token count is too large: pandas-dev__pandas-36000
Token count is too large: wagtail__wagtail-9176
Token count is too large: pypa__pip-8098
Token count is too large: huggingface__transformers-17968


Generating train split: 1851 examples [02:07, 13.35 examples/s]

Token count is too large: pandas-dev__pandas-22288
Token count is too large: wagtail__wagtail-9345
Token count is too large: Qiskit__qiskit-911
Token count is too large: numpy__numpy-7670
Token count is too large: pandas-dev__pandas-22296
Token count is too large: conda__conda-5921
Token count is too large: pantsbuild__pants-15402
Token count is too large: mesonbuild__meson-6398
Token count is too large: pandas-dev__pandas-21016


Generating train split: 1855 examples [02:07, 15.80 examples/s]

Token count is too large: pandas-dev__pandas-39191
Token count is too large: wagtail__wagtail-6420
Token count is too large: numpy__numpy-16253
Token count is too large: conda__conda-5284
Token count is too large: ytdl-org__youtube-dl-3407
Token count is too large: docker__compose-3136
Token count is too large: docker__compose-2027
Token count is too large: pandas-dev__pandas-27767
Token count is too large: pandas-dev__pandas-23544
Token count is too large: googleapis__google-cloud-python-1112
Token count is too large: pandas-dev__pandas-5105


Generating train split: 1859 examples [02:07, 17.34 examples/s]

Token count is too large: pandas-dev__pandas-6111
Token count is too large: mesonbuild__meson-7279
Token count is too large: jupyterlab__jupyterlab-2098
Token count is too large: pandas-dev__pandas-9667
Token count is too large: pypa__pip-10360
Token count is too large: conan-io__conan-4548
Token count is too large: open-mmlab__mmdetection-2524
Token count is too large: pyca__cryptography-2574
Token count is too large: google__jax-3110
Token count is too large: mesonbuild__meson-3461
Token count is too large: google__jax-1956
Token count is too large: conan-io__conan-5812
Token count is too large: pandas-dev__pandas-31818


Generating train split: 1862 examples [02:08, 14.84 examples/s]

Token count is too large: huggingface__transformers-24138
Token count is too large: pandas-dev__pandas-9994
Token count is too large: huggingface__transformers-9132
Token count is too large: conan-io__conan-2902
Token count is too large: pandas-dev__pandas-5295
Token count is too large: mesonbuild__meson-9424
Token count is too large: pandas-dev__pandas-25202
Token count is too large: conda__conda-5365
Token count is too large: pandas-dev__pandas-38737
Token count is too large: pandas-dev__pandas-34811
Token count is too large: huggingface__transformers-18398
Token count is too large: pandas-dev__pandas-7434
Token count is too large: ytdl-org__youtube-dl-1063
Token count is too large: dagster-io__dagster-9546
Token count is too large: ytdl-org__youtube-dl-17542


Generating train split: 1864 examples [02:08,  9.35 examples/s]

Token count is too large: mesonbuild__meson-1256
Token count is too large: pandas-dev__pandas-35723
Token count is too large: pandas-dev__pandas-39869
Token count is too large: pandas-dev__pandas-27495
Token count is too large: googleapis__google-cloud-python-3631
Token count is too large: dagster-io__dagster-13310
Token count is too large: pandas-dev__pandas-31748
Token count is too large: ytdl-org__youtube-dl-18281
Token count is too large: pandas-dev__pandas-5060
Token count is too large: pandas-dev__pandas-5510


Generating train split: 1867 examples [02:08, 10.85 examples/s]

Token count is too large: pyca__cryptography-5301
Token count is too large: docker__compose-5722
Token count is too large: pandas-dev__pandas-16294
Token count is too large: pantsbuild__pants-17057
Token count is too large: numpy__numpy-22519
Token count is too large: numpy__numpy-7660
Token count is too large: pandas-dev__pandas-37035
Token count is too large: pandas-dev__pandas-3680
Token count is too large: apache__airflow-28191
Token count is too large: apache__airflow-22849


Generating train split: 1870 examples [02:09, 11.01 examples/s]

Token count is too large: numpy__numpy-6208
Token count is too large: pandas-dev__pandas-9741
Token count is too large: pandas-dev__pandas-15040
Token count is too large: pandas-dev__pandas-38087
Token count is too large: docker__compose-1570


Generating train split: 1872 examples [02:09, 11.80 examples/s]

Token count is too large: pandas-dev__pandas-5018
Token count is too large: pandas-dev__pandas-27814
Token count is too large: pandas-dev__pandas-19244
Token count is too large: conan-io__conan-3077
Token count is too large: apache__airflow-33601
Token count is too large: pandas-dev__pandas-3677
Token count is too large: pantsbuild__pants-12675
Token count is too large: pypa__pip-4071
Token count is too large: pandas-dev__pandas-7201


Generating train split: 1876 examples [02:09, 12.51 examples/s]

Token count is too large: pandas-dev__pandas-19208
Token count is too large: pandas-dev__pandas-4120
Token count is too large: Lightning-AI__lightning-345
Token count is too large: pantsbuild__pants-13386
Token count is too large: googleapis__google-cloud-python-3340
Token count is too large: twisted__twisted-11873
Token count is too large: pandas-dev__pandas-25136
Token count is too large: conan-io__conan-5025
Token count is too large: huggingface__transformers-20158


Generating train split: 1884 examples [02:09, 16.96 examples/s]

Token count is too large: googleapis__google-cloud-python-6087
Token count is too large: googleapis__google-cloud-python-6825
Token count is too large: pandas-dev__pandas-19257
Token count is too large: pandas-dev__pandas-21432
Token count is too large: docker__compose-2508
Token count is too large: googleapis__google-cloud-python-9076
Token count is too large: mesonbuild__meson-8262
Token count is too large: pandas-dev__pandas-36836
Token count is too large: googleapis__google-cloud-python-8231
Token count is too large: Lightning-AI__lightning-2463
Token count is too large: pandas-dev__pandas-16091
Token count is too large: pandas-dev__pandas-17611
Token count is too large: apache__airflow-22904
Token count is too large: pandas-dev__pandas-27436
Token count is too large: Qiskit__qiskit-5881
Token count is too large: conan-io__conan-4708
Token count is too large: conda__conda-5312
Token count is too large: huggingface__transformers-20276
Token count is too large: huggingface__transform

Generating train split: 1888 examples [02:10, 10.35 examples/s]

Token count is too large: ipython__ipython-4858
Token count is too large: pantsbuild__pants-16276
Token count is too large: ray-project__ray-5580
Token count is too large: conda__conda-7418
Token count is too large: pandas-dev__pandas-6108
Token count is too large: huggingface__transformers-16465
Token count is too large: pypa__pip-4037
Token count is too large: docker__compose-7745
Token count is too large: pantsbuild__pants-4784
Token count is too large: jupyterlab__jupyterlab-14792
Token count is too large: conda__conda-6853
Token count is too large: pandas-dev__pandas-20988
Token count is too large: mesonbuild__meson-5473
Token count is too large: pandas-dev__pandas-30628
Token count is too large: pandas-dev__pandas-25109


Generating train split: 1898 examples [02:27,  1.15 examples/s]

Token count is too large: explosion__spaCy-1502
Token count is too large: pandas-dev__pandas-31840
Token count is too large: pandas-dev__pandas-26754
Token count is too large: pandas-dev__pandas-23466
Token count is too large: ytdl-org__youtube-dl-4247
Token count is too large: Qiskit__qiskit-5755
Token count is too large: pandas-dev__pandas-16553
Token count is too large: PrefectHQ__prefect-349
Token count is too large: pandas-dev__pandas-5009
Token count is too large: numpy__numpy-7340
Token count is too large: mesonbuild__meson-3002


Generating train split: 1903 examples [02:27,  1.68 examples/s]

Token count is too large: pandas-dev__pandas-10335
Token count is too large: open-mmlab__mmdetection-5654
Token count is too large: mesonbuild__meson-1373
Token count is too large: pandas-dev__pandas-5477
Token count is too large: google__jax-749
Token count is too large: googleapis__google-cloud-python-9029
Token count is too large: pandas-dev__pandas-22644
Token count is too large: pantsbuild__pants-10268
Token count is too large: PrefectHQ__prefect-485


Generating train split: 1908 examples [02:27,  2.40 examples/s]

Token count is too large: pandas-dev__pandas-3021
Token count is too large: Qiskit__qiskit-4945
Token count is too large: huggingface__transformers-11405
Token count is too large: numpy__numpy-22736
Token count is too large: ipython__ipython-6680
Token count is too large: ipython__ipython-2352
Token count is too large: huggingface__transformers-8231
Token count is too large: huggingface__transformers-12280


Generating train split: 1912 examples [02:28,  3.16 examples/s]

Token count is too large: huggingface__transformers-17082
Token count is too large: ipython__ipython-10030
Token count is too large: pandas-dev__pandas-14378
Token count is too large: mesonbuild__meson-10048
Token count is too large: celery__celery-7734
Token count is too large: googleapis__google-cloud-python-10028
Token count is too large: ipython__ipython-11425
Token count is too large: ray-project__ray-4844
Token count is too large: mesonbuild__meson-11077
Token count is too large: huggingface__transformers-6890
Token count is too large: dagster-io__dagster-8677
Token count is too large: apache__airflow-15194
Token count is too large: wagtail__wagtail-2080
Token count is too large: PrefectHQ__prefect-2109
Token count is too large: Qiskit__qiskit-3902
Token count is too large: ipython__ipython-10668
Token count is too large: mesonbuild__meson-9134
Token count is too large: pandas-dev__pandas-31066
Token count is too large: pypa__pip-2924
Token count is too large: ipython__ipython-66

Generating train split: 1915 examples [02:28,  3.51 examples/s]

Token count is too large: pandas-dev__pandas-10674
Token count is too large: pandas-dev__pandas-19673
Token count is too large: google__jax-391
Token count is too large: pandas-dev__pandas-36560
Token count is too large: wagtail__wagtail-1406
Token count is too large: mesonbuild__meson-2466
Token count is too large: ytdl-org__youtube-dl-19204
Token count is too large: huggingface__transformers-13194
Token count is too large: mesonbuild__meson-2888
Token count is too large: googleapis__google-cloud-python-7047


Generating train split: 1924 examples [02:28,  6.48 examples/s]

Token count is too large: huggingface__transformers-19027
Token count is too large: pandas-dev__pandas-21260
Token count is too large: Lightning-AI__lightning-2391
Token count is too large: ipython__ipython-12884
Token count is too large: pandas-dev__pandas-20484
Token count is too large: pandas-dev__pandas-16801
Token count is too large: apache__airflow-14978
Token count is too large: pandas-dev__pandas-36356


Generating train split: 1928 examples [02:29,  8.42 examples/s]

Token count is too large: Lightning-AI__lightning-866
Token count is too large: Qiskit__qiskit-4140
Token count is too large: googleapis__google-cloud-python-359
Token count is too large: ray-project__ray-8480
Token count is too large: pantsbuild__pants-17095
Token count is too large: gitpython-developers__GitPython-695
Token count is too large: pandas-dev__pandas-14059
Token count is too large: pandas-dev__pandas-4911
Token count is too large: Qiskit__qiskit-766
Token count is too large: huggingface__transformers-18351
Token count is too large: pandas-dev__pandas-28948
Token count is too large: dagster-io__dagster-12293
Token count is too large: ray-project__ray-8806
Token count is too large: numpy__numpy-8594
Token count is too large: pandas-dev__pandas-28601
Token count is too large: docker__compose-3057


Generating train split: 1933 examples [02:29,  9.49 examples/s]

Token count is too large: pandas-dev__pandas-30277
Token count is too large: ray-project__ray-5354
Token count is too large: Qiskit__qiskit-2926
Token count is too large: mesonbuild__meson-10961
Token count is too large: docker__compose-6529
Token count is too large: PrefectHQ__prefect-587
Token count is too large: huggingface__transformers-8479
Token count is too large: pandas-dev__pandas-35639
Token count is too large: pandas-dev__pandas-33585
Token count is too large: huggingface__transformers-8812


Generating train split: 1937 examples [02:29,  9.66 examples/s]

Token count is too large: pandas-dev__pandas-7217
Token count is too large: pandas-dev__pandas-26825
Token count is too large: Qiskit__qiskit-2342
Token count is too large: tensorflow__models-881
Token count is too large: pandas-dev__pandas-14118
Token count is too large: Qiskit__qiskit-9726
Token count is too large: conda__conda-6909
Token count is too large: huggingface__transformers-14525
Token count is too large: pandas-dev__pandas-5177


Generating train split: 1943 examples [02:30, 11.94 examples/s]

Token count is too large: Qiskit__qiskit-3042
Token count is too large: pandas-dev__pandas-35116
Token count is too large: Qiskit__qiskit-9829
Token count is too large: Qiskit__qiskit-10034
Token count is too large: pandas-dev__pandas-21657
Token count is too large: ipython__ipython-5855
Token count is too large: mesonbuild__meson-5881
Token count is too large: pandas-dev__pandas-5666
Token count is too large: explosion__spaCy-3038
Token count is too large: pandas-dev__pandas-18191


Generating train split: 1946 examples [02:30, 13.52 examples/s]

Token count is too large: apache__airflow-22302
Token count is too large: Qiskit__qiskit-7321
Token count is too large: pandas-dev__pandas-37607
Token count is too large: huggingface__transformers-9047
Token count is too large: Qiskit__qiskit-6370
Token count is too large: numpy__numpy-13301
Token count is too large: pandas-dev__pandas-34912
Token count is too large: apache__airflow-24054
Token count is too large: pandas-dev__pandas-8423
Token count is too large: google__jax-542
Token count is too large: Qiskit__qiskit-2241
Token count is too large: huggingface__transformers-11874
Token count is too large: ytdl-org__youtube-dl-10971
Token count is too large: pandas-dev__pandas-37557
Token count is too large: ipython__ipython-10012
Token count is too large: pandas-dev__pandas-15501
Token count is too large: pandas-dev__pandas-14056
Token count is too large: numpy__numpy-16627
Token count is too large: pandas-dev__pandas-7000
Token count is too large: huggingface__transformers-21400


Generating train split: 1950 examples [02:31,  9.04 examples/s]

Token count is too large: pandas-dev__pandas-37029
Token count is too large: Lightning-AI__lightning-2874
Token count is too large: mesonbuild__meson-4303
Token count is too large: ytdl-org__youtube-dl-3954
Token count is too large: Lightning-AI__lightning-971
Token count is too large: scipy__scipy-3006
Token count is too large: pandas-dev__pandas-36767
Token count is too large: huggingface__transformers-13077


Generating train split: 1955 examples [02:31, 12.13 examples/s]

Token count is too large: pandas-dev__pandas-31898
Token count is too large: apache__airflow-21116
Token count is too large: numpy__numpy-23382
Token count is too large: pandas-dev__pandas-25264
Token count is too large: googleapis__google-cloud-python-7378
Token count is too large: Qiskit__qiskit-10591
Token count is too large: numpy__numpy-7274
Token count is too large: pyca__cryptography-3497
Token count is too large: pandas-dev__pandas-26516
Token count is too large: wagtail__wagtail-6756
Token count is too large: pandas-dev__pandas-26770
Token count is too large: apache__airflow-16916


Generating train split: 1958 examples [02:31, 11.73 examples/s]

Token count is too large: pandas-dev__pandas-6359
Token count is too large: mesonbuild__meson-1342
Token count is too large: googleapis__google-cloud-python-7948
Token count is too large: pantsbuild__pants-11765
Token count is too large: pandas-dev__pandas-35673
Token count is too large: pandas-dev__pandas-24758
Token count is too large: numpy__numpy-9701
Token count is too large: mesonbuild__meson-4230
Token count is too large: dagster-io__dagster-8779
Token count is too large: pandas-dev__pandas-5281
Token count is too large: conda__conda-6564


Generating train split: 1964 examples [02:31, 15.79 examples/s]

Token count is too large: huggingface__transformers-10267
Token count is too large: numpy__numpy-5577
Token count is too large: ipython__ipython-7105
Token count is too large: ipython__ipython-4112
Token count is too large: Qiskit__qiskit-6294
Token count is too large: mesonbuild__meson-3733
Token count is too large: Qiskit__qiskit-4979
Token count is too large: pandas-dev__pandas-8291
Token count is too large: dagster-io__dagster-8023
Token count is too large: conda__conda-11589
Token count is too large: pandas-dev__pandas-39516
Token count is too large: pantsbuild__pants-14516
Token count is too large: pandas-dev__pandas-2846


Generating train split: 1969 examples [02:32, 13.51 examples/s]

Token count is too large: huggingface__transformers-20760
Token count is too large: conda__conda-3041
Token count is too large: pantsbuild__pants-18458
Token count is too large: Qiskit__qiskit-7744
Token count is too large: pantsbuild__pants-9215
Token count is too large: PrefectHQ__prefect-1534
Token count is too large: pandas-dev__pandas-38587
Token count is too large: pandas-dev__pandas-14228
Token count is too large: pantsbuild__pants-14803
Token count is too large: googleapis__google-cloud-python-5002
Token count is too large: pandas-dev__pandas-27808
Token count is too large: pandas-dev__pandas-7318
Token count is too large: numpy__numpy-10543
Token count is too large: numpy__numpy-9599
Token count is too large: jupyterlab__jupyterlab-13907


Generating train split: 1977 examples [02:32, 20.32 examples/s]

Token count is too large: pantsbuild__pants-17265
Token count is too large: PrefectHQ__prefect-2959
Token count is too large: pypa__pip-9469
Token count is too large: pandas-dev__pandas-37196
Token count is too large: pandas-dev__pandas-25110
Token count is too large: PrefectHQ__prefect-1701
Token count is too large: ray-project__ray-786
Token count is too large: apache__airflow-32697
Token count is too large: gitpython-developers__GitPython-1335
Token count is too large: pyca__cryptography-2436
Token count is too large: huggingface__transformers-18014
Token count is too large: pandas-dev__pandas-36626
Token count is too large: pandas-dev__pandas-32758
Token count is too large: conan-io__conan-3517
Token count is too large: pantsbuild__pants-16419
Token count is too large: pypa__pip-6518
Token count is too large: celery__celery-4892
Token count is too large: pantsbuild__pants-5021
Token count is too large: mesonbuild__meson-7686
Token count is too large: pandas-dev__pandas-35643
Token 

Generating train split: 1981 examples [02:32, 18.08 examples/s]

Token count is too large: huggingface__transformers-1736
Token count is too large: ytdl-org__youtube-dl-3053
There was an error processing
Token count is too large: huggingface__transformers-8328
Token count is too large: huggingface__transformers-10493
Token count is too large: numpy__numpy-20807
Token count is too large: googleapis__google-cloud-python-5374
Token count is too large: apache__airflow-11723
Token count is too large: pandas-dev__pandas-33233
Token count is too large: apache__airflow-1224
Token count is too large: DataDog__integrations-core-12675
Token count is too large: pandas-dev__pandas-10826
Token count is too large: pandas-dev__pandas-6931
Token count is too large: pandas-dev__pandas-4458
Token count is too large: pandas-dev__pandas-5656


Generating train split: 1987 examples [02:33, 16.98 examples/s]

Token count is too large: Qiskit__qiskit-6826
Token count is too large: pandas-dev__pandas-17881
Token count is too large: pantsbuild__pants-18188
Token count is too large: pandas-dev__pandas-22814
Token count is too large: googleapis__google-cloud-python-8889
Token count is too large: pandas-dev__pandas-22561
Token count is too large: pypa__pip-10082
Token count is too large: ytdl-org__youtube-dl-17448
Token count is too large: pandas-dev__pandas-27993
Token count is too large: pandas-dev__pandas-20292
Token count is too large: mesonbuild__meson-11432
Token count is too large: googleapis__google-cloud-python-2714
Token count is too large: pandas-dev__pandas-5584
Token count is too large: huggingface__transformers-17060


Generating train split: 1989 examples [02:33, 13.47 examples/s]

Token count is too large: pandas-dev__pandas-11400
Token count is too large: huggingface__transformers-17143
Token count is too large: pandas-dev__pandas-10602
Token count is too large: pandas-dev__pandas-32516
Token count is too large: wagtail__wagtail-1770
Token count is too large: huggingface__transformers-9411
Token count is too large: conan-io__conan-3687
Token count is too large: ray-project__ray-7794
Token count is too large: Qiskit__qiskit-8198
Token count is too large: pandas-dev__pandas-24397
Token count is too large: Qiskit__qiskit-2492


Generating train split: 2002 examples [02:34, 16.58 examples/s]

Token count is too large: ipython__ipython-11352
Token count is too large: pantsbuild__pants-13970
Token count is too large: huggingface__transformers-11353
Token count is too large: pandas-dev__pandas-21485
Token count is too large: numpy__numpy-23514
Token count is too large: pandas-dev__pandas-23161
Token count is too large: huggingface__transformers-20198


Generating train split: 2004 examples [02:34, 15.56 examples/s]

Token count is too large: pandas-dev__pandas-6588
Token count is too large: Qiskit__qiskit-6292
Token count is too large: huggingface__transformers-7642
Token count is too large: pantsbuild__pants-14497
Token count is too large: conda__conda-4935
Token count is too large: pandas-dev__pandas-6786
Token count is too large: apache__airflow-30948
Token count is too large: Lightning-AI__lightning-2567
Token count is too large: Qiskit__qiskit-2095
Token count is too large: numpy__numpy-12239
Token count is too large: ray-project__ray-5346
Token count is too large: dagster-io__dagster-14174
Token count is too large: scipy__scipy-2807
Token count is too large: docker__compose-1430


Generating train split: 2009 examples [02:34, 15.59 examples/s]

Token count is too large: huggingface__transformers-24771
Token count is too large: Qiskit__qiskit-4915
Token count is too large: PrefectHQ__prefect-525
Token count is too large: mesonbuild__meson-887
Token count is too large: explosion__spaCy-1792
Token count is too large: docker__compose-2364
Token count is too large: pandas-dev__pandas-10719
Token count is too large: Lightning-AI__lightning-1283
Token count is too large: mesonbuild__meson-6265


Generating train split: 2011 examples [02:34, 16.38 examples/s]

Token count is too large: pandas-dev__pandas-19986
Token count is too large: numpy__numpy-11033
Token count is too large: pantsbuild__pants-13336
Token count is too large: conda__conda-11422
Token count is too large: docker__compose-3102
Token count is too large: mesonbuild__meson-7682
Token count is too large: huggingface__transformers-9807
Token count is too large: pandas-dev__pandas-20796
Token count is too large: pantsbuild__pants-15836
Token count is too large: dagster-io__dagster-1143
Token count is too large: pypa__pip-11698


Generating train split: 2020 examples [02:34, 21.91 examples/s]

Token count is too large: googleapis__google-cloud-python-8693
Token count is too large: numpy__numpy-22878
Token count is too large: conda__conda-9261
Token count is too large: pandas-dev__pandas-36605
Token count is too large: dagster-io__dagster-9327
Token count is too large: conda__conda-6205
Token count is too large: conda__conda-8352
Token count is too large: pandas-dev__pandas-39440
Token count is too large: ytdl-org__youtube-dl-3565
Token count is too large: ipython__ipython-9022
Token count is too large: ytdl-org__youtube-dl-20801
Token count is too large: pandas-dev__pandas-18652
Token count is too large: mesonbuild__meson-5243
Token count is too large: Lightning-AI__lightning-833
Token count is too large: pantsbuild__pants-5930


Generating train split: 2024 examples [02:35, 19.88 examples/s]

Token count is too large: pandas-dev__pandas-29116
Token count is too large: google__jax-2834
Token count is too large: pandas-dev__pandas-7684
Token count is too large: conan-io__conan-4481
Token count is too large: conan-io__conan-8861
Token count is too large: pandas-dev__pandas-18300
Token count is too large: pandas-dev__pandas-39421
Token count is too large: mesonbuild__meson-6759
Token count is too large: jupyterlab__jupyterlab-7583
Token count is too large: huggingface__transformers-9169
Token count is too large: pandas-dev__pandas-9721
Token count is too large: tiangolo__fastapi-338
Token count is too large: pandas-dev__pandas-5208
Token count is too large: pandas-dev__pandas-9417
Token count is too large: pandas-dev__pandas-20584


Generating train split: 2027 examples [02:35, 15.19 examples/s]

Token count is too large: google__jax-2182
Token count is too large: Qiskit__qiskit-3696
Token count is too large: scipy__scipy-3857
Token count is too large: Qiskit__qiskit-2849
Token count is too large: dagster-io__dagster-8859
Token count is too large: pandas-dev__pandas-26585
Token count is too large: pandas-dev__pandas-7114
Token count is too large: conan-io__conan-3192
Token count is too large: huggingface__transformers-10215
Token count is too large: pandas-dev__pandas-31877
Token count is too large: conda__conda-12005
Token count is too large: pandas-dev__pandas-25490
Token count is too large: DataDog__integrations-core-2624


Generating train split: 2032 examples [02:35, 18.48 examples/s]

Token count is too large: numpy__numpy-13551
Token count is too large: pyca__cryptography-4785
Token count is too large: Qiskit__qiskit-5192
Token count is too large: pandas-dev__pandas-4233
Token count is too large: numpy__numpy-19083
Token count is too large: pandas-dev__pandas-11774


Generating train split: 2039 examples [02:36, 17.94 examples/s]

Token count is too large: pandas-dev__pandas-3696
Token count is too large: google__jax-2794
Token count is too large: Lightning-AI__lightning-1862
Token count is too large: Qiskit__qiskit-7665
Token count is too large: Qiskit__qiskit-6418
Token count is too large: pandas-dev__pandas-36900
Token count is too large: conda__conda-7722
Token count is too large: mesonbuild__meson-334
Token count is too large: pandas-dev__pandas-9470
Token count is too large: conan-io__conan-4595
Token count is too large: Qiskit__qiskit-1727
Token count is too large: pandas-dev__pandas-21235
Token count is too large: Qiskit__qiskit-5672
Token count is too large: mesonbuild__meson-968


Generating train split: 2041 examples [02:36, 16.97 examples/s]

Token count is too large: mesonbuild__meson-4401
Token count is too large: mesonbuild__meson-1961
Token count is too large: Qiskit__qiskit-2764
Token count is too large: pandas-dev__pandas-35780
Token count is too large: huggingface__transformers-13400
Token count is too large: Qiskit__qiskit-1181
Token count is too large: pandas-dev__pandas-8693
Token count is too large: ipython__ipython-3108


Generating train split: 2044 examples [02:36, 18.87 examples/s]

Token count is too large: googleapis__google-cloud-python-10026
Token count is too large: twisted__twisted-11644
Token count is too large: dagster-io__dagster-10991
Token count is too large: apache__airflow-33135
Token count is too large: huggingface__transformers-19158
Token count is too large: huggingface__transformers-25136
Token count is too large: numpy__numpy-6733
Token count is too large: pandas-dev__pandas-37357
Token count is too large: conan-io__conan-3717
Token count is too large: Lightning-AI__lightning-1326
Token count is too large: pandas-dev__pandas-13735
Token count is too large: pandas-dev__pandas-17859
Token count is too large: pandas-dev__pandas-9479
Token count is too large: twisted__twisted-11810
Token count is too large: pandas-dev__pandas-18905
Token count is too large: pantsbuild__pants-18369
Token count is too large: apache__airflow-22685
Token count is too large: pandas-dev__pandas-21811
Token count is too large: ipython__ipython-4713
Token count is too large:

Generating train split: 2047 examples [02:38,  5.16 examples/s]

Token count is too large: pypa__pip-3396
Token count is too large: pandas-dev__pandas-4250
Token count is too large: mesonbuild__meson-2338
Token count is too large: conda__conda-11290
Token count is too large: ytdl-org__youtube-dl-486
Token count is too large: Lightning-AI__lightning-2459
Token count is too large: pandas-dev__pandas-37161
Token count is too large: pantsbuild__pants-15682
Token count is too large: pandas-dev__pandas-34158


Generating train split: 2050 examples [02:38,  6.54 examples/s]

Token count is too large: google__jax-1836
Token count is too large: Lightning-AI__lightning-1493
Token count is too large: pandas-dev__pandas-9177
Token count is too large: huggingface__transformers-22518
Token count is too large: Lightning-AI__lightning-2535
Token count is too large: docker__compose-6813
Token count is too large: docker__compose-3006
Token count is too large: pandas-dev__pandas-7634
Token count is too large: huggingface__transformers-17917
Token count is too large: conan-io__conan-7259
Token count is too large: pandas-dev__pandas-17588
Token count is too large: pandas-dev__pandas-28241
Token count is too large: pandas-dev__pandas-7503
Token count is too large: pandas-dev__pandas-28735
Token count is too large: docker__compose-3436
Token count is too large: pandas-dev__pandas-34615
Token count is too large: docker__compose-5595


Generating train split: 2053 examples [02:38,  5.92 examples/s]

Token count is too large: pandas-dev__pandas-5438
Token count is too large: pandas-dev__pandas-26403
Token count is too large: pandas-dev__pandas-31207
Token count is too large: pandas-dev__pandas-20437
Token count is too large: pantsbuild__pants-18352
Token count is too large: conda__conda-7360
Token count is too large: wagtail__wagtail-566
Token count is too large: google__jax-736
Token count is too large: pandas-dev__pandas-4832
Token count is too large: huggingface__transformers-12519


Generating train split: 2060 examples [02:39,  8.76 examples/s]

Token count is too large: pandas-dev__pandas-16570
Token count is too large: pypa__pip-3324
Token count is too large: pandas-dev__pandas-30146
Token count is too large: huggingface__transformers-19249
Token count is too large: pandas-dev__pandas-35547
Token count is too large: huggingface__transformers-12121
Token count is too large: conan-io__conan-2875
Token count is too large: pypa__pip-7087
Token count is too large: numpy__numpy-4531
Token count is too large: conda__conda-5291
Token count is too large: mesonbuild__meson-6741
Token count is too large: pandas-dev__pandas-19979
Token count is too large: huggingface__transformers-15789
Token count is too large: pandas-dev__pandas-19339
Token count is too large: google__jax-41


Generating train split: 2062 examples [02:39,  7.86 examples/s]

Token count is too large: numpy__numpy-14124
Token count is too large: pandas-dev__pandas-28834
Token count is too large: Qiskit__qiskit-6282
Token count is too large: ray-project__ray-10464
Token count is too large: apache__airflow-24065
Token count is too large: pandas-dev__pandas-38446
Token count is too large: pandas-dev__pandas-20959
Token count is too large: mesonbuild__meson-10214
Token count is too large: pandas-dev__pandas-20846
Token count is too large: PrefectHQ__prefect-2443
Token count is too large: pyca__cryptography-2066
Token count is too large: pyca__cryptography-7080
Token count is too large: pandas-dev__pandas-24541
Token count is too large: wagtail__wagtail-10123
Token count is too large: pandas-dev__pandas-33241


Generating train split: 2067 examples [02:40,  9.21 examples/s]

Token count is too large: pandas-dev__pandas-21486
Token count is too large: Lightning-AI__lightning-448
Token count is too large: conan-io__conan-6675
There was an error processing
Token count is too large: pandas-dev__pandas-26355
Token count is too large: apache__airflow-10633
Token count is too large: conan-io__conan-9758
Token count is too large: pandas-dev__pandas-29237
Token count is too large: google__jax-312
Token count is too large: pandas-dev__pandas-6017
Token count is too large: pandas-dev__pandas-7790
Token count is too large: pandas-dev__pandas-38073
Token count is too large: pandas-dev__pandas-9377
Token count is too large: numpy__numpy-11218
Token count is too large: ipython__ipython-10829
Token count is too large: ytdl-org__youtube-dl-4794
Token count is too large: scipy__scipy-3313
Token count is too large: huggingface__transformers-25496


Generating train split: 2072 examples [02:40,  9.24 examples/s]

Token count is too large: apache__airflow-8551
Token count is too large: pandas-dev__pandas-6385
Token count is too large: pandas-dev__pandas-34634
Token count is too large: pandas-dev__pandas-18376
Token count is too large: pypa__pip-7187
Token count is too large: pandas-dev__pandas-37780
Token count is too large: pandas-dev__pandas-36115
Token count is too large: pandas-dev__pandas-6458
Token count is too large: conan-io__conan-5910
Token count is too large: ipython__ipython-4120
Token count is too large: pandas-dev__pandas-31350
Token count is too large: tensorflow__models-864


Generating train split: 2077 examples [02:40, 12.35 examples/s]

Token count is too large: googleapis__google-cloud-python-3491
Token count is too large: pantsbuild__pants-16668
Token count is too large: apache__airflow-8834
Token count is too large: apache__airflow-17839
Token count is too large: pantsbuild__pants-13793
Token count is too large: wagtail__wagtail-10731
Token count is too large: google__jax-131
Token count is too large: ray-project__ray-3121
Token count is too large: conan-io__conan-7524
Token count is too large: jupyterlab__jupyterlab-9256
Token count is too large: pandas-dev__pandas-35171
Token count is too large: Qiskit__qiskit-8597


Generating train split: 2082 examples [02:40, 14.15 examples/s]

Token count is too large: ray-project__ray-2374
Token count is too large: numpy__numpy-11233
Token count is too large: pandas-dev__pandas-25732
Token count is too large: pandas-dev__pandas-18953
Token count is too large: pandas-dev__pandas-19338
Token count is too large: huggingface__transformers-18984
Token count is too large: pandas-dev__pandas-7040
Token count is too large: pandas-dev__pandas-29143
Token count is too large: ytdl-org__youtube-dl-21421
Token count is too large: Qiskit__qiskit-9133
Token count is too large: pandas-dev__pandas-37966
Token count is too large: mesonbuild__meson-4699


Generating train split: 2084 examples [02:41,  9.90 examples/s]

Token count is too large: huggingface__transformers-13926
Token count is too large: conda__conda-6779
Token count is too large: ytdl-org__youtube-dl-1531
Token count is too large: pandas-dev__pandas-11410
Token count is too large: pypa__pip-1868
Token count is too large: pandas-dev__pandas-36563
Token count is too large: celery__celery-423
Token count is too large: pandas-dev__pandas-5199
Token count is too large: mesonbuild__meson-3188
Token count is too large: apache__airflow-17105


Generating train split: 2087 examples [02:41, 11.50 examples/s]

Token count is too large: mesonbuild__meson-1775
Token count is too large: google__jax-1308
Token count is too large: pyca__cryptography-4096
Token count is too large: ytdl-org__youtube-dl-4076
Token count is too large: pandas-dev__pandas-17377
Token count is too large: pandas-dev__pandas-29318
Token count is too large: pandas-dev__pandas-25230
Token count is too large: celery__celery-5869
Token count is too large: pypa__pip-11245
Token count is too large: pandas-dev__pandas-11822
Token count is too large: pandas-dev__pandas-3018
Token count is too large: pandas-dev__pandas-7326


Generating train split: 2089 examples [02:41, 10.83 examples/s]

Token count is too large: pandas-dev__pandas-5342
Token count is too large: conda__conda-6448
Token count is too large: googleapis__google-cloud-python-416
Token count is too large: ipython__ipython-3560
Token count is too large: numpy__numpy-5327
Token count is too large: ray-project__ray-4195
Token count is too large: jupyterlab__jupyterlab-3346
Token count is too large: pandas-dev__pandas-18313
Token count is too large: huggingface__transformers-20786
Token count is too large: pandas-dev__pandas-23527
Token count is too large: ytdl-org__youtube-dl-30635
Token count is too large: pandas-dev__pandas-17603
Token count is too large: huggingface__transformers-22920


Generating train split: 2091 examples [02:42,  7.00 examples/s]

Token count is too large: huggingface__transformers-12604
Token count is too large: huggingface__transformers-25415
Token count is too large: pypa__pip-11874
Token count is too large: Qiskit__qiskit-7134
Token count is too large: google__jax-671
Token count is too large: wagtail__wagtail-9905
Token count is too large: Lightning-AI__lightning-2775


Generating train split: 2096 examples [02:42, 11.12 examples/s]

Token count is too large: huggingface__transformers-11703
Token count is too large: ytdl-org__youtube-dl-5729
Token count is too large: ytdl-org__youtube-dl-27732
Token count is too large: Qiskit__qiskit-885
Token count is too large: PrefectHQ__prefect-2680
Token count is too large: pandas-dev__pandas-8652
Token count is too large: docker__compose-5393
Token count is too large: pandas-dev__pandas-28010


Generating train split: 2105 examples [02:42, 18.06 examples/s]

Token count is too large: pandas-dev__pandas-11861
Token count is too large: ipython__ipython-1689
Token count is too large: ray-project__ray-4305
Token count is too large: pandas-dev__pandas-25968
Token count is too large: pantsbuild__pants-4466
Token count is too large: googleapis__google-cloud-python-1636
Token count is too large: apache__airflow-6783
Token count is too large: huggingface__transformers-7786
Token count is too large: pandas-dev__pandas-21164
Token count is too large: pandas-dev__pandas-26155
Token count is too large: PrefectHQ__prefect-67
Token count is too large: PrefectHQ__prefect-2390
Token count is too large: pandas-dev__pandas-33749
Token count is too large: Qiskit__qiskit-1534
Token count is too large: googleapis__google-cloud-python-2409
Token count is too large: Qiskit__qiskit-8989
Token count is too large: pandas-dev__pandas-31119


Generating train split: 2110 examples [02:43, 21.78 examples/s]

Token count is too large: celery__celery-6416
Token count is too large: numpy__numpy-18108
Token count is too large: pandas-dev__pandas-14343
Token count is too large: pandas-dev__pandas-16003
Token count is too large: conda__conda-1541
Token count is too large: pantsbuild__pants-14819
Token count is too large: celery__celery-6917
Token count is too large: googleapis__google-cloud-python-4859
Token count is too large: ipython__ipython-13211
Token count is too large: pandas-dev__pandas-16163


Generating train split: 2114 examples [02:43, 19.27 examples/s]

Token count is too large: huggingface__transformers-22253
Token count is too large: pandas-dev__pandas-38194
Token count is too large: pandas-dev__pandas-33865
Token count is too large: pandas-dev__pandas-27874
Token count is too large: jupyterlab__jupyterlab-9604
Token count is too large: pandas-dev__pandas-23480
Token count is too large: conda__conda-5030
Token count is too large: mesonbuild__meson-8964
Token count is too large: google__jax-2673
Token count is too large: pandas-dev__pandas-15423
Token count is too large: pypa__pip-9289
Token count is too large: pantsbuild__pants-4542
Token count is too large: pandas-dev__pandas-8282
Token count is too large: pypa__pip-11259
Token count is too large: Lightning-AI__lightning-876
Token count is too large: pandas-dev__pandas-5100
Token count is too large: conan-io__conan-3727
Token count is too large: numpy__numpy-24100
Token count is too large: huggingface__transformers-18598
Token count is too large: pandas-dev__pandas-37706
Token coun

Generating train split: 2125 examples [02:44, 17.36 examples/s]

Token count is too large: huggingface__transformers-20305
Token count is too large: Lightning-AI__lightning-1253
Token count is too large: conda__conda-5157
Token count is too large: pandas-dev__pandas-17149
Token count is too large: pandas-dev__pandas-25731
Token count is too large: pandas-dev__pandas-34329
Token count is too large: conan-io__conan-5357
Token count is too large: pandas-dev__pandas-14799
Token count is too large: pandas-dev__pandas-5903
Token count is too large: googleapis__google-cloud-python-4299
Token count is too large: mesonbuild__meson-9506
Token count is too large: pandas-dev__pandas-9475
Token count is too large: scipy__scipy-5479
Token count is too large: wagtail__wagtail-2540
Token count is too large: numpy__numpy-14070
Token count is too large: pandas-dev__pandas-20799
Token count is too large: pandas-dev__pandas-34589
Token count is too large: pantsbuild__pants-7092
Token count is too large: apache__airflow-14030
Token count is too large: googleapis__google

Generating train split: 2129 examples [02:44, 16.14 examples/s]

Token count is too large: pandas-dev__pandas-20544
Token count is too large: Qiskit__qiskit-1725
Token count is too large: pantsbuild__pants-5534
Token count is too large: Qiskit__qiskit-9886
Token count is too large: apache__airflow-11730
Token count is too large: huggingface__transformers-11197
Token count is too large: pandas-dev__pandas-25089
Token count is too large: conda__conda-7516
Token count is too large: huggingface__transformers-15001
Token count is too large: conan-io__conan-4903
Token count is too large: pandas-dev__pandas-8644
Token count is too large: pandas-dev__pandas-19253
Token count is too large: ipython__ipython-9109
Token count is too large: huggingface__transformers-7616
Token count is too large: Qiskit__qiskit-10031


Generating train split: 2132 examples [02:44, 16.27 examples/s]

Token count is too large: numpy__numpy-21952
Token count is too large: pandas-dev__pandas-35284
Token count is too large: mesonbuild__meson-11137
Token count is too large: pandas-dev__pandas-4643
Token count is too large: Qiskit__qiskit-5009
Token count is too large: scipy__scipy-3924
Token count is too large: pyca__cryptography-6562
Token count is too large: wagtail__wagtail-10465
Token count is too large: pandas-dev__pandas-30284
Token count is too large: ytdl-org__youtube-dl-12587
Token count is too large: pandas-dev__pandas-26836
Token count is too large: pandas-dev__pandas-23749
Token count is too large: ray-project__ray-9306
Token count is too large: Qiskit__qiskit-8673


Generating train split: 2140 examples [02:45, 15.46 examples/s]

Token count is too large: pandas-dev__pandas-4258
Token count is too large: wagtail__wagtail-2038
Token count is too large: pandas-dev__pandas-26267
Token count is too large: mesonbuild__meson-10339
Token count is too large: pandas-dev__pandas-35314
Token count is too large: pandas-dev__pandas-10301
Token count is too large: Qiskit__qiskit-4836
Token count is too large: jupyterlab__jupyterlab-2714
Token count is too large: google__jax-977
Token count is too large: googleapis__google-cloud-python-7022
Token count is too large: PrefectHQ__prefect-1063
Token count is too large: jupyterlab__jupyterlab-2466
Token count is too large: conda__conda-3410
Token count is too large: huggingface__transformers-22603
Token count is too large: mesonbuild__meson-10295
Token count is too large: pandas-dev__pandas-24253
Token count is too large: ipython__ipython-3282
Token count is too large: pandas-dev__pandas-8388
Token count is too large: celery__celery-6457
Token count is too large: pypa__pip-4755
To

Generating train split: 2143 examples [02:45,  9.78 examples/s]

Token count is too large: Qiskit__qiskit-435
Token count is too large: googleapis__google-cloud-python-9337
Token count is too large: pandas-dev__pandas-17630
Token count is too large: googleapis__google-cloud-python-2537
Token count is too large: pandas-dev__pandas-36623
Token count is too large: numpy__numpy-20502
Token count is too large: Qiskit__qiskit-155


Generating train split: 2147 examples [02:46, 12.24 examples/s]

Token count is too large: pandas-dev__pandas-37571
Token count is too large: gitpython-developers__GitPython-686
Token count is too large: mesonbuild__meson-11849
Token count is too large: Lightning-AI__lightning-1235
Token count is too large: google__jax-527
Token count is too large: huggingface__transformers-4881
Token count is too large: pandas-dev__pandas-39028
Token count is too large: apache__airflow-28685
Token count is too large: huggingface__transformers-23799
Token count is too large: Lightning-AI__lightning-3060
Token count is too large: celery__celery-4779
Token count is too large: huggingface__transformers-20592
Token count is too large: Qiskit__qiskit-9368


Generating train split: 2151 examples [02:46,  9.60 examples/s]

Token count is too large: huggingface__transformers-22235
Token count is too large: pandas-dev__pandas-6259
Token count is too large: conda__conda-7629
Token count is too large: huggingface__transformers-8881
Token count is too large: pantsbuild__pants-15665
Token count is too large: pandas-dev__pandas-25898
Token count is too large: google__jax-675
Token count is too large: googleapis__google-cloud-python-9205
Token count is too large: ipython__ipython-6978
Token count is too large: conda__conda-5986
Token count is too large: mesonbuild__meson-9566


Generating train split: 2156 examples [02:46, 11.92 examples/s]

Token count is too large: pandas-dev__pandas-26471
Token count is too large: PrefectHQ__prefect-1429
Token count is too large: conda__conda-12104
Token count is too large: conan-io__conan-2118
Token count is too large: Qiskit__qiskit-2838
Token count is too large: pandas-dev__pandas-24554
Token count is too large: pyca__cryptography-4063
Token count is too large: conda__conda-2291
Token count is too large: pandas-dev__pandas-5810
Token count is too large: pandas-dev__pandas-38668
Token count is too large: numpy__numpy-12807
Token count is too large: docker__compose-370
Token count is too large: ipython__ipython-7903
Token count is too large: pandas-dev__pandas-10703
Token count is too large: Qiskit__qiskit-2238


Generating train split: 2159 examples [02:47, 12.50 examples/s]

Token count is too large: pypa__pip-3211
Token count is too large: pandas-dev__pandas-26292
Token count is too large: pandas-dev__pandas-17865
Token count is too large: pandas-dev__pandas-13551
Token count is too large: wagtail__wagtail-9721
Token count is too large: pandas-dev__pandas-33628
Token count is too large: pandas-dev__pandas-36077
Token count is too large: pandas-dev__pandas-39484
Token count is too large: ytdl-org__youtube-dl-3202


Generating train split: 2167 examples [02:47, 20.08 examples/s]

Token count is too large: ray-project__ray-5889
Token count is too large: conan-io__conan-378
Token count is too large: Qiskit__qiskit-10279
Token count is too large: pandas-dev__pandas-17990
Token count is too large: pandas-dev__pandas-25009
Token count is too large: Qiskit__qiskit-3084
Token count is too large: Qiskit__qiskit-6487
Token count is too large: pantsbuild__pants-5007
Token count is too large: ray-project__ray-1499
Token count is too large: pandas-dev__pandas-36264
Token count is too large: googleapis__google-cloud-python-430
Token count is too large: pandas-dev__pandas-36604
Token count is too large: pandas-dev__pandas-5292
Token count is too large: conda__conda-6274
Token count is too large: Qiskit__qiskit-5417
Token count is too large: PrefectHQ__prefect-94
Token count is too large: pandas-dev__pandas-39229
Token count is too large: pandas-dev__pandas-22952
Token count is too large: mesonbuild__meson-7447
Token count is too large: googleapis__google-cloud-python-11348
T

Generating train split: 2172 examples [02:47, 13.86 examples/s]

Token count is too large: pypa__pip-9599
Token count is too large: apache__airflow-25556
Token count is too large: pandas-dev__pandas-4360
Token count is too large: ipython__ipython-3319
Token count is too large: wagtail__wagtail-9726
Token count is too large: conda__conda-8222
Token count is too large: Qiskit__qiskit-3722
Token count is too large: pandas-dev__pandas-37363
Token count is too large: pandas-dev__pandas-26588
Token count is too large: celery__celery-8432
Token count is too large: wagtail__wagtail-227
Token count is too large: Qiskit__qiskit-6318
Token count is too large: jupyterlab__jupyterlab-3127
Token count is too large: celery__celery-5587


Generating train split: 2177 examples [02:48, 14.68 examples/s]

Token count is too large: pandas-dev__pandas-7133
Token count is too large: wagtail__wagtail-1018
Token count is too large: conan-io__conan-5407
Token count is too large: open-mmlab__mmdetection-3608
Token count is too large: pandas-dev__pandas-26425
Token count is too large: conda__conda-4325
Token count is too large: pandas-dev__pandas-6104
Token count is too large: ipython__ipython-11452
Token count is too large: numpy__numpy-9815
Token count is too large: mesonbuild__meson-6704
Token count is too large: pypa__pip-6764
Token count is too large: pandas-dev__pandas-4662
Token count is too large: pandas-dev__pandas-38380
Token count is too large: pandas-dev__pandas-31511
Token count is too large: pandas-dev__pandas-37097


Generating train split: 2183 examples [02:48, 13.92 examples/s]

Token count is too large: huggingface__transformers-21144
Token count is too large: pypa__pip-2025
Token count is too large: googleapis__google-cloud-python-5412
Token count is too large: docker__compose-2948
Token count is too large: conan-io__conan-5950
Token count is too large: Qiskit__qiskit-4123
Token count is too large: pandas-dev__pandas-16015


Generating train split: 2186 examples [02:48, 13.59 examples/s]

Token count is too large: numpy__numpy-3866
Token count is too large: ytdl-org__youtube-dl-13382
Token count is too large: Qiskit__qiskit-6059
Token count is too large: ytdl-org__youtube-dl-428
Token count is too large: pandas-dev__pandas-36026
Token count is too large: Qiskit__qiskit-6556
Token count is too large: Qiskit__qiskit-1231
Token count is too large: pantsbuild__pants-19161
Token count is too large: pandas-dev__pandas-29566
Token count is too large: pandas-dev__pandas-11393
Token count is too large: conda__conda-7386
Token count is too large: huggingface__transformers-13855
Token count is too large: DataDog__integrations-core-4925


Generating train split: 2195 examples [02:49, 19.61 examples/s]

Token count is too large: huggingface__transformers-11306
Token count is too large: googleapis__google-cloud-python-6828
Token count is too large: docker__compose-2934
Token count is too large: pandas-dev__pandas-17741
Token count is too large: Qiskit__qiskit-7408
Token count is too large: pypa__pip-11897
Token count is too large: pandas-dev__pandas-17405
Token count is too large: numpy__numpy-11203
Token count is too large: wagtail__wagtail-1879
Token count is too large: pandas-dev__pandas-25982
Token count is too large: celery__celery-6440
Token count is too large: pypa__pip-437
Token count is too large: conan-io__conan-2602
Token count is too large: pypa__pip-1751
Token count is too large: mesonbuild__meson-9828
Token count is too large: googleapis__google-cloud-python-3744
Token count is too large: googleapis__google-cloud-python-3783


Generating train split: 2198 examples [02:49, 18.32 examples/s]

Token count is too large: pandas-dev__pandas-26496
Token count is too large: pandas-dev__pandas-22308
Token count is too large: numpy__numpy-8963
Token count is too large: huggingface__transformers-15122
Token count is too large: google__jax-2674
Token count is too large: pandas-dev__pandas-36305
Token count is too large: Lightning-AI__lightning-1458
Token count is too large: huggingface__transformers-13882
Token count is too large: conan-io__conan-4354
Token count is too large: pandas-dev__pandas-23591
Token count is too large: pandas-dev__pandas-31894
Token count is too large: googleapis__google-cloud-python-2724
Token count is too large: celery__celery-5918
Token count is too large: pypa__pip-11324
Token count is too large: conda__conda-5148
Token count is too large: pandas-dev__pandas-6135


Generating train split: 2203 examples [02:49, 14.93 examples/s]

Token count is too large: pandas-dev__pandas-23398
Token count is too large: conan-io__conan-3413
Token count is too large: googleapis__google-cloud-python-258
Token count is too large: huggingface__transformers-23725
Token count is too large: pandas-dev__pandas-27932
Token count is too large: pandas-dev__pandas-10615
Token count is too large: conan-io__conan-11799
Token count is too large: pantsbuild__pants-5932
Token count is too large: pandas-dev__pandas-38539


Generating train split: 2207 examples [02:50, 10.99 examples/s]

Token count is too large: pandas-dev__pandas-33983
Token count is too large: pandas-dev__pandas-3959
Token count is too large: docker__compose-5844
Token count is too large: ytdl-org__youtube-dl-7660
Token count is too large: pandas-dev__pandas-32202
Token count is too large: mesonbuild__meson-5348
Token count is too large: docker__compose-5449
Token count is too large: huggingface__transformers-19706


Generating train split: 2209 examples [02:50, 10.96 examples/s]

Token count is too large: pandas-dev__pandas-39046
Token count is too large: pantsbuild__pants-4745
Token count is too large: Lightning-AI__lightning-2657
Token count is too large: pandas-dev__pandas-26818
Token count is too large: Qiskit__qiskit-6940
Token count is too large: pantsbuild__pants-16077
Token count is too large: pandas-dev__pandas-14208
Token count is too large: apache__airflow-20263
Token count is too large: pandas-dev__pandas-22759
Token count is too large: google__jax-2447
Token count is too large: Qiskit__qiskit-2024


Generating train split: 2213 examples [02:50, 11.91 examples/s]

Token count is too large: numpy__numpy-12727
Token count is too large: scipy__scipy-5129
Token count is too large: docker__compose-3334
Token count is too large: jupyterlab__jupyterlab-1397
Token count is too large: googleapis__google-cloud-python-5446
Token count is too large: conda__conda-7377
Token count is too large: scipy__scipy-3938
Token count is too large: ipython__ipython-13282
Token count is too large: ray-project__ray-3124
Token count is too large: numpy__numpy-13117
Token count is too large: dagster-io__dagster-10057
Token count is too large: pandas-dev__pandas-21612
Token count is too large: pypa__pip-2288


Generating train split: 2218 examples [02:51, 15.53 examples/s]

Token count is too large: mesonbuild__meson-8200
Token count is too large: huggingface__transformers-7039
Token count is too large: Qiskit__qiskit-6621
Token count is too large: pandas-dev__pandas-24255
Token count is too large: apache__airflow-25782
Token count is too large: conda__conda-3811
Token count is too large: numpy__numpy-23757
Token count is too large: pandas-dev__pandas-38746
Token count is too large: docker__compose-6371
Token count is too large: pyca__cryptography-2196
Token count is too large: pandas-dev__pandas-16458
Token count is too large: huggingface__transformers-2217
Token count is too large: jupyterlab__jupyterlab-8779
Token count is too large: pandas-dev__pandas-19881


Generating train split: 2224 examples [02:51, 17.01 examples/s]

Token count is too large: pandas-dev__pandas-24189
Token count is too large: pandas-dev__pandas-17447
Token count is too large: ytdl-org__youtube-dl-17934
Token count is too large: wagtail__wagtail-714
Token count is too large: numpy__numpy-6053
Token count is too large: pandas-dev__pandas-33477
Token count is too large: Lightning-AI__lightning-2767
Token count is too large: pandas-dev__pandas-37355
Token count is too large: numpy__numpy-12463


Generating train split: 2226 examples [02:51, 14.44 examples/s]

Token count is too large: pandas-dev__pandas-3978
Token count is too large: apache__airflow-32354
Token count is too large: mesonbuild__meson-3223
Token count is too large: pandas-dev__pandas-14077
Token count is too large: numpy__numpy-13571
Token count is too large: mesonbuild__meson-4356
Token count is too large: pandas-dev__pandas-28047
Token count is too large: pandas-dev__pandas-20938


Generating train split: 2228 examples [02:52,  9.89 examples/s]

Token count is too large: mesonbuild__meson-4010
Token count is too large: pandas-dev__pandas-17742
Token count is too large: conda__conda-3080
Token count is too large: huggingface__transformers-24916
Token count is too large: ytdl-org__youtube-dl-17407
Token count is too large: Qiskit__qiskit-6236
Token count is too large: pandas-dev__pandas-6718
Token count is too large: pandas-dev__pandas-18788
Token count is too large: pandas-dev__pandas-26561
Token count is too large: pandas-dev__pandas-16444
Token count is too large: pantsbuild__pants-17010
Token count is too large: pandas-dev__pandas-20569
Token count is too large: mesonbuild__meson-3743
Token count is too large: numpy__numpy-7590


Generating train split: 2236 examples [02:52, 14.11 examples/s]

Token count is too large: mesonbuild__meson-11811
Token count is too large: pandas-dev__pandas-33220
Token count is too large: apache__airflow-24943
Token count is too large: conan-io__conan-2546
Token count is too large: Lightning-AI__lightning-3322
Token count is too large: googleapis__google-cloud-python-9084
Token count is too large: googleapis__google-cloud-python-6853
Token count is too large: docker__compose-2210
Token count is too large: pyca__cryptography-3550
Token count is too large: pandas-dev__pandas-10042
Token count is too large: googleapis__google-cloud-python-9626


Generating train split: 2243 examples [02:52, 19.81 examples/s]

Token count is too large: conan-io__conan-7527
Token count is too large: pandas-dev__pandas-30976
Token count is too large: pandas-dev__pandas-36540
Token count is too large: huggingface__transformers-10632
Token count is too large: pandas-dev__pandas-38387
Token count is too large: conan-io__conan-5908
Token count is too large: ytdl-org__youtube-dl-16129
Token count is too large: pandas-dev__pandas-26152
Token count is too large: googleapis__google-cloud-python-5509
Token count is too large: conda__conda-6653
Token count is too large: pandas-dev__pandas-3591


Generating train split: 2246 examples [02:52, 17.91 examples/s]

Token count is too large: pandas-dev__pandas-18617
Token count is too large: docker__compose-2996
Token count is too large: numpy__numpy-3450
Token count is too large: Lightning-AI__lightning-2997
Token count is too large: Qiskit__qiskit-2108
Token count is too large: dagster-io__dagster-1101
Token count is too large: numpy__numpy-3182
Token count is too large: huggingface__transformers-2232
Token count is too large: pypa__pip-10525
Token count is too large: pandas-dev__pandas-15913
Token count is too large: pandas-dev__pandas-3124
Token count is too large: pandas-dev__pandas-28052
Token count is too large: pantsbuild__pants-15373
Token count is too large: pandas-dev__pandas-38727
Token count is too large: huggingface__transformers-20210
Token count is too large: google__jax-666
Token count is too large: pypa__pip-7962


Generating train split: 2249 examples [02:53, 14.45 examples/s]

Token count is too large: pandas-dev__pandas-17656
Token count is too large: PrefectHQ__prefect-324
Token count is too large: googleapis__google-cloud-python-488
Token count is too large: ipython__ipython-6394
Token count is too large: pandas-dev__pandas-19247
Token count is too large: pandas-dev__pandas-34453
Token count is too large: huggingface__transformers-14514
Token count is too large: googleapis__google-cloud-python-1688
Token count is too large: pandas-dev__pandas-19849
Token count is too large: pandas-dev__pandas-10974
Token count is too large: pandas-dev__pandas-6221
Token count is too large: mesonbuild__meson-5083


Generating train split: 2251 examples [02:53,  9.49 examples/s]

Token count is too large: pandas-dev__pandas-17798
Token count is too large: Lightning-AI__lightning-567
Token count is too large: pypa__pip-6253
Token count is too large: celery__celery-6378
Token count is too large: jupyterlab__jupyterlab-2759
Token count is too large: googleapis__google-cloud-python-3370
Token count is too large: pandas-dev__pandas-33498
Token count is too large: pandas-dev__pandas-4217
Token count is too large: pandas-dev__pandas-18783
Token count is too large: mesonbuild__meson-8066


Generating train split: 2258 examples [02:54, 14.63 examples/s]

Token count is too large: googleapis__google-cloud-python-9506
Token count is too large: pandas-dev__pandas-16939
Token count is too large: apache__airflow-32819
Token count is too large: mesonbuild__meson-6688
Token count is too large: ytdl-org__youtube-dl-29765
Token count is too large: pandas-dev__pandas-38330
Token count is too large: Qiskit__qiskit-6593
Token count is too large: pandas-dev__pandas-5327
Token count is too large: scipy__scipy-3300


Generating train split: 2262 examples [02:54, 16.73 examples/s]

Token count is too large: pandas-dev__pandas-19893
Token count is too large: pantsbuild__pants-14949
Token count is too large: googleapis__google-cloud-python-2321
Token count is too large: Qiskit__qiskit-5286
Token count is too large: Qiskit__qiskit-9836
Token count is too large: pandas-dev__pandas-21293
Token count is too large: googleapis__google-cloud-python-2149
Token count is too large: ray-project__ray-960
Token count is too large: Qiskit__qiskit-4192
Token count is too large: huggingface__transformers-13586
Token count is too large: ipython__ipython-2811


Generating train split: 2266 examples [02:54, 16.45 examples/s]

Token count is too large: pandas-dev__pandas-27777
Token count is too large: mesonbuild__meson-6132
Token count is too large: pandas-dev__pandas-38654
Token count is too large: twisted__twisted-11914
Token count is too large: pandas-dev__pandas-17090
Token count is too large: docker__compose-2547
Token count is too large: pyca__cryptography-2435
Token count is too large: celery__celery-4443
Token count is too large: ray-project__ray-1984
Token count is too large: ipython__ipython-7005
Token count is too large: huggingface__transformers-18097
Token count is too large: mesonbuild__meson-7128
Token count is too large: pandas-dev__pandas-20786
Token count is too large: PrefectHQ__prefect-948
Token count is too large: jupyterlab__jupyterlab-7268
Token count is too large: pandas-dev__pandas-3384
Token count is too large: pandas-dev__pandas-7107


Generating train split: 2269 examples [02:54, 14.39 examples/s]

Token count is too large: conda__conda-6892
Token count is too large: numpy__numpy-20786
Token count is too large: google__jax-1358
Token count is too large: Lightning-AI__lightning-2973
Token count is too large: googleapis__google-cloud-python-321
Token count is too large: pandas-dev__pandas-35027
Token count is too large: tiangolo__fastapi-439
Token count is too large: mesonbuild__meson-5204
Token count is too large: pandas-dev__pandas-15451
Token count is too large: numpy__numpy-20505
Token count is too large: pandas-dev__pandas-19189
Token count is too large: docker__compose-6041
Token count is too large: pandas-dev__pandas-7191
Token count is too large: Qiskit__qiskit-2683
Token count is too large: gitpython-developers__GitPython-950
Token count is too large: pantsbuild__pants-4822
Token count is too large: PrefectHQ__prefect-695


Generating train split: 2273 examples [02:55, 13.90 examples/s]

Token count is too large: pandas-dev__pandas-4784
Token count is too large: pandas-dev__pandas-10097
Token count is too large: huggingface__transformers-5252
Token count is too large: Lightning-AI__lightning-424
Token count is too large: pypa__pip-8659
Token count is too large: ipython__ipython-11426
Token count is too large: huggingface__transformers-6929
Token count is too large: apache__airflow-27609
Token count is too large: pandas-dev__pandas-27855
Token count is too large: Qiskit__qiskit-4444
Token count is too large: pypa__pip-5936


Generating train split: 2280 examples [02:55, 16.94 examples/s]

Token count is too large: celery__celery-4730
Token count is too large: docker__compose-2877
Token count is too large: Qiskit__qiskit-8731
Token count is too large: Qiskit__qiskit-7301
Token count is too large: googleapis__google-cloud-python-257
Token count is too large: gitpython-developers__GitPython-961
Token count is too large: pandas-dev__pandas-39759
Token count is too large: pandas-dev__pandas-17888
Token count is too large: pandas-dev__pandas-32121
Token count is too large: ipython__ipython-6249
Token count is too large: apache__airflow-18757
Token count is too large: pandas-dev__pandas-8705
Token count is too large: mesonbuild__meson-4479


Generating train split: 2282 examples [02:55, 15.53 examples/s]

Token count is too large: huggingface__transformers-17091
Token count is too large: scipy__scipy-5525
Token count is too large: googleapis__google-cloud-python-8568
Token count is too large: pandas-dev__pandas-24132
Token count is too large: pandas-dev__pandas-7823
Token count is too large: numpy__numpy-22834
Token count is too large: jupyterlab__jupyterlab-2838
Token count is too large: pandas-dev__pandas-23405
Token count is too large: docker__compose-2746
Token count is too large: numpy__numpy-9881
Token count is too large: pandas-dev__pandas-6728
Token count is too large: ipython__ipython-6969
Token count is too large: Lightning-AI__lightning-2134
Token count is too large: scipy__scipy-4762
Token count is too large: ipython__ipython-4928
Token count is too large: numpy__numpy-16476
Token count is too large: pandas-dev__pandas-9555
Token count is too large: ipython__ipython-4453
Token count is too large: pandas-dev__pandas-26080
Token count is too large: pandas-dev__pandas-25020


Generating train split: 2293 examples [02:56, 20.05 examples/s]

Token count is too large: pandas-dev__pandas-16821
Token count is too large: pantsbuild__pants-14510
Token count is too large: huggingface__transformers-14193
Token count is too large: docker__compose-3052
Token count is too large: docker__compose-4419
Token count is too large: pandas-dev__pandas-35568
Token count is too large: pandas-dev__pandas-32990
Token count is too large: conda__conda-6363
Token count is too large: pandas-dev__pandas-9889
Token count is too large: pandas-dev__pandas-8512
Token count is too large: mesonbuild__meson-1465


Generating train split: 2298 examples [02:56, 22.02 examples/s]

Token count is too large: googleapis__google-cloud-python-6391
Token count is too large: Qiskit__qiskit-4747
Token count is too large: pandas-dev__pandas-17066
Token count is too large: celery__celery-4260
Token count is too large: ipython__ipython-4796
Token count is too large: Qiskit__qiskit-6673
Token count is too large: ytdl-org__youtube-dl-7113
Token count is too large: apache__airflow-31836
Token count is too large: pandas-dev__pandas-16989
Token count is too large: pandas-dev__pandas-36080
Token count is too large: pantsbuild__pants-16249
Token count is too large: pandas-dev__pandas-39623
Token count is too large: pandas-dev__pandas-11134
Token count is too large: pantsbuild__pants-15014
Token count is too large: pandas-dev__pandas-26590
Token count is too large: pandas-dev__pandas-36109
Token count is too large: huggingface__transformers-13408
Token count is too large: Qiskit__qiskit-4150
Token count is too large: pandas-dev__pandas-6677
Token count is too large: pandas-dev__pa

Generating train split: 2306 examples [02:57, 12.73 examples/s]

Token count is too large: huggingface__transformers-9596
Token count is too large: Lightning-AI__lightning-701
Token count is too large: open-mmlab__mmdetection-9479
Token count is too large: apache__airflow-12154
Token count is too large: huggingface__transformers-10070
Token count is too large: mesonbuild__meson-2635
Token count is too large: pandas-dev__pandas-10240
Token count is too large: ytdl-org__youtube-dl-2944
Token count is too large: conda__conda-6295
Token count is too large: googleapis__google-cloud-python-11335
Token count is too large: mesonbuild__meson-607
Token count is too large: pandas-dev__pandas-27929
Token count is too large: pandas-dev__pandas-18787
Token count is too large: huggingface__transformers-8996
Token count is too large: huggingface__transformers-2065
Token count is too large: pandas-dev__pandas-33026
Token count is too large: pandas-dev__pandas-10054
Token count is too large: pandas-dev__pandas-32036
Token count is too large: ray-project__ray-10792
To

Generating train split: 2312 examples [02:57, 14.41 examples/s]

Token count is too large: pandas-dev__pandas-16266
Token count is too large: Qiskit__qiskit-4481
Token count is too large: numpy__numpy-4305
Token count is too large: conan-io__conan-4514
Token count is too large: Qiskit__qiskit-5570
Token count is too large: pandas-dev__pandas-24190
Token count is too large: pandas-dev__pandas-39040
Token count is too large: mesonbuild__meson-5095


Generating train split: 2318 examples [02:57, 17.28 examples/s]

Token count is too large: Lightning-AI__lightning-733
Token count is too large: docker__compose-2458
Token count is too large: pandas-dev__pandas-31724
Token count is too large: mesonbuild__meson-5520
Token count is too large: numpy__numpy-21306
Token count is too large: numpy__numpy-16953
Token count is too large: mesonbuild__meson-9402
Token count is too large: mesonbuild__meson-4147
Token count is too large: pandas-dev__pandas-23915
Token count is too large: wagtail__wagtail-8391
Token count is too large: numpy__numpy-21904


Generating train split: 2327 examples [02:58, 21.46 examples/s]

Token count is too large: conda__conda-6647
Token count is too large: docker__compose-2880
Token count is too large: pandas-dev__pandas-38445
Token count is too large: ray-project__ray-4338
Token count is too large: docker__compose-6327
Token count is too large: pandas-dev__pandas-28793
Token count is too large: mesonbuild__meson-7919
Token count is too large: ipython__ipython-3538
Token count is too large: pantsbuild__pants-7241
Token count is too large: pandas-dev__pandas-27426


Generating train split: 2330 examples [02:58, 22.49 examples/s]

Token count is too large: ytdl-org__youtube-dl-13606
Token count is too large: ipython__ipython-13276
Token count is too large: ytdl-org__youtube-dl-8374
Token count is too large: pandas-dev__pandas-22230
Token count is too large: mesonbuild__meson-490
Token count is too large: Lightning-AI__lightning-2356
Token count is too large: mesonbuild__meson-9679
Token count is too large: open-mmlab__mmdetection-4391
Token count is too large: pandas-dev__pandas-16438


Generating train split: 2337 examples [02:58, 20.14 examples/s]

Token count is too large: apache__airflow-20206
Token count is too large: huggingface__transformers-11746
Token count is too large: pandas-dev__pandas-25224
Token count is too large: pandas-dev__pandas-13276
Token count is too large: huggingface__transformers-17764
Token count is too large: Qiskit__qiskit-8997
Token count is too large: numpy__numpy-18369
Token count is too large: numpy__numpy-19545
Token count is too large: pandas-dev__pandas-5739
Token count is too large: mesonbuild__meson-7263
Token count is too large: googleapis__google-cloud-python-6916
Token count is too large: pandas-dev__pandas-22488
Token count is too large: pandas-dev__pandas-10716
Token count is too large: pandas-dev__pandas-27530
Token count is too large: pandas-dev__pandas-20549
Token count is too large: googleapis__google-cloud-python-5021
Token count is too large: numpy__numpy-260
Token count is too large: jupyterlab__jupyterlab-5119
Token count is too large: pandas-dev__pandas-25182


Generating train split: 2340 examples [02:58, 16.38 examples/s]

Token count is too large: huggingface__transformers-22272
Token count is too large: pandas-dev__pandas-23506
Token count is too large: pantsbuild__pants-16037
Token count is too large: pandas-dev__pandas-4232
Token count is too large: pandas-dev__pandas-17772
Token count is too large: dagster-io__dagster-9518
Token count is too large: apache__airflow-16352
Token count is too large: apache__airflow-13654
Token count is too large: apache__airflow-33045
Token count is too large: jupyterlab__jupyterlab-7723
Token count is too large: pandas-dev__pandas-20946
Token count is too large: mesonbuild__meson-4644
Token count is too large: mesonbuild__meson-8096
Token count is too large: pandas-dev__pandas-5745


Generating train split: 2348 examples [02:59, 16.46 examples/s]

Token count is too large: pandas-dev__pandas-36997
Token count is too large: pandas-dev__pandas-22617
Token count is too large: huggingface__transformers-7630
Token count is too large: pantsbuild__pants-4373
Token count is too large: Lightning-AI__lightning-1425
Token count is too large: pandas-dev__pandas-20873
Token count is too large: wagtail__wagtail-10618
Token count is too large: pandas-dev__pandas-38070


Generating train split: 2352 examples [02:59, 18.58 examples/s]

Token count is too large: googleapis__google-cloud-python-9268
Token count is too large: pandas-dev__pandas-26580
Token count is too large: twisted__twisted-11747
Token count is too large: pantsbuild__pants-10900
Token count is too large: pandas-dev__pandas-16859
Token count is too large: pyca__cryptography-3769
Token count is too large: Qiskit__qiskit-655
Token count is too large: pyca__cryptography-7439
Token count is too large: mesonbuild__meson-1098
Token count is too large: conan-io__conan-2964
Token count is too large: pandas-dev__pandas-19377
Token count is too large: huggingface__transformers-10703
Token count is too large: twisted__twisted-11805


Generating train split: 2360 examples [02:59, 21.93 examples/s]

Token count is too large: pandas-dev__pandas-29140
Token count is too large: pandas-dev__pandas-4308
Token count is too large: pandas-dev__pandas-34709
Token count is too large: huggingface__transformers-4109
Token count is too large: ipython__ipython-5788
Token count is too large: pandas-dev__pandas-32594
Token count is too large: pandas-dev__pandas-24819
Token count is too large: Qiskit__qiskit-8998
Token count is too large: Lightning-AI__lightning-1645


Generating train split: 2365 examples [03:00, 18.15 examples/s]

Token count is too large: pandas-dev__pandas-6364
Token count is too large: ytdl-org__youtube-dl-14497
Token count is too large: huggingface__transformers-5060
Token count is too large: pandas-dev__pandas-36842
Token count is too large: PrefectHQ__prefect-1504
Token count is too large: googleapis__google-cloud-python-5576
Token count is too large: Qiskit__qiskit-8560
Token count is too large: Qiskit__qiskit-2748
Token count is too large: celery__celery-6818
Token count is too large: Lightning-AI__lightning-2255
Token count is too large: Qiskit__qiskit-1849
Token count is too large: pandas-dev__pandas-31684
Token count is too large: google__jax-1736
Token count is too large: Lightning-AI__lightning-1760
Token count is too large: googleapis__google-cloud-python-2141
Token count is too large: ray-project__ray-5160
Token count is too large: mesonbuild__meson-9743
Token count is too large: Lightning-AI__lightning-3066
Token count is too large: Qiskit__qiskit-6084
Token count is too large: L

Generating train split: 2374 examples [03:00, 19.73 examples/s]

Token count is too large: Qiskit__qiskit-8689
Token count is too large: apache__airflow-11797
Token count is too large: pandas-dev__pandas-14116
Token count is too large: pandas-dev__pandas-17819
Token count is too large: pandas-dev__pandas-38597
Token count is too large: Lightning-AI__lightning-543
Token count is too large: pandas-dev__pandas-38638
Token count is too large: pandas-dev__pandas-6301
Token count is too large: pandas-dev__pandas-7043
Token count is too large: pandas-dev__pandas-4761
Token count is too large: pandas-dev__pandas-23190
Token count is too large: huggingface__transformers-9381
Token count is too large: pandas-dev__pandas-18177
Token count is too large: pandas-dev__pandas-11148
Token count is too large: mesonbuild__meson-3374
Token count is too large: conda__conda-2838


Generating train split: 2379 examples [03:01, 15.15 examples/s]

Token count is too large: mesonbuild__meson-10930
Token count is too large: pandas-dev__pandas-16317
Token count is too large: pandas-dev__pandas-6202
Token count is too large: pantsbuild__pants-11635
Token count is too large: wagtail__wagtail-9518
Token count is too large: numpy__numpy-20344
Token count is too large: pandas-dev__pandas-8984
Token count is too large: pandas-dev__pandas-30676
Token count is too large: jupyterlab__jupyterlab-9492
Token count is too large: pypa__pip-3652
Token count is too large: pandas-dev__pandas-27607
Token count is too large: pyca__cryptography-2124
Token count is too large: google__jax-360
Token count is too large: gitpython-developers__GitPython-936
Token count is too large: DataDog__integrations-core-1583
Token count is too large: mesonbuild__meson-2340
Token count is too large: mesonbuild__meson-6877
Token count is too large: wagtail__wagtail-7669
Token count is too large: google__jax-2805
Token count is too large: pandas-dev__pandas-10283


Generating train split: 2395 examples [03:01, 28.26 examples/s]

Token count is too large: googleapis__google-cloud-python-6088
Token count is too large: ray-project__ray-6141
Token count is too large: conda__conda-2706
Token count is too large: mesonbuild__meson-10089
Token count is too large: huggingface__transformers-10868
Token count is too large: pandas-dev__pandas-3138
Token count is too large: apache__airflow-30190
Token count is too large: pyca__cryptography-3124
Token count is too large: gitpython-developers__GitPython-697
Token count is too large: ipython__ipython-12207
Token count is too large: numpy__numpy-22375
Token count is too large: ytdl-org__youtube-dl-4388
Token count is too large: huggingface__transformers-15590
Token count is too large: huggingface__transformers-24429
Token count is too large: pantsbuild__pants-17251
Token count is too large: conda__conda-5226
Token count is too large: Qiskit__qiskit-7288
Token count is too large: googleapis__google-cloud-python-602
Token count is too large: pantsbuild__pants-16110
Token count i

Generating train split: 2399 examples [03:01, 21.91 examples/s]

Token count is too large: ytdl-org__youtube-dl-30292
Token count is too large: Lightning-AI__lightning-625
Token count is too large: huggingface__transformers-18902
Token count is too large: pandas-dev__pandas-7094
Token count is too large: pandas-dev__pandas-20655
Token count is too large: pantsbuild__pants-14822
Token count is too large: PrefectHQ__prefect-2076
Token count is too large: ipython__ipython-4722


Generating train split: 2412 examples [03:02, 30.98 examples/s]

Token count is too large: pandas-dev__pandas-30083
Token count is too large: conda__conda-5096
Token count is too large: google__jax-780
Token count is too large: Lightning-AI__lightning-2253
Token count is too large: mesonbuild__meson-3599
Token count is too large: ipython__ipython-9419
Token count is too large: celery__celery-6942
Token count is too large: googleapis__google-cloud-python-1280
Token count is too large: pandas-dev__pandas-34222
Token count is too large: pandas-dev__pandas-33629
Token count is too large: pandas-dev__pandas-16133
Token count is too large: pandas-dev__pandas-24510
Token count is too large: Qiskit__qiskit-1055
Token count is too large: huggingface__transformers-17119
Token count is too large: pypa__pip-1040
Token count is too large: huggingface__transformers-11896
Token count is too large: PrefectHQ__prefect-2944
Token count is too large: Qiskit__qiskit-2006


Generating train split: 2416 examples [03:02, 19.07 examples/s]

Token count is too large: mesonbuild__meson-1209
Token count is too large: docker__compose-2646
Token count is too large: pandas-dev__pandas-28974
Token count is too large: pandas-dev__pandas-35986
Token count is too large: dagster-io__dagster-14707
Token count is too large: huggingface__transformers-21651
Token count is too large: numpy__numpy-439
Token count is too large: google__jax-2214
Token count is too large: pandas-dev__pandas-17812
Token count is too large: huggingface__transformers-18078
Token count is too large: pandas-dev__pandas-16952
Token count is too large: pypa__pip-7593
Token count is too large: pandas-dev__pandas-18352
Token count is too large: pandas-dev__pandas-28239


Generating train split: 2421 examples [03:03, 15.90 examples/s]

Token count is too large: pandas-dev__pandas-27696
Token count is too large: Qiskit__qiskit-4522
Token count is too large: pandas-dev__pandas-13458
Token count is too large: numpy__numpy-13673
Token count is too large: pandas-dev__pandas-5512
Token count is too large: huggingface__transformers-5999
Token count is too large: ipython__ipython-2255
Token count is too large: pandas-dev__pandas-29373
Token count is too large: pandas-dev__pandas-14501
Token count is too large: ytdl-org__youtube-dl-9430


Generating train split: 2426 examples [03:03, 17.77 examples/s]

Token count is too large: ipython__ipython-5810
Token count is too large: huggingface__transformers-18833
Token count is too large: pandas-dev__pandas-38018
Token count is too large: conan-io__conan-4872
Token count is too large: pandas-dev__pandas-7497
Token count is too large: conan-io__conan-7871
Token count is too large: docker__compose-5701
Token count is too large: pandas-dev__pandas-19139
Token count is too large: Lightning-AI__lightning-1529
Token count is too large: apache__airflow-22557
Token count is too large: pandas-dev__pandas-19818
Token count is too large: pandas-dev__pandas-38141
Token count is too large: pandas-dev__pandas-36440
Token count is too large: huggingface__transformers-13495
Token count is too large: pandas-dev__pandas-25069
Token count is too large: huggingface__transformers-21345
Token count is too large: Lightning-AI__lightning-744
Token count is too large: pandas-dev__pandas-6652
Token count is too large: pypa__pip-2304
Token count is too large: conda__

Generating train split: 2429 examples [03:04, 10.95 examples/s]

Token count is too large: numpy__numpy-6633
Token count is too large: Qiskit__qiskit-1061
Token count is too large: pandas-dev__pandas-7264
Token count is too large: docker__compose-5896
Token count is too large: pandas-dev__pandas-25686
Token count is too large: celery__celery-6218
Token count is too large: pandas-dev__pandas-23540
Token count is too large: pandas-dev__pandas-37138
Token count is too large: Qiskit__qiskit-1054
Token count is too large: pandas-dev__pandas-21183
Token count is too large: docker__compose-2783
Token count is too large: PrefectHQ__prefect-729


Generating train split: 2431 examples [03:04, 10.95 examples/s]

Token count is too large: conan-io__conan-136
Token count is too large: numpy__numpy-23335
Token count is too large: pandas-dev__pandas-25541
Token count is too large: conan-io__conan-2504
Token count is too large: conda__conda-7729
Token count is too large: ytdl-org__youtube-dl-30596
Token count is too large: pandas-dev__pandas-4957
Token count is too large: apache__airflow-20737
Token count is too large: pandas-dev__pandas-5864
Token count is too large: pandas-dev__pandas-27665
There was an error processing
Token count is too large: googleapis__google-cloud-python-7008
Token count is too large: huggingface__transformers-14306
Token count is too large: numpy__numpy-9942
Token count is too large: PrefectHQ__prefect-2590
Token count is too large: ytdl-org__youtube-dl-1007
Token count is too large: apache__airflow-32217
Token count is too large: pandas-dev__pandas-23132


Generating train split: 2436 examples [03:04, 10.63 examples/s]

Token count is too large: pandas-dev__pandas-8364
Token count is too large: wagtail__wagtail-5837
Token count is too large: pandas-dev__pandas-31641
Token count is too large: pandas-dev__pandas-23285
Token count is too large: docker__compose-6126
Token count is too large: Qiskit__qiskit-10383
Token count is too large: ipython__ipython-1569
Token count is too large: conda__conda-1496
Token count is too large: ipython__ipython-3184
Token count is too large: Qiskit__qiskit-6942
Token count is too large: conda__conda-5936
Token count is too large: pandas-dev__pandas-24841


Generating train split: 2439 examples [03:04, 12.00 examples/s]

Token count is too large: mesonbuild__meson-272
Token count is too large: pandas-dev__pandas-37801
Token count is too large: apache__airflow-27881
Token count is too large: huggingface__transformers-10229
Token count is too large: huggingface__transformers-17785
Token count is too large: Qiskit__qiskit-5365


Generating train split: 2442 examples [03:05, 12.92 examples/s]

Token count is too large: ipython__ipython-13592
Token count is too large: mesonbuild__meson-6635
Token count is too large: pandas-dev__pandas-30925
Token count is too large: numpy__numpy-21868
Token count is too large: googleapis__google-cloud-python-6829
Token count is too large: pandas-dev__pandas-18514
Token count is too large: gitpython-developers__GitPython-859
Token count is too large: conda__conda-6657
Token count is too large: google__jax-2593
Token count is too large: Qiskit__qiskit-807
Token count is too large: huggingface__transformers-11682
Token count is too large: mesonbuild__meson-10464
Token count is too large: pantsbuild__pants-4905


Generating train split: 2447 examples [03:05, 13.57 examples/s]

Token count is too large: huggingface__transformers-14508
Token count is too large: pantsbuild__pants-5911
Token count is too large: Lightning-AI__lightning-2890
Token count is too large: Qiskit__qiskit-5360
Token count is too large: pandas-dev__pandas-35587
Token count is too large: pandas-dev__pandas-3277
Token count is too large: huggingface__transformers-19204
Token count is too large: conda__conda-5478
Token count is too large: numpy__numpy-19089
Token count is too large: Lightning-AI__lightning-1528
Token count is too large: pandas-dev__pandas-36724
Token count is too large: numpy__numpy-4861
Token count is too large: huggingface__transformers-7282
Token count is too large: google__jax-1568
Token count is too large: Qiskit__qiskit-2205
Token count is too large: ytdl-org__youtube-dl-12391


Generating train split: 2449 examples [03:05, 13.66 examples/s]

Token count is too large: numpy__numpy-3452
Token count is too large: apache__airflow-32785
Token count is too large: googleapis__google-cloud-python-2002
Token count is too large: googleapis__google-cloud-python-7297
Token count is too large: conda__conda-6887
Token count is too large: numpy__numpy-21558
Token count is too large: apache__airflow-20888
Token count is too large: pyca__cryptography-3014
Token count is too large: huggingface__transformers-8368
Token count is too large: huggingface__transformers-10213
Token count is too large: pandas-dev__pandas-36054
Token count is too large: pandas-dev__pandas-30096
Token count is too large: twisted__twisted-11678
Token count is too large: celery__celery-2840
Token count is too large: mesonbuild__meson-6558
Token count is too large: docker__compose-6131
Token count is too large: pandas-dev__pandas-31383


Generating train split: 2456 examples [03:05, 21.10 examples/s]

Token count is too large: mesonbuild__meson-11830
Token count is too large: mesonbuild__meson-3036
Token count is too large: Lightning-AI__lightning-2033
Token count is too large: conan-io__conan-226
Token count is too large: pandas-dev__pandas-27852
Token count is too large: pandas-dev__pandas-18340
Token count is too large: huggingface__transformers-6713


Generating train split: 2459 examples [03:06, 12.91 examples/s]

Token count is too large: pypa__pip-8611
Token count is too large: pandas-dev__pandas-18167
Token count is too large: mesonbuild__meson-5516
Token count is too large: huggingface__transformers-24404
Token count is too large: Qiskit__qiskit-7972
Token count is too large: googleapis__google-cloud-python-2262
Token count is too large: pyca__cryptography-6303
Token count is too large: pandas-dev__pandas-5043
Token count is too large: mesonbuild__meson-8340
Token count is too large: pandas-dev__pandas-6031
Token count is too large: docker__compose-4635
Token count is too large: pandas-dev__pandas-19529
Token count is too large: apache__airflow-33055
Token count is too large: numpy__numpy-24542
Token count is too large: numpy__numpy-7133
Token count is too large: ipython__ipython-8311


Generating train split: 2465 examples [03:06, 14.84 examples/s]

Token count is too large: pandas-dev__pandas-23941
Token count is too large: huggingface__transformers-11524
Token count is too large: googleapis__google-cloud-python-888
Token count is too large: pandas-dev__pandas-38954
Token count is too large: pypa__pip-12140
Token count is too large: ytdl-org__youtube-dl-8332
Token count is too large: pandas-dev__pandas-14545
Token count is too large: ipython__ipython-7202
Token count is too large: pandas-dev__pandas-14004
Token count is too large: pantsbuild__pants-8047
Token count is too large: tiangolo__fastapi-1547
Token count is too large: Lightning-AI__lightning-1360
Token count is too large: pandas-dev__pandas-3802
Token count is too large: huggingface__transformers-13613


Generating train split: 2474 examples [03:06, 19.63 examples/s]

Token count is too large: pandas-dev__pandas-28412
Token count is too large: conda__conda-6277
Token count is too large: mesonbuild__meson-4860
Token count is too large: apache__airflow-11406
Token count is too large: ytdl-org__youtube-dl-25198
Token count is too large: pandas-dev__pandas-29334
Token count is too large: numpy__numpy-20659
Token count is too large: Qiskit__qiskit-7997
Token count is too large: ipython__ipython-6076
Token count is too large: ray-project__ray-3541


Generating train split: 2478 examples [03:07, 20.36 examples/s]

Token count is too large: pandas-dev__pandas-13678
Token count is too large: pantsbuild__pants-15034
Token count is too large: pandas-dev__pandas-37013
Token count is too large: ytdl-org__youtube-dl-22921
Token count is too large: apache__airflow-16809
Token count is too large: Lightning-AI__lightning-1265
Token count is too large: Qiskit__qiskit-1082
Token count is too large: numpy__numpy-3852
Token count is too large: numpy__numpy-6668
Token count is too large: googleapis__google-cloud-python-1282
Token count is too large: pandas-dev__pandas-34116
Token count is too large: pandas-dev__pandas-5137
Token count is too large: docker__compose-5361
Token count is too large: Qiskit__qiskit-808
Token count is too large: ytdl-org__youtube-dl-26032
Token count is too large: ytdl-org__youtube-dl-8249
Token count is too large: huggingface__transformers-22237


Generating train split: 2481 examples [03:07, 14.73 examples/s]

Token count is too large: googleapis__google-cloud-python-10551
Token count is too large: googleapis__google-cloud-python-2809
Token count is too large: docker__compose-3789
Token count is too large: pandas-dev__pandas-17626
Token count is too large: pandas-dev__pandas-13499
Token count is too large: mesonbuild__meson-8804
Token count is too large: pandas-dev__pandas-15873
Token count is too large: ytdl-org__youtube-dl-7659
Token count is too large: pandas-dev__pandas-17840
Token count is too large: pandas-dev__pandas-4921
Token count is too large: googleapis__google-cloud-python-10079
Token count is too large: huggingface__transformers-17549


Generating train split: 2483 examples [03:07, 11.29 examples/s]

Token count is too large: pandas-dev__pandas-5145
Token count is too large: ipython__ipython-2926
Token count is too large: pandas-dev__pandas-5214
Token count is too large: mesonbuild__meson-1807
Token count is too large: pantsbuild__pants-13956
Token count is too large: pantsbuild__pants-15033
Token count is too large: conda__conda-5824
Token count is too large: pandas-dev__pandas-17424
Token count is too large: conda__conda-11440
Token count is too large: numpy__numpy-5393
Token count is too large: apache__airflow-16102
Token count is too large: google__jax-3224
Token count is too large: conda__conda-8912
Token count is too large: ytdl-org__youtube-dl-26826
Token count is too large: huggingface__transformers-17518
Token count is too large: pyca__cryptography-2129
Token count is too large: pandas-dev__pandas-38257
Token count is too large: Lightning-AI__lightning-852
Token count is too large: docker__compose-970


Generating train split: 2485 examples [03:08,  8.74 examples/s]

Token count is too large: pandas-dev__pandas-37302
Token count is too large: pandas-dev__pandas-10249
Token count is too large: pandas-dev__pandas-18924
Token count is too large: pandas-dev__pandas-12208
Token count is too large: pantsbuild__pants-15415
Token count is too large: ipython__ipython-13483
Token count is too large: numpy__numpy-14932
Token count is too large: google__jax-335
Token count is too large: numpy__numpy-4339
Token count is too large: huggingface__transformers-25523
Token count is too large: pandas-dev__pandas-26273
Token count is too large: ipython__ipython-3528


Generating train split: 2495 examples [03:08, 14.95 examples/s]

Token count is too large: huggingface__transformers-21044
Token count is too large: PrefectHQ__prefect-2180
Token count is too large: jupyterlab__jupyterlab-3071
Token count is too large: pypa__pip-6851
Token count is too large: conda__conda-9418
Token count is too large: pandas-dev__pandas-35440
Token count is too large: conan-io__conan-4229
Token count is too large: Qiskit__qiskit-4470
Token count is too large: pandas-dev__pandas-23581
Token count is too large: celery__celery-5664
Token count is too large: wagtail__wagtail-9590
Token count is too large: pandas-dev__pandas-3655
Token count is too large: pandas-dev__pandas-37794
Token count is too large: pandas-dev__pandas-5197
Token count is too large: numpy__numpy-16811
Token count is too large: pandas-dev__pandas-22253
Token count is too large: pantsbuild__pants-16735
Token count is too large: pandas-dev__pandas-16237
Token count is too large: huggingface__transformers-7683
Token count is too large: numpy__numpy-8905
Token count is 

Generating train split: 2500 examples [03:09, 14.43 examples/s]

Token count is too large: googleapis__google-cloud-python-7126
Token count is too large: Lightning-AI__lightning-715
Token count is too large: Qiskit__qiskit-8752
Token count is too large: pandas-dev__pandas-23096
Token count is too large: pandas-dev__pandas-10497
Token count is too large: apache__airflow-18438
Token count is too large: pandas-dev__pandas-9357
Token count is too large: ipython__ipython-9515
Token count is too large: tiangolo__fastapi-621
Token count is too large: pandas-dev__pandas-1983


Generating train split: 2503 examples [03:09, 15.33 examples/s]

Token count is too large: pandas-dev__pandas-33299
Token count is too large: pandas-dev__pandas-34956
Token count is too large: numpy__numpy-18416
Token count is too large: Qiskit__qiskit-7389
Token count is too large: conan-io__conan-4662
Token count is too large: mesonbuild__meson-10966
Token count is too large: mesonbuild__meson-10085
Token count is too large: docker__compose-5825
Token count is too large: Lightning-AI__lightning-1647
Token count is too large: pypa__pip-10129
Token count is too large: docker__compose-5098
Token count is too large: pandas-dev__pandas-10172
Token count is too large: huggingface__transformers-14291
Token count is too large: googleapis__google-cloud-python-9911
Token count is too large: ipython__ipython-2073
Token count is too large: conan-io__conan-5070
Token count is too large: pantsbuild__pants-4729
Token count is too large: pandas-dev__pandas-25909


Generating train split: 2507 examples [03:09, 12.83 examples/s]

Token count is too large: pandas-dev__pandas-22852
Token count is too large: googleapis__google-cloud-python-8423
Token count is too large: conda__conda-5090
Token count is too large: docker__compose-5079
Token count is too large: wagtail__wagtail-2880
Token count is too large: pandas-dev__pandas-38819
Token count is too large: pandas-dev__pandas-11233


Generating train split: 2509 examples [03:09, 11.28 examples/s]

Token count is too large: pandas-dev__pandas-35681
Token count is too large: Lightning-AI__lightning-649
Token count is too large: pypa__pip-10697
Token count is too large: conda__conda-8834
Token count is too large: pandas-dev__pandas-7478
Token count is too large: pandas-dev__pandas-23127
Token count is too large: numpy__numpy-10097
Token count is too large: pandas-dev__pandas-21285
Token count is too large: google__jax-79
Token count is too large: ipython__ipython-2110
Token count is too large: googleapis__google-cloud-python-9572
Token count is too large: apache__airflow-14774
Token count is too large: huggingface__transformers-14085
Token count is too large: googleapis__google-cloud-python-5678


Generating train split: 2516 examples [03:10, 16.70 examples/s]

Token count is too large: pandas-dev__pandas-26162
Token count is too large: conan-io__conan-10174
Token count is too large: numpy__numpy-11708
Token count is too large: pandas-dev__pandas-21366
Token count is too large: celery__celery-6576
Token count is too large: Qiskit__qiskit-7109
Token count is too large: pandas-dev__pandas-19344
Token count is too large: googleapis__google-cloud-python-11313
Token count is too large: pandas-dev__pandas-21731
Token count is too large: numpy__numpy-3461
Token count is too large: docker__compose-5093
Token count is too large: pyca__cryptography-1952
Token count is too large: mesonbuild__meson-5018
Token count is too large: pyca__cryptography-4985
Token count is too large: huggingface__transformers-18187
Token count is too large: Qiskit__qiskit-4851
Token count is too large: pandas-dev__pandas-26399


Generating train split: 2519 examples [03:10, 10.67 examples/s]

Token count is too large: numpy__numpy-8648
Token count is too large: pandas-dev__pandas-36797
Token count is too large: google__jax-1030
Token count is too large: celery__celery-5348
Token count is too large: conda__conda-10992
Token count is too large: ipython__ipython-8062
Token count is too large: mesonbuild__meson-11644
Token count is too large: ipython__ipython-4996
Token count is too large: conan-io__conan-6334
Token count is too large: pandas-dev__pandas-3139
Token count is too large: explosion__spaCy-3324
Token count is too large: pypa__pip-10206
Token count is too large: gitpython-developers__GitPython-1521
Token count is too large: huggingface__transformers-14124


Generating train split: 2521 examples [03:11,  9.81 examples/s]

Token count is too large: pandas-dev__pandas-30241
Token count is too large: pandas-dev__pandas-19280
Token count is too large: Qiskit__qiskit-1366
Token count is too large: pandas-dev__pandas-14073
Token count is too large: Qiskit__qiskit-6998
Token count is too large: ipython__ipython-4382
Token count is too large: pandas-dev__pandas-39051
Token count is too large: ytdl-org__youtube-dl-6097
Token count is too large: jupyterlab__jupyterlab-8990


Generating train split: 2525 examples [03:11, 12.70 examples/s]

Token count is too large: pandas-dev__pandas-36147
Token count is too large: pandas-dev__pandas-14445
Token count is too large: ytdl-org__youtube-dl-16115
Token count is too large: pandas-dev__pandas-18923
Token count is too large: numpy__numpy-13739
Token count is too large: conda__conda-4711


Generating train split: 2531 examples [03:11, 13.71 examples/s]

Token count is too large: googleapis__google-cloud-python-6634
Token count is too large: pandas-dev__pandas-9007
Token count is too large: pandas-dev__pandas-15093
Token count is too large: Qiskit__qiskit-973
Token count is too large: Qiskit__qiskit-4478
Token count is too large: Qiskit__qiskit-626
Token count is too large: ipython__ipython-11409
Token count is too large: pandas-dev__pandas-9808
Token count is too large: pandas-dev__pandas-5658
Token count is too large: ipython__ipython-3082
Token count is too large: pandas-dev__pandas-33907
Token count is too large: pyca__cryptography-1883
Token count is too large: huggingface__transformers-8962
Token count is too large: Lightning-AI__lightning-2541


Generating train split: 2536 examples [03:11, 17.41 examples/s]

Token count is too large: huggingface__transformers-12638
Token count is too large: huggingface__transformers-25684
Token count is too large: ipython__ipython-734
Token count is too large: wagtail__wagtail-1533
Token count is too large: pantsbuild__pants-14088


Generating train split: 2542 examples [03:12, 21.29 examples/s]

Token count is too large: pandas-dev__pandas-28438
Token count is too large: Qiskit__qiskit-4382
Token count is too large: googleapis__google-cloud-python-8938
Token count is too large: conda__conda-6517
Token count is too large: pandas-dev__pandas-27651
Token count is too large: jupyterlab__jupyterlab-9580
Token count is too large: pantsbuild__pants-4679
Token count is too large: dagster-io__dagster-15078
Token count is too large: Qiskit__qiskit-5274
Token count is too large: pandas-dev__pandas-14965
Token count is too large: googleapis__google-cloud-python-1051
Token count is too large: pandas-dev__pandas-17119
Token count is too large: pandas-dev__pandas-35377
Token count is too large: conda__conda-2526
Token count is too large: pandas-dev__pandas-17412
Token count is too large: PrefectHQ__prefect-1434
Token count is too large: mesonbuild__meson-982
Token count is too large: pandas-dev__pandas-22576
Token count is too large: ytdl-org__youtube-dl-9367
Token count is too large: Lightn

Generating train split: 2556 examples [03:12, 24.53 examples/s]

Token count is too large: pandas-dev__pandas-16005
Token count is too large: googleapis__google-cloud-python-1213
Token count is too large: pandas-dev__pandas-24443
Token count is too large: pandas-dev__pandas-30516
Token count is too large: google__jax-303
Token count is too large: pantsbuild__pants-13996
Token count is too large: huggingface__transformers-14026
Token count is too large: open-mmlab__mmdetection-4555
Token count is too large: tensorflow__models-3448
Token count is too large: pypa__pip-5559
Token count is too large: conan-io__conan-3012
Token count is too large: numpy__numpy-14051


Generating train split: 2559 examples [03:12, 24.36 examples/s]

Token count is too large: pandas-dev__pandas-22141
Token count is too large: apache__airflow-27564
Token count is too large: Qiskit__qiskit-7031
Token count is too large: Qiskit__qiskit-2119
Token count is too large: ytdl-org__youtube-dl-4395
Token count is too large: Qiskit__qiskit-6458
Token count is too large: docker__compose-2601
Token count is too large: apache__airflow-26766
Token count is too large: pandas-dev__pandas-29245
Token count is too large: pandas-dev__pandas-7041
Token count is too large: pandas-dev__pandas-21093
Token count is too large: pantsbuild__pants-6023
Token count is too large: conda__conda-6656
Token count is too large: pandas-dev__pandas-21169
Token count is too large: open-mmlab__mmdetection-7572
Token count is too large: docker__compose-3128
Token count is too large: huggingface__transformers-4531
Token count is too large: pandas-dev__pandas-15774
Token count is too large: pantsbuild__pants-18635
Token count is too large: Lightning-AI__lightning-1954
Token

Generating train split: 2563 examples [03:13, 17.41 examples/s]

Token count is too large: pandas-dev__pandas-28686
Token count is too large: docker__compose-2907
Token count is too large: mesonbuild__meson-2142
Token count is too large: pandas-dev__pandas-26029
Token count is too large: huggingface__transformers-17055
Token count is too large: numpy__numpy-11449
Token count is too large: apache__airflow-320
Token count is too large: conan-io__conan-7215
Token count is too large: pyca__cryptography-6922
Token count is too large: pandas-dev__pandas-13617


Generating train split: 2572 examples [03:13, 20.83 examples/s]

Token count is too large: googleapis__google-cloud-python-9108
Token count is too large: mesonbuild__meson-3795
Token count is too large: wagtail__wagtail-10072
Token count is too large: Lightning-AI__lightning-1780
Token count is too large: huggingface__transformers-19219
Token count is too large: conan-io__conan-3153
Token count is too large: pandas-dev__pandas-8298
Token count is too large: Qiskit__qiskit-9170
Token count is too large: googleapis__google-cloud-python-9974
Token count is too large: Qiskit__qiskit-3470
Token count is too large: google__jax-1096


Generating train split: 2575 examples [03:13, 21.22 examples/s]

Token count is too large: numpy__numpy-6430
Token count is too large: pandas-dev__pandas-35582
Token count is too large: pandas-dev__pandas-23864
Token count is too large: google__jax-2885
Token count is too large: Qiskit__qiskit-1224
Token count is too large: Qiskit__qiskit-5581
Token count is too large: Lightning-AI__lightning-1010
Token count is too large: pandas-dev__pandas-19044
Token count is too large: ipython__ipython-11183
Token count is too large: pandas-dev__pandas-32911
Token count is too large: pandas-dev__pandas-4989
Token count is too large: numpy__numpy-22872
Token count is too large: pandas-dev__pandas-3060
Token count is too large: Lightning-AI__lightning-3266
Token count is too large: huggingface__transformers-21005
Token count is too large: ytdl-org__youtube-dl-8898
Token count is too large: twisted__twisted-11714


Generating train split: 2579 examples [03:14, 14.60 examples/s]

Token count is too large: Lightning-AI__lightning-2878
Token count is too large: ytdl-org__youtube-dl-12276
Token count is too large: pandas-dev__pandas-9487
Token count is too large: conan-io__conan-2996
Token count is too large: pantsbuild__pants-13977
Token count is too large: pandas-dev__pandas-24099
Token count is too large: huggingface__transformers-8714
Token count is too large: numpy__numpy-9132
Token count is too large: googleapis__google-cloud-python-6436
Token count is too large: celery__celery-6342
Token count is too large: pandas-dev__pandas-29368
Token count is too large: pandas-dev__pandas-8847


Generating train split: 2582 examples [03:14, 15.04 examples/s]

Token count is too large: pandas-dev__pandas-7038
Token count is too large: pantsbuild__pants-9769
Token count is too large: apache__airflow-19965
Token count is too large: Lightning-AI__lightning-1932
Token count is too large: Lightning-AI__lightning-2572
Token count is too large: docker__compose-6209
Token count is too large: pandas-dev__pandas-28858
Token count is too large: ray-project__ray-10672
Token count is too large: numpy__numpy-18351
Token count is too large: Lightning-AI__lightning-2403


Generating train split: 2586 examples [03:14, 15.14 examples/s]

Token count is too large: pandas-dev__pandas-3621
Token count is too large: pandas-dev__pandas-27798
Token count is too large: explosion__spaCy-3213
Token count is too large: conan-io__conan-6618
Token count is too large: pandas-dev__pandas-29788
Token count is too large: pandas-dev__pandas-27109
Token count is too large: ipython__ipython-4908
Token count is too large: mesonbuild__meson-129
Token count is too large: ipython__ipython-5379
Token count is too large: apache__airflow-17613
Token count is too large: googleapis__google-cloud-python-11311
Token count is too large: Qiskit__qiskit-5994
Token count is too large: pandas-dev__pandas-38175
Token count is too large: numpy__numpy-24496
Token count is too large: ipython__ipython-5961
Token count is too large: conda__conda-10057
Token count is too large: huggingface__transformers-19868
Token count is too large: conda__conda-2908


Generating train split: 2592 examples [03:14, 13.59 examples/s]

Token count is too large: huggingface__transformers-21349
Token count is too large: pandas-dev__pandas-24842
Token count is too large: wagtail__wagtail-3521
Token count is too large: ray-project__ray-4285
Token count is too large: pandas-dev__pandas-20727
Token count is too large: docker__compose-5224
Token count is too large: numpy__numpy-22771
Token count is too large: huggingface__transformers-17311
Token count is too large: Qiskit__qiskit-4270
Token count is too large: pandas-dev__pandas-25796
Token count is too large: pandas-dev__pandas-18292


Generating train split: 2595 examples [03:15, 14.08 examples/s]

Token count is too large: pandas-dev__pandas-5088
Token count is too large: wagtail__wagtail-7093
Token count is too large: pandas-dev__pandas-18228
Token count is too large: conan-io__conan-9240
Token count is too large: jupyterlab__jupyterlab-10769
Token count is too large: conan-io__conan-4113
Token count is too large: pandas-dev__pandas-33509
Token count is too large: pandas-dev__pandas-22320
Token count is too large: huggingface__transformers-7281
Token count is too large: pandas-dev__pandas-6486
Token count is too large: numpy__numpy-5498
Token count is too large: pantsbuild__pants-4716
Token count is too large: Qiskit__qiskit-3330


Generating train split: 2597 examples [03:15, 12.79 examples/s]

Token count is too large: pandas-dev__pandas-31333
Token count is too large: pandas-dev__pandas-23673
Token count is too large: PrefectHQ__prefect-530
Token count is too large: pandas-dev__pandas-19178
Token count is too large: conan-io__conan-6003
Token count is too large: huggingface__transformers-11128


Generating train split: 2602 examples [03:15, 14.29 examples/s]

Token count is too large: Qiskit__qiskit-4035
Token count is too large: Qiskit__qiskit-4181
Token count is too large: ipython__ipython-11418
Token count is too large: Qiskit__qiskit-2881
Token count is too large: apache__airflow-16441
Token count is too large: pandas-dev__pandas-37426
Token count is too large: mesonbuild__meson-3694
Token count is too large: celery__celery-6481
Token count is too large: numpy__numpy-23275
Token count is too large: huggingface__transformers-4943
Token count is too large: celery__celery-7553
Token count is too large: pandas-dev__pandas-21853
Token count is too large: pandas-dev__pandas-21655
Token count is too large: pandas-dev__pandas-34888


Generating train split: 2604 examples [03:15, 12.50 examples/s]

Token count is too large: pantsbuild__pants-19264
Token count is too large: pandas-dev__pandas-8877
Token count is too large: pypa__pip-2925
Token count is too large: Qiskit__qiskit-5358
Token count is too large: pandas-dev__pandas-25913
Token count is too large: huggingface__transformers-23156
Token count is too large: Lightning-AI__lightning-2596


Generating train split: 2606 examples [03:16, 11.55 examples/s]

Token count is too large: pandas-dev__pandas-6130
Token count is too large: huggingface__transformers-15074
Token count is too large: pypa__pip-2761
Token count is too large: pandas-dev__pandas-22866
Token count is too large: pandas-dev__pandas-4054
Token count is too large: mesonbuild__meson-2942
Token count is too large: pandas-dev__pandas-22918
Token count is too large: pandas-dev__pandas-27322
Token count is too large: pandas-dev__pandas-30636
Token count is too large: pandas-dev__pandas-37014
Token count is too large: pandas-dev__pandas-38029
Token count is too large: google__jax-1512
Token count is too large: google__jax-3318
Token count is too large: docker__compose-2421


Generating train split: 2610 examples [03:16, 12.53 examples/s]

Token count is too large: pandas-dev__pandas-25868
Token count is too large: Qiskit__qiskit-7007
Token count is too large: pandas-dev__pandas-8030
Token count is too large: celery__celery-5931
Token count is too large: huggingface__transformers-5184
Token count is too large: pandas-dev__pandas-34053
Token count is too large: conan-io__conan-2784
Token count is too large: conan-io__conan-5224
Token count is too large: pandas-dev__pandas-27878
Token count is too large: pandas-dev__pandas-8840
Token count is too large: pandas-dev__pandas-16095
Token count is too large: pandas-dev__pandas-20049
Token count is too large: pandas-dev__pandas-24748
Token count is too large: huggingface__transformers-13888


Generating train split: 2616 examples [03:16, 15.10 examples/s]

Token count is too large: huggingface__transformers-19321
Token count is too large: numpy__numpy-258
Token count is too large: PrefectHQ__prefect-2558
Token count is too large: pandas-dev__pandas-18844
Token count is too large: pandas-dev__pandas-21323
Token count is too large: pandas-dev__pandas-9612
Token count is too large: pantsbuild__pants-5910
Token count is too large: ipython__ipython-8428
Token count is too large: pandas-dev__pandas-27095
Token count is too large: pandas-dev__pandas-16420
Token count is too large: gitpython-developers__GitPython-1391
Token count is too large: googleapis__google-cloud-python-2773
Token count is too large: huggingface__transformers-17206
Token count is too large: huggingface__transformers-17293
Token count is too large: pandas-dev__pandas-8564


Generating train split: 2620 examples [03:17, 13.30 examples/s]

Token count is too large: huggingface__transformers-18997
Token count is too large: pandas-dev__pandas-33289
Token count is too large: pandas-dev__pandas-38796
Token count is too large: numpy__numpy-19766
Token count is too large: mesonbuild__meson-8888
Token count is too large: wagtail__wagtail-9221


Generating train split: 2622 examples [03:17, 10.65 examples/s]

Token count is too large: pandas-dev__pandas-23886
Token count is too large: conda__conda-12453
Token count is too large: Qiskit__qiskit-8925
Token count is too large: conda__conda-2534
Token count is too large: docker__compose-2032
Token count is too large: conda__conda-8531
Token count is too large: numpy__numpy-24134
Token count is too large: conan-io__conan-12967
Token count is too large: conda__conda-7675
Token count is too large: ytdl-org__youtube-dl-21003
Token count is too large: numpy__numpy-12813
Token count is too large: ytdl-org__youtube-dl-14281
Token count is too large: ray-project__ray-7198


Generating train split: 2631 examples [03:17, 18.45 examples/s]

Token count is too large: numpy__numpy-9096
Token count is too large: pandas-dev__pandas-37032
Token count is too large: pandas-dev__pandas-18810
Token count is too large: Lightning-AI__lightning-3067
Token count is too large: huggingface__transformers-24324
Token count is too large: pypa__pip-4579
Token count is too large: Qiskit__qiskit-1337


Generating train split: 2636 examples [03:17, 18.03 examples/s]

Token count is too large: Qiskit__qiskit-414
Token count is too large: googleapis__google-cloud-python-9915
Token count is too large: gitpython-developers__GitPython-1547
Token count is too large: pandas-dev__pandas-17885
Token count is too large: mesonbuild__meson-2684
Token count is too large: celery__celery-6000
Token count is too large: huggingface__transformers-14994
Token count is too large: huggingface__transformers-15558
Token count is too large: jupyterlab__jupyterlab-3555
Token count is too large: googleapis__google-cloud-python-2496
Token count is too large: huggingface__transformers-19316
Token count is too large: huggingface__transformers-7732


Generating train split: 2641 examples [03:18, 21.30 examples/s]

Token count is too large: pandas-dev__pandas-20797
Token count is too large: pandas-dev__pandas-6576
Token count is too large: pandas-dev__pandas-6737
Token count is too large: pandas-dev__pandas-39458
Token count is too large: jupyterlab__jupyterlab-5174
Token count is too large: conda__conda-4786
Token count is too large: conan-io__conan-3868
Token count is too large: Qiskit__qiskit-5521


Generating train split: 2644 examples [03:18, 15.44 examples/s]

Token count is too large: huggingface__transformers-19770
Token count is too large: pyca__cryptography-4944
Token count is too large: pandas-dev__pandas-21251
Token count is too large: pantsbuild__pants-12256
Token count is too large: apache__airflow-28771
Token count is too large: pypa__pip-2284
Token count is too large: google__jax-1069
Token count is too large: pandas-dev__pandas-3909
Token count is too large: pantsbuild__pants-12198
Token count is too large: PrefectHQ__prefect-2472
Token count is too large: pandas-dev__pandas-20091
Token count is too large: ipython__ipython-1934


Generating train split: 2650 examples [03:18, 18.62 examples/s]

Token count is too large: pantsbuild__pants-13698
Token count is too large: jupyterlab__jupyterlab-9286
Token count is too large: docker__compose-7564
Token count is too large: kubeflow__pipelines-522
Token count is too large: pypa__pip-10503
Token count is too large: pandas-dev__pandas-11182
Token count is too large: pandas-dev__pandas-22947
Token count is too large: pyca__cryptography-5056
Token count is too large: Lightning-AI__lightning-926
Token count is too large: mesonbuild__meson-9557
Token count is too large: pantsbuild__pants-19100
Token count is too large: huggingface__transformers-6719
Token count is too large: pypa__pip-1432
Token count is too large: huggingface__transformers-12147
Token count is too large: celery__celery-6298
Token count is too large: huggingface__transformers-8528
Token count is too large: pandas-dev__pandas-8959
Token count is too large: huggingface__transformers-17469
Token count is too large: mesonbuild__meson-7689


Generating train split: 2656 examples [03:19, 16.48 examples/s]

Token count is too large: pyca__cryptography-2857
Token count is too large: numpy__numpy-12009
Token count is too large: numpy__numpy-10629
Token count is too large: pandas-dev__pandas-36962
Token count is too large: PrefectHQ__prefect-2191
Token count is too large: pandas-dev__pandas-26872
Token count is too large: googleapis__google-cloud-python-4921
Token count is too large: googleapis__google-cloud-python-3735
Token count is too large: mesonbuild__meson-1814
Token count is too large: conda__conda-8644
Token count is too large: Qiskit__qiskit-3245
Token count is too large: ipython__ipython-9785
Token count is too large: pandas-dev__pandas-9316


Generating train split: 2658 examples [03:19, 16.04 examples/s]

Token count is too large: pandas-dev__pandas-18940
Token count is too large: huggingface__transformers-24241
Token count is too large: pantsbuild__pants-10879
Token count is too large: googleapis__google-cloud-python-1279
Token count is too large: pypa__pip-2386
Token count is too large: Lightning-AI__lightning-2164
Token count is too large: huggingface__transformers-18522
Token count is too large: apache__airflow-27190


Generating train split: 2665 examples [03:19, 19.07 examples/s]

Token count is too large: pandas-dev__pandas-33107
Token count is too large: googleapis__google-cloud-python-11338
Token count is too large: conan-io__conan-8016
Token count is too large: ray-project__ray-10750
Token count is too large: numpy__numpy-20810
Token count is too large: apache__airflow-25412
Token count is too large: pypa__pip-7367


Generating train split: 2667 examples [03:19, 18.06 examples/s]

Token count is too large: pandas-dev__pandas-24157
Token count is too large: pandas-dev__pandas-38532
Token count is too large: apache__airflow-25553
Token count is too large: pandas-dev__pandas-7786
Token count is too large: pandas-dev__pandas-24924
Token count is too large: pantsbuild__pants-18051
Token count is too large: docker__compose-4954
Token count is too large: pandas-dev__pandas-10926
Token count is too large: numpy__numpy-10542
Token count is too large: pantsbuild__pants-15598
Token count is too large: pandas-dev__pandas-34128
Token count is too large: Qiskit__qiskit-6866
Token count is too large: huggingface__transformers-9183
Token count is too large: pandas-dev__pandas-10939
Token count is too large: google__jax-2060
Token count is too large: Qiskit__qiskit-1665


Generating train split: 2669 examples [03:20, 10.90 examples/s]

Token count is too large: pandas-dev__pandas-16852
Token count is too large: pandas-dev__pandas-8476
Token count is too large: pypa__pip-1284
Token count is too large: huggingface__transformers-21772
Token count is too large: Qiskit__qiskit-8762
Token count is too large: numpy__numpy-23955
Token count is too large: ipython__ipython-3075
Token count is too large: apache__airflow-20919
Token count is too large: mesonbuild__meson-1068
Token count is too large: google__jax-920
Token count is too large: google__jax-3166
Token count is too large: Qiskit__qiskit-2692
Token count is too large: mesonbuild__meson-206
Token count is too large: pantsbuild__pants-5368
Token count is too large: pandas-dev__pandas-22201
Token count is too large: ytdl-org__youtube-dl-15188
Token count is too large: pandas-dev__pandas-30395
Token count is too large: pypa__pip-4642


Generating train split: 2671 examples [03:20, 10.40 examples/s]

Token count is too large: mesonbuild__meson-8606
Token count is too large: apache__airflow-25296
Token count is too large: pantsbuild__pants-11203
Token count is too large: huggingface__transformers-7756
Token count is too large: celery__celery-3790
Token count is too large: wagtail__wagtail-7937
Token count is too large: Qiskit__qiskit-4546
Token count is too large: googleapis__google-cloud-python-8102
Token count is too large: huggingface__transformers-16357


Generating train split: 2675 examples [03:20, 12.12 examples/s]

Token count is too large: huggingface__transformers-7973
Token count is too large: mesonbuild__meson-7193
Token count is too large: pantsbuild__pants-10805
Token count is too large: huggingface__transformers-11538
Token count is too large: pandas-dev__pandas-39766
Token count is too large: pandas-dev__pandas-26862
Token count is too large: pandas-dev__pandas-21210
Token count is too large: pandas-dev__pandas-6053
Token count is too large: pantsbuild__pants-6177
Token count is too large: apache__airflow-8849
Token count is too large: Qiskit__qiskit-9538
Token count is too large: gitpython-developers__GitPython-1437
Token count is too large: conan-io__conan-10874


Generating train split: 2677 examples [03:20, 11.01 examples/s]

Token count is too large: mesonbuild__meson-4238
Token count is too large: pandas-dev__pandas-29390
Token count is too large: Lightning-AI__lightning-1177
Token count is too large: conan-io__conan-6134
Token count is too large: pandas-dev__pandas-7515
Token count is too large: Qiskit__qiskit-5554
Token count is too large: docker__compose-4955
Token count is too large: pandas-dev__pandas-24495


Generating train split: 2679 examples [03:21, 10.49 examples/s]

Token count is too large: huggingface__transformers-15005
Token count is too large: pypa__pip-5883
Token count is too large: Lightning-AI__lightning-1630
Token count is too large: mesonbuild__meson-6125
Token count is too large: huggingface__transformers-12070
Token count is too large: pandas-dev__pandas-5238
Token count is too large: conda__conda-12496
Token count is too large: googleapis__google-cloud-python-7674
Token count is too large: celery__celery-6711
There was an error processing
Token count is too large: mesonbuild__meson-4887
Token count is too large: docker__compose-2230
Token count is too large: pandas-dev__pandas-6803
Token count is too large: pandas-dev__pandas-16292
Token count is too large: pandas-dev__pandas-28097
Token count is too large: pandas-dev__pandas-39765
Token count is too large: jupyterlab__jupyterlab-6340
Token count is too large: pandas-dev__pandas-5231


Generating train split: 2683 examples [03:21,  8.91 examples/s]

Token count is too large: pandas-dev__pandas-39029
Token count is too large: pandas-dev__pandas-38192
Token count is too large: numpy__numpy-9817
Token count is too large: pandas-dev__pandas-22517
Token count is too large: pandas-dev__pandas-37945
Token count is too large: googleapis__google-cloud-python-5613


Generating train split: 2684 examples [03:21,  8.83 examples/s]

Token count is too large: pandas-dev__pandas-4494
Token count is too large: apache__airflow-18602
Token count is too large: pandas-dev__pandas-4108
Token count is too large: googleapis__google-cloud-python-7954
Token count is too large: Lightning-AI__lightning-2513
Token count is too large: google__jax-2786
Token count is too large: pantsbuild__pants-18157
Token count is too large: pandas-dev__pandas-29657
Token count is too large: wagtail__wagtail-10242
Token count is too large: PrefectHQ__prefect-657
Token count is too large: pandas-dev__pandas-6725
Token count is too large: google__jax-599


Generating train split: 2687 examples [03:21, 11.11 examples/s]

Token count is too large: conan-io__conan-5189
Token count is too large: googleapis__google-cloud-python-5503
Token count is too large: conda__conda-2365
Token count is too large: pandas-dev__pandas-18558
Token count is too large: pandas-dev__pandas-13909
Token count is too large: pandas-dev__pandas-22725
Token count is too large: pandas-dev__pandas-23265
Token count is too large: PrefectHQ__prefect-2617
Token count is too large: huggingface__transformers-9713


Generating train split: 2692 examples [03:22, 13.08 examples/s]

Token count is too large: mesonbuild__meson-11215
Token count is too large: pandas-dev__pandas-32115
Token count is too large: ytdl-org__youtube-dl-16100
Token count is too large: jupyterlab__jupyterlab-2410
Token count is too large: pandas-dev__pandas-27556
Token count is too large: Lightning-AI__lightning-1477
Token count is too large: apache__airflow-19700
Token count is too large: pandas-dev__pandas-38709
Token count is too large: pandas-dev__pandas-7410


Generating train split: 2694 examples [03:22, 12.13 examples/s]

Token count is too large: pandas-dev__pandas-22486
Token count is too large: conda__conda-7998
Token count is too large: pandas-dev__pandas-6024
Token count is too large: conan-io__conan-2424
Token count is too large: pandas-dev__pandas-21896
Token count is too large: pandas-dev__pandas-25046
Token count is too large: pandas-dev__pandas-36535
Token count is too large: pandas-dev__pandas-5544
Token count is too large: Qiskit__qiskit-3952


Generating train split: 2696 examples [03:22,  7.64 examples/s]

Token count is too large: pandas-dev__pandas-20613
Token count is too large: huggingface__transformers-14316
Token count is too large: pandas-dev__pandas-15052
Token count is too large: Qiskit__qiskit-6102
Token count is too large: celery__celery-8152
Token count is too large: pandas-dev__pandas-30977
Token count is too large: pandas-dev__pandas-23600


Generating train split: 2698 examples [03:23,  8.34 examples/s]

Token count is too large: ray-project__ray-6634
Token count is too large: mesonbuild__meson-3898
Token count is too large: pandas-dev__pandas-18330
Token count is too large: numpy__numpy-16080
Token count is too large: huggingface__transformers-13687
Token count is too large: huggingface__transformers-9875
Token count is too large: pandas-dev__pandas-7638


Generating train split: 2702 examples [03:23, 11.57 examples/s]

Token count is too large: pandas-dev__pandas-33091
Token count is too large: pandas-dev__pandas-17801
Token count is too large: numpy__numpy-15685
Token count is too large: pypa__pip-5286
Token count is too large: pandas-dev__pandas-27127
Token count is too large: mesonbuild__meson-1763
Token count is too large: wagtail__wagtail-9974
Token count is too large: pandas-dev__pandas-30257
Token count is too large: docker__compose-4561
Token count is too large: pantsbuild__pants-6439
Token count is too large: pandas-dev__pandas-28183
Token count is too large: google__jax-1931
Token count is too large: pantsbuild__pants-8093
Token count is too large: numpy__numpy-10396
Token count is too large: numpy__numpy-6717


Generating train split: 2706 examples [03:23, 13.57 examples/s]

Token count is too large: pandas-dev__pandas-4985
There was an error processing
Token count is too large: conda__conda-5405
Token count is too large: numpy__numpy-9302
Token count is too large: pandas-dev__pandas-16701
Token count is too large: docker__compose-2407
Token count is too large: googleapis__google-cloud-python-9225
Token count is too large: conda__conda-3686
Token count is too large: huggingface__transformers-17637
Token count is too large: mesonbuild__meson-691
Token count is too large: huggingface__transformers-24942
Token count is too large: dagster-io__dagster-2468
Token count is too large: huggingface__transformers-9514
Token count is too large: pandas-dev__pandas-29142
Token count is too large: pandas-dev__pandas-4195
Token count is too large: ray-project__ray-10703
Token count is too large: numpy__numpy-8368
Token count is too large: conan-io__conan-4783
Token count is too large: pantsbuild__pants-14850
Token count is too large: pandas-dev__pandas-20960
Token count i

Generating train split: 2709 examples [03:24,  9.95 examples/s]

Token count is too large: pandas-dev__pandas-33693
Token count is too large: google__jax-1473
Token count is too large: numpy__numpy-7347
Token count is too large: pandas-dev__pandas-4091
Token count is too large: pandas-dev__pandas-27988
Token count is too large: apache__airflow-17626
Token count is too large: google__jax-102
Token count is too large: pantsbuild__pants-18850
Token count is too large: pandas-dev__pandas-27366
Token count is too large: googleapis__google-cloud-python-8808
Token count is too large: conda__conda-8911
Token count is too large: docker__compose-1642
Token count is too large: googleapis__google-cloud-python-6031


Generating train split: 2715 examples [03:24, 15.60 examples/s]

Token count is too large: pandas-dev__pandas-18042
Token count is too large: Qiskit__qiskit-1702
Token count is too large: pypa__pip-8223
Token count is too large: pandas-dev__pandas-5730
Token count is too large: mesonbuild__meson-557
Token count is too large: mesonbuild__meson-11802
Token count is too large: conan-io__conan-4181


Generating train split: 2721 examples [03:24, 18.65 examples/s]

Token count is too large: pandas-dev__pandas-6398
Token count is too large: pandas-dev__pandas-30656
Token count is too large: numpy__numpy-5805
Token count is too large: huggingface__transformers-12397


Generating train split: 2730 examples [03:24, 21.35 examples/s]

Token count is too large: huggingface__transformers-25392
Token count is too large: Qiskit__qiskit-7085
Token count is too large: mesonbuild__meson-2221
Token count is too large: Qiskit__qiskit-7793
Token count is too large: conan-io__conan-5810
Token count is too large: huggingface__transformers-9789
Token count is too large: pantsbuild__pants-13834
Token count is too large: googleapis__google-cloud-python-2998
Token count is too large: Qiskit__qiskit-5206
Token count is too large: pantsbuild__pants-6894
Token count is too large: pandas-dev__pandas-33709


Generating train split: 2734 examples [03:24, 22.42 examples/s]

Token count is too large: pandas-dev__pandas-34485
Token count is too large: mesonbuild__meson-3131
Token count is too large: apache__airflow-1196
Token count is too large: google__jax-1515
Token count is too large: pypa__pip-8702
Token count is too large: mesonbuild__meson-5564
Token count is too large: ipython__ipython-5285
Token count is too large: googleapis__google-cloud-python-6703
Token count is too large: pandas-dev__pandas-23913


Generating train split: 2741 examples [03:25, 27.49 examples/s]

Token count is too large: huggingface__transformers-13205
Token count is too large: pypa__pip-6827
Token count is too large: Qiskit__qiskit-10381
Token count is too large: ytdl-org__youtube-dl-8346
Token count is too large: numpy__numpy-19764
Token count is too large: pantsbuild__pants-10645
Token count is too large: pandas-dev__pandas-3809
Token count is too large: ipython__ipython-12171
Token count is too large: numpy__numpy-6489
Token count is too large: pandas-dev__pandas-22505
Token count is too large: apache__airflow-23008
Token count is too large: pyca__cryptography-2529
Token count is too large: PrefectHQ__prefect-224
Token count is too large: googleapis__google-cloud-python-4679
Token count is too large: numpy__numpy-13371
Token count is too large: Qiskit__qiskit-5495
Token count is too large: conan-io__conan-4133
Token count is too large: pandas-dev__pandas-31528
Token count is too large: conda__conda-11545
Token count is too large: mesonbuild__meson-8517
Token count is too l

Generating train split: 2748 examples [03:26, 13.84 examples/s]

Token count is too large: pandas-dev__pandas-6682
Token count is too large: apache__mxnet-627
Token count is too large: pantsbuild__pants-17877
Token count is too large: pandas-dev__pandas-7915
Token count is too large: pypa__pip-2134
Token count is too large: huggingface__transformers-7263
Token count is too large: pandas-dev__pandas-8280
Token count is too large: pandas-dev__pandas-32807
Token count is too large: pandas-dev__pandas-31101
Token count is too large: PrefectHQ__prefect-2151
Token count is too large: numpy__numpy-14433
Token count is too large: apache__airflow-9273
Token count is too large: Lightning-AI__lightning-397
Token count is too large: pandas-dev__pandas-20399
Token count is too large: explosion__spaCy-3405
Token count is too large: mesonbuild__meson-1431
Token count is too large: Lightning-AI__lightning-2876
Token count is too large: pantsbuild__pants-7752
Token count is too large: Qiskit__qiskit-1255
Token count is too large: Qiskit__qiskit-10183
Token count is 

Generating train split: 2752 examples [03:26, 12.26 examples/s]

Token count is too large: huggingface__transformers-23147
Token count is too large: pandas-dev__pandas-25498
Token count is too large: pantsbuild__pants-13843
Token count is too large: pandas-dev__pandas-24327
Token count is too large: mesonbuild__meson-9838
Token count is too large: apache__airflow-22834
Token count is too large: pandas-dev__pandas-8700
Token count is too large: wagtail__wagtail-719
Token count is too large: ytdl-org__youtube-dl-14997
Token count is too large: docker__compose-5724
Token count is too large: huggingface__transformers-22880
Token count is too large: pandas-dev__pandas-26684
Token count is too large: pandas-dev__pandas-23291
Token count is too large: pandas-dev__pandas-39009


Generating train split: 2756 examples [03:26, 11.72 examples/s]

Token count is too large: pandas-dev__pandas-34414
Token count is too large: celery__celery-6294
Token count is too large: docker__compose-5095
Token count is too large: huggingface__transformers-7381
Token count is too large: Lightning-AI__lightning-1100
Token count is too large: pandas-dev__pandas-33398
Token count is too large: conda__conda-8165
Token count is too large: mesonbuild__meson-1326
Token count is too large: pandas-dev__pandas-29799


Generating train split: 2758 examples [03:27, 10.00 examples/s]

Token count is too large: huggingface__transformers-21548
Token count is too large: apache__airflow-16383
Token count is too large: conda__conda-6589
Token count is too large: mesonbuild__meson-3243
Token count is too large: googleapis__google-cloud-python-6904
Token count is too large: pandas-dev__pandas-10464
Token count is too large: pandas-dev__pandas-10400
Token count is too large: pandas-dev__pandas-33440
Token count is too large: mesonbuild__meson-11385
Token count is too large: pandas-dev__pandas-16968
Token count is too large: pandas-dev__pandas-36876


Generating train split: 2763 examples [03:27, 13.89 examples/s]

Token count is too large: pandas-dev__pandas-7867
Token count is too large: huggingface__transformers-6841
Token count is too large: pandas-dev__pandas-32175
Token count is too large: PrefectHQ__prefect-3127
Token count is too large: pandas-dev__pandas-31939
Token count is too large: pypa__pip-8656
Token count is too large: pandas-dev__pandas-22745
Token count is too large: apache__airflow-16118
Token count is too large: pandas-dev__pandas-17943
Token count is too large: Lightning-AI__lightning-1589
Token count is too large: pandas-dev__pandas-34013
Token count is too large: pandas-dev__pandas-38835
Token count is too large: Qiskit__qiskit-1024
Token count is too large: ray-project__ray-7846


Generating train split: 2766 examples [03:27, 13.42 examples/s]

Token count is too large: apache__airflow-30129
Token count is too large: huggingface__transformers-19898
Token count is too large: pandas-dev__pandas-35974


Generating train split: 2773 examples [03:27, 16.62 examples/s]

Token count is too large: pandas-dev__pandas-26526
Token count is too large: Qiskit__qiskit-3210
Token count is too large: googleapis__google-cloud-python-3715
Token count is too large: pandas-dev__pandas-39464
Token count is too large: conda__conda-6615
Token count is too large: pandas-dev__pandas-37432
Token count is too large: pypa__pip-12056
Token count is too large: pandas-dev__pandas-6872
Token count is too large: pandas-dev__pandas-39308


Generating train split: 2775 examples [03:28, 15.02 examples/s]

Token count is too large: Qiskit__qiskit-6634
Token count is too large: pandas-dev__pandas-15639
Token count is too large: huggingface__transformers-10357
Token count is too large: pandas-dev__pandas-33959
Token count is too large: huggingface__transformers-3266
Token count is too large: huggingface__transformers-7678
Token count is too large: PrefectHQ__prefect-545
Token count is too large: conan-io__conan-8915
Token count is too large: conan-io__conan-14168


Generating train split: 2780 examples [03:28, 17.22 examples/s]

Token count is too large: huggingface__transformers-23813
Token count is too large: ipython__ipython-2373
Token count is too large: pandas-dev__pandas-21442
Token count is too large: pandas-dev__pandas-16455
Token count is too large: pandas-dev__pandas-22416
Token count is too large: pandas-dev__pandas-6731
Token count is too large: googleapis__google-cloud-python-6080
Token count is too large: jupyterlab__jupyterlab-3294
Token count is too large: Qiskit__qiskit-3329
Token count is too large: Qiskit__qiskit-10438
Token count is too large: pyca__cryptography-3686
Token count is too large: numpy__numpy-3257
Token count is too large: Qiskit__qiskit-5035


Generating train split: 2783 examples [03:28, 12.28 examples/s]

Token count is too large: Qiskit__qiskit-5680
Token count is too large: pandas-dev__pandas-34796
Token count is too large: Lightning-AI__lightning-1272
Token count is too large: Lightning-AI__lightning-3048
Token count is too large: googleapis__google-cloud-python-3079
Token count is too large: apache__airflow-18209
Token count is too large: huggingface__transformers-3924
Token count is too large: pandas-dev__pandas-21966
Token count is too large: mesonbuild__meson-537
Token count is too large: pantsbuild__pants-16116
Token count is too large: pandas-dev__pandas-23141
Token count is too large: gitpython-developers__GitPython-1464


Generating train split: 2789 examples [03:29, 16.23 examples/s]

Token count is too large: pandas-dev__pandas-21313
Token count is too large: huggingface__transformers-16828
Token count is too large: apache__airflow-21423
Token count is too large: pandas-dev__pandas-38204
Token count is too large: apache__airflow-8652
Token count is too large: apache__airflow-25970
Token count is too large: pandas-dev__pandas-39095
Token count is too large: conda__conda-6669
Token count is too large: conan-io__conan-9437
Token count is too large: numpy__numpy-3447


Generating train split: 2792 examples [03:29, 13.10 examples/s]

Token count is too large: googleapis__google-cloud-python-7106
Token count is too large: ytdl-org__youtube-dl-27143
Token count is too large: google__jax-2627
Token count is too large: pandas-dev__pandas-17168
Token count is too large: pandas-dev__pandas-39204
Token count is too large: pandas-dev__pandas-35328
Token count is too large: mesonbuild__meson-7196
Token count is too large: Qiskit__qiskit-6353


Generating train split: 2794 examples [03:29, 12.18 examples/s]

Token count is too large: ipython__ipython-6662
Token count is too large: conda__conda-7291
Token count is too large: pandas-dev__pandas-4624
Token count is too large: PrefectHQ__prefect-2602
Token count is too large: docker__compose-5341
Token count is too large: Qiskit__qiskit-2149
Token count is too large: pandas-dev__pandas-39132
Token count is too large: ipython__ipython-2554


Generating train split: 2798 examples [03:29, 14.95 examples/s]

Token count is too large: pandas-dev__pandas-37997
Token count is too large: open-mmlab__mmdetection-6317
Token count is too large: jupyterlab__jupyterlab-7109
Token count is too large: numpy__numpy-8674
Token count is too large: JohnSnowLabs__spark-nlp-13912
Token count is too large: Lightning-AI__lightning-3347
Token count is too large: wagtail__wagtail-3919
Token count is too large: pantsbuild__pants-16134
Token count is too large: PrefectHQ__prefect-1612
Token count is too large: docker__compose-5565
Token count is too large: pantsbuild__pants-16107


Generating train split: 2803 examples [03:29, 17.69 examples/s]

Token count is too large: pandas-dev__pandas-35353
Token count is too large: pandas-dev__pandas-26911
Token count is too large: Qiskit__qiskit-9162
Token count is too large: PrefectHQ__prefect-1198


Generating train split: 2807 examples [03:30, 16.34 examples/s]

Token count is too large: pandas-dev__pandas-25164
Token count is too large: conda__conda-1562
Token count is too large: numpy__numpy-7298
Token count is too large: pandas-dev__pandas-5979
Token count is too large: pandas-dev__pandas-22200
Token count is too large: conan-io__conan-508
Token count is too large: pandas-dev__pandas-34599
Token count is too large: pandas-dev__pandas-35379
Token count is too large: pypa__pip-4982
Token count is too large: ytdl-org__youtube-dl-2289
Token count is too large: google__jax-786
Token count is too large: pandas-dev__pandas-38103
Token count is too large: pandas-dev__pandas-16738
Token count is too large: pantsbuild__pants-11314
Token count is too large: huggingface__transformers-23014


Generating train split: 2811 examples [03:30, 12.56 examples/s]

Token count is too large: pandas-dev__pandas-7954
Token count is too large: google__jax-178
Token count is too large: numpy__numpy-19627
Token count is too large: numpy__numpy-15406
Token count is too large: pantsbuild__pants-5526
Token count is too large: celery__celery-6524
Token count is too large: pandas-dev__pandas-21043
Token count is too large: ipython__ipython-3575
Token count is too large: pandas-dev__pandas-10157


Generating train split: 2813 examples [03:31, 11.29 examples/s]

Token count is too large: conda__conda-10530
Token count is too large: google__jax-2364
Token count is too large: numpy__numpy-4400
Token count is too large: pandas-dev__pandas-8062
Token count is too large: ytdl-org__youtube-dl-3958
Token count is too large: google__jax-422
Token count is too large: pandas-dev__pandas-34579
Token count is too large: Qiskit__qiskit-7941
Token count is too large: pypa__pip-4495
Token count is too large: pypa__pip-1834


Generating train split: 2815 examples [03:31, 11.56 examples/s]

Token count is too large: pandas-dev__pandas-3452
Token count is too large: pandas-dev__pandas-24916
Token count is too large: Lightning-AI__lightning-1269
Token count is too large: mesonbuild__meson-1514
Token count is too large: pandas-dev__pandas-27926
Token count is too large: Qiskit__qiskit-4966
Token count is too large: pandas-dev__pandas-30181
Token count is too large: pypa__pip-968
Token count is too large: pantsbuild__pants-17124
Token count is too large: pandas-dev__pandas-30928
Token count is too large: pandas-dev__pandas-15883
Token count is too large: apache__airflow-17544
Token count is too large: apache__airflow-25599
Token count is too large: docker__compose-1544
Token count is too large: pypa__pip-3695
Token count is too large: conan-io__conan-6033
Token count is too large: pandas-dev__pandas-9308
Token count is too large: conda__conda-6798
Token count is too large: scipy__scipy-3707
Token count is too large: scipy__scipy-3920
Token count is too large: pandas-dev__pand

Generating train split: 2819 examples [03:31,  9.80 examples/s]

Token count is too large: pandas-dev__pandas-23647
Token count is too large: numpy__numpy-3099
Token count is too large: ipython__ipython-7726
Token count is too large: conan-io__conan-4195
Token count is too large: pandas-dev__pandas-8454
Token count is too large: ipython__ipython-8480
Token count is too large: pandas-dev__pandas-34075
Token count is too large: pandas-dev__pandas-20424
Token count is too large: pandas-dev__pandas-27844
Token count is too large: conan-io__conan-5174
Token count is too large: scipy__scipy-4262
Token count is too large: ipython__ipython-11367


Generating train split: 2822 examples [03:31, 12.85 examples/s]

Token count is too large: pandas-dev__pandas-19448
Token count is too large: pandas-dev__pandas-5053
Token count is too large: conda__conda-5269
Token count is too large: huggingface__transformers-13820
Token count is too large: ytdl-org__youtube-dl-3608
Token count is too large: ray-project__ray-2036
Token count is too large: ipython__ipython-3611
Token count is too large: googleapis__google-cloud-python-3845
Token count is too large: pandas-dev__pandas-33788
Token count is too large: apache__airflow-13933
Token count is too large: pandas-dev__pandas-16937


Generating train split: 2830 examples [03:31, 22.70 examples/s]

Token count is too large: pandas-dev__pandas-8322
Token count is too large: pandas-dev__pandas-16364
Token count is too large: huggingface__transformers-11663
Token count is too large: pandas-dev__pandas-24236
Token count is too large: wagtail__wagtail-8371
Token count is too large: conda__conda-7710
Token count is too large: pandas-dev__pandas-24320


Generating train split: 2835 examples [03:32, 18.66 examples/s]

Token count is too large: pandas-dev__pandas-8070
Token count is too large: pandas-dev__pandas-3558
Token count is too large: pandas-dev__pandas-36867
Token count is too large: wagtail__wagtail-68
Token count is too large: pandas-dev__pandas-32479
Token count is too large: pandas-dev__pandas-34484
Token count is too large: mesonbuild__meson-912
Token count is too large: googleapis__google-cloud-python-1652
Token count is too large: pandas-dev__pandas-28373
Token count is too large: pantsbuild__pants-6683
Token count is too large: ytdl-org__youtube-dl-1256
Token count is too large: Lightning-AI__lightning-1125


Generating train split: 2839 examples [03:32, 19.95 examples/s]

Token count is too large: mesonbuild__meson-5292
Token count is too large: Qiskit__qiskit-2427
Token count is too large: pandas-dev__pandas-39393
Token count is too large: mesonbuild__meson-5633
Token count is too large: Qiskit__qiskit-713
Token count is too large: docker__compose-2349
Token count is too large: google__jax-3018
Token count is too large: pandas-dev__pandas-27154
Token count is too large: Lightning-AI__lightning-2781
Token count is too large: pandas-dev__pandas-28130
Token count is too large: google__jax-2183
Token count is too large: numpy__numpy-6400
Token count is too large: huggingface__transformers-9922
Token count is too large: PrefectHQ__prefect-2150
Token count is too large: conda__conda-11796
Token count is too large: pantsbuild__pants-8430
Token count is too large: numpy__numpy-18350
Token count is too large: conda__conda-7395
Token count is too large: ipython__ipython-2101
Token count is too large: mesonbuild__meson-6721


Generating train split: 2843 examples [03:32, 17.09 examples/s]

Token count is too large: conan-io__conan-2672
Token count is too large: open-mmlab__mmdetection-2093
Token count is too large: jupyterlab__jupyterlab-8913
Token count is too large: pandas-dev__pandas-6366
Token count is too large: open-mmlab__mmdetection-2824
Token count is too large: pandas-dev__pandas-32598


Generating train split: 2846 examples [03:33, 13.32 examples/s]

Token count is too large: pandas-dev__pandas-21578
Token count is too large: huggingface__transformers-17667
Token count is too large: pandas-dev__pandas-36371
Token count is too large: conan-io__conan-3014
Token count is too large: google__jax-1058
Token count is too large: pandas-dev__pandas-15939
Token count is too large: docker__compose-5602
Token count is too large: conda__conda-1618
Token count is too large: pandas-dev__pandas-32591
Token count is too large: celery__celery-6898
Token count is too large: Qiskit__qiskit-2917
Token count is too large: pandas-dev__pandas-22377
Token count is too large: huggingface__transformers-23194
Token count is too large: pyca__cryptography-1348
Token count is too large: pypa__pip-3312
Token count is too large: apache__airflow-22678
Token count is too large: pandas-dev__pandas-20047
Token count is too large: pandas-dev__pandas-10988
Token count is too large: pypa__pip-8684
Token count is too large: huggingface__transformers-7344
Token count is to

Generating train split: 2850 examples [03:33, 13.91 examples/s]

Token count is too large: pandas-dev__pandas-25961
Token count is too large: pypa__pip-11871
Token count is too large: pandas-dev__pandas-18961
Token count is too large: ytdl-org__youtube-dl-26507
Token count is too large: pantsbuild__pants-17939
Token count is too large: mesonbuild__meson-6466
Token count is too large: scipy__scipy-344
Token count is too large: pandas-dev__pandas-24045
Token count is too large: pypa__pip-4395
Token count is too large: pandas-dev__pandas-34371
Token count is too large: gitpython-developers__GitPython-667
Token count is too large: docker__compose-6281
Token count is too large: mesonbuild__meson-6561
Token count is too large: wagtail__wagtail-9976
Token count is too large: Qiskit__qiskit-8278
Token count is too large: pandas-dev__pandas-22704


Generating train split: 2854 examples [03:33, 15.09 examples/s]

Token count is too large: pandas-dev__pandas-10473
Token count is too large: pandas-dev__pandas-31734
Token count is too large: Qiskit__qiskit-785
Token count is too large: pandas-dev__pandas-4247
Token count is too large: pantsbuild__pants-16948
Token count is too large: apache__airflow-12636
Token count is too large: conda__conda-2708
Token count is too large: conda__conda-7607
Token count is too large: Lightning-AI__lightning-3252
Token count is too large: pandas-dev__pandas-20834
Token count is too large: pandas-dev__pandas-23499
Token count is too large: apache__airflow-1261
Token count is too large: numpy__numpy-6538
Token count is too large: google__jax-2150
Token count is too large: huggingface__transformers-24349
Token count is too large: pandas-dev__pandas-3044


Generating train split: 2856 examples [03:34,  9.86 examples/s]

Token count is too large: conda__conda-4922
Token count is too large: pandas-dev__pandas-25358
Token count is too large: mesonbuild__meson-6890
Token count is too large: pandas-dev__pandas-11427
Token count is too large: numpy__numpy-10951
Token count is too large: conan-io__conan-3684
Token count is too large: apache__airflow-11732
Token count is too large: pandas-dev__pandas-31905


Generating train split: 2860 examples [03:34, 13.01 examples/s]

Token count is too large: pandas-dev__pandas-38544
Token count is too large: googleapis__google-cloud-python-8213
Token count is too large: Qiskit__qiskit-10000
Token count is too large: pypa__pip-1352
Token count is too large: pandas-dev__pandas-33462
Token count is too large: pantsbuild__pants-19022
Token count is too large: pandas-dev__pandas-14096
Token count is too large: pandas-dev__pandas-27786
Token count is too large: Qiskit__qiskit-2325
Token count is too large: conda__conda-8015
Token count is too large: pandas-dev__pandas-3572
Token count is too large: Qiskit__qiskit-4904
Token count is too large: explosion__spaCy-3267


Generating train split: 2869 examples [03:34, 17.55 examples/s]

Token count is too large: pandas-dev__pandas-24633
Token count is too large: conan-io__conan-8655
Token count is too large: huggingface__transformers-18232
Token count is too large: mesonbuild__meson-7667
Token count is too large: pantsbuild__pants-18303
Token count is too large: google__jax-3096
Token count is too large: docker__compose-2938
Token count is too large: conan-io__conan-6254
Token count is too large: pandas-dev__pandas-6448
Token count is too large: google__jax-2558
Token count is too large: celery__celery-4278
Token count is too large: huggingface__transformers-18451
Token count is too large: huggingface__transformers-1120
Token count is too large: pandas-dev__pandas-5850


Generating train split: 2872 examples [03:35, 13.60 examples/s]

Token count is too large: mesonbuild__meson-5263
Token count is too large: jupyterlab__jupyterlab-7374
Token count is too large: Lightning-AI__lightning-2829
Token count is too large: open-mmlab__mmdetection-5930
Token count is too large: google__jax-2513
Token count is too large: ray-project__ray-8304
Token count is too large: pandas-dev__pandas-18757
Token count is too large: googleapis__google-cloud-python-5711
Token count is too large: pandas-dev__pandas-16958
Token count is too large: Qiskit__qiskit-9471
Token count is too large: pandas-dev__pandas-830
Token count is too large: docker__compose-6320
Token count is too large: Qiskit__qiskit-2000
Token count is too large: numpy__numpy-8643
Token count is too large: ipython__ipython-3978
Token count is too large: huggingface__transformers-3147


Generating train split: 2876 examples [03:35, 13.87 examples/s]

Token count is too large: pandas-dev__pandas-34927
Token count is too large: ipython__ipython-8882
Token count is too large: ray-project__ray-7444
Token count is too large: mesonbuild__meson-10331
Token count is too large: pandas-dev__pandas-22825
Token count is too large: wagtail__wagtail-10192
Token count is too large: apache__airflow-13932
Token count is too large: mesonbuild__meson-1136
Token count is too large: pandas-dev__pandas-26239
Token count is too large: Qiskit__qiskit-4097
Token count is too large: mesonbuild__meson-6321
Token count is too large: mesonbuild__meson-1320


Generating train split: 2881 examples [03:35, 13.56 examples/s]

Token count is too large: pypa__pip-6749
Token count is too large: pandas-dev__pandas-31484
Token count is too large: pandas-dev__pandas-10142
Token count is too large: pandas-dev__pandas-6109
Token count is too large: ray-project__ray-7262
Token count is too large: apache__airflow-22355
Token count is too large: Qiskit__qiskit-6299
Token count is too large: open-mmlab__mmdetection-6918
Token count is too large: jupyterlab__jupyterlab-13126
Token count is too large: conan-io__conan-5028


Generating train split: 2886 examples [03:35, 18.25 examples/s]

Token count is too large: pandas-dev__pandas-10397
Token count is too large: huggingface__transformers-13873
Token count is too large: apache__airflow-24488
Token count is too large: PrefectHQ__prefect-2666
Token count is too large: pypa__pip-10588
Token count is too large: conda__conda-6957
Token count is too large: pandas-dev__pandas-30710
Token count is too large: Lightning-AI__lightning-457
Token count is too large: celery__celery-3952
Token count is too large: huggingface__transformers-9578


Generating train split: 2889 examples [03:36, 16.45 examples/s]

Token count is too large: huggingface__transformers-24197
Token count is too large: jupyterlab__jupyterlab-5723
Token count is too large: pandas-dev__pandas-36292
Token count is too large: mesonbuild__meson-10864
Token count is too large: pandas-dev__pandas-10516
Token count is too large: pypa__pip-8394
Token count is too large: dagster-io__dagster-10501
Token count is too large: scipy__scipy-5793
Token count is too large: pandas-dev__pandas-36998
Token count is too large: Lightning-AI__lightning-3213
Token count is too large: Qiskit__qiskit-5807
Token count is too large: pandas-dev__pandas-23187
Token count is too large: ipython__ipython-4514
Token count is too large: numpy__numpy-11428
Token count is too large: pantsbuild__pants-6284
Token count is too large: googleapis__google-cloud-python-480
Token count is too large: apache__airflow-28664
Token count is too large: apache__airflow-23237
Token count is too large: googleapis__google-cloud-python-318
Token count is too large: pandas-d

Generating train split: 2894 examples [03:36, 13.77 examples/s]

Token count is too large: pandas-dev__pandas-24830
Token count is too large: conan-io__conan-10416
Token count is too large: conan-io__conan-10797
Token count is too large: pandas-dev__pandas-32977
Token count is too large: huggingface__transformers-18372
Token count is too large: pandas-dev__pandas-37199
Token count is too large: pandas-dev__pandas-10098


Generating train split: 2897 examples [03:36, 15.86 examples/s]

Token count is too large: pandas-dev__pandas-21660
Token count is too large: mesonbuild__meson-11441
Token count is too large: Lightning-AI__lightning-1666
Token count is too large: numpy__numpy-9285
Token count is too large: pandas-dev__pandas-8007
Token count is too large: pypa__pip-9673
Token count is too large: docker__compose-5011
Token count is too large: Lightning-AI__lightning-336
Token count is too large: ipython__ipython-950
Token count is too large: ipython__ipython-11398


Generating train split: 2903 examples [03:37, 15.21 examples/s]

Token count is too large: pandas-dev__pandas-4856
Token count is too large: huggingface__transformers-19347
Token count is too large: Qiskit__qiskit-9222
Token count is too large: Qiskit__qiskit-9067
Token count is too large: Qiskit__qiskit-573
Token count is too large: Qiskit__qiskit-8528
Token count is too large: mesonbuild__meson-4416
Token count is too large: apache__airflow-12466
Token count is too large: Qiskit__qiskit-4079
Token count is too large: PrefectHQ__prefect-2831
Token count is too large: docker__compose-6222
Token count is too large: ipython__ipython-10792
Token count is too large: pandas-dev__pandas-35302
Token count is too large: numpy__numpy-13561
Token count is too large: conan-io__conan-9643
Token count is too large: Qiskit__qiskit-5020
Token count is too large: huggingface__transformers-23869
Token count is too large: PrefectHQ__prefect-546
Token count is too large: huggingface__transformers-12424
Token count is too large: pandas-dev__pandas-28582


Generating train split: 2906 examples [03:37, 11.09 examples/s]

Token count is too large: huggingface__transformers-25146
Token count is too large: pandas-dev__pandas-33190
Token count is too large: Lightning-AI__lightning-499
Token count is too large: conan-io__conan-4615
Token count is too large: docker__compose-7684
Token count is too large: Qiskit__qiskit-4631
Token count is too large: pandas-dev__pandas-17364
Token count is too large: pandas-dev__pandas-19610
Token count is too large: huggingface__transformers-9018
Token count is too large: numpy__numpy-16815
Token count is too large: ipython__ipython-9225
Token count is too large: pandas-dev__pandas-18555
Token count is too large: mesonbuild__meson-1010
Token count is too large: pandas-dev__pandas-35393
Token count is too large: Qiskit__qiskit-4038
Token count is too large: pandas-dev__pandas-32424


Generating train split: 2909 examples [03:37, 12.35 examples/s]

Token count is too large: ipython__ipython-237
Token count is too large: pandas-dev__pandas-6622
Token count is too large: pandas-dev__pandas-36683
Token count is too large: numpy__numpy-9916
Token count is too large: pandas-dev__pandas-18371
Token count is too large: conda__conda-6491
Token count is too large: mesonbuild__meson-1216
Token count is too large: apache__airflow-18883
Token count is too large: ray-project__ray-10705
Token count is too large: open-mmlab__mmdetection-3011
Token count is too large: pandas-dev__pandas-21650
Token count is too large: pandas-dev__pandas-39316


Generating train split: 2915 examples [03:37, 16.02 examples/s]

Token count is too large: pantsbuild__pants-13447
Token count is too large: pandas-dev__pandas-9127
Token count is too large: pantsbuild__pants-17594
Token count is too large: huggingface__transformers-15318
Token count is too large: mesonbuild__meson-7736
Token count is too large: pandas-dev__pandas-26055
Token count is too large: pandas-dev__pandas-17950
Token count is too large: explosion__spaCy-3273
Token count is too large: celery__celery-3218
Token count is too large: pandas-dev__pandas-35253
Token count is too large: pandas-dev__pandas-26732


Generating train split: 2917 examples [03:38, 14.35 examples/s]

Token count is too large: numpy__numpy-11298
Token count is too large: pandas-dev__pandas-24733
Token count is too large: pandas-dev__pandas-3473
Token count is too large: scipy__scipy-387
Token count is too large: numpy__numpy-5490
Token count is too large: huggingface__transformers-1723
Token count is too large: Qiskit__qiskit-4328
Token count is too large: huggingface__transformers-11318
Token count is too large: pandas-dev__pandas-29493
Token count is too large: Lightning-AI__lightning-1196
Token count is too large: pantsbuild__pants-7430
Token count is too large: ipython__ipython-13588
Token count is too large: huggingface__transformers-3909
Token count is too large: numpy__numpy-18961
Token count is too large: pandas-dev__pandas-18380
Token count is too large: googleapis__google-cloud-python-3426
Token count is too large: pandas-dev__pandas-17930
Token count is too large: Qiskit__qiskit-10164
Token count is too large: pypa__pip-3117
Token count is too large: pandas-dev__pandas-38

Generating train split: 2919 examples [03:38,  9.04 examples/s]

Token count is too large: huggingface__transformers-21896
Token count is too large: pandas-dev__pandas-37992
Token count is too large: numpy__numpy-23045
Token count is too large: ytdl-org__youtube-dl-20646
Token count is too large: pandas-dev__pandas-21813
Token count is too large: conan-io__conan-2280
Token count is too large: conan-io__conan-8927
Token count is too large: pandas-dev__pandas-24632
Token count is too large: pandas-dev__pandas-33418
Token count is too large: Qiskit__qiskit-762
Token count is too large: pandas-dev__pandas-25759


Generating train split: 2932 examples [03:38, 19.25 examples/s]

Token count is too large: pandas-dev__pandas-22710
Token count is too large: mesonbuild__meson-2457
Token count is too large: pandas-dev__pandas-10401
Token count is too large: pandas-dev__pandas-38939
Token count is too large: Lightning-AI__lightning-1434
Token count is too large: huggingface__transformers-12853
Token count is too large: google__jax-580
Token count is too large: PrefectHQ__prefect-246
Token count is too large: huggingface__transformers-21489
Token count is too large: googleapis__google-cloud-python-11339
Token count is too large: pandas-dev__pandas-30340
Token count is too large: mesonbuild__meson-6909
Token count is too large: numpy__numpy-22561
Token count is too large: conan-io__conan-3843
Token count is too large: googleapis__google-cloud-python-9334
Token count is too large: pandas-dev__pandas-4366
Token count is too large: conan-io__conan-3545
Token count is too large: pandas-dev__pandas-21594


Generating train split: 2935 examples [03:39, 11.67 examples/s]

Token count is too large: pandas-dev__pandas-24288
Token count is too large: scipy__scipy-3922
Token count is too large: pantsbuild__pants-15457
Token count is too large: ray-project__ray-7366
Token count is too large: conan-io__conan-5856


Generating train split: 2941 examples [03:39, 13.81 examples/s]

Token count is too large: huggingface__transformers-21785
Token count is too large: mesonbuild__meson-7561
Token count is too large: pandas-dev__pandas-37108
Token count is too large: pandas-dev__pandas-16887
Token count is too large: pandas-dev__pandas-38030
Token count is too large: google__jax-1143
Token count is too large: pandas-dev__pandas-3842
Token count is too large: huggingface__transformers-21738
Token count is too large: pandas-dev__pandas-16600
Token count is too large: Qiskit__qiskit-667
Token count is too large: numpy__numpy-15920
Token count is too large: huggingface__transformers-13432
Token count is too large: Qiskit__qiskit-3003
Token count is too large: pandas-dev__pandas-15742
Token count is too large: mesonbuild__meson-1210
Token count is too large: pandas-dev__pandas-6908
Token count is too large: pandas-dev__pandas-3557


Generating train split: 2944 examples [03:40, 12.09 examples/s]

Token count is too large: pantsbuild__pants-13827
Token count is too large: mesonbuild__meson-6891
Token count is too large: numpy__numpy-4463
Token count is too large: Qiskit__qiskit-6225
Token count is too large: pandas-dev__pandas-39720
Token count is too large: ytdl-org__youtube-dl-3786
Token count is too large: huggingface__transformers-24529


Generating train split: 2949 examples [03:40, 14.80 examples/s]

Token count is too large: pandas-dev__pandas-5780
Token count is too large: ray-project__ray-8840
Token count is too large: pandas-dev__pandas-23811
Token count is too large: ytdl-org__youtube-dl-3075
Token count is too large: apache__airflow-9277
Token count is too large: apache__airflow-8165
Token count is too large: huggingface__transformers-18851
Token count is too large: googleapis__google-cloud-python-2375
Token count is too large: pandas-dev__pandas-23981
Token count is too large: googleapis__google-cloud-python-6437
Token count is too large: Qiskit__qiskit-9287
Token count is too large: numpy__numpy-14799
Token count is too large: pandas-dev__pandas-23370
Token count is too large: numpy__numpy-10786
Token count is too large: Lightning-AI__lightning-3287
Token count is too large: Lightning-AI__lightning-1561
Token count is too large: Qiskit__qiskit-9832


Generating train split: 2956 examples [03:41, 13.23 examples/s]

Token count is too large: dagster-io__dagster-8984
Token count is too large: huggingface__transformers-16814
Token count is too large: pandas-dev__pandas-4283
Token count is too large: pandas-dev__pandas-17507
Token count is too large: huggingface__transformers-14420
Token count is too large: Qiskit__qiskit-6986
Token count is too large: googleapis__google-cloud-python-413
Token count is too large: Qiskit__qiskit-459


Generating train split: 2959 examples [03:41, 15.32 examples/s]

Token count is too large: pandas-dev__pandas-30923
There was an error processing
Token count is too large: ipython__ipython-1707
Token count is too large: pandas-dev__pandas-13804
Token count is too large: Qiskit__qiskit-9660
Token count is too large: googleapis__google-cloud-python-4357
Token count is too large: pandas-dev__pandas-25008
Token count is too large: pantsbuild__pants-5302


Generating train split: 2967 examples [03:41, 19.54 examples/s]

Token count is too large: pandas-dev__pandas-24955
Token count is too large: wagtail__wagtail-1623
Token count is too large: pandas-dev__pandas-38548
Token count is too large: huggingface__transformers-4759
Token count is too large: mesonbuild__meson-5185
Token count is too large: googleapis__google-cloud-python-3737
Token count is too large: pandas-dev__pandas-26753
Token count is too large: scipy__scipy-400
Token count is too large: ray-project__ray-3464
Token count is too large: docker__compose-3473
Token count is too large: Qiskit__qiskit-3838
Token count is too large: numpy__numpy-14974
Token count is too large: pandas-dev__pandas-16028


Generating train split: 2970 examples [03:41, 19.72 examples/s]

Token count is too large: pandas-dev__pandas-4117
Token count is too large: ray-project__ray-1261
Token count is too large: pandas-dev__pandas-32512
Token count is too large: pypa__pip-10869
Token count is too large: pandas-dev__pandas-19921
Token count is too large: pandas-dev__pandas-26854
Token count is too large: PrefectHQ__prefect-3000
Token count is too large: pandas-dev__pandas-31653
Token count is too large: Qiskit__qiskit-6585
Token count is too large: mesonbuild__meson-11022
Token count is too large: PrefectHQ__prefect-663
Token count is too large: docker__compose-2309
Token count is too large: Qiskit__qiskit-8669
Token count is too large: huggingface__transformers-8435
Token count is too large: ipython__ipython-6651
Token count is too large: apache__airflow-23160
Token count is too large: wagtail__wagtail-9603
Token count is too large: Qiskit__qiskit-3180
Token count is too large: mesonbuild__meson-3474
Token count is too large: pandas-dev__pandas-39486
Token count is too la

Generating train split: 2978 examples [03:42, 19.22 examples/s]

Token count is too large: pantsbuild__pants-15383
Token count is too large: numpy__numpy-8445
Token count is too large: Lightning-AI__lightning-653
Token count is too large: pandas-dev__pandas-29004
Token count is too large: pandas-dev__pandas-8926
Token count is too large: Qiskit__qiskit-76
Token count is too large: pypa__pip-9436
Token count is too large: conda__conda-5018
Token count is too large: scipy__scipy-4426
Token count is too large: ipython__ipython-6919
Token count is too large: pantsbuild__pants-4630
Token count is too large: Qiskit__qiskit-1867
Token count is too large: pandas-dev__pandas-7447


Generating train split: 2982 examples [03:42, 18.79 examples/s]

Token count is too large: jupyterlab__jupyterlab-8463
Token count is too large: Qiskit__qiskit-10090
Token count is too large: gitpython-developers__GitPython-953
Token count is too large: Qiskit__qiskit-5039
Token count is too large: docker__compose-6509
Token count is too large: huggingface__transformers-3100
Token count is too large: googleapis__google-cloud-python-1052
Token count is too large: ipython__ipython-13030
Token count is too large: pyca__cryptography-4442
Token count is too large: Qiskit__qiskit-7144
Token count is too large: Qiskit__qiskit-2106
Token count is too large: docker__compose-1755
Token count is too large: googleapis__google-cloud-python-5498
Token count is too large: pandas-dev__pandas-9104
Token count is too large: wagtail__wagtail-1368
Token count is too large: conan-io__conan-2891


Generating train split: 2986 examples [03:42, 17.92 examples/s]

Token count is too large: pandas-dev__pandas-36898
Token count is too large: pandas-dev__pandas-10680
Token count is too large: docker__compose-6494
Token count is too large: ytdl-org__youtube-dl-31434
Token count is too large: numpy__numpy-4390
Token count is too large: pandas-dev__pandas-9605
Token count is too large: pyca__cryptography-5849
Token count is too large: scipy__scipy-5494
Token count is too large: ipython__ipython-11137


Generating train split: 2988 examples [03:42, 16.63 examples/s]

Token count is too large: pandas-dev__pandas-39615
Token count is too large: scipy__scipy-3347
Token count is too large: celery__celery-4719
Token count is too large: Qiskit__qiskit-10358
Token count is too large: pyca__cryptography-2292
Token count is too large: pandas-dev__pandas-9934
Token count is too large: pandas-dev__pandas-25254
Token count is too large: wagtail__wagtail-6335
Token count is too large: PrefectHQ__prefect-406
Token count is too large: docker__compose-3718
Token count is too large: numpy__numpy-14629
Token count is too large: google__jax-2591


Generating train split: 2992 examples [03:43, 15.41 examples/s]

Token count is too large: pandas-dev__pandas-6299
Token count is too large: pantsbuild__pants-15031
Token count is too large: pandas-dev__pandas-4414
Token count is too large: apache__airflow-19605
Token count is too large: ytdl-org__youtube-dl-5780
Token count is too large: pantsbuild__pants-4201
Token count is too large: Qiskit__qiskit-4597
Token count is too large: ray-project__ray-8366
Token count is too large: open-mmlab__mmdetection-7516
Token count is too large: pandas-dev__pandas-23435


Generating train split: 2995 examples [03:43, 17.72 examples/s]

Token count is too large: ray-project__ray-5877
Token count is too large: mesonbuild__meson-5187
Token count is too large: pandas-dev__pandas-23262
Token count is too large: numpy__numpy-18831
Token count is too large: mesonbuild__meson-4743


Generating train split: 2997 examples [03:43, 15.11 examples/s]

Token count is too large: pandas-dev__pandas-7215
Token count is too large: googleapis__google-cloud-python-11310
Token count is too large: pandas-dev__pandas-33392
Token count is too large: PrefectHQ__prefect-2707
Token count is too large: pyca__cryptography-5338
Token count is too large: pypa__pip-11860
Token count is too large: pandas-dev__pandas-17803
Token count is too large: pandas-dev__pandas-12068
Token count is too large: celery__celery-8301
Token count is too large: Lightning-AI__lightning-2681
Token count is too large: googleapis__google-cloud-python-6335
Token count is too large: conan-io__conan-4250
Token count is too large: google__jax-822
Token count is too large: pandas-dev__pandas-2935


Generating train split: 3000 examples [03:43, 14.73 examples/s]

Token count is too large: pandas-dev__pandas-33723
Token count is too large: pantsbuild__pants-5889
Token count is too large: PrefectHQ__prefect-287
Token count is too large: numpy__numpy-7433
Token count is too large: googleapis__google-cloud-python-10017
Token count is too large: apache__airflow-13470
Token count is too large: pantsbuild__pants-11703
Token count is too large: pandas-dev__pandas-24492
Token count is too large: pandas-dev__pandas-23919
Token count is too large: Qiskit__qiskit-2816
Token count is too large: pandas-dev__pandas-8029


Generating train split: 3004 examples [03:43, 14.91 examples/s]

Token count is too large: numpy__numpy-5234
Token count is too large: huggingface__transformers-13988
Token count is too large: huggingface__transformers-24501
Token count is too large: huggingface__transformers-12930
Token count is too large: pandas-dev__pandas-26341
Token count is too large: numpy__numpy-4421
Token count is too large: celery__celery-5681
Token count is too large: Qiskit__qiskit-4438
Token count is too large: pantsbuild__pants-13400
Token count is too large: Qiskit__qiskit-7673


Generating train split: 3008 examples [03:44, 16.32 examples/s]

Token count is too large: pandas-dev__pandas-3225
Token count is too large: googleapis__google-cloud-python-349
Token count is too large: numpy__numpy-5825
Token count is too large: google__jax-203
Token count is too large: googleapis__google-cloud-python-11317
Token count is too large: Qiskit__qiskit-1096
Token count is too large: gitpython-developers__GitPython-685
Token count is too large: conda__conda-3682
Token count is too large: huggingface__transformers-21881
Token count is too large: pandas-dev__pandas-20966
Token count is too large: conda__conda-10115


Generating train split: 3010 examples [03:44, 10.13 examples/s]

Token count is too large: pandas-dev__pandas-22375
Token count is too large: pandas-dev__pandas-26746
Token count is too large: scipy__scipy-3696
Token count is too large: google__jax-146
Token count is too large: ytdl-org__youtube-dl-4629
Token count is too large: ray-project__ray-8964
Token count is too large: huggingface__transformers-12561
Token count is too large: Qiskit__qiskit-9309
Token count is too large: googleapis__google-cloud-python-3352
Token count is too large: pandas-dev__pandas-8973
Token count is too large: pypa__pip-2469
Token count is too large: numpy__numpy-20745
Token count is too large: pantsbuild__pants-18144
Token count is too large: open-mmlab__mmdetection-7387


Generating train split: 3012 examples [03:44,  9.62 examples/s]

Token count is too large: pandas-dev__pandas-3841
Token count is too large: pandas-dev__pandas-38266
Token count is too large: huggingface__transformers-22746
Token count is too large: pypa__pip-8861
Token count is too large: googleapis__google-cloud-python-4987
Token count is too large: PrefectHQ__prefect-2560
Token count is too large: dagster-io__dagster-14392
Token count is too large: huggingface__transformers-10780
Token count is too large: Lightning-AI__lightning-2437
Token count is too large: pandas-dev__pandas-17966
Token count is too large: ipython__ipython-11400
Token count is too large: ipython__ipython-11505
Token count is too large: huggingface__transformers-14992
Token count is too large: mesonbuild__meson-5744
Token count is too large: Qiskit__qiskit-7264
Token count is too large: google__jax-873
Token count is too large: pandas-dev__pandas-34058
Token count is too large: google__jax-3173
Token count is too large: wagtail__wagtail-7864
Token count is too large: pandas-dev

Generating train split: 3014 examples [03:45,  7.67 examples/s]

Token count is too large: pandas-dev__pandas-22655
Token count is too large: pandas-dev__pandas-6542
Token count is too large: ray-project__ray-3238
Token count is too large: apache__airflow-25633
Token count is too large: googleapis__google-cloud-python-5760
Token count is too large: conda__conda-11949
Token count is too large: wagtail__wagtail-8473


Generating train split: 3019 examples [03:45, 10.76 examples/s]

Token count is too large: numpy__numpy-12237
Token count is too large: googleapis__google-cloud-python-1205
Token count is too large: huggingface__transformers-1724
Token count is too large: googleapis__google-cloud-python-264
Token count is too large: apache__airflow-16415
Token count is too large: huggingface__transformers-3855
Token count is too large: google__jax-2395
Token count is too large: conan-io__conan-2831
Token count is too large: pandas-dev__pandas-27691
Token count is too large: pandas-dev__pandas-18749
Token count is too large: googleapis__google-cloud-python-1638
Token count is too large: pypa__pip-6427
Token count is too large: Lightning-AI__lightning-3229
Token count is too large: pandas-dev__pandas-31571
Token count is too large: pandas-dev__pandas-15050
Token count is too large: ray-project__ray-10721
Token count is too large: pandas-dev__pandas-38698
Token count is too large: Qiskit__qiskit-3612
Token count is too large: jupyterlab__jupyterlab-2958
Token count is 

Generating train split: 3027 examples [03:46,  9.91 examples/s]

Token count is too large: ipython__ipython-2170
Token count is too large: pandas-dev__pandas-16926
Token count is too large: docker__compose-5384
Token count is too large: celery__celery-7652
Token count is too large: docker__compose-4590
Token count is too large: conda__conda-6828
Token count is too large: conan-io__conan-4596
Token count is too large: pandas-dev__pandas-5266
Token count is too large: pandas-dev__pandas-33804
Token count is too large: pandas-dev__pandas-32304
Token count is too large: pandas-dev__pandas-5040
Token count is too large: pandas-dev__pandas-25993
Token count is too large: conda__conda-3436
Token count is too large: pypa__pip-1806


Generating train split: 3029 examples [03:46, 10.30 examples/s]

Token count is too large: Qiskit__qiskit-9486
Token count is too large: numpy__numpy-22748
Token count is too large: pandas-dev__pandas-19260
Token count is too large: Lightning-AI__lightning-2776
Token count is too large: wagtail__wagtail-9114
Token count is too large: pantsbuild__pants-13278
Token count is too large: Lightning-AI__lightning-1935
Token count is too large: mesonbuild__meson-11369
Token count is too large: pandas-dev__pandas-23147
Token count is too large: numpy__numpy-2942
Token count is too large: wagtail__wagtail-7212
Token count is too large: pandas-dev__pandas-9441
Token count is too large: mesonbuild__meson-5500
Token count is too large: googleapis__google-cloud-python-8111
Token count is too large: googleapis__google-cloud-python-4727
Token count is too large: pypa__pip-9274
Token count is too large: pandas-dev__pandas-13533
Token count is too large: pypa__pip-1623
Token count is too large: googleapis__google-cloud-python-74


Generating train split: 3032 examples [03:47,  8.59 examples/s]

Token count is too large: pandas-dev__pandas-27100
Token count is too large: pypa__pip-11858
Token count is too large: ipython__ipython-1851
Token count is too large: Lightning-AI__lightning-597
Token count is too large: apache__airflow-9698
Token count is too large: googleapis__google-cloud-python-9541
Token count is too large: pandas-dev__pandas-37023
Token count is too large: celery__celery-5085
Token count is too large: wagtail__wagtail-1382
Token count is too large: pandas-dev__pandas-17783
Token count is too large: ipython__ipython-542


Generating train split: 3035 examples [03:47, 10.25 examples/s]

Token count is too large: googleapis__google-cloud-python-8981
Token count is too large: conan-io__conan-4709
Token count is too large: pandas-dev__pandas-17744
Token count is too large: pandas-dev__pandas-36582
Token count is too large: pandas-dev__pandas-37221
Token count is too large: mesonbuild__meson-9193
Token count is too large: mesonbuild__meson-8292
Token count is too large: Qiskit__qiskit-7003
Token count is too large: ytdl-org__youtube-dl-18583


Generating train split: 3037 examples [03:47,  7.41 examples/s]

Token count is too large: pandas-dev__pandas-21249
Token count is too large: wagtail__wagtail-4166
Token count is too large: ipython__ipython-7129
Token count is too large: conan-io__conan-5537
Token count is too large: Qiskit__qiskit-866
Token count is too large: ytdl-org__youtube-dl-3050
Token count is too large: pandas-dev__pandas-18460
Token count is too large: open-mmlab__mmdetection-4615
Token count is too large: pandas-dev__pandas-6919


Generating train split: 3039 examples [03:48,  6.97 examples/s]

Token count is too large: numpy__numpy-5928
Token count is too large: pandas-dev__pandas-18248
Token count is too large: PrefectHQ__prefect-1945
Token count is too large: pandas-dev__pandas-19856
Token count is too large: scipy__scipy-5732
Token count is too large: Qiskit__qiskit-606
Token count is too large: apache__airflow-32781
Token count is too large: Lightning-AI__lightning-1582
Token count is too large: huggingface__transformers-12591
Token count is too large: celery__celery-5331
Token count is too large: numpy__numpy-9556
Token count is too large: pandas-dev__pandas-37468
Token count is too large: pandas-dev__pandas-36418


Generating train split: 3041 examples [03:48,  7.96 examples/s]

Token count is too large: Qiskit__qiskit-10621
Token count is too large: pyca__cryptography-2087
Token count is too large: mesonbuild__meson-1126
Token count is too large: apache__airflow-32216
Token count is too large: huggingface__transformers-11079
Token count is too large: pandas-dev__pandas-18229
Token count is too large: numpy__numpy-8142


Generating train split: 3048 examples [03:48, 14.82 examples/s]

Token count is too large: pandas-dev__pandas-36927
Token count is too large: pandas-dev__pandas-23512
Token count is too large: pandas-dev__pandas-37192
Token count is too large: ray-project__ray-6691
Token count is too large: pandas-dev__pandas-18825
Token count is too large: Lightning-AI__lightning-2446
Token count is too large: tiangolo__fastapi-241
Token count is too large: pandas-dev__pandas-37374
Token count is too large: gitpython-developers__GitPython-949
Token count is too large: mesonbuild__meson-9958
Token count is too large: ray-project__ray-5606
Token count is too large: apache__airflow-24099
Token count is too large: conda__conda-5555
Token count is too large: mesonbuild__meson-8383
Token count is too large: googleapis__google-cloud-python-6270
Token count is too large: pandas-dev__pandas-5760
Token count is too large: Qiskit__qiskit-3642
Token count is too large: numpy__numpy-7416
Token count is too large: PrefectHQ__prefect-158
Token count is too large: pandas-dev__pand

Generating train split: 3060 examples [03:49, 16.96 examples/s]

Token count is too large: huggingface__transformers-20984
Token count is too large: pandas-dev__pandas-18157
Token count is too large: conan-io__conan-3197
Token count is too large: dagster-io__dagster-8689
Token count is too large: numpy__numpy-16247
Token count is too large: huggingface__transformers-11675
Token count is too large: huggingface__transformers-3198
Token count is too large: scipy__scipy-3944
Token count is too large: scipy__scipy-5749
Token count is too large: pandas-dev__pandas-19879
Token count is too large: conda__conda-7986
Token count is too large: googleapis__google-cloud-python-5821
Token count is too large: pandas-dev__pandas-38472
Token count is too large: apache__airflow-13880
Token count is too large: pandas-dev__pandas-19552


Generating train split: 3066 examples [03:49, 19.01 examples/s]

Token count is too large: mesonbuild__meson-7844
Token count is too large: ray-project__ray-1880
Token count is too large: pantsbuild__pants-13573
Token count is too large: numpy__numpy-5364
Token count is too large: numpy__numpy-23599
Token count is too large: conan-io__conan-3918
Token count is too large: numpy__numpy-22388
Token count is too large: apache__airflow-8558
Token count is too large: ipython__ipython-1391
Token count is too large: ytdl-org__youtube-dl-16054
Token count is too large: conan-io__conan-5381
Token count is too large: conda__conda-3748
Token count is too large: pandas-dev__pandas-23462
Token count is too large: gitpython-developers__GitPython-368
Token count is too large: conda__conda-4548
Token count is too large: explosion__spaCy-1552
Token count is too large: pyca__cryptography-3190
Token count is too large: numpy__numpy-21205


Generating train split: 3070 examples [03:50, 12.31 examples/s]

Token count is too large: pandas-dev__pandas-4955
Token count is too large: pyca__cryptography-1131
Token count is too large: pandas-dev__pandas-15618
Token count is too large: gitpython-developers__GitPython-1015
Token count is too large: docker__compose-3964
Token count is too large: huggingface__transformers-9932
Token count is too large: apache__airflow-25355


Generating train split: 3076 examples [03:50, 14.73 examples/s]

Token count is too large: huggingface__transformers-9677
Token count is too large: pandas-dev__pandas-24129
Token count is too large: googleapis__google-cloud-python-9491
Token count is too large: googleapis__google-cloud-python-1421
Token count is too large: pandas-dev__pandas-17488


Generating train split: 3079 examples [03:50, 15.45 examples/s]

Token count is too large: pandas-dev__pandas-10686
Token count is too large: Lightning-AI__lightning-2881
Token count is too large: docker__compose-6205
Token count is too large: Lightning-AI__lightning-1164
Token count is too large: jupyterlab__jupyterlab-2069
Token count is too large: apache__airflow-24772
Token count is too large: Qiskit__qiskit-9076
Token count is too large: pypa__pip-6095
Token count is too large: huggingface__transformers-11951
Token count is too large: docker__compose-7485
Token count is too large: pandas-dev__pandas-16340
Token count is too large: huggingface__transformers-13022
Token count is too large: pandas-dev__pandas-36433


Generating train split: 3083 examples [03:50, 16.03 examples/s]

Token count is too large: pandas-dev__pandas-23968
Token count is too large: Qiskit__qiskit-1257
Token count is too large: pandas-dev__pandas-16915
Token count is too large: pandas-dev__pandas-14717
Token count is too large: pandas-dev__pandas-37701
Token count is too large: mesonbuild__meson-6855
Token count is too large: conda__conda-1668
Token count is too large: ray-project__ray-8731
Token count is too large: numpy__numpy-5869
Token count is too large: scipy__scipy-4996
Token count is too large: pandas-dev__pandas-22912
Token count is too large: pyca__cryptography-5595
Token count is too large: pandas-dev__pandas-7092
Token count is too large: Qiskit__qiskit-6339
Token count is too large: PrefectHQ__prefect-131
Token count is too large: wagtail__wagtail-1605


Generating train split: 3086 examples [03:51, 11.45 examples/s]

Token count is too large: pandas-dev__pandas-4164
Token count is too large: PrefectHQ__prefect-684
Token count is too large: pandas-dev__pandas-11639
Token count is too large: numpy__numpy-23010
Token count is too large: apache__airflow-16860
Token count is too large: ipython__ipython-14052
Token count is too large: jupyterlab__jupyterlab-12772
Token count is too large: numpy__numpy-22700


Generating train split: 3088 examples [03:51,  9.75 examples/s]

Token count is too large: pypa__pip-5521
Token count is too large: Qiskit__qiskit-2381
Token count is too large: pandas-dev__pandas-31247


Generating train split: 3096 examples [03:52, 12.49 examples/s]

Token count is too large: google__jax-3032
Token count is too large: Lightning-AI__lightning-3320
Token count is too large: Qiskit__qiskit-10163
Token count is too large: pandas-dev__pandas-14765
Token count is too large: huggingface__transformers-7560
Token count is too large: pandas-dev__pandas-35324
Token count is too large: mesonbuild__meson-2717
Token count is too large: docker__compose-5566
Token count is too large: pandas-dev__pandas-8941
Token count is too large: pandas-dev__pandas-3632
Token count is too large: Qiskit__qiskit-701
Token count is too large: pandas-dev__pandas-34335
Token count is too large: pandas-dev__pandas-26241
Token count is too large: huggingface__transformers-14324


Generating train split: 3106 examples [03:52, 14.69 examples/s]

Token count is too large: pandas-dev__pandas-37025
Token count is too large: huggingface__transformers-11378
Token count is too large: pandas-dev__pandas-28919
Token count is too large: mesonbuild__meson-1456
Token count is too large: apache__airflow-12694
Token count is too large: Qiskit__qiskit-3224
Token count is too large: numpy__numpy-7793
Token count is too large: ipython__ipython-11803
Token count is too large: apache__airflow-10227


Generating train split: 3109 examples [03:52, 15.92 examples/s]

Token count is too large: explosion__spaCy-2808
Token count is too large: pandas-dev__pandas-23514
Token count is too large: pandas-dev__pandas-24338
Token count is too large: pandas-dev__pandas-4099
Token count is too large: numpy__numpy-5101
Token count is too large: conda__conda-8342
Token count is too large: Lightning-AI__lightning-1378
Token count is too large: docker__compose-276
Token count is too large: ray-project__ray-10821
Token count is too large: PrefectHQ__prefect-409


Generating train split: 3112 examples [03:53, 14.25 examples/s]

Token count is too large: huggingface__transformers-17731
Token count is too large: pandas-dev__pandas-3563
Token count is too large: pandas-dev__pandas-6812
Token count is too large: pandas-dev__pandas-31867
Token count is too large: pandas-dev__pandas-26157


Generating train split: 3114 examples [03:53, 12.06 examples/s]

Token count is too large: pandas-dev__pandas-13925
Token count is too large: Qiskit__qiskit-2239
Token count is too large: mesonbuild__meson-2817
Token count is too large: Qiskit__qiskit-7712
Token count is too large: wagtail__wagtail-1626
Token count is too large: ytdl-org__youtube-dl-14571
Token count is too large: ytdl-org__youtube-dl-12879
Token count is too large: scipy__scipy-3110
Token count is too large: pandas-dev__pandas-23019
Token count is too large: conan-io__conan-5817
Token count is too large: PrefectHQ__prefect-725
Token count is too large: numpy__numpy-8024


Generating train split: 3118 examples [03:53, 14.37 examples/s]

Token count is too large: pandas-dev__pandas-4534
Token count is too large: numpy__numpy-3056
Token count is too large: conan-io__conan-2952
Token count is too large: docker__compose-6234
Token count is too large: conda__conda-2701
Token count is too large: pandas-dev__pandas-14582
Token count is too large: huggingface__transformers-5636
Token count is too large: pandas-dev__pandas-23206
Token count is too large: conda__conda-8917
Token count is too large: jupyterlab__jupyterlab-12926


Generating train split: 3127 examples [03:53, 18.61 examples/s]

Token count is too large: huggingface__transformers-20043
Token count is too large: pandas-dev__pandas-8462
Token count is too large: pantsbuild__pants-10489
Token count is too large: huggingface__transformers-19124
Token count is too large: pantsbuild__pants-8639


Generating train split: 3130 examples [03:54, 17.69 examples/s]

Token count is too large: pandas-dev__pandas-26997
Token count is too large: pandas-dev__pandas-8455
Token count is too large: ytdl-org__youtube-dl-7210
Token count is too large: docker__compose-6406
Token count is too large: huggingface__transformers-8747
Token count is too large: pandas-dev__pandas-36838
Token count is too large: huggingface__transformers-21879
Token count is too large: pypa__pip-4067


Generating train split: 3136 examples [03:54, 18.67 examples/s]

Token count is too large: wagtail__wagtail-9735
Token count is too large: Lightning-AI__lightning-2482
Token count is too large: pandas-dev__pandas-8237
Token count is too large: Qiskit__qiskit-3418
Token count is too large: ytdl-org__youtube-dl-8718
Token count is too large: open-mmlab__mmdetection-8439
Token count is too large: jupyterlab__jupyterlab-9244
Token count is too large: pandas-dev__pandas-20819
Token count is too large: googleapis__google-cloud-python-5607
Token count is too large: ray-project__ray-7181
Token count is too large: pandas-dev__pandas-30882


Generating train split: 3138 examples [03:54, 18.04 examples/s]

Token count is too large: numpy__numpy-10599
Token count is too large: jupyterlab__jupyterlab-9040
Token count is too large: pandas-dev__pandas-18211
Token count is too large: apache__airflow-26100
Token count is too large: Qiskit__qiskit-1799
Token count is too large: numpy__numpy-15939
Token count is too large: Qiskit__qiskit-4669
Token count is too large: Lightning-AI__lightning-1753
Token count is too large: pandas-dev__pandas-29846
Token count is too large: googleapis__google-cloud-python-5815


Generating train split: 3150 examples [03:54, 31.40 examples/s]

Token count is too large: pandas-dev__pandas-39293
Token count is too large: pypa__pip-9170
Token count is too large: pandas-dev__pandas-19232
Token count is too large: celery__celery-6251
Token count is too large: conda__conda-12050
Token count is too large: Qiskit__qiskit-1800
Token count is too large: ipython__ipython-815
Token count is too large: docker__compose-7762
Token count is too large: pandas-dev__pandas-22198
Token count is too large: conda__conda-5186
Token count is too large: ytdl-org__youtube-dl-15807
Token count is too large: wagtail__wagtail-6279
Token count is too large: pandas-dev__pandas-39655
Token count is too large: google__jax-1152
Token count is too large: pandas-dev__pandas-34514


Generating train split: 3154 examples [03:55, 28.97 examples/s]

Token count is too large: ytdl-org__youtube-dl-621
Token count is too large: pandas-dev__pandas-7818
Token count is too large: pandas-dev__pandas-8293
Token count is too large: pandas-dev__pandas-17214
Token count is too large: pandas-dev__pandas-34812
Token count is too large: googleapis__google-cloud-python-6920
Token count is too large: pandas-dev__pandas-7181
Token count is too large: conan-io__conan-3603
Token count is too large: pandas-dev__pandas-28542


Generating train split: 3158 examples [03:55, 28.50 examples/s]

Token count is too large: pandas-dev__pandas-26408
Token count is too large: mesonbuild__meson-11977
Token count is too large: pandas-dev__pandas-30489
Token count is too large: pantsbuild__pants-17928
Token count is too large: pandas-dev__pandas-18225
Token count is too large: huggingface__transformers-7322
Token count is too large: pandas-dev__pandas-27925
Token count is too large: pandas-dev__pandas-16533
Token count is too large: pandas-dev__pandas-37322
Token count is too large: pandas-dev__pandas-8706
Token count is too large: huggingface__transformers-5551
Token count is too large: pyca__cryptography-3919
Token count is too large: mesonbuild__meson-5193
Token count is too large: pandas-dev__pandas-34910
Token count is too large: numpy__numpy-13374


Generating train split: 3166 examples [03:55, 24.13 examples/s]

Token count is too large: pandas-dev__pandas-21871
Token count is too large: pandas-dev__pandas-38841
Token count is too large: pandas-dev__pandas-29912
Token count is too large: mesonbuild__meson-238
Token count is too large: googleapis__google-cloud-python-3674
Token count is too large: ipython__ipython-10318
Token count is too large: celery__celery-4617
Token count is too large: pantsbuild__pants-18366
Token count is too large: pandas-dev__pandas-8929
Token count is too large: Qiskit__qiskit-6345


Generating train split: 3169 examples [03:55, 19.50 examples/s]

Token count is too large: pandas-dev__pandas-35697
Token count is too large: huggingface__transformers-10611
Token count is too large: open-mmlab__mmdetection-7690
Token count is too large: pantsbuild__pants-12618
Token count is too large: Lightning-AI__lightning-449
Token count is too large: pandas-dev__pandas-11888
Token count is too large: mesonbuild__meson-4958
Token count is too large: pandas-dev__pandas-27787
Token count is too large: pandas-dev__pandas-5555


Generating train split: 3173 examples [03:56, 17.62 examples/s]

Token count is too large: apache__airflow-11753
Token count is too large: huggingface__transformers-10692
Token count is too large: Lightning-AI__lightning-1475
Token count is too large: pandas-dev__pandas-32568
Token count is too large: celery__celery-4357
Token count is too large: conan-io__conan-2846
Token count is too large: Qiskit__qiskit-10441
Token count is too large: celery__celery-4203


Generating train split: 3182 examples [03:56, 26.18 examples/s]

Token count is too large: googleapis__google-cloud-python-3046
Token count is too large: pyca__cryptography-3896
Token count is too large: mesonbuild__meson-1694
Token count is too large: Qiskit__qiskit-1450
Token count is too large: pandas-dev__pandas-36729
Token count is too large: google__jax-1175
Token count is too large: pantsbuild__pants-14184
Token count is too large: pandas-dev__pandas-33911
Token count is too large: googleapis__google-cloud-python-5767
Token count is too large: googleapis__google-cloud-python-3157
Token count is too large: pandas-dev__pandas-28741
Token count is too large: Lightning-AI__lightning-3061
Token count is too large: googleapis__google-cloud-python-1517
Token count is too large: jupyterlab__jupyterlab-946
Token count is too large: open-mmlab__mmdetection-7449
Token count is too large: googleapis__google-cloud-python-6521
Token count is too large: pandas-dev__pandas-30213
Token count is too large: conda__conda-12929
Token count is too large: Qiskit__q

Generating train split: 3188 examples [03:56, 19.88 examples/s]

Token count is too large: pyca__cryptography-2846
Token count is too large: numpy__numpy-7296
Token count is too large: pandas-dev__pandas-6974
Token count is too large: pandas-dev__pandas-38417
Token count is too large: googleapis__google-cloud-python-9022


Generating train split: 3191 examples [03:57, 18.46 examples/s]

Token count is too large: numpy__numpy-20721
Token count is too large: googleapis__google-cloud-python-615
Token count is too large: pandas-dev__pandas-8266
Token count is too large: conda__conda-3228
Token count is too large: pantsbuild__pants-12878
Token count is too large: Qiskit__qiskit-9597
Token count is too large: twisted__twisted-11603
Token count is too large: ytdl-org__youtube-dl-6196
Token count is too large: pandas-dev__pandas-37830
Token count is too large: pandas-dev__pandas-18307
Token count is too large: apache__airflow-23804
Token count is too large: pandas-dev__pandas-27321


Generating train split: 3198 examples [03:57, 23.63 examples/s]

Token count is too large: ipython__ipython-2818
Token count is too large: pypa__pip-1691
Token count is too large: jupyterlab__jupyterlab-6451
Token count is too large: celery__celery-2783
Token count is too large: pandas-dev__pandas-28213
Token count is too large: PrefectHQ__prefect-1993
Token count is too large: scipy__scipy-4618
Token count is too large: pandas-dev__pandas-7328
Token count is too large: pandas-dev__pandas-27674
Token count is too large: pandas-dev__pandas-10920
Token count is too large: pandas-dev__pandas-4836


Generating train split: 3201 examples [03:57, 21.50 examples/s]

Token count is too large: pandas-dev__pandas-35035
Token count is too large: pandas-dev__pandas-13836
Token count is too large: wagtail__wagtail-4402
Token count is too large: pantsbuild__pants-4753
Token count is too large: huggingface__transformers-10863
Token count is too large: Qiskit__qiskit-5336
Token count is too large: mesonbuild__meson-585
Token count is too large: pandas-dev__pandas-14801
Token count is too large: google__jax-730


Generating train split: 3206 examples [03:57, 20.93 examples/s]

Token count is too large: pandas-dev__pandas-17491
Token count is too large: pandas-dev__pandas-33404
Token count is too large: pandas-dev__pandas-3985
Token count is too large: pantsbuild__pants-17097
Token count is too large: pandas-dev__pandas-35815
Token count is too large: pandas-dev__pandas-39196
Token count is too large: pandas-dev__pandas-28417
Token count is too large: conan-io__conan-5824


Generating train split: 3212 examples [03:57, 19.07 examples/s]

Token count is too large: huggingface__transformers-19006
Token count is too large: twisted__twisted-11770
Token count is too large: pandas-dev__pandas-18182
Token count is too large: google__jax-2803
Token count is too large: mesonbuild__meson-3416
Token count is too large: pandas-dev__pandas-3627
Token count is too large: pandas-dev__pandas-25467
Token count is too large: docker__compose-258


Generating train split: 3216 examples [03:58, 20.63 examples/s]

Token count is too large: pandas-dev__pandas-18902
Token count is too large: pandas-dev__pandas-21548
Token count is too large: docker__compose-4216
Token count is too large: pandas-dev__pandas-10376
Token count is too large: pandas-dev__pandas-36722
Token count is too large: numpy__numpy-13767
Token count is too large: huggingface__transformers-9685
Token count is too large: pandas-dev__pandas-25368
Token count is too large: pandas-dev__pandas-26115
Token count is too large: wagtail__wagtail-6757
Token count is too large: pandas-dev__pandas-32839
Token count is too large: pandas-dev__pandas-9640
Token count is too large: pandas-dev__pandas-28185
Token count is too large: conan-io__conan-4860
Token count is too large: huggingface__transformers-14671
Token count is too large: ytdl-org__youtube-dl-31243
Token count is too large: PrefectHQ__prefect-2297
Token count is too large: ray-project__ray-9020
Token count is too large: pandas-dev__pandas-21187
Token count is too large: pantsbuild__

Generating train split: 3222 examples [03:58, 13.11 examples/s]

Token count is too large: pandas-dev__pandas-18437
Token count is too large: celery__celery-4709
Token count is too large: pantsbuild__pants-15283
Token count is too large: pypa__pip-6914
Token count is too large: huggingface__transformers-17797
Token count is too large: pantsbuild__pants-5416
Token count is too large: ipython__ipython-13991
Token count is too large: huggingface__transformers-3716
Token count is too large: pandas-dev__pandas-2883
Token count is too large: ipython__ipython-8884
Token count is too large: PrefectHQ__prefect-713
Token count is too large: PrefectHQ__prefect-320
Token count is too large: pandas-dev__pandas-32699
Token count is too large: ytdl-org__youtube-dl-2089
Token count is too large: pandas-dev__pandas-17498
Token count is too large: pandas-dev__pandas-14886
Token count is too large: pandas-dev__pandas-36869
Token count is too large: mesonbuild__meson-5344
Token count is too large: Qiskit__qiskit-6519
Token count is too large: Qiskit__qiskit-2389
Token 

Generating train split: 3229 examples [03:59, 10.27 examples/s]

Token count is too large: huggingface__transformers-25627
Token count is too large: numpy__numpy-3241
Token count is too large: pypa__pip-10032
Token count is too large: pandas-dev__pandas-18875
Token count is too large: conan-io__conan-5088
Token count is too large: pandas-dev__pandas-19834
Token count is too large: pandas-dev__pandas-36093
Token count is too large: pypa__pip-7908
Token count is too large: google__jax-1594
Token count is too large: Qiskit__qiskit-1765
Token count is too large: googleapis__google-cloud-python-689
Token count is too large: Qiskit__qiskit-6216
Token count is too large: pandas-dev__pandas-22651
Token count is too large: conan-io__conan-11238


Generating train split: 3235 examples [04:00, 11.84 examples/s]

Token count is too large: pandas-dev__pandas-21066
Token count is too large: celery__celery-6923
Token count is too large: apache__airflow-26369
Token count is too large: Qiskit__qiskit-4535
Token count is too large: googleapis__google-cloud-python-1559
Token count is too large: huggingface__transformers-4289
Token count is too large: pypa__pip-2978
Token count is too large: Qiskit__qiskit-1797
Token count is too large: conan-io__conan-5945
Token count is too large: conan-io__conan-4106
Token count is too large: pandas-dev__pandas-27265
Token count is too large: tiangolo__fastapi-435
Token count is too large: ipython__ipython-7608
Token count is too large: conda__conda-6368
Token count is too large: pandas-dev__pandas-37924
Token count is too large: pandas-dev__pandas-21034


Generating train split: 3237 examples [04:00,  9.13 examples/s]

Token count is too large: pandas-dev__pandas-25132
Token count is too large: pantsbuild__pants-14529
Token count is too large: dagster-io__dagster-1244
Token count is too large: huggingface__transformers-17852
Token count is too large: pandas-dev__pandas-25297
Token count is too large: Lightning-AI__lightning-1794
Token count is too large: pandas-dev__pandas-10738
Token count is too large: numpy__numpy-4428
Token count is too large: huggingface__transformers-22159
Token count is too large: pandas-dev__pandas-35182
Token count is too large: huggingface__transformers-19287
Token count is too large: docker__compose-5155
Token count is too large: pantsbuild__pants-6472
Token count is too large: PrefectHQ__prefect-988
Token count is too large: conda__conda-6570
Token count is too large: Lightning-AI__lightning-2184
Token count is too large: open-mmlab__mmdetection-6034
Token count is too large: PrefectHQ__prefect-2668
Token count is too large: pandas-dev__pandas-16201


Generating train split: 3243 examples [04:01, 11.10 examples/s]

Token count is too large: Lightning-AI__lightning-415
Token count is too large: pandas-dev__pandas-28798
Token count is too large: conan-io__conan-7272
Token count is too large: pandas-dev__pandas-5408
Token count is too large: mesonbuild__meson-8164
Token count is too large: pandas-dev__pandas-17431


Generating train split: 3247 examples [04:01, 12.49 examples/s]

Token count is too large: pandas-dev__pandas-18600
Token count is too large: pandas-dev__pandas-29920
Token count is too large: huggingface__transformers-20989
Token count is too large: celery__celery-3827
Token count is too large: pandas-dev__pandas-9630
Token count is too large: PrefectHQ__prefect-1388
Token count is too large: dagster-io__dagster-4336
Token count is too large: ray-project__ray-8898
Token count is too large: pantsbuild__pants-6315
Token count is too large: pandas-dev__pandas-23394


Generating train split: 3250 examples [04:01, 12.03 examples/s]

Token count is too large: conan-io__conan-4205
Token count is too large: mesonbuild__meson-10508
Token count is too large: huggingface__transformers-14274
Token count is too large: pandas-dev__pandas-7552
Token count is too large: googleapis__google-cloud-python-4716
Token count is too large: pandas-dev__pandas-27071
Token count is too large: mesonbuild__meson-3061
Token count is too large: numpy__numpy-10698
Token count is too large: pypa__pip-8096
Token count is too large: huggingface__transformers-21914
Token count is too large: google__jax-429
Token count is too large: ipython__ipython-13640
Token count is too large: twisted__twisted-11617
Token count is too large: mesonbuild__meson-1308
Token count is too large: Qiskit__qiskit-7381


Generating train split: 3252 examples [04:01,  9.02 examples/s]

Token count is too large: numpy__numpy-16987
Token count is too large: pandas-dev__pandas-22549
Token count is too large: huggingface__transformers-10436
Token count is too large: twisted__twisted-11767
Token count is too large: jupyterlab__jupyterlab-6414
Token count is too large: pandas-dev__pandas-26046
Token count is too large: pandas-dev__pandas-3878
Token count is too large: explosion__spaCy-2346
Token count is too large: PrefectHQ__prefect-1933
Token count is too large: pandas-dev__pandas-18424
Token count is too large: docker__compose-5787


Generating train split: 3257 examples [04:02, 12.85 examples/s]

Token count is too large: wagtail__wagtail-8122
Token count is too large: apache__airflow-24865
Token count is too large: pandas-dev__pandas-37559
Token count is too large: huggingface__transformers-12365
Token count is too large: pandas-dev__pandas-26628
Token count is too large: google__jax-2596
Token count is too large: huggingface__transformers-22031
Token count is too large: google__jax-144
Token count is too large: pypa__pip-6921
Token count is too large: pandas-dev__pandas-9983
Token count is too large: Qiskit__qiskit-765
Token count is too large: Qiskit__qiskit-1375
Token count is too large: pandas-dev__pandas-15099
Token count is too large: pandas-dev__pandas-28700
Token count is too large: pandas-dev__pandas-38101


Generating train split: 3259 examples [04:02,  9.24 examples/s]

Token count is too large: huggingface__transformers-24772
Token count is too large: pandas-dev__pandas-36370
Token count is too large: mesonbuild__meson-9347
Token count is too large: pandas-dev__pandas-19875
Token count is too large: huggingface__transformers-24853


Generating train split: 3265 examples [04:02, 11.39 examples/s]

Token count is too large: pandas-dev__pandas-5423
Token count is too large: pandas-dev__pandas-7788
Token count is too large: pandas-dev__pandas-23278
Token count is too large: pandas-dev__pandas-23936


Generating train split: 3270 examples [04:03, 13.18 examples/s]

Token count is too large: pandas-dev__pandas-35129
Token count is too large: pandas-dev__pandas-27951
Token count is too large: Qiskit__qiskit-10469
Token count is too large: PrefectHQ__prefect-214
Token count is too large: pandas-dev__pandas-4231
Token count is too large: docker__compose-2821
Token count is too large: pandas-dev__pandas-33984
Token count is too large: docker__compose-5006
Token count is too large: pandas-dev__pandas-29872
Token count is too large: huggingface__transformers-6461
Token count is too large: pandas-dev__pandas-18069
Token count is too large: DataDog__integrations-core-5659
Token count is too large: pandas-dev__pandas-4750


Generating train split: 3272 examples [04:03,  9.88 examples/s]

Token count is too large: pandas-dev__pandas-30852
Token count is too large: googleapis__google-cloud-python-11460
Token count is too large: ipython__ipython-4622
Token count is too large: pantsbuild__pants-6872
Token count is too large: mesonbuild__meson-10652


Generating train split: 3277 examples [04:03, 12.54 examples/s]

Token count is too large: Lightning-AI__lightning-2113
Token count is too large: huggingface__transformers-21473
Token count is too large: pantsbuild__pants-14186
Token count is too large: googleapis__google-cloud-python-2210
Token count is too large: pypa__pip-9096
Token count is too large: huggingface__transformers-9567
Token count is too large: pandas-dev__pandas-17071
Token count is too large: Qiskit__qiskit-3709
Token count is too large: huggingface__transformers-11117


Generating train split: 3285 examples [04:04, 22.69 examples/s]

Token count is too large: huggingface__transformers-7562
Token count is too large: ipython__ipython-4464
Token count is too large: Qiskit__qiskit-5346
Token count is too large: huggingface__transformers-12806
Token count is too large: huggingface__transformers-24058
Token count is too large: numpy__numpy-18100
Token count is too large: numpy__numpy-6504
Token count is too large: pandas-dev__pandas-26257
Token count is too large: PrefectHQ__prefect-550
Token count is too large: pandas-dev__pandas-24113
Token count is too large: numpy__numpy-3449
Token count is too large: Qiskit__qiskit-5830
Token count is too large: apache__airflow-13232
Token count is too large: pandas-dev__pandas-21541
Token count is too large: mesonbuild__meson-7266
Token count is too large: google__jax-2268
Token count is too large: DataDog__integrations-core-7451


Generating train split: 3289 examples [04:04, 15.67 examples/s]

Token count is too large: huggingface__transformers-25461
Token count is too large: pandas-dev__pandas-16124
Token count is too large: pandas-dev__pandas-25811
Token count is too large: huggingface__transformers-19191
Token count is too large: pandas-dev__pandas-26474
Token count is too large: pandas-dev__pandas-20721
Token count is too large: huggingface__transformers-15230
Token count is too large: PrefectHQ__prefect-532
Token count is too large: ipython__ipython-9655
Token count is too large: pantsbuild__pants-6156
Token count is too large: googleapis__google-cloud-python-993
Token count is too large: pandas-dev__pandas-26012
Token count is too large: pandas-dev__pandas-5160
Token count is too large: pandas-dev__pandas-36747
Token count is too large: PrefectHQ__prefect-1384
Token count is too large: huggingface__transformers-9691
Token count is too large: huggingface__transformers-20848
Token count is too large: Qiskit__qiskit-8121
Token count is too large: googleapis__google-cloud-

Generating train split: 3292 examples [04:05,  7.90 examples/s]

Token count is too large: pandas-dev__pandas-10676
Token count is too large: Qiskit__qiskit-10652
Token count is too large: huggingface__transformers-10475
Token count is too large: docker__compose-2314
Token count is too large: pandas-dev__pandas-11191
Token count is too large: mesonbuild__meson-10076
Token count is too large: conan-io__conan-2941
Token count is too large: scipy__scipy-4281
Token count is too large: Qiskit__qiskit-2672
Token count is too large: conan-io__conan-3857


Generating train split: 3297 examples [04:05, 10.02 examples/s]

Token count is too large: pandas-dev__pandas-5247
Token count is too large: dagster-io__dagster-6865
Token count is too large: ray-project__ray-9142
Token count is too large: numpy__numpy-19654
Token count is too large: googleapis__google-cloud-python-11204
Token count is too large: celery__celery-6635
Token count is too large: huggingface__transformers-19707
Token count is too large: numpy__numpy-9784
Token count is too large: Qiskit__qiskit-6014
Token count is too large: conan-io__conan-2459
Token count is too large: pantsbuild__pants-5358


Generating train split: 3307 examples [04:05, 17.30 examples/s]

Token count is too large: ytdl-org__youtube-dl-2182
Token count is too large: Lightning-AI__lightning-603
Token count is too large: pandas-dev__pandas-28216
Token count is too large: huggingface__transformers-14729
Token count is too large: pandas-dev__pandas-30460


Generating train split: 3312 examples [04:06, 16.10 examples/s]

Token count is too large: pandas-dev__pandas-22880
Token count is too large: conda__conda-3416
Token count is too large: mesonbuild__meson-6421
Token count is too large: pandas-dev__pandas-31733
Token count is too large: ray-project__ray-10680
Token count is too large: pandas-dev__pandas-28634
Token count is too large: googleapis__google-cloud-python-4453
Token count is too large: tensorflow__models-2725
Token count is too large: numpy__numpy-14368
Token count is too large: pandas-dev__pandas-18337


Generating train split: 3315 examples [04:06, 15.96 examples/s]

Token count is too large: huggingface__transformers-24430
Token count is too large: conan-io__conan-5237
Token count is too large: google__jax-353
Token count is too large: pandas-dev__pandas-38406
Token count is too large: pandas-dev__pandas-4356
Token count is too large: conan-io__conan-6152
Token count is too large: pandas-dev__pandas-8180
Token count is too large: ipython__ipython-4565
Token count is too large: apache__airflow-8265
Token count is too large: pantsbuild__pants-15611
Token count is too large: google__jax-1970
Token count is too large: tensorflow__models-5354


Generating train split: 3318 examples [04:06, 14.67 examples/s]

Token count is too large: pypa__pip-8054
Token count is too large: Qiskit__qiskit-9183
Token count is too large: pandas-dev__pandas-5973
Token count is too large: pandas-dev__pandas-15974
Token count is too large: pandas-dev__pandas-8238
Token count is too large: celery__celery-6578
Token count is too large: docker__compose-5254


Generating train split: 3323 examples [04:06, 17.89 examples/s]

Token count is too large: pandas-dev__pandas-4190
Token count is too large: apache__airflow-12899
Token count is too large: google__jax-175
Token count is too large: googleapis__google-cloud-python-7512
Token count is too large: mesonbuild__meson-1606
Token count is too large: ipython__ipython-1483
Token count is too large: gitpython-developers__GitPython-955
Token count is too large: pandas-dev__pandas-18997
Token count is too large: conda__conda-2566
Token count is too large: huggingface__transformers-15900
Token count is too large: pandas-dev__pandas-22674
Token count is too large: Qiskit__qiskit-4525
Token count is too large: huggingface__transformers-9815


Generating train split: 3326 examples [04:07, 16.65 examples/s]

Token count is too large: pypa__pip-8340
Token count is too large: ipython__ipython-5965
Token count is too large: huggingface__transformers-13679
Token count is too large: pandas-dev__pandas-3078
Token count is too large: Lightning-AI__lightning-2160
Token count is too large: googleapis__google-cloud-python-7692
Token count is too large: pandas-dev__pandas-34458
Token count is too large: wagtail__wagtail-6434
Token count is too large: pandas-dev__pandas-19624
Token count is too large: pandas-dev__pandas-8663
Token count is too large: numpy__numpy-7584
Token count is too large: pandas-dev__pandas-39807


Generating train split: 3333 examples [04:07, 15.84 examples/s]

Token count is too large: pandas-dev__pandas-10726
Token count is too large: celery__celery-1834
Token count is too large: huggingface__transformers-1055
Token count is too large: ray-project__ray-10767
Token count is too large: pandas-dev__pandas-8415
Token count is too large: PrefectHQ__prefect-1168
Token count is too large: huggingface__transformers-21675


Generating train split: 3335 examples [04:07, 15.42 examples/s]

Token count is too large: huggingface__transformers-19066
Token count is too large: wagtail__wagtail-637
Token count is too large: pandas-dev__pandas-37159
Token count is too large: google__jax-201
Token count is too large: ytdl-org__youtube-dl-16999
Token count is too large: mesonbuild__meson-7069
Token count is too large: pandas-dev__pandas-4106
Token count is too large: conan-io__conan-4276
Token count is too large: Qiskit__qiskit-1554
Token count is too large: pandas-dev__pandas-18150
Token count is too large: pantsbuild__pants-18787


Generating train split: 3337 examples [04:08, 12.95 examples/s]

Token count is too large: pandas-dev__pandas-5859
Token count is too large: twisted__twisted-11752
Token count is too large: pandas-dev__pandas-25588
Token count is too large: mesonbuild__meson-3863
Token count is too large: numpy__numpy-10615
Token count is too large: apache__airflow-20486
Token count is too large: pypa__pip-9394
Token count is too large: pandas-dev__pandas-3502
Token count is too large: pandas-dev__pandas-30491
Token count is too large: pandas-dev__pandas-19231


Generating train split: 3343 examples [04:08, 14.44 examples/s]

Token count is too large: huggingface__transformers-2973
Token count is too large: huggingface__transformers-18407
Token count is too large: pandas-dev__pandas-31842
Token count is too large: docker__compose-4383
Token count is too large: apache__airflow-9329
Token count is too large: conda__conda-4818
Token count is too large: Qiskit__qiskit-4420


Generating train split: 3350 examples [04:08, 23.20 examples/s]

Token count is too large: docker__compose-7930
Token count is too large: PrefectHQ__prefect-1653
Token count is too large: pypa__pip-2799
Token count is too large: conan-io__conan-3439
Token count is too large: ytdl-org__youtube-dl-26301
Token count is too large: pandas-dev__pandas-35150
Token count is too large: mesonbuild__meson-3246
Token count is too large: pandas-dev__pandas-35794
Token count is too large: pantsbuild__pants-7929
Token count is too large: numpy__numpy-6453


Generating train split: 3353 examples [04:08, 20.09 examples/s]

Token count is too large: huggingface__transformers-15646
Token count is too large: Qiskit__qiskit-1111
Token count is too large: ytdl-org__youtube-dl-30713
Token count is too large: googleapis__google-cloud-python-8667
Token count is too large: google__jax-1525
Token count is too large: googleapis__google-cloud-python-9796
Token count is too large: pandas-dev__pandas-19926
Token count is too large: pandas-dev__pandas-29943
Token count is too large: mesonbuild__meson-2933
Token count is too large: pandas-dev__pandas-34423


Generating train split: 3356 examples [04:08, 20.41 examples/s]

Token count is too large: conda__conda-6518
Token count is too large: pandas-dev__pandas-5096
Token count is too large: pandas-dev__pandas-27716
Token count is too large: conan-io__conan-8704
Token count is too large: pandas-dev__pandas-26607
Token count is too large: pandas-dev__pandas-23036
Token count is too large: Qiskit__qiskit-4795
Token count is too large: pandas-dev__pandas-28367
Token count is too large: ipython__ipython-10863
Token count is too large: pandas-dev__pandas-30784
Token count is too large: conan-io__conan-2930
Token count is too large: pandas-dev__pandas-7428
Token count is too large: pandas-dev__pandas-10593
Token count is too large: Qiskit__qiskit-9980
Token count is too large: Lightning-AI__lightning-1459
Token count is too large: numpy__numpy-3626
Token count is too large: Qiskit__qiskit-686
Token count is too large: pandas-dev__pandas-20968
Token count is too large: Qiskit__qiskit-9953
Token count is too large: mesonbuild__meson-4214


Generating train split: 3364 examples [04:09, 17.25 examples/s]

Token count is too large: googleapis__google-cloud-python-6451
Token count is too large: conan-io__conan-9256
Token count is too large: ytdl-org__youtube-dl-30676
Token count is too large: pandas-dev__pandas-19240
Token count is too large: pantsbuild__pants-19314
Token count is too large: mesonbuild__meson-1194
Token count is too large: pandas-dev__pandas-9659
Token count is too large: numpy__numpy-5383
Token count is too large: Lightning-AI__lightning-2501
Token count is too large: googleapis__google-cloud-python-3705
Token count is too large: celery__celery-6452
Token count is too large: celery__celery-6134


Generating train split: 3372 examples [04:09, 22.34 examples/s]

Token count is too large: pandas-dev__pandas-6996
Token count is too large: pandas-dev__pandas-39374
Token count is too large: huggingface__transformers-18470
Token count is too large: googleapis__google-cloud-python-3794
Token count is too large: pandas-dev__pandas-10370


Generating train split: 3375 examples [04:10, 18.24 examples/s]

Token count is too large: huggingface__transformers-8567
Token count is too large: conda__conda-8611
Token count is too large: apache__airflow-13260
Token count is too large: ipython__ipython-1855
Token count is too large: googleapis__google-cloud-python-3386
Token count is too large: ytdl-org__youtube-dl-12696
Token count is too large: pandas-dev__pandas-19734
Token count is too large: huggingface__transformers-15940
Token count is too large: Qiskit__qiskit-2126
Token count is too large: PrefectHQ__prefect-1673
Token count is too large: pandas-dev__pandas-38109
Token count is too large: pandas-dev__pandas-34619
Token count is too large: wagtail__wagtail-10131


Generating train split: 3382 examples [04:10, 20.13 examples/s]

Token count is too large: pandas-dev__pandas-20401
Token count is too large: pandas-dev__pandas-8373
Token count is too large: ytdl-org__youtube-dl-1216
Token count is too large: ray-project__ray-4857
Token count is too large: docker__compose-6034
Token count is too large: scipy__scipy-5132
Token count is too large: Lightning-AI__lightning-2957
Token count is too large: Qiskit__qiskit-2049
Token count is too large: googleapis__google-cloud-python-8430
Token count is too large: pandas-dev__pandas-32700
Token count is too large: googleapis__google-cloud-python-7219


Generating train split: 3387 examples [04:10, 22.61 examples/s]

Token count is too large: pandas-dev__pandas-29395
Token count is too large: Qiskit__qiskit-10372
Token count is too large: ipython__ipython-1951
Token count is too large: numpy__numpy-23269
Token count is too large: pandas-dev__pandas-5558
Token count is too large: pandas-dev__pandas-18695
Token count is too large: twisted__twisted-11720
Token count is too large: pandas-dev__pandas-25720
Token count is too large: pyca__cryptography-1552
Token count is too large: conda__conda-8184
Token count is too large: celery__celery-5091
Token count is too large: pandas-dev__pandas-39423


Generating train split: 3390 examples [04:10, 22.19 examples/s]

Token count is too large: ipython__ipython-793
Token count is too large: huggingface__transformers-12024
Token count is too large: conda__conda-6585
Token count is too large: pandas-dev__pandas-19073
Token count is too large: Qiskit__qiskit-1481
Token count is too large: pandas-dev__pandas-9123
Token count is too large: pandas-dev__pandas-25769
Token count is too large: pandas-dev__pandas-38486


Generating train split: 3393 examples [04:10, 21.10 examples/s]

Token count is too large: pandas-dev__pandas-34955
Token count is too large: pyca__cryptography-1384
Token count is too large: apache__airflow-17210
Token count is too large: pandas-dev__pandas-38247
Token count is too large: pandas-dev__pandas-11596
Token count is too large: huggingface__transformers-17936
Token count is too large: conda__conda-7783


Generating train split: 3397 examples [04:11, 19.67 examples/s]

Token count is too large: PrefectHQ__prefect-205
Token count is too large: apache__airflow-9861
Token count is too large: numpy__numpy-8827
Token count is too large: pandas-dev__pandas-38626
Token count is too large: pandas-dev__pandas-9257
Token count is too large: celery__celery-6791
Token count is too large: Lightning-AI__lightning-74
Token count is too large: conan-io__conan-2794
Token count is too large: ipython__ipython-1121
Token count is too large: pantsbuild__pants-14959
Token count is too large: googleapis__google-cloud-python-835
Token count is too large: pantsbuild__pants-16956
Token count is too large: google__jax-922
Token count is too large: pandas-dev__pandas-23685
Token count is too large: Lightning-AI__lightning-2632
Token count is too large: scipy__scipy-3709
Token count is too large: mesonbuild__meson-5683
Token count is too large: docker__compose-5101
Token count is too large: pypa__pip-7747
Token count is too large: conan-io__conan-5064
Token count is too large: d

Generating train split: 3403 examples [04:11, 13.66 examples/s]

Token count is too large: pandas-dev__pandas-25129
Token count is too large: ray-project__ray-3872
Token count is too large: pandas-dev__pandas-7712
Token count is too large: pandas-dev__pandas-24467
Token count is too large: mesonbuild__meson-6065
Token count is too large: pandas-dev__pandas-31524
Token count is too large: mesonbuild__meson-5905
Token count is too large: huggingface__transformers-25735
Token count is too large: mesonbuild__meson-11690
Token count is too large: googleapis__google-cloud-python-7697
Token count is too large: pandas-dev__pandas-33954
Token count is too large: pantsbuild__pants-5150
Token count is too large: PrefectHQ__prefect-2386
Token count is too large: pandas-dev__pandas-21291
Token count is too large: conan-io__conan-7309
Token count is too large: scipy__scipy-5782
Token count is too large: pantsbuild__pants-16499
Token count is too large: Qiskit__qiskit-6909
Token count is too large: pandas-dev__pandas-20999
Token count is too large: huggingface__tr

Generating train split: 3405 examples [04:12,  9.42 examples/s]

Token count is too large: docker__compose-3075
Token count is too large: pandas-dev__pandas-9793
Token count is too large: pandas-dev__pandas-7007
Token count is too large: pandas-dev__pandas-11809
Token count is too large: PrefectHQ__prefect-1014
Token count is too large: wagtail__wagtail-957
Token count is too large: docker__compose-3598
There was an error processing
Token count is too large: conda__conda-5835


Generating train split: 3407 examples [04:12,  8.55 examples/s]

Token count is too large: numpy__numpy-21997
Token count is too large: pandas-dev__pandas-4838
Token count is too large: conda__conda-11215
Token count is too large: pandas-dev__pandas-39071
Token count is too large: Lightning-AI__lightning-786
Token count is too large: conda__conda-5122
Token count is too large: tiangolo__fastapi-681
Token count is too large: google__jax-1882
Token count is too large: conda__conda-6652
Token count is too large: pandas-dev__pandas-34012
Token count is too large: PrefectHQ__prefect-2036


Generating train split: 3409 examples [04:12,  9.11 examples/s]

Token count is too large: googleapis__google-cloud-python-11303
Token count is too large: ipython__ipython-7006
Token count is too large: numpy__numpy-4792
Token count is too large: conda__conda-4370
Token count is too large: ipython__ipython-618
Token count is too large: apache__airflow-27828
Token count is too large: Qiskit__qiskit-6828
Token count is too large: google__jax-2616


Generating train split: 3411 examples [04:12,  8.56 examples/s]

Token count is too large: pandas-dev__pandas-6378
Token count is too large: pandas-dev__pandas-13767
Token count is too large: Qiskit__qiskit-2228
Token count is too large: pantsbuild__pants-16717
Token count is too large: pandas-dev__pandas-10061
Token count is too large: pandas-dev__pandas-22826
Token count is too large: wagtail__wagtail-1031


Generating train split: 3414 examples [04:13,  8.11 examples/s]

Token count is too large: pandas-dev__pandas-30584
Token count is too large: ray-project__ray-10097
Token count is too large: pandas-dev__pandas-17077
Token count is too large: googleapis__google-cloud-python-5987
Token count is too large: pandas-dev__pandas-16543
Token count is too large: googleapis__google-cloud-python-3318
Token count is too large: pyca__cryptography-2541
Token count is too large: huggingface__transformers-9749
Token count is too large: pandas-dev__pandas-4950
Token count is too large: pandas-dev__pandas-30945
Token count is too large: numpy__numpy-18695
Token count is too large: jupyterlab__jupyterlab-9101


Generating train split: 3415 examples [04:13,  8.21 examples/s]

Token count is too large: pandas-dev__pandas-5631
Token count is too large: pypa__pip-10249
Token count is too large: pandas-dev__pandas-38248
Token count is too large: explosion__spaCy-2135
Token count is too large: numpy__numpy-13337
Token count is too large: Lightning-AI__lightning-2505
Token count is too large: mesonbuild__meson-5739
Token count is too large: celery__celery-3730
Token count is too large: PrefectHQ__prefect-1782
Token count is too large: apache__airflow-31415
Token count is too large: Qiskit__qiskit-5245
Token count is too large: docker__compose-3340
Token count is too large: pandas-dev__pandas-30691
Token count is too large: pandas-dev__pandas-23072
Token count is too large: ytdl-org__youtube-dl-10524


Generating train split: 3420 examples [04:13, 14.80 examples/s]

Token count is too large: PrefectHQ__prefect-2089
Token count is too large: gitpython-developers__GitPython-645
Token count is too large: Qiskit__qiskit-9665
Token count is too large: huggingface__transformers-12975
Token count is too large: PrefectHQ__prefect-239
Token count is too large: numpy__numpy-7659
Token count is too large: pandas-dev__pandas-37827
Token count is too large: pandas-dev__pandas-26466
Token count is too large: pantsbuild__pants-18025
Token count is too large: pandas-dev__pandas-37439


Generating train split: 3423 examples [04:13, 12.72 examples/s]

Token count is too large: numpy__numpy-20267
Token count is too large: wagtail__wagtail-554
Token count is too large: googleapis__google-cloud-python-11578
Token count is too large: pantsbuild__pants-11793
Token count is too large: ytdl-org__youtube-dl-29187
Token count is too large: pandas-dev__pandas-3055
Token count is too large: google__jax-516
Token count is too large: numpy__numpy-22679


Generating train split: 3425 examples [04:14, 12.50 examples/s]

Token count is too large: pandas-dev__pandas-38405
Token count is too large: Qiskit__qiskit-2212
Token count is too large: google__jax-682
Token count is too large: huggingface__transformers-15972
Token count is too large: pandas-dev__pandas-24953
Token count is too large: Qiskit__qiskit-7255
Token count is too large: pandas-dev__pandas-24446
Token count is too large: pandas-dev__pandas-27368
Token count is too large: docker__compose-6342
Token count is too large: docker__compose-4370
Token count is too large: mesonbuild__meson-7769


Generating train split: 3431 examples [04:14, 16.46 examples/s]

Token count is too large: pandas-dev__pandas-16478
Token count is too large: jupyterlab__jupyterlab-2836
Token count is too large: mesonbuild__meson-1023
Token count is too large: huggingface__transformers-14102
Token count is too large: wagtail__wagtail-6183
Token count is too large: pandas-dev__pandas-36149
Token count is too large: mesonbuild__meson-5196
Token count is too large: docker__compose-6925
Token count is too large: Qiskit__qiskit-1115
Token count is too large: ipython__ipython-1717
Token count is too large: conan-io__conan-5623
Token count is too large: pandas-dev__pandas-29405


Generating train split: 3436 examples [04:14, 12.95 examples/s]

Token count is too large: pandas-dev__pandas-31461
Token count is too large: googleapis__google-cloud-python-6084
Token count is too large: ray-project__ray-8628
Token count is too large: pandas-dev__pandas-8988
Token count is too large: Qiskit__qiskit-5702
Token count is too large: mesonbuild__meson-3069
Token count is too large: ipython__ipython-13914
Token count is too large: Lightning-AI__lightning-2801


Generating train split: 3440 examples [04:15, 13.63 examples/s]

Token count is too large: conan-io__conan-4462
Token count is too large: scipy__scipy-3927
Token count is too large: huggingface__transformers-22078
Token count is too large: ray-project__ray-2283
Token count is too large: pandas-dev__pandas-38492
Token count is too large: conan-io__conan-4313
Token count is too large: pandas-dev__pandas-25612
Token count is too large: googleapis__google-cloud-python-287
Token count is too large: googleapis__google-cloud-python-5611
Token count is too large: huggingface__transformers-15843
Token count is too large: numpy__numpy-18070
Token count is too large: pandas-dev__pandas-5998
Token count is too large: ipython__ipython-1538
Token count is too large: numpy__numpy-8988
Token count is too large: scipy__scipy-3928


Generating train split: 3445 examples [04:15, 14.01 examples/s]

Token count is too large: pandas-dev__pandas-23208
Token count is too large: pandas-dev__pandas-27173
Token count is too large: huggingface__transformers-564
Token count is too large: ipython__ipython-5221
Token count is too large: ytdl-org__youtube-dl-2812
Token count is too large: celery__celery-3997
Token count is too large: numpy__numpy-11720
Token count is too large: pandas-dev__pandas-16879
Token count is too large: numpy__numpy-8555
Token count is too large: pandas-dev__pandas-4456


Generating train split: 3449 examples [04:15, 13.42 examples/s]

Token count is too large: pandas-dev__pandas-27580
Token count is too large: conda__conda-2705
Token count is too large: apache__airflow-30432
Token count is too large: conan-io__conan-2765
Token count is too large: conda__conda-4637
Token count is too large: numpy__numpy-3638
Token count is too large: conan-io__conan-11655
Token count is too large: pandas-dev__pandas-25914
Token count is too large: pyca__cryptography-4819
Token count is too large: numpy__numpy-8008
Token count is too large: numpy__numpy-14346


Generating train split: 3453 examples [04:15, 16.03 examples/s]

Token count is too large: ytdl-org__youtube-dl-12906
Token count is too large: conda__conda-8907
Token count is too large: huggingface__transformers-17951
Token count is too large: pandas-dev__pandas-21281
Token count is too large: twisted__twisted-11697
Token count is too large: ray-project__ray-9663
Token count is too large: Qiskit__qiskit-2299
Token count is too large: huggingface__transformers-6903


Generating train split: 3459 examples [04:16, 18.50 examples/s]

Token count is too large: conan-io__conan-5417
Token count is too large: mesonbuild__meson-1058
Token count is too large: pandas-dev__pandas-6078
Token count is too large: pypa__pip-6655
Token count is too large: pantsbuild__pants-4381
Token count is too large: mesonbuild__meson-8812
Token count is too large: docker__compose-5132
Token count is too large: mesonbuild__meson-4820
Token count is too large: open-mmlab__mmdetection-3497


Generating train split: 3466 examples [04:16, 25.86 examples/s]

Token count is too large: pandas-dev__pandas-35272
Token count is too large: pandas-dev__pandas-22883
Token count is too large: jupyterlab__jupyterlab-8921
Token count is too large: pandas-dev__pandas-19481
Token count is too large: googleapis__google-cloud-python-2682
Token count is too large: google__jax-2804
Token count is too large: google__jax-1378
Token count is too large: mesonbuild__meson-1444
Token count is too large: Qiskit__qiskit-3391
Token count is too large: mesonbuild__meson-10540
Token count is too large: mesonbuild__meson-3639


Generating train split: 3472 examples [04:16, 30.81 examples/s]

Token count is too large: pandas-dev__pandas-22196
Token count is too large: Qiskit__qiskit-9543
Token count is too large: numpy__numpy-6643
Token count is too large: PrefectHQ__prefect-1979
Token count is too large: conan-io__conan-132
Token count is too large: pandas-dev__pandas-27128
Token count is too large: Lightning-AI__lightning-1729
Token count is too large: pandas-dev__pandas-37661
Token count is too large: conda__conda-2547
Token count is too large: ray-project__ray-4735
Token count is too large: ipython__ipython-11806
Token count is too large: google__jax-82
Token count is too large: huggingface__transformers-7248


Generating train split: 3476 examples [04:16, 25.50 examples/s]

Token count is too large: numpy__numpy-6588
Token count is too large: Qiskit__qiskit-2414
Token count is too large: ipython__ipython-1652
Token count is too large: explosion__spaCy-3080
Token count is too large: Lightning-AI__lightning-1016
Token count is too large: pypa__pip-3539
Token count is too large: pandas-dev__pandas-20543
Token count is too large: ipython__ipython-9617
Token count is too large: ytdl-org__youtube-dl-13605
Token count is too large: pandas-dev__pandas-36690
Token count is too large: ytdl-org__youtube-dl-10382
Token count is too large: pandas-dev__pandas-5606
Token count is too large: jupyterlab__jupyterlab-2718


Generating train split: 3480 examples [04:16, 21.90 examples/s]

Token count is too large: numpy__numpy-18118
Token count is too large: numpy__numpy-22391
Token count is too large: pantsbuild__pants-13653
Token count is too large: apache__airflow-16601
Token count is too large: Lightning-AI__lightning-3344
Token count is too large: pandas-dev__pandas-6845
Token count is too large: ytdl-org__youtube-dl-16707
Token count is too large: mesonbuild__meson-3777
Token count is too large: numpy__numpy-7879
Token count is too large: pandas-dev__pandas-17831


Generating train split: 3484 examples [04:17, 22.57 examples/s]

Token count is too large: mesonbuild__meson-5103
Token count is too large: open-mmlab__mmdetection-6632
Token count is too large: numpy__numpy-8497
Token count is too large: DataDog__integrations-core-10093
Token count is too large: apache__airflow-26554
Token count is too large: open-mmlab__mmdetection-9151


Generating train split: 3488 examples [04:17, 16.38 examples/s]

Token count is too large: googleapis__google-cloud-python-2223
Token count is too large: pandas-dev__pandas-39751
Token count is too large: numpy__numpy-6347
Token count is too large: pandas-dev__pandas-18258
Token count is too large: pandas-dev__pandas-38120
Token count is too large: pandas-dev__pandas-21036
Token count is too large: pypa__pip-2169
Token count is too large: pandas-dev__pandas-34435
Token count is too large: open-mmlab__mmdetection-7041


Generating train split: 3490 examples [04:17, 13.83 examples/s]

Token count is too large: open-mmlab__mmdetection-4378
Token count is too large: mesonbuild__meson-2260
Token count is too large: pandas-dev__pandas-19427
Token count is too large: conan-io__conan-3718
Token count is too large: pandas-dev__pandas-11248
Token count is too large: pandas-dev__pandas-22108
Token count is too large: docker__compose-2620
Token count is too large: pandas-dev__pandas-23031
Token count is too large: conda__conda-5201
Token count is too large: googleapis__google-cloud-python-3472
Token count is too large: pandas-dev__pandas-8410
Token count is too large: pandas-dev__pandas-33553
Token count is too large: pantsbuild__pants-15292
Token count is too large: wagtail__wagtail-9946
Token count is too large: googleapis__google-cloud-python-6706
Token count is too large: pandas-dev__pandas-28748
Token count is too large: huggingface__transformers-12449
Token count is too large: huggingface__transformers-10688
Token count is too large: huggingface__transformers-24618


Generating train split: 3492 examples [04:18,  8.79 examples/s]

Token count is too large: pandas-dev__pandas-37310
Token count is too large: googleapis__google-cloud-python-9231
Token count is too large: numpy__numpy-17141
Token count is too large: Qiskit__qiskit-7477
Token count is too large: pandas-dev__pandas-37761
Token count is too large: DataDog__integrations-core-698
Token count is too large: Qiskit__qiskit-6700
Token count is too large: pandas-dev__pandas-18882
Token count is too large: huggingface__transformers-19073
Token count is too large: googleapis__google-cloud-python-7483
Token count is too large: apache__airflow-12949
Token count is too large: apache__airflow-11241
Token count is too large: huggingface__transformers-3354
Token count is too large: mesonbuild__meson-1006


Generating train split: 3496 examples [04:18, 11.94 examples/s]

Token count is too large: pandas-dev__pandas-26063
Token count is too large: pyca__cryptography-4504
Token count is too large: pantsbuild__pants-13178
Token count is too large: pandas-dev__pandas-17736
Token count is too large: huggingface__transformers-7034
Token count is too large: pandas-dev__pandas-23593
Token count is too large: pandas-dev__pandas-28739
Token count is too large: jupyterlab__jupyterlab-3781
Token count is too large: googleapis__google-cloud-python-11203
Token count is too large: conda__conda-11255
Token count is too large: pandas-dev__pandas-7099
Token count is too large: huggingface__transformers-16812
Token count is too large: pantsbuild__pants-5033
Token count is too large: pantsbuild__pants-5168


Generating train split: 3500 examples [04:18, 11.69 examples/s]

Token count is too large: ytdl-org__youtube-dl-1633
Token count is too large: numpy__numpy-22002
Token count is too large: huggingface__transformers-18119
Token count is too large: mesonbuild__meson-8568
Token count is too large: pantsbuild__pants-12399
Token count is too large: pandas-dev__pandas-32520


Generating train split: 3502 examples [04:19,  8.79 examples/s]

Token count is too large: huggingface__transformers-21614
Token count is too large: ipython__ipython-9551
Token count is too large: pandas-dev__pandas-5084
Token count is too large: conda__conda-5252
Token count is too large: pandas-dev__pandas-36297
Token count is too large: googleapis__google-cloud-python-11371
Token count is too large: pandas-dev__pandas-37129
Token count is too large: PrefectHQ__prefect-2825
Token count is too large: Lightning-AI__lightning-2200
Token count is too large: docker__compose-5699
Token count is too large: google__jax-1025
Token count is too large: numpy__numpy-9976


Generating train split: 3507 examples [04:19, 12.18 examples/s]

Token count is too large: mesonbuild__meson-2266
Token count is too large: googleapis__google-cloud-python-8734
Token count is too large: google__jax-1395
Token count is too large: huggingface__transformers-22293
Token count is too large: googleapis__google-cloud-python-5217
Token count is too large: open-mmlab__mmdetection-4282
Token count is too large: pandas-dev__pandas-6953
Token count is too large: mesonbuild__meson-3399


Generating train split: 3510 examples [04:19, 13.23 examples/s]

Token count is too large: pandas-dev__pandas-23406
Token count is too large: huggingface__transformers-19376
Token count is too large: PrefectHQ__prefect-1393
Token count is too large: pandas-dev__pandas-11714
Token count is too large: pandas-dev__pandas-5050
Token count is too large: googleapis__google-cloud-python-9245
Token count is too large: googleapis__google-cloud-python-8101


Generating train split: 3513 examples [04:19, 12.88 examples/s]

Token count is too large: Qiskit__qiskit-2166
Token count is too large: pandas-dev__pandas-17633
Token count is too large: mesonbuild__meson-11183
Token count is too large: pandas-dev__pandas-21195
Token count is too large: conan-io__conan-4446
Token count is too large: conan-io__conan-6042
Token count is too large: docker__compose-5250
Token count is too large: googleapis__google-cloud-python-1844
Token count is too large: pandas-dev__pandas-7272
Token count is too large: apache__airflow-18244
Token count is too large: pypa__pip-3626


Generating train split: 3516 examples [04:20, 14.90 examples/s]

Token count is too large: pantsbuild__pants-6618
Token count is too large: conda__conda-5200
Token count is too large: Qiskit__qiskit-6541
Token count is too large: pantsbuild__pants-13714
Token count is too large: mesonbuild__meson-8071
Token count is too large: pandas-dev__pandas-18293
Token count is too large: ipython__ipython-6902
Token count is too large: open-mmlab__mmdetection-6079
Token count is too large: googleapis__google-cloud-python-10106
Token count is too large: google__jax-1573


Generating train split: 3523 examples [04:20, 16.36 examples/s]

Token count is too large: pandas-dev__pandas-4756
Token count is too large: pandas-dev__pandas-26431
Token count is too large: pandas-dev__pandas-26170
Token count is too large: pantsbuild__pants-15686
Token count is too large: docker__compose-5223
Token count is too large: ytdl-org__youtube-dl-2700
Token count is too large: ray-project__ray-9141
Token count is too large: huggingface__transformers-23891
Token count is too large: PrefectHQ__prefect-865
Token count is too large: ytdl-org__youtube-dl-6731
Token count is too large: ipython__ipython-1020
Token count is too large: pandas-dev__pandas-27934
Token count is too large: googleapis__google-cloud-python-5699
Token count is too large: pantsbuild__pants-12367
Token count is too large: Qiskit__qiskit-6008
Token count is too large: ipython__ipython-12926
Token count is too large: Lightning-AI__lightning-2828
Token count is too large: numpy__numpy-15229


Generating train split: 3529 examples [04:20, 16.99 examples/s]

Token count is too large: pypa__pip-9813
Token count is too large: numpy__numpy-3855
Token count is too large: pandas-dev__pandas-14211
Token count is too large: pandas-dev__pandas-30519
Token count is too large: pandas-dev__pandas-22543
Token count is too large: mesonbuild__meson-7306
Token count is too large: pandas-dev__pandas-22971
Token count is too large: numpy__numpy-8384
Token count is too large: pandas-dev__pandas-16247
Token count is too large: pandas-dev__pandas-3914
Token count is too large: googleapis__google-cloud-python-9064
Token count is too large: numpy__numpy-19642
Token count is too large: Qiskit__qiskit-2909
Token count is too large: numpy__numpy-14623
Token count is too large: conan-io__conan-13729
Token count is too large: conan-io__conan-11348
Token count is too large: pandas-dev__pandas-39497
Token count is too large: numpy__numpy-19110
Token count is too large: pantsbuild__pants-15113
Token count is too large: pandas-dev__pandas-36790
Token count is too large:

Generating train split: 3532 examples [04:21, 13.24 examples/s]

Token count is too large: pandas-dev__pandas-7619
Token count is too large: pypa__pip-8839
Token count is too large: celery__celery-7373
Token count is too large: apache__airflow-15731
Token count is too large: dagster-io__dagster-8874
Token count is too large: Qiskit__qiskit-7319
Token count is too large: pandas-dev__pandas-19780
Token count is too large: pantsbuild__pants-14961
Token count is too large: pandas-dev__pandas-19058
Token count is too large: pandas-dev__pandas-4926


Generating train split: 3535 examples [04:21, 12.46 examples/s]

Token count is too large: pantsbuild__pants-18225
Token count is too large: celery__celery-6741
Token count is too large: pandas-dev__pandas-34933
Token count is too large: Qiskit__qiskit-4579
Token count is too large: pypa__pip-7534
Token count is too large: mesonbuild__meson-2017
Token count is too large: ray-project__ray-10531
Token count is too large: pandas-dev__pandas-16442


Generating train split: 3542 examples [04:21, 18.37 examples/s]

Token count is too large: pandas-dev__pandas-5432
Token count is too large: googleapis__google-cloud-python-10081
Token count is too large: mesonbuild__meson-7955
Token count is too large: conda__conda-6671
Token count is too large: ipython__ipython-12244
Token count is too large: pandas-dev__pandas-3645
Token count is too large: pandas-dev__pandas-27103
Token count is too large: Qiskit__qiskit-3285
Token count is too large: Qiskit__qiskit-8047
Token count is too large: pantsbuild__pants-14352


Generating train split: 3545 examples [04:21, 15.81 examples/s]

Token count is too large: huggingface__transformers-15831
Token count is too large: jupyterlab__jupyterlab-7125
Token count is too large: pandas-dev__pandas-7525
Token count is too large: numpy__numpy-17456
Token count is too large: Qiskit__qiskit-308
Token count is too large: googleapis__google-cloud-python-4127
Token count is too large: pandas-dev__pandas-25584
Token count is too large: pandas-dev__pandas-18401
Token count is too large: conda__conda-6778
Token count is too large: pandas-dev__pandas-38657
Token count is too large: mesonbuild__meson-1011
Token count is too large: ipython__ipython-1784


Generating train split: 3549 examples [04:22, 18.49 examples/s]

Token count is too large: mesonbuild__meson-9392
Token count is too large: conda__conda-5103
Token count is too large: docker__compose-4817
Token count is too large: Lightning-AI__lightning-832
Token count is too large: pandas-dev__pandas-34421
Token count is too large: pandas-dev__pandas-30467
Token count is too large: pandas-dev__pandas-24155
Token count is too large: pandas-dev__pandas-9899
Token count is too large: ipython__ipython-5067
Token count is too large: PrefectHQ__prefect-225
Token count is too large: pandas-dev__pandas-9102
Token count is too large: pandas-dev__pandas-18591
Token count is too large: mesonbuild__meson-4724


Generating train split: 3558 examples [04:22, 20.84 examples/s]

Token count is too large: huggingface__transformers-24137
Token count is too large: conan-io__conan-5423
Token count is too large: pantsbuild__pants-15505
Token count is too large: huggingface__transformers-4884
Token count is too large: pandas-dev__pandas-6177
Token count is too large: mesonbuild__meson-5173
Token count is too large: mesonbuild__meson-3436
Token count is too large: pypa__pip-6089
Token count is too large: mesonbuild__meson-7484
Token count is too large: celery__celery-5399
Token count is too large: mesonbuild__meson-3770
Token count is too large: pandas-dev__pandas-20860
Token count is too large: conan-io__conan-6139
Token count is too large: Lightning-AI__lightning-2221
Token count is too large: huggingface__transformers-19722
Token count is too large: pypa__pip-11949
Token count is too large: pypa__pip-7931
Token count is too large: pandas-dev__pandas-22304
Token count is too large: mesonbuild__meson-5143
Token count is too large: mesonbuild__meson-6341
Token count 

Generating train split: 3563 examples [04:22, 14.17 examples/s]

Token count is too large: huggingface__transformers-24635
Token count is too large: pandas-dev__pandas-33062
Token count is too large: ipython__ipython-3606
Token count is too large: pandas-dev__pandas-37644
Token count is too large: wagtail__wagtail-598
Token count is too large: google__jax-2136
Token count is too large: numpy__numpy-11777
Token count is too large: pandas-dev__pandas-17310
Token count is too large: docker__compose-7411
Token count is too large: conda__conda-7191
Token count is too large: ytdl-org__youtube-dl-10996
Token count is too large: celery__celery-5168
Token count is too large: open-mmlab__mmdetection-4939
Token count is too large: pandas-dev__pandas-16951
Token count is too large: google__jax-2753
Token count is too large: celery__celery-1970
Token count is too large: pandas-dev__pandas-19281
Token count is too large: pantsbuild__pants-16911
Token count is too large: PrefectHQ__prefect-1703
Token count is too large: Qiskit__qiskit-1200
Token count is too large

Generating train split: 3566 examples [04:23, 11.69 examples/s]

Token count is too large: Qiskit__qiskit-1153
Token count is too large: huggingface__transformers-7641
Token count is too large: Qiskit__qiskit-1522
Token count is too large: pypa__pip-3008
Token count is too large: pantsbuild__pants-12337
Token count is too large: huggingface__transformers-19750
Token count is too large: conda__conda-12612
Token count is too large: Qiskit__qiskit-3235
Token count is too large: mesonbuild__meson-225
Token count is too large: pyca__cryptography-2978
Token count is too large: Lightning-AI__lightning-55
Token count is too large: ray-project__ray-2183


Generating train split: 3570 examples [04:23, 13.68 examples/s]

Token count is too large: docker__compose-6726
Token count is too large: googleapis__google-cloud-python-7046
Token count is too large: mesonbuild__meson-190
Token count is too large: Lightning-AI__lightning-752
Token count is too large: pandas-dev__pandas-15929
Token count is too large: googleapis__google-cloud-python-2803
Token count is too large: pandas-dev__pandas-3831
Token count is too large: numpy__numpy-17955
Token count is too large: apache__airflow-13308
Token count is too large: pandas-dev__pandas-25503
Token count is too large: conan-io__conan-5044
Token count is too large: huggingface__transformers-23126
Token count is too large: pandas-dev__pandas-27653


Generating train split: 3572 examples [04:24,  9.54 examples/s]

Token count is too large: huggingface__transformers-21624
Token count is too large: numpy__numpy-8016
Token count is too large: Lightning-AI__lightning-405
Token count is too large: mesonbuild__meson-10092
Token count is too large: apache__airflow-16414
Token count is too large: conda__conda-6609
Token count is too large: pandas-dev__pandas-37823
Token count is too large: pandas-dev__pandas-25402
Token count is too large: googleapis__google-cloud-python-3245
Token count is too large: dagster-io__dagster-9666
Token count is too large: pypa__pip-9945
Token count is too large: pandas-dev__pandas-7565
Token count is too large: pandas-dev__pandas-22626
Token count is too large: Qiskit__qiskit-1857
Token count is too large: pandas-dev__pandas-39475


Generating train split: 3581 examples [04:24, 17.58 examples/s]

Token count is too large: pandas-dev__pandas-26736
Token count is too large: conda__conda-7067
Token count is too large: ytdl-org__youtube-dl-30675
Token count is too large: huggingface__transformers-7186
Token count is too large: Qiskit__qiskit-7656
Token count is too large: jupyterlab__jupyterlab-9051
Token count is too large: conan-io__conan-12307
Token count is too large: Qiskit__qiskit-1651
Token count is too large: huggingface__transformers-19254
Token count is too large: pandas-dev__pandas-5270
Token count is too large: docker__compose-5679
Token count is too large: google__jax-574
Token count is too large: Qiskit__qiskit-9427
Token count is too large: pandas-dev__pandas-14230
Token count is too large: ray-project__ray-8802
Token count is too large: conda__conda-5422
Token count is too large: pandas-dev__pandas-34396
Token count is too large: pandas-dev__pandas-25712
Token count is too large: huggingface__transformers-23976
Token count is too large: pantsbuild__pants-13403
Token

Generating train split: 3586 examples [04:24, 15.79 examples/s]

Token count is too large: numpy__numpy-21851
Token count is too large: mesonbuild__meson-5277
Token count is too large: pantsbuild__pants-10409
Token count is too large: pandas-dev__pandas-27102
Token count is too large: celery__celery-6142
Token count is too large: numpy__numpy-12064
Token count is too large: open-mmlab__mmdetection-6512
Token count is too large: pandas-dev__pandas-31493
Token count is too large: dagster-io__dagster-10446
Token count is too large: pandas-dev__pandas-3017
Token count is too large: googleapis__google-cloud-python-11347
Token count is too large: pandas-dev__pandas-36290
Token count is too large: conan-io__conan-4917


Generating train split: 3589 examples [04:25, 11.39 examples/s]

Token count is too large: numpy__numpy-17546
Token count is too large: ray-project__ray-506
Token count is too large: pandas-dev__pandas-7924
Token count is too large: pandas-dev__pandas-16996
Token count is too large: pypa__pip-11052
Token count is too large: pyca__cryptography-3329
Token count is too large: apache__airflow-18953
Token count is too large: pandas-dev__pandas-23384
Token count is too large: conan-io__conan-6214
Token count is too large: pandas-dev__pandas-39023


Generating train split: 3596 examples [04:25, 15.35 examples/s]

Token count is too large: pandas-dev__pandas-26242
Token count is too large: pandas-dev__pandas-10527
Token count is too large: Qiskit__qiskit-6588
Token count is too large: pandas-dev__pandas-10386
Token count is too large: huggingface__transformers-12938
Token count is too large: pandas-dev__pandas-7424
Token count is too large: docker__compose-2132
Token count is too large: numpy__numpy-12447
Token count is too large: pandas-dev__pandas-5321
Token count is too large: ipython__ipython-11354
Token count is too large: pyca__cryptography-876
Token count is too large: ipython__ipython-5784
Token count is too large: huggingface__transformers-8878
Token count is too large: Qiskit__qiskit-8702
Token count is too large: docker__compose-5910
Token count is too large: huggingface__transformers-21847
Token count is too large: Qiskit__qiskit-1629
Token count is too large: Qiskit__qiskit-3873
Token count is too large: ytdl-org__youtube-dl-29698
Token count is too large: docker__compose-6359
Token

Generating train split: 3599 examples [04:25, 12.39 examples/s]

Token count is too large: pandas-dev__pandas-8695
Token count is too large: mesonbuild__meson-919
Token count is too large: pandas-dev__pandas-33846
Token count is too large: apache__airflow-9170
Token count is too large: googleapis__google-cloud-python-7060
Token count is too large: pandas-dev__pandas-18462
Token count is too large: pandas-dev__pandas-22886
Token count is too large: ytdl-org__youtube-dl-6768
Token count is too large: pandas-dev__pandas-7457
Token count is too large: docker__compose-5237
Token count is too large: mesonbuild__meson-7392
Token count is too large: huggingface__transformers-8848
Token count is too large: wagtail__wagtail-3983
Token count is too large: Qiskit__qiskit-2157
Token count is too large: conda__conda-7216
Token count is too large: mesonbuild__meson-9994
Token count is too large: mesonbuild__meson-10289
Token count is too large: pandas-dev__pandas-31046
Token count is too large: mesonbuild__meson-9607


Generating train split: 3602 examples [04:25, 13.96 examples/s]

Token count is too large: pandas-dev__pandas-18576
Token count is too large: pandas-dev__pandas-31946
Token count is too large: apache__airflow-13318
Token count is too large: Lightning-AI__lightning-1505
Token count is too large: pantsbuild__pants-8484
Token count is too large: pandas-dev__pandas-30394
Token count is too large: pandas-dev__pandas-28165
Token count is too large: mesonbuild__meson-1002
Token count is too large: pandas-dev__pandas-26362
Token count is too large: pandas-dev__pandas-2950
Token count is too large: huggingface__transformers-24259
Token count is too large: ray-project__ray-8211
Token count is too large: Qiskit__qiskit-7423
Token count is too large: huggingface__transformers-11220
Token count is too large: ytdl-org__youtube-dl-29682


Generating train split: 3604 examples [04:26,  9.70 examples/s]

Token count is too large: pyca__cryptography-5498
Token count is too large: conda__conda-6364
Token count is too large: pandas-dev__pandas-19937
Token count is too large: ytdl-org__youtube-dl-27937
Token count is too large: pantsbuild__pants-15366
Token count is too large: mesonbuild__meson-1620
Token count is too large: pandas-dev__pandas-26255
Token count is too large: PrefectHQ__prefect-2868
Token count is too large: pandas-dev__pandas-18252
Token count is too large: pyca__cryptography-3382
Token count is too large: googleapis__google-cloud-python-3027
Token count is too large: apache__airflow-9320


Generating train split: 3606 examples [04:26,  7.12 examples/s]

Token count is too large: huggingface__transformers-24016
Token count is too large: pandas-dev__pandas-4657
Token count is too large: docker__compose-8005
Token count is too large: ray-project__ray-8514
Token count is too large: pandas-dev__pandas-4228
Token count is too large: huggingface__transformers-4533
Token count is too large: Lightning-AI__lightning-1863
Token count is too large: numpy__numpy-10478
Token count is too large: pyca__cryptography-4843
Token count is too large: Lightning-AI__lightning-3194
Token count is too large: googleapis__google-cloud-python-6697


Generating train split: 3613 examples [04:27, 11.59 examples/s]

Token count is too large: mesonbuild__meson-2511
Token count is too large: conan-io__conan-3245
Token count is too large: pandas-dev__pandas-7214
Token count is too large: huggingface__transformers-8391
Token count is too large: pantsbuild__pants-18327
Token count is too large: ipython__ipython-9609
Token count is too large: pandas-dev__pandas-10983
Token count is too large: pandas-dev__pandas-27618
Token count is too large: pypa__pip-8061
Token count is too large: celery__celery-6419
Token count is too large: huggingface__transformers-12226
Token count is too large: jupyterlab__jupyterlab-1222
Token count is too large: pypa__pip-11853
Token count is too large: numpy__numpy-9941
Token count is too large: PrefectHQ__prefect-929
Token count is too large: numpy__numpy-13340
Token count is too large: ipython__ipython-2233
Token count is too large: pandas-dev__pandas-16213
Token count is too large: ytdl-org__youtube-dl-1790
Token count is too large: googleapis__google-cloud-python-8193
Toke

Generating train split: 3618 examples [04:27, 13.61 examples/s]

Token count is too large: pandas-dev__pandas-22087
Token count is too large: ipython__ipython-7708
Token count is too large: pandas-dev__pandas-23655
Token count is too large: pandas-dev__pandas-19894
Token count is too large: pandas-dev__pandas-15623
Token count is too large: mesonbuild__meson-910
Token count is too large: Qiskit__qiskit-890
Token count is too large: dagster-io__dagster-10589
Token count is too large: googleapis__google-cloud-python-2478
Token count is too large: ipython__ipython-2223
Token count is too large: ytdl-org__youtube-dl-4442


Generating train split: 3621 examples [04:27, 13.86 examples/s]

Token count is too large: mesonbuild__meson-11366
Token count is too large: numpy__numpy-23971
Token count is too large: pandas-dev__pandas-5224
Token count is too large: pandas-dev__pandas-15066
Token count is too large: dagster-io__dagster-10706
Token count is too large: conda__conda-7780


Generating train split: 3625 examples [04:27, 15.18 examples/s]

Token count is too large: google__jax-2054
Token count is too large: twisted__twisted-11703
Token count is too large: Lightning-AI__lightning-3151
Token count is too large: pandas-dev__pandas-14060
Token count is too large: pandas-dev__pandas-9427
Token count is too large: ytdl-org__youtube-dl-15137
Token count is too large: open-mmlab__mmdetection-9570
Token count is too large: pandas-dev__pandas-38157
Token count is too large: PrefectHQ__prefect-471
Token count is too large: docker__compose-5940
Token count is too large: pandas-dev__pandas-23153
Token count is too large: googleapis__google-cloud-python-3311


Generating train split: 3627 examples [04:28, 13.14 examples/s]

Token count is too large: mesonbuild__meson-6410
Token count is too large: pantsbuild__pants-19136
Token count is too large: wagtail__wagtail-1529
Token count is too large: conda__conda-7396
Token count is too large: ipython__ipython-13021
Token count is too large: pantsbuild__pants-12566
Token count is too large: pandas-dev__pandas-26750
Token count is too large: apache__airflow-23463
Token count is too large: pandas-dev__pandas-30498
Token count is too large: pandas-dev__pandas-3127
Token count is too large: Qiskit__qiskit-9101


Generating train split: 3633 examples [04:28, 14.57 examples/s]

Token count is too large: pandas-dev__pandas-20028
Token count is too large: conan-io__conan-3809
Token count is too large: googleapis__google-cloud-python-6576
Token count is too large: pandas-dev__pandas-20885
Token count is too large: pandas-dev__pandas-4437
Token count is too large: googleapis__google-cloud-python-1486
Token count is too large: pandas-dev__pandas-32539
Token count is too large: ray-project__ray-890
Token count is too large: docker__compose-2326
Token count is too large: conan-io__conan-5419


Generating train split: 3636 examples [04:28, 15.19 examples/s]

Token count is too large: pandas-dev__pandas-39272
Token count is too large: pandas-dev__pandas-35231
Token count is too large: pandas-dev__pandas-20611
Token count is too large: conda__conda-5265
Token count is too large: googleapis__google-cloud-python-6388
Token count is too large: docker__compose-4267
Token count is too large: pandas-dev__pandas-17430
Token count is too large: numpy__numpy-24142
Token count is too large: pypa__pip-9575
Token count is too large: Lightning-AI__lightning-311
Token count is too large: pandas-dev__pandas-7832
Token count is too large: pandas-dev__pandas-28362
Token count is too large: googleapis__google-cloud-python-3160
Token count is too large: pandas-dev__pandas-19833
Token count is too large: pypa__pip-2833
Token count is too large: pandas-dev__pandas-27790
Token count is too large: jupyterlab__jupyterlab-4920
Token count is too large: pandas-dev__pandas-39362


Generating train split: 3646 examples [04:29, 17.59 examples/s]

Token count is too large: pandas-dev__pandas-13979
Token count is too large: huggingface__transformers-6596
Token count is too large: google__jax-2112
Token count is too large: huggingface__transformers-8738
Token count is too large: googleapis__google-cloud-python-2379
Token count is too large: numpy__numpy-21251
Token count is too large: ray-project__ray-3161
Token count is too large: mesonbuild__meson-3041
Token count is too large: googleapis__google-cloud-python-8179
Token count is too large: numpy__numpy-13433
Token count is too large: googleapis__google-cloud-python-2303
Token count is too large: mesonbuild__meson-1076
Token count is too large: pandas-dev__pandas-27664
Token count is too large: pandas-dev__pandas-8090
Token count is too large: wagtail__wagtail-208
Token count is too large: pandas-dev__pandas-28029


Generating train split: 3649 examples [04:29, 16.49 examples/s]

Token count is too large: pandas-dev__pandas-5705
Token count is too large: pandas-dev__pandas-17474
Token count is too large: open-mmlab__mmdetection-7407
Token count is too large: pandas-dev__pandas-22679
Token count is too large: mesonbuild__meson-6878
Token count is too large: wagtail__wagtail-1087
Token count is too large: googleapis__google-cloud-python-8546
Token count is too large: huggingface__transformers-16119
Token count is too large: googleapis__google-cloud-python-326
Token count is too large: pandas-dev__pandas-27129
Token count is too large: pandas-dev__pandas-8767
Token count is too large: pypa__pip-6522
Token count is too large: pandas-dev__pandas-11603
Token count is too large: docker__compose-3588
Token count is too large: pypa__pip-1032
Token count is too large: pypa__pip-12197
Token count is too large: numpy__numpy-11082
Token count is too large: Qiskit__qiskit-1172
Token count is too large: pandas-dev__pandas-11252
Token count is too large: pandas-dev__pandas-797

Generating train split: 3652 examples [04:29, 11.57 examples/s]

Token count is too large: Qiskit__qiskit-1386
Token count is too large: pandas-dev__pandas-8837
Token count is too large: huggingface__transformers-8585
Token count is too large: pandas-dev__pandas-16325
Token count is too large: googleapis__google-cloud-python-2254
Token count is too large: docker__compose-6608
Token count is too large: conan-io__conan-5221


Generating train split: 3656 examples [04:30, 11.85 examples/s]

Token count is too large: pandas-dev__pandas-14433
Token count is too large: pandas-dev__pandas-5844
Token count is too large: docker__compose-2997
Token count is too large: pandas-dev__pandas-18231
Token count is too large: huggingface__transformers-10526
Token count is too large: Qiskit__qiskit-5855
Token count is too large: ray-project__ray-9966


Generating train split: 3663 examples [04:30, 13.90 examples/s]

Token count is too large: mesonbuild__meson-5681
Token count is too large: Qiskit__qiskit-7459
Token count is too large: pandas-dev__pandas-21794
Token count is too large: Qiskit__qiskit-10332
Token count is too large: huggingface__transformers-8095
Token count is too large: mesonbuild__meson-11091
Token count is too large: pandas-dev__pandas-33406
Token count is too large: googleapis__google-cloud-python-20
Token count is too large: huggingface__transformers-6908
Token count is too large: mesonbuild__meson-6689
Token count is too large: pandas-dev__pandas-39639


Generating train split: 3667 examples [04:30, 17.60 examples/s]

Token count is too large: pandas-dev__pandas-5472
Token count is too large: google__jax-3350
Token count is too large: numpy__numpy-8216
Token count is too large: googleapis__google-cloud-python-9982
Token count is too large: scipy__scipy-5623
Token count is too large: ytdl-org__youtube-dl-22091
Token count is too large: pandas-dev__pandas-8551
Token count is too large: docker__compose-4553
Token count is too large: pypa__pip-7072
Token count is too large: pandas-dev__pandas-18637
Token count is too large: pyca__cryptography-7853
Token count is too large: docker__compose-7417
Token count is too large: pantsbuild__pants-5201
Token count is too large: pandas-dev__pandas-31458
Token count is too large: pandas-dev__pandas-17984


Generating train split: 3671 examples [04:31, 13.75 examples/s]

Token count is too large: celery__celery-6750
Token count is too large: pypa__pip-2049
Token count is too large: Lightning-AI__lightning-1408
Token count is too large: conan-io__conan-2908
Token count is too large: numpy__numpy-5116
Token count is too large: googleapis__google-cloud-python-9365
Token count is too large: huggingface__transformers-17902
Token count is too large: Lightning-AI__lightning-337
Token count is too large: pandas-dev__pandas-18374
Token count is too large: pandas-dev__pandas-17739


Generating train split: 3673 examples [04:31, 12.54 examples/s]

Token count is too large: pandas-dev__pandas-20901
Token count is too large: conan-io__conan-9431
Token count is too large: conan-io__conan-6010
Token count is too large: huggingface__transformers-12749
Token count is too large: huggingface__transformers-2310
Token count is too large: conan-io__conan-2529
Token count is too large: ytdl-org__youtube-dl-13962
Token count is too large: apache__airflow-20870
Token count is too large: pandas-dev__pandas-4659
Token count is too large: ipython__ipython-1875
Token count is too large: docker__compose-1933
Token count is too large: ipython__ipython-12886
Token count is too large: mesonbuild__meson-6641
Token count is too large: googleapis__google-cloud-python-932
Token count is too large: pandas-dev__pandas-21427


Generating train split: 3675 examples [04:31, 12.54 examples/s]

Token count is too large: pandas-dev__pandas-29583
Token count is too large: conda__conda-7041
Token count is too large: wagtail__wagtail-277
Token count is too large: pandas-dev__pandas-19549
Token count is too large: pandas-dev__pandas-3137


Generating train split: 3677 examples [04:31,  9.33 examples/s]

Token count is too large: pandas-dev__pandas-31060
Token count is too large: googleapis__google-cloud-python-2025
Token count is too large: pantsbuild__pants-19179
Token count is too large: pandas-dev__pandas-38420
Token count is too large: pandas-dev__pandas-14739
Token count is too large: pandas-dev__pandas-3219
Token count is too large: google__jax-1349


Generating train split: 3680 examples [04:32,  9.88 examples/s]

Token count is too large: pandas-dev__pandas-21566
Token count is too large: numpy__numpy-6537
Token count is too large: docker__compose-6115
Token count is too large: Qiskit__qiskit-8790
Token count is too large: pyca__cryptography-8397
Token count is too large: ipython__ipython-6896
Token count is too large: apache__airflow-24215
Token count is too large: pantsbuild__pants-14549
Token count is too large: apache__airflow-16393
Token count is too large: pandas-dev__pandas-22737
Token count is too large: conan-io__conan-4835
Token count is too large: Qiskit__qiskit-8802
Token count is too large: Qiskit__qiskit-9924
Token count is too large: pandas-dev__pandas-34192
Token count is too large: pandas-dev__pandas-26483
Token count is too large: Qiskit__qiskit-4900
Token count is too large: ipython__ipython-11353
Token count is too large: ipython__ipython-10737
Token count is too large: pandas-dev__pandas-7442
Token count is too large: googleapis__google-cloud-python-8883
Token count is too 

Generating train split: 3683 examples [04:32,  7.49 examples/s]

Token count is too large: pandas-dev__pandas-37511
Token count is too large: pandas-dev__pandas-24021
Token count is too large: ipython__ipython-4372
Token count is too large: Qiskit__qiskit-5506
Token count is too large: pandas-dev__pandas-6362
Token count is too large: pantsbuild__pants-6205
Token count is too large: numpy__numpy-8206
Token count is too large: pandas-dev__pandas-31941
Token count is too large: pandas-dev__pandas-26264
Token count is too large: pandas-dev__pandas-9835


Generating train split: 3684 examples [04:32,  7.41 examples/s]

Token count is too large: google__jax-117
Token count is too large: huggingface__transformers-24266
Token count is too large: huggingface__transformers-9017
Token count is too large: docker__compose-5767
Token count is too large: conda__conda-2736
Token count is too large: apache__airflow-9699
Token count is too large: mesonbuild__meson-9523
Token count is too large: apache__airflow-9497
Token count is too large: dagster-io__dagster-6112
Token count is too large: scipy__scipy-3929


Generating train split: 3686 examples [04:33,  8.32 examples/s]

Token count is too large: conan-io__conan-4464
Token count is too large: googleapis__google-cloud-python-11118
Token count is too large: pandas-dev__pandas-20444
Token count is too large: pandas-dev__pandas-15868
Token count is too large: conan-io__conan-5481
Token count is too large: pandas-dev__pandas-3554


Generating train split: 3690 examples [04:33, 10.39 examples/s]

Token count is too large: google__jax-1913
Token count is too large: pandas-dev__pandas-6591
Token count is too large: apache__airflow-22389
Token count is too large: ytdl-org__youtube-dl-23193
Token count is too large: apache__airflow-28799
Token count is too large: pantsbuild__pants-10035
Token count is too large: numpy__numpy-11813
Token count is too large: pandas-dev__pandas-17291
Token count is too large: pantsbuild__pants-17743
Token count is too large: pandas-dev__pandas-37878
Token count is too large: wagtail__wagtail-10009


Generating train split: 3692 examples [04:33,  9.12 examples/s]

Token count is too large: pandas-dev__pandas-13052
Token count is too large: pandas-dev__pandas-7350
Token count is too large: pandas-dev__pandas-4797
Token count is too large: PrefectHQ__prefect-2705
Token count is too large: pandas-dev__pandas-28172
Token count is too large: wagtail__wagtail-1026
Token count is too large: pandas-dev__pandas-35647
Token count is too large: pyca__cryptography-1159


Generating train split: 3695 examples [04:34,  8.20 examples/s]

Token count is too large: apache__airflow-1271
Token count is too large: googleapis__google-cloud-python-6361
Token count is too large: Lightning-AI__lightning-88
Token count is too large: ytdl-org__youtube-dl-27673
Token count is too large: mesonbuild__meson-5436
Token count is too large: PrefectHQ__prefect-374
Token count is too large: mesonbuild__meson-11323
Token count is too large: numpy__numpy-17039
Token count is too large: conan-io__conan-4941
Token count is too large: celery__celery-7057
Token count is too large: pandas-dev__pandas-8519
Token count is too large: pyca__cryptography-4555
Token count is too large: pypa__pip-9700
Token count is too large: pandas-dev__pandas-30505
Token count is too large: apache__airflow-9779


Generating train split: 3696 examples [04:34,  6.64 examples/s]

Token count is too large: huggingface__transformers-19981
Token count is too large: apache__airflow-14869
Token count is too large: googleapis__google-cloud-python-3069
Token count is too large: googleapis__google-cloud-python-56
Token count is too large: googleapis__google-cloud-python-7710
Token count is too large: Qiskit__qiskit-4034
Token count is too large: mesonbuild__meson-1842
Token count is too large: pypa__pip-3715
Token count is too large: huggingface__transformers-10183


Generating train split: 3700 examples [04:34,  7.58 examples/s]

Token count is too large: huggingface__transformers-20631
Token count is too large: pandas-dev__pandas-7907
Token count is too large: pandas-dev__pandas-38334
Token count is too large: ray-project__ray-5971
Token count is too large: pandas-dev__pandas-21132
Token count is too large: numpy__numpy-23206
Token count is too large: pandas-dev__pandas-29124
Token count is too large: pandas-dev__pandas-37606
Token count is too large: pantsbuild__pants-18636
Token count is too large: pandas-dev__pandas-10096


Generating train split: 3704 examples [04:35,  9.32 examples/s]

Token count is too large: conda__conda-7252
Token count is too large: docker__compose-5769
Token count is too large: huggingface__transformers-20401
Token count is too large: pandas-dev__pandas-18247
Token count is too large: pandas-dev__pandas-7144
Token count is too large: apache__airflow-8571
Token count is too large: jupyterlab__jupyterlab-2628
Token count is too large: apache__airflow-23071


Generating train split: 3708 examples [04:35, 14.21 examples/s]

Token count is too large: huggingface__transformers-18786
Token count is too large: ytdl-org__youtube-dl-3744
Token count is too large: dagster-io__dagster-4528
Token count is too large: googleapis__google-cloud-python-2743
Token count is too large: pantsbuild__pants-6574
Token count is too large: huggingface__transformers-11573
Token count is too large: ray-project__ray-8012
Token count is too large: huggingface__transformers-11569
Token count is too large: pandas-dev__pandas-22125
There was an error processing
Token count is too large: numpy__numpy-5847
Token count is too large: numpy__numpy-19244


Generating train split: 3718 examples [04:35, 22.80 examples/s]

Token count is too large: dagster-io__dagster-11982
Token count is too large: googleapis__google-cloud-python-6630
Token count is too large: Lightning-AI__lightning-256
Token count is too large: ray-project__ray-1545
Token count is too large: pandas-dev__pandas-21904
Token count is too large: pandas-dev__pandas-18952
Token count is too large: huggingface__transformers-18387
Token count is too large: googleapis__google-cloud-python-11292
Token count is too large: pandas-dev__pandas-6657
Token count is too large: docker__compose-6352
Token count is too large: pandas-dev__pandas-35271
Token count is too large: ipython__ipython-1713
Token count is too large: pandas-dev__pandas-14571
Token count is too large: mesonbuild__meson-10976


Generating train split: 3721 examples [04:36, 13.43 examples/s]

Token count is too large: pandas-dev__pandas-6406
Token count is too large: numpy__numpy-6674
Token count is too large: pandas-dev__pandas-15499
Token count is too large: celery__celery-3693
Token count is too large: open-mmlab__mmdetection-4473


Generating train split: 3723 examples [04:36, 11.47 examples/s]

Token count is too large: pandas-dev__pandas-36493
Token count is too large: numpy__numpy-3495
Token count is too large: docker__compose-6606
Token count is too large: mesonbuild__meson-9926
Token count is too large: pandas-dev__pandas-37905
Token count is too large: apache__airflow-33404
Token count is too large: pandas-dev__pandas-25964
Token count is too large: Qiskit__qiskit-7302
Token count is too large: conan-io__conan-3477
Token count is too large: pandas-dev__pandas-14007
Token count is too large: pandas-dev__pandas-14762
Token count is too large: gitpython-developers__GitPython-317
Token count is too large: pandas-dev__pandas-6142
Token count is too large: pandas-dev__pandas-4752
Token count is too large: pandas-dev__pandas-35532
Token count is too large: googleapis__google-cloud-python-11325
Token count is too large: gitpython-developers__GitPython-1190
Token count is too large: numpy__numpy-17522
Token count is too large: huggingface__transformers-22631
Token count is too la

Generating train split: 3730 examples [04:36, 13.35 examples/s]

Token count is too large: ytdl-org__youtube-dl-11787
Token count is too large: pandas-dev__pandas-6603
Token count is too large: pandas-dev__pandas-4709
Token count is too large: pandas-dev__pandas-11318
Token count is too large: numpy__numpy-21991
Token count is too large: pypa__pip-11560
Token count is too large: ray-project__ray-8782
Token count is too large: ytdl-org__youtube-dl-17199
Token count is too large: Qiskit__qiskit-1564
Token count is too large: mesonbuild__meson-1077
Token count is too large: pandas-dev__pandas-14862
Token count is too large: pandas-dev__pandas-6558
Token count is too large: celery__celery-8098
Token count is too large: pandas-dev__pandas-25992
Token count is too large: PrefectHQ__prefect-632
Token count is too large: docker__compose-6800
Token count is too large: conda__conda-5192
Token count is too large: ytdl-org__youtube-dl-401
Token count is too large: huggingface__transformers-21191
Token count is too large: google__jax-759


Generating train split: 3733 examples [04:37, 11.56 examples/s]

Token count is too large: mesonbuild__meson-6859
Token count is too large: pandas-dev__pandas-30246
Token count is too large: Qiskit__qiskit-2493
Token count is too large: pandas-dev__pandas-37198
Token count is too large: Qiskit__qiskit-7860
Token count is too large: huggingface__transformers-20864


Generating train split: 3735 examples [04:37,  9.77 examples/s]

Token count is too large: huggingface__transformers-24719
Token count is too large: ytdl-org__youtube-dl-7045
Token count is too large: PrefectHQ__prefect-924
Token count is too large: Qiskit__qiskit-9020
Token count is too large: pandas-dev__pandas-32214
Token count is too large: pantsbuild__pants-14251
Token count is too large: pandas-dev__pandas-5682
Token count is too large: pandas-dev__pandas-10163
Token count is too large: pypa__pip-11598
Token count is too large: huggingface__transformers-25608
Token count is too large: pantsbuild__pants-17505
Token count is too large: numpy__numpy-10608
Token count is too large: pandas-dev__pandas-36613
Token count is too large: mesonbuild__meson-6106
Token count is too large: pandas-dev__pandas-27119
Token count is too large: conda__conda-872
Token count is too large: pandas-dev__pandas-8714
Token count is too large: googleapis__google-cloud-python-1950


Generating train split: 3739 examples [04:38,  8.69 examples/s]

Token count is too large: huggingface__transformers-21572
Token count is too large: mesonbuild__meson-10630
Token count is too large: celery__celery-3892
Token count is too large: pandas-dev__pandas-19751
Token count is too large: pandas-dev__pandas-6365
Token count is too large: pandas-dev__pandas-33985
Token count is too large: googleapis__google-cloud-python-449
Token count is too large: numpy__numpy-10621
Token count is too large: pandas-dev__pandas-17169
Token count is too large: google__jax-662
Token count is too large: google__jax-2512
Token count is too large: huggingface__transformers-9532
Token count is too large: pandas-dev__pandas-6685
Token count is too large: numpy__numpy-8543
Token count is too large: celery__celery-6757
Token count is too large: Lightning-AI__lightning-308
Token count is too large: pantsbuild__pants-13926


Generating train split: 3744 examples [04:38,  9.83 examples/s]

Token count is too large: pandas-dev__pandas-19880
Token count is too large: numpy__numpy-5468
Token count is too large: jupyterlab__jupyterlab-2409
Token count is too large: pandas-dev__pandas-37170
Token count is too large: apache__airflow-19723
Token count is too large: Lightning-AI__lightning-2671
Token count is too large: huggingface__transformers-22458
Token count is too large: Lightning-AI__lightning-1232


Generating train split: 3747 examples [04:38, 12.67 examples/s]

Token count is too large: pypa__pip-2122
Token count is too large: pandas-dev__pandas-34201
Token count is too large: PrefectHQ__prefect-2185
Token count is too large: mesonbuild__meson-10824
Token count is too large: pandas-dev__pandas-27026
Token count is too large: pantsbuild__pants-19225
Token count is too large: pandas-dev__pandas-3661


Generating train split: 3750 examples [04:38, 14.59 examples/s]

Token count is too large: Qiskit__qiskit-5596
Token count is too large: ipython__ipython-10479
Token count is too large: conan-io__conan-227
Token count is too large: pandas-dev__pandas-10644
Token count is too large: google__jax-1541
Token count is too large: conda__conda-4626
Token count is too large: mesonbuild__meson-1652
Token count is too large: pantsbuild__pants-18166
Token count is too large: pandas-dev__pandas-8577
Token count is too large: conan-io__conan-9050
Token count is too large: pantsbuild__pants-18634


Generating train split: 3752 examples [04:39, 10.74 examples/s]

Token count is too large: huggingface__transformers-14576
Token count is too large: pandas-dev__pandas-20537
Token count is too large: huggingface__transformers-18687
Token count is too large: googleapis__google-cloud-python-1249
Token count is too large: apache__airflow-20542
Token count is too large: pandas-dev__pandas-35503
Token count is too large: mesonbuild__meson-8253
Token count is too large: pandas-dev__pandas-5593


Generating train split: 3759 examples [04:39, 16.38 examples/s]

Token count is too large: pandas-dev__pandas-23495
Token count is too large: celery__celery-4880
Token count is too large: huggingface__transformers-22772
Token count is too large: apache__airflow-15302
Token count is too large: huggingface__transformers-12134
Token count is too large: Qiskit__qiskit-836
Token count is too large: pandas-dev__pandas-38170
Token count is too large: googleapis__google-cloud-python-4321
Token count is too large: Lightning-AI__lightning-2483
Token count is too large: conan-io__conan-3834
Token count is too large: scipy__scipy-2806
Token count is too large: Qiskit__qiskit-7099
Token count is too large: pandas-dev__pandas-8622
Token count is too large: pantsbuild__pants-13479
Token count is too large: conan-io__conan-4923
Token count is too large: Qiskit__qiskit-8617
Token count is too large: numpy__numpy-10411
Token count is too large: pandas-dev__pandas-5209
Token count is too large: pandas-dev__pandas-5415
Token count is too large: pypa__pip-1688
Token cou

Generating train split: 3765 examples [04:39, 15.73 examples/s]

Token count is too large: numpy__numpy-3238
Token count is too large: mesonbuild__meson-6197
Token count is too large: ytdl-org__youtube-dl-20341
Token count is too large: Qiskit__qiskit-6952
Token count is too large: pantsbuild__pants-5400
Token count is too large: huggingface__transformers-10236
Token count is too large: pyca__cryptography-3680
Token count is too large: conda__conda-10721
Token count is too large: google__jax-832
Token count is too large: mesonbuild__meson-5197
Token count is too large: mesonbuild__meson-5255
Token count is too large: conda__conda-2571
Token count is too large: pantsbuild__pants-15265
Token count is too large: pandas-dev__pandas-37256


Generating train split: 3769 examples [04:40, 12.67 examples/s]

Token count is too large: huggingface__transformers-23139
Token count is too large: huggingface__transformers-20712
Token count is too large: pandas-dev__pandas-38089
Token count is too large: numpy__numpy-23604
Token count is too large: pandas-dev__pandas-4148
Token count is too large: pantsbuild__pants-18077


Generating train split: 3771 examples [04:40, 13.39 examples/s]

Token count is too large: huggingface__transformers-16516
Token count is too large: Lightning-AI__lightning-1327
Token count is too large: pandas-dev__pandas-17517
Token count is too large: pandas-dev__pandas-11301
Token count is too large: pandas-dev__pandas-5090
Token count is too large: wagtail__wagtail-7512
Token count is too large: pandas-dev__pandas-4374
Token count is too large: Qiskit__qiskit-9006


Generating train split: 3775 examples [04:40, 16.42 examples/s]

Token count is too large: scipy__scipy-4648
Token count is too large: pandas-dev__pandas-6824
Token count is too large: apache__airflow-19375
Token count is too large: googleapis__google-cloud-python-2776
Token count is too large: numpy__numpy-12634
Token count is too large: numpy__numpy-12828
Token count is too large: googleapis__google-cloud-python-6440
Token count is too large: Qiskit__qiskit-6304


Generating train split: 3777 examples [04:41, 11.07 examples/s]

Token count is too large: ytdl-org__youtube-dl-28074
Token count is too large: conda__conda-2604
Token count is too large: pandas-dev__pandas-33373
Token count is too large: mesonbuild__meson-10747
Token count is too large: Qiskit__qiskit-8995
Token count is too large: pandas-dev__pandas-27267
Token count is too large: pypa__pip-7388


Generating train split: 3781 examples [04:41, 14.44 examples/s]

Token count is too large: pandas-dev__pandas-25265
Token count is too large: docker__compose-7543
Token count is too large: dagster-io__dagster-2721
Token count is too large: Qiskit__qiskit-2226
Token count is too large: wagtail__wagtail-8786
Token count is too large: pandas-dev__pandas-38816
Token count is too large: pandas-dev__pandas-30838


Generating train split: 3787 examples [04:41, 19.78 examples/s]

Token count is too large: googleapis__google-cloud-python-10002
Token count is too large: pandas-dev__pandas-8218
Token count is too large: conan-io__conan-12854
Token count is too large: huggingface__transformers-7272
Token count is too large: ipython__ipython-12954


Generating train split: 3790 examples [04:41, 16.99 examples/s]

Token count is too large: scipy__scipy-4756
Token count is too large: pandas-dev__pandas-6159
Token count is too large: mesonbuild__meson-1263
Token count is too large: pandas-dev__pandas-23482
Token count is too large: conda__conda-7951
Token count is too large: pandas-dev__pandas-24337
Token count is too large: pandas-dev__pandas-36185
Token count is too large: pandas-dev__pandas-3647
Token count is too large: googleapis__google-cloud-python-3231


Generating train split: 3793 examples [04:41, 14.28 examples/s]

Token count is too large: pandas-dev__pandas-22066
Token count is too large: pandas-dev__pandas-37039
Token count is too large: pandas-dev__pandas-35578
Token count is too large: pypa__pip-3794
Token count is too large: pandas-dev__pandas-17587
Token count is too large: Qiskit__qiskit-2740
Token count is too large: conan-io__conan-2717
Token count is too large: pandas-dev__pandas-27712
Token count is too large: PrefectHQ__prefect-2755
Token count is too large: pandas-dev__pandas-25525
Token count is too large: numpy__numpy-16950
Token count is too large: Qiskit__qiskit-8906
Token count is too large: Lightning-AI__lightning-728
Token count is too large: tiangolo__fastapi-1553
Token count is too large: numpy__numpy-16300
Token count is too large: pyca__cryptography-7132
Token count is too large: pypa__pip-7826


Generating train split: 3797 examples [04:42, 10.52 examples/s]

Token count is too large: pandas-dev__pandas-9901
Token count is too large: docker__compose-5938
Token count is too large: pandas-dev__pandas-35543
Token count is too large: pandas-dev__pandas-22163
Token count is too large: open-mmlab__mmdetection-8273
Token count is too large: tensorflow__models-6799


Generating train split: 3802 examples [04:42, 14.96 examples/s]

Token count is too large: mesonbuild__meson-9294
Token count is too large: mesonbuild__meson-3101
Token count is too large: pandas-dev__pandas-22865
Token count is too large: numpy__numpy-21890
Token count is too large: apache__airflow-26806
Token count is too large: pandas-dev__pandas-4868
Token count is too large: wagtail__wagtail-148
Token count is too large: pantsbuild__pants-5624


Generating train split: 3804 examples [04:42, 13.42 examples/s]

Token count is too large: mesonbuild__meson-8083
Token count is too large: googleapis__google-cloud-python-6522
Token count is too large: mesonbuild__meson-6961
Token count is too large: pandas-dev__pandas-38834
Token count is too large: pandas-dev__pandas-19216
Token count is too large: pandas-dev__pandas-8855
Token count is too large: huggingface__transformers-13824
Token count is too large: pantsbuild__pants-19155
Token count is too large: mesonbuild__meson-1588
Token count is too large: pandas-dev__pandas-28876
Token count is too large: pandas-dev__pandas-24434
Token count is too large: huggingface__transformers-19784
Token count is too large: Qiskit__qiskit-7221


Generating train split: 3807 examples [04:43, 11.42 examples/s]

Token count is too large: pandas-dev__pandas-17821
Token count is too large: googleapis__google-cloud-python-3825
Token count is too large: numpy__numpy-8762
Token count is too large: googleapis__google-cloud-python-6079
Token count is too large: ray-project__ray-2320
Token count is too large: numpy__numpy-4437
Token count is too large: mesonbuild__meson-10449


Generating train split: 3809 examples [04:43,  9.51 examples/s]

Token count is too large: pandas-dev__pandas-33863
Token count is too large: pandas-dev__pandas-30788
Token count is too large: pantsbuild__pants-15592
Token count is too large: pandas-dev__pandas-34814
Token count is too large: pandas-dev__pandas-33339
Token count is too large: conan-io__conan-2525


Generating train split: 3812 examples [04:43, 10.45 examples/s]

Token count is too large: pandas-dev__pandas-28854
Token count is too large: docker__compose-3653
Token count is too large: ipython__ipython-1077
Token count is too large: mesonbuild__meson-3093
Token count is too large: Qiskit__qiskit-908
Token count is too large: pandas-dev__pandas-33137
Token count is too large: ray-project__ray-10563
Token count is too large: pandas-dev__pandas-35707
Token count is too large: googleapis__google-cloud-python-3436
Token count is too large: twisted__twisted-11658


Generating train split: 3821 examples [04:44, 15.41 examples/s]

Token count is too large: googleapis__google-cloud-python-11318
Token count is too large: pandas-dev__pandas-28789
Token count is too large: pantsbuild__pants-11773
Token count is too large: huggingface__transformers-18562
Token count is too large: apache__airflow-26103
Token count is too large: googleapis__google-cloud-python-1779
Token count is too large: jupyterlab__jupyterlab-7136
Token count is too large: PrefectHQ__prefect-2743
Token count is too large: Qiskit__qiskit-3538
Token count is too large: Qiskit__qiskit-8852
Token count is too large: mesonbuild__meson-6139
Token count is too large: conan-io__conan-12578
Token count is too large: conda__conda-3391
Token count is too large: pandas-dev__pandas-39278
Token count is too large: mesonbuild__meson-12053
Token count is too large: pantsbuild__pants-12331
Token count is too large: wagtail__wagtail-10657
Token count is too large: numpy__numpy-19805
Token count is too large: google__jax-326
Token count is too large: pandas-dev__pand

Generating train split: 3825 examples [04:44, 10.89 examples/s]

Token count is too large: pandas-dev__pandas-23197
Token count is too large: google__jax-884
Token count is too large: pandas-dev__pandas-5790
Token count is too large: pandas-dev__pandas-28245
Token count is too large: huggingface__transformers-23842
Token count is too large: Qiskit__qiskit-2769
Token count is too large: conda__conda-6511
Token count is too large: pandas-dev__pandas-9622
Token count is too large: huggingface__transformers-24859
Token count is too large: huggingface__transformers-21811
Token count is too large: open-mmlab__mmdetection-10568
Token count is too large: docker__compose-5490
Token count is too large: pandas-dev__pandas-23808
Token count is too large: pandas-dev__pandas-5474
Token count is too large: huggingface__transformers-11582


Generating train split: 3828 examples [04:45,  6.85 examples/s]

Token count is too large: pandas-dev__pandas-17484
Token count is too large: mesonbuild__meson-5078
Token count is too large: pandas-dev__pandas-9078
Token count is too large: mesonbuild__meson-1266
There was an error processing
Token count is too large: huggingface__transformers-17849
Token count is too large: mesonbuild__meson-9400


Generating train split: 3831 examples [04:46,  7.26 examples/s]

Token count is too large: huggingface__transformers-16792
Token count is too large: apache__airflow-19821
Token count is too large: pandas-dev__pandas-14531
Token count is too large: dagster-io__dagster-6348
Token count is too large: conda__conda-7989
Token count is too large: google__jax-410
Token count is too large: pandas-dev__pandas-17011
Token count is too large: celery__celery-7244
Token count is too large: ytdl-org__youtube-dl-29236
Token count is too large: pantsbuild__pants-4685
Token count is too large: pantsbuild__pants-6428
Token count is too large: huggingface__transformers-12981
Token count is too large: pypa__pip-10577


Generating train split: 3835 examples [04:46,  8.97 examples/s]

Token count is too large: huggingface__transformers-25407
Token count is too large: ipython__ipython-4452
Token count is too large: mesonbuild__meson-1334
Token count is too large: google__jax-643
Token count is too large: mesonbuild__meson-5146
Token count is too large: numpy__numpy-10324
Token count is too large: pandas-dev__pandas-9501


Generating train split: 3840 examples [04:46, 11.53 examples/s]

Token count is too large: pandas-dev__pandas-13772
Token count is too large: ray-project__ray-9527
Token count is too large: pantsbuild__pants-9903
Token count is too large: ray-project__ray-5673
Token count is too large: PrefectHQ__prefect-2681
Token count is too large: Lightning-AI__lightning-2166
Token count is too large: docker__compose-6088
Token count is too large: pantsbuild__pants-18798


Generating train split: 3842 examples [04:47,  9.31 examples/s]

Token count is too large: mesonbuild__meson-5986
Token count is too large: pandas-dev__pandas-5880
Token count is too large: Qiskit__qiskit-1944
Token count is too large: pandas-dev__pandas-6810
Token count is too large: wagtail__wagtail-8689
Token count is too large: googleapis__google-cloud-python-10219
Token count is too large: pantsbuild__pants-16093
Token count is too large: pandas-dev__pandas-13859
Token count is too large: huggingface__transformers-7699
Token count is too large: ipython__ipython-8985


Generating train split: 3846 examples [04:47, 10.72 examples/s]

Token count is too large: ipython__ipython-4536
Token count is too large: ytdl-org__youtube-dl-8843
Token count is too large: googleapis__google-cloud-python-9785
Token count is too large: Qiskit__qiskit-9725
Token count is too large: pandas-dev__pandas-11898
Token count is too large: ipython__ipython-11172
Token count is too large: pantsbuild__pants-15637
Token count is too large: pandas-dev__pandas-24628
Token count is too large: pantsbuild__pants-6088
Token count is too large: googleapis__google-cloud-python-4458
Token count is too large: apache__airflow-20217
Token count is too large: huggingface__transformers-19721
Token count is too large: numpy__numpy-10537
Token count is too large: pandas-dev__pandas-30578


Generating train split: 3857 examples [04:47, 14.60 examples/s]

Token count is too large: numpy__numpy-3242
Token count is too large: googleapis__google-cloud-python-11206
Token count is too large: ytdl-org__youtube-dl-31000
Token count is too large: Qiskit__qiskit-6463
Token count is too large: apache__airflow-16345
Token count is too large: apache__airflow-17980
Token count is too large: pandas-dev__pandas-38742
Token count is too large: numpy__numpy-14520
Token count is too large: pandas-dev__pandas-31666
Token count is too large: pandas-dev__pandas-21350
Token count is too large: pandas-dev__pandas-38998
Token count is too large: pandas-dev__pandas-3913
Token count is too large: pandas-dev__pandas-32633
Token count is too large: pantsbuild__pants-5414
Token count is too large: pypa__pip-7927
Token count is too large: pandas-dev__pandas-36923
Token count is too large: Qiskit__qiskit-2958
Token count is too large: pandas-dev__pandas-34394
Token count is too large: apache__airflow-9174
Token count is too large: mesonbuild__meson-11911
Token count 

Generating train split: 3862 examples [04:48, 10.80 examples/s]

Token count is too large: huggingface__transformers-24274
Token count is too large: numpy__numpy-8682
Token count is too large: conan-io__conan-7585
Token count is too large: pypa__pip-7494
Token count is too large: mesonbuild__meson-4263
Token count is too large: googleapis__google-cloud-python-552
Token count is too large: pantsbuild__pants-18957
Token count is too large: pyca__cryptography-2507
Token count is too large: ipython__ipython-9827
Token count is too large: conda__conda-739
Token count is too large: googleapis__google-cloud-python-3765
Token count is too large: docker__compose-7787
Token count is too large: pandas-dev__pandas-28300


Generating train split: 3872 examples [04:48, 15.51 examples/s]

Token count is too large: mesonbuild__meson-5775
Token count is too large: numpy__numpy-16332
Token count is too large: ipython__ipython-13706
Token count is too large: pandas-dev__pandas-19584
Token count is too large: pandas-dev__pandas-19277
Token count is too large: open-mmlab__mmdetection-7157
Token count is too large: mesonbuild__meson-794
Token count is too large: pandas-dev__pandas-29545
Token count is too large: pandas-dev__pandas-15020


Generating train split: 3878 examples [04:49, 19.53 examples/s]

Token count is too large: mesonbuild__meson-2112
Token count is too large: pandas-dev__pandas-35838
Token count is too large: conda__conda-6719
Token count is too large: pandas-dev__pandas-27650
Token count is too large: mesonbuild__meson-5750
Token count is too large: Qiskit__qiskit-10271
Token count is too large: pandas-dev__pandas-17723
Token count is too large: pantsbuild__pants-5837
Token count is too large: googleapis__google-cloud-python-9053


Generating train split: 3881 examples [04:49, 18.15 examples/s]

Token count is too large: pandas-dev__pandas-4757
Token count is too large: numpy__numpy-10120
Token count is too large: conan-io__conan-6433
Token count is too large: apache__airflow-29441
Token count is too large: docker__compose-7328
Token count is too large: numpy__numpy-12224
Token count is too large: googleapis__google-cloud-python-2365
Token count is too large: pandas-dev__pandas-10960
Token count is too large: Qiskit__qiskit-8041
Token count is too large: Qiskit__qiskit-1454
Token count is too large: docker__compose-6596
Token count is too large: pantsbuild__pants-19135
Token count is too large: apache__airflow-25821


Generating train split: 3887 examples [04:49, 20.91 examples/s]

Token count is too large: pandas-dev__pandas-39109
Token count is too large: pandas-dev__pandas-26022
Token count is too large: Qiskit__qiskit-2112
Token count is too large: ray-project__ray-3270
Token count is too large: googleapis__google-cloud-python-9162
Token count is too large: ray-project__ray-6942


Generating train split: 3895 examples [04:49, 21.45 examples/s]

Token count is too large: googleapis__google-cloud-python-6083
Token count is too large: pandas-dev__pandas-27466
Token count is too large: huggingface__transformers-6322
Token count is too large: docker__compose-4820
Token count is too large: conda__conda-6347
Token count is too large: Qiskit__qiskit-458
Token count is too large: pantsbuild__pants-16148
Token count is too large: Qiskit__qiskit-6111
Token count is too large: googleapis__google-cloud-python-560
Token count is too large: ytdl-org__youtube-dl-31235
Token count is too large: open-mmlab__mmdetection-7685
Token count is too large: celery__celery-3731


Generating train split: 3899 examples [04:50, 17.49 examples/s]

Token count is too large: celery__celery-6599
Token count is too large: ipython__ipython-4054
Token count is too large: huggingface__transformers-25497
Token count is too large: apache__airflow-468
Token count is too large: pandas-dev__pandas-5387


Generating train split: 3913 examples [04:50, 31.01 examples/s]

Token count is too large: numpy__numpy-4479
Token count is too large: pantsbuild__pants-6772
Token count is too large: conan-io__conan-2314
Token count is too large: Lightning-AI__lightning-1385
Token count is too large: PrefectHQ__prefect-1069
Token count is too large: mesonbuild__meson-3483
Token count is too large: apache__airflow-12386
Token count is too large: ray-project__ray-7501
Token count is too large: pandas-dev__pandas-5188
Token count is too large: pandas-dev__pandas-29420
Token count is too large: huggingface__transformers-1383
Token count is too large: pyca__cryptography-2543
Token count is too large: jupyterlab__jupyterlab-3115
Token count is too large: Qiskit__qiskit-3748
Token count is too large: googleapis__google-cloud-python-1241
Token count is too large: google__jax-2113
Token count is too large: Qiskit__qiskit-2815
Token count is too large: conan-io__conan-5223


Generating train split: 3920 examples [04:50, 27.62 examples/s]

Token count is too large: huggingface__transformers-21843
Token count is too large: Qiskit__qiskit-1532
Token count is too large: scipy__scipy-4150
Token count is too large: ipython__ipython-3846
Token count is too large: google__jax-1916
Token count is too large: pyca__cryptography-5314
Token count is too large: pandas-dev__pandas-8586
Token count is too large: DataDog__integrations-core-446
Token count is too large: pandas-dev__pandas-38246
Token count is too large: pandas-dev__pandas-16429
Token count is too large: pantsbuild__pants-8540


Generating train split: 3925 examples [04:51, 23.95 examples/s]

Token count is too large: pandas-dev__pandas-5757
Token count is too large: pandas-dev__pandas-4602
Token count is too large: open-mmlab__mmdetection-7585
Token count is too large: docker__compose-5266
Token count is too large: huggingface__transformers-15427
Token count is too large: Qiskit__qiskit-4656
Token count is too large: wagtail__wagtail-8008
Token count is too large: pandas-dev__pandas-29353
Token count is too large: numpy__numpy-19736
Token count is too large: googleapis__google-cloud-python-4546
Token count is too large: docker__compose-6542
Token count is too large: numpy__numpy-3208
Token count is too large: pandas-dev__pandas-35645
Token count is too large: pypa__pip-12078
Token count is too large: pandas-dev__pandas-8483
Token count is too large: pandas-dev__pandas-32278
Token count is too large: wagtail__wagtail-8175
Token count is too large: pypa__pip-10502
Token count is too large: docker__compose-6641
Token count is too large: pandas-dev__pandas-17586
Token count is

Generating train split: 3929 examples [04:51, 15.30 examples/s]

Token count is too large: open-mmlab__mmdetection-4810
Token count is too large: Lightning-AI__lightning-1108
Token count is too large: pandas-dev__pandas-4641
Token count is too large: ipython__ipython-1065
Token count is too large: mesonbuild__meson-2598
Token count is too large: docker__compose-1889
Token count is too large: celery__celery-5952


Generating train split: 3936 examples [04:51, 20.27 examples/s]

Token count is too large: conan-io__conan-10516
Token count is too large: numpy__numpy-8255
Token count is too large: Qiskit__qiskit-10484
Token count is too large: celery__celery-6223
Token count is too large: pandas-dev__pandas-27303
Token count is too large: pandas-dev__pandas-25142
Token count is too large: conda__conda-10735
Token count is too large: conda__conda-8083
Token count is too large: numpy__numpy-7587
Token count is too large: apache__airflow-14179


Generating train split: 3940 examples [04:52, 16.71 examples/s]

Token count is too large: pandas-dev__pandas-17367
Token count is too large: pandas-dev__pandas-6054
Token count is too large: pandas-dev__pandas-7544
Token count is too large: pantsbuild__pants-15595
Token count is too large: conan-io__conan-7353
Token count is too large: Lightning-AI__lightning-751
Token count is too large: conda__conda-6708
Token count is too large: Qiskit__qiskit-5804
Token count is too large: huggingface__transformers-357


Generating train split: 3943 examples [04:52, 15.93 examples/s]

Token count is too large: pandas-dev__pandas-10718
Token count is too large: huggingface__transformers-20136
Token count is too large: docker__compose-6654
Token count is too large: apache__airflow-21307
Token count is too large: pandas-dev__pandas-19717
Token count is too large: pandas-dev__pandas-4123


Generating train split: 3946 examples [04:52, 17.35 examples/s]

Token count is too large: pandas-dev__pandas-29212
Token count is too large: pandas-dev__pandas-22802
Token count is too large: pandas-dev__pandas-22811
Token count is too large: pandas-dev__pandas-27549
Token count is too large: huggingface__transformers-12035
Token count is too large: mesonbuild__meson-11926
Token count is too large: pandas-dev__pandas-18416
Token count is too large: Qiskit__qiskit-4626
Token count is too large: numpy__numpy-8739


Generating train split: 3949 examples [04:53, 12.10 examples/s]

Token count is too large: conda__conda-6293
Token count is too large: wagtail__wagtail-8895
Token count is too large: pandas-dev__pandas-36145
Token count is too large: numpy__numpy-10905
Token count is too large: pandas-dev__pandas-15671
Token count is too large: pandas-dev__pandas-34611
Token count is too large: pandas-dev__pandas-36061
Token count is too large: ytdl-org__youtube-dl-3534
Token count is too large: pandas-dev__pandas-27920
Token count is too large: pandas-dev__pandas-19561
Token count is too large: huggingface__transformers-23122


Generating train split: 3953 examples [04:53, 10.69 examples/s]

Token count is too large: conda__conda-7719
Token count is too large: apache__airflow-9730
Token count is too large: ipython__ipython-3789
Token count is too large: mesonbuild__meson-5717
Token count is too large: pandas-dev__pandas-21497
Token count is too large: mesonbuild__meson-10595


Generating train split: 3957 examples [04:53, 11.41 examples/s]

Token count is too large: pandas-dev__pandas-34069
Token count is too large: PrefectHQ__prefect-2433
Token count is too large: apache__airflow-12829
Token count is too large: google__jax-234
Token count is too large: google__jax-1055
Token count is too large: pandas-dev__pandas-28762
Token count is too large: wagtail__wagtail-8676
Token count is too large: pandas-dev__pandas-24494
Token count is too large: scipy__scipy-3270
Token count is too large: mesonbuild__meson-1621
Token count is too large: scipy__scipy-4611
Token count is too large: dagster-io__dagster-14163
Token count is too large: docker__compose-5560
Token count is too large: Lightning-AI__lightning-2904
Token count is too large: googleapis__google-cloud-python-2178
Token count is too large: twisted__twisted-1650
Token count is too large: googleapis__google-cloud-python-7797
Token count is too large: wagtail__wagtail-10130
Token count is too large: PrefectHQ__prefect-864
Token count is too large: ipython__ipython-10787


Generating train split: 3961 examples [04:54, 12.16 examples/s]

Token count is too large: pandas-dev__pandas-36697
Token count is too large: mesonbuild__meson-4001
Token count is too large: ytdl-org__youtube-dl-613
Token count is too large: ipython__ipython-2364
Token count is too large: tiangolo__fastapi-1122
Token count is too large: pandas-dev__pandas-21987
Token count is too large: pandas-dev__pandas-24680
Token count is too large: conda__conda-6881
Token count is too large: pandas-dev__pandas-26537


Generating train split: 3965 examples [04:54, 12.16 examples/s]

Token count is too large: pandas-dev__pandas-33047
Token count is too large: huggingface__transformers-15566
Token count is too large: pandas-dev__pandas-18677
Token count is too large: huggingface__transformers-11994
Token count is too large: scipy__scipy-3504
Token count is too large: apache__airflow-30856
Token count is too large: numpy__numpy-12280
Token count is too large: Qiskit__qiskit-8877


Generating train split: 3967 examples [04:54, 12.64 examples/s]

Token count is too large: mesonbuild__meson-9202
Token count is too large: Qiskit__qiskit-6926
Token count is too large: mesonbuild__meson-5325
Token count is too large: pandas-dev__pandas-8787
Token count is too large: huggingface__transformers-8623
Token count is too large: conan-io__conan-8769
Token count is too large: Qiskit__qiskit-10376
Token count is too large: googleapis__google-cloud-python-11350
Token count is too large: pandas-dev__pandas-31359
Token count is too large: ray-project__ray-10662


Generating train split: 3972 examples [04:54, 14.61 examples/s]

Token count is too large: pandas-dev__pandas-19297
Token count is too large: pandas-dev__pandas-35061
Token count is too large: dagster-io__dagster-8765
Token count is too large: PrefectHQ__prefect-963
Token count is too large: Qiskit__qiskit-8166
Token count is too large: apache__airflow-3828
Token count is too large: Qiskit__qiskit-7933
Token count is too large: pandas-dev__pandas-6338
Token count is too large: huggingface__transformers-16906
Token count is too large: Qiskit__qiskit-8978
Token count is too large: pandas-dev__pandas-3635
Token count is too large: open-mmlab__mmdetection-3634


Generating train split: 3974 examples [04:55, 14.19 examples/s]

Token count is too large: conan-io__conan-6724
Token count is too large: mesonbuild__meson-3066
Token count is too large: numpy__numpy-12587
Token count is too large: pandas-dev__pandas-4478
Token count is too large: pantsbuild__pants-6475
Token count is too large: googleapis__google-cloud-python-733
Token count is too large: pandas-dev__pandas-37043
Token count is too large: conda__conda-6932
Token count is too large: apache__airflow-15361
Token count is too large: googleapis__google-cloud-python-7532


Generating train split: 3976 examples [04:55, 11.29 examples/s]

Token count is too large: conda__conda-12985
Token count is too large: ipython__ipython-9335
Token count is too large: pandas-dev__pandas-4627
Token count is too large: Lightning-AI__lightning-2115
Token count is too large: mesonbuild__meson-10050
Token count is too large: mesonbuild__meson-6555
Token count is too large: pandas-dev__pandas-36482
Token count is too large: numpy__numpy-10352
Token count is too large: pandas-dev__pandas-32166
Token count is too large: scipy__scipy-5178


Generating train split: 3982 examples [04:56, 10.76 examples/s]

Token count is too large: conan-io__conan-10175
Token count is too large: conan-io__conan-3615
Token count is too large: pandas-dev__pandas-17902
Token count is too large: googleapis__google-cloud-python-11112
Token count is too large: mesonbuild__meson-5975
Token count is too large: pandas-dev__pandas-22436
Token count is too large: Qiskit__qiskit-3142
Token count is too large: wagtail__wagtail-7738
Token count is too large: celery__celery-6147
Token count is too large: ray-project__ray-8324


Generating train split: 3986 examples [04:56, 13.66 examples/s]

Token count is too large: mesonbuild__meson-11159
Token count is too large: mesonbuild__meson-10111
Token count is too large: conda__conda-6794
Token count is too large: pandas-dev__pandas-28074
Token count is too large: mesonbuild__meson-2591
Token count is too large: pandas-dev__pandas-23051
Token count is too large: numpy__numpy-21253
Token count is too large: huggingface__transformers-18832
Token count is too large: pantsbuild__pants-11557
Token count is too large: apache__airflow-26667
Token count is too large: mesonbuild__meson-9696
Token count is too large: huggingface__transformers-19773
Token count is too large: Lightning-AI__lightning-685
Token count is too large: huggingface__transformers-25033
Token count is too large: numpy__numpy-5168
Token count is too large: ytdl-org__youtube-dl-900
Token count is too large: conda__conda-10022


Generating train split: 3988 examples [04:56,  9.19 examples/s]

Token count is too large: huggingface__transformers-14753
Token count is too large: pandas-dev__pandas-35900
Token count is too large: Qiskit__qiskit-2883
Token count is too large: pandas-dev__pandas-35583
Token count is too large: pandas-dev__pandas-19829
Token count is too large: pandas-dev__pandas-6467
Token count is too large: huggingface__transformers-19000
Token count is too large: wagtail__wagtail-9369
Token count is too large: ipython__ipython-2399


Generating train split: 3990 examples [04:56,  8.75 examples/s]

Token count is too large: googleapis__google-cloud-python-6435
Token count is too large: pantsbuild__pants-5867
Token count is too large: google__jax-629
Token count is too large: mesonbuild__meson-4432
Token count is too large: pandas-dev__pandas-17703
Token count is too large: pandas-dev__pandas-7341
Token count is too large: pandas-dev__pandas-8726
Token count is too large: apache__airflow-31140
Token count is too large: kubeflow__pipelines-1269
Token count is too large: ipython__ipython-9418
Token count is too large: ray-project__ray-1744


Generating train split: 3993 examples [04:57,  8.24 examples/s]

Token count is too large: pandas-dev__pandas-24128
Token count is too large: wagtail__wagtail-371
Token count is too large: Qiskit__qiskit-5740
Token count is too large: pandas-dev__pandas-28221
Token count is too large: google__jax-1063
Token count is too large: pandas-dev__pandas-5125
Token count is too large: pandas-dev__pandas-4943
Token count is too large: pandas-dev__pandas-7149
Token count is too large: huggingface__transformers-5749
Token count is too large: Qiskit__qiskit-2237


Generating train split: 3997 examples [04:57, 10.79 examples/s]

Token count is too large: ipython__ipython-1229
Token count is too large: PrefectHQ__prefect-1100
Token count is too large: Qiskit__qiskit-10375
Token count is too large: apache__airflow-19933
Token count is too large: pandas-dev__pandas-7087
Token count is too large: pandas-dev__pandas-31013
Token count is too large: Qiskit__qiskit-3053
Token count is too large: pantsbuild__pants-18839
Token count is too large: numpy__numpy-9025
Token count is too large: ytdl-org__youtube-dl-8703
Token count is too large: ipython__ipython-13464
Token count is too large: docker__compose-1643
Token count is too large: numpy__numpy-9718
Token count is too large: google__jax-699
Token count is too large: googleapis__google-cloud-python-5343
Token count is too large: pandas-dev__pandas-33493
Token count is too large: pandas-dev__pandas-7435


Generating train split: 3999 examples [04:57,  9.12 examples/s]

Token count is too large: huggingface__transformers-21968
Token count is too large: pandas-dev__pandas-5718
Token count is too large: Lightning-AI__lightning-128
Token count is too large: pyca__cryptography-1048
Token count is too large: mesonbuild__meson-8142
Token count is too large: Qiskit__qiskit-578
Token count is too large: pandas-dev__pandas-37499
Token count is too large: scipy__scipy-5140
Token count is too large: wagtail__wagtail-5138
Token count is too large: pandas-dev__pandas-38754


Generating train split: 4002 examples [04:58, 10.04 examples/s]

Token count is too large: numpy__numpy-3753
Token count is too large: google__jax-477
Token count is too large: wagtail__wagtail-3467
Token count is too large: conda__conda-3390
Token count is too large: pandas-dev__pandas-34199
Token count is too large: pandas-dev__pandas-4440
Token count is too large: mesonbuild__meson-10696


Generating train split: 4013 examples [04:58, 17.03 examples/s]

Token count is too large: open-mmlab__mmdetection-7559
Token count is too large: mesonbuild__meson-3507
Token count is too large: googleapis__google-cloud-python-9475
Token count is too large: pandas-dev__pandas-6352
Token count is too large: ray-project__ray-11218
Token count is too large: pandas-dev__pandas-10107
Token count is too large: ipython__ipython-7468
Token count is too large: docker__compose-1963
Token count is too large: numpy__numpy-15054
Token count is too large: numpy__numpy-17812
Token count is too large: ray-project__ray-8533
Token count is too large: pandas-dev__pandas-19948


Generating train split: 4016 examples [04:58, 16.39 examples/s]

Token count is too large: googleapis__google-cloud-python-4056
Token count is too large: PrefectHQ__prefect-215
Token count is too large: pandas-dev__pandas-7519
Token count is too large: docker__compose-2288
Token count is too large: pandas-dev__pandas-4515
Token count is too large: pandas-dev__pandas-38244
Token count is too large: apache__airflow-25661
Token count is too large: pantsbuild__pants-4487
Token count is too large: huggingface__transformers-24322
Token count is too large: mesonbuild__meson-8878
Token count is too large: pypa__pip-3542
Token count is too large: ipython__ipython-1129
Token count is too large: mesonbuild__meson-11706
Token count is too large: googleapis__google-cloud-python-11577
Token count is too large: pyca__cryptography-3725
Token count is too large: huggingface__transformers-20713
Token count is too large: pandas-dev__pandas-25329
Token count is too large: ray-project__ray-2892
Token count is too large: Qiskit__qiskit-5577
Token count is too large: meso

Generating train split: 4023 examples [05:00,  7.38 examples/s]

Token count is too large: docker__compose-6835
Token count is too large: pandas-dev__pandas-3040
Token count is too large: pandas-dev__pandas-2231
Token count is too large: pypa__pip-6215
Token count is too large: pandas-dev__pandas-22809
Token count is too large: googleapis__google-cloud-python-8979
Token count is too large: googleapis__google-cloud-python-1924
Token count is too large: pandas-dev__pandas-21332
Token count is too large: pandas-dev__pandas-25625
Token count is too large: Lightning-AI__lightning-1211


Generating train split: 4025 examples [05:00,  6.80 examples/s]

Token count is too large: huggingface__transformers-25083
Token count is too large: pandas-dev__pandas-25479
Token count is too large: pypa__pip-2290
Token count is too large: pandas-dev__pandas-10691
Token count is too large: pandas-dev__pandas-4991
Token count is too large: apache__airflow-23070
Token count is too large: conan-io__conan-6021
Token count is too large: ray-project__ray-7875
Token count is too large: Qiskit__qiskit-2121
Token count is too large: pandas-dev__pandas-11432
Token count is too large: pyca__cryptography-520
Token count is too large: googleapis__google-cloud-python-656
Token count is too large: mesonbuild__meson-11174
Token count is too large: huggingface__transformers-7078
Token count is too large: huggingface__transformers-7153
Token count is too large: pandas-dev__pandas-31788
Token count is too large: pyca__cryptography-5879
Token count is too large: mesonbuild__meson-4874
Token count is too large: pandas-dev__pandas-17388
Token count is too large: conan-i

Generating train split: 4031 examples [05:01,  7.55 examples/s]

Token count is too large: pandas-dev__pandas-23767
Token count is too large: pandas-dev__pandas-13680
Token count is too large: googleapis__google-cloud-python-4265
Token count is too large: conda__conda-6616
Token count is too large: pandas-dev__pandas-37547
Token count is too large: gitpython-developers__GitPython-818
Token count is too large: Qiskit__qiskit-3597
Token count is too large: pantsbuild__pants-12296
Token count is too large: pantsbuild__pants-11656
Token count is too large: ray-project__ray-956
Token count is too large: PrefectHQ__prefect-1004
Token count is too large: celery__celery-6233
Token count is too large: pantsbuild__pants-13602
Token count is too large: pantsbuild__pants-14962
Token count is too large: pandas-dev__pandas-13188
Token count is too large: pandas-dev__pandas-36763
Token count is too large: pandas-dev__pandas-26360
Token count is too large: Qiskit__qiskit-3483


Generating train split: 4034 examples [05:01,  7.89 examples/s]

Token count is too large: jupyterlab__jupyterlab-1607
Token count is too large: ipython__ipython-576
Token count is too large: pandas-dev__pandas-7068
Token count is too large: PrefectHQ__prefect-629
Token count is too large: pandas-dev__pandas-16397
Token count is too large: pandas-dev__pandas-7591
Token count is too large: ray-project__ray-5382
Token count is too large: wagtail__wagtail-7069
Token count is too large: google__jax-2500


Generating train split: 4039 examples [05:02, 11.11 examples/s]

Token count is too large: pantsbuild__pants-12066
Token count is too large: conda__conda-5115
Token count is too large: numpy__numpy-10797
Token count is too large: PrefectHQ__prefect-1572
Token count is too large: googleapis__google-cloud-python-1367
Token count is too large: pandas-dev__pandas-33465
Token count is too large: pandas-dev__pandas-16449
Token count is too large: pandas-dev__pandas-25620
Token count is too large: pantsbuild__pants-4324
Token count is too large: google__jax-3413
Token count is too large: DataDog__integrations-core-8335


Generating train split: 4042 examples [05:02, 12.20 examples/s]

Token count is too large: pypa__pip-9450
Token count is too large: scipy__scipy-4570
Token count is too large: ytdl-org__youtube-dl-16250
Token count is too large: pandas-dev__pandas-3780
Token count is too large: pandas-dev__pandas-32953
Token count is too large: wagtail__wagtail-1120
Token count is too large: pandas-dev__pandas-21252
Token count is too large: mesonbuild__meson-2627
Token count is too large: mesonbuild__meson-9989
Token count is too large: numpy__numpy-16081
Token count is too large: google__jax-1171
Token count is too large: numpy__numpy-5592


Generating train split: 4045 examples [05:02, 11.97 examples/s]

Token count is too large: pandas-dev__pandas-22920
Token count is too large: PrefectHQ__prefect-2865
Token count is too large: pypa__pip-10771
Token count is too large: numpy__numpy-9336
Token count is too large: Qiskit__qiskit-6567
Token count is too large: pandas-dev__pandas-33089
Token count is too large: conda__conda-12996
Token count is too large: pandas-dev__pandas-22515
Token count is too large: conan-io__conan-3661


Generating train split: 4050 examples [05:02, 13.54 examples/s]

Token count is too large: pandas-dev__pandas-27140
Token count is too large: huggingface__transformers-9428
Token count is too large: ray-project__ray-11041
Token count is too large: pantsbuild__pants-15064
Token count is too large: pypa__pip-7845
Token count is too large: pandas-dev__pandas-5000
Token count is too large: ray-project__ray-7065
Token count is too large: Lightning-AI__lightning-2969
Token count is too large: pandas-dev__pandas-14392
Token count is too large: ipython__ipython-7099
Token count is too large: mesonbuild__meson-3784
Token count is too large: pandas-dev__pandas-23082
Token count is too large: pandas-dev__pandas-38277
Token count is too large: ytdl-org__youtube-dl-30556
Token count is too large: Qiskit__qiskit-4810
Token count is too large: pandas-dev__pandas-24293
Token count is too large: pandas-dev__pandas-3762
Token count is too large: pandas-dev__pandas-28651
Token count is too large: Qiskit__qiskit-636
Token count is too large: conda__conda-5091
Token cou

Generating train split: 4053 examples [05:03,  8.71 examples/s]

Token count is too large: pandas-dev__pandas-8484
Token count is too large: pantsbuild__pants-4747
Token count is too large: pandas-dev__pandas-16532
Token count is too large: Lightning-AI__lightning-2358
Token count is too large: Qiskit__qiskit-2723
Token count is too large: pandas-dev__pandas-38220
Token count is too large: pandas-dev__pandas-27928


Generating train split: 4058 examples [05:03, 10.64 examples/s]

Token count is too large: huggingface__transformers-11651
Token count is too large: conan-io__conan-7243
Token count is too large: pandas-dev__pandas-37675
Token count is too large: ytdl-org__youtube-dl-16326
Token count is too large: pandas-dev__pandas-20939
Token count is too large: pandas-dev__pandas-17882
Token count is too large: mesonbuild__meson-5058


Generating train split: 4060 examples [05:04, 10.29 examples/s]

Token count is too large: Qiskit__qiskit-7013
Token count is too large: pandas-dev__pandas-10597
Token count is too large: ytdl-org__youtube-dl-31517
Token count is too large: pandas-dev__pandas-21009
Token count is too large: pandas-dev__pandas-27425
Token count is too large: Qiskit__qiskit-6545
Token count is too large: celery__celery-6749
Token count is too large: conan-io__conan-301


Generating train split: 4066 examples [05:04, 15.75 examples/s]

Token count is too large: docker__compose-3319
Token count is too large: Qiskit__qiskit-4887
Token count is too large: pandas-dev__pandas-27630
Token count is too large: pandas-dev__pandas-16123
Token count is too large: pandas-dev__pandas-24421
Token count is too large: pandas-dev__pandas-16786
Token count is too large: ipython__ipython-1023
Token count is too large: celery__celery-5700
Token count is too large: pypa__pip-2767
Token count is too large: conan-io__conan-2425
Token count is too large: pandas-dev__pandas-34048
Token count is too large: googleapis__google-cloud-python-6082


Generating train split: 4070 examples [05:04, 18.14 examples/s]

Token count is too large: pandas-dev__pandas-32490
Token count is too large: jupyterlab__jupyterlab-7976
Token count is too large: google__jax-633
Token count is too large: pandas-dev__pandas-22031
Token count is too large: Lightning-AI__lightning-2689
Token count is too large: google__jax-3439
Token count is too large: numpy__numpy-4300
Token count is too large: pandas-dev__pandas-18488
Token count is too large: pantsbuild__pants-11312
Token count is too large: pandas-dev__pandas-18110
Token count is too large: pypa__pip-3265
Token count is too large: open-mmlab__mmdetection-2280
Token count is too large: pandas-dev__pandas-36638
Token count is too large: google__jax-981
Token count is too large: pandas-dev__pandas-37166
Token count is too large: huggingface__transformers-9076
Token count is too large: pandas-dev__pandas-38759
Token count is too large: mesonbuild__meson-8500
Token count is too large: open-mmlab__mmdetection-6781
Token count is too large: pandas-dev__pandas-21628
Token

Generating train split: 4077 examples [05:04, 16.41 examples/s]

Token count is too large: apache__airflow-25312
Token count is too large: pantsbuild__pants-7116
Token count is too large: mesonbuild__meson-1186
Token count is too large: twisted__twisted-11909
Token count is too large: numpy__numpy-20796
Token count is too large: Lightning-AI__lightning-2581
Token count is too large: conda__conda-5526
Token count is too large: pandas-dev__pandas-13392
Token count is too large: pandas-dev__pandas-37268
Token count is too large: pandas-dev__pandas-38136
Token count is too large: ray-project__ray-4868


Generating train split: 4084 examples [05:05, 20.20 examples/s]

Token count is too large: pandas-dev__pandas-24254
Token count is too large: conda__conda-7606
Token count is too large: docker__compose-7052
Token count is too large: dagster-io__dagster-6920
Token count is too large: googleapis__google-cloud-python-6648
Token count is too large: Qiskit__qiskit-2539
Token count is too large: pandas-dev__pandas-8832
Token count is too large: huggingface__transformers-19798
Token count is too large: pandas-dev__pandas-23802
Token count is too large: pandas-dev__pandas-3743


Generating train split: 4087 examples [05:05, 16.60 examples/s]

Token count is too large: huggingface__transformers-11785
Token count is too large: pandas-dev__pandas-32757
Token count is too large: pandas-dev__pandas-14005
Token count is too large: ipython__ipython-5047
Token count is too large: Lightning-AI__lightning-1431
Token count is too large: numpy__numpy-7053
Token count is too large: pandas-dev__pandas-5134
Token count is too large: pypa__pip-6389
Token count is too large: conda__conda-12904
Token count is too large: apache__airflow-24034
Token count is too large: pandas-dev__pandas-11863
Token count is too large: celery__celery-8312


Generating train split: 4096 examples [05:05, 22.03 examples/s]

Token count is too large: huggingface__transformers-14190
Token count is too large: googleapis__google-cloud-python-11349
Token count is too large: googleapis__google-cloud-python-299
Token count is too large: googleapis__google-cloud-python-5655
Token count is too large: pandas-dev__pandas-9289
Token count is too large: googleapis__google-cloud-python-941
Token count is too large: dagster-io__dagster-1217
Token count is too large: Qiskit__qiskit-10377
Token count is too large: Qiskit__qiskit-4243
Token count is too large: ipython__ipython-10434
Token count is too large: conan-io__conan-13041
Token count is too large: pyca__cryptography-849
Token count is too large: gitpython-developers__GitPython-1102
Token count is too large: wagtail__wagtail-4514
Token count is too large: googleapis__google-cloud-python-11373
Token count is too large: pandas-dev__pandas-16790
Token count is too large: ipython__ipython-1008
Token count is too large: conan-io__conan-2518


Generating train split: 4099 examples [05:06, 18.31 examples/s]

Token count is too large: huggingface__transformers-21347
Token count is too large: ipython__ipython-11698
Token count is too large: twisted__twisted-11718
Token count is too large: open-mmlab__mmdetection-7386
Token count is too large: conan-io__conan-187
Token count is too large: Qiskit__qiskit-1009
Token count is too large: Qiskit__qiskit-7613
Token count is too large: Qiskit__qiskit-4932
Token count is too large: huggingface__transformers-3833
Token count is too large: conan-io__conan-6739
Token count is too large: pypa__pip-9123
Token count is too large: Qiskit__qiskit-2058
Token count is too large: ytdl-org__youtube-dl-889
Token count is too large: Lightning-AI__lightning-2428
Token count is too large: Qiskit__qiskit-816


Generating train split: 4106 examples [05:06, 22.61 examples/s]

Token count is too large: ray-project__ray-3630
Token count is too large: Qiskit__qiskit-9961
Token count is too large: Lightning-AI__lightning-1145
Token count is too large: pandas-dev__pandas-39547
Token count is too large: pandas-dev__pandas-16108
Token count is too large: pandas-dev__pandas-17691
Token count is too large: pandas-dev__pandas-16683
Token count is too large: huggingface__transformers-9423


Generating train split: 4109 examples [05:06, 17.20 examples/s]

Token count is too large: huggingface__transformers-13132
Token count is too large: ipython__ipython-10795
Token count is too large: wagtail__wagtail-9814
Token count is too large: pandas-dev__pandas-21573
Token count is too large: pantsbuild__pants-16226
Token count is too large: celery__celery-6869
Token count is too large: pandas-dev__pandas-6983
Token count is too large: PrefectHQ__prefect-84
Token count is too large: ray-project__ray-6485
Token count is too large: pandas-dev__pandas-6089


Generating train split: 4111 examples [05:06, 16.00 examples/s]

Token count is too large: numpy__numpy-9779
Token count is too large: huggingface__transformers-12441
Token count is too large: mesonbuild__meson-4051
Token count is too large: ipython__ipython-3180
Token count is too large: huggingface__transformers-16492
Token count is too large: dagster-io__dagster-13240
Token count is too large: wagtail__wagtail-9147
Token count is too large: pandas-dev__pandas-24187
Token count is too large: conda__conda-8160
Token count is too large: mesonbuild__meson-6457
Token count is too large: pandas-dev__pandas-24217
Token count is too large: pyca__cryptography-6599
Token count is too large: wagtail__wagtail-7082
Token count is too large: numpy__numpy-10635
Token count is too large: ray-project__ray-1467


Generating train split: 4118 examples [05:07, 15.70 examples/s]

Token count is too large: pandas-dev__pandas-32544
Token count is too large: pyca__cryptography-4811
Token count is too large: wagtail__wagtail-5510
Token count is too large: mesonbuild__meson-1356
Token count is too large: Qiskit__qiskit-6713
Token count is too large: huggingface__transformers-13919


Generating train split: 4121 examples [05:07, 14.77 examples/s]

Token count is too large: numpy__numpy-9106
Token count is too large: conan-io__conan-3186
Token count is too large: pantsbuild__pants-19076
Token count is too large: pandas-dev__pandas-6475
Token count is too large: pandas-dev__pandas-25745
Token count is too large: Lightning-AI__lightning-955
Token count is too large: numpy__numpy-8750
Token count is too large: pandas-dev__pandas-18670
Token count is too large: jupyterlab__jupyterlab-9390
Token count is too large: pandas-dev__pandas-20240
Token count is too large: pandas-dev__pandas-21263
Token count is too large: googleapis__google-cloud-python-3514
Token count is too large: pandas-dev__pandas-15161
Token count is too large: ipython__ipython-2036
Token count is too large: celery__celery-3867
Token count is too large: pandas-dev__pandas-27243
Token count is too large: pandas-dev__pandas-21482
Token count is too large: pandas-dev__pandas-4073
Token count is too large: pantsbuild__pants-15032
Token count is too large: numpy__numpy-1349

Generating train split: 4125 examples [05:07, 13.31 examples/s]

Token count is too large: numpy__numpy-10086
Token count is too large: conan-io__conan-2448
Token count is too large: ipython__ipython-1906
Token count is too large: pandas-dev__pandas-9264
Token count is too large: conda__conda-8819
Token count is too large: conda__conda-5414
Token count is too large: googleapis__google-cloud-python-9294
Token count is too large: mesonbuild__meson-2837
Token count is too large: huggingface__transformers-11387


Generating train split: 4128 examples [05:08, 13.23 examples/s]

Token count is too large: pandas-dev__pandas-3559
Token count is too large: mesonbuild__meson-9983
Token count is too large: PrefectHQ__prefect-2391
Token count is too large: pandas-dev__pandas-32546
Token count is too large: pyca__cryptography-2180
Token count is too large: apache__airflow-13822
Token count is too large: Qiskit__qiskit-1406
Token count is too large: Qiskit__qiskit-1962
Token count is too large: googleapis__google-cloud-python-9949
Token count is too large: googleapis__google-cloud-python-6442
Token count is too large: gitpython-developers__GitPython-780
Token count is too large: googleapis__google-cloud-python-2978
Token count is too large: apache__airflow-17539


Generating train split: 4131 examples [05:08, 13.24 examples/s]

Token count is too large: ray-project__ray-1976
Token count is too large: numpy__numpy-22725
Token count is too large: pandas-dev__pandas-33646
Token count is too large: ray-project__ray-10706
Token count is too large: pandas-dev__pandas-22785
Token count is too large: pantsbuild__pants-12504
Token count is too large: mesonbuild__meson-795
Token count is too large: Lightning-AI__lightning-3004
Token count is too large: pandas-dev__pandas-17841


Generating train split: 4133 examples [05:08, 10.04 examples/s]

Token count is too large: googleapis__google-cloud-python-11306
Token count is too large: pandas-dev__pandas-16433
Token count is too large: wagtail__wagtail-7253
Token count is too large: docker__compose-4860
Token count is too large: pandas-dev__pandas-23715
Token count is too large: pantsbuild__pants-15588
Token count is too large: pandas-dev__pandas-16486
Token count is too large: pandas-dev__pandas-29700
Token count is too large: pyca__cryptography-1252


Generating train split: 4136 examples [05:09,  9.18 examples/s]

Token count is too large: pandas-dev__pandas-4220
Token count is too large: celery__celery-6394
Token count is too large: Qiskit__qiskit-3468
Token count is too large: apache__airflow-23030
Token count is too large: pandas-dev__pandas-18015
Token count is too large: ytdl-org__youtube-dl-9195
Token count is too large: huggingface__transformers-24980


Generating train split: 4138 examples [05:09,  9.26 examples/s]

Token count is too large: pandas-dev__pandas-26634
Token count is too large: mesonbuild__meson-577
Token count is too large: pantsbuild__pants-13856
Token count is too large: huggingface__transformers-7291
Token count is too large: pandas-dev__pandas-3845
Token count is too large: wagtail__wagtail-8310
Token count is too large: mesonbuild__meson-2376
Token count is too large: apache__airflow-22872
Token count is too large: pypa__pip-6972
Token count is too large: ipython__ipython-448
Token count is too large: googleapis__google-cloud-python-7752


Generating train split: 4140 examples [05:09,  9.57 examples/s]

Token count is too large: pandas-dev__pandas-24581
Token count is too large: pandas-dev__pandas-28689
Token count is too large: ray-project__ray-11181
Token count is too large: huggingface__transformers-19464
Token count is too large: celery__celery-4736
Token count is too large: pandas-dev__pandas-11153
Token count is too large: pandas-dev__pandas-4783
Token count is too large: conan-io__conan-3383
Token count is too large: pandas-dev__pandas-22869
Token count is too large: mesonbuild__meson-8921
Token count is too large: Qiskit__qiskit-9792
Token count is too large: ipython__ipython-12443


Generating train split: 4148 examples [05:09, 13.65 examples/s]

Token count is too large: huggingface__transformers-12049
Token count is too large: conan-io__conan-7322
Token count is too large: pandas-dev__pandas-11607
Token count is too large: pandas-dev__pandas-10419
Token count is too large: ipython__ipython-10931
Token count is too large: ipython__ipython-7762
Token count is too large: pandas-dev__pandas-29260
Token count is too large: conan-io__conan-6184
Token count is too large: pandas-dev__pandas-4299
Token count is too large: celery__celery-7680
Token count is too large: pandas-dev__pandas-18309


Generating train split: 4150 examples [05:10, 14.33 examples/s]

Token count is too large: pandas-dev__pandas-29317
Token count is too large: Lightning-AI__lightning-1996
Token count is too large: Qiskit__qiskit-5442
Token count is too large: pantsbuild__pants-17205
Token count is too large: numpy__numpy-6761
Token count is too large: pandas-dev__pandas-34059
Token count is too large: googleapis__google-cloud-python-914
Token count is too large: pypa__pip-9331
Token count is too large: ipython__ipython-7191
Token count is too large: pandas-dev__pandas-37750
Token count is too large: Qiskit__qiskit-6072
Token count is too large: pantsbuild__pants-17013


Generating train split: 4158 examples [05:10, 16.12 examples/s]

Token count is too large: huggingface__transformers-22062
Token count is too large: google__jax-737
Token count is too large: pandas-dev__pandas-10158
Token count is too large: mesonbuild__meson-5294
Token count is too large: mesonbuild__meson-297
Token count is too large: Qiskit__qiskit-8440
Token count is too large: ytdl-org__youtube-dl-717
Token count is too large: apache__airflow-19443
Token count is too large: apache__airflow-17281
Token count is too large: google__jax-1099
Token count is too large: numpy__numpy-7618
Token count is too large: numpy__numpy-6355
Token count is too large: conda__conda-11889
Token count is too large: ytdl-org__youtube-dl-2173
Token count is too large: apache__airflow-15130
Token count is too large: numpy__numpy-14145


Generating train split: 4170 examples [05:10, 21.75 examples/s]

Token count is too large: pandas-dev__pandas-21934
Token count is too large: conan-io__conan-6465
Token count is too large: pantsbuild__pants-15087
Token count is too large: docker__compose-6017
Token count is too large: pandas-dev__pandas-30339
Token count is too large: scipy__scipy-4583
Token count is too large: twisted__twisted-11578
Token count is too large: mesonbuild__meson-866
Token count is too large: numpy__numpy-10367
Token count is too large: numpy__numpy-20325
Token count is too large: pandas-dev__pandas-7921
Token count is too large: pandas-dev__pandas-34294
Token count is too large: mesonbuild__meson-10008
Token count is too large: ytdl-org__youtube-dl-14279
Token count is too large: pandas-dev__pandas-37145
Token count is too large: pandas-dev__pandas-35964


Generating train split: 4173 examples [05:11, 18.25 examples/s]

Token count is too large: apache__airflow-28397
Token count is too large: huggingface__transformers-8860
Token count is too large: wagtail__wagtail-10320
Token count is too large: Qiskit__qiskit-4410
Token count is too large: celery__celery-5820
Token count is too large: huggingface__transformers-24301
Token count is too large: Lightning-AI__lightning-1396
Token count is too large: pandas-dev__pandas-7402
Token count is too large: pandas-dev__pandas-6672
Token count is too large: ray-project__ray-5060
Token count is too large: pandas-dev__pandas-31159
Token count is too large: celery__celery-4369
Token count is too large: conan-io__conan-4958
Token count is too large: pandas-dev__pandas-5004
Token count is too large: jupyterlab__jupyterlab-9326


Generating train split: 4176 examples [05:11, 11.95 examples/s]

Token count is too large: huggingface__transformers-21513
Token count is too large: pandas-dev__pandas-4904
Token count is too large: numpy__numpy-10046
Token count is too large: pantsbuild__pants-5420
Token count is too large: pandas-dev__pandas-35029
Token count is too large: pandas-dev__pandas-21775
Token count is too large: ray-project__ray-6886
Token count is too large: ray-project__ray-5599
Token count is too large: celery__celery-4432
Token count is too large: mesonbuild__meson-7816
Token count is too large: scipy__scipy-3060
Token count is too large: conda__conda-7223
Token count is too large: pandas-dev__pandas-27832
Token count is too large: pandas-dev__pandas-8904
Token count is too large: pantsbuild__pants-8881
Token count is too large: pandas-dev__pandas-37207
Token count is too large: numpy__numpy-21999
Token count is too large: numpy__numpy-12962
Token count is too large: PrefectHQ__prefect-2814
Token count is too large: huggingface__transformers-16700


Generating train split: 4180 examples [05:12, 11.32 examples/s]

Token count is too large: ray-project__ray-6450
Token count is too large: pandas-dev__pandas-10202
Token count is too large: pandas-dev__pandas-18653
Token count is too large: numpy__numpy-5324
Token count is too large: ipython__ipython-10546
Token count is too large: pandas-dev__pandas-35214
Token count is too large: numpy__numpy-7218


Generating train split: 4183 examples [05:12, 12.42 examples/s]

Token count is too large: PrefectHQ__prefect-555
Token count is too large: conda__conda-5815
Token count is too large: pandas-dev__pandas-6778
Token count is too large: conan-io__conan-2788
Token count is too large: numpy__numpy-13948
Token count is too large: Qiskit__qiskit-1398
Token count is too large: mesonbuild__meson-1624
Token count is too large: huggingface__transformers-21849


Generating train split: 4190 examples [05:12, 14.24 examples/s]

Token count is too large: conan-io__conan-6241
Token count is too large: pandas-dev__pandas-20840
Token count is too large: pandas-dev__pandas-7479
Token count is too large: pandas-dev__pandas-35212
Token count is too large: pyca__cryptography-2761
Token count is too large: numpy__numpy-7332
Token count is too large: conda__conda-10542
Token count is too large: Qiskit__qiskit-8889
Token count is too large: dagster-io__dagster-14956
Token count is too large: ipython__ipython-10959
Token count is too large: mesonbuild__meson-10703
Token count is too large: pandas-dev__pandas-4676
Token count is too large: conda__conda-125
Token count is too large: pandas-dev__pandas-11484
Token count is too large: Qiskit__qiskit-10537
Token count is too large: google__jax-792
Token count is too large: pantsbuild__pants-13910
Token count is too large: Qiskit__qiskit-5212
Token count is too large: celery__celery-2782
Token count is too large: google__jax-92
Token count is too large: pandas-dev__pandas-3544

Generating train split: 4192 examples [05:13, 11.28 examples/s]

Token count is too large: huggingface__transformers-10085
Token count is too large: conda__conda-7076
Token count is too large: mesonbuild__meson-10013
Token count is too large: pandas-dev__pandas-18117
Token count is too large: mesonbuild__meson-2551
Token count is too large: gitpython-developers__GitPython-700
Token count is too large: mesonbuild__meson-157
Token count is too large: pandas-dev__pandas-25219
Token count is too large: pantsbuild__pants-13817
Token count is too large: huggingface__transformers-9829
Token count is too large: huggingface__transformers-12889
Token count is too large: pypa__pip-8062
Token count is too large: pandas-dev__pandas-35946
Token count is too large: pantsbuild__pants-11204
Token count is too large: pandas-dev__pandas-31991


Generating train split: 4194 examples [05:13, 11.91 examples/s]

Token count is too large: pandas-dev__pandas-5089
Token count is too large: conda__conda-8154
Token count is too large: huggingface__transformers-12630
Token count is too large: pandas-dev__pandas-27615
Token count is too large: pantsbuild__pants-15086
Token count is too large: conan-io__conan-7376
Token count is too large: pandas-dev__pandas-35003
Token count is too large: pantsbuild__pants-18974
Token count is too large: pandas-dev__pandas-38148
Token count is too large: pandas-dev__pandas-39564
Token count is too large: pandas-dev__pandas-35848


Generating train split: 4198 examples [05:13, 12.69 examples/s]

Token count is too large: pandas-dev__pandas-25553
Token count is too large: celery__celery-4456
Token count is too large: pandas-dev__pandas-19510
Token count is too large: mesonbuild__meson-8166
Token count is too large: ipython__ipython-8124
Token count is too large: mesonbuild__meson-318
Token count is too large: conan-io__conan-5763
Token count is too large: pandas-dev__pandas-8682
Token count is too large: pypa__pip-599
Token count is too large: pandas-dev__pandas-23152
Token count is too large: conda__conda-7320


Generating train split: 4200 examples [05:13, 11.83 examples/s]

Token count is too large: pandas-dev__pandas-24984
Token count is too large: pandas-dev__pandas-20941
Token count is too large: numpy__numpy-23747
Token count is too large: pandas-dev__pandas-30937
Token count is too large: pandas-dev__pandas-34908
Token count is too large: google__jax-2903
Token count is too large: google__jax-1790
Token count is too large: apache__airflow-22886
Token count is too large: pandas-dev__pandas-23829
Token count is too large: pypa__pip-5190
Token count is too large: pandas-dev__pandas-26440
Token count is too large: ray-project__ray-3020
Token count is too large: pandas-dev__pandas-36808
Token count is too large: pandas-dev__pandas-39800
Token count is too large: docker__compose-2698


Generating train split: 4203 examples [05:14,  7.85 examples/s]

Token count is too large: huggingface__transformers-23127
Token count is too large: pyca__cryptography-1072
Token count is too large: pantsbuild__pants-13715
Token count is too large: pandas-dev__pandas-28622
Token count is too large: open-mmlab__mmdetection-9694
Token count is too large: Qiskit__qiskit-7213
Token count is too large: googleapis__google-cloud-python-2334
Token count is too large: huggingface__transformers-7289
Token count is too large: pandas-dev__pandas-38262


Generating train split: 4208 examples [05:14, 10.94 examples/s]

Token count is too large: pandas-dev__pandas-17236
Token count is too large: docker__compose-5291
Token count is too large: Qiskit__qiskit-1700
Token count is too large: google__jax-3016
Token count is too large: ytdl-org__youtube-dl-26100
Token count is too large: pantsbuild__pants-11483
Token count is too large: pandas-dev__pandas-33080
Token count is too large: pyca__cryptography-5318
Token count is too large: pandas-dev__pandas-10263
Token count is too large: pandas-dev__pandas-30675
Token count is too large: mesonbuild__meson-9430
Token count is too large: googleapis__google-cloud-python-4209
Token count is too large: dagster-io__dagster-12799


Generating train split: 4212 examples [05:14, 12.70 examples/s]

Token count is too large: ipython__ipython-1414
Token count is too large: huggingface__transformers-22537
Token count is too large: pandas-dev__pandas-35507
Token count is too large: docker__compose-4604
Token count is too large: huggingface__transformers-21150
Token count is too large: pandas-dev__pandas-3244
Token count is too large: conda__conda-5991
Token count is too large: pandas-dev__pandas-39216
Token count is too large: dagster-io__dagster-10150
Token count is too large: numpy__numpy-19388


Generating train split: 4214 examples [05:15,  9.32 examples/s]

Token count is too large: pandas-dev__pandas-36303
Token count is too large: pandas-dev__pandas-6459
Token count is too large: Lightning-AI__lightning-1576
Token count is too large: Qiskit__qiskit-2316
Token count is too large: pandas-dev__pandas-10808
Token count is too large: mesonbuild__meson-10679
Token count is too large: pandas-dev__pandas-34493


Generating train split: 4216 examples [05:15,  9.43 examples/s]

Token count is too large: pandas-dev__pandas-7077
Token count is too large: pandas-dev__pandas-30679
Token count is too large: numpy__numpy-12428
Token count is too large: conda__conda-667
Token count is too large: pandas-dev__pandas-16725
Token count is too large: ytdl-org__youtube-dl-14548
Token count is too large: mesonbuild__meson-11039
Token count is too large: dagster-io__dagster-6446
Token count is too large: Lightning-AI__lightning-492
Token count is too large: pypa__pip-3047


Generating train split: 4220 examples [05:15, 11.54 examples/s]

Token count is too large: huggingface__transformers-20002
Token count is too large: pandas-dev__pandas-7279
Token count is too large: pandas-dev__pandas-10064
Token count is too large: Lightning-AI__lightning-233
Token count is too large: pypa__pip-2137
Token count is too large: pandas-dev__pandas-39006
Token count is too large: wagtail__wagtail-8574
Token count is too large: pandas-dev__pandas-24973
Token count is too large: explosion__spaCy-3075
Token count is too large: pandas-dev__pandas-25058
Token count is too large: googleapis__google-cloud-python-9894
Token count is too large: pandas-dev__pandas-7430


Generating train split: 4223 examples [05:15, 10.96 examples/s]

Token count is too large: twisted__twisted-11732
Token count is too large: pandas-dev__pandas-6736
Token count is too large: huggingface__transformers-16990
Token count is too large: pandas-dev__pandas-9182
Token count is too large: ray-project__ray-7851
Token count is too large: pandas-dev__pandas-28248
Token count is too large: googleapis__google-cloud-python-8882
Token count is too large: docker__compose-3095
Token count is too large: pandas-dev__pandas-23112
Token count is too large: pyca__cryptography-2582
Token count is too large: open-mmlab__mmdetection-5370
Token count is too large: Qiskit__qiskit-3788
Token count is too large: numpy__numpy-10622
Token count is too large: pandas-dev__pandas-31515
Token count is too large: pandas-dev__pandas-10185


Generating train split: 4229 examples [05:16, 12.34 examples/s]

Token count is too large: pandas-dev__pandas-25234
Token count is too large: googleapis__google-cloud-python-11291
Token count is too large: pandas-dev__pandas-19594
Token count is too large: apache__airflow-28730
Token count is too large: pandas-dev__pandas-8179
Token count is too large: pandas-dev__pandas-10183
Token count is too large: PrefectHQ__prefect-221
Token count is too large: numpy__numpy-22254
Token count is too large: pandas-dev__pandas-16462


Generating train split: 4234 examples [05:16, 15.66 examples/s]

Token count is too large: gitpython-developers__GitPython-948
Token count is too large: pandas-dev__pandas-18076
Token count is too large: pandas-dev__pandas-7085
Token count is too large: docker__compose-2728
Token count is too large: google__jax-2503
Token count is too large: pandas-dev__pandas-34488
Token count is too large: pandas-dev__pandas-16834
Token count is too large: dagster-io__dagster-12356


Generating train split: 4236 examples [05:16, 13.81 examples/s]

Token count is too large: googleapis__google-cloud-python-11316
Token count is too large: huggingface__transformers-14000
Token count is too large: conan-io__conan-9752
Token count is too large: pandas-dev__pandas-24185
Token count is too large: pandas-dev__pandas-22229
Token count is too large: huggingface__transformers-12303
Token count is too large: conan-io__conan-7890
Token count is too large: ipython__ipython-3328
Token count is too large: ipython__ipython-5418
Token count is too large: conda__conda-4518
Token count is too large: pantsbuild__pants-6022
Token count is too large: googleapis__google-cloud-python-6770
Token count is too large: pandas-dev__pandas-27520
Token count is too large: apache__airflow-23180


Generating train split: 4239 examples [05:17, 11.41 examples/s]

Token count is too large: pandas-dev__pandas-36950
Token count is too large: huggingface__transformers-10033
Token count is too large: pandas-dev__pandas-30842
Token count is too large: conda__conda-5429
Token count is too large: pandas-dev__pandas-33140
Token count is too large: open-mmlab__mmdetection-2156
Token count is too large: numpy__numpy-8665
Token count is too large: ipython__ipython-10279
Token count is too large: numpy__numpy-10034


Generating train split: 4241 examples [05:17, 11.77 examples/s]

Token count is too large: huggingface__transformers-10304
Token count is too large: pandas-dev__pandas-14236
Token count is too large: conda__conda-12233
Token count is too large: googleapis__google-cloud-python-4343
Token count is too large: google__jax-1720
Token count is too large: googleapis__google-cloud-python-4784
Token count is too large: twisted__twisted-11878
Token count is too large: mesonbuild__meson-10365
Token count is too large: pandas-dev__pandas-6114
Token count is too large: pyca__cryptography-7162


Generating train split: 4248 examples [05:17, 16.07 examples/s]

Token count is too large: Lightning-AI__lightning-3321
Token count is too large: pandas-dev__pandas-24837
Token count is too large: pypa__pip-2227
Token count is too large: pantsbuild__pants-16478
Token count is too large: Qiskit__qiskit-6228
Token count is too large: mesonbuild__meson-10837
Token count is too large: pandas-dev__pandas-32107
Token count is too large: numpy__numpy-12923
Token count is too large: pandas-dev__pandas-37202
Token count is too large: docker__compose-255
Token count is too large: pandas-dev__pandas-31005
Token count is too large: ray-project__ray-3458
Token count is too large: numpy__numpy-18184
Token count is too large: wagtail__wagtail-8006
Token count is too large: numpy__numpy-5638
Token count is too large: conan-io__conan-2416
Token count is too large: mesonbuild__meson-3818
Token count is too large: pandas-dev__pandas-29387
Token count is too large: mesonbuild__meson-1924
Token count is too large: apache__airflow-24496
Token count is too large: pandas-d

Generating train split: 4254 examples [05:18, 15.49 examples/s]

Token count is too large: pandas-dev__pandas-25624
Token count is too large: apache__airflow-25305
Token count is too large: docker__compose-2708
Token count is too large: pandas-dev__pandas-18632
Token count is too large: pandas-dev__pandas-16166
Token count is too large: Lightning-AI__lightning-1495
Token count is too large: pandas-dev__pandas-10625
Token count is too large: huggingface__transformers-14441
Token count is too large: pandas-dev__pandas-5847
Token count is too large: PrefectHQ__prefect-2005
Token count is too large: conan-io__conan-6993
Token count is too large: Qiskit__qiskit-7606
Token count is too large: jupyterlab__jupyterlab-1541


Generating train split: 4260 examples [05:18, 17.76 examples/s]

Token count is too large: ytdl-org__youtube-dl-221
Token count is too large: pantsbuild__pants-18948
Token count is too large: numpy__numpy-3131
Token count is too large: pypa__pip-1984
Token count is too large: pandas-dev__pandas-24601
Token count is too large: pandas-dev__pandas-6339
Token count is too large: pypa__pip-7704
Token count is too large: pantsbuild__pants-11815
Token count is too large: ipython__ipython-13612
Token count is too large: pandas-dev__pandas-7161
Token count is too large: googleapis__google-cloud-python-9237
Token count is too large: Qiskit__qiskit-2249
Token count is too large: numpy__numpy-8122
Token count is too large: pandas-dev__pandas-33767
Token count is too large: PrefectHQ__prefect-496
Token count is too large: pandas-dev__pandas-24572
Token count is too large: ipython__ipython-6010
Token count is too large: pandas-dev__pandas-6548
Token count is too large: pandas-dev__pandas-17793


Generating train split: 4263 examples [05:18, 10.31 examples/s]

Token count is too large: ytdl-org__youtube-dl-342
Token count is too large: DataDog__integrations-core-2360
Token count is too large: celery__celery-6589
Token count is too large: pandas-dev__pandas-36385
Token count is too large: pandas-dev__pandas-25585
Token count is too large: pandas-dev__pandas-27300
Token count is too large: ipython__ipython-11358
Token count is too large: huggingface__transformers-10554
Token count is too large: pandas-dev__pandas-34461
Token count is too large: Qiskit__qiskit-2104
Token count is too large: conan-io__conan-6559
Token count is too large: docker__compose-6548
Token count is too large: twisted__twisted-11739
Token count is too large: ytdl-org__youtube-dl-22954
Token count is too large: ytdl-org__youtube-dl-9465
Token count is too large: pyca__cryptography-5307
Token count is too large: ipython__ipython-3854
Token count is too large: pandas-dev__pandas-15570


Generating train split: 4270 examples [05:19, 14.44 examples/s]

Token count is too large: conda__conda-12518
Token count is too large: pandas-dev__pandas-4924
Token count is too large: pypa__pip-3198
Token count is too large: googleapis__google-cloud-python-2362
Token count is too large: pandas-dev__pandas-7941
Token count is too large: pandas-dev__pandas-5311


Generating train split: 4272 examples [05:19, 14.12 examples/s]

Token count is too large: huggingface__transformers-22776
Token count is too large: pandas-dev__pandas-4696
Token count is too large: mesonbuild__meson-3687
Token count is too large: pandas-dev__pandas-37056
Token count is too large: Qiskit__qiskit-4662
Token count is too large: pandas-dev__pandas-4100
Token count is too large: pandas-dev__pandas-17670
Token count is too large: pypa__pip-1109


Generating train split: 4278 examples [05:19, 14.11 examples/s]

Token count is too large: Qiskit__qiskit-1025
Token count is too large: huggingface__transformers-13693
Token count is too large: pypa__pip-11269
Token count is too large: googleapis__google-cloud-python-542
Token count is too large: mesonbuild__meson-4652
Token count is too large: dagster-io__dagster-9407
Token count is too large: pypa__pip-9428
Token count is too large: pandas-dev__pandas-33279
Token count is too large: Qiskit__qiskit-9818
Token count is too large: pandas-dev__pandas-6022


Generating train split: 4281 examples [05:20, 12.42 examples/s]

Token count is too large: apache__airflow-23966
Token count is too large: ipython__ipython-8030
Token count is too large: Qiskit__qiskit-8656
Token count is too large: pandas-dev__pandas-4995
Token count is too large: Lightning-AI__lightning-1842
Token count is too large: huggingface__transformers-378
Token count is too large: pandas-dev__pandas-6593
Token count is too large: mesonbuild__meson-4847
Token count is too large: Qiskit__qiskit-1369


Generating train split: 4283 examples [05:20, 11.07 examples/s]

Token count is too large: huggingface__transformers-16131
Token count is too large: pandas-dev__pandas-28078
Token count is too large: pandas-dev__pandas-5373
Token count is too large: pandas-dev__pandas-39065
Token count is too large: ytdl-org__youtube-dl-24642
Token count is too large: huggingface__transformers-14790
There was an error processing
Token count is too large: PrefectHQ__prefect-529
Token count is too large: docker__compose-5706
Token count is too large: pantsbuild__pants-16531


Generating train split: 4285 examples [05:20, 11.12 examples/s]

Token count is too large: conan-io__conan-5283
Token count is too large: pandas-dev__pandas-38097
Token count is too large: pandas-dev__pandas-19425
Token count is too large: pandas-dev__pandas-37072
Token count is too large: conda__conda-8248
Token count is too large: huggingface__transformers-9488
Token count is too large: Qiskit__qiskit-3394
Token count is too large: conan-io__conan-4260
Token count is too large: pypa__pip-7354
Token count is too large: Qiskit__qiskit-7666
Token count is too large: conan-io__conan-6298
Token count is too large: conda__conda-5249
Token count is too large: conda__conda-6741
Token count is too large: jupyterlab__jupyterlab-7728


Generating train split: 4287 examples [05:20,  9.94 examples/s]

Token count is too large: pandas-dev__pandas-4498
Token count is too large: pandas-dev__pandas-21933
Token count is too large: Lightning-AI__lightning-689
Token count is too large: pantsbuild__pants-6940
Token count is too large: pandas-dev__pandas-24549
Token count is too large: googleapis__google-cloud-python-11355
Token count is too large: Qiskit__qiskit-8570
Token count is too large: pandas-dev__pandas-37873
Token count is too large: docker__compose-6609
Token count is too large: googleapis__google-cloud-python-3270
Token count is too large: pandas-dev__pandas-18524
Token count is too large: pandas-dev__pandas-36223


Generating train split: 4290 examples [05:20, 11.75 examples/s]

Token count is too large: conan-io__conan-4207
Token count is too large: conda__conda-9614
Token count is too large: googleapis__google-cloud-python-8806
Token count is too large: ytdl-org__youtube-dl-2948
Token count is too large: pandas-dev__pandas-11212
Token count is too large: ipython__ipython-8587
Token count is too large: google__jax-1240
Token count is too large: jupyterlab__jupyterlab-3567
Token count is too large: pandas-dev__pandas-9245
Token count is too large: pantsbuild__pants-7603
Token count is too large: pandas-dev__pandas-8320
Token count is too large: pantsbuild__pants-16254
Token count is too large: ray-project__ray-7250
Token count is too large: pandas-dev__pandas-23731


Generating train split: 4301 examples [05:21, 22.44 examples/s]

Token count is too large: apache__airflow-28899
Token count is too large: mesonbuild__meson-7117
Token count is too large: numpy__numpy-11691
Token count is too large: ray-project__ray-9521
Token count is too large: pandas-dev__pandas-5064
Token count is too large: pandas-dev__pandas-2349
Token count is too large: conan-io__conan-5725
Token count is too large: ipython__ipython-1306
Token count is too large: numpy__numpy-3642
Token count is too large: googleapis__google-cloud-python-7491
Token count is too large: numpy__numpy-18176
Token count is too large: conan-io__conan-11505


Generating train split: 4305 examples [05:21, 25.44 examples/s]

Token count is too large: mesonbuild__meson-8478
Token count is too large: pandas-dev__pandas-37148
Token count is too large: pandas-dev__pandas-16303
Token count is too large: pandas-dev__pandas-8523
Token count is too large: pandas-dev__pandas-32074
Token count is too large: pandas-dev__pandas-30961
Token count is too large: conda__conda-8198
Token count is too large: ipython__ipython-4187
Token count is too large: docker__compose-2894
Token count is too large: pandas-dev__pandas-17926
Token count is too large: ytdl-org__youtube-dl-3430
Token count is too large: numpy__numpy-6628
Token count is too large: dagster-io__dagster-2803
Token count is too large: pandas-dev__pandas-7219
Token count is too large: apache__airflow-16931
Token count is too large: conan-io__conan-3086
Token count is too large: mesonbuild__meson-7149
Token count is too large: mesonbuild__meson-9665
Token count is too large: huggingface__transformers-8880
Token count is too large: pandas-dev__pandas-23183
Token cou

Generating train split: 4308 examples [05:22, 10.72 examples/s]

Token count is too large: huggingface__transformers-8624
Token count is too large: pandas-dev__pandas-16949
Token count is too large: pandas-dev__pandas-33336
Token count is too large: huggingface__transformers-2051
Token count is too large: pantsbuild__pants-15605
Token count is too large: pandas-dev__pandas-9321
Token count is too large: conan-io__conan-4660
Token count is too large: Qiskit__qiskit-3079
Token count is too large: pyca__cryptography-4321


Generating train split: 4314 examples [05:22, 12.12 examples/s]

Token count is too large: huggingface__transformers-12759
Token count is too large: ytdl-org__youtube-dl-3644
Token count is too large: pandas-dev__pandas-3245
Token count is too large: conan-io__conan-3941
Token count is too large: pandas-dev__pandas-8925
Token count is too large: huggingface__transformers-6744
Token count is too large: celery__celery-5795
Token count is too large: pandas-dev__pandas-28197
Token count is too large: numpy__numpy-14209
Token count is too large: pandas-dev__pandas-35287
Token count is too large: apache__airflow-11395
Token count is too large: googleapis__google-cloud-python-1717
Token count is too large: celery__celery-4205
Token count is too large: celery__celery-6614
Token count is too large: numpy__numpy-8131
Token count is too large: Lightning-AI__lightning-1891
Token count is too large: Qiskit__qiskit-9623
Token count is too large: mesonbuild__meson-5830
Token count is too large: huggingface__transformers-7087
Token count is too large: gitpython-dev

Generating train split: 4322 examples [05:22, 16.74 examples/s]

Token count is too large: celery__celery-5382
Token count is too large: mesonbuild__meson-7562
Token count is too large: pypa__pip-4483
Token count is too large: Qiskit__qiskit-517
Token count is too large: apache__airflow-21852
Token count is too large: mesonbuild__meson-5397
Token count is too large: pandas-dev__pandas-30446
Token count is too large: conan-io__conan-9502


Generating train split: 4329 examples [05:23, 18.57 examples/s]

Token count is too large: celery__celery-6501
Token count is too large: dagster-io__dagster-14055
Token count is too large: pandas-dev__pandas-11622
Token count is too large: huggingface__transformers-12113
Token count is too large: googleapis__google-cloud-python-6199
Token count is too large: pandas-dev__pandas-34208
Token count is too large: mesonbuild__meson-9751
Token count is too large: conan-io__conan-3025
Token count is too large: apache__airflow-26223
Token count is too large: PrefectHQ__prefect-732
Token count is too large: pandas-dev__pandas-6873


Generating train split: 4332 examples [05:23, 16.11 examples/s]

Token count is too large: googleapis__google-cloud-python-5390
Token count is too large: pandas-dev__pandas-35633
There was an error processing
Token count is too large: Qiskit__qiskit-3120
Token count is too large: pandas-dev__pandas-27999
Token count is too large: pypa__pip-11634
Token count is too large: mesonbuild__meson-10728
Token count is too large: Qiskit__qiskit-3004
Token count is too large: huggingface__transformers-11819
Token count is too large: Qiskit__qiskit-4734
Token count is too large: huggingface__transformers-18134
Token count is too large: pandas-dev__pandas-7851
Token count is too large: pandas-dev__pandas-3615
Token count is too large: huggingface__transformers-18602
Token count is too large: pandas-dev__pandas-7572
Token count is too large: ipython__ipython-11350


Generating train split: 4336 examples [05:23, 16.03 examples/s]

Token count is too large: mesonbuild__meson-4435
Token count is too large: pandas-dev__pandas-18604
Token count is too large: pandas-dev__pandas-9022
Token count is too large: scipy__scipy-150
Token count is too large: numpy__numpy-18181
Token count is too large: mesonbuild__meson-703


Generating train split: 4339 examples [05:24, 12.80 examples/s]

Token count is too large: pandas-dev__pandas-5707
Token count is too large: open-mmlab__mmdetection-3836
Token count is too large: huggingface__transformers-25042
Token count is too large: googleapis__google-cloud-python-9634
Token count is too large: pandas-dev__pandas-23575
Token count is too large: Qiskit__qiskit-7823


Generating train split: 4342 examples [05:24, 13.17 examples/s]

Token count is too large: conda__conda-8067
Token count is too large: docker__compose-2126
Token count is too large: numpy__numpy-5666
Token count is too large: mesonbuild__meson-2258
Token count is too large: huggingface__transformers-16771
Token count is too large: googleapis__google-cloud-python-5712
Token count is too large: numpy__numpy-16919
Token count is too large: numpy__numpy-3236
Token count is too large: ipython__ipython-10244
Token count is too large: pandas-dev__pandas-20708
Token count is too large: google__jax-3485
Token count is too large: jupyterlab__jupyterlab-3356
Token count is too large: ytdl-org__youtube-dl-18425


Generating train split: 4345 examples [05:24, 13.62 examples/s]

Token count is too large: conan-io__conan-6052
Token count is too large: Qiskit__qiskit-6962
Token count is too large: google__jax-2288
Token count is too large: pandas-dev__pandas-25586
Token count is too large: pandas-dev__pandas-3840
Token count is too large: googleapis__google-cloud-python-11328
Token count is too large: pandas-dev__pandas-21116
Token count is too large: pandas-dev__pandas-36457
Token count is too large: pandas-dev__pandas-19109
Token count is too large: ray-project__ray-4504
Token count is too large: pandas-dev__pandas-11806
Token count is too large: pandas-dev__pandas-7245
Token count is too large: apache__airflow-11509


Generating train split: 4348 examples [05:24, 10.58 examples/s]

Token count is too large: pandas-dev__pandas-11690
Token count is too large: pandas-dev__pandas-34296
Token count is too large: celery__celery-6849
Token count is too large: dagster-io__dagster-2517
Token count is too large: conan-io__conan-4309
Token count is too large: mesonbuild__meson-2826
Token count is too large: mesonbuild__meson-3057
Token count is too large: open-mmlab__mmdetection-2671
Token count is too large: pandas-dev__pandas-6068
Token count is too large: google__jax-778


Generating train split: 4350 examples [05:25, 11.15 examples/s]

Token count is too large: pandas-dev__pandas-3879
Token count is too large: conda__conda-11398
Token count is too large: Qiskit__qiskit-691
Token count is too large: pandas-dev__pandas-32079
Token count is too large: pypa__pip-5280
Token count is too large: pandas-dev__pandas-25949
Token count is too large: pandas-dev__pandas-35668
Token count is too large: numpy__numpy-22721
Token count is too large: tiangolo__fastapi-17


Generating train split: 4352 examples [05:25,  9.26 examples/s]

Token count is too large: pandas-dev__pandas-4953
Token count is too large: ytdl-org__youtube-dl-854
Token count is too large: pandas-dev__pandas-26677
Token count is too large: pandas-dev__pandas-39442
Token count is too large: jupyterlab__jupyterlab-6194
Token count is too large: ray-project__ray-3593
Token count is too large: pypa__pip-6267
Token count is too large: pyca__cryptography-2126
Token count is too large: Qiskit__qiskit-3573
Token count is too large: numpy__numpy-13560
Token count is too large: conda__conda-6777


Generating train split: 4356 examples [05:25, 10.26 examples/s]

Token count is too large: huggingface__transformers-22498
Token count is too large: Qiskit__qiskit-1134
Token count is too large: google__jax-2026
Token count is too large: pantsbuild__pants-12505
Token count is too large: conda__conda-3143
Token count is too large: mesonbuild__meson-3010


Generating train split: 4362 examples [05:25, 15.54 examples/s]

Token count is too large: apache__airflow-15247
Token count is too large: pantsbuild__pants-5352
Token count is too large: PrefectHQ__prefect-95
Token count is too large: conda__conda-5531
Token count is too large: pandas-dev__pandas-8763
Token count is too large: pandas-dev__pandas-6443
Token count is too large: PrefectHQ__prefect-1963
Token count is too large: ytdl-org__youtube-dl-3789
Token count is too large: pantsbuild__pants-16219
Token count is too large: apache__airflow-16805
Token count is too large: pypa__pip-4638


Generating train split: 4366 examples [05:26, 16.91 examples/s]

Token count is too large: pandas-dev__pandas-33771
Token count is too large: huggingface__transformers-15456
Token count is too large: Qiskit__qiskit-9000
Token count is too large: pandas-dev__pandas-3939
Token count is too large: pandas-dev__pandas-17982
Token count is too large: conan-io__conan-2639
Token count is too large: pandas-dev__pandas-28838
Token count is too large: pandas-dev__pandas-20412
Token count is too large: google__jax-1309
Token count is too large: pandas-dev__pandas-24355
Token count is too large: Qiskit__qiskit-3772


Generating train split: 4369 examples [05:26, 17.39 examples/s]

Token count is too large: pandas-dev__pandas-8990
Token count is too large: jupyterlab__jupyterlab-8961
Token count is too large: googleapis__google-cloud-python-5405
Token count is too large: huggingface__transformers-8621
Token count is too large: docker__compose-6377
Token count is too large: ipython__ipython-3561
Token count is too large: conan-io__conan-6169
Token count is too large: Qiskit__qiskit-8977
Token count is too large: huggingface__transformers-21630
Token count is too large: ipython__ipython-4106
Token count is too large: mesonbuild__meson-2413
Token count is too large: conda__conda-6446
Token count is too large: docker__compose-6368


Generating train split: 4374 examples [05:26, 16.20 examples/s]

Token count is too large: celery__celery-2666
Token count is too large: pandas-dev__pandas-36365
Token count is too large: pyca__cryptography-5831
Token count is too large: pantsbuild__pants-18840
Token count is too large: pandas-dev__pandas-28257
Token count is too large: pandas-dev__pandas-36022
Token count is too large: Qiskit__qiskit-1317
Token count is too large: open-mmlab__mmdetection-5820
Token count is too large: celery__celery-7077
Token count is too large: pypa__pip-4496
Token count is too large: mesonbuild__meson-2310
Token count is too large: pandas-dev__pandas-18753


Generating train split: 4379 examples [05:26, 18.58 examples/s]

Token count is too large: Qiskit__qiskit-9635
Token count is too large: pantsbuild__pants-4578
Token count is too large: conan-io__conan-10178
Token count is too large: pantsbuild__pants-13540
Token count is too large: pandas-dev__pandas-32903
Token count is too large: pandas-dev__pandas-32836
Token count is too large: conan-io__conan-2682
Token count is too large: dagster-io__dagster-1199
Token count is too large: Lightning-AI__lightning-2853
Token count is too large: pandas-dev__pandas-7688
Token count is too large: wagtail__wagtail-5559
Token count is too large: pandas-dev__pandas-23471
Token count is too large: huggingface__transformers-11596
Token count is too large: apache__airflow-33706
Token count is too large: Lightning-AI__lightning-1191
Token count is too large: wagtail__wagtail-10623
Token count is too large: pandas-dev__pandas-36464


Generating train split: 4381 examples [05:27, 12.12 examples/s]

Token count is too large: pandas-dev__pandas-33218
Token count is too large: Lightning-AI__lightning-2755
Token count is too large: pandas-dev__pandas-6646
Token count is too large: docker__compose-6425
Token count is too large: huggingface__transformers-16198
Token count is too large: Qiskit__qiskit-7614
Token count is too large: apache__airflow-14810
Token count is too large: Lightning-AI__lightning-2911


Generating train split: 4386 examples [05:27, 14.84 examples/s]

Token count is too large: pandas-dev__pandas-39258
Token count is too large: pandas-dev__pandas-3145
Token count is too large: pandas-dev__pandas-18407
Token count is too large: PrefectHQ__prefect-679
Token count is too large: pypa__pip-4144
Token count is too large: ytdl-org__youtube-dl-32138


Generating train split: 4390 examples [05:27, 16.54 examples/s]

Token count is too large: ray-project__ray-6376
Token count is too large: pandas-dev__pandas-37727
Token count is too large: pandas-dev__pandas-5334
Token count is too large: Lightning-AI__lightning-2202
Token count is too large: Lightning-AI__lightning-2624
Token count is too large: pandas-dev__pandas-25338
Token count is too large: Qiskit__qiskit-6018
Token count is too large: ray-project__ray-2754
Token count is too large: Qiskit__qiskit-8847
Token count is too large: pandas-dev__pandas-29792
Token count is too large: apache__airflow-12916
Token count is too large: jupyterlab__jupyterlab-3140
Token count is too large: wagtail__wagtail-942
Token count is too large: conda__conda-4008


Generating train split: 4392 examples [05:27, 12.69 examples/s]

Token count is too large: huggingface__transformers-23010
There was an error processing
Token count is too large: Lightning-AI__lightning-2293
Token count is too large: mesonbuild__meson-7735
Token count is too large: googleapis__google-cloud-python-11416
Token count is too large: pandas-dev__pandas-16549
Token count is too large: googleapis__google-cloud-python-7061
Token count is too large: conda__conda-7064
Token count is too large: pandas-dev__pandas-35187
Token count is too large: google__jax-3156


Generating train split: 4396 examples [05:28, 13.65 examples/s]

Token count is too large: huggingface__transformers-17063
Token count is too large: pandas-dev__pandas-14536
Token count is too large: huggingface__transformers-17710
Token count is too large: explosion__spaCy-866
Token count is too large: mesonbuild__meson-8464
Token count is too large: pandas-dev__pandas-37736
Token count is too large: huggingface__transformers-22684
Token count is too large: pandas-dev__pandas-9914
Token count is too large: huggingface__transformers-23173
Token count is too large: pandas-dev__pandas-32723
Token count is too large: Qiskit__qiskit-1479
Token count is too large: pandas-dev__pandas-19324


Generating train split: 4398 examples [05:28, 11.90 examples/s]

Token count is too large: celery__celery-5423
Token count is too large: mesonbuild__meson-4689
Token count is too large: pandas-dev__pandas-7923
Token count is too large: ipython__ipython-10809
Token count is too large: pantsbuild__pants-17473
Token count is too large: googleapis__google-cloud-python-9143
Token count is too large: pandas-dev__pandas-25531
Token count is too large: ytdl-org__youtube-dl-16157
Token count is too large: huggingface__transformers-17423
Token count is too large: pandas-dev__pandas-5111
Token count is too large: pypa__pip-11872
Token count is too large: pandas-dev__pandas-7973
Token count is too large: huggingface__transformers-23235
Token count is too large: huggingface__transformers-9382
Token count is too large: Qiskit__qiskit-3441
Token count is too large: pandas-dev__pandas-10570
Token count is too large: googleapis__google-cloud-python-1664
Token count is too large: numpy__numpy-16022
Token count is too large: pandas-dev__pandas-7963
Token count is too 

Generating train split: 4402 examples [05:29,  8.69 examples/s]

Token count is too large: wagtail__wagtail-1022
Token count is too large: pandas-dev__pandas-34344
Token count is too large: ipython__ipython-13928
Token count is too large: pandas-dev__pandas-4806
Token count is too large: mesonbuild__meson-11746
Token count is too large: Qiskit__qiskit-740
Token count is too large: pandas-dev__pandas-7093


Generating train split: 4404 examples [05:29,  8.71 examples/s]

Token count is too large: huggingface__transformers-22966
Token count is too large: googleapis__google-cloud-python-2052
Token count is too large: pypa__pip-3003
Token count is too large: googleapis__google-cloud-python-8701
Token count is too large: apache__airflow-13640
Token count is too large: pandas-dev__pandas-6390
Token count is too large: huggingface__transformers-10184
Token count is too large: numpy__numpy-4758
Token count is too large: pandas-dev__pandas-9473
Token count is too large: pandas-dev__pandas-28743
Token count is too large: ipython__ipython-11812
Token count is too large: mesonbuild__meson-4604
Token count is too large: googleapis__google-cloud-python-897
Token count is too large: huggingface__transformers-7672
Token count is too large: google__jax-1335
Token count is too large: docker__compose-6544


Generating train split: 4409 examples [05:29, 11.57 examples/s]

Token count is too large: pandas-dev__pandas-6743
Token count is too large: pandas-dev__pandas-3451
Token count is too large: ray-project__ray-7347
Token count is too large: pandas-dev__pandas-10502
Token count is too large: pandas-dev__pandas-8834
Token count is too large: pandas-dev__pandas-5220
Token count is too large: ytdl-org__youtube-dl-11929
Token count is too large: googleapis__google-cloud-python-3713
Token count is too large: ipython__ipython-6769
Token count is too large: ipython__ipython-7278
Token count is too large: Qiskit__qiskit-10244
Token count is too large: huggingface__transformers-10034
Token count is too large: pandas-dev__pandas-10508
Token count is too large: pandas-dev__pandas-25789
Token count is too large: ray-project__ray-10594
Token count is too large: googleapis__google-cloud-python-9973
Token count is too large: huggingface__transformers-19657
Token count is too large: ipython__ipython-1988
Token count is too large: huggingface__transformers-15394
Token 

Generating train split: 4411 examples [05:30,  8.17 examples/s]

Token count is too large: pandas-dev__pandas-39341
Token count is too large: Qiskit__qiskit-6065
Token count is too large: Qiskit__qiskit-7789
Token count is too large: huggingface__transformers-9271


Generating train split: 4421 examples [05:30, 15.05 examples/s]

Token count is too large: ipython__ipython-3450
Token count is too large: pandas-dev__pandas-37102
Token count is too large: wagtail__wagtail-3423
Token count is too large: conan-io__conan-4083
Token count is too large: pandas-dev__pandas-11023
Token count is too large: mesonbuild__meson-4157


Generating train split: 4424 examples [05:30, 14.67 examples/s]

Token count is too large: googleapis__google-cloud-python-7284
Token count is too large: numpy__numpy-9965
Token count is too large: pandas-dev__pandas-9028
Token count is too large: docker__compose-2361
Token count is too large: pyca__cryptography-5533
Token count is too large: pandas-dev__pandas-21029
Token count is too large: pypa__pip-3289
Token count is too large: docker__compose-171


Generating train split: 4426 examples [05:30, 14.14 examples/s]

Token count is too large: pandas-dev__pandas-6820
Token count is too large: mesonbuild__meson-3573
Token count is too large: twisted__twisted-11693
Token count is too large: PrefectHQ__prefect-991
Token count is too large: huggingface__transformers-19655
Token count is too large: pandas-dev__pandas-32040
Token count is too large: pandas-dev__pandas-33769
Token count is too large: Qiskit__qiskit-937
Token count is too large: pypa__pip-5571


Generating train split: 4435 examples [05:30, 22.66 examples/s]

Token count is too large: apache__airflow-15890
Token count is too large: mesonbuild__meson-2800
Token count is too large: pandas-dev__pandas-21924
Token count is too large: google__jax-1903
Token count is too large: pandas-dev__pandas-9014
Token count is too large: conan-io__conan-5321
Token count is too large: Lightning-AI__lightning-2375
Token count is too large: Lightning-AI__lightning-607


Generating train split: 4439 examples [05:31, 22.40 examples/s]

Token count is too large: pandas-dev__pandas-24458
Token count is too large: celery__celery-6872
Token count is too large: conda__conda-11302
Token count is too large: google__jax-2532
Token count is too large: pandas-dev__pandas-11628
Token count is too large: pandas-dev__pandas-24490
Token count is too large: pandas-dev__pandas-19330
Token count is too large: pandas-dev__pandas-7450
Token count is too large: googleapis__google-cloud-python-6229


Generating train split: 4445 examples [05:31, 27.38 examples/s]

Token count is too large: pandas-dev__pandas-36606
Token count is too large: Lightning-AI__lightning-610
Token count is too large: google__jax-2610
Token count is too large: pandas-dev__pandas-8715
Token count is too large: ray-project__ray-7719
Token count is too large: googleapis__google-cloud-python-4349
Token count is too large: pandas-dev__pandas-33664
Token count is too large: huggingface__transformers-20146
Token count is too large: apache__airflow-26702


Generating train split: 4450 examples [05:31, 20.72 examples/s]

Token count is too large: pandas-dev__pandas-9597
Token count is too large: mesonbuild__meson-9215
Token count is too large: huggingface__transformers-13334
Token count is too large: conda__conda-10864
Token count is too large: Lightning-AI__lightning-303
Token count is too large: huggingface__transformers-18815
Token count is too large: pantsbuild__pants-13972
Token count is too large: googleapis__google-cloud-python-5825
Token count is too large: pandas-dev__pandas-5906
Token count is too large: pandas-dev__pandas-35098
Token count is too large: PrefectHQ__prefect-416
Token count is too large: pandas-dev__pandas-9802
Token count is too large: Qiskit__qiskit-7952
Token count is too large: pandas-dev__pandas-36431
Token count is too large: pypa__pip-6268
Token count is too large: conda__conda-682
Token count is too large: conan-io__conan-3552


Generating train split: 4456 examples [05:32, 18.68 examples/s]

Token count is too large: pandas-dev__pandas-27201
Token count is too large: huggingface__transformers-14857
Token count is too large: pandas-dev__pandas-23345
Token count is too large: apache__airflow-28753
Token count is too large: Qiskit__qiskit-230
Token count is too large: pyca__cryptography-7520
Token count is too large: googleapis__google-cloud-python-904
Token count is too large: Qiskit__qiskit-9018
Token count is too large: Qiskit__qiskit-105
Token count is too large: ray-project__ray-3704
Token count is too large: pandas-dev__pandas-20347
Token count is too large: ipython__ipython-4895
Token count is too large: celery__celery-4251
Token count is too large: pandas-dev__pandas-10507


Generating train split: 4459 examples [05:32, 18.93 examples/s]

Token count is too large: twisted__twisted-778
Token count is too large: numpy__numpy-10670
Token count is too large: pandas-dev__pandas-19013


Generating train split: 4463 examples [05:32, 16.17 examples/s]

Token count is too large: pandas-dev__pandas-8184
Token count is too large: Qiskit__qiskit-4392
Token count is too large: pandas-dev__pandas-39000
Token count is too large: pandas-dev__pandas-24441
Token count is too large: conda__conda-5098
Token count is too large: Qiskit__qiskit-9777
Token count is too large: apache__airflow-25086
Token count is too large: Lightning-AI__lightning-203
Token count is too large: Lightning-AI__lightning-2062
Token count is too large: ytdl-org__youtube-dl-12268
Token count is too large: pantsbuild__pants-18814


Generating train split: 4473 examples [05:32, 25.84 examples/s]

Token count is too large: huggingface__transformers-15327
Token count is too large: pandas-dev__pandas-16616
Token count is too large: googleapis__google-cloud-python-9178
Token count is too large: pypa__pip-5180
Token count is too large: conda__conda-6766
Token count is too large: pandas-dev__pandas-12065
Token count is too large: conda__conda-5010
Token count is too large: Qiskit__qiskit-10291
Token count is too large: pandas-dev__pandas-31563
Token count is too large: google__jax-2966
Token count is too large: pandas-dev__pandas-30350
Token count is too large: pandas-dev__pandas-7202
Token count is too large: docker__compose-2130
Token count is too large: apache__airflow-8512
Token count is too large: conan-io__conan-14378
Token count is too large: pandas-dev__pandas-21363
Token count is too large: pandas-dev__pandas-15548
Token count is too large: numpy__numpy-22009
Token count is too large: huggingface__transformers-19206


Generating train split: 4477 examples [05:33, 16.66 examples/s]

Token count is too large: docker__compose-5939
Token count is too large: celery__celery-7555
Token count is too large: Qiskit__qiskit-1184
Token count is too large: google__jax-2789
Token count is too large: pypa__pip-3153
Token count is too large: conan-io__conan-5206
Token count is too large: pandas-dev__pandas-6137
Token count is too large: Qiskit__qiskit-8900
Token count is too large: googleapis__google-cloud-python-4757
Token count is too large: pypa__pip-8562
Token count is too large: apache__airflow-23177
Token count is too large: Qiskit__qiskit-5063
Token count is too large: conan-io__conan-3091
Token count is too large: pandas-dev__pandas-21683


Generating train split: 4480 examples [05:33, 13.00 examples/s]

Token count is too large: pandas-dev__pandas-16970
Token count is too large: docker__compose-7031
Token count is too large: pandas-dev__pandas-30311
Token count is too large: huggingface__transformers-15068
Token count is too large: pantsbuild__pants-14957
Token count is too large: conda__conda-8219


Generating train split: 4488 examples [05:34, 17.70 examples/s]

Token count is too large: pandas-dev__pandas-10967
Token count is too large: pandas-dev__pandas-34183
Token count is too large: ray-project__ray-1668
Token count is too large: Qiskit__qiskit-5759
Token count is too large: pandas-dev__pandas-9226
Token count is too large: pandas-dev__pandas-16434
Token count is too large: mesonbuild__meson-9473
Token count is too large: conda__conda-6550
Token count is too large: pypa__pip-3898


Generating train split: 4491 examples [05:34, 17.10 examples/s]

Token count is too large: huggingface__transformers-20166
Token count is too large: ytdl-org__youtube-dl-31675
Token count is too large: ipython__ipython-11419
Token count is too large: Lightning-AI__lightning-706
Token count is too large: docker__compose-5415
Token count is too large: pandas-dev__pandas-19626
Token count is too large: PrefectHQ__prefect-311
Token count is too large: pandas-dev__pandas-24273
Token count is too large: conan-io__conan-2510


Generating train split: 4494 examples [05:34, 18.19 examples/s]

Token count is too large: huggingface__transformers-18369
Token count is too large: ipython__ipython-13436
Token count is too large: pandas-dev__pandas-8460
Token count is too large: apache__airflow-12072
Token count is too large: Qiskit__qiskit-8151
Token count is too large: pandas-dev__pandas-18929
Token count is too large: ray-project__ray-4676


Generating train split: 4500 examples [05:34, 17.30 examples/s]

Token count is too large: mesonbuild__meson-6552
Token count is too large: googleapis__google-cloud-python-9436
Token count is too large: pypa__pip-3225
Token count is too large: googleapis__google-cloud-python-11321
Token count is too large: celery__celery-6462
Token count is too large: Lightning-AI__lightning-1718


Generating train split: 4502 examples [05:34, 16.81 examples/s]

Token count is too large: mesonbuild__meson-4349
Token count is too large: celery__celery-6733
Token count is too large: pandas-dev__pandas-26106
Token count is too large: numpy__numpy-5733
Token count is too large: pandas-dev__pandas-10810
Token count is too large: googleapis__google-cloud-python-2589
Token count is too large: numpy__numpy-16349
Token count is too large: pypa__pip-6928
Token count is too large: ytdl-org__youtube-dl-4248
Token count is too large: Lightning-AI__lightning-2930
Token count is too large: pandas-dev__pandas-37613


Generating train split: 4504 examples [05:34, 16.54 examples/s]

Token count is too large: pypa__pip-2029
Token count is too large: ray-project__ray-9516
Token count is too large: conan-io__conan-1039
Token count is too large: wagtail__wagtail-116
Token count is too large: conda__conda-7603
Token count is too large: Lightning-AI__lightning-2640
Token count is too large: Qiskit__qiskit-7175
Token count is too large: numpy__numpy-19566


Generating train split: 4509 examples [05:35, 18.31 examples/s]

Token count is too large: huggingface__transformers-23862
Token count is too large: googleapis__google-cloud-python-3195
Token count is too large: PrefectHQ__prefect-2155
Token count is too large: pandas-dev__pandas-22482
Token count is too large: apache__airflow-26239
Token count is too large: pandas-dev__pandas-5003
Token count is too large: Qiskit__qiskit-10521
Token count is too large: PrefectHQ__prefect-186
Token count is too large: pandas-dev__pandas-17279


Generating train split: 4511 examples [05:35, 14.45 examples/s]

Token count is too large: pandas-dev__pandas-18852
Token count is too large: pandas-dev__pandas-32842
Token count is too large: conan-io__conan-3101
Token count is too large: googleapis__google-cloud-python-1932
Token count is too large: mesonbuild__meson-10656
Token count is too large: numpy__numpy-10111


Generating train split: 4513 examples [05:35, 10.80 examples/s]

Token count is too large: mesonbuild__meson-7181
Token count is too large: pandas-dev__pandas-25394
Token count is too large: conda__conda-795
Token count is too large: mesonbuild__meson-3783
Token count is too large: Qiskit__qiskit-1344
Token count is too large: pandas-dev__pandas-24993
Token count is too large: pandas-dev__pandas-5227
Token count is too large: huggingface__transformers-12116
Token count is too large: pandas-dev__pandas-28099
Token count is too large: pandas-dev__pandas-19723
Token count is too large: pandas-dev__pandas-24407
Token count is too large: Qiskit__qiskit-303
Token count is too large: conan-io__conan-5215
Token count is too large: mesonbuild__meson-3885
Token count is too large: numpy__numpy-6541
Token count is too large: docker__compose-7653
Token count is too large: huggingface__transformers-22158


Generating train split: 4518 examples [05:36, 10.98 examples/s]

Token count is too large: Qiskit__qiskit-639
Token count is too large: docker__compose-4716
Token count is too large: mesonbuild__meson-1594
Token count is too large: wagtail__wagtail-3973
Token count is too large: Lightning-AI__lightning-3045
Token count is too large: huggingface__transformers-18010
Token count is too large: apache__airflow-29445
Token count is too large: pandas-dev__pandas-5680
Token count is too large: googleapis__google-cloud-python-11304
Token count is too large: googleapis__google-cloud-python-2798
Token count is too large: pypa__pip-10625


Generating train split: 4520 examples [05:36, 11.79 examples/s]

Token count is too large: pandas-dev__pandas-36822
Token count is too large: pandas-dev__pandas-22490
Token count is too large: pypa__pip-6603
Token count is too large: googleapis__google-cloud-python-6202
Token count is too large: pypa__pip-5726
Token count is too large: pantsbuild__pants-18446
Token count is too large: pandas-dev__pandas-32737


Generating train split: 4523 examples [05:36, 12.84 examples/s]

Token count is too large: huggingface__transformers-23856
Token count is too large: pandas-dev__pandas-24005
Token count is too large: numpy__numpy-9063
Token count is too large: pandas-dev__pandas-26295
Token count is too large: google__jax-460
Token count is too large: Qiskit__qiskit-9955
Token count is too large: pyca__cryptography-5976
Token count is too large: Qiskit__qiskit-2569
Token count is too large: ipython__ipython-6006
Token count is too large: google__jax-2536
Token count is too large: jupyterlab__jupyterlab-5462
Token count is too large: conan-io__conan-5899


Generating train split: 4528 examples [05:36, 18.72 examples/s]

Token count is too large: pandas-dev__pandas-18440
Token count is too large: mesonbuild__meson-1943
Token count is too large: pandas-dev__pandas-24186
Token count is too large: Qiskit__qiskit-5562
Token count is too large: conda__conda-6855
Token count is too large: docker__compose-1787
Token count is too large: mesonbuild__meson-1651
Token count is too large: conan-io__conan-2705
Token count is too large: docker__compose-2393
Token count is too large: mesonbuild__meson-2918
Token count is too large: pandas-dev__pandas-18710
Token count is too large: pantsbuild__pants-14550


Generating train split: 4535 examples [05:37, 20.18 examples/s]

Token count is too large: open-mmlab__mmdetection-3695
Token count is too large: numpy__numpy-8816
Token count is too large: ipython__ipython-1089
Token count is too large: pandas-dev__pandas-7741
Token count is too large: googleapis__google-cloud-python-9642
Token count is too large: ipython__ipython-1178
Token count is too large: huggingface__transformers-17326
Token count is too large: ytdl-org__youtube-dl-9324
Token count is too large: huggingface__transformers-14276
Token count is too large: pantsbuild__pants-18311


Generating train split: 4541 examples [05:37, 24.62 examples/s]

Token count is too large: pantsbuild__pants-9852
Token count is too large: conan-io__conan-10917
Token count is too large: Qiskit__qiskit-7551
Token count is too large: pandas-dev__pandas-24642
Token count is too large: mesonbuild__meson-11760
Token count is too large: Qiskit__qiskit-6483
Token count is too large: Lightning-AI__lightning-530
Token count is too large: apache__airflow-25793
Token count is too large: ray-project__ray-1152
Token count is too large: mesonbuild__meson-2313
Token count is too large: pyca__cryptography-4619
Token count is too large: docker__compose-2351
Token count is too large: pypa__pip-9669
Token count is too large: jupyterlab__jupyterlab-3149
Token count is too large: ipython__ipython-5818


Generating train split: 4547 examples [05:37, 30.15 examples/s]

Token count is too large: apache__airflow-13365
Token count is too large: googleapis__google-cloud-python-6365
Token count is too large: numpy__numpy-17492
Token count is too large: huggingface__transformers-24941
Token count is too large: docker__compose-2384
Token count is too large: google__jax-1245
Token count is too large: pandas-dev__pandas-19000
Token count is too large: twisted__twisted-11831
Token count is too large: pypa__pip-3416
Token count is too large: google__jax-690
Token count is too large: pandas-dev__pandas-7371
Token count is too large: numpy__numpy-10547
Token count is too large: pandas-dev__pandas-7909
Token count is too large: Qiskit__qiskit-5248
Token count is too large: mesonbuild__meson-4551
Token count is too large: huggingface__transformers-25514
Token count is too large: huggingface__transformers-1434
Token count is too large: pypa__pip-2538
Token count is too large: Qiskit__qiskit-3361
Token count is too large: huggingface__transformers-12371
Token count i

Generating train split: 4558 examples [05:38, 19.92 examples/s]

Token count is too large: conda__conda-8208
Token count is too large: jupyterlab__jupyterlab-8212
Token count is too large: pandas-dev__pandas-24956
Token count is too large: pandas-dev__pandas-10922
Token count is too large: conan-io__conan-3292
Token count is too large: pandas-dev__pandas-20984
Token count is too large: conan-io__conan-11803
Token count is too large: pandas-dev__pandas-7336
Token count is too large: pantsbuild__pants-8374
Token count is too large: pantsbuild__pants-5374
Token count is too large: pandas-dev__pandas-11874
Token count is too large: mesonbuild__meson-4514
Token count is too large: pandas-dev__pandas-39161
Token count is too large: apache__airflow-23119
Token count is too large: pandas-dev__pandas-30478
Token count is too large: numpy__numpy-10653
Token count is too large: ray-project__ray-6253
Token count is too large: pandas-dev__pandas-28671
Token count is too large: huggingface__transformers-8554


Generating train split: 4561 examples [05:38, 16.80 examples/s]

Token count is too large: googleapis__google-cloud-python-11342
Token count is too large: ray-project__ray-3676
Token count is too large: huggingface__transformers-24988
Token count is too large: googleapis__google-cloud-python-8039
Token count is too large: pandas-dev__pandas-33446
Token count is too large: conda__conda-8444
Token count is too large: apache__airflow-24117
Token count is too large: Qiskit__qiskit-3093
Token count is too large: pandas-dev__pandas-34954
Token count is too large: PrefectHQ__prefect-2167
Token count is too large: pyca__cryptography-2202
Token count is too large: open-mmlab__mmdetection-5221
Token count is too large: pandas-dev__pandas-23205
Token count is too large: pypa__pip-6656
Token count is too large: conan-io__conan-4918
Token count is too large: mesonbuild__meson-4460
Token count is too large: googleapis__google-cloud-python-4185


Generating train split: 4568 examples [05:38, 15.76 examples/s]

Token count is too large: ipython__ipython-13498
Token count is too large: Qiskit__qiskit-5648
Token count is too large: numpy__numpy-16273
Token count is too large: numpy__numpy-18361
Token count is too large: googleapis__google-cloud-python-811
Token count is too large: twisted__twisted-11756
Token count is too large: pandas-dev__pandas-22750
Token count is too large: pandas-dev__pandas-20933
Token count is too large: ipython__ipython-11608
Token count is too large: twisted__twisted-11615
Token count is too large: pandas-dev__pandas-27584
Token count is too large: conan-io__conan-5261
Token count is too large: mesonbuild__meson-5638
Token count is too large: pandas-dev__pandas-6163
Token count is too large: PrefectHQ__prefect-300
Token count is too large: pandas-dev__pandas-26359
Token count is too large: pandas-dev__pandas-27893
Token count is too large: pandas-dev__pandas-8044
Token count is too large: wagtail__wagtail-497
Token count is too large: docker__compose-6297
Token count 

Generating train split: 4573 examples [05:39, 10.31 examples/s]

Token count is too large: numpy__numpy-19090
Token count is too large: Qiskit__qiskit-5910
Token count is too large: gitpython-developers__GitPython-316
Token count is too large: googleapis__google-cloud-python-2109
Token count is too large: googleapis__google-cloud-python-5429
Token count is too large: numpy__numpy-12713
Token count is too large: pandas-dev__pandas-28399
Token count is too large: mesonbuild__meson-2912
Token count is too large: googleapis__google-cloud-python-1377
Token count is too large: celery__celery-6624
Token count is too large: PrefectHQ__prefect-1341
Token count is too large: huggingface__transformers-18435


Generating train split: 4577 examples [05:40, 11.44 examples/s]

Token count is too large: mesonbuild__meson-8131
Token count is too large: pandas-dev__pandas-5911
Token count is too large: pandas-dev__pandas-39188
Token count is too large: conda__conda-6573
Token count is too large: Qiskit__qiskit-7361
Token count is too large: wagtail__wagtail-10596
Token count is too large: google__jax-2800
Token count is too large: numpy__numpy-19087


Generating train split: 4579 examples [05:40, 11.19 examples/s]

Token count is too large: pandas-dev__pandas-24154
Token count is too large: huggingface__transformers-10338
Token count is too large: pandas-dev__pandas-14026
Token count is too large: apache__airflow-28776
Token count is too large: pandas-dev__pandas-29159
Token count is too large: ipython__ipython-13730
Token count is too large: pandas-dev__pandas-18436
Token count is too large: pandas-dev__pandas-3749
Token count is too large: pypa__pip-7118
Token count is too large: Qiskit__qiskit-8220
Token count is too large: numpy__numpy-7738
Token count is too large: pandas-dev__pandas-17002
Token count is too large: Qiskit__qiskit-10366
Token count is too large: numpy__numpy-12971
Token count is too large: conda__conda-12315
Token count is too large: numpy__numpy-24549


Generating train split: 4584 examples [05:40,  9.98 examples/s]

Token count is too large: pandas-dev__pandas-8520
Token count is too large: pandas-dev__pandas-21394
Token count is too large: Qiskit__qiskit-10140
Token count is too large: pandas-dev__pandas-4938
Token count is too large: pandas-dev__pandas-31596
Token count is too large: googleapis__google-cloud-python-3101
Token count is too large: conan-io__conan-5176
Token count is too large: pandas-dev__pandas-26794
Token count is too large: jupyterlab__jupyterlab-7463
Token count is too large: celery__celery-7470
Token count is too large: pantsbuild__pants-18799


Generating train split: 4587 examples [05:41, 11.05 examples/s]

Token count is too large: pandas-dev__pandas-27239
Token count is too large: pandas-dev__pandas-32499
Token count is too large: docker__compose-5096
Token count is too large: huggingface__transformers-16222
Token count is too large: pandas-dev__pandas-29796
Token count is too large: pandas-dev__pandas-20923
Token count is too large: huggingface__transformers-17289
Token count is too large: pandas-dev__pandas-18643
Token count is too large: pandas-dev__pandas-4871
Token count is too large: ytdl-org__youtube-dl-25980
Token count is too large: huggingface__transformers-21833
Token count is too large: huggingface__transformers-16536
Token count is too large: pandas-dev__pandas-25118
Token count is too large: Qiskit__qiskit-3181
Token count is too large: pypa__pip-9994


Generating train split: 4591 examples [05:41, 10.28 examples/s]

Token count is too large: pandas-dev__pandas-18732
Token count is too large: pandas-dev__pandas-36239
Token count is too large: numpy__numpy-12560
Token count is too large: Qiskit__qiskit-8947
Token count is too large: pantsbuild__pants-17299
Token count is too large: pandas-dev__pandas-27382
Token count is too large: googleapis__google-cloud-python-2984
Token count is too large: pandas-dev__pandas-23032
Token count is too large: apache__airflow-24847
Token count is too large: PrefectHQ__prefect-1382
Token count is too large: docker__compose-4004
Token count is too large: pandas-dev__pandas-17843
Token count is too large: googleapis__google-cloud-python-9450
Token count is too large: numpy__numpy-10822


Generating train split: 4596 examples [05:41, 13.38 examples/s]

Token count is too large: celery__celery-7951
Token count is too large: pandas-dev__pandas-19913
Token count is too large: google__jax-2907
Token count is too large: PrefectHQ__prefect-1071
Token count is too large: pandas-dev__pandas-7373
Token count is too large: ipython__ipython-1893
Token count is too large: numpy__numpy-15991
Token count is too large: conda__conda-11349
Token count is too large: pandas-dev__pandas-36760
Token count is too large: pandas-dev__pandas-35411
Token count is too large: ipython__ipython-10719
Token count is too large: pandas-dev__pandas-22325
Token count is too large: mesonbuild__meson-1657
Token count is too large: pandas-dev__pandas-35750


Generating train split: 4598 examples [05:42,  9.54 examples/s]

Token count is too large: pandas-dev__pandas-20732
Token count is too large: googleapis__google-cloud-python-8023
Token count is too large: scipy__scipy-3253
Token count is too large: ipython__ipython-11393
Token count is too large: pandas-dev__pandas-20884
Token count is too large: Qiskit__qiskit-9587
Token count is too large: pantsbuild__pants-4332
Token count is too large: conan-io__conan-2287
Token count is too large: pandas-dev__pandas-29940
Token count is too large: pandas-dev__pandas-31203
Token count is too large: pandas-dev__pandas-8146
Token count is too large: ipython__ipython-7770
Token count is too large: pandas-dev__pandas-7843
Token count is too large: pantsbuild__pants-11092


Generating train split: 4601 examples [05:42, 11.30 examples/s]

Token count is too large: numpy__numpy-19661
Token count is too large: ray-project__ray-7021
Token count is too large: pandas-dev__pandas-6575
Token count is too large: pandas-dev__pandas-9715
Token count is too large: huggingface__transformers-14016
Token count is too large: pandas-dev__pandas-5921


Generating train split: 4606 examples [05:42, 13.19 examples/s]

Token count is too large: pandas-dev__pandas-11564
Token count is too large: pypa__pip-6191
Token count is too large: pandas-dev__pandas-9138
Token count is too large: pandas-dev__pandas-27717
Token count is too large: numpy__numpy-16356
Token count is too large: ipython__ipython-1782
Token count is too large: ray-project__ray-6395
Token count is too large: pandas-dev__pandas-20779
Token count is too large: conan-io__conan-2634
Token count is too large: pandas-dev__pandas-37425
Token count is too large: conan-io__conan-2883


Generating train split: 4609 examples [05:42, 14.21 examples/s]

Token count is too large: pandas-dev__pandas-35506
Token count is too large: pandas-dev__pandas-20729
Token count is too large: pypa__pip-3203
Token count is too large: pandas-dev__pandas-6592
Token count is too large: apache__airflow-11372
Token count is too large: Qiskit__qiskit-4095
Token count is too large: Qiskit__qiskit-9084
Token count is too large: pandas-dev__pandas-20593
Token count is too large: apache__airflow-855
Token count is too large: pandas-dev__pandas-3164
Token count is too large: numpy__numpy-4297
Token count is too large: pyca__cryptography-7591
Token count is too large: Qiskit__qiskit-8463


Generating train split: 4614 examples [05:43, 14.08 examples/s]

Token count is too large: pandas-dev__pandas-21769
Token count is too large: huggingface__transformers-7305
Token count is too large: mesonbuild__meson-1095
Token count is too large: pantsbuild__pants-16185
Token count is too large: ytdl-org__youtube-dl-6679
Token count is too large: pandas-dev__pandas-7803
Token count is too large: open-mmlab__mmdetection-2269
Token count is too large: pandas-dev__pandas-26356
Token count is too large: pandas-dev__pandas-25360
Token count is too large: mesonbuild__meson-4000
Token count is too large: Qiskit__qiskit-4561
Token count is too large: celery__celery-6899
Token count is too large: docker__compose-2662
Token count is too large: conda__conda-9045
Token count is too large: wagtail__wagtail-8155
Token count is too large: numpy__numpy-11746


Generating train split: 4616 examples [05:43, 12.94 examples/s]

Token count is too large: pandas-dev__pandas-3131
Token count is too large: docker__compose-6454
Token count is too large: googleapis__google-cloud-python-559
Token count is too large: google__jax-767
Token count is too large: pandas-dev__pandas-39394
Token count is too large: googleapis__google-cloud-python-6540
Token count is too large: mesonbuild__meson-6013
Token count is too large: numpy__numpy-4498
Token count is too large: googleapis__google-cloud-python-10807
Token count is too large: jupyterlab__jupyterlab-9021
Token count is too large: pandas-dev__pandas-6861
Token count is too large: Qiskit__qiskit-6965
Token count is too large: pandas-dev__pandas-14788
Token count is too large: Qiskit__qiskit-9719


Generating train split: 4623 examples [05:43, 17.92 examples/s]

Token count is too large: ytdl-org__youtube-dl-5108
Token count is too large: googleapis__google-cloud-python-8650
Token count is too large: mesonbuild__meson-6800
Token count is too large: numpy__numpy-12892
Token count is too large: huggingface__transformers-6437
Token count is too large: scipy__scipy-4958
Token count is too large: huggingface__transformers-13336
Token count is too large: huggingface__transformers-17212
Token count is too large: conda__conda-6533


Generating train split: 4625 examples [05:43, 15.75 examples/s]

Token count is too large: Qiskit__qiskit-4862
Token count is too large: pandas-dev__pandas-6429
Token count is too large: huggingface__transformers-11718
Token count is too large: huggingface__transformers-14879
Token count is too large: wagtail__wagtail-842
Token count is too large: pandas-dev__pandas-3999
Token count is too large: huggingface__transformers-17053
Token count is too large: PrefectHQ__prefect-1960


Generating train split: 4627 examples [05:44, 11.21 examples/s]

Token count is too large: apache__airflow-25736
Token count is too large: apache__airflow-33279
Token count is too large: pandas-dev__pandas-34067
Token count is too large: mesonbuild__meson-2673
Token count is too large: Qiskit__qiskit-6925
Token count is too large: ipython__ipython-3181
Token count is too large: PrefectHQ__prefect-466
Token count is too large: pandas-dev__pandas-4522


Generating train split: 4635 examples [05:44, 18.26 examples/s]

Token count is too large: pandas-dev__pandas-37986
Token count is too large: pantsbuild__pants-11396
Token count is too large: jupyterlab__jupyterlab-9709
Token count is too large: pantsbuild__pants-13519
Token count is too large: pandas-dev__pandas-36125
Token count is too large: pandas-dev__pandas-25103
Token count is too large: PrefectHQ__prefect-863
Token count is too large: pandas-dev__pandas-18131
Token count is too large: conan-io__conan-3387
Token count is too large: docker__compose-2363
Token count is too large: pandas-dev__pandas-33659
Token count is too large: ray-project__ray-1307
Token count is too large: pandas-dev__pandas-23607
Token count is too large: pandas-dev__pandas-4740
Token count is too large: ytdl-org__youtube-dl-15066
Token count is too large: docker__compose-2808
Token count is too large: twisted__twisted-11691
Token count is too large: pantsbuild__pants-13046
Token count is too large: pandas-dev__pandas-22209
Token count is too large: Qiskit__qiskit-9969


Generating train split: 4638 examples [05:44, 12.02 examples/s]

Token count is too large: Qiskit__qiskit-4745
Token count is too large: ytdl-org__youtube-dl-30614
Token count is too large: pandas-dev__pandas-10290
Token count is too large: pandas-dev__pandas-7026
Token count is too large: pandas-dev__pandas-11581
Token count is too large: Qiskit__qiskit-1218
Token count is too large: ipython__ipython-1724


Generating train split: 4646 examples [05:45, 20.66 examples/s]

Token count is too large: pandas-dev__pandas-36106
Token count is too large: celery__celery-6863
Token count is too large: ipython__ipython-13625
Token count is too large: googleapis__google-cloud-python-5011
Token count is too large: conda__conda-6451
Token count is too large: pandas-dev__pandas-14432
Token count is too large: numpy__numpy-4304
Token count is too large: pandas-dev__pandas-32764
Token count is too large: apache__airflow-22008
Token count is too large: open-mmlab__mmdetection-4928
Token count is too large: pandas-dev__pandas-25928
Token count is too large: Lightning-AI__lightning-2190
Token count is too large: wagtail__wagtail-10010
Token count is too large: pandas-dev__pandas-7602


Generating train split: 4650 examples [05:45, 21.95 examples/s]

Token count is too large: huggingface__transformers-22990
Token count is too large: Lightning-AI__lightning-549
Token count is too large: pantsbuild__pants-7120
Token count is too large: pandas-dev__pandas-16512
Token count is too large: dagster-io__dagster-15810


Generating train split: 4654 examples [05:45, 17.26 examples/s]

Token count is too large: Qiskit__qiskit-4812
Token count is too large: google__jax-1020
Token count is too large: pandas-dev__pandas-30943
Token count is too large: mesonbuild__meson-11631
Token count is too large: ytdl-org__youtube-dl-24512
Token count is too large: pandas-dev__pandas-9806
Token count is too large: mesonbuild__meson-4030
Token count is too large: Qiskit__qiskit-4277
Token count is too large: pandas-dev__pandas-5210
Token count is too large: huggingface__transformers-6847
Token count is too large: numpy__numpy-9343
Token count is too large: open-mmlab__mmdetection-2891
Token count is too large: Qiskit__qiskit-6856
Token count is too large: pandas-dev__pandas-22602
Token count is too large: conan-io__conan-4033
Token count is too large: googleapis__google-cloud-python-10356
Token count is too large: numpy__numpy-4588
Token count is too large: pandas-dev__pandas-23858
Token count is too large: ytdl-org__youtube-dl-29303
Token count is too large: apache__airflow-15132
To

Generating train split: 4658 examples [05:46, 12.30 examples/s]

Token count is too large: huggingface__transformers-21580
Token count is too large: ray-project__ray-5754
Token count is too large: mesonbuild__meson-8087
Token count is too large: conda__conda-5159
Token count is too large: conda__conda-5261
Token count is too large: pandas-dev__pandas-23235
Token count is too large: docker__compose-5839


Generating train split: 4662 examples [05:46, 12.39 examples/s]

Token count is too large: pandas-dev__pandas-39451
Token count is too large: numpy__numpy-3780
Token count is too large: pandas-dev__pandas-3228
Token count is too large: PrefectHQ__prefect-226
Token count is too large: pandas-dev__pandas-17815
Token count is too large: apache__airflow-28243
Token count is too large: pandas-dev__pandas-32538
Token count is too large: DataDog__integrations-core-2361
Token count is too large: Lightning-AI__lightning-769


Generating train split: 4664 examples [05:46, 10.22 examples/s]

Token count is too large: pandas-dev__pandas-22343
Token count is too large: googleapis__google-cloud-python-11356
Token count is too large: Qiskit__qiskit-6023
Token count is too large: pandas-dev__pandas-8827
Token count is too large: Qiskit__qiskit-3869
Token count is too large: pandas-dev__pandas-20956
Token count is too large: conda__conda-7598
Token count is too large: pandas-dev__pandas-3459
Token count is too large: pandas-dev__pandas-7770
Token count is too large: google__jax-846
Token count is too large: huggingface__transformers-19403
Token count is too large: Qiskit__qiskit-3541
Token count is too large: conda__conda-4963
Token count is too large: googleapis__google-cloud-python-11353
Token count is too large: mesonbuild__meson-1505
Token count is too large: numpy__numpy-3232
Token count is too large: conda__conda-8564
Token count is too large: jupyterlab__jupyterlab-10063
Token count is too large: PrefectHQ__prefect-2387
Token count is too large: googleapis__google-cloud-p

Generating train split: 4669 examples [05:47,  9.12 examples/s]

Token count is too large: ipython__ipython-5229
Token count is too large: huggingface__transformers-18110
Token count is too large: pandas-dev__pandas-6172
Token count is too large: numpy__numpy-18960


Generating train split: 4674 examples [05:47, 11.44 examples/s]

Token count is too large: huggingface__transformers-23429
Token count is too large: docker__compose-4621
Token count is too large: gitpython-developers__GitPython-1214
Token count is too large: pandas-dev__pandas-36444
Token count is too large: conda__conda-5988
Token count is too large: pandas-dev__pandas-29281
Token count is too large: pandas-dev__pandas-35510
Token count is too large: pandas-dev__pandas-6776
Token count is too large: numpy__numpy-12658
Token count is too large: pyca__cryptography-4128
Token count is too large: pandas-dev__pandas-9754
Token count is too large: numpy__numpy-9070
Token count is too large: pandas-dev__pandas-32566


Generating train split: 4680 examples [05:47, 15.28 examples/s]

Token count is too large: pandas-dev__pandas-15232
Token count is too large: JohnSnowLabs__spark-nlp-13798
Token count is too large: DataDog__integrations-core-503
Token count is too large: pandas-dev__pandas-11237
Token count is too large: ray-project__ray-10519
Token count is too large: pandas-dev__pandas-10705
Token count is too large: pantsbuild__pants-19191
Token count is too large: numpy__numpy-12493
Token count is too large: PrefectHQ__prefect-1239
Token count is too large: conan-io__conan-10941
Token count is too large: docker__compose-6864
Token count is too large: pandas-dev__pandas-18094
Token count is too large: huggingface__transformers-9796


Generating train split: 4689 examples [05:47, 23.30 examples/s]

Token count is too large: scipy__scipy-5384
Token count is too large: mesonbuild__meson-8396
Token count is too large: pandas-dev__pandas-38272
Token count is too large: pandas-dev__pandas-34606
Token count is too large: pandas-dev__pandas-23703
Token count is too large: pandas-dev__pandas-26765
Token count is too large: pandas-dev__pandas-26994
Token count is too large: mesonbuild__meson-11619
Token count is too large: numpy__numpy-20243
Token count is too large: numpy__numpy-18759
Token count is too large: huggingface__transformers-2134
Token count is too large: pandas-dev__pandas-37778
Token count is too large: pandas-dev__pandas-24069
Token count is too large: pandas-dev__pandas-12043


Generating train split: 4693 examples [05:48, 15.29 examples/s]

Token count is too large: pandas-dev__pandas-34887
Token count is too large: pandas-dev__pandas-34942
Token count is too large: huggingface__transformers-6999
Token count is too large: apache__airflow-11325
Token count is too large: Qiskit__qiskit-9913
Token count is too large: pandas-dev__pandas-32320
Token count is too large: PrefectHQ__prefect-2202
Token count is too large: ytdl-org__youtube-dl-8611
Token count is too large: pandas-dev__pandas-35995


Generating train split: 4696 examples [05:48, 15.60 examples/s]

Token count is too large: googleapis__google-cloud-python-11336
Token count is too large: huggingface__transformers-6098
Token count is too large: dagster-io__dagster-7086
Token count is too large: open-mmlab__mmdetection-2492
Token count is too large: mesonbuild__meson-3277
Token count is too large: pandas-dev__pandas-21481
Token count is too large: docker__compose-4713
Token count is too large: pandas-dev__pandas-4857
Token count is too large: ray-project__ray-9497
Token count is too large: googleapis__google-cloud-python-7311
Token count is too large: docker__compose-5596
Token count is too large: pandas-dev__pandas-17201
Token count is too large: pandas-dev__pandas-37394
Token count is too large: mesonbuild__meson-10668


Generating train split: 4701 examples [05:49, 12.50 examples/s]

Token count is too large: numpy__numpy-3205
Token count is too large: pandas-dev__pandas-22083
Token count is too large: celery__celery-7544
Token count is too large: mesonbuild__meson-11259
Token count is too large: pandas-dev__pandas-14055
Token count is too large: pandas-dev__pandas-10429
Token count is too large: mesonbuild__meson-4698
Token count is too large: Qiskit__qiskit-4670
Token count is too large: PrefectHQ__prefect-457
Token count is too large: googleapis__google-cloud-python-9889
Token count is too large: Qiskit__qiskit-764
Token count is too large: mesonbuild__meson-605
Token count is too large: Qiskit__qiskit-6546
Token count is too large: pandas-dev__pandas-6941
Token count is too large: pandas-dev__pandas-36208
Token count is too large: ipython__ipython-13825
Token count is too large: pandas-dev__pandas-38431
Token count is too large: celery__celery-6746
Token count is too large: gitpython-developers__GitPython-922
Token count is too large: apache__airflow-24378
Toke

Generating train split: 4703 examples [05:49, 11.24 examples/s]

Token count is too large: conan-io__conan-5236
Token count is too large: pandas-dev__pandas-14085
Token count is too large: numpy__numpy-19151
Token count is too large: numpy__numpy-4309
Token count is too large: pandas-dev__pandas-38026
Token count is too large: pandas-dev__pandas-29712
Token count is too large: pandas-dev__pandas-23999


Generating train split: 4706 examples [05:49, 10.50 examples/s]

Token count is too large: pandas-dev__pandas-28024
Token count is too large: huggingface__transformers-23663


Generating train split: 4710 examples [05:50, 11.94 examples/s]

Token count is too large: pandas-dev__pandas-10265
Token count is too large: huggingface__transformers-11300
Token count is too large: dagster-io__dagster-14773
Token count is too large: pandas-dev__pandas-37989
Token count is too large: docker__compose-5455
Token count is too large: scipy__scipy-4849
Token count is too large: docker__compose-5657
Token count is too large: pypa__pip-2294
Token count is too large: huggingface__transformers-13829
Token count is too large: ytdl-org__youtube-dl-14369
Token count is too large: pandas-dev__pandas-16907


Generating train split: 4712 examples [05:50,  9.84 examples/s]

Token count is too large: mesonbuild__meson-7798
Token count is too large: mesonbuild__meson-5911
Token count is too large: pypa__pip-3522
Token count is too large: conan-io__conan-5005
Token count is too large: huggingface__transformers-12562
Token count is too large: pandas-dev__pandas-30145
Token count is too large: wagtail__wagtail-4704
Token count is too large: mesonbuild__meson-579
Token count is too large: conan-io__conan-8567
Token count is too large: huggingface__transformers-14779
Token count is too large: ipython__ipython-13902
Token count is too large: conan-io__conan-9267
Token count is too large: huggingface__transformers-7155


Generating train split: 4715 examples [05:50,  9.81 examples/s]

Token count is too large: ytdl-org__youtube-dl-30527
Token count is too large: pandas-dev__pandas-10090
Token count is too large: pantsbuild__pants-5231
Token count is too large: dagster-io__dagster-14784
There was an error processing
Token count is too large: pandas-dev__pandas-28215
Token count is too large: googleapis__google-cloud-python-1128
Token count is too large: pandas-dev__pandas-21212
Token count is too large: pyca__cryptography-4534
Token count is too large: conan-io__conan-7108


Generating train split: 4719 examples [05:51, 11.27 examples/s]

Token count is too large: huggingface__transformers-15438
Token count is too large: huggingface__transformers-24298
Token count is too large: pandas-dev__pandas-34661
Token count is too large: conan-io__conan-4787
Token count is too large: conan-io__conan-2723
Token count is too large: Qiskit__qiskit-9843
Token count is too large: Qiskit__qiskit-5474
Token count is too large: Qiskit__qiskit-3314
Token count is too large: pandas-dev__pandas-39644
Token count is too large: conan-io__conan-7507
Token count is too large: mesonbuild__meson-1221
Token count is too large: pandas-dev__pandas-6421
Token count is too large: googleapis__google-cloud-python-5097
Token count is too large: celery__celery-6699
Token count is too large: googleapis__google-cloud-python-3443
Token count is too large: Qiskit__qiskit-6151
Token count is too large: conan-io__conan-2458


Generating train split: 4721 examples [05:51,  8.37 examples/s]

Token count is too large: DataDog__integrations-core-998
Token count is too large: huggingface__transformers-14055
Token count is too large: mesonbuild__meson-4617
Token count is too large: pandas-dev__pandas-35513
Token count is too large: pandas-dev__pandas-27317
Token count is too large: Lightning-AI__lightning-1513
Token count is too large: conan-io__conan-2412
Token count is too large: Qiskit__qiskit-7515
Token count is too large: pandas-dev__pandas-17621
Token count is too large: tiangolo__fastapi-3372
Token count is too large: apache__airflow-26081


Generating train split: 4724 examples [05:51, 10.16 examples/s]

Token count is too large: apache__airflow-18210
Token count is too large: pandas-dev__pandas-23000
Token count is too large: pandas-dev__pandas-38331
Token count is too large: googleapis__google-cloud-python-7545
Token count is too large: ipython__ipython-5077
Token count is too large: Qiskit__qiskit-3137
Token count is too large: pandas-dev__pandas-6990
Token count is too large: pandas-dev__pandas-30234
Token count is too large: pandas-dev__pandas-9322


Generating train split: 4726 examples [05:52,  8.99 examples/s]

Token count is too large: ipython__ipython-6045
Token count is too large: pandas-dev__pandas-11127
Token count is too large: pandas-dev__pandas-33651
Token count is too large: explosion__spaCy-3393
Token count is too large: numpy__numpy-6653
Token count is too large: Lightning-AI__lightning-2425
Token count is too large: pandas-dev__pandas-10346
Token count is too large: mesonbuild__meson-3064
Token count is too large: mesonbuild__meson-3432
Token count is too large: mesonbuild__meson-6941
Token count is too large: pandas-dev__pandas-25568
Token count is too large: conda__conda-5107
Token count is too large: conda__conda-4647
Token count is too large: pantsbuild__pants-17051
Token count is too large: google__jax-3150
Token count is too large: huggingface__transformers-13321
Token count is too large: huggingface__transformers-12654
Token count is too large: pandas-dev__pandas-38427
Token count is too large: ytdl-org__youtube-dl-27234
Token count is too large: numpy__numpy-7417
Token cou

Generating train split: 4730 examples [05:52, 10.49 examples/s]

Token count is too large: mesonbuild__meson-8024
Token count is too large: huggingface__transformers-11631
Token count is too large: googleapis__google-cloud-python-9077
Token count is too large: ray-project__ray-7758
Token count is too large: google__jax-1352
Token count is too large: pandas-dev__pandas-27594
Token count is too large: numpy__numpy-8508
Token count is too large: pypa__pip-9124
Token count is too large: pantsbuild__pants-4521
Token count is too large: googleapis__google-cloud-python-6078
Token count is too large: conda__conda-7211


Generating train split: 4735 examples [05:52, 11.28 examples/s]

Token count is too large: conda__conda-12090
Token count is too large: google__jax-3436
Token count is too large: conan-io__conan-2991
Token count is too large: conda__conda-9841
Token count is too large: pandas-dev__pandas-19707
Token count is too large: scipy__scipy-4145
Token count is too large: numpy__numpy-9408
Token count is too large: Qiskit__qiskit-1158
Token count is too large: google__jax-1613
Token count is too large: pandas-dev__pandas-16339
Token count is too large: pandas-dev__pandas-36610
Token count is too large: apache__airflow-15794
Token count is too large: pandas-dev__pandas-9025


Generating train split: 4738 examples [05:52, 12.74 examples/s]

Token count is too large: mesonbuild__meson-4386
Token count is too large: Qiskit__qiskit-1281
Token count is too large: pandas-dev__pandas-6021
Token count is too large: ipython__ipython-1693
Token count is too large: numpy__numpy-18375
Token count is too large: docker__compose-2882
Token count is too large: pandas-dev__pandas-27375
Token count is too large: ray-project__ray-8521
Token count is too large: ytdl-org__youtube-dl-8354
Token count is too large: pypa__pip-3382
Token count is too large: pandas-dev__pandas-9522
Token count is too large: pantsbuild__pants-19253


Generating train split: 4744 examples [05:53, 19.15 examples/s]

Token count is too large: wagtail__wagtail-1478
Token count is too large: pantsbuild__pants-15299
Token count is too large: pandas-dev__pandas-28733
Token count is too large: pandas-dev__pandas-3533
Token count is too large: googleapis__google-cloud-python-5377
Token count is too large: mesonbuild__meson-4236
Token count is too large: conda__conda-4628
Token count is too large: pandas-dev__pandas-5022
Token count is too large: ytdl-org__youtube-dl-31060
Token count is too large: conan-io__conan-9879


Generating train split: 4752 examples [05:53, 15.93 examples/s]

Token count is too large: googleapis__google-cloud-python-11340
Token count is too large: PrefectHQ__prefect-2886
Token count is too large: conan-io__conan-13788
Token count is too large: pandas-dev__pandas-7113
Token count is too large: pandas-dev__pandas-8557
Token count is too large: pantsbuild__pants-18930
Token count is too large: googleapis__google-cloud-python-1862
Token count is too large: ytdl-org__youtube-dl-14555
Token count is too large: ipython__ipython-11307
Token count is too large: googleapis__google-cloud-python-961
Token count is too large: pandas-dev__pandas-28802
Token count is too large: pandas-dev__pandas-25644
Token count is too large: pandas-dev__pandas-39012
Token count is too large: conda__conda-2529


Generating train split: 4755 examples [05:53, 16.42 examples/s]

Token count is too large: conda__conda-10117
Token count is too large: pyca__cryptography-3794
Token count is too large: Qiskit__qiskit-1748
Token count is too large: Lightning-AI__lightning-2216
Token count is too large: wagtail__wagtail-9240
Token count is too large: Qiskit__qiskit-5396
Token count is too large: pandas-dev__pandas-8946
Token count is too large: pandas-dev__pandas-36516
Token count is too large: pandas-dev__pandas-5426
Token count is too large: pantsbuild__pants-7781
Token count is too large: celery__celery-6838
Token count is too large: DataDog__integrations-core-2937
Token count is too large: pandas-dev__pandas-37479
Token count is too large: conda__conda-3747


Generating train split: 4763 examples [05:53, 25.72 examples/s]

Token count is too large: mesonbuild__meson-1227
Token count is too large: numpy__numpy-19131
Token count is too large: pandas-dev__pandas-28680
Token count is too large: Lightning-AI__lightning-3098
Token count is too large: PrefectHQ__prefect-2877
Token count is too large: Qiskit__qiskit-7407
Token count is too large: pandas-dev__pandas-34654
Token count is too large: pantsbuild__pants-14788
Token count is too large: pandas-dev__pandas-10250
Token count is too large: mesonbuild__meson-11328
Token count is too large: pandas-dev__pandas-6691
Token count is too large: pandas-dev__pandas-7736
Token count is too large: celery__celery-4611
Token count is too large: Lightning-AI__lightning-1152
Token count is too large: google__jax-2812
Token count is too large: ytdl-org__youtube-dl-5669
Token count is too large: pandas-dev__pandas-36557
Token count is too large: pandas-dev__pandas-4092
Token count is too large: ipython__ipython-1207
Token count is too large: pandas-dev__pandas-26852
Token 

Generating train split: 4768 examples [05:54, 11.47 examples/s]

Token count is too large: pandas-dev__pandas-38517
Token count is too large: mesonbuild__meson-5560
Token count is too large: apache__airflow-18733
Token count is too large: huggingface__transformers-19538
Token count is too large: pandas-dev__pandas-33487


Generating train split: 4773 examples [05:55, 13.81 examples/s]

Token count is too large: pyca__cryptography-1476
Token count is too large: pandas-dev__pandas-33284
Token count is too large: numpy__numpy-19902
Token count is too large: huggingface__transformers-16065
Token count is too large: tiangolo__fastapi-4871
Token count is too large: pypa__pip-7341
Token count is too large: google__jax-3097
Token count is too large: pandas-dev__pandas-27846
Token count is too large: huggingface__transformers-25358


Generating train split: 4781 examples [05:55, 16.68 examples/s]

Token count is too large: googleapis__google-cloud-python-9426
Token count is too large: jupyterlab__jupyterlab-8956
Token count is too large: pandas-dev__pandas-28463
Token count is too large: google__jax-188
Token count is too large: google__jax-942
Token count is too large: pandas-dev__pandas-21321
Token count is too large: pandas-dev__pandas-32894
Token count is too large: ray-project__ray-4421
Token count is too large: pandas-dev__pandas-11114
Token count is too large: pandas-dev__pandas-33805
Token count is too large: Lightning-AI__lightning-2674
Token count is too large: mesonbuild__meson-5894
Token count is too large: ytdl-org__youtube-dl-14406
Token count is too large: numpy__numpy-16690
Token count is too large: mesonbuild__meson-8507
Token count is too large: pandas-dev__pandas-39592


Generating train split: 4787 examples [05:55, 14.86 examples/s]

Token count is too large: pandas-dev__pandas-28447
Token count is too large: Qiskit__qiskit-3848
Token count is too large: pandas-dev__pandas-38140
Token count is too large: ray-project__ray-7863
Token count is too large: pandas-dev__pandas-25765
Token count is too large: ray-project__ray-5426
Token count is too large: googleapis__google-cloud-python-6367
Token count is too large: apache__airflow-21551
Token count is too large: pandas-dev__pandas-34354
Token count is too large: huggingface__transformers-16819
Token count is too large: pandas-dev__pandas-38325


Generating train split: 4789 examples [05:56,  9.85 examples/s]

Token count is too large: pandas-dev__pandas-31156
Token count is too large: pandas-dev__pandas-14786
Token count is too large: tiangolo__fastapi-2606
Token count is too large: googleapis__google-cloud-python-4667
Token count is too large: pandas-dev__pandas-19914
Token count is too large: pandas-dev__pandas-17580
Token count is too large: huggingface__transformers-18069
Token count is too large: pandas-dev__pandas-11590
Token count is too large: googleapis__google-cloud-python-2284
Token count is too large: pantsbuild__pants-15606


Generating train split: 4792 examples [05:56, 10.62 examples/s]

Token count is too large: huggingface__transformers-7795
Token count is too large: ipython__ipython-10549
Token count is too large: pandas-dev__pandas-18812
Token count is too large: pandas-dev__pandas-36393
Token count is too large: pandas-dev__pandas-18065
Token count is too large: apache__airflow-10612
Token count is too large: pandas-dev__pandas-3136


Generating train split: 4794 examples [05:56, 10.25 examples/s]

Token count is too large: pandas-dev__pandas-6153
Token count is too large: ytdl-org__youtube-dl-30266
Token count is too large: mesonbuild__meson-2824
Token count is too large: mesonbuild__meson-4868
Token count is too large: googleapis__google-cloud-python-9649
Token count is too large: pypa__pip-8144
Token count is too large: pandas-dev__pandas-8977
Token count is too large: pandas-dev__pandas-20152
Token count is too large: numpy__numpy-15468
Token count is too large: apache__airflow-15942
Token count is too large: pandas-dev__pandas-28662
Token count is too large: mesonbuild__meson-5224
Token count is too large: conda__conda-5703
Token count is too large: docker__compose-6592
Token count is too large: conda__conda-7773
Token count is too large: pandas-dev__pandas-24772
Token count is too large: pandas-dev__pandas-7421
Token count is too large: googleapis__google-cloud-python-11341
Token count is too large: pandas-dev__pandas-11531
Token count is too large: pantsbuild__pants-14185


Generating train split: 4798 examples [05:57, 10.14 examples/s]

Token count is too large: apache__airflow-26617
Token count is too large: pandas-dev__pandas-4502
Token count is too large: numpy__numpy-5616
Token count is too large: Qiskit__qiskit-3978
Token count is too large: Qiskit__qiskit-3174
Token count is too large: huggingface__transformers-13542
Token count is too large: pandas-dev__pandas-38710
Token count is too large: pandas-dev__pandas-39064
Token count is too large: docker__compose-5436
Token count is too large: pandas-dev__pandas-34343
Token count is too large: pandas-dev__pandas-8766


Generating train split: 4803 examples [05:57, 14.28 examples/s]

Token count is too large: wagtail__wagtail-811
Token count is too large: Lightning-AI__lightning-223
Token count is too large: PrefectHQ__prefect-248
Token count is too large: google__jax-828
Token count is too large: ray-project__ray-873
Token count is too large: conan-io__conan-5014
Token count is too large: pandas-dev__pandas-2005
Token count is too large: conda__conda-6655
Token count is too large: ipython__ipython-1944


Generating train split: 4805 examples [05:57, 12.67 examples/s]

Token count is too large: pandas-dev__pandas-14002
Token count is too large: ytdl-org__youtube-dl-30582
Token count is too large: apache__airflow-27961
Token count is too large: pandas-dev__pandas-24856
Token count is too large: googleapis__google-cloud-python-7684
Token count is too large: wagtail__wagtail-6275
Token count is too large: apache__airflow-19747
Token count is too large: pandas-dev__pandas-24520
Token count is too large: pandas-dev__pandas-23264
Token count is too large: wagtail__wagtail-7077
Token count is too large: numpy__numpy-6487
Token count is too large: pyca__cryptography-3423


Generating train split: 4811 examples [05:57, 17.70 examples/s]

Token count is too large: pandas-dev__pandas-14419
Token count is too large: numpy__numpy-4316
Token count is too large: Qiskit__qiskit-7191
Token count is too large: pandas-dev__pandas-6388
Token count is too large: ytdl-org__youtube-dl-18343
Token count is too large: huggingface__transformers-6735


Generating train split: 4816 examples [05:58, 18.56 examples/s]

Token count is too large: huggingface__transformers-18057
Token count is too large: mesonbuild__meson-7118
Token count is too large: pandas-dev__pandas-33304
Token count is too large: ytdl-org__youtube-dl-6828
Token count is too large: pandas-dev__pandas-29427
Token count is too large: numpy__numpy-23854
Token count is too large: huggingface__transformers-193
Token count is too large: ipython__ipython-7872
Token count is too large: apache__airflow-24079


Generating train split: 4821 examples [05:58, 20.31 examples/s]

Token count is too large: pandas-dev__pandas-4972
Token count is too large: numpy__numpy-12399
Token count is too large: huggingface__transformers-11380
Token count is too large: numpy__numpy-20666
Token count is too large: huggingface__transformers-5125
Token count is too large: pandas-dev__pandas-32890


Generating train split: 4829 examples [05:58, 20.05 examples/s]

Token count is too large: huggingface__transformers-24952
Token count is too large: pandas-dev__pandas-30508
Token count is too large: pantsbuild__pants-17518
Token count is too large: googleapis__google-cloud-python-11287
Token count is too large: Qiskit__qiskit-7157
Token count is too large: PrefectHQ__prefect-70
Token count is too large: pypa__pip-3466
Token count is too large: google__jax-1238
Token count is too large: PrefectHQ__prefect-446


Generating train split: 4832 examples [05:58, 19.50 examples/s]

Token count is too large: mesonbuild__meson-7047
Token count is too large: ipython__ipython-3859
Token count is too large: googleapis__google-cloud-python-4388
Token count is too large: ray-project__ray-7670
Token count is too large: pandas-dev__pandas-16428
Token count is too large: pypa__pip-5875
Token count is too large: pandas-dev__pandas-39139
Token count is too large: huggingface__transformers-19640
Token count is too large: pantsbuild__pants-6671
Token count is too large: pandas-dev__pandas-10009
Token count is too large: pyca__cryptography-4331
Token count is too large: Qiskit__qiskit-4224
Token count is too large: pandas-dev__pandas-19135
Token count is too large: huggingface__transformers-6093
Token count is too large: pandas-dev__pandas-23271
Token count is too large: numpy__numpy-12297
Token count is too large: PrefectHQ__prefect-1996
Token count is too large: PrefectHQ__prefect-430
Token count is too large: docker__compose-5435
Token count is too large: ray-project__ray-82

Generating train split: 4838 examples [05:59, 12.05 examples/s]

Token count is too large: huggingface__transformers-24993
Token count is too large: Qiskit__qiskit-9085
Token count is too large: mesonbuild__meson-11585
Token count is too large: PrefectHQ__prefect-206
Token count is too large: huggingface__transformers-13857
Token count is too large: googleapis__google-cloud-python-5492
Token count is too large: ray-project__ray-7985
Token count is too large: pandas-dev__pandas-21407
Token count is too large: pantsbuild__pants-17652
Token count is too large: pandas-dev__pandas-6849
Token count is too large: conan-io__conan-8350


Generating train split: 4840 examples [05:59, 12.59 examples/s]

Token count is too large: huggingface__transformers-15473
Token count is too large: pandas-dev__pandas-8208
Token count is too large: mesonbuild__meson-3491
Token count is too large: pandas-dev__pandas-23698
Token count is too large: pandas-dev__pandas-6126
Token count is too large: twisted__twisted-11576
Token count is too large: pandas-dev__pandas-5001


Generating train split: 4847 examples [06:00, 12.43 examples/s]

Token count is too large: Qiskit__qiskit-1600
Token count is too large: pandas-dev__pandas-5101
Token count is too large: pandas-dev__pandas-35914
Token count is too large: pandas-dev__pandas-27284
Token count is too large: huggingface__transformers-13725
Token count is too large: ytdl-org__youtube-dl-6061
Token count is too large: apache__airflow-1290
Token count is too large: googleapis__google-cloud-python-498
Token count is too large: pandas-dev__pandas-3587
Token count is too large: numpy__numpy-12920
Token count is too large: mesonbuild__meson-9131
Token count is too large: huggingface__transformers-10071
Token count is too large: conan-io__conan-4771
Token count is too large: pandas-dev__pandas-5733
Token count is too large: celery__celery-5984


Generating train split: 4854 examples [06:00, 13.98 examples/s]

Token count is too large: huggingface__transformers-25402
Token count is too large: pantsbuild__pants-15390
Token count is too large: Lightning-AI__lightning-332
Token count is too large: pandas-dev__pandas-18373
Token count is too large: Qiskit__qiskit-7460
Token count is too large: pandas-dev__pandas-11486
Token count is too large: pandas-dev__pandas-725
Token count is too large: pandas-dev__pandas-4922
Token count is too large: pandas-dev__pandas-26685
Token count is too large: numpy__numpy-4949
Token count is too large: google__jax-437
Token count is too large: googleapis__google-cloud-python-2553
Token count is too large: numpy__numpy-5489
Token count is too large: pandas-dev__pandas-30386
Token count is too large: ipython__ipython-3525


Generating train split: 4856 examples [06:00, 14.52 examples/s]

Token count is too large: mesonbuild__meson-1238
Token count is too large: pandas-dev__pandas-21400
Token count is too large: pyca__cryptography-1346
Token count is too large: ytdl-org__youtube-dl-31305
Token count is too large: pandas-dev__pandas-8232
Token count is too large: pandas-dev__pandas-8372
Token count is too large: wagtail__wagtail-8268
Token count is too large: googleapis__google-cloud-python-6631
Token count is too large: conan-io__conan-8821
Token count is too large: pandas-dev__pandas-34985
Token count is too large: pantsbuild__pants-9826
Token count is too large: pandas-dev__pandas-26112


Generating train split: 4860 examples [06:01, 16.74 examples/s]

Token count is too large: pandas-dev__pandas-30813
Token count is too large: pypa__pip-1745
Token count is too large: pandas-dev__pandas-10866
Token count is too large: apache__airflow-8256
Token count is too large: pandas-dev__pandas-4063
Token count is too large: pyca__cryptography-4889
Token count is too large: pandas-dev__pandas-5303


Generating train split: 4863 examples [06:01, 16.32 examples/s]

Token count is too large: huggingface__transformers-22279
Token count is too large: googleapis__google-cloud-python-5935
Token count is too large: Qiskit__qiskit-10362
Token count is too large: wagtail__wagtail-8571
Token count is too large: pandas-dev__pandas-7578
Token count is too large: conda__conda-9730
Token count is too large: mesonbuild__meson-7179
Token count is too large: mesonbuild__meson-9027
Token count is too large: scipy__scipy-5205
Token count is too large: Qiskit__qiskit-6782
Token count is too large: pandas-dev__pandas-3797
Token count is too large: ipython__ipython-11953
Token count is too large: apache__airflow-27944


Generating train split: 4866 examples [06:01, 12.39 examples/s]

Token count is too large: pandas-dev__pandas-15782
Token count is too large: apache__airflow-17501
Token count is too large: pandas-dev__pandas-17419
Token count is too large: pandas-dev__pandas-18508
Token count is too large: pandas-dev__pandas-4664
Token count is too large: pandas-dev__pandas-17823
Token count is too large: pandas-dev__pandas-38780


Generating train split: 4870 examples [06:01, 14.58 examples/s]

Token count is too large: pandas-dev__pandas-3890
Token count is too large: pandas-dev__pandas-21038
Token count is too large: pypa__pip-2214
Token count is too large: pandas-dev__pandas-25482
Token count is too large: google__jax-976
Token count is too large: conan-io__conan-2533
Token count is too large: wagtail__wagtail-9153
Token count is too large: pandas-dev__pandas-29529
Token count is too large: Qiskit__qiskit-7039


Generating train split: 4876 examples [06:02, 20.48 examples/s]

Token count is too large: pandas-dev__pandas-3299
Token count is too large: pandas-dev__pandas-31312
Token count is too large: pandas-dev__pandas-25619
Token count is too large: numpy__numpy-8038
Token count is too large: huggingface__transformers-2192
Token count is too large: pandas-dev__pandas-25268
Token count is too large: Lightning-AI__lightning-1176
Token count is too large: pandas-dev__pandas-10023
Token count is too large: Lightning-AI__lightning-2786
Token count is too large: pandas-dev__pandas-28268
Token count is too large: google__jax-911
Token count is too large: huggingface__transformers-20301


Generating train split: 4879 examples [06:02, 17.70 examples/s]

Token count is too large: wagtail__wagtail-9709
Token count is too large: Qiskit__qiskit-5623
Token count is too large: pandas-dev__pandas-5977
Token count is too large: mesonbuild__meson-4616
Token count is too large: pandas-dev__pandas-22423
Token count is too large: Qiskit__qiskit-1771
Token count is too large: Qiskit__qiskit-6587
Token count is too large: pandas-dev__pandas-19831
Token count is too large: pandas-dev__pandas-3135
Token count is too large: jupyterlab__jupyterlab-6265
Token count is too large: pyca__cryptography-4542


Generating train split: 4883 examples [06:02, 16.11 examples/s]

Token count is too large: conda__conda-4392
Token count is too large: googleapis__google-cloud-python-6842
Token count is too large: huggingface__transformers-22040


Generating train split: 4890 examples [06:02, 21.94 examples/s]

Token count is too large: pandas-dev__pandas-32702
Token count is too large: pandas-dev__pandas-16936
Token count is too large: pandas-dev__pandas-25904
Token count is too large: pandas-dev__pandas-25308
Token count is too large: pantsbuild__pants-15300
Token count is too large: pandas-dev__pandas-29858
Token count is too large: apache__airflow-28476
Token count is too large: pantsbuild__pants-16126
Token count is too large: huggingface__transformers-15437


Generating train split: 4895 examples [06:03, 19.96 examples/s]

Token count is too large: pandas-dev__pandas-31809
Token count is too large: pandas-dev__pandas-22264
Token count is too large: conan-io__conan-5946
Token count is too large: ytdl-org__youtube-dl-2113
Token count is too large: huggingface__transformers-15875
Token count is too large: pandas-dev__pandas-32984
Token count is too large: pandas-dev__pandas-21543
Token count is too large: pandas-dev__pandas-6132
Token count is too large: pandas-dev__pandas-26456
Token count is too large: apache__airflow-24530
Token count is too large: huggingface__transformers-10685
Token count is too large: apache__airflow-9217
Token count is too large: pandas-dev__pandas-8475


Generating train split: 4901 examples [06:03, 21.59 examples/s]

Token count is too large: Qiskit__qiskit-2473
Token count is too large: apache__airflow-12320
Token count is too large: docker__compose-4113
Token count is too large: google__jax-1880
Token count is too large: Lightning-AI__lightning-2405
Token count is too large: pantsbuild__pants-9084
Token count is too large: celery__celery-3934
Token count is too large: Qiskit__qiskit-6662
Token count is too large: mesonbuild__meson-5152
Token count is too large: celery__celery-4565
Token count is too large: pantsbuild__pants-17457
Token count is too large: Qiskit__qiskit-7251
Token count is too large: huggingface__transformers-25447
Token count is too large: huggingface__transformers-14487
Token count is too large: Qiskit__qiskit-910
Token count is too large: pandas-dev__pandas-17266
Token count is too large: huggingface__transformers-18358
Token count is too large: ytdl-org__youtube-dl-3790
Token count is too large: pandas-dev__pandas-4507
Token count is too large: ipython__ipython-1831
Token cou

Generating train split: 4912 examples [06:04, 19.02 examples/s]

Token count is too large: huggingface__transformers-24334
Token count is too large: Qiskit__qiskit-369
Token count is too large: Qiskit__qiskit-4621
Token count is too large: pypa__pip-2237
Token count is too large: ytdl-org__youtube-dl-2725
Token count is too large: pandas-dev__pandas-36176
Token count is too large: numpy__numpy-5636
Token count is too large: Qiskit__qiskit-1141
Token count is too large: conan-io__conan-4667
Token count is too large: huggingface__transformers-10334
Token count is too large: pandas-dev__pandas-9525
Token count is too large: pypa__pip-2699
Token count is too large: pantsbuild__pants-6170
Token count is too large: pandas-dev__pandas-6569
Token count is too large: conan-io__conan-2611
Token count is too large: google__jax-151
Token count is too large: numpy__numpy-3645
Token count is too large: celery__celery-5074
Token count is too large: pantsbuild__pants-17385
Token count is too large: ipython__ipython-13745
Token count is too large: pandas-dev__pandas

Generating train split: 4917 examples [06:04, 15.75 examples/s]

Token count is too large: Qiskit__qiskit-2167
Token count is too large: pandas-dev__pandas-32068
Token count is too large: huggingface__transformers-11406
Token count is too large: pypa__pip-9775
Token count is too large: apache__airflow-19854
Token count is too large: mesonbuild__meson-4789
Token count is too large: pandas-dev__pandas-8680
Token count is too large: mesonbuild__meson-1966
Token count is too large: googleapis__google-cloud-python-4439
Token count is too large: pandas-dev__pandas-8488
Token count is too large: numpy__numpy-12251
Token count is too large: googleapis__google-cloud-python-3793


Generating train split: 4920 examples [06:04, 14.12 examples/s]

Token count is too large: pandas-dev__pandas-16860
Token count is too large: ipython__ipython-10338
Token count is too large: apache__airflow-19048
Token count is too large: Qiskit__qiskit-2833
Token count is too large: wagtail__wagtail-9090
Token count is too large: pandas-dev__pandas-21300
Token count is too large: pandas-dev__pandas-4670
Token count is too large: pandas-dev__pandas-8550


Generating train split: 4923 examples [06:05, 14.17 examples/s]

Token count is too large: pandas-dev__pandas-3949
Token count is too large: pandas-dev__pandas-18481
Token count is too large: ytdl-org__youtube-dl-588
Token count is too large: open-mmlab__mmdetection-4467
Token count is too large: huggingface__transformers-8245
Token count is too large: pandas-dev__pandas-7019
Token count is too large: pandas-dev__pandas-17455
Token count is too large: google__jax-735
Token count is too large: pyca__cryptography-1865
Token count is too large: mesonbuild__meson-9390
Token count is too large: ipython__ipython-3558
Token count is too large: huggingface__transformers-8845


Generating train split: 4927 examples [06:05, 14.83 examples/s]

Token count is too large: pandas-dev__pandas-37672
Token count is too large: apache__airflow-9531
Token count is too large: conan-io__conan-4991
Token count is too large: Lightning-AI__lightning-964
Token count is too large: ipython__ipython-356
Token count is too large: Qiskit__qiskit-3843
Token count is too large: mesonbuild__meson-1156
Token count is too large: pandas-dev__pandas-4015


Generating train split: 4930 examples [06:05, 13.80 examples/s]

Token count is too large: pandas-dev__pandas-4113
Token count is too large: pandas-dev__pandas-29173
Token count is too large: Lightning-AI__lightning-919
Token count is too large: ytdl-org__youtube-dl-14716
Token count is too large: huggingface__transformers-18648
Token count is too large: mesonbuild__meson-1541
Token count is too large: mesonbuild__meson-5974
Token count is too large: huggingface__transformers-18907
Token count is too large: mesonbuild__meson-9484
Token count is too large: google__jax-958
Token count is too large: docker__compose-1356
Token count is too large: celery__celery-4240
Token count is too large: numpy__numpy-8423
Token count is too large: ipython__ipython-4303
Token count is too large: pandas-dev__pandas-8330
Token count is too large: pandas-dev__pandas-26371


Generating train split: 4933 examples [06:05, 11.10 examples/s]

Token count is too large: mesonbuild__meson-4489
Token count is too large: conan-io__conan-4663
Token count is too large: pandas-dev__pandas-26916
Token count is too large: numpy__numpy-7016
Token count is too large: pantsbuild__pants-19224
Token count is too large: pantsbuild__pants-15224
Token count is too large: mesonbuild__meson-3209
Token count is too large: ipython__ipython-415
Token count is too large: Qiskit__qiskit-377
Token count is too large: pantsbuild__pants-13813


Generating train split: 4935 examples [06:06, 10.00 examples/s]

Token count is too large: pandas-dev__pandas-3464
Token count is too large: pandas-dev__pandas-3312
Token count is too large: pandas-dev__pandas-19039
Token count is too large: huggingface__transformers-18716
Token count is too large: Qiskit__qiskit-609
Token count is too large: Qiskit__qiskit-4762
Token count is too large: numpy__numpy-13097
Token count is too large: Qiskit__qiskit-1438


Generating train split: 4945 examples [06:06, 18.10 examples/s]

Token count is too large: pandas-dev__pandas-7842
Token count is too large: Qiskit__qiskit-4558
Token count is too large: Lightning-AI__lightning-2789
Token count is too large: pandas-dev__pandas-37270
Token count is too large: ipython__ipython-4521
Token count is too large: ytdl-org__youtube-dl-7514
Token count is too large: pandas-dev__pandas-22054
Token count is too large: pandas-dev__pandas-7581
Token count is too large: Qiskit__qiskit-5588
Token count is too large: numpy__numpy-6660
Token count is too large: wagtail__wagtail-738
Token count is too large: apache__airflow-18119
Token count is too large: pypa__pip-10846
Token count is too large: pandas-dev__pandas-6883
Token count is too large: pandas-dev__pandas-6112
Token count is too large: apache__airflow-15277
Token count is too large: apache__airflow-1431
Token count is too large: ray-project__ray-6941
Token count is too large: pandas-dev__pandas-4770
Token count is too large: pandas-dev__pandas-25462
Token count is too large: 

Generating train split: 4950 examples [06:06, 14.83 examples/s]

Token count is too large: pypa__pip-6429
Token count is too large: pypa__pip-9993
Token count is too large: mesonbuild__meson-5116
Token count is too large: googleapis__google-cloud-python-9991
Token count is too large: google__jax-818
Token count is too large: pandas-dev__pandas-11294
Token count is too large: pandas-dev__pandas-31569
Token count is too large: pandas-dev__pandas-35852
Token count is too large: apache__airflow-22506
Token count is too large: pandas-dev__pandas-39118


Generating train split: 4954 examples [06:07, 18.13 examples/s]

Token count is too large: mesonbuild__meson-1914
Token count is too large: pantsbuild__pants-18447
Token count is too large: pypa__pip-8678
Token count is too large: pandas-dev__pandas-37676
Token count is too large: pantsbuild__pants-13402
Token count is too large: Lightning-AI__lightning-3188
Token count is too large: docker__compose-2878
Token count is too large: ipython__ipython-13417
Token count is too large: pandas-dev__pandas-9743
Token count is too large: PrefectHQ__prefect-2646
Token count is too large: Lightning-AI__lightning-941
Token count is too large: pandas-dev__pandas-34877
Token count is too large: Qiskit__qiskit-9386
Token count is too large: docker__compose-3056
Token count is too large: googleapis__google-cloud-python-10162


Generating train split: 4959 examples [06:07, 13.50 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 4959/4959 [00:00<00:00, 18409.29 examples/s]


In [40]:
ds.push_to_hub("vdaita/swe-bench-search-replace")




Creating parquet from Arrow format: 100%|██████████| 5/5 [00:00<00:00, 11.65ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  1.68it/s]


CommitInfo(commit_url='https://huggingface.co/datasets/vdaita/swe-bench-search-replace/commit/bce2418428cb1e0e93b19f92c8b47d99690e65db', commit_message='Upload dataset', commit_description='', oid='bce2418428cb1e0e93b19f92c8b47d99690e65db', pr_url=None, pr_revision=None, pr_num=None)

In [39]:
def process_next_row_with_insertions():
    for row in dataset:
        try:
            input_text = row["text"]
            input_text = input_text.replace(PATCH_FORMATTING_INST, SEARCH_REPLACE_INST)
            row_id = row["instance_id"]

            input_text += f"\n\n{DELIM}\n\n"

            diff_string_trimmed = row["patch"].replace("<patch>", "").replace("</patch>", "")
            # Maybe clean up some more strings at the top
            parsed_diff = parse_diff(diff_string_trimmed)
            fmtd_search_replaces = ""
            # Need to remove the a/ and the b/ at the start
            for block in parsed_diff:
                if "dev/null" in block.previous_filepath:
                    fmtd_search_replaces += f"\n<newfile>\n<filepath>{block.filepath[2:]}</filepath>\n<content>{block.contents}</content>\n</newfile>"
                else:
                    fmtd_search_replaces += f"\n<edit>\n<filepath>{block.filepath[2:]}</filepath>\n<search>{block.search_block}</search>\n<replace>{block.replace_block}</replace>\n</edit>\n"

            token_count = len(encoding.encode(input_text)) + len(encoding.encode(fmtd_search_replaces))
            print(f"Was able to calculate token count")
            if token_count > 14000:
                print(f"Token count is too large: {row_id}")
            else:
                yield {"input": input_text, "output": fmtd_search_replaces}
        except Exception as e:
            print("There was an error processing")

ds = Dataset.from_generator(process_next_row)
ds.save_to_disk("./search_replace_dataset/")
    


Saving the dataset (1/1 shards): 100%|██████████| 4959/4959 [00:00<00:00, 205921.90 examples/s]
