In [2]:
from datasets import load_dataset, IterableDataset

In [52]:
import json
from brx import BRX, sftoq, uif
from dotenv import load_dotenv
import os
from openai import OpenAI
from pydantic import BaseModel, Field

# Both credentials are read straight from the process environment; a missing
# variable raises KeyError here. Uncomment the next line to load them from a
# local .env file first.
# load_dotenv("../../.env")
brx_client = BRX(os.environ["BRX_ACCESS_TOKEN"])

# The OpenAI client is pointed at Together's API host via base_url and is
# authenticated with the Together key, not an OpenAI key.
openai_client = OpenAI(
    base_url="https://api.together.xyz",
    api_key=os.environ["TOGETHER_API_KEY"],
)

# Structured output requested from the chat model. call_openai passes this
# model's JSON schema as the `response_format` schema, so each Field description
# below is part of what is sent with the request.
# NOTE(review): documented with `#` comments on purpose -- pydantic appears to
# copy a class docstring into the generated JSON schema, which would change the
# request payload. Confirm before converting these comments to a docstring.
class GeneratedBreakdown(BaseModel):
    # Commit message rewritten as a single brief coding instruction.
    one_sentence_instruction: str = Field(description="Rewrite the original commit message to be a succinct instruction on what to code. The instruction should be one sentence and be brief.")
    # Numbered plan that names variables/functions but contains no code lines.
    plan_no_code: str = Field(description="Write a numbered step-by-step plan that would be followed to generate the patch. State variable and function names, but do not write lines of code under any circumstance.")
    # Numbered plan that mixes instructional text with short code snippets.
    plan_with_code: str = Field(description="Write a numbered step-by-step plan that would be followed to generate the patch. Write a short snippet of code after the instruction text if useful. You must use instructional text as well as code.")

def call_openai(file_contents, original_commit_message, applied_patch):
    """Request a JSON breakdown of a patch from the chat model behind `openai_client`.

    Args:
        file_contents: Text of the file before the patch.
        original_commit_message: Commit message that accompanied the change.
        applied_patch: Patch text shown to the model.

    Returns:
        The first choice's message content parsed with ``json.loads``. The
        request asks for JSON matching ``GeneratedBreakdown``'s schema, but the
        parsed value is returned as-is without validation.

    Raises:
        json.JSONDecodeError: if the message content is not valid JSON.
    """
    system_prompt = "Given an original file, a starting commit message, and a patch applied, write a one-sentence description of changes made and two numbered step-by-step plans for generating those changes: one that includes code and one that doesn't."
    user_prompt = f"# File contents: \n {file_contents} \n \n ----- \n \n# Original commit message: \n{original_commit_message}\n\n -----\n# Applied Patch\n{applied_patch}"

    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    completion = openai_client.chat.completions.create(
        messages=chat_messages,
        model="mistralai/Mixtral-8x7B-Instruct-v0.1",
        # JSON mode, constrained by the pydantic-generated schema.
        response_format={"type": "json_object", "schema": GeneratedBreakdown.model_json_schema()},
    )

    first_choice = completion.choices[0]
    return json.loads(first_choice.message.content)

def call_brx(file_contents, original_commit_message, applied_patch):
    """Run the "Synthetic Dataset Generator for Patches" BRX with the given inputs.

    The serialized BRX schema is turned into a query with ``sftoq``, the three
    arguments are written into the input fields with matching names, the query
    is rebuilt with ``uif`` and executed through ``brx_client``.

    Args:
        file_contents: Value for the "file_contents" input field.
        original_commit_message: Value for the "original_commit_message" field.
        applied_patch: Value for the "applied_patch" field.

    Returns:
        The ``["brxRes"]["output"]`` string of the execution result, parsed
        with ``json.loads``.
    """
    brx_id = "bc7c33e6-8f01-4a34-8846-e084eaae4e84"
    brx_name = "Synthetic Dataset Generator for Patches"
    # "testval" entries are placeholders; the real values are filled in below.
    placeholder = {"fieldValueDataType": "string", "fieldValue": "testval"}
    schema_definition = {
        "description": "Given an original file, a starting commit message, and a patch applied, write a one-sentence description of changes made and a numbered step-by-step plan for generating those changes.",
        "brxName": brx_name,
        "brxId": brx_id,
        "dependantBrxIds": {},
        "processType": 8,
        "schemas": {
            "mainBrxId": brx_id,
            "schemas": {
                "_isMap": True,
                "data": [
                    [
                        "main_brx_entry_schema",
                        {
                            "schemaFields": {
                                "_isMap": True,
                                "data": [
                                    ["file_contents", dict(placeholder)],
                                    ["original_commit_message", dict(placeholder)],
                                    ["applied_patch", dict(placeholder)],
                                ],
                            },
                            "brxName": brx_name,
                            "brxId": brx_id,
                        },
                    ]
                ],
            },
        },
    }
    rebuilt = sftoq(json.dumps(schema_definition))
    output_object = rebuilt["brx_query"]
    input_fields = rebuilt["input_fields"]

    supplied_values = {
        "file_contents": file_contents,
        "original_commit_message": original_commit_message,
        "applied_patch": applied_patch,
    }

    # Fill every input field whose name matches one of the supplied values;
    # fields with other names are left untouched.
    for position in range(len(input_fields)):
        for key, value in supplied_values.items():
            if input_fields[position]["name"] == key:
                input_fields[position]["value"] = value

    updated_query = uif(input_fields, output_object)
    execution = brx_client.execute(updated_query["brx_query"])
    return json.loads(execution["brxRes"]["output"])

In [55]:
import difflib
import re

def process_row_brx(row):
    """Build a fenced unified diff for one commit row and have BRX describe it.

    Args:
        row: Mapping providing "subject" (commit message), "old_contents",
            "new_contents", "old_file" and "new_file".

    Returns:
        dict with "synthetic" (always True), "patch" (the fenced diff with
        hunk headers collapsed to "@@...@@") and the "patch_description",
        "plan_no_code" and "plan_with_code" values read from the BRX result.

    Raises:
        KeyError: if the row or the BRX result lacks one of the keys above.
    """
    commit_msg = row["subject"]
    original_file = row["old_contents"]

    # lineterm="" stops difflib from appending its own "\n" to the ---/+++/@@
    # control lines; otherwise the join below leaves a blank line after each.
    diff_lines = difflib.unified_diff(
        original_file.splitlines(),
        row["new_contents"].splitlines(),
        fromfile=row["old_file"],
        tofile=row["new_file"],
        n=3,
        lineterm="",
    )
    udiff = "\n".join(diff_lines)
    # Collapse hunk headers only. Anchoring to whole lines that start with "@@"
    # keeps changed lines that merely contain "@@" twice (e.g. Ruby class
    # variables) intact, since diff content lines start with " ", "+" or "-".
    udiff = re.sub(r"^@@.*@@$", "@@...@@", udiff, flags=re.MULTILINE)
    udiff = f"```diff\n{udiff}\n```"

    # Argument order follows call_brx(file_contents, original_commit_message,
    # applied_patch).
    generated_info = call_brx(original_file, commit_msg, udiff)
    return {
        "synthetic": True,
        "patch": udiff,
        "patch_description": generated_info["patch_description"],
        "plan_no_code": generated_info["plan_no_code"],
        "plan_with_code": generated_info["plan_with_code"],
    }

def process_row_together(row):
    """Build a fenced unified diff for one commit row and have the chat model expand it.

    Args:
        row: Mapping providing "subject" (commit message), "old_contents",
            "new_contents", "old_file" and "new_file".

    Returns:
        On success, whatever ``call_openai`` returned with "synthetic" set to
        True and "patch" set to the fenced diff. If the call (or annotating its
        result) raises an ``Exception``, a placeholder dict with "synthetic"
        False, the patch, and empty "one_sentence_instruction", "plan_no_code"
        and "plan_with_code" strings.
    """
    # lineterm="" stops difflib from appending its own "\n" to the ---/+++/@@
    # control lines; otherwise the join below leaves a blank line after each.
    diff_lines = difflib.unified_diff(
        row["old_contents"].splitlines(),
        row["new_contents"].splitlines(),
        fromfile=row["old_file"],
        tofile=row["new_file"],
        n=3,
        lineterm="",
    )
    udiff = "\n".join(diff_lines)
    # Collapse hunk headers only. Anchoring to whole lines that start with "@@"
    # keeps changed lines that merely contain "@@" twice (e.g. Ruby class
    # variables) intact, since diff content lines start with " ", "+" or "-".
    udiff = re.sub(r"^@@.*@@$", "@@...@@", udiff, flags=re.MULTILINE)
    udiff = f"```diff\n{udiff}\n```"

    try:
        print("Processing the row with Together")
        # Argument order follows call_openai(file_contents,
        # original_commit_message, applied_patch).
        extracted_info = call_openai(row["old_contents"], row["subject"], udiff)
        extracted_info["synthetic"] = True
        extracted_info["patch"] = udiff
        return extracted_info
    except Exception as exc:
        # Best effort: one failed row must not abort the whole map. Only
        # Exception is caught so Ctrl-C / SystemExit still stop the run, and
        # the cause is printed instead of being discarded.
        print(f"There was an error with this one: {exc!r}")
        return {
            "synthetic": False,
            "patch": udiff,
            "one_sentence_instruction": "",
            "plan_no_code": "",
            "plan_with_code": "",
        }

def process_row(row):
    """Turn one commit row into a fenced patch plus a BRX-generated description and plans.

    Args:
        row: Mapping providing "subject" (commit message), "old_contents",
            "new_contents", "old_file" and "new_file".

    Returns:
        dict with "synthetic" (always True), "patch" (the fenced diff with
        hunk headers collapsed to "@@...@@") and the "patch_description",
        "plan_no_code" and "plan_with_code" values read from the BRX result.

    Raises:
        KeyError: if the row or the BRX result lacks one of the keys above.
    """
    commit_msg = row["subject"]
    original_file = row["old_contents"]

    # With lineterm="" the ---/+++/@@ control lines carry no newline of their
    # own, so the "\n".join below does not insert a blank line after them.
    udiff = "\n".join(
        difflib.unified_diff(
            original_file.splitlines(),
            row["new_contents"].splitlines(),
            fromfile=row["old_file"],
            tofile=row["new_file"],
            n=3,
            lineterm="",
        )
    )
    # Only whole lines beginning with "@@" are hunk headers; diff content lines
    # start with " ", "+" or "-" and must keep any "@@" they contain.
    udiff = re.sub(r"^@@.*@@$", "@@...@@", udiff, flags=re.MULTILINE)
    udiff = f"```diff\n{udiff}\n```"

    # call_brx takes (file_contents, original_commit_message, applied_patch).
    generated_info = call_brx(original_file, commit_msg, udiff)
    result = {"synthetic": True, "patch": udiff}
    for key in ("patch_description", "plan_no_code", "plan_with_code"):
        result[key] = generated_info[key]
    return result

In [43]:
# Language configurations of bigcode/commitpackft to sample from.
languages = [
    "python", "jsx", "kotlin", "vue", "xml", "javascript", "java",
    "c#", "ruby", "c", "c++", "typescript", "dart",
]

# One 15-row sample per language, drawn from the train split after a
# fixed-seed shuffle so the selection is repeatable.
generated_datasets = []
for language in languages:
    dataset = (
        load_dataset("bigcode/commitpackft", language, split="train")
        .shuffle(seed=42)
        .select(range(15))
    )
    generated_datasets.append(dataset)

In [62]:
from datasets import concatenate_datasets, Dataset

# Merge the per-language samples and expand every row with the Together-backed
# generator, using 16 worker processes. The function is passed directly: the
# previous `lambda row: process_row_together(row)` wrapper added nothing.
ds = concatenate_datasets(generated_datasets).map(process_row_together, num_proc=16)

  StockPickler.save(self, obj, save_persistent_id)
  StockPickler.save(self, obj, save_persistent_id)
Map (num_proc=16):   0%|          | 0/195 [00:00<?, ? examples/s]

Processing the row with Together


Processing the row with TogetherProcessing the row with Together
Processing the row with Together
Processing the row with Together
Processing the row with Together
Processing the row with TogetherProcessing the row with Together
Processing the row with Together
Processing the row with Together
Processing the row with Together
Processing the row with Together
Processing the row with Together
Processing the row with Together
Processing the row with Together
Processing the row with Together
There was an error with this one


Map (num_proc=16):   1%|          | 1/195 [00:13<43:56, 13.59s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):   1%|          | 2/195 [00:20<31:45,  9.87s/ examples]

Processing the row with Together


Map (num_proc=16):   2%|▏         | 3/195 [00:28<27:42,  8.66s/ examples]

Processing the row with Together


Map (num_proc=16):   2%|▏         | 4/195 [00:28<17:09,  5.39s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):   3%|▎         | 5/195 [00:28<11:06,  3.51s/ examples]

Processing the row with Together


Map (num_proc=16):   3%|▎         | 6/195 [00:34<13:19,  4.23s/ examples]

Processing the row with Together


Map (num_proc=16):   4%|▎         | 7/195 [00:35<10:41,  3.41s/ examples]

Processing the row with Together


Map (num_proc=16):   4%|▍         | 8/195 [00:37<09:14,  2.97s/ examples]

Processing the row with Together


Map (num_proc=16):   5%|▍         | 9/195 [00:38<06:34,  2.12s/ examples]

Processing the row with Together


Map (num_proc=16):   5%|▌         | 10/195 [00:38<04:50,  1.57s/ examples]

Processing the row with Together


Map (num_proc=16):   6%|▌         | 11/195 [00:38<03:34,  1.17s/ examples]

Processing the row with Together


Map (num_proc=16):   6%|▌         | 12/195 [00:42<06:13,  2.04s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):   7%|▋         | 13/195 [00:58<18:59,  6.26s/ examples]

Processing the row with Together


Map (num_proc=16):   7%|▋         | 14/195 [01:18<30:59, 10.27s/ examples]

Processing the row with Together


Map (num_proc=16):   8%|▊         | 15/195 [01:20<23:26,  7.81s/ examples]

Processing the row with Together


Map (num_proc=16):   8%|▊         | 16/195 [01:24<20:00,  6.71s/ examples]

Processing the row with Together


Map (num_proc=16):   9%|▊         | 17/195 [01:28<17:18,  5.83s/ examples]

Processing the row with Together


Map (num_proc=16):   9%|▉         | 18/195 [01:40<22:54,  7.77s/ examples]

Processing the row with Together


Map (num_proc=16):  10%|▉         | 19/195 [01:41<16:17,  5.56s/ examples]

Processing the row with Together


Map (num_proc=16):  10%|█         | 20/195 [01:42<12:19,  4.23s/ examples]

Processing the row with TogetherProcessing the row with Together



Map (num_proc=16):  11%|█▏        | 22/195 [01:42<06:56,  2.41s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  12%|█▏        | 23/195 [01:43<05:45,  2.01s/ examples]

Processing the row with Together
Processing the row with Together


Map (num_proc=16):  13%|█▎        | 25/195 [01:53<09:21,  3.30s/ examples]

Processing the row with Together


Map (num_proc=16):  13%|█▎        | 26/195 [02:13<19:57,  7.09s/ examples]

Processing the row with Together


Map (num_proc=16):  14%|█▍        | 27/195 [02:13<15:05,  5.39s/ examples]

Processing the row with Together


Map (num_proc=16):  14%|█▍        | 28/195 [02:18<14:20,  5.15s/ examples]

Processing the row with Together


Map (num_proc=16):  15%|█▍        | 29/195 [02:29<18:35,  6.72s/ examples]

Processing the row with Together


Map (num_proc=16):  15%|█▌        | 30/195 [02:34<17:03,  6.20s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  16%|█▌        | 31/195 [02:46<21:30,  7.87s/ examples]

Processing the row with Together


Map (num_proc=16):  16%|█▋        | 32/195 [02:49<17:50,  6.57s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  17%|█▋        | 33/195 [02:51<14:04,  5.21s/ examples]

Processing the row with Together


Map (num_proc=16):  17%|█▋        | 34/195 [02:51<10:09,  3.78s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  18%|█▊        | 35/195 [03:44<48:29, 18.18s/ examples]

Processing the row with Together


Map (num_proc=16):  18%|█▊        | 36/195 [04:10<54:06, 20.42s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  19%|█▉        | 37/195 [04:18<44:09, 16.77s/ examples]

Processing the row with Together


Map (num_proc=16):  19%|█▉        | 38/195 [04:24<36:03, 13.78s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  20%|██        | 39/195 [05:03<54:49, 21.09s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  21%|██        | 40/195 [05:20<51:15, 19.84s/ examples]

Processing the row with Together


Map (num_proc=16):  21%|██        | 41/195 [05:32<45:06, 17.58s/ examples]

Processing the row with Together


Map (num_proc=16):  22%|██▏       | 42/195 [05:32<31:40, 12.42s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  22%|██▏       | 43/195 [05:41<28:52, 11.40s/ examples]

Processing the row with Together


Map (num_proc=16):  23%|██▎       | 44/195 [05:58<32:26, 12.89s/ examples]

Processing the row with Together


Map (num_proc=16):  23%|██▎       | 45/195 [05:58<22:46,  9.11s/ examples]

Processing the row with Together
There was an error with this one
Processing the row with Together


Map (num_proc=16):  24%|██▍       | 47/195 [05:58<12:11,  4.94s/ examples]

Processing the row with Together


Map (num_proc=16):  25%|██▍       | 48/195 [05:58<09:20,  3.81s/ examples]

Processing the row with Together


Map (num_proc=16):  25%|██▌       | 49/195 [06:02<08:58,  3.69s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  26%|██▌       | 50/195 [06:02<06:40,  2.76s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  26%|██▌       | 51/195 [06:21<17:24,  7.25s/ examples]

Processing the row with Together


Map (num_proc=16):  27%|██▋       | 52/195 [06:27<16:27,  6.91s/ examples]

Processing the row with Together


Map (num_proc=16):  27%|██▋       | 53/195 [06:29<13:00,  5.50s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  28%|██▊       | 54/195 [06:40<16:33,  7.05s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  28%|██▊       | 55/195 [06:45<15:17,  6.56s/ examples]

Processing the row with Together


Map (num_proc=16):  29%|██▊       | 56/195 [06:47<11:57,  5.16s/ examples]

Processing the row with Together


Map (num_proc=16):  29%|██▉       | 57/195 [06:52<11:54,  5.18s/ examples]

Processing the row with Together


Map (num_proc=16):  30%|██▉       | 58/195 [06:54<09:45,  4.28s/ examples]

Processing the row with Together


Map (num_proc=16):  30%|███       | 59/195 [07:00<10:53,  4.80s/ examples]

Processing the row with Together


Map (num_proc=16):  31%|███       | 60/195 [07:08<12:54,  5.74s/ examples]

Processing the row with Together


Map (num_proc=16):  31%|███▏      | 61/195 [07:09<09:16,  4.15s/ examples]

Processing the row with Together


Map (num_proc=16):  32%|███▏      | 62/195 [07:27<18:42,  8.44s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  32%|███▏      | 63/195 [07:32<16:01,  7.28s/ examples]

Processing the row with Together


Map (num_proc=16):  33%|███▎      | 64/195 [07:34<12:21,  5.66s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  33%|███▎      | 65/195 [07:35<09:15,  4.28s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  34%|███▍      | 66/195 [07:36<07:28,  3.47s/ examples]

Processing the row with Together


Map (num_proc=16):  34%|███▍      | 67/195 [07:58<18:49,  8.82s/ examples]

Processing the row with Together


Map (num_proc=16):  35%|███▍      | 68/195 [08:03<16:19,  7.71s/ examples]

Processing the row with Together


Map (num_proc=16):  35%|███▌      | 69/195 [08:05<12:54,  6.15s/ examples]

Processing the row with Together


Map (num_proc=16):  36%|███▌      | 70/195 [08:19<17:19,  8.32s/ examples]

Processing the row with Together


Map (num_proc=16):  36%|███▋      | 71/195 [08:26<16:43,  8.10s/ examples]

Processing the row with Together


Map (num_proc=16):  37%|███▋      | 72/195 [08:32<15:04,  7.36s/ examples]

Processing the row with Together


Map (num_proc=16):  37%|███▋      | 73/195 [08:51<21:54, 10.77s/ examples]

Processing the row with Together
There was an error with this one
Processing the row with Together
There was an error with this one


Map (num_proc=16):  38%|███▊      | 75/195 [08:53<12:50,  6.42s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  39%|███▉      | 76/195 [09:01<13:35,  6.85s/ examples]

Processing the row with Together


Map (num_proc=16):  39%|███▉      | 77/195 [09:07<12:50,  6.53s/ examples]

Processing the row with Together


Map (num_proc=16):  40%|████      | 78/195 [09:09<10:27,  5.36s/ examples]

Processing the row with Together


Map (num_proc=16):  41%|████      | 79/195 [09:19<12:51,  6.65s/ examples]

Processing the row with Together


Map (num_proc=16):  41%|████      | 80/195 [09:54<28:16, 14.75s/ examples]

Processing the row with Together


Map (num_proc=16):  42%|████▏     | 81/195 [09:55<20:03, 10.55s/ examples]

Processing the row with Together


Map (num_proc=16):  42%|████▏     | 82/195 [10:07<21:07, 11.22s/ examples]

Processing the row with Together


Map (num_proc=16):  43%|████▎     | 83/195 [10:08<14:55,  7.99s/ examples]

Processing the row with Together


Map (num_proc=16):  43%|████▎     | 84/195 [11:28<54:15, 29.33s/ examples]

Processing the row with Together


Map (num_proc=16):  44%|████▎     | 85/195 [11:29<38:15, 20.87s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  44%|████▍     | 86/195 [11:34<29:23, 16.18s/ examples]

Processing the row with Together


Map (num_proc=16):  45%|████▍     | 87/195 [11:34<20:28, 11.38s/ examples]

Processing the row with Together


Map (num_proc=16):  45%|████▌     | 88/195 [11:34<14:17,  8.01s/ examples]

Processing the row with Together


Map (num_proc=16):  46%|████▌     | 89/195 [11:43<14:28,  8.19s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  46%|████▌     | 90/195 [11:47<12:25,  7.10s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  47%|████▋     | 91/195 [11:53<11:39,  6.72s/ examples]

Processing the row with Together


Map (num_proc=16):  47%|████▋     | 92/195 [12:14<18:42, 10.90s/ examples]

Processing the row with Together


Map (num_proc=16):  48%|████▊     | 93/195 [12:16<14:01,  8.25s/ examples]

Processing the row with Together


Map (num_proc=16):  48%|████▊     | 94/195 [12:18<10:55,  6.49s/ examples]

Processing the row with Together


Map (num_proc=16):  49%|████▊     | 95/195 [12:21<08:59,  5.40s/ examples]

Processing the row with Together


Map (num_proc=16):  49%|████▉     | 96/195 [12:21<06:18,  3.83s/ examples]

Processing the row with Together


Map (num_proc=16):  50%|████▉     | 97/195 [12:22<04:45,  2.91s/ examples]

Processing the row with Together


Map (num_proc=16):  50%|█████     | 98/195 [12:25<04:40,  2.89s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  51%|█████     | 99/195 [12:26<04:01,  2.52s/ examples]

Processing the row with Together


Map (num_proc=16):  51%|█████▏    | 100/195 [12:31<05:09,  3.25s/ examples]

Processing the row with Together


Map (num_proc=16):  52%|█████▏    | 101/195 [13:08<20:48, 13.29s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  52%|█████▏    | 102/195 [13:22<20:48, 13.43s/ examples]

Processing the row with Together


Map (num_proc=16):  53%|█████▎    | 103/195 [13:38<21:49, 14.23s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  53%|█████▎    | 104/195 [13:41<16:23, 10.80s/ examples]

Processing the row with Together


Map (num_proc=16):  54%|█████▍    | 105/195 [13:41<11:27,  7.64s/ examples]

Processing the row with Together


Map (num_proc=16):  54%|█████▍    | 106/195 [13:44<09:10,  6.19s/ examples]

Processing the row with TogetherProcessing the row with Together



Map (num_proc=16):  55%|█████▌    | 108/195 [13:53<07:55,  5.46s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  56%|█████▌    | 109/195 [13:54<06:06,  4.26s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  56%|█████▋    | 110/195 [13:56<05:15,  3.71s/ examples]

Processing the row with Together


Map (num_proc=16):  57%|█████▋    | 111/195 [13:57<04:20,  3.11s/ examples]

Processing the row with Together


Map (num_proc=16):  57%|█████▋    | 112/195 [14:00<04:03,  2.94s/ examples]

Processing the row with Together


Map (num_proc=16):  58%|█████▊    | 113/195 [14:01<03:33,  2.60s/ examples]

Processing the row with Together


Map (num_proc=16):  58%|█████▊    | 114/195 [14:12<06:27,  4.78s/ examples]

Processing the row with Together


Map (num_proc=16):  59%|█████▉    | 115/195 [14:42<16:29, 12.37s/ examples]

Processing the row with Together


Map (num_proc=16):  59%|█████▉    | 116/195 [14:47<13:27, 10.22s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  60%|██████    | 117/195 [14:52<11:08,  8.57s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  61%|██████    | 118/195 [15:07<13:33, 10.57s/ examples]

Processing the row with Together


Map (num_proc=16):  61%|██████    | 119/195 [15:17<13:15, 10.46s/ examples]

Processing the row with Together


Map (num_proc=16):  62%|██████▏   | 120/195 [15:32<14:31, 11.62s/ examples]

Processing the row with Together


Map (num_proc=16):  62%|██████▏   | 121/195 [15:51<17:10, 13.93s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  63%|██████▎   | 122/195 [16:23<23:31, 19.33s/ examples]

Processing the row with Together


Map (num_proc=16):  63%|██████▎   | 123/195 [16:26<17:16, 14.40s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  64%|██████▎   | 124/195 [16:41<17:05, 14.45s/ examples]

Processing the row with TogetherProcessing the row with Together

There was an error with this one


Map (num_proc=16):  65%|██████▍   | 126/195 [16:56<13:04, 11.36s/ examples]

Processing the row with Together


Map (num_proc=16):  65%|██████▌   | 127/195 [16:59<10:27,  9.23s/ examples]

Processing the row with Together


Map (num_proc=16):  66%|██████▌   | 128/195 [17:13<11:48, 10.57s/ examples]

Processing the row with Together


Map (num_proc=16):  66%|██████▌   | 129/195 [17:19<10:08,  9.22s/ examples]

Processing the row with Together


Map (num_proc=16):  67%|██████▋   | 130/195 [17:40<13:39, 12.61s/ examples]

Processing the row with Together


Map (num_proc=16):  67%|██████▋   | 131/195 [18:07<17:47, 16.68s/ examples]

Processing the row with Together


Map (num_proc=16):  68%|██████▊   | 132/195 [18:12<13:51, 13.20s/ examples]

Processing the row with Together


Map (num_proc=16):  68%|██████▊   | 133/195 [18:16<10:56, 10.59s/ examples]

Processing the row with Together


Map (num_proc=16):  69%|██████▊   | 134/195 [18:39<14:37, 14.39s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  69%|██████▉   | 135/195 [18:40<10:25, 10.42s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  70%|██████▉   | 136/195 [18:47<09:01,  9.18s/ examples]

Processing the row with Together


Map (num_proc=16):  70%|███████   | 137/195 [18:51<07:32,  7.80s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  71%|███████   | 138/195 [18:57<06:48,  7.16s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  71%|███████▏  | 139/195 [18:59<05:09,  5.52s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  72%|███████▏  | 140/195 [19:00<03:58,  4.33s/ examples]

Processing the row with Together


Map (num_proc=16):  72%|███████▏  | 141/195 [19:06<04:19,  4.81s/ examples]

Processing the row with Together


Map (num_proc=16):  73%|███████▎  | 142/195 [19:12<04:29,  5.09s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  73%|███████▎  | 143/195 [19:14<03:43,  4.30s/ examples]

Processing the row with Together


Map (num_proc=16):  74%|███████▍  | 144/195 [19:21<04:18,  5.06s/ examples]

Processing the row with Together


Map (num_proc=16):  74%|███████▍  | 145/195 [19:30<05:03,  6.07s/ examples]

Processing the row with Together


Map (num_proc=16):  75%|███████▍  | 146/195 [19:32<04:06,  5.04s/ examples]

Processing the row with Together


Map (num_proc=16):  75%|███████▌  | 147/195 [19:35<03:28,  4.35s/ examples]

Processing the row with Together


Map (num_proc=16):  76%|███████▌  | 148/195 [19:36<02:34,  3.28s/ examples]

Processing the row with Together


Map (num_proc=16):  76%|███████▋  | 149/195 [19:37<02:00,  2.63s/ examples]

Processing the row with Together


Map (num_proc=16):  77%|███████▋  | 150/195 [19:42<02:33,  3.41s/ examples]

Processing the row with Together
Processing the row with Together


Map (num_proc=16):  78%|███████▊  | 152/195 [19:47<02:09,  3.02s/ examples]

Processing the row with Together


Map (num_proc=16):  78%|███████▊  | 153/195 [19:48<01:43,  2.47s/ examples]

Processing the row with Together


Map (num_proc=16):  79%|███████▉  | 154/195 [19:49<01:23,  2.04s/ examples]

Processing the row with Together


Map (num_proc=16):  79%|███████▉  | 155/195 [19:52<01:30,  2.25s/ examples]

Processing the row with Together


Map (num_proc=16):  80%|████████  | 156/195 [19:52<01:11,  1.82s/ examples]

Processing the row with TogetherProcessing the row with Together



Map (num_proc=16):  81%|████████  | 158/195 [20:07<02:34,  4.18s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  82%|████████▏ | 159/195 [20:12<02:39,  4.44s/ examples]

Processing the row with Together


Map (num_proc=16):  83%|████████▎ | 162/195 [20:30<02:58,  5.42s/ examples]

Processing the row with Together


Map (num_proc=16):  84%|████████▎ | 163/195 [20:41<03:45,  7.06s/ examples]

Processing the row with Together


Map (num_proc=16):  84%|████████▍ | 164/195 [20:43<02:51,  5.54s/ examples]

Processing the row with Together


Map (num_proc=16):  85%|████████▍ | 165/195 [20:44<02:09,  4.31s/ examples]

Processing the row with Together


Map (num_proc=16):  86%|████████▌ | 167/195 [21:00<02:34,  5.50s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  86%|████████▌ | 168/195 [21:02<02:05,  4.65s/ examples]

Processing the row with Together


Map (num_proc=16):  87%|████████▋ | 170/195 [21:20<02:56,  7.06s/ examples]

Processing the row with Together


Map (num_proc=16):  88%|████████▊ | 172/195 [21:25<01:51,  4.86s/ examples]

Processing the row with Together


Map (num_proc=16):  89%|████████▊ | 173/195 [21:28<01:29,  4.08s/ examples]

Processing the row with Together


Map (num_proc=16):  89%|████████▉ | 174/195 [21:29<01:10,  3.37s/ examples]

Processing the row with Together


Map (num_proc=16):  90%|████████▉ | 175/195 [21:38<01:42,  5.11s/ examples]

Processing the row with Together


Map (num_proc=16):  91%|█████████▏| 178/195 [21:44<00:48,  2.88s/ examples]

Processing the row with Together


Map (num_proc=16):  92%|█████████▏| 179/195 [21:45<00:37,  2.32s/ examples]

Processing the row with Together


Map (num_proc=16):  92%|█████████▏| 180/195 [21:49<00:42,  2.84s/ examples]

Processing the row with Together


Map (num_proc=16):  95%|█████████▍| 185/195 [21:58<00:18,  1.85s/ examples]

Processing the row with Together
There was an error with this one


Map (num_proc=16):  96%|█████████▌| 187/195 [22:02<00:15,  1.98s/ examples]

Processing the row with Together


Map (num_proc=16):  96%|█████████▋| 188/195 [22:45<01:38, 14.03s/ examples]

Processing the row with Together


Map (num_proc=16):  97%|█████████▋| 189/195 [22:53<01:14, 12.45s/ examples]

Processing the row with Together


Map (num_proc=16):  97%|█████████▋| 190/195 [23:12<01:11, 14.36s/ examples]

Processing the row with Together


Map (num_proc=16):  98%|█████████▊| 192/195 [23:13<00:21,  7.25s/ examples]

Processing the row with Together


Map (num_proc=16):  99%|█████████▉| 193/195 [23:17<00:12,  6.27s/ examples]

There was an error with this one


Map (num_proc=16): 100%|██████████| 195/195 [23:26<00:00,  7.21s/ examples]


In [63]:
# Upload the processed dataset `ds` to the Hub repository named below.
# NOTE(review): rows for which process_row_together fell back to its
# placeholder (synthetic == False, empty text fields) are uploaded as well --
# confirm whether they should be filtered out first.
ds.push_to_hub("vdaita/commitpackft-small-patches-expanded")

Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 79.09ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  1.14it/s]


CommitInfo(commit_url='https://huggingface.co/datasets/vdaita/commitpackft-small-patches-expanded/commit/fc7c5076d0b65472c6c1c8608e015f6e83e789e0', commit_message='Upload dataset', commit_description='', oid='fc7c5076d0b65472c6c1c8608e015f6e83e789e0', pr_url=None, pr_revision=None, pr_num=None)