In [2]:
from datasets import load_dataset
# Load the PRM dataset from the Hugging Face Hub, caching files under ./data.
ds = load_dataset(
  path="RLHFlow/Mistral-PRM-Data",
  cache_dir="data",
)

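Process the `RLHFlow/Mistral-PRM-Data` dataset to convert each multi-turn conversation into multiple single-turn conversations, one per assistant label: the user message of each single-turn conversation contains all steps up to that point, joined by newlines. For example, the conversation: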
```
[ { "content": "Find the product of the roots of the equation $18t^2 + 45t -500 =0$. Step 1: To find the product of the roots of a quadratic equation, I can use the formula $-b/a$, where $a$ and $b$ are the coefficients of $t^2$ and $t$ respectively.", "role": "user" }, { "content": "-", "role": "assistant" }, { "content": "Step 2: In this case, $a = 18$ and $b = 45$, so the product of the roots is $-45/18$. The answer is: -45/18", "role": "user" }, { "content": "-", "role": "assistant" } ]
```
becomes 
```
[ { "content": "Find the product of the roots of the equation $18t^2 + 45t -500 =0$. Step 1: To find the product of the roots of a quadratic equation, I can use the formula $-b/a$, where $a$ and $b$ are the coefficients of $t^2$ and $t$ respectively.", "role": "user" }, { "content": "-", "role": "assistant" } ]
[ { "content": "Find the product of the roots of the equation $18t^2 + 45t -500 =0$. Step 1: To find the product of the roots of a quadratic equation, I can use the formula $-b/a$, where $a$ and $b$ are the coefficients of $t^2$ and $t$ respectively.\nStep 2: In this case, $a = 18$ and $b = 45$, so the product of the roots is $-45/18$. The answer is: -45/18", "role": "user" }, { "content": "-", "role": "assistant" } ]
```
I'm assuming the assistant responses are the so-called hard labels from the [Math-Shepherd paper](https://arxiv.org/abs/2312.08935).

In [3]:
import json
from tqdm import tqdm

def process_row(row):
  """Split one multi-turn conversation into single-turn examples.

  Walks `row["conversations"]` in order. Each user message is appended to a
  running text (one message per line). Each assistant message emits one
  example whose user content is the running text so far (trailing newlines
  stripped) and whose assistant content is that assistant message.

  Args:
    row: Mapping with a "conversations" list of {"role", "content"} dicts.

  Returns:
    A list of {"messages": [user_msg, assistant_msg]} dicts, one per
    assistant message in the conversation.

  Raises:
    ValueError: If a message has a role other than "user" or "assistant".
  """
  examples = []
  steps_so_far = ""
  for turn in row["conversations"]:
    role = turn["role"]
    if role == "user":
      # Accumulate the step; the newline separates it from the next one.
      steps_so_far += f"{turn['content']}\n"
      continue
    if role != "assistant":
      raise ValueError(f"Unknown role: {role}")
    prompt = {"role": "user", "content": steps_so_far.rstrip("\n")}
    label = {"role": "assistant", "content": turn['content']}
    examples.append({"messages": [prompt, label]})
  return examples

def save_jsonl(data, file_path):
  """Write `data` to `file_path` in JSON Lines format.

  Each item is serialized with `json.dump` and followed by a newline, so the
  file holds one JSON document per line. An existing file is overwritten.

  Args:
    data: Iterable of JSON-serializable items.
    file_path: Destination path of the .jsonl file.
  """
  # Explicit encoding so the result does not depend on the platform default.
  with open(file_path, 'w', encoding="utf-8") as f:
    for item in data:
      json.dump(item, f)
      f.write("\n")

# Flatten every training conversation into its single-turn examples.
data = []
for row in tqdm(ds["train"]):
  data.extend(process_row(row))
# NOTE(review): "pcm" in the file name may be a typo for "prm" (the source
# dataset is named Mistral-PRM-Data) — confirm before renaming, since other
# cells or scripts may read this exact path.
save_jsonl(data, "data/mistral_pcm_data.jsonl")

100%|██████████| 273226/273226 [00:09<00:00, 30028.42it/s]
