In [14]:
# A __future__ import has to be the first statement of a module; keeping it at
# the top lets this cell survive export to a plain .py script.
from __future__ import annotations

# Standard library
import csv
import os
import random
from typing import List, Tuple

# Third-party
from dotenv import load_dotenv

# Load environment variables via python-dotenv before reading the token.
load_dotenv()
# os.getenv returns None when HUGGINGFACE_TOKEN is not defined.
hf_token = os.getenv("HUGGINGFACE_TOKEN")

### Generate the Math Data

In [15]:
_OPERATIONS: tuple[str, ...] = ('+', '-', '*', '/')   


def generate_arithmetic_data(num_samples: int) -> List[Tuple[int, str, int, str, int]]:
    """Return a list of 5-tuples: (x, op, y, '=', z)."""
    rng = random.Random()
    equations: List[Tuple[int, str, int, str, int]] = []

    for _ in range(num_samples):
        x, y = rng.randint(1, 100), rng.randint(1, 100)
        op = rng.choice(_OPERATIONS)

        if op == '+':
            z = x + y
        elif op == '-':
            z = x - y
        elif op == '*':
            z = x * y
        else:                                   # safe integer division
            while y == 0 or x % y != 0:
                y = rng.randint(1, 100)
            z = x // y

        equations.append((x, op, y, '=', z))

    return equations


### Save the data to CSV and encode the operator

In [16]:
def write_to_csv(equations: List[Tuple[int, str, int, str, int]], filename: str) -> None:
    """Save equations to ``filename`` as CSV.

    The file is opened for writing as UTF-8 (replacing any existing content).
    A header row ``x, operator, y, =, z`` is written first, followed by one
    row per equation tuple.
    """
    header = ['x', 'operator', 'y', '=', 'z']
    # newline='' leaves line endings entirely to the csv writer.
    with open(filename, mode='w', newline='', encoding='utf-8') as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)
        for equation in equations:
            out.writerow(equation)

In [37]:
# Generate 100,000 equations and write them to arithmetic_data.csv
# (a relative path, resolved against the current working directory).
rows = generate_arithmetic_data(100_000)
write_to_csv(equations=rows, filename='arithmetic_data.csv')