In [None]:
# Install required packages
# Uncomment if running in Colab:
# !pip install transformers datasets peft accelerate bitsandbytes sympy tqdm

# For local environments, install via: pip install transformers datasets peft accelerate bitsandbytes sympy tqdm

In [1]:
# Core imports
from datasets import load_dataset
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    TrainingArguments, 
    Trainer, 
    TrainerCallback, 
    EarlyStoppingCallback,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, PeftModel
import torch
from tqdm import tqdm
import re
import os
import json
import sympy
import gc
from pathlib import Path

# Optional: For Colab
try:
    from google.colab import drive
    IN_COLAB = True
except ImportError:
    IN_COLAB = False
    drive = None

In [5]:
# Configuration
# =============

# Model configuration
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Can change to other models
USE_8BIT = True  # Use 8-bit quantization to save memory

# Data paths - using preprocessed JSONL files
TRAIN_DATA_PATH = "MATH_train_full.jsonl"
TEST_DATA_PATH = "MATH_test_full.jsonl"

# Output paths
if IN_COLAB:
    drive.mount('/content/drive')
    BASE_OUTPUT_DIR = "/content/drive/MyDrive/Colab_Notebooks/CSE595_Proj/math-sft-models"
else:
    BASE_OUTPUT_DIR = "./math-sft-models"

os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)
CHECKPOINT_DIR = os.path.join(BASE_OUTPUT_DIR, "checkpoints")
FINAL_MODEL_DIR = os.path.join(BASE_OUTPUT_DIR, "final-model")

# Training hyperparameters
BATCH_SIZE = 1
GRADIENT_ACCUMULATION_STEPS = 4
LEARNING_RATE = 5e-5
NUM_EPOCHS = 1
WARMUP_STEPS = 50
MAX_LENGTH = 1024

# LoRA configuration
LORA_R = 32
LORA_ALPHA = 16
LORA_DROPOUT = 0.05

# Evaluation
VAL_SIZE = 100  # Size of validation set
TEST_SIZE = 500  # Number of test samples to evaluate

print(f"Configuration loaded. Output directory: {BASE_OUTPUT_DIR}")
print(f"Using model: {MODEL_NAME}")

ValueError: mount failed

In [None]:
# Load preprocessed data from JSONL files
# =======================================

def load_jsonl(file_path):
    """Load data from JSONL file."""
    data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            data.append(json.loads(line.strip()))
    return data

print("Loading training data...")
train_data = load_jsonl(TRAIN_DATA_PATH)
print(f"Loaded {len(train_data)} training examples")

print("Loading test data...")
test_data = load_jsonl(TEST_DATA_PATH)
print(f"Loaded {len(test_data)} test examples")

# Convert to HuggingFace dataset format
train_ds = load_dataset('json', data_files=TRAIN_DATA_PATH, split='train')
test_ds = load_dataset('json', data_files=TEST_DATA_PATH, split='train')

# Create validation set from test set
val_ds = test_ds.select(range(VAL_SIZE))
test_ds = test_ds.select(range(VAL_SIZE, VAL_SIZE + TEST_SIZE))

print(f"Training examples: {len(train_ds)}")
print(f"Validation examples: {len(val_ds)}")
print(f"Test examples: {len(test_ds)}")

In [None]:
# System prompt for math assistant
SYSTEM_PROMPT = "You are a math assistant. Solve the problem step by step, explain your reasoning, and box the final answer using \\boxed{}."

print(f"System prompt: {SYSTEM_PROMPT}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

algebra/train-00000-of-00001.parquet:   0%|          | 0.00/505k [00:00<?, ?B/s]

algebra/test-00000-of-00001.parquet:   0%|          | 0.00/353k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1744 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1187 [00:00<?, ? examples/s]

counting_and_probability/train-00000-of-(…):   0%|          | 0.00/329k [00:00<?, ?B/s]

counting_and_probability/test-00000-of-0(…):   0%|          | 0.00/175k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/771 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/474 [00:00<?, ? examples/s]

geometry/train-00000-of-00001.parquet:   0%|          | 0.00/549k [00:00<?, ?B/s]

geometry/test-00000-of-00001.parquet:   0%|          | 0.00/264k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/870 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/479 [00:00<?, ? examples/s]

intermediate_algebra/train-00000-of-0000(…):   0%|          | 0.00/575k [00:00<?, ?B/s]

intermediate_algebra/test-00000-of-00001(…):   0%|          | 0.00/395k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1295 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/903 [00:00<?, ? examples/s]

number_theory/train-00000-of-00001.parqu(…):   0%|          | 0.00/309k [00:00<?, ?B/s]

number_theory/test-00000-of-00001.parque(…):   0%|          | 0.00/182k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/869 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/540 [00:00<?, ? examples/s]

prealgebra/train-00000-of-00001.parquet:   0%|          | 0.00/384k [00:00<?, ?B/s]

prealgebra/test-00000-of-00001.parquet:   0%|          | 0.00/268k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1205 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/871 [00:00<?, ? examples/s]

precalculus/train-00000-of-00001.parquet:   0%|          | 0.00/354k [00:00<?, ?B/s]

precalculus/test-00000-of-00001.parquet:   0%|          | 0.00/242k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/746 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/546 [00:00<?, ? examples/s]

In [None]:
# Initialize tokenizer and model
# ===============================

print(f"Loading tokenizer and model: {MODEL_NAME}")

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Configure quantization if using 8-bit
quantization_config = None
if USE_8BIT:
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_threshold=6.0,
    )

# Load model with quantization
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=quantization_config,
    torch_dtype=torch.float16 if not USE_8BIT else None,
    trust_remote_code=True
)

# Apply LoRA
print("Applying LoRA configuration...")
lora_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# Print trainable parameters
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print(f"Trainable parameters: {trainable_params:,} ({100 * trainable_params / total_params:.2f}%)")
print(f"Total parameters: {total_params:,}")

Map:   0%|          | 0/7500 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

In [None]:
# Tokenization function
# =====================

def tokenize_function(examples):
    """Tokenize the messages format for TinyLlama chat template."""
    texts = []
    for msg_list in examples["messages"]:
        # Format: system, user, assistant
        system_msg = msg_list[0]["content"]
        user_msg = msg_list[1]["content"]
        assistant_msg = msg_list[2]["content"]
        
        # TinyLlama chat format
        text = f"<|system|>\n{system_msg}<|end|>\n<|user|>\n{user_msg}<|end|>\n<|assistant|>\n{assistant_msg}<|end|>"
        texts.append(text)
    
    tokenized = tokenizer(
        texts,
        truncation=True,
        max_length=MAX_LENGTH,
        padding="max_length"
    )
    # Labels are same as input_ids for causal LM
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

print("Tokenizing datasets...")
tokenized_train = train_ds.map(
    tokenize_function,
    batched=True,
    remove_columns=train_ds.column_names,
    desc="Tokenizing training set"
)
tokenized_val = val_ds.map(
    tokenize_function,
    batched=True,
    remove_columns=val_ds.column_names,
    desc="Tokenizing validation set"
)

print(f"Tokenized training examples: {len(tokenized_train)}")
print(f"Tokenized validation examples: {len(tokenized_val)}")

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
# Evaluation utilities
# ====================

def extract_boxed(latex_string):
    """Extract content from \\boxed{} in LaTeX string."""
    if not latex_string:
        return None
    
    match = re.search(r'\\boxed\s*\{', latex_string, re.IGNORECASE)
    if not match:
        return None

    start_index = match.end()
    brace_count = 1
    content = []

    for i in range(start_index, len(latex_string)):
        char = latex_string[i]
        if char == '{':
            brace_count += 1
            content.append(char)
        elif char == '}':
            brace_count -= 1
            if brace_count == 0:
                return "".join(content)
            else:
                content.append(char)
        else:
            content.append(char)
    return None

def normalize_sympy(s):
    """Normalize mathematical expression using sympy."""
    if not s:
        return None
    try:
        return sympy.sympify(s)
    except (sympy.SympifyError, TypeError):
        return None

def compute_em(eval_pred):
    """Compute exact match metric during evaluation."""
    predictions, _ = eval_pred
    correct = 0
    batch_size = 4

    for i in range(0, len(predictions), batch_size):
        batch_preds = predictions[i:i+batch_size]
        batch_texts = [
            tokenizer.decode(p.tolist(), skip_special_tokens=True)
            for p in batch_preds
        ]
        # Access messages safely - val_ds has the messages structure from JSONL
        batch_indices = list(range(i, min(i+batch_size, len(val_ds))))
        batch_golds = []
        for idx in batch_indices:
            ex = val_ds[idx]
            if "messages" in ex and len(ex["messages"]) > 2:
                batch_golds.append(ex["messages"][2]["content"])
            else:
                batch_golds.append("")  # Fallback if structure is unexpected

        for pred_text, gold_text in zip(batch_texts, batch_golds):
            pred_ans_str = extract_boxed(pred_text)
            gold_ans_str = extract_boxed(gold_text)

            pred_ans_sym = normalize_sympy(pred_ans_str)
            gold_ans_sym = normalize_sympy(gold_ans_str)

            if pred_ans_sym is not None and gold_ans_sym is not None and pred_ans_sym == gold_ans_sym:
                correct += 1
            elif (pred_ans_str == "" or pred_ans_str is None) and (gold_ans_str == "" or gold_ans_str is None):
                correct += 1

        del batch_preds, batch_texts, batch_golds
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    return {"em": correct / len(predictions)}

# Test the evaluation functions
print("Testing evaluation functions...")
test_cases = [
    ("Some text \\boxed{1/2+3} more text", "1/2+3"),
    ("\\boxed{42}", "42"),
    ("No boxed answer here", None)
]
for text, expected in test_cases:
    result = extract_boxed(text)
    print(f"  Input: {text[:50]}...")
    print(f"  Extracted: {result}, Expected: {expected}")
print("Evaluation functions ready!")

Map:   0%|          | 0/7500 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
# Training callbacks
# ==================

class EMPrintCallback(TrainerCallback):
    """Print exact match score during evaluation."""
    def on_evaluate(self, args, state, control, metrics, **kwargs):
        em_score = metrics.get('eval_em', 0)
        print(f"\n{'='*60}")
        print(f"Step {state.global_step} | Validation Exact Match: {em_score:.2%}")
        print(f"{'='*60}\n")

class ModelCheckpointCallback(TrainerCallback):
    """Save model checkpoints to specified directory."""
    def __init__(self, save_dir):
        self.save_dir = save_dir
        os.makedirs(save_dir, exist_ok=True)
    
    def on_save(self, args, state, control, **kwargs):
        checkpoint_dir = f"checkpoint-{state.global_step}"
        local_path = os.path.join(args.output_dir, checkpoint_dir)
        if os.path.exists(local_path):
            print(f"Checkpoint saved: {checkpoint_dir}")

callbacks = [
    EMPrintCallback(),
    ModelCheckpointCallback(BASE_OUTPUT_DIR),
    EarlyStoppingCallback(early_stopping_patience=3)
]

print("Callbacks configured!")

In [None]:
# Training arguments and trainer setup
# =====================================

training_args = TrainingArguments(
    output_dir=CHECKPOINT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    warmup_steps=WARMUP_STEPS,
    lr_scheduler_type="cosine",
    logging_steps=50,
    eval_strategy="steps",
    eval_steps=100,
    save_steps=300,
    save_total_limit=2,
    metric_for_best_model="eval_em",
    greater_is_better=True,
    load_best_model_at_end=True,
    fp16=False,
    bf16=torch.cuda.is_bf16_supported(),
    report_to="none",
    dataloader_num_workers=1,
    remove_unused_columns=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    compute_metrics=compute_em,
    callbacks=callbacks,
)

print("Trainer configured!")
print(f"Total training steps: {len(tokenized_train) // (BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS) * NUM_EPOCHS}")
print(f"Evaluation will run every {training_args.eval_steps} steps")

None
False
None
2
2
None


In [None]:
# Start training
# ===============

print("Starting training...")
print(f"Model: {MODEL_NAME}")
print(f"Training examples: {len(tokenized_train)}")
print(f"Validation examples: {len(tokenized_val)}")
print(f"Output directory: {CHECKPOINT_DIR}")

trainer.train()

print("\nTraining completed!")

In [None]:
# Save final model
# ================

best_checkpoint = trainer.state.best_model_checkpoint
if best_checkpoint:
    print(f"Best checkpoint: {best_checkpoint}")
    
    # Save the final model
    final_model_path = FINAL_MODEL_DIR
    os.makedirs(final_model_path, exist_ok=True)
    
    # Save the PEFT model
    trainer.model.save_pretrained(final_model_path)
    tokenizer.save_pretrained(final_model_path)
    
    print(f"Final model saved to: {final_model_path}")
    
    # Also save training metrics
    metrics_path = os.path.join(BASE_OUTPUT_DIR, "training_metrics.json")
    with open(metrics_path, 'w') as f:
        json.dump({
            "best_checkpoint": best_checkpoint,
            "best_metric": trainer.state.best_metric,
            "total_steps": trainer.state.global_step,
        }, f, indent=2)
    print(f"Training metrics saved to: {metrics_path}")
else:
    print("No best checkpoint found. Saving current model...")
    trainer.model.save_pretrained(FINAL_MODEL_DIR)
    tokenizer.save_pretrained(FINAL_MODEL_DIR)
    print(f"Model saved to: {FINAL_MODEL_DIR}")

In [None]:
# Load model for evaluation
# ==========================

# Load the best model checkpoint
if best_checkpoint and os.path.exists(best_checkpoint):
    print(f"Loading best model from: {best_checkpoint}")
    eval_model = AutoModelForCausalLM.from_pretrained(
        best_checkpoint,
        device_map="auto",
        quantization_config=quantization_config if USE_8BIT else None,
        torch_dtype=torch.float16 if not USE_8BIT else None,
        trust_remote_code=True
    )
    # If it's a PEFT model, we need to load it differently
    if os.path.exists(os.path.join(best_checkpoint, "adapter_config.json")):
        base_model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            device_map="auto",
            quantization_config=quantization_config if USE_8BIT else None,
            torch_dtype=torch.float16 if not USE_8BIT else None,
            trust_remote_code=True
        )
        eval_model = PeftModel.from_pretrained(base_model, best_checkpoint)
else:
    print("Using current model for evaluation")
    eval_model = model

eval_model.eval()
eval_tokenizer = tokenizer

print("Model loaded for evaluation!")



Step,Training Loss,Validation Loss


TypeError: argument 'ids': 'list' object cannot be interpreted as an integer

In [None]:
# Comprehensive evaluation on test set
# =====================================

def evaluate_model(model, tokenizer, dataset, num_samples=None, verbose=True):
    """
    Evaluate model on test dataset.
    Returns: exact_match_score, detailed_results
    """
    if num_samples is None:
        num_samples = len(dataset)
    
    correct = 0
    total = 0
    results = []
    
    for idx, ex in enumerate(tqdm(dataset.select(range(num_samples)), desc="Evaluating")):
        problem = ex["messages"][1]["content"]  # user message
        gold_solution = ex["messages"][2]["content"]  # assistant message
        gold_ans_str = extract_boxed(gold_solution)
        gold_ans_sym = normalize_sympy(gold_ans_str)
        
        # Generate prediction
        prompt = f"<|system|>\n{SYSTEM_PROMPT}<|end|>\n<|user|>\n{problem}<|end|>\n<|assistant|>"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)
        
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=False,
                temperature=1.0,
                pad_token_id=tokenizer.eos_token_id
            )
        
        pred_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extract only the assistant response
        if "<|assistant|>" in pred_text:
            pred_text = pred_text.split("<|assistant|>")[-1]
        
        pred_ans_str = extract_boxed(pred_text)
        pred_ans_sym = normalize_sympy(pred_ans_str)
        
        # Check if correct
        is_correct = False
        if pred_ans_sym is not None and gold_ans_sym is not None:
            is_correct = (pred_ans_sym == gold_ans_sym)
        elif (pred_ans_str == "" or pred_ans_str is None) and (gold_ans_str == "" or gold_ans_str is None):
            is_correct = True
        
        if is_correct:
            correct += 1
        total += 1
        
        results.append({
            "problem": problem[:100] + "..." if len(problem) > 100 else problem,
            "predicted": pred_ans_str,
            "gold": gold_ans_str,
            "correct": is_correct
        })
        
        if verbose and idx < 3:  # Show first 3 examples
            print(f"\n--- Example {idx + 1} ---")
            print(f"Problem: {problem[:150]}...")
            print(f"Predicted answer: {pred_ans_str}")
            print(f"Gold answer: {gold_ans_str}")
            print(f"Correct: {is_correct}")
    
    exact_match = correct / total if total > 0 else 0.0
    return exact_match, results

print("Evaluating on test set...")
test_em, test_results = evaluate_model(eval_model, eval_tokenizer, test_ds, num_samples=TEST_SIZE, verbose=True)

print(f"\n{'='*60}")
print(f"Test Set Exact Match Score: {test_em:.2%}")
print(f"Correct: {sum(r['correct'] for r in test_results)}/{len(test_results)}")
print(f"{'='*60}")

# Save evaluation results
eval_results_path = os.path.join(BASE_OUTPUT_DIR, "evaluation_results.json")
with open(eval_results_path, 'w') as f:
    json.dump({
        "exact_match": test_em,
        "correct": sum(r['correct'] for r in test_results),
        "total": len(test_results),
        "results": test_results[:10]  # Save first 10 for inspection
    }, f, indent=2)
print(f"\nEvaluation results saved to: {eval_results_path}")

In [None]:
# Performance summary and next steps
# ===================================

print("\n" + "="*60)
print("TRAINING AND EVALUATION SUMMARY")
print("="*60)
print(f"Model: {MODEL_NAME}")
print(f"Training examples: {len(train_ds)}")
print(f"Test examples evaluated: {len(test_ds)}")
print(f"Test Exact Match: {test_em:.2%}")
print(f"Best checkpoint: {best_checkpoint if 'best_checkpoint' in locals() else 'N/A'}")
print(f"Model saved to: {FINAL_MODEL_DIR}")
print("="*60)

print("\nNext steps:")
print("1. Review evaluation results in:", eval_results_path)
print("2. To use the model for inference, load from:", FINAL_MODEL_DIR)
print("3. For RAG integration, see the RAG notebook or create a new one")
print("\nTo load the model later:")
print(f"  from peft import PeftModel")
print(f"  base_model = AutoModelForCausalLM.from_pretrained('{MODEL_NAME}')")
print(f"  model = PeftModel.from_pretrained(base_model, '{FINAL_MODEL_DIR}')")

# RAG Integration (Future Work)

This section can be used to integrate RAG components with the fine-tuned model.

Key components needed:
1. **Knowledge Base**: Mathematical concepts, formulas, theorems
2. **Embedding Model**: For encoding queries and documents
3. **Vector Store**: FAISS, ChromaDB, or similar
4. **Retrieval**: Semantic search to find relevant context
5. **Generation**: Use retrieved context with the fine-tuned model

See `595 RAG.ipynb` for a basic RAG implementation example.


In [None]:
# Example: Loading the fine-tuned model for inference
# ===================================================

# To load the saved model later, use:
"""
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16
)

# Load LoRA adapters
model = PeftModel.from_pretrained(base_model, FINAL_MODEL_DIR)
tokenizer = AutoTokenizer.from_pretrained(FINAL_MODEL_DIR)

model.eval()

# Example inference
def solve_math_problem(problem_text):
    prompt = f"<|system|>\n{SYSTEM_PROMPT}<|end|>\n<|user|>\n{problem_text}<|end|>\n<|assistant|>"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,
            temperature=1.0,
            pad_token_id=tokenizer.eos_token_id
        )
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    if "<|assistant|>" in response:
        response = response.split("<|assistant|>")[-1]
    return response

# Test with a sample problem
# sample_problem = "What is 2 + 2?"
# answer = solve_math_problem(sample_problem)
# print(answer)
"""