# Homework Starter — Stage 15: Orchestration & System Design
Complete the sections below. Keep your answers concise and focused on orchestration readiness.

## 1) Project Task Decomposition
List 4–8 tasks. Add more rows as needed.

In [None]:
from pathlib import Path
import pandas as pd
# Task inventory: one row per pipeline stage, giving the stage name, what it
# reads, what it writes, and its idempotency flag.
tasks = pd.DataFrame(
    data=[
        ('ingest', '/data/raw.ext', 'prices_raw.json', True),
        ('clean', 'prices_raw.json', 'prices_clean.json', True),
        ('train_or_score', 'prices_clean.json', 'model.json', True),
        ('report', 'model.json', 'report.txt', True),
    ],
    columns=['task', 'inputs', 'outputs', 'idempotent'],
)
tasks  # bare expression so the notebook renders the table

## 2) Dependencies (DAG)
Describe dependencies and paste a small diagram if you have one.

In [None]:
# Dependency map: each task name points at the list of tasks that must finish
# before it can run. An empty list marks a task with no upstream dependency.
dag = dict(
    ingest=[],
    clean=['ingest'],
    train_or_score=['clean'],
    report=['train_or_score'],
)
dag  # bare expression so the notebook echoes the mapping

## 3) Logging & Checkpoints Plan
Specify what you will log and where you will checkpoint for each task.

In [None]:
# Logging/checkpoint plan: one record per task, pairing the messages the task
# logs with the artifact it leaves behind as a checkpoint.
logging_plan = pd.DataFrame([
    {
        'task': 'ingest',
        'log_messages': 'start/end, rows, source URI',
        'checkpoint_artifact': 'prices_raw.json',
    },
    {
        'task': 'clean',
        'log_messages': 'start/end, rows in/out',
        'checkpoint_artifact': 'prices_clean.json',
    },
    {
        'task': 'train_or_score',
        'log_messages': 'params, metrics',
        'checkpoint_artifact': 'model.json',
    },
    {
        'task': 'report',
        'log_messages': 'artifact path',
        'checkpoint_artifact': 'report.txt',
    },
])
logging_plan  # bare expression so the notebook renders the table

## 4) Right-Sizing Automation
Which parts will you automate now? Which stay manual? Why?

*(Write your rationale here.)*

## 5) (Stretch) Refactor One Task into a Function + CLI
Use the templates below.

In [None]:
import argparse
import json
import logging
import sys
from datetime import datetime, timezone

def my_task(input_path: str, output_path: str) -> None:
    '''Example task template: read → transform → write JSON.

    Args:
        input_path: Location of the task's input. The template does not read
            it yet; it is reserved for the real implementation.
        output_path: Destination JSON file. Missing parent directories are
            created before writing.
    '''
    logging.info('[my_task] start')
    # TODO: implement your logic
    result = {
        # Timezone-aware UTC timestamp: datetime.utcnow() is deprecated and
        # returns a naive value that is ambiguous once serialized.
        'run_at': datetime.now(timezone.utc).isoformat(),
        'note': 'replace with real output',
    }
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding so the artifact does not depend on the platform default.
    destination.write_text(json.dumps(result, indent=2), encoding='utf-8')
    logging.info('[my_task] wrote %s', output_path)

def main(argv=None):
    '''Command-line entry point: parse the two required paths and run my_task.

    Args:
        argv: Argument list handed to ``parse_args``. With ``None``, argparse
            falls back to the process command line.
    '''
    cli = argparse.ArgumentParser(description='Homework task wrapper')
    # Both options are mandatory and share the same configuration.
    for flag in ('--input', '--output'):
        cli.add_argument(flag, required=True)
    opts = cli.parse_args(argv)

    # Send INFO-level records to stdout so they show up in notebook output.
    stdout_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(level=logging.INFO, handlers=[stdout_handler])

    my_task(opts.input, opts.output)

# Run the example only when executed directly (a notebook cell also counts,
# since __name__ is '__main__' there), not when imported as a module.
if __name__ == '__main__':
    # Example simulated CLI in notebook:
    main(['--input', 'data/in.ext', '--output', 'data/out.json'])

### Optional: Simple Retry Wrapper (fill in)
Add a small retry with linear backoff to harden a task.

In [None]:
import time
def retry(n_tries=3, delay=0.2):
    '''Build a caller that retries a function with linear backoff.

    Usage: ``retry(n_tries=3, delay=0.2)(fn, *args, **kwargs)``.

    Args:
        n_tries: Maximum number of attempts. Values below 1 are treated as 1,
            so the function is always called at least once.
        delay: Base pause in seconds. The wait after the k-th failed attempt
            is ``delay * k``.

    Returns:
        A ``wrapper(fn, *args, **kwargs)`` callable that returns the result of
        the first successful call and re-raises the last exception once every
        attempt has failed.
    '''
    def wrapper(fn, *args, **kwargs):
        attempts = max(1, n_tries)
        for attempt in range(1, attempts + 1):
            try:
                return fn(*args, **kwargs)
            except Exception:
                # Out of attempts: surface the original error to the caller.
                if attempt == attempts:
                    raise
                pause = delay * attempt  # linear backoff
                if pause > 0:
                    time.sleep(pause)
    return wrapper