Profiler (#303)

* Add profiler to run_workflow script
* Wrap CI workflow run in memory profiler
* Enable workflow script to run just one step (ecm_prep or run)
* Add flag in run workflow script to turn on/off profiler
* Trigger CI when a pull request is created
* Enable integration test runs on PR event triggers
trynthink · Oct 19, 2023 · c0f91c0 · c0f91c0
1 parent 572331d
commit c0f91c0
Show file tree

Hide file tree

Showing 3 changed files with 123 additions and 17 deletions.
diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
@@ -38,28 +38,39 @@ jobs:
     steps:
       - uses: actions/checkout@v3
         with:
-          ref: ${{ github.head_ref }}
+          ref: ${{ github.ref }}
       - name: Set up Python 3.10
         uses: actions/setup-python@v4
         with:
           python-version: '3.10'
       - name: Install Python dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install numpy>=1.16 pandas>=2.2 scipy requests numpy-financial matplotlib xlsxwriter
-      - name: Run workflow
-        run: python tests/integration_testing/run_workflow.py
+          pip install numpy>=1.16 pandas>=2.2 scipy requests numpy-financial matplotlib xlsxwriter psrecord     
+      - name: Run and profile workflow
+        run: |
+          branch_name="${{ github.ref }}"
+          if [[ $branch_name == 'refs/heads/master' ]]; then
+            steps=("ecm_prep" "run")
+            for step in "${steps[@]}"; do
+              psrecord --log memory_log_${step}.txt --include-children --interval 1 "python tests/integration_testing/run_workflow.py --run_step $step --with_profiler"
+              (echo -e; echo "# Elapsed time,CPU (%),Peak Real (MB),Peak Virtual (MB)"; grep -v "time" memory_log_${step}.txt | sort -k3 -n -r | head -n 1 | column -t | tr -s '[:blank:]' ',') >> tests/integration_testing/results/profile_${step}.csv
+              mv memory_log_${step}.txt ./tests/integration_testing/results/
+            done
+          else
+            python tests/integration_testing/run_workflow.py
+          fi
+          mv ./results/*.json ./tests/integration_testing/results/
       - name: Upload artifacts
         uses: actions/upload-artifact@v3
         with:
           name: results
-          path: ./results/*.json
+          path: ./tests/integration_testing/results/
       - name: Commit test results
         run: |
-          branch_name="${{ github.head_ref }}"
+          branch_name="${{ github.ref }}"
           git pull origin $branch_name
-          cp -r ./results/*.json ./tests/integration_testing/results
-          git add ./tests/integration_testing/*.json
+          git add ./tests/integration_testing/results/*.json
           if [[ $(git diff --cached --exit-code) ]]; then
             git config --system user.email "github-action@users.noreply.github.com"
             git config --system user.name "GitHub Action"

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -1,6 +1,8 @@
 name: tests
 on:
   push:
+  pull_request:
+    types: [opened, ready_for_review]
 jobs:
 
   code-quality-checks:
@@ -61,9 +63,15 @@ jobs:
       - name: Check if PR
         uses: 8BitJonny/gh-get-current-pr@2.2.0
         id: PR
+        with:
+          filterOutClosed: true
+          filterOutDraft: true
 
   integration-tests:
     needs: [check-PR]
-    if: needs.check-PR.outputs.PR_status == 'true' && fromJSON(needs.check-PR.outputs.PR).base.ref == 'master'
+    if: |
+      (needs.check-PR.outputs.PR_status == 'true' && fromJSON(needs.check-PR.outputs.PR).base.ref == 'master') ||
+      (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'master' && github.event.pull_request.draft == false) ||
+      github.ref == 'refs/heads/master'
     uses: ./.github/workflows/integration_tests.yml
     secrets: inherit
diff --git a/tests/integration_testing/run_workflow.py b/tests/integration_testing/run_workflow.py
@@ -1,24 +1,111 @@
+from __future__ import annotations
 from pathlib import Path
+from argparse import ArgumentParser
+import io
 import sys
+import logging
+import cProfile
+import pstats
 
 sys.path.append(str(Path(__file__).parent.parent.parent.parent))
 sys.path.append(str(Path(__file__).parent.parent.parent))
 from scout import ecm_prep  # noqa: E402
 from scout.ecm_prep_args import ecm_args  # noqa: E402
 from scout import run  # noqa: E402
 
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s: %(message)s",
+    datefmt="%d-%b-%y %H:%M:%S",
+)
+
+
+def run_workflow(run_step: str = None, with_profiler: bool = False) -> None:
+    """Runs Scout workflow steps with optional profiling
+
+    Args:
+        run_step (str, optional): Specify which step to run {ecm_prep, run}, if None
+                                  then both run. Defaults to None.
+        with_profiler (bool, optional): Run workflow step(s) with profiler to track
+                                        compute time and peak memory. Defaults to False.
+    """
+
+    results_dir = Path(__file__).parent / "results"
 
-def run_workflow():
     # Run ecm_prep.py
-    opts = ecm_args(
-        ["--add_typ_eff", "--rp_persist", "--alt_regions_option", "EMM"]
-    )
-    ecm_prep.main(opts)
+    if run_step == "ecm_prep" or run_step is None:
+        opts = ecm_args(["--add_typ_eff", "--rp_persist", "--alt_regions_option", "EMM"])
+        if with_profiler:
+            run_with_profiler(ecm_prep.main, opts, results_dir / "profile_ecm_prep.csv")
+        else:
+            ecm_prep.main(opts)
 
     # Run run.py
-    opts = run.parse_args([])
-    run.main(opts)
+    if run_step == "run" or run_step is None:
+        opts = run.parse_args([])
+        if with_profiler:
+            run_with_profiler(run.main, opts, results_dir / "profile_run.csv")
+        else:
+            run.main(opts)
+
+
+def run_with_profiler(
+    func: Callable[[argparse.Namespace], None],  # noqa: F821
+    args: argparse.Namespace,  # noqa: F821
+    output_file: pathlib.Path,  # noqa: F821
+) -> None:
+    """Runs a function wrapped in a profiler using the cProfile library, writes profile stats
+
+    Args:
+        func (Callable[[argparse.Namespace], None]): A function that takes argparse.Namespace args
+        args (argparse.Namespace): The arguments to the function
+        output_file (pathlib.Path): .csv filepath to write profiling stats
+    """
+
+    pr = cProfile.Profile()
+    pr.enable()
+    func(args)
+    pr.disable()
+    write_profile_stats(pr, output_file)
+
+
+def write_profile_stats(pr: cProfile.Profile, filepath: pathlib.Path) -> None:  # noqa: F821
+    """Writes profile stats to a .csv file
+
+    Args:
+        pr (cProfile.Profile): Profile instance that has previously been enabled (pr.enable())
+        filepath (pathlib.Path): .csv filepath to write profiling stats
+    """
+
+    # Capture io stream
+    result = io.StringIO()
+    pstats.Stats(pr, stream=result).sort_stats("cumulative").print_stats()
+    result = result.getvalue()
+
+    # Parse stats and write to csv
+    top_data, result = result.split("ncalls")
+    top_data = "\n".join([line.strip() for line in top_data.split("\n")])
+    result = "ncalls" + result
+    result = "\n".join([",".join(line.rstrip().split(None, 5)) for line in result.split("\n")])
+    result_out = top_data + result
+
+    f = open(filepath, "w")
+    f.write(result_out)
+    f.close()
+    logger.info(f"Wrote profiler stats to {filepath}")
 
 
 if __name__ == "__main__":
-    run_workflow()
+    parser = ArgumentParser()
+    parser.add_argument(
+        "--run_step", choices=["ecm_prep", "run"], required=False, help="Specify which step to run"
+    )
+    parser.add_argument(
+        "--with_profiler",
+        action="store_true",
+        required=False,
+        help="Run workflow step(s) with profiler",
+    )
+    opts = parser.parse_args()
+    run_workflow(run_step=opts.run_step, with_profiler=opts.with_profiler)