# 03 QC Report Viewer

Purpose: inspect the outputs of the script-based checks; this notebook does not reimplement them.

Upstream pipeline steps:
1. `scripts/validate_raw_sessions.py`
2. `scripts/run_qc.py`


In [None]:
from pathlib import Path
import json
import pandas as pd
import matplotlib.pyplot as plt


def find_reports_dir(start: Path) -> Path:
    """Locate the nearest ``reports`` directory at or above *start*.

    ``start`` itself is checked first, then each of its ancestors in order
    from closest to the filesystem root.

    Args:
        start: Path to begin searching from; it is resolved to an absolute
            path before the walk.

    Returns:
        Path of the first ``reports`` directory found.

    Raises:
        FileNotFoundError: If neither ``start`` nor any of its ancestors
            contains a ``reports`` directory.
    """
    origin = start.resolve()  # resolve once and reuse for the ancestor walk
    for base in (origin, *origin.parents):
        candidate = base / 'reports'
        # is_dir() rather than exists(): a regular file that happens to be
        # named "reports" must not be mistaken for the reports directory.
        if candidate.is_dir():
            return candidate
    raise FileNotFoundError('Could not find reports directory')

# Locate the reports directory by walking up from the current working
# directory, then derive the two report paths the later cells read.
REPORTS_DIR = find_reports_dir(Path.cwd())
pre_path = REPORTS_DIR / 'prelaunch_validation.json'
qc_path = REPORTS_DIR / 'qc_summary.json'

# Report where we are looking and which of the two reports are present.
for label, value in (
    ('Reports dir:', REPORTS_DIR),
    ('prelaunch exists:', pre_path.exists()),
    ('qc exists:', qc_path.exists()),
):
    print(label, value)


In [None]:
def _load_report(path: Path):
    """Return the parsed JSON content of *path*, or ``{}`` if it does not exist."""
    if not path.exists():
        return {}
    # JSON is exchanged as UTF-8; read_text() without an explicit encoding
    # uses the platform locale and can mis-decode non-ASCII characters.
    return json.loads(path.read_text(encoding='utf-8'))


# A missing report becomes an empty dict, so the .get() calls below (and the
# truthiness checks in later cells) work whether or not the file was found.
pre = _load_report(pre_path)
qc = _load_report(qc_path)

print('Prelaunch verdict:', pre.get('verdict'))
print('QC verdict:', qc.get('verdict'))


In [None]:
# Prelaunch report: a one-row summary table, followed by the per-session
# table when the report contains a non-empty 'sessions' mapping.
if pre:
    print('--- Prelaunch ---')
    display(pd.DataFrame([
        {
            field: pre.get(field)
            for field in ('verdict', 'sessions_scanned', 'participants_found')
        }
    ]))

    sess = pre.get('sessions', {})
    if sess:
        # Mapping keys become the index; move them into a 'sessionId' column.
        sess_df = pd.DataFrame.from_dict(sess, orient='index')
        sess_df = sess_df.reset_index()
        sess_df = sess_df.rename(columns={'index': 'sessionId'})
        display(sess_df.sort_values('sessionId'))


In [None]:
# QC report: a one-row summary table, followed by the core-feature
# missingness table (largest missing fraction first) when present.
if qc:
    print('--- QC ---')
    display(pd.DataFrame([
        {
            field: qc.get(field)
            for field in (
                'verdict',
                'participants_count',
                'sessions_count',
                'total_windows',
                'typing_presence',
                'tapping_presence',
            )
        }
    ]))

    miss = qc.get('missingness_core', {})
    if miss:
        # Mapping keys become the index; move them into a 'feature' column.
        miss_df = pd.DataFrame.from_dict(miss, orient='index')
        miss_df = miss_df.reset_index().rename(columns={'index': 'feature'})
        display(miss_df.sort_values('missing_frac', ascending=False))


In [None]:
# Horizontal bar chart of the missing fraction per core feature; skipped when
# the QC report is absent or has no 'missingness_core' entry.
if qc and qc.get('missingness_core'):
    miss_df = (
        pd.DataFrame.from_dict(qc['missingness_core'], orient='index')
        .sort_values('missing_frac', ascending=True)
    )

    # Figure height scales with the number of features, with a floor of 4.
    fig_height = max(4, 0.35 * len(miss_df))
    fig, ax = plt.subplots(figsize=(8, fig_height))
    ax.barh(miss_df.index, miss_df['missing_frac'])
    ax.set_title('Core Feature Missingness')
    ax.set_xlabel('Missing fraction')
    ax.set_xlim(0, 1)  # fractions share a fixed 0-1 axis
    fig.tight_layout()
    plt.show()


## Interpretation

- Use this notebook for quick interpretation and screenshots.
- Treat the scripts and their JSON reports as the source of truth for pass/fail decisions; this notebook only displays them.
