In [2]:
from pathlib import Path
import pandas as pd

# 1) Detect project root (qepc_project/)
cur = Path.cwd()
project_root = None
for _ in range(8):
    if cur.name == "qepc_project" or (cur / "data").exists():
        project_root = cur
        break
    cur = cur.parent

if project_root is None:
    project_root = Path.cwd()
    print(f"‚ö†Ô∏è Could not auto-detect qepc_project root. Using CWD: {project_root}")
else:
    print(f"üìÇ Detected project root: {project_root}")

# 2) Find all CSV and XML files under root
csv_files = sorted(project_root.rglob("*.csv"), key=lambda p: str(p).lower())
xml_files = sorted(project_root.rglob("*.xml"), key=lambda p: str(p).lower())

print(f"\nFound {len(csv_files)} CSV files and {len(xml_files)} XML files under project root.\n")

# 3) Helper: pretty separator
def sep(title: str):
    print("\n" + "=" * 80)
    print(title)
    print("=" * 80)

# 4) Dump info for each CSV
for path in csv_files:
    rel = path.relative_to(project_root)
    sep(f"üìÑ CSV FILE: {rel}")
    try:
        # Read a small sample
        df = pd.read_csv(path, nrows=5)
        cols = list(df.columns)
        print(f"Columns ({len(cols)}): {cols}")
        print("Sample rows (up to 5):")
        print(df.head(5))
    except Exception as e:
        print(f"‚ùå Could not read CSV sample: {e}")

# 5) Dump info for each XML (just first few lines of text)
for path in xml_files:
    rel = path.relative_to(project_root)
    sep(f"üìÑ XML FILE: {rel}")
    try:
        with path.open("r", encoding="utf-8", errors="ignore") as f:
            lines = []
            for i, line in enumerate(f):
                if i >= 15:   # limit lines per file
                    break
                lines.append(line.rstrip("\n"))
        print("First ~15 lines:")
        for ln in lines:
            print(ln)
    except Exception as e:
        print(f"‚ùå Could not read XML sample: {e}")

print("\n‚úÖ Scan complete. You can copy/paste this entire output as a data manifest backup.")


üìÇ Detected project root: C:\Users\wdors\qepc_project

Found 69 CSV files and 0 XML files under project root.


üìÑ CSV FILE: data\.ipynb_checkpoints\GameResults_2025-checkpoint.csv
Columns (1): ['version https://git-lfs.github.com/spec/v1']
Sample rows (up to 5):
          version https://git-lfs.github.com/spec/v1
0  oid sha256:bf753b100630246552ee3f73510026ade38...
1                                        size 104970

üìÑ CSV FILE: data\.ipynb_checkpoints\Games-checkpoint.csv
Columns (1): ['version https://git-lfs.github.com/spec/v1']
Sample rows (up to 5):
          version https://git-lfs.github.com/spec/v1
0  oid sha256:bfd107bbc7d79d201fc7d3c91aa48d7cfaa...
1                                        size 161425

üìÑ CSV FILE: data\.ipynb_checkpoints\Injury_Overrides-checkpoint.csv
Columns (5): ['Team', 'PlayerName', 'Status', 'Impact', 'Note']
Sample rows (up to 5):
                    Team         PlayerName          Status  Impact  \
0         Indiana Pacers  Tyrese Halibur