In [1]:
"""
把本地 WebScraperData 下的 Excel 先转成 CSV 再上传到
/AppleStockChecker/purchasing-price-records/import-tradein-xlsx/?dry_run=0

依赖（至少装 openpyxl；如需 .xls/.xlsb/.ods 再按需加）：
  pip install pandas openpyxl
  pip install "xlrd<2.0"      # 读 .xls（可选）
  pip install pyxlsb          # 读 .xlsb（可选）
  pip install odfpy           # 读 .ods（可选）
"""

import os, glob, pathlib, subprocess, time, uuid, shutil
from pathlib import Path
import pandas as pd

# ------------- 配置区 -------------
ACCESS = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ0b2tlbl90eXBlIjoiYWNjZXNzIiwiZXhwIjoxNzYwNjg3NzIwLCJpYXQiOjE3NjA2ODU5MjAsImp0aSI6IjlhMmExNmExMmFlMTRiMjhiYTZjZjRiODY4ZWZkNjhlIiwidXNlcl9pZCI6IjEifQ.jbaRx2uCvbbrZgUlNzCpAkV5944Pc480AehxJQ8i4Do"   # 建议用环境变量注入
URL = "https://yamaguti.ngrok.io/AppleStockChecker/purchasing-price-records/import-tradein-xlsx/?dry_run=0"
ROOT = "WebScraperData"                           # 本地根目录
SLEEP_BETWEEN = 1.0                               # 每个文件之间的间隔（秒）
RETRIES = 2
BACKOFF = 2.0
TMP_DIR = Path(".tmp_csv_uploads")                # 转换后的临时 CSV 存放目录
TMP_DIR.mkdir(exist_ok=True)

# 匹配 Excel/CSV
PATTERNS = ["**/*.xlsx", "**/*.xls", "**/*.xlsm", "**/*.xlsb", "**/*.ods", "**/*.csv"]

# ---------------------------------

ENGINE_HINT = {
    ".xlsx": "openpyxl",
    ".xlsm": "openpyxl",
    ".xls":  "xlrd",    # 需要 xlrd<2.0
    ".xlsb": "pyxlsb",
    ".ods":  "odf",
}

def list_files(root: str | Path) -> list[str]:
    files: list[str] = []
    for pat in PATTERNS:
        files += glob.glob(str(Path(root) / pat), recursive=True)
    # 过滤临时文件 ~$
    return sorted(f for f in files if not Path(f).name.startswith("~$"))

def to_csv_if_needed(path: str | Path) -> Path:
    """
    若是 Excel -> 转 CSV 到 TMP_DIR；若已是 CSV 直接返回原路径。
    返回：CSV 文件路径
    """
    p = Path(path)
    suf = p.suffix.lower()
    if suf == ".csv":
        return p

    engine = ENGINE_HINT.get(suf)
    # 读 Excel 为 DataFrame（默认取第一个 sheet；如需更复杂可扩展）
    try:
        df = pd.read_excel(p, engine=engine) if engine else pd.read_excel(p)
    except ImportError as e:
        raise RuntimeError(f"缺少读取 {suf} 的依赖（{e}）。请按需安装：openpyxl / xlrd<2 / pyxlsb / odfpy") from e
    except Exception as e:
        raise RuntimeError(f"读取 {p.name} 失败：{e}") from e

    # 写到临时 CSV（UTF-8 BOM，便于日后人工打开）
    out = TMP_DIR / (p.stem + ".csv")
    try:
        df.to_csv(out, index=False, encoding="utf-8-sig")
    except Exception as e:
        raise RuntimeError(f"写出临时 CSV 失败：{out.name}，{e}") from e
    return out

def post_one(csv_path: Path, access: str, retries: int = 2, backoff: float = 2.0) -> tuple[str, str]:
    """
    逐个文件 POST；返回 (状态行, 整个响应字符串)
    """
    # 用 curl 发送 multipart；注意不要把 -H 里的 token 打印到日志
    cmd = [
        "curl", "-sS", "-i", "-X", "POST", URL,
        "-H", f"Authorization: Bearer {access}",
        "-F", f"files=@{str(csv_path)}",
    ]
    last_err = ""
    for attempt in range(retries + 1):
        try:
            out = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
            text = out.decode("utf-8", "replace")
            status_line = text.splitlines()[0] if text else ""
            return status_line, text
        except subprocess.CalledProcessError as e:
            last_err = e.output.decode("utf-8", "replace")
            if attempt >= retries:
                return f"ERROR after {retries} retries", last_err
            time.sleep(backoff * (attempt + 1))
    return "ERROR", last_err

def main():
    if not ACCESS or ACCESS == "<token>":
        raise SystemExit("请先设置 ACCESS（export ACCESS=...）或在脚本顶部填写。")

    files = list_files(ROOT)
    print(f"Found {len(files)} files under {ROOT}")

    success = fail = 0
    for i, f in enumerate(files[10:], 1):
        src = Path(f)
        # 从文件名提取清洗器名：<dir>/<shopX>.<ext> → shopX
        source_name = src.stem
        print(f"[{i}/{len(files)}] {src}  ->  source={source_name}")

        try:
            csv_path = to_csv_if_needed(src)
        except Exception as e:
            fail += 1
            print(f"  ✗ 转换失败：{e}")
            continue

        status, resp = post_one(csv_path, access=ACCESS, retries=RETRIES, backoff=BACKOFF)
        print("  ", status)
        # 简要解析是否 202/200
        ok = status.startswith("HTTP/1.1 202") or status.startswith("HTTP/2 202") or \
             status.startswith("HTTP/1.1 200") or status.startswith("HTTP/2 200")
        if not ok:
            fail += 1
            # 打印一小段响应帮助定位
            snippet = "\n".join(resp.splitlines()[:15])
            print(snippet)
        else:
            success += 1

        time.sleep(SLEEP_BETWEEN)

    print(f"\nDone. success={success}, fail={fail}, tmp_csv_dir={TMP_DIR.resolve()}")
    # 如需清理临时 CSV，解除注释：
    # shutil.rmtree(TMP_DIR, ignore_errors=True)

if __name__ == "__main__":
    main()

Found 15 files under WebScraperData
[1/15] WebScraperData/shop6-3/shop6-3.xlsx  ->  source=shop6-3
   HTTP/2 503 
HTTP/2 503 
content-type: text/html
ngrok-error-code: ERR_NGROK_3004
referrer-policy: no-referrer
content-length: 2516
date: Fri, 17 Oct 2025 08:02:11 GMT

<!DOCTYPE html>
<html class="h-full" lang="en-US" dir="ltr">
  <head>
    <link rel="preload" href="https://cdn.ngrok.com/static/fonts/euclid-square/EuclidSquare-Regular-WebS.woff" as="font" type="font/woff" crossorigin="anonymous" />
    <link rel="preload" href="https://cdn.ngrok.com/static/fonts/euclid-square/EuclidSquare-RegularItalic-WebS.woff" as="font" type="font/woff" crossorigin="anonymous" />
    <link rel="preload" href="https://cdn.ngrok.com/static/fonts/euclid-square/EuclidSquare-Medium-WebS.woff" as="font" type="font/woff" crossorigin="anonymous" />
    <link rel="preload" href="https://cdn.ngrok.com/static/fonts/euclid-square/EuclidSquare-Semibold-WebS.woff" as="font" type="font/woff" crossorigin="anonymou

KeyboardInterrupt: 