In [3]:
import pdfplumber
import re
import csv
from pathlib import Path
import time

# ==== 1) 从 PDF 提取 注文番号 (W+10位数字) ====

# Order number: "W" followed by exactly 10 digits.  The negative lookahead
# rejects longer digit runs (e.g. W + 11 digits) instead of silently
# returning their first 10 digits as a truncated, wrong order number.
ORDER_RE = re.compile(r"W\d{10}(?!\d)")

# Same number, but anchored to its "ご注文番号" label.  ".?" tolerates one stray
# character between 注 and 文; the colon (half- or full-width) is optional.
LABELED_ORDER_RE = re.compile(r"ご注.?文番号[:：]?\s*(W\d{10})(?!\d)")


def extract_order_number_from_pdf(pdf_path: Path) -> str:
    """
    Return the order number (``W`` + exactly 10 digits) found in a PDF.

    The text of every page is extracted and joined with newlines, then
    searched in two steps:

    1. a number that directly follows the "ご注文番号" label;
    2. otherwise, the first ``W`` + 10 digits anywhere in the text.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The matched order number, e.g. ``"W1562921866"``.

    Raises:
        ValueError: If no order number of that exact form is found.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # `or ""` guards against extract_text() giving back None / nothing.
        full_text = "\n".join(page.extract_text() or "" for page in pdf.pages)

    # 1) Prefer the number attached to the "ご注文番号" label.
    labeled = LABELED_ORDER_RE.search(full_text)
    if labeled:
        return labeled.group(1)

    # 2) Fallback: first W + 10 digits anywhere in the document.
    fallback = ORDER_RE.search(full_text)
    if fallback:
        return fallback.group(0)

    raise ValueError(f"{pdf_path.name} 中没有找到符合规则的注文番号 (W+10位数字)")


# ==== 2) 读取 CSV：注文番号 -> 変更名前 ====

def load_name_mapping(csv_path: Path) -> dict:
    """
    Build an order-number -> new-name lookup table from a CSV file.

    The file is opened as UTF-8 (a leading BOM is tolerated) and read with
    ``csv.DictReader``, so the first row must be the header.  Two columns
    are used:

    * ``注文番号`` - the key, e.g. ``W1562921866``
    * ``変更名前`` - the value, e.g. ``A2-AD-420 389600 ...``

    Both cells are whitespace-stripped.  Rows where either cell is missing
    or empty are ignored.  If an order number appears more than once, the
    last row wins.
    """
    with csv_path.open("r", encoding="utf-8-sig", newline="") as handle:
        pairs = (
            (
                (record.get("注文番号") or "").strip(),
                (record.get("変更名前") or "").strip(),
            )
            for record in csv.DictReader(handle)
        )
        # Consume the generator while the file is still open.
        return {key: value for key, value in pairs if key and value}


# ==== 3) 文件名简单清洗（防止有特殊字符） ====

def sanitize_filename(name: str) -> str:
    """
    Make *name* safe to use as a file name on macOS and Windows.

    Leading/trailing whitespace is removed, then every character from the
    set ``\\ / : * ? " < > |`` and every ASCII control character
    (0x00-0x1F, e.g. a newline or tab embedded in a CSV cell) is replaced
    with an underscore.

    Args:
        name: The desired file name (without directory part).

    Returns:
        The cleaned name; may be empty if *name* was empty or whitespace.
    """
    # Strip first so that trailing newlines/tabs are dropped (as before)
    # rather than being turned into underscores by the control-char rule.
    return re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", name.strip())



# Driver: rename every PDF in `pdf_dir` to the name that the CSV assigns to
# the order number found inside the PDF.
pdf_dir = Path("./pdfs")           # directory containing the PDFs
csv_path = Path("./name_map.csv")  # CSV with the 注文番号 -> 変更名前 table

mapping = load_name_mapping(csv_path)

# Take a snapshot of the directory listing before renaming anything: the loop
# changes the very directory it iterates, and a lazily evaluated listing could
# skip entries or hand back files that were just renamed.  Sorting also makes
# the processing order deterministic.
for index, pdf_path in enumerate(sorted(pdf_dir.glob("*.pdf"))):
    try:
        order_no = extract_order_number_from_pdf(pdf_path)
    except Exception as e:
        # Batch boundary: report the unreadable / unmatched PDF and move on.
        print(f"[跳过] {pdf_path.name}: {e}")
        continue

    new_base = mapping.get(order_no)
    if not new_base:
        print(f"[警告] CSV 中找不到 注文番号={order_no} 对应的 変更名前，文件: {pdf_path.name}")
        continue

    safe_base = sanitize_filename(new_base)
    new_path = pdf_path.with_name(safe_base + pdf_path.suffix)

    # Already carries its target name (e.g. the script is run a second time):
    # nothing to do, and not a naming conflict.
    if new_path == pdf_path:
        print(f"[跳过] 文件名已是目标名称: {pdf_path.name}")
        continue

    # Never overwrite a different existing file.
    if new_path.exists():
        print(f"[警告] 目标文件已存在，跳过: {new_path.name}")
        continue

    print(f"重命名: {pdf_path.name} -> {new_path.name}")
    try:
        pdf_path.rename(new_path)
    except OSError as e:
        # One failed rename must not abort the rest of the batch.
        print(f"[错误] 重命名失败 {pdf_path.name}: {e}")
        continue
    print(index)




重命名: MC15705050.pdf -> A2-AD-420  　389600　max 256 SI 2台.pdf
0


## 后台翻译

usage: ipykernel_launcher.py [-h] {scan,apply} ...
ipykernel_launcher.py: error: argument cmd: invalid choice: '/Users/syu/Library/Jupyter/runtime/kernel-146c6582-e048-40e3-8881-03ff6fc4ee7d.json' (choose from 'scan', 'apply')


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
