In [None]:
!pip install yomitoku fastapi uvicorn pyngrok nest-asyncio python-multipart

import nest_asyncio
from fastapi import FastAPI, HTTPException, File, UploadFile
import uvicorn
import asyncio
from PIL import Image, ImageOps
import io
import numpy as np
import warnings
import tempfile
import sys

# Detect whether this code is running inside Google Colab.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# Third-party OCR / tunnelling dependencies; stop right away if one is missing.
try:
    from yomitoku.document_analyzer import DocumentAnalyzer
    from yomitoku.data.functions import load_pdf
    from pyngrok import ngrok, conf
except ImportError as e:
    print(f"ライブラリが見つかりません: {e}")
    sys.exit(1)

app = FastAPI()

# Initialise the OCR engine. ``analyzer`` stays ``None`` when construction
# fails, which the /ocr endpoint reports as "not initialised".
analyzer = None
try:
    warnings.filterwarnings('ignore', category=UserWarning, module='onnxruntime')
    import torch
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    print(f"Using device: {device}")
    analyzer = DocumentAnalyzer(device=device)
except Exception as e:
    print(f"DocumentAnalyzerの初期化に失敗しました: {e}")

def _item_text(item):
    """Return the text of an OCR item from the first of ``content``,
    ``contents`` or ``text`` that exists, or ``""`` when none exists."""
    for attr in ("content", "contents", "text"):
        if hasattr(item, attr):
            return getattr(item, attr)
    return ""


def _item_box(item):
    """Return ``[x_min, y_min, x_max, y_max]`` for an OCR item, or ``None``.

    ``item.box`` is used as-is when present; otherwise the axis-aligned
    bounds of ``item.points`` are computed. Points that cannot be read as an
    (N, 2) array yield ``None`` so the caller can skip the item.
    """
    if hasattr(item, "box"):
        return item.box
    if hasattr(item, "points"):
        try:
            pts = np.asarray(item.points)
            if pts.size > 0:
                xs, ys = pts[:, 0], pts[:, 1]
                return [np.min(xs), np.min(ys), np.max(xs), np.max(ys)]
        except (IndexError, TypeError, ValueError):
            # Flat, ragged or non-numeric point lists: treat as "no geometry".
            return None
    return None


def extract_text_preserving_layout(results, *, line_tolerance=0.6):
    """Rebuild text in visual reading order from OCR results.

    Items are read from ``results.lines`` when that attribute is non-empty,
    otherwise from ``results.paragraphs``. Items are sorted by vertical
    centre, grouped into visual lines, ordered left-to-right inside each
    line and joined with two spaces; lines are joined with newlines.

    Args:
        results: OCR result object exposing ``lines`` or ``paragraphs``.
        line_tolerance: An item joins the current line when its vertical
            centre is within ``line_tolerance`` times the height of the
            previously added item. Defaults to the original 0.6.

    Returns:
        The reconstructed text; ``""`` for falsy ``results`` or when no item
        has both text and usable geometry; ``str(results)`` when neither
        ``lines`` nor ``paragraphs`` is available.
    """
    if not results:
        return ""

    source_list = getattr(results, "lines", None) or getattr(results, "paragraphs", None)
    if not source_list:
        return str(results)

    elements = []
    for item in source_list:
        text = _item_text(item)
        box = _item_box(item) if text else None
        if box is None:
            continue
        try:
            top, bottom = box[1], box[3]
            elements.append({
                "text": text,
                "cy": (top + bottom) / 2,
                "h": bottom - top,
                "x": box[0],
            })
        except (TypeError, IndexError, KeyError):
            # Best effort: a malformed box drops only that item.
            continue

    if not elements:
        return ""

    elements.sort(key=lambda el: el["cy"])

    # Group into visual lines; each candidate is compared with the item most
    # recently added to the current line (before the left-to-right sort).
    rows = []
    for element in elements:
        if rows:
            previous = rows[-1][-1]
            if abs(element["cy"] - previous["cy"]) < previous["h"] * line_tolerance:
                rows[-1].append(element)
                continue
        rows.append([element])

    return "\n".join(
        "  ".join(el["text"] for el in sorted(row, key=lambda el: el["x"]))
        for row in rows
    )

@app.get("/")
def read_root():
    """Liveness endpoint: return a fixed message showing the API is up."""
    status = {"message": "Yomitoku API is running"}
    return status

async def _ocr_to_text(img):
    """Run the blocking analyzer on one image in the default executor and
    return its layout-preserving text."""
    loop = asyncio.get_running_loop()
    results, _, _ = await loop.run_in_executor(None, analyzer, img)
    return extract_text_preserving_layout(results)


@app.post("/ocr")
async def run_ocr(file: UploadFile = File(...)):
    """OCR an uploaded PDF or image and return the recognised text.

    Uploads whose content type is ``application/pdf`` are written to a
    temporary file, loaded page by page, and the page texts are joined with
    blank lines. Any other upload is decoded as a single image.

    Args:
        file: The uploaded file.

    Returns:
        ``{"result": <text>}``.

    Raises:
        HTTPException: 503 when the analyzer failed to initialise, 400 when
            the PDF yields no pages, 500 for any other processing error.
    """
    if analyzer is None:
        raise HTTPException(status_code=503, detail="OCR Not Initialized")

    try:
        image_bytes = await file.read()

        if file.content_type == "application/pdf":
            # load_pdf takes a path, so spill the upload to a temp file that
            # is removed as soon as the pages have been loaded.
            with tempfile.NamedTemporaryFile(delete=True, suffix=".pdf") as temp_pdf:
                temp_pdf.write(image_bytes)
                temp_pdf.flush()
                imgs = load_pdf(temp_pdf.name)

            if not imgs:
                raise HTTPException(status_code=400, detail="PDF Error")

            page_texts = [await _ocr_to_text(img) for img in imgs]
            all_ocr_text = "\n\n".join(page_texts)
        else:
            pil_image = Image.open(io.BytesIO(image_bytes))
            # Apply the EXIF orientation so rotated photos are read upright.
            pil_image = ImageOps.exif_transpose(pil_image)
            pil_image = pil_image.convert("RGB")
            # Reverse the channel axis (RGB -> BGR) into a contiguous copy.
            img = np.array(pil_image)[:, :, ::-1].copy()
            all_ocr_text = await _ocr_to_text(img)

        return {"result": all_ocr_text}

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must keep their status
        # instead of being rewrapped as a 500 below.
        raise
    except Exception as e:
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e)) from e

from google.colab import userdata

NGROK_AUTHTOKEN = userdata.get('NGROK_AUTHTOKEN')

if NGROK_AUTHTOKEN:
    conf.get_default().auth_token = NGROK_AUTHTOKEN
    conf.get_default().region = "jp"
    nest_asyncio.apply()

    try:
        tunnel = ngrok.connect(8000)
        public_url = tunnel.public_url

        print("\n" + "="*80)
        print("サーバーが起動しました")
        print(f"公開URL: {public_url}")
        
        print("\n---------------------------------------------------------")
        print("   環境変数設定コマンド (コピーして実行してください)")
        print("---------------------------------------------------------")

        print("\n【 Windows (PowerShell) 】")
        print(f'$env:OCR_API_URL = "{public_url}"')

        print("\n【 Windows (コマンドプロンプト) 】")
        print(f'set OCR_API_URL={public_url}')

        print("\n【 Ubuntu / Mac (Bash/Zsh) 】")
        print(f'export OCR_API_URL="{public_url}"')
        
        print("="*80 + "\n")

        config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="error")
        server = uvicorn.Server(config)
        await server.serve()

    except Exception as e:
        print(f"起動エラー: {e}")
else:
    print("NGROK_AUTHTOKEN が設定されていません。")