In [None]:
import os
import tempfile

import pdfplumber
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import pipeline

# Flask application object for this module. CORS(app) is called with no
# options, so flask-cors is applied with whatever its defaults are.
app = Flask(__name__)
CORS(app)

# Load the fine-tuned summarization model and its tokenizer from the local
# "./bart-cnn-finetuned" directory. This runs once, at import time, and the
# resulting pipeline object is shared by every request.
summarizer = pipeline("summarization", model="./bart-cnn-finetuned", tokenizer="./bart-cnn-finetuned")

# Cap applied to the extracted text before it is summarized. The request
# handler compares it against len(text) and slices the string, so it is a
# character count.
# NOTE(review): 1024 looks like it may mirror the model's token limit —
# confirm whether a character cap is what was intended.
MAX_LEN = 1024
# File extensions (compared lower-cased, without the dot) accepted for upload.
ALLOWED_EXTENSIONS = {"pdf", "txt"}
# Relative directory where an upload is written while it is processed; the
# handler creates it on demand and removes the file afterwards.
TEMP_DIR = "temp"

def is_valid(filename):
    """Tell whether *filename* ends in one of the accepted extensions.

    The extension is whatever follows the last dot, compared
    case-insensitively against ``ALLOWED_EXTENSIONS``. A name without any
    dot is rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS

def extract_text(path, ext):
    """Return the text content of the file at *path*.

    Args:
        path: Filesystem path of the file to read.
        ext: Extension without the dot. ``"pdf"`` is read with pdfplumber;
            any other value is read as a plain-text file.

    Returns:
        For a PDF, the text of every page joined with single spaces (a page
        that yields no text contributes an empty string). For a text file,
        its decoded contents.
    """
    if ext == "pdf":
        with pdfplumber.open(path) as pdf:
            return " ".join(page.extract_text() or "" for page in pdf.pages)
    # Uploaded text files are not guaranteed to be clean UTF-8:
    #  - "utf-8-sig" drops a leading byte-order mark if one is present and
    #    otherwise behaves like plain UTF-8;
    #  - errors="replace" substitutes U+FFFD for undecodable bytes instead of
    #    raising UnicodeDecodeError and failing the whole request.
    with open(path, "r", encoding="utf-8-sig", errors="replace") as f:
        return f.read()

@app.route("/summarize", methods=["POST"])
def summarize_file():
    """Summarize an uploaded PDF or text file.

    Expects a multipart POST with the document in the ``file`` field.

    Returns:
        On success, a JSON object with:
          - ``summary``: the generated summary text,
          - ``original_length``: character count of the extracted text
            before it is cut down to ``MAX_LEN``,
          - ``summary_length``: character count of the summary.
        A JSON ``{"error": ...}`` body with status 400 when no file is
        sent, the extension is not allowed, or no text could be extracted.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    file = request.files["file"]
    if not file or not is_valid(file.filename):
        return jsonify({"error": "Invalid file type"}), 400

    # Safe to index: is_valid() already guaranteed a dot and an allowed suffix.
    ext = file.filename.rsplit(".", 1)[1].lower()

    os.makedirs(TEMP_DIR, exist_ok=True)
    # Never build the on-disk path from the client-supplied filename: it can
    # contain "../" or an absolute path and escape TEMP_DIR, and two uploads
    # with the same name would overwrite/delete each other's file. mkstemp
    # gives a unique, server-chosen name inside TEMP_DIR; only the validated
    # extension is reused.
    fd, path = tempfile.mkstemp(suffix="." + ext, dir=TEMP_DIR)
    os.close(fd)

    try:
        file.save(path)
        text = extract_text(path, ext)
        if not text.strip():
            # Empty files and image-only PDFs yield nothing to summarize.
            return jsonify({"error": "No extractable text found in file"}), 400

        # Record the size before truncating so the response reports the real
        # document length rather than the capped one.
        original_length = len(text)
        text = text[:MAX_LEN]

        # truncation=True lets the tokenizer clip the input to the model's
        # maximum length; the character cap above does not bound the token
        # count for every script.
        result = summarizer(
            text,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        summary = result[0]['summary_text']

        return jsonify({
            "summary": summary,
            "original_length": original_length,
            "summary_length": len(summary)
        })
    finally:
        # Always remove the temporary upload, including on error paths.
        if os.path.exists(path):
            os.remove(path)

if __name__ == "__main__":
    # Flask's debug mode turns on the interactive debugger, which must not be
    # reachable on a service that accepts uploads from other origins. Keep it
    # off by default and make it an explicit opt-in for local development,
    # e.g. `FLASK_DEBUG=1 python app.py`.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)