In [2]:
# Colab-only step: ask the user to pick the adapter archive to upload.
# The returned mapping is keyed by uploaded filename; the next cell reads the
# first key to locate the archive.
from google.colab import files
uploaded = files.upload()   # choose your .zip (e.g., led_large_adapter.zip)


Saving led_large_finalproject_adapter.zip to led_large_finalproject_adapter.zip


In [5]:
import os, zipfile

zip_name = list(uploaded.keys())[0]
print("Uploaded:", zip_name)

os.makedirs("models", exist_ok=True)

with zipfile.ZipFile(zip_name, "r") as z:
    z.extractall("models")

print("Unzipped into models/. Listing:")
!ls -R models | head -n 60


Uploaded: led_large_finalproject_adapter.zip
Unzipped into models/. Listing:
models:
content

models/content:
Briefly_FullPaper

models/content/Briefly_FullPaper:
led_large_ckpt

models/content/Briefly_FullPaper/led_large_ckpt:
final_adapter

models/content/Briefly_FullPaper/led_large_ckpt/final_adapter:
adapter_config.json
adapter_model.safetensors
merges.txt
README.md
special_tokens_map.json
tokenizer_config.json
tokenizer.json
vocab.json


In [3]:
!pip -q install transformers accelerate sentencepiece peft rouge-score pymupdf gradio


  Preparing metadata (setup.py) ... done
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 24.1/24.1 MB 112.2 MB/s eta 0:00:00
  Building wheel for rouge-score (setup.py) ... done


In [6]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel

# Base checkpoint the LoRA adapter is applied to; must match your training.
BASE_MODEL_ID = "allenai/led-large-16384-arxiv"
# 👇 Change this to the exact path you saw from ls -R (the directory that
# contains adapter_config.json and adapter_model.safetensors).
ADAPTER_PATH = "models/content/Briefly_FullPaper/led_large_ckpt/final_adapter"

# Use the GPU when the runtime has one attached, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

# Half precision on GPU; on CPU leave the dtype to the library default.
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    BASE_MODEL_ID,
    torch_dtype=torch.float16 if device == "cuda" else None
).to(device)

# Wrap the base model with the trained adapter weights and switch to
# inference behaviour (eval mode).
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH).to(device)
model.eval()

print("✅ Loaded base + LoRA adapter")


Device: cuda
✅ Loaded base + LoRA adapter


In [7]:
import fitz
import re

def extract_pdf_text(pdf_path, max_pages=30):
    """Return the plain text of the leading pages of a PDF.

    Args:
        pdf_path: Path of the PDF file, handed to ``fitz.open``.
        max_pages: Upper bound on how many pages (from the start) are read.

    Returns:
        The text of each page read, joined with newlines; "" when no page
        is read.
    """
    # The context manager closes the document even if a page fails to parse,
    # so repeated requests do not leak open file handles.
    with fitz.open(pdf_path) as doc:
        page_count = min(len(doc), max_pages)
        return "\n".join(doc[i].get_text("text") for i in range(page_count))

def clean_text(t):
    """Collapse every run of whitespace in ``t`` to one space and trim both ends."""
    return re.sub(r"\s+", " ", t).strip()

@torch.inference_mode()
def summarize_text(text, max_input_len=1024, max_new_tokens=128, num_beams=1, min_new_tokens=None):
    """Summarize ``text`` with the loaded LED + LoRA model.

    Args:
        text: Document text to summarize.
        max_input_len: Token budget for the input; longer input is truncated.
        max_new_tokens: Maximum number of tokens to generate.
        num_beams: Beam width; 1 means no beam search.
        min_new_tokens: Minimum number of tokens to generate. When omitted it
            is half of ``max_new_tokens`` capped at 120 (so 256 -> 120); it is
            always clamped so it can never exceed ``max_new_tokens``.

    Returns:
        The decoded summary string, or "" when ``text`` is empty or only
        whitespace.
    """
    # Nothing to summarize: do not ask the model to generate from an empty prompt.
    if not text or not text.strip():
        return ""

    # Honor the caller's settings (UI sliders send floats, hence the int casts).
    max_new_tokens = max(1, int(max_new_tokens))
    num_beams = max(1, int(num_beams))
    if min_new_tokens is None:
        min_new_tokens = min(120, max_new_tokens // 2)
    min_new_tokens = max(0, min(int(min_new_tokens), max_new_tokens))

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=int(max_input_len),
    ).to(device)

    # LED needs global attention on first token
    global_attention_mask = torch.zeros_like(inputs["input_ids"])
    global_attention_mask[:, 0] = 1

    generation_kwargs = dict(
        max_new_tokens=max_new_tokens,
        min_new_tokens=min_new_tokens,   # keeps summaries from stopping too early
        num_beams=num_beams,
        no_repeat_ngram_size=3,
        repetition_penalty=1.15,
    )
    # length_penalty only has an effect in beam-based decoding.
    if num_beams > 1:
        generation_kwargs["length_penalty"] = 1.1

    output_ids = model.generate(
        **inputs,
        global_attention_mask=global_attention_mask,
        **generation_kwargs,
    )

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)



In [10]:
import gradio as gr

def pdf_to_summary(pdf_file, fast_mode=True, max_input_len=1024, max_new_tokens=128, num_beams=1):
    """Gradio callback: extract text from an uploaded PDF and summarize it.

    Args:
        pdf_file: Value of the file input: either a path string or an object
            exposing the path as ``.name``; ``None`` when nothing was uploaded.
        fast_mode: When true, the three tuning arguments below are ignored and
            fixed fast settings are used (6000 chars, 1024 input tokens,
            128 new tokens, 1 beam).
        max_input_len: Input token budget (used only when ``fast_mode`` is off).
        max_new_tokens: Generation length cap (used only when ``fast_mode`` is off).
        num_beams: Beam width (used only when ``fast_mode`` is off).

    Returns:
        ``(preview, summary)``: the first 1200 characters of the cleaned text
        and the generated summary. When there is nothing to summarize the
        preview is "" and the summary slot carries an explanatory message.
    """
    # Button pressed before a file was chosen.
    if pdf_file is None:
        return "", "Please upload a PDF file first."

    # Accept both a plain path and a file wrapper that stores the path in `.name`.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    raw = extract_pdf_text(pdf_path, max_pages=30)
    cleaned = clean_text(raw)

    # No text extracted: avoid generating a summary from nothing.
    if not cleaned:
        return "", "No extractable text found in this PDF (it may be a scanned image)."

    # clip chars for speed
    cleaned = cleaned[:6000] if fast_mode else cleaned[:12000]

    summary = summarize_text(
        cleaned,
        max_input_len=1024 if fast_mode else int(max_input_len),
        max_new_tokens=128 if fast_mode else int(max_new_tokens),
        num_beams=1 if fast_mode else int(num_beams),
    )

    preview = cleaned[:1200]
    return preview, summary

# Build the demo UI. Components are created in display order inside the
# Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Briefly PDF Summarizer (Colab GPU)\nUpload a PDF → extract text → LED + LoRA summary")

    pdf = gr.File(file_types=[".pdf"], label="Upload PDF")
    # While fast mode is checked, pdf_to_summary ignores the three sliders
    # below and uses its fixed fast settings.
    fast_mode = gr.Checkbox(value=True, label="Fast mode (recommended)")
    max_input_len = gr.Slider(512, 4096, value=1024, step=256, label="Max input tokens")
    max_new_tokens = gr.Slider(64, 512, value=128, step=32, label="Max new tokens")
    num_beams = gr.Slider(1, 4, value=1, step=1, label="Beams")

    btn = gr.Button("Generate Summary")

    preview_out = gr.Textbox(label="Extracted Preview (first ~1200 chars)", lines=8)
    summary_out = gr.Textbox(label="Generated Summary", lines=10)

    # Input order must match pdf_to_summary's positional parameters; the two
    # outputs receive its (preview, summary) return value.
    btn.click(
        pdf_to_summary,
        inputs=[pdf, fast_mode, max_input_len, max_new_tokens, num_beams],
        outputs=[preview_out, summary_out]
    )

demo.queue()
# share=True publishes a temporary public URL for the app.
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a82ff44cc6ebd0d09f.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


