In [4]:
from google.colab import drive
# Mount Google Drive so files under MyDrive are reachable from this runtime.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [7]:
!unzip  '/content/drive/MyDrive/Collab Projects/fine-tuned-model.zip' -d fine-tuned-model

Archive:  /content/drive/MyDrive/Collab Projects/fine-tuned-model.zip
  inflating: fine-tuned-model/vocab.txt  
  inflating: fine-tuned-model/special_tokens_map.json  
  inflating: fine-tuned-model/model.safetensors  
  inflating: fine-tuned-model/tokenizer_config.json  
  inflating: fine-tuned-model/config.json  
  inflating: fine-tuned-model/tokenizer.json  


In [8]:
!pip install transformers torch accelerate pymupdf


Collecting accelerate
  Downloading accelerate-0.31.0-py3-none-any.whl (309 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pymupdf
  Downloading PyMuPDF-1.24.5-cp310-none-manylinux2014_x86_64.whl (3.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.5/3.5 MB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collec

In [9]:
import gc
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
import fitz  # PyMuPDF

# Clear all previous states and cache.
# First run a full garbage-collection pass, then ask PyTorch to release its
# cached CUDA memory. The order is kept as written: presumably collecting first
# lets empty_cache() hand back memory that dead objects were still holding.
gc.collect()
torch.cuda.empty_cache()


In [25]:
import fitz
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` output joined with
        no separator. An empty string is returned for a document with no pages.
    """
    # The context manager closes the document even if a page fails to parse,
    # so the file handle is not left open.
    with fitz.open(pdf_path) as doc:
        # Collect per-page text and join once instead of growing a string.
        return "".join(page.get_text() for page in doc)

In [11]:
# Directory the fine-tuned checkpoint archive was unzipped into.
model_path = "./fine-tuned-model"

# Restore the question-answering model and its matching tokenizer from disk.
model = AutoModelForQuestionAnswering.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Wrap both in a ready-to-call question-answering pipeline.
qa_pipeline = pipeline(
    task='question-answering',
    model=model,
    tokenizer=tokenizer,
)


In [30]:
# Example usage: answer one question about a PDF stored on the mounted Drive.
pdf_path = '/content/drive/MyDrive/Collab Projects/Problem Description.pdf'
# A plain string (not a one-element list) so the pipeline is asked for a single
# answer, matching the single-dict access `result['answer']` below.
question = "What is problem Statement?"
context = extract_text_from_pdf(pdf_path)
result = qa_pipeline(question=question, context=context)
print(f"Answer: {result['answer']}")

Answer: Json file


In [13]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.36.1-py3-none-any.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m54.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==1.0.1 (from gradio)
  Downloading gradio_client-1.0.1-py3-none-any.whl (318 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.1/318.1 kB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━

In [29]:
import gradio as gr

# Define the Gradio interface function
def qa_interface(pdf_file, question):
    """Answer a question about an uploaded PDF for the Gradio UI.

    Args:
        pdf_file: The uploaded file. Accepts either a path string or an object
            exposing the path as ``.name``; ``None`` means nothing was uploaded.
        question: The question text typed by the user.

    Returns:
        The answer string produced by ``qa_pipeline``, or a short message
        asking the user to fix the input when the upload is missing, the
        question is blank, or the PDF yields no text.
    """
    if pdf_file is None:
        return "Please upload a PDF file."
    if not question or not question.strip():
        return "Please enter a question."

    # Fall back to the value itself when it is already a plain path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    context = extract_text_from_pdf(pdf_path)
    # Stop here so the pipeline is never called with an empty context.
    if not context.strip():
        return "No extractable text was found in the PDF."

    result = qa_pipeline(question=question.strip(), context=context)
    return f"{result['answer']}"

# Build the web UI: a PDF upload and a question box in, plain text out.
pdf_input = gr.File(file_count="single", label="Upload PDF")
question_input = gr.Textbox(
    lines=2,
    placeholder="Enter your question here...",
    label="Question",
)
interface = gr.Interface(
    fn=qa_interface,
    inputs=[pdf_input, question_input],
    outputs="text",
    title="PDF Question Answering",
    description="Upload a PDF and ask a question. The model will extract the answer from the PDF content.",
)

# Start serving the interface.
interface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://37deff064689806d29.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


