In [21]:
!pip install transformers ipywidgets python-docx PyMuPDF

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting PyMuPDF
  Downloading PyMuPDF-1.24.9-cp310-none-manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting PyMuPDFb==1.24.9 (from PyMuPDF)
  Downloading PyMuPDFb-1.24.9-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.4 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyMuPDF-1.24.9-cp310-none-manylinux2014_x86_64.whl (3.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.5/3.5 MB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyMuPDFb-1.24.9-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (15.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx, PyMuPDFb, PyMuPDF
Suc

In [42]:
from transformers import pipeline

# Build the summarization pipeline once so every call below reuses the same object
SUMMARIZATION_MODEL = "facebook/bart-large-cnn"
summarizer = pipeline(task="summarization", model=SUMMARIZATION_MODEL)

def summarize_document(document, max_length=120, min_length=20):
    """Summarize ``document`` with the module-level ``summarizer`` pipeline.

    Args:
        document: Text to summarize.
        max_length: Forwarded to the pipeline as ``max_length`` (default 120).
        min_length: Forwarded to the pipeline as ``min_length`` (default 20).

    Returns:
        The ``'summary_text'`` entry of the first result the pipeline returns.
    """
    # truncation=True asks the pipeline to cut inputs that exceed the model's
    # maximum input length instead of failing on long documents. For such
    # documents only the leading part is summarized.
    results = summarizer(
        document,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        truncation=True,
    )
    return results[0]['summary_text']


In [43]:
import docx
import fitz  # PyMuPDF
from io import BytesIO

def read_docx(file):
    """Return the text of every paragraph of a Word document, one per line.

    Args:
        file: Whatever ``docx.Document`` accepts; the click handler in this
            notebook passes a ``BytesIO`` holding the uploaded bytes.

    Returns:
        The paragraph texts joined with newline characters.
    """
    paragraphs = docx.Document(file).paragraphs
    return '\n'.join(paragraph.text for paragraph in paragraphs)

def read_pdf(file):
    """Return the text of every page of a PDF, pages separated by newlines.

    Args:
        file: Binary file-like object; its ``read()`` result is handed to
            PyMuPDF as an in-memory PDF stream.

    Returns:
        The ``get_text()`` output of each page joined with newline characters.
    """
    # Open the document as a context manager so it is closed even if text
    # extraction raises; previously the document was never closed.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        return '\n'.join(page.get_text() for page in doc)


In [46]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Size applied to both text areas; each widget still gets its own Layout object
_TEXT_AREA_SIZE = dict(width='100%', height='200px')

# Text area where the document is typed or pasted (also filled from an upload)
document_input = widgets.Textarea(
    description='Document:',
    placeholder='Type your document here',
    value='',
    disabled=False,
    layout=widgets.Layout(**_TEXT_AREA_SIZE),
)

# Single-file upload restricted to .txt, .docx and .pdf
file_upload = widgets.FileUpload(
    multiple=False,
    accept='.txt,.docx,.pdf',
)

# Read-only text area that shows the generated summary
summary_output = widgets.Textarea(
    description='Summary:',
    placeholder='Summary will appear here',
    value='',
    disabled=True,
    layout=widgets.Layout(**_TEXT_AREA_SIZE),
)

# Button that triggers summarization when clicked
summarize_button = widgets.Button(
    description='Summarize',
    tooltip='Click to summarize the document',
    icon='check',
    button_style='success',
    disabled=False,
)

# Output area the click handler runs inside
output = widgets.Output()

# Helper: normalise FileUpload.value across ipywidgets major versions
def _first_uploaded_file(upload_value):
    """Return ``(file_name, content_bytes)`` for the first upload, or ``None``.

    ipywidgets 7 exposes ``value`` as ``{file_name: {'content': ...}}`` while
    ipywidgets 8 exposes a tuple of dicts that carry their own ``'name'`` key.
    """
    if not upload_value:
        return None
    if isinstance(upload_value, dict):
        file_name, file_info = next(iter(upload_value.items()))
    else:
        file_info = upload_value[0]
        file_name = file_info['name']
    # bytes() also accepts the memoryview that newer ipywidgets versions hand out
    return file_name, bytes(file_info['content'])


# Helper: turn uploaded bytes into text based on the file extension
def _extract_text(file_name, content):
    """Return the text of a supported upload, or ``None`` for other extensions."""
    lowered = file_name.lower()  # match '.PDF' / '.Txt' as well
    if lowered.endswith('.txt'):
        # Undecodable bytes become U+FFFD instead of aborting the whole click
        return content.decode('utf-8', errors='replace')
    if lowered.endswith('.docx'):
        return read_docx(BytesIO(content))
    if lowered.endswith('.pdf'):
        return read_pdf(BytesIO(content))
    return None


# Function to handle button click
def on_summarize_button_click(b):
    """Summarize the uploaded file (if any) or the typed text.

    Args:
        b: The clicked button, as passed by ``Button.on_click``; unused.

    An uploaded .txt/.docx/.pdf file takes precedence over the text area and
    its extracted text is copied into the text area. Uploads with any other
    extension fall back to the text area content. The result, or a prompt when
    there is nothing to summarize, is written to ``summary_output``.
    """
    with output:
        clear_output()
        document = document_input.value

        uploaded = _first_uploaded_file(file_upload.value)
        if uploaded is not None:
            file_name, content = uploaded
            extracted = _extract_text(file_name, content)
            if extracted is not None:
                document = extracted
            document_input.value = document

        if document.strip():  # Ensure document is not empty
            summary_output.value = summarize_document(document)
        else:
            summary_output.value = "Please upload a valid document or enter text."

# Register the click handler on the button
summarize_button.on_click(on_summarize_button_click)

# Render the UI in order: upload, document text, button, summary, handler output
ui_widgets = (file_upload, document_input, summarize_button, summary_output, output)
display(*ui_widgets)


FileUpload(value={}, accept='.txt,.docx,.pdf', description='Upload')

Textarea(value='', description='Document:', layout=Layout(height='200px', width='100%'), placeholder='Type you…

Button(button_style='success', description='Summarize', icon='check', style=ButtonStyle(), tooltip='Click to s…

Textarea(value='', description='Summary:', disabled=True, layout=Layout(height='200px', width='100%'), placeho…

Output()