In [1]:
from dotenv import load_dotenv, find_dotenv
# Read the local .env file (located via find_dotenv()). The return value is
# bound to `_` so this cell produces no output.
_ = load_dotenv(find_dotenv())

In [2]:
from openai import OpenAI

# Module-level API client used by get_completion_from_messages.
# NOTE(review): no credentials are passed here; presumably the SDK picks the
# API key up from the environment (e.g. the .env file loaded earlier) — confirm.
client = OpenAI()

In [3]:
def get_completion_from_messages(messages, model="gpt-5"):
    """Ask the model for a reply to the given conversation.

    Args:
        messages: Conversation payload, forwarded unchanged as the ``input``
            argument of ``client.responses.create``.
        model: Model identifier forwarded as ``model``.

    Returns:
        The ``output_text`` attribute of the response object.
    """
    request_kwargs = {"model": model, "input": messages}
    reply = client.responses.create(**request_kwargs)
    return reply.output_text

In [4]:
def _guess_mime(filename: str) -> str:
    fn = (filename or "").lower()
    if fn.endswith(".png"):
        return "image/png"
    if fn.endswith(".jpg") or fn.endswith(".jpeg"):
        return "image/jpeg"
    if fn.endswith(".webp"):
        return "image/webp"
    return "image/png"

In [5]:
def _to_data_url(file_bytes: bytes, mime: str) -> str:
    b64 = base64.b64encode(file_bytes).decode("utf-8")
    return f"data:{mime};base64,{b64}"

In [6]:
def _set_input_text(text):
    """Set the text box content, also syncing ``value_input`` when writable."""
    inp.value = text
    try:
        inp.value_input = text  # keep UI in sync if supported
    except Exception:
        # Deliberate best-effort: failing to write `value_input` must not
        # break the chat, `inp.value` has already been updated above.
        pass


def _wrapped_markdown(text, extra_styles=None):
    """Return a full-width Markdown pane whose text wraps instead of overflowing."""
    styles = dict(extra_styles or {})
    styles.update({
        'white-space': 'pre-wrap',
        'overflow-wrap': 'anywhere',
        'word-break': 'break-word',
    })
    return pn.pane.Markdown(text, sizing_mode='stretch_width', styles=styles)


def _render_chat():
    """Trim ``panels`` to at most ``MAX_PANELS`` rows and wrap them in a scrollable column."""
    if len(panels) > MAX_PANELS:
        del panels[: len(panels) - MAX_PANELS]  # drop the oldest rows
    return pn.Column(*panels, height=300, scroll=True, sizing_mode="stretch_width", styles={"overflow": "auto"})


def collect_messages(_):
    """Send the pending text and/or uploaded image to the model and re-render the chat.

    Reads the prompt from ``inp`` and the optional upload from ``file_inp``,
    calls ``get_completion_from_messages`` with the conversation so far plus
    the new user turn, and appends the rendered rows to ``panels``.

    ``context`` is extended only after a reply has been received, so a failed
    request never leaves a user turn without a matching assistant turn. On
    failure an error row is shown, the typed text is put back into the input
    box and the upload is left in place, so the user can simply retry.

    Args:
        _: Ignored; the value supplied by the binding that triggers this
            callback.

    Returns:
        A scrollable ``pn.Column`` holding the (trimmed) chat rows.
    """
    prompt = (inp.value_input or "").strip()
    # Snapshot the upload once so the request, the preview and the cleanup all
    # refer to the same bytes even though a blocking API call sits in between.
    image_bytes = file_inp.value
    image_name = file_inp.filename
    _set_input_text('')  # clear input box

    # Build multimodal user content (text + optional image)
    user_content = []
    if prompt:
        user_content.append({"type": "input_text", "text": prompt})
    if image_bytes:
        data_url = _to_data_url(image_bytes, _guess_mime(image_name))
        user_content.append({"type": "input_image", "image_url": data_url})
    if not user_content:
        return _render_chat()

    # Text-only turns keep the plain string form; any turn that carries an
    # image uses the multimodal list form.
    user_message = {'role': 'user', 'content': user_content if image_bytes else prompt}

    try:
        response = get_completion_from_messages([*context, user_message])
    except Exception as exc:
        # `context` was not touched, so there is nothing to roll back. Surface
        # the problem in the chat and restore the text for a retry.
        panels.append(pn.Row('Error:', _wrapped_markdown(
            f"Request failed ({type(exc).__name__}): {exc}. Your message was not sent; please try again."
        )))
        _set_input_text(prompt)
        return _render_chat()

    # Collapse excessive blank lines to keep chat height reasonable
    response = re.sub(r"\n{3,}", "\n\n", response or "").strip()
    display_prompt = re.sub(r"\n{3,}", "\n\n", prompt).strip()
    context.extend([user_message, {'role': 'assistant', 'content': response}])

    # Render UI
    if prompt:
        panels.append(pn.Row('User:', _wrapped_markdown(display_prompt)))
    if image_bytes:
        panels.append(pn.Row('Image:', pn.pane.Image(image_bytes, width=250, height=180, sizing_mode='fixed', styles={'object-fit': 'contain'})))
        # clear upload after sending
        file_inp.value = b""
        file_inp.filename = ""
    panels.append(pn.Row('Assistant:', _wrapped_markdown(response, {'background-color': '#F6F6F6'})))

    return _render_chat()

In [7]:
import panel as pn  # GUI
import re
import base64
pn.extension()

# --- Conversation state shared with collect_messages -------------------------
MAX_PANELS = 30  # cap on rendered chat rows, to avoid huge output
panels = []  # rendered chat rows, oldest first

# Instructions placed in the first (system) message of the conversation.
SYSTEM_PROMPT = """
You are StudyBuddy, a helpful learning tutor. \
The user may ask a question as text or upload an image (e.g., homework, notes, diagrams, code, math). \
Your job is to: \
1) briefly restate the task in your own words, \
2) ask 1–2 clarifying questions ONLY if truly necessary, \
3) give a step-by-step explanation with key concepts, \
4) provide a short mini-quiz (2–4 questions) to check understanding, \
5) if the user shares their attempt, diagnose mistakes and give targeted hints. \
Be concise, accurate, and student-friendly. Use Markdown. \
If an image is unclear, ask the user to upload a clearer photo or crop the relevant region. \
"""
context = [{'role': 'system', 'content': SYSTEM_PROMPT}]  # message history, grows with each turn

# --- Widgets -----------------------------------------------------------------
inp = pn.widgets.TextInput(placeholder='Enter text here…', value="Hi")
file_inp = pn.widgets.FileInput(accept="image/*")
button_conversation = pn.widgets.Button(name="Chat!")

# Bind the chat callback to the button; the bound object is what gets rendered.
interactive_conversation = pn.bind(collect_messages, button_conversation)

# --- Layout: text box on top, upload + button row, then the chat output ------
controls_row = pn.Row(file_inp, button_conversation)
chat_view = pn.panel(
    interactive_conversation,
    loading_indicator=True,
    height=300,
    sizing_mode='stretch_width',
)
dashboard = pn.Column(inp, controls_row, chat_view)

dashboard