# 사진에서 숨겨진 의미있는 부분을 찾아 네모를 그려줍니다.

In [4]:
import google.generativeai as genai
from PIL import Image, ImageDraw, ImageFont
import PIL
from IPython.display import Markdown
from dotenv import load_dotenv
import os
# Load variables from a local .env file into the process environment, then
# hand the Gemini API key (None when the variable is unset) to the SDK.
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

def draw_scaled_rectangles_pil(image_path, coordinates, labels=None, font_path=None, font_size=20):
    """
    Outline regions of an image, optionally caption them, and show the result.

    Args:
        image_path (str): Path to the input image.
        coordinates (list): Boxes given as [ymin, xmin, ymax, xmax] on a
                            0-1000 scale; each is rescaled to the image size.
                            Entries that do not have exactly four values are
                            reported and skipped.
        labels (list, optional): Caption for the box at the same index.
        font_path (str, optional): Path to a .ttf font. The default PIL font
                                   is used when omitted or when loading fails.
        font_size (int, optional): Size used with ``font_path``. Default is 20.

    Returns:
        None: The annotated image is displayed with ``image.show()``. When
        the image cannot be opened an error is printed instead.
    """
    try:
        canvas = Image.open(image_path)
    except Exception as e:
        print(f"Error: Unable to load image. {e}")
        return

    # Coordinates arrive on a 0-1000 grid, so one unit is 1/1000 of each side.
    img_w, img_h = canvas.size
    x_factor = img_w / 1000
    y_factor = img_h / 1000

    pen = ImageDraw.Draw(canvas)

    try:
        font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
    except Exception as e:
        print(f"Error: Unable to load font. {e}")
        font = ImageFont.load_default()

    for index, box in enumerate(coordinates):
        if len(box) != 4:
            print(f"Skipping invalid coordinate: {box}")
            continue

        top = int(box[0] * y_factor)
        left = int(box[1] * x_factor)
        bottom = int(box[2] * y_factor)
        right = int(box[3] * x_factor)

        pen.rectangle([left, top, right, bottom], outline="green", width=2)

        # No caption available for this box.
        if not labels or index >= len(labels):
            continue

        caption = labels[index]
        caption_w, caption_h = pen.textbbox((0, 0), caption, font=font)[2:]

        # Put the caption above the box, or just inside its top edge when
        # there is no room above.
        caption_y = top - caption_h
        if caption_y <= 0:
            caption_y = top + 2

        pen.rectangle([left, caption_y, left + caption_w, caption_y + caption_h], fill="white")
        pen.text((left, caption_y), caption, fill="black", font=font)

    canvas.show()


def request_description(image_path):
    """
    Ask Gemini for a contextual analysis of an artwork plus three subtle objects.

    Args:
        image_path (str): Path to the artwork image.

    Returns:
        The object returned by ``model.generate_content`` for the image and
        prompt, or None when the image cannot be opened or the request fails
        (the error is printed in both cases).
    """
    try:
        artwork = PIL.Image.open(image_path)
    except Exception as e:
        print(f"[Error] in image_file: {e}")
        return None

    describer = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")

    # Two-step prompt: describe the artwork, then pick three overlooked objects.
    prompt = """

    Instruction:
    You are an advanced AI specialized in image analysis and object detection. Your goal is to carefully analyze the given image, identify its context and significance, and then perform the specified task step by step. Follow the instructions below:


    <Step 1: Contextual Analysis
    Examine the image in detail.

    Describe what the image likely represents.
    Identify the overall theme or subject of the artwork.
    Consider the cultural, artistic, or symbolic importance of the image.
    Answer the following questions:

    What is this artwork primarily about?
    What elements seem central to the story or message of the artwork?>


    <Step 2: Key Object Identification
        Based on the analysis in Step 1, identify three less obvious but thematically significant objects in the artwork. Avoid well-known or central elements and instead focus on details that might be overlooked by the general audience but still contribute uniquely to the artwork's story or theme.

        Criteria for Object Selection:

        The objects should not be the most famous or obvious parts of the artwork (e.g., the central figure or the main scene).
        Instead, focus on smaller, background, or intricately designed elements that reflect the artist's subtle intent or hidden symbolism.
        Object Description:
        For each object:

        Provide its name or description.
        Explain why it is important in the context of this artwork, highlighting its less obvious contribution.
        Questions to Guide Identification:

        What elements are present in the background or secondary areas that carry artistic, cultural, or symbolic value?
        Are there details that hint at the artist's personal signature, humor, or commentary?
        Are there objects that reflect the historical or social context subtly?
        Example Output:

        Object 1: [The small dog at the bottom corner]

        Importance: Often overshadowed by the central figures, the dog adds a sense of domesticity and playfulness, subtly grounding the artwork in everyday life.
        Object 2: [The vase on the far-left shelf]

        Importance: The vase is barely noticeable but may symbolize wealth or the patron’s taste, hinting at the socioeconomic background of the scene.
        Object 3: [The faint reflection in the mirror at the back]

        Importance: While often overlooked, this reflection could symbolize a meta-commentary on the viewer’s role in interpreting the artwork.
        Why This Works
        Focus on Discovery: By avoiding obvious elements, this approach encourages the model to analyze the image in depth and uncover hidden gems.
        Promotes Unique Insights: This approach aligns with the idea of exploring the overlooked and appreciating the subtleties of art.>


    Example Output:

    Step 1: Contextual Analysis
    The image appears to depict a serene landscape with a focus on a central human figure and surrounding nature. The central figure seems to symbolize harmony with the environment.


    Step 2: Key Object Identification
    Object 1: [A woman sitting under a tree]
    Importance: Central figure representing the theme of the artwork.
    Object 2: [A small stream flowing in the foreground]
    Importance: Adds depth and tranquility to the composition.
    Object 3: [A bird perched on the tree]
    Importance: Symbolizes freedom and nature's vitality.

    """
    try:
        return describer.generate_content([artwork, prompt])
    except Exception as e:
        print(f"[Error] in response: {e}")
        return None

def _extract_section(text, start_marker, end_marker=None):
    """Return the text that follows ``start_marker``, cut at ``end_marker``.

    The section is the text between the first and second occurrence of
    ``start_marker`` (everything after it when it occurs once), truncated at
    the first ``end_marker`` when one is given and present.

    Raises:
        ValueError: If ``start_marker`` does not occur in ``text``.
    """
    pieces = text.split(start_marker)
    if len(pieces) < 2:
        raise ValueError(f"marker {start_marker!r} not found")
    section = pieces[1]
    if end_marker is not None:
        section = section.split(end_marker)[0]
    return section


def get_description(response):
    """
    Split a description response into the Step 1 analysis and three objects.

    The response text is expected to contain a "Contextual Analysis" heading
    followed by a "Step 2: " heading whose body lists "Object 1:",
    "Object 2:" and "Object 3:".

    Args:
        response: Object exposing the model reply as ``response.text``, or
            None when the request failed.

    Returns:
        tuple: ``(description, [object1, object2, object3])``. The description
        is the text after "Contextual Analysis" and before "Step 2: ". Each
        object that cannot be found is returned as "". When the response is
        missing, unreadable, or has no "Contextual Analysis" heading, the
        result is ``("", ["", "", ""])``.
    """
    if response is None:
        print("[Error] in res_description: no response to parse")
        return "", ["", "", ""]
    try:
        # Use the raw text directly: wrapping it in a display object is not
        # needed for parsing.
        text = response.text
    except Exception as e:
        print(f"[Error] in res_description: {e}")
        return "", ["", "", ""]

    step_parts = text.split("Step 2: ")
    has_step2 = len(step_parts) > 1
    # Drop the two characters directly before the "Step 2: " heading; when
    # the heading is absent there is nothing to trim.
    step1_text = step_parts[0][:-2] if has_step2 else step_parts[0]

    try:
        res_description = _extract_section(step1_text, "Contextual Analysis")
    except ValueError as e:
        print(f"[Error] in res_description: {e}")
        print("""[Log] response.text: \n""", text)
        return "", ["", "", ""]

    step2_text = step_parts[1] if has_step2 else ""
    markers = ["Object 1:", "Object 2:", "Object 3:"]
    res_objects = []
    for position, marker in enumerate(markers):
        next_marker = markers[position + 1] if position + 1 < len(markers) else None
        try:
            res_objects.append(_extract_section(step2_text, marker, next_marker))
        except ValueError as e:
            # Logging must not re-evaluate the failing expression, otherwise
            # the handler itself raises.
            print(f"[Error] in res_object{position + 1}: {e}")
            print("[Log] Step 2 text: \n", step2_text)
            res_objects.append("")
    return res_description, res_objects

def get_labels(res_objects, max_length=10):
    """
    Build short box labels from the object descriptions.

    Empty entries are dropped. When an entry contains a "Name/Description:"
    marker, the label is taken from the text after it (the text before the
    marker is only a prefix); otherwise the whole entry is used. The result
    is stripped of surrounding whitespace and cut to ``max_length``
    characters.

    Args:
        res_objects (list): Object description strings; "" marks a missing one.
        max_length (int, optional): Maximum label length. Default is 10.

    Returns:
        list: One label per non-empty entry, in the original order.
    """
    labels = []
    for res_object in res_objects:
        if not res_object:
            continue
        _, marker, after = res_object.partition("Name/Description:")
        name = after if marker else res_object
        labels.append(name.strip()[:max_length])
    return labels

def request_xys(image_path, res_description, res_objects):
    """
    Ask Gemini for a bounding box for each described object, one request each.

    Args:
        image_path (str): Path to the artwork image.
        res_description (str): Overall description of the image, appended to
            every prompt.
        res_objects (list): Object descriptions; "" entries are skipped.

    Returns:
        list: The ``generate_content`` result of every request that succeeded
        (failed requests are printed and left out), or None when the image
        cannot be opened.
    """
    try:
        image_file = PIL.Image.open(image_path)
    except Exception as e:
        print(f"[Error] in image_file: {e}")
        return None

    targets = [candidate for candidate in res_objects if candidate != ""]
    model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")

    # Shared instructions; the image and object descriptions are appended per object.
    prompt = """
Instruction:
You are an advanced AI specialized in image analysis and object localization. Your task is to analyze the provided image and identify the exact coordinates of specific objects mentioned in the description. Follow the steps carefully:

Input Provided:
Image: The image file or base64 representation.
Image Description: A detailed explanation of the image's context and overall content.
Object Description: A object within the image that need to be located.


Your Task:
Analyze the Image:

Carefully examine the image to understand its content in relation to the description.

Locate the Objects:

For a object mentioned, identify its exact location within the image.
Provide the coordinates in the format [ymin, xmin, ymax, xmax]. Ensure these coordinates are accurate and relative to the dimensions of the image.
Output Format:
For a object, return the following:

Object Name: [Name or description of the object]
Coordinates: [ymin, xmin, ymax, xmax]


Example:
Image Description:
The image depicts a vibrant garden with various flowers, a small fountain in the center, and a bench on the right side.

Object Description:
A red rose bush near the bottom left corner.

Expected Output:
[451, 127, 541, 167]

Additional Notes:
Use the descriptions provided to accurately identify the objects.
If an object is ambiguous or cannot be located, explicitly mention the reason.

Return a bounding box for each of the objects in this image in [ymin, xmin, ymax, xmax] format.

    """

    responses = []
    for res_object in targets:
        obj_prompt = f"""
        {prompt}
        Image Description: {res_description}
        Object Description: {res_object}
        """
        print(obj_prompt)
        try:
            reply = model.generate_content([image_file, obj_prompt])
        except Exception as e:
            print(f"[Error] in response of {res_object}: {e}")
        else:
            responses.append(reply)

    return responses

def get_xys(xys_responses):
    """
    Extract one [ymin, xmin, ymax, xmax] box from each localization response.

    The last bracketed group of exactly four integers in the reply text is
    used, so earlier brackets (such as "[Name ...]") and trailing
    non-numeric brackets are ignored.

    Args:
        xys_responses (list | None): Objects exposing the model reply as
            ``.text``. None is treated as an empty list.

    Returns:
        list: One entry per response, in order. Each entry is a list of four
        ints, or ``[]`` when the reply is unreadable or holds no box. The
        empty placeholder keeps indexes aligned with the responses.
    """
    import re  # local import keeps this cell self-contained

    box_pattern = re.compile(
        r"\[\s*(-?\d+)\s*,\s*(-?\d+)\s*,\s*(-?\d+)\s*,\s*(-?\d+)\s*\]"
    )
    xys = []
    for res in xys_responses or []:
        try:
            text = res.text
        except Exception as e:
            print(f"[Error] in get_xys: {e}")
            xys.append([])
            continue
        print(text)
        matches = box_pattern.findall(text)
        if not matches:
            print("[Error] in get_xys: no [ymin, xmin, ymax, xmax] box found")
            xys.append([])
            continue
        xys.append([int(value) for value in matches[-1]])
    return xys

In [5]:
# Pipeline, part 1: describe the artwork, split the reply into the overall
# description and the three object descriptions, then request one bounding
# box per object.
image_path = "The_Ambassadors.jpg"
# request_description returns None when the image cannot be opened or the API call fails.
res1 = request_description(image_path)
res_des, res_objs = get_description(res1)
# request_xys prints every prompt it sends (see the cell output below).
res2 = request_xys(image_path, res_des, res_objs)


        
Instruction:
You are an advanced AI specialized in image analysis and object localization. Your task is to analyze the provided image and identify the exact coordinates of specific objects mentioned in the description. Follow the steps carefully:

Input Provided:
Image: The image file or base64 representation.
Image Description: A detailed explanation of the image's context and overall content.
Object Description: A object within the image that need to be located.


Your Task:
Analyze the Image:

Carefully examine the image to understand its content in relation to the description.

Locate the Objects:

For a object mentioned, identify its exact location within the image.
Provide the coordinates in the format [ymin, xmin, ymax, xmax]. Ensure these coordinates are accurate and relative to the dimensions of the image.
Output Format:
For a object, return the following:

Object Name: [Name or description of the object]
Coordinates: [ymin, xmin, ymax, xmax]


Example:
Image Descrip

In [6]:
# Pipeline, part 2: parse the boxes, derive short labels from the object
# descriptions and draw both on the image.
print(res_objs)
res_xys = get_xys(res2)
res_labels = get_labels(res_objs)
# Boxes and labels are matched by list index inside draw_scaled_rectangles_pil.
draw_scaled_rectangles_pil(image_path, res_xys, res_labels)

[" The small, partially visible book on the lower shelf.**\n\nImportance: While the larger books are easily noticed, this smaller, almost hidden book hints at the vastness of knowledge and the potentially hidden or esoteric knowledge pursued during the Renaissance.  Its inconspicuous placement suggests a certain level of intimacy or personal study beyond the more publicly displayed objects.  It's a subtle reminder of the individual's journey of intellectual exploration.\n\n\n**", ' The partially obscured writing implements (quill pen, inkwell) on the table.**\n\nImportance: These items are not immediately striking but are crucial to the intellectual pursuits suggested by the painting. They represent the tangible tools of scholarship and artistic creation.  Their presence subtly underscores the idea that knowledge was not passively received but actively created and documented.  It’s a quiet commentary on the active process of learning and creation.\n\n\n**', ' The mirror on the table.**

# 그림의 특정 부분에 대한 질문 처리

In [29]:
from PIL import Image, ImageDraw
import google.generativeai as genai

from IPython.display import Markdown
from dotenv import load_dotenv
import os
# Actually call load_dotenv: the bare name is a no-op expression, so the
# .env file would never be read and the key lookup below could return None.
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
def draw_scaled_rectangles_pil(image_path, coordinates):
    """
    Draw rectangles on an image from scaled coordinates, show it and save it.

    Args:
        image_path (str): Path to the input image.
        coordinates (list): List of boxes in the form [ymin, xmin, ymax, xmax].
                            The values should be scaled for a base size of 1000.
                            Entries that do not have exactly four values are
                            reported and skipped.

    Returns:
        str | None: Path of the saved image, which is the input file name
        prefixed with "output_" in the same directory as the input. None when
        the image cannot be opened.
    """
    # Load the image
    try:
        image = Image.open(image_path)
    except Exception as e:
        print(f"Error: Unable to load image. {e}")
        return

    # Get the actual dimensions of the image
    width, height = image.size

    # Scale factor based on 1000 units
    scale_x = width / 1000
    scale_y = height / 1000

    # Create a drawable object
    draw = ImageDraw.Draw(image)

    for coord in coordinates:
        if len(coord) != 4:
            print(f"Skipping invalid coordinate: {coord}")
            continue

        # Scale the coordinates
        ymin = int(coord[0] * scale_y)
        xmin = int(coord[1] * scale_x)
        ymax = int(coord[2] * scale_y)
        xmax = int(coord[3] * scale_x)

        # Draw the rectangle on the image
        draw.rectangle([xmin, ymin, xmax, ymax], outline="green", width=2)

    # Display the image
    image.show()

    # Prefix the file name only: prefixing the whole path would turn
    # "dir/a.jpg" into "output_dir/a.jpg", a directory that need not exist.
    output_path = os.path.join(
        os.path.dirname(image_path), "output_" + os.path.basename(image_path)
    )
    image.save(output_path)
    return output_path


def highlight_region(image_path, coordinates):
    """
    Highlight a specific region in the image by keeping the region and making the rest white.

    The box is widened by scaling its minimum corner by 0.9 and its maximum
    corner by 1.1, then clamped to the image bounds. The cropped region is
    also displayed with ``region.show()``.

    Args:
        image_path (str): Path to the input image.
        coordinates (list): Coordinates of the region to keep in the form
                            [ymin, xmin, ymax, xmax], scaled for a base size of 1000.

    Returns:
        str | None: Path of the saved image, which is the input file name
        prefixed with "output_" in the same directory as the input. None when
        the image cannot be opened or the region is empty.
    """
    try:
        # Load the image
        image = Image.open(image_path).convert("RGBA")
    except Exception as e:
        print(f"Error: Unable to load image. {e}")
        return

    # Prefix the file name only so that paths with a directory keep working.
    output_path = os.path.join(
        os.path.dirname(image_path), "output_" + os.path.basename(image_path)
    )

    # Get the actual dimensions of the image
    width, height = image.size

    # Scale factor based on 1000 units
    scale_x = width / 1000
    scale_y = height / 1000
    # Create a white canvas of the same size as the image
    white_canvas = Image.new("RGBA", image.size, "white")

    # Extract the region to keep. Clamping keeps the crop inside the image:
    # the 1.1 factor can push the maximum corner past the border, which would
    # pad the cropped region with pixels that are not part of the picture.
    ymin = max(0, min(int(coordinates[0] * scale_y * 0.9), height))
    xmin = max(0, min(int(coordinates[1] * scale_x * 0.9), width))
    ymax = max(0, min(int(coordinates[2] * scale_y * 1.1), height))
    xmax = max(0, min(int(coordinates[3] * scale_x * 1.1), width))

    if xmax <= xmin or ymax <= ymin:
        print(f"Error: Empty region for coordinates {coordinates}.")
        return

    # Paste the selected region onto the white canvas
    region = image.crop((xmin, ymin, xmax, ymax))
    region.show()
    white_canvas.paste(region, (xmin, ymin))

    # Save the result
    white_canvas = white_canvas.convert("RGB")  # Convert back to RGB if needed
    white_canvas.save(output_path)
    print(f"Image saved to {output_path}")
    return output_path

def request_highlight_question(ori_image_path, highlight_image_path):
    """
    Ask Gemini for an essay on a highlighted detail in the context of the artwork.

    Args:
        ori_image_path (str): Path to the original artwork image.
        highlight_image_path (str): Path to the image that highlights the
            detail to analyze.

    Returns:
        The object returned by ``model.generate_content`` for both images and
        the prompt. Errors from opening the images or from the request are
        not caught here.
    """
    # Two-step analysis prompt: overall artwork first, then the highlighted object.
    prompt = """
The provided images include:

The original artwork, representing the broader context of the piece.
A cropped image highlighting a specific object from the artwork for detailed analysis.
Your task is to analyze the provided cropped object in the style of Kenneth Clark, known for his deep exploration of themes, techniques, and artistic intent. Follow the steps below to structure your analysis logically and thoughtfully:

Step 1: Contextual Analysis of the Original Artwork

Examine the original artwork as a whole.
Identify its primary theme, subject matter, and artistic techniques.
Reflect on the broader cultural, historical, or symbolic importance of the piece.
Consider the well-known elements of the artwork and how they might relate to the cropped object.
Step 2: Focused Analysis of the Cropped Object

Analyze the highlighted object while keeping in mind the context provided by the original artwork.
Describe the object’s physical characteristics, placement, and visual impact in the cropped image.
Explore its thematic role and symbolic significance within the larger narrative of the artwork.
Include the Following Points in Step 2:

Object Description:

Detail the object’s appearance, material, and texture.
Note its placement and interaction with surrounding elements.
Role in the Artwork:

Discuss how the object contributes to the larger story or theme.
Highlight its connection to the artwork’s overall composition and message.
Symbolism and Meaning:

Explore any symbolic or thematic meaning the object might convey.
Consider how it reflects broader cultural or historical ideas.
Artistic Techniques:

Analyze the methods used to depict the object, such as brushwork, texture, light, and shadow.
Highlight any unique stylistic choices that enhance its impact.
Connection to the Artist:

Explain how the object reflects the artist’s personal vision, life, or philosophy.
Consider any recurring themes or motifs in the artist’s other works.
Emotional and Interpretive Impact:

Provide insights into the emotional resonance of the object for viewers.
Share your interpretation of its significance within the artwork.
Important Notes:

Ensure your analysis flows naturally and logically.
Always connect the cropped object back to the context of the original artwork.
Write in polished, well-structured English, avoiding overly casual or colloquial language.
Final Instruction:
Start your response by analyzing the original artwork to establish a foundation, then transition to a detailed examination of the cropped object. This approach ensures a cohesive and insightful interpretation that considers both the broader context and specific details.
"""
    analyst = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")
    full_artwork = Image.open(ori_image_path)
    detail = Image.open(highlight_image_path)
    return analyst.generate_content([full_artwork, detail, prompt])

def request_qurations(image_path, highlight_res):
    """
    Turn the detailed analysis into a one-paragraph Korean curator note.

    Args:
        image_path (str): Path to the original artwork image.
        highlight_res: Response whose ``.text`` holds the analysis to
            summarize (as returned by ``request_highlight_question``).

    Returns:
        The object returned by ``model.generate_content`` for the image and
        the prompt. Errors from opening the image or from the request are not
        caught here.
    """
    model = genai.GenerativeModel(model_name="gemini-1.5-flash-001")
    ori_image = Image.open(image_path)
    # Embed the raw reply text. Wrapping it in IPython's Markdown display
    # object is not a safe identity: that constructor treats a string that
    # starts with "http" or names an existing file as a URL/filename to load.
    prompt = f"""
Description:
{highlight_res.text}

Instruction:
You are an expert online curator tasked with providing a clear, engaging, and well-written explanation of a specific part of an artwork. The explanation should flow naturally in Korean and be presented as a single paragraph. Ensure that the text begins with "이 부분은 ~" to introduce the specific section being described. Avoid awkward expressions such as "이 그림의 특정 부분은 ~" and maintain a professional yet approachable tone.

Guidelines:

Start the text with "이 부분은 ~" to naturally introduce the area of focus.
Write in polished, well-structured Korean, avoiding overly casual or colloquial language.
Ensure the explanation highlights the significance of the section within the context of the whole artwork.
Provide details on visual elements, symbolism, and artistic techniques without being overly academic.
Keep the output concise and limited to one paragraph, but do NOT excessively summarize.

Output Format:
Text: Provide a single paragraph in Korean.
Tone: Polished and professional, suitable for an online audience.
Structure: Natural flow, starting with "이 부분은 ~" and focusing on the details and context of the specific section.
"""
    response = model.generate_content([ori_image, prompt])
    return response


In [30]:
# Example usage
image_path = "The_Ambassadors.jpg"
coordinates = [690, 160, 992, 760]   # Replace with your desired coordinates

output_path = highlight_region(image_path, coordinates)
output_path = draw_scaled_rectangles_pil(image_path, [coordinates])
req = request_highlight_question(image_path, output_path)
req2 = request_qurations(image_path, req)
Markdown(req2.text)

Image saved to output_The_Ambassadors.jpg


이 부분은 그림의 아래쪽에 비스듬히 놓인 해골을 보여줍니다. 이 해골은 마치 왜곡된 모습으로 놓여있는 것처럼 보이는데, 실제로는 '아나모픽' 기법을 사용하여 그려졌습니다. 즉, 특정 각도에서만 정확한 해골의 모습을 볼 수 있도록 의도적으로 왜곡한 것입니다. 이는 마치 세상의 화려함과 지적인 업적 속에서도 죽음이라는 현실을 잊지 말라는 메시지를 전달하는 것 같습니다. 또한, 해골이 그림의 다른 사물들과 두 인물 사이에 끼어 있는 모습은, 죽음이 우리의 삶에 어떻게 갑작스럽게 개입하여 모든 것을 뒤바꿀 수 있는지를 보여줍니다.  이처럼 작가는 섬세한 기법을 통해 그림 속에 숨겨진 의미를 담아내고, 관객에게 심오한 메시지를 전달하고 있습니다. 


In [31]:
Markdown(req.text)

The painting before us is Hans Holbein the Younger’s *The Ambassadors*, a work steeped in the intellectual and artistic ferment of the early sixteenth century.  It’s a double portrait, not merely of two men, Jean de Dinteville, the French ambassador to England, and Georges de Selve, Bishop of Lavaur, but of an age obsessed with learning, diplomacy, and the burgeoning awareness of a world expanding beyond European shores.  The richness of the depicted objects – the celestial globe, the lute, the sundial, the scientific instruments – speaks to the Renaissance hunger for knowledge, both earthly and divine.  The figures themselves, poised and confident, embody the humanist ideal of the learned and worldly individual.  Yet, a subtle current of unease permeates the scene, a premonition perhaps of the religious and political upheavals that would soon shatter the apparent stability of the era.

Now, let us turn our attention to the peculiar object lying diagonally across the lower portion of the canvas: the anamorphic skull. Cropped from the larger composition, it loses some of its unsettling power, yet even in isolation, its distorted form demands attention.  This isn't simply a decorative motif; Holbein has employed a sophisticated technique of anamorphosis, requiring the viewer to adopt a specific, oblique angle to perceive the skull's true form.  Rendered with a smooth, almost polished texture, its bone-white surface stands in stark contrast to the richness of the surrounding textiles and the dark hues of the ambassadors' robes.

Within the larger narrative of the painting, the skull serves as a *memento mori*, a stark reminder of mortality amidst the worldly splendor.  It disrupts the carefully constructed image of worldly success and intellectual accomplishment, injecting a note of disquiet.  The placement of the skull, partially obscuring the other objects and intruding into the space between the two figures, suggests the pervasive nature of death, its ability to interrupt and undermine even the most carefully laid plans.  Its distorted form, only fully revealed from a specific vantage point, can be interpreted as a commentary on the limitations of human perception and the hidden truths that lie beneath the surface of appearances.

Holbein’s mastery of technique is evident in the execution of the anamorphic skull.  The skillful manipulation of perspective and foreshortening creates the illusion of distortion while maintaining a remarkable degree of anatomical accuracy.  The smooth, almost ethereal texture of the skull contrasts with the more tangible surfaces of the surrounding objects, lending it an otherworldly quality.

This use of anamorphosis was not merely a technical flourish; it reflects Holbein’s deep understanding of Renaissance humanism and its preoccupation with perspective and the nature of reality.  The skull, hidden in plain sight, becomes a metaphor for the unseen forces that shape human destiny. It challenges the viewer to look beyond the surface, to acknowledge the ephemeral nature of earthly achievements, and to confront the ultimate reality of death.  The emotional impact is powerful; the distorted skull, once deciphered, becomes a haunting presence, a silent reminder of the fragility of life and the inevitability of its end. It transforms the painting from a simple celebration of worldly success into a profound meditation on the human condition.


# 그림을 사진 찍어 넣으면 어떤 그림인지 알려주기

In [3]:
from PIL import Image, ImageDraw
import google.generativeai as genai

from IPython.display import Markdown
from dotenv import load_dotenv
import os
# Actually call load_dotenv: the bare name is a no-op expression, so the
# .env file would never be read and the key lookup below could return None.
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

In [4]:
def request_image_info(image_path):
    """
    Ask Gemini to identify the artwork shown in a photo.

    Args:
        image_path (str): Path to the photo of the artwork.

    Returns:
        The object returned by ``model.generate_content`` for the image and
        the prompt. Errors from opening the image or from the request are not
        caught here.
    """
    # Asks for title, artist, a short description and a reference link.
    prompt = """
Instruction:
You are an art recognition expert AI. Your task is to identify the uploaded artwork based on its visual characteristics and provide the following information:

The title of the artwork.
The name of the artist.
A brief description of the artwork (no more than 2 sentences).
A link to Google or a reputable art database for further information.

Output Notes:

If the artwork is not recognized, politely explain that the database cannot identify the artwork and provide general art research resources.
Always ensure the description is concise and the link is relevant and accurate.
"""
    recognizer = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
    photo = Image.open(image_path)
    return recognizer.generate_content([photo, prompt])

In [5]:
# Identify the artwork in a photo and render the model's answer as Markdown.
pictured_photo = "photo2.jpg"
info_res = request_image_info(pictured_photo)
Markdown(info_res.text)

That's a reproduction of **The Gleaners** by **Jean-François Millet**.

**Description:**  The painting depicts three peasant women gleaning (collecting leftover grain) in a field after the harvest.  Millet's work focuses on the hardships and dignity of rural life, often showcasing the toil of the working class.

**Link:**  A good starting point for further information would be the Google Arts & Culture page for the Musée d'Orsay in Paris, which houses the original painting:  [https://artsandculture.google.com/entity/w/jean-francois-millet](https://artsandculture.google.com/entity/w/jean-francois-millet)  (Search for "The Gleaners" within the site).  You can also find more information on reputable art history websites like the Metropolitan Museum of Art or the National Gallery of Art.


: 