In [None]:
import os # work with write directeries
import pandas as pd # mostly used for csv manipulation, database, table form
import ast #
from openai import OpenAI
import base64

from google.colab import userdata, files

# Initialize the OpenAI client; the API key is fetched through google.colab's
# userdata helper under the name 'openAI'.
client = OpenAI(api_key=userdata.get('openAI'))

# Dataset root folder (under the /content/drive mount point) and the split
# subfolders that find_image_path() searches, in this order.
dataset_root = "/content/drive/MyDrive/sidewalk_capstone-detection-.v2-v2_damage_classes.yolov8"
image_folders = ['train', 'test', 'valid']

# Function to find the image in dataset folders.
# If the filename ends with '.txt', replace it with '.jpg'
def find_image_path(filename):
    """Return the first existing image path for *filename*, or None.

    A name ending in '.txt' has that extension swapped for '.jpg' first.
    The lookup then checks '<dataset_root>/<folder>/images/<name>' for
    each entry of image_folders, in order, and stops at the first hit.
    """
    image_name = filename
    if image_name.endswith(".txt"):
        stem, _old_ext = image_name.rsplit('.', 1)
        image_name = stem + ".jpg"

    # Lazily built so that os.path.exists is only called until a match is found.
    candidates = (
        os.path.join(dataset_root, split, "images", image_name)
        for split in image_folders
    )
    return next((candidate for candidate in candidates if os.path.exists(candidate)), None)

# Load the label summary CSV into a DataFrame. The code below reads its
# 'filename', 'bounding box' and 'damage level' columns.
csv_file = 'labels_summary.csv' # relative path to the label summary CSV
df = pd.read_csv(csv_file) # one DataFrame row per CSV record

# Uncomment to restrict a trial run to the first 10 rows.
#df = df.head(10)

# Convert bounding box string to a list
def parse_bounding_box(bounding_box_str):
    """Turn a bounding-box cell into a Python object.

    Non-string values are handed back untouched. Strings are evaluated as
    Python literals; if that fails, the problem is printed and
    [0, 0, 0, 0] is returned instead.
    """
    # Guard clause: anything that is not text is assumed to be parsed already.
    if not isinstance(bounding_box_str, str):
        return bounding_box_str

    try:
        parsed_box = ast.literal_eval(bounding_box_str)
    except Exception:
        print(f"Error parsing bounding box: {bounding_box_str}")
        return [0, 0, 0, 0]
    return parsed_box

# Replace each raw 'bounding box' cell with the value returned by parse_bounding_box.
df['bounding box'] = df['bounding box'].apply(parse_bounding_box)

# The triple-quoted block below is a bare string expression, not live code:
# it keeps a disabled fixed-message helper for reference, so no function named
# get_damage_message is defined when this cell runs.
'''
# Function to generate a fixed damage message (used in Narrative 1)
def get_damage_message(damage_level):
    if damage_level == 1:
        return "There is minor damage; you can walk over it as usual."
    elif damage_level == 2:
        return "There is minor damage; please walk slowly and carefully."
    elif damage_level == 3:
        return "There is moderate damage, which might pose a trip hazard. Proceed with caution or consider avoiding it."
    elif damage_level == 4:
        return "There is severe damage; please get off the sidewalk for a while."
    else:
        return f"Damage level {damage_level} detected."
'''

# Narrative 1: Uses OpenAI to generate a narrative based on the bounding box and damage level.
def create_narrative1(row):
    """Generate Narrative 1: an alert built from the raw bounding box and damage level.

    Args:
        row: Mapping-like record (for example a DataFrame row) that provides
            the 'filename', 'bounding box' and 'damage level' entries.

    Returns:
        The stripped text of the model's reply, or the fixed string
        "Error generating narrative using GPT-4 API." when the request or
        the handling of its response raises an exception.
    """
    print(f"Processing Narrative 1 for image: {row['filename']}...")

    bounding_box = row['bounding box']
    damage_level = row['damage level']

    # Adjacent string literals are joined with nothing in between, so every
    # instruction carries its own trailing newline; without it the bullets
    # fuse together ("...as provided.- The alert should...").
    prompt_text = (
        "I am a visually impaired person walking on a sidewalk without a cane. "
        f"The detected damage has a level of {damage_level} and is located within the region described by bounding box {bounding_box}. "
        "Provide a natural, friendly alert that describes the damage level and its approximate location based on the bounding box.\n"
        "- Do NOT include the exact damage level or bounding box coordinates as provided.\n"
        "- The alert should only reference the damage level and estimated location.\n"
        "- Damage level classification:\n"
        "  - Level 1: Minor\n"
        "  - Level 2: Moderate\n"
        "  - Level 3: Major (potential trip hazard)\n"
        "  - Level 4: Severe (definite trip hazard)\n"
        "- Do NOT provide avoidance directions.\n"
        "- Use bounding box coordinates to estimate where the damage appears within my POV.\n"
        "- Do NOT specify the type of damage; simply refer to it as 'damage'.\n"
        "- Keep the response under 100 words and within four sentences.\n"
        "- Damage levels 1 and 2 are never trip hazards; only levels 3 and 4 can be."
    )

    system_message = (
        "You are an assistant that generates clear and natural narratives for visually impaired users based strictly on textual input. "
        "Your task is to provide a friendly alert about detected sidewalk damage."
    )

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt_text}
    ]

    try:
        print(f"Sending request to OpenAI for Narrative 1 for image: {row['filename']}...")
        response = client.chat.completions.create(
            model="gpt-4o-mini",  # Change to "gpt-4o" if available
            messages=messages,
            max_tokens=150,
            temperature=1,
        )
        generated_narrative = response.choices[0].message.content.strip()
        print(f"Successfully generated Narrative 1 for {row['filename']}.")
        return generated_narrative
    except Exception as e:
        # Best effort: a failed row is reported and marked with a sentinel
        # string so the remaining rows are still processed.
        print(f"Error generating Narrative 1 for {row['filename']}: {e}")
        return "Error generating narrative using GPT-4 API."

# Narrative 2: Uses OpenAI to generate a narrative based on computed position and damage level.
def create_narrative2(row):
    """Generate Narrative 2: an alert built from a coarse left/ahead/right position.

    Args:
        row: Mapping-like record (for example a DataFrame row) that provides
            the 'filename', 'bounding box' and 'damage level' entries.

    Returns:
        The stripped text of the model's reply. If the bounding box cannot be
        indexed or used in arithmetic, returns
        "Invalid bounding box. Narrative skipped." without calling the API.
        If the request or the handling of its response raises, returns
        "Error generating narrative using GPT-4 API.".
    """
    print(f"Processing Narrative 2 for image: {row['filename']}...")

    position = _horizontal_position(row['bounding box'])
    if position is None:
        # An unusable box used to raise here and abort the whole df.apply run,
        # discarding every narrative generated so far.
        print(f"Invalid bounding box for {row['filename']}. Skipping Narrative 2 processing.")
        return "Invalid bounding box. Narrative skipped."

    damage_level = row['damage level']

    # Adjacent string literals are joined with nothing in between, so every
    # instruction carries its own trailing newline to keep the bullets apart.
    prompt_text = (
        "I am a visually impaired person walking on a sidewalk without a cane. "
        f"There is damage ahead with a level of {damage_level}, located on my {position}. "
        "Provide a natural, friendly alert describing the damage and suggesting the best action to take.\n"
        "- Do not assume the damage is always a crack or trip hazard; simply refer to it as 'damage'.\n"
        "- Level 1: Inform me and advise walking over it as usual.\n"
        "- Level 2: Inform me and recommend walking over it slowly and cautiously.\n"
        "- Level 3: Inform me and suggest proceeding with caution.\n"
        "- Level 4: Inform me and advise stepping off the sidewalk temporarily.\n"
        "- Do NOT include the exact damage level or bounding box coordinates in the narrative.\n"
        "- For levels below 4, suggest the safest way around the damage when possible.\n"
        "- Estimate the damage size relative to my POV (image area) and adjust the alert accordingly.\n"
        "- Keep the response under 100 words and within four sentences.\n"
        "- Assume the POV nearest to the user’s actual perspective, adjusting for image angle variations.\n"
        "- Based on this estimated POV, suggest the safest avoidance direction (left, right, etc.).\n"
        "- Remember, levels 1 and 2 are never trip hazards; only levels 3 and 4 can be."
    )

    system_message = (
        "You are an assistant that generates clear and natural narratives for visually impaired users based strictly on textual input. "
        "Your task is to provide a friendly alert about detected sidewalk damage."
    )

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt_text}
    ]

    try:
        print(f"Sending request to OpenAI for Narrative 2 for image: {row['filename']}...")
        response = client.chat.completions.create(
            model="gpt-4o-mini",  # Change to "gpt-4o" if available
            messages=messages,
            max_tokens=150,
            temperature=1,
        )
        generated_narrative = response.choices[0].message.content.strip()
        print(f"Successfully generated Narrative 2 for {row['filename']}.")
        return generated_narrative
    except Exception as e:
        # Best effort: a failed row is reported and marked with a sentinel
        # string so the remaining rows are still processed.
        print(f"Error generating Narrative 2 for {row['filename']}: {e}")
        return "Error generating narrative using GPT-4 API."


def _horizontal_position(bounding_box):
    """Return 'left', 'straight ahead' or 'right' for a box, or None if it is unusable."""
    try:
        # Assuming bounding box is [xmin, ymin, height, width]
        # NOTE(review): YOLO label files usually store
        # [x_center, y_center, width, height]; if labels_summary.csv keeps that
        # layout the centre is simply bounding_box[0] - confirm against the CSV.
        xmin = bounding_box[0]
        width = bounding_box[3]  # Use the fourth element as the width
        relative_center_x = xmin + (width / 2.0)
    except (TypeError, IndexError, KeyError):
        # Not indexable (e.g. a NaN cell), too short, or non-numeric entries.
        return None

    # Split the horizontal axis into three bands at 0.3 and 0.7.
    if relative_center_x < 0.3:
        return "left"
    if relative_center_x < 0.7:
        return "straight ahead"
    return "right"

# Narrative 3: Uses GPT-4 Vision (or gpt-4o-mini) to generate a natural narrative from an image.
def create_narrative3(row):
    """Generate Narrative 3: an alert produced from the image itself.

    Args:
        row: Mapping-like record (for example a DataFrame row) that provides
            the 'filename' and 'damage level' entries.

    Returns:
        The stripped text of the model's reply, or one of the fixed strings
        "Image not found. Narrative skipped.", "Error reading image." or
        "Error generating narrative using GPT-4 Vision API." when the
        corresponding step fails.
    """
    print(f"Processing Narrative 3 for image: {row['filename']}...")

    damage_level = row['damage level']
    filename = row['filename']
    image_path = find_image_path(filename)

    if not image_path:
        print(f"Image {filename} not found. Skipping Narrative 3 processing.")
        return "Image not found. Narrative skipped."

    # Adjacent string literals are joined with nothing in between, so sentences
    # end with a space and bullet lines with a newline to keep them apart.
    prompt_text = (
        "I am a visually impaired person walking on a sidewalk without a cane. "
        "I will provide an image from my point of view (POV). I am standing in the bottom centre of the image. "
        "Your task is to analyze the image and guide me regarding any sidewalk damage. "
        "Follow these guidelines in your response:\n"
        "- Provide a natural, friendly alert about any detected sidewalk damage.\n"
        "- Use relative terms like 'to your left', 'ahead', or 'to your right' instead of exact distances.\n"
        "- Classify the damage as minor, moderate, major, or severe based on its characteristics.\n"
        "- minor to moderate for non trip hazards and major to severe in case of trip hazards, use your own estimation.\n"
        "- Suggest the best course of action: such as caution for minor/moderate issues or avoidance for major hazards.\n"
        "- Estimate the damage’s relative size within my field of view and adjust your alert accordingly.\n"
        "- Structure your response naturally, beginning with either a damage description or suggested action—whichever is clearer.\n"
        f"The damage level is marked as {damage_level}: \n"
        "- Level 1: Minor (not a trip hazard)\n"
        "- Level 2: Moderate (not a trip hazard)\n"
        "- Level 3: Major (potential trip hazard)\n"
        "- Level 4: Severe (definite trip hazard)\n\n"
        "Only consider sidewalk damage such as cracks or trip hazards—ignore grass or other elements. "
        "Keep responses under 100 words and within four sentences. "
        "Based on this estimated POV, suggest the safest direction to navigate (left, right, etc.)."
    )

    # This request carries an image, so the system message must not tell the
    # model to rely strictly on textual input.
    system_message = (
        "You are an assistant that generates clear and natural narratives for visually impaired users based on the provided image and accompanying text. "
        "Your task is to provide a friendly alert about detected sidewalk damage."
    )

    try:
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode("utf-8")
    except Exception as e:
        print(f"Error reading image {image_path}: {e}")
        return "Error reading image."

    # The user turn has two parts: the instructions and the image, embedded
    # as a base64 data URL.
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": [
            {"type": "text", "text": prompt_text},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
        ]}
    ]

    print(f"Sending request to OpenAI for Narrative 3 for image: {filename}...")
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",  # Change to "gpt-4o" if available
            messages=messages,
            max_tokens=150,
            temperature=1,
        )
        generated_narrative = response.choices[0].message.content.strip()
        print(f"Successfully generated Narrative 3 for {filename}.")
        return generated_narrative
    except Exception as e:
        # Best effort: a failed row is reported and marked with a sentinel
        # string so the remaining rows are still processed.
        print(f"Error generating Narrative 3 for {filename}: {e}")
        return "Error generating narrative using GPT-4 Vision API."

# Generate narratives for every row of df. The df.head(10) restriction after the
# CSV load is commented out, so each step below calls its create_narrative*
# function once per row of the full table.
print("\nStarting Narrative 1 Processing...")
df['narrative 1'] = df.apply(create_narrative1, axis=1)

print("\nStarting Narrative 2 Processing (using OpenAI)...")
df['narrative 2'] = df.apply(create_narrative2, axis=1)

print("\nStarting Narrative 3 Processing...")
df['narrative 3'] = df.apply(create_narrative3, axis=1)


# Uncomment to preview the first 10 rows in the notebook.
#display(df.head(10))

# Save the DataFrame, including the three narrative columns added above, to a CSV file
df.to_csv('updated_data.csv', index=False)

# Hand the saved file to google.colab's files.download helper
files.download('updated_data.csv')


Starting Narrative 1 Processing...
Processing Narrative 1 for image: gsv-spgg-18751-SurfaceProblem_png.rf.c6a8f0053c62650f944625c579af3b87.txt...
Sending request to OpenAI for Narrative 1 for image: gsv-spgg-18751-SurfaceProblem_png.rf.c6a8f0053c62650f944625c579af3b87.txt...
Error generating Narrative 1 for gsv-spgg-18751-SurfaceProblem_png.rf.c6a8f0053c62650f944625c579af3b87.txt: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
Processing Narrative 1 for image: gsv-spgg-18751-SurfaceProblem_png.rf.c6a8f0053c62650f944625c579af3b87.txt...
Sending request to OpenAI for Narrative 1 for image: gsv-spgg-18751-SurfaceProblem_png.rf.c6a8f0053c62650f944625c579af3b87.txt...
Error generating Narrative 1 for gsv-spgg-18751-SurfaceProblem_

KeyboardInterrupt: 

In [None]:
# Mount Google Drive at /content/drive. The dataset_root used by the other
# cells lives under /content/drive/MyDrive, and find_image_path() only
# returns paths that exist, so this cell has to run before images are looked up.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!ls "/content/drive/MyDrive/sidewalk_capstone-detection-.v2-v2_damage_classes.yolov8/test/images"

gsv-cdmx-1715-SurfaceProblem_png.rf.ea6f5c95b560503610eef0075a522f64.jpg
gsv-cdmx-28678-SurfaceProblem_png.rf.3abe51950026af0de515d542139fdcc2.jpg
gsv-cdmx-3371-SurfaceProblem_png.rf.90097b30caefa3433022bcd8e52988b0.jpg
gsv-cdmx-4030-SurfaceProblem_png.rf.57abbb88d77b390a483800b56b0d0839.jpg
gsv-cdmx-4232-SurfaceProblem_png.rf.ade2e1908142b5f367eb00765e785d22.jpg
gsv-cdmx-4397-SurfaceProblem_png.rf.998efaad7478ba8fdd4fb54d79cd0866.jpg
gsv-cdmx-4433-SurfaceProblem_png.rf.ecb796e79946f52a598fc758e77080d6.jpg
gsv-cdmx-4467-SurfaceProblem_png.rf.5d68056b2cacf732305b3695caa024a1.jpg
gsv-cdmx-4477-SurfaceProblem_png.rf.498b6998f85c72d095e9f3e16fcb245f.jpg
gsv-cdmx-4516-SurfaceProblem_png.rf.f11ff9ca56401e4e624dc144dfdb3fc2.jpg
gsv-cdmx-4539-SurfaceProblem_png.rf.c9637c24fc2b250f6498d70d53ba7f6d.jpg
gsv-cdmx-4575-SurfaceProblem_png.rf.8f27eaa23905c7544e8c580972372818.jpg
gsv-cdmx-4818-SurfaceProblem_png.rf.2ede9734f62ce7b9c46979f055de7b7f.jpg
gsv-cdmx-5049-SurfaceProblem_png.rf.e3b26af48c0fd5

In [None]:
import os
import pandas as pd
import ast
from openai import OpenAI
import base64

from google.colab import userdata, files

# Initialize the OpenAI client; the API key is fetched through google.colab's
# userdata helper under the name 'openAI'.
client = OpenAI(api_key=userdata.get('openAI'))

# Dataset root folder (under the /content/drive mount point) and the split
# subfolders that find_image_path() searches, in this order.
dataset_root = "/content/drive/MyDrive/sidewalk_capstone-detection-.v2-v2_damage_classes.yolov8"
image_folders = ['train', 'test', 'valid']

# Function to find the image in dataset folders.
# If the filename ends with '.txt', replace it with '.jpg'
def find_image_path(filename, root=None, folders=None):
    """Locate the image file that belongs to *filename*.

    A name ending in '.txt' has that extension replaced by '.jpg'. The
    function then checks '<root>/<folder>/images/<name>' for each folder in
    order and returns the first path that exists.

    Args:
        filename: Image or label file name. Non-string values (for example a
            NaN from an empty CSV cell) are treated as "not found".
        root: Dataset root directory. Defaults to the module-level
            dataset_root.
        folders: Split folder names to search. Defaults to the module-level
            image_folders.

    Returns:
        The first existing path as a string, or None when nothing matches.
    """
    if not isinstance(filename, str):
        return None

    # Resolve the defaults at call time so the module-level settings are
    # honoured even if they are reassigned after this function is defined.
    if root is None:
        root = dataset_root
    if folders is None:
        folders = image_folders

    if filename.endswith(".txt"):
        filename = filename.rsplit('.', 1)[0] + ".jpg"

    for folder in folders:
        path = os.path.join(root, folder, "images", filename)
        if os.path.exists(path):
            return path
    return None

# Load the label summary CSV into a DataFrame. The code below reads its
# 'filename', 'bounding box' and 'damage level' columns.
csv_file = 'labels_summary.csv'
df = pd.read_csv(csv_file)

# Convert bounding box string to a list
def parse_bounding_box(bounding_box_str):
    """Convert a bounding-box cell into a list or tuple.

    Args:
        bounding_box_str: Usually the text of a Python list literal such as
            "[0.1, 0.2, 0.3, 0.4]". A value that is already a list or tuple
            is returned unchanged.

    Returns:
        The parsed list or tuple. When the text cannot be parsed, or the
        value is not a list or tuple at all (for example None, a NaN from an
        empty CSV cell, or a literal such as "5"), the problem is printed and
        a fresh [0, 0, 0, 0] is returned so later indexing keeps working.
    """
    if isinstance(bounding_box_str, str):
        try:
            parsed = ast.literal_eval(bounding_box_str)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # The exception types ast.literal_eval is documented to raise
            # on malformed input.
            print(f"Error parsing bounding box: {bounding_box_str}")
            return [0, 0, 0, 0]
    else:
        parsed = bounding_box_str

    if not isinstance(parsed, (list, tuple)):
        print(f"Error parsing bounding box: {bounding_box_str}")
        return [0, 0, 0, 0]
    return parsed

# Replace each raw 'bounding box' cell with the value returned by parse_bounding_box.
df['bounding box'] = df['bounding box'].apply(parse_bounding_box)

# Narrative 3: Uses GPT to generate a natural narrative from an image.
def create_narrative3(row):
    """Generate Narrative 3: an alert produced from the image itself.

    Args:
        row: Mapping-like record (for example a DataFrame row) that provides
            the 'filename' and 'damage level' entries.

    Returns:
        The stripped text of the model's reply, or one of the fixed strings
        "Image not found. Narrative skipped.", "Error reading image." or
        "Error generating narrative using GPT-4 Vision API." when the
        corresponding step fails.
    """
    filename = row['filename']
    print(f"Processing Narrative 3 for image: {filename}...")

    damage_level = row['damage level']
    image_path = find_image_path(filename)

    if not image_path:
        print(f"Image {filename} not found. Skipping Narrative 3 processing.")
        return "Image not found. Narrative skipped."

    prompt_text = (
        "I am a visually impaired person walking on a sidewalk without a cane. "
        "I will provide an image from my point of view (POV). I am standing in the bottom centre of the image. "
        "Your task is to analyze the image and guide me regarding any sidewalk damage. "
        "Follow these guidelines in your response:\n"
        "- Provide a natural, friendly alert about any detected sidewalk damage.\n"
        "- Use relative terms like 'to your left', 'ahead', or 'to your right' instead of exact distances.\n"
        "- Classify the damage as minor, moderate, major, or severe based on its characteristics.\n"
        "- minor to moderate for non trip hazards and major to severe in case of trip hazards, use your own estimation.\n"
        "- Suggest the best course of action: such as caution for minor/moderate issues or avoidance for major hazards.\n"
        "- Estimate the damage’s relative size within my field of view and adjust your alert accordingly.\n"
        "- Structure your response naturally, beginning with either a damage description or suggested action—whichever is clearer.\n"
        f"The damage level is marked as {damage_level}: \n"
        "- Level 1: Minor (not a trip hazard)\n"
        "- Level 2: Moderate (not a trip hazard)\n"
        "- Level 3: Major (potential trip hazard)\n"
        "- Level 4: Severe (definite trip hazard)\n\n"
        "Only consider sidewalk damage such as cracks or trip hazards—ignore grass or other elements. "
        "Keep responses under 100 words and within four sentences. "
        "Based on this estimated POV, suggest the safest direction to navigate (left, right, etc.)."
    )

    # This request carries an image, so the system message must not tell the
    # model to rely strictly on textual input.
    system_message = (
        "You are an assistant that generates clear and natural narratives for visually impaired users based on the provided image and accompanying text. "
        "Your task is to provide a friendly alert about detected sidewalk damage."
    )

    try:
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode("utf-8")
    except Exception as e:
        print(f"Error reading image {image_path}: {e}")
        return "Error reading image."

    # The user turn has two parts: the instructions and the image, embedded
    # as a base64 data URL. The chosen model must accept image input.
    messages = [
        {"role": "system", "content": system_message},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
            ]
        }
    ]

    print(f"Sending request to OpenAI for Narrative 3 for image: {filename}...")
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",  # Or "gpt-4o" if available
            messages=messages,
            max_tokens=150,
            temperature=1,
        )
        generated_narrative = response.choices[0].message.content.strip()
        print(f"Successfully generated Narrative 3 for {filename}.")
        return generated_narrative
    except Exception as e:
        # Best effort: a failed row is reported and marked with a sentinel
        # string so the remaining rows are still processed.
        print(f"Error generating Narrative 3 for {filename}: {e}")
        return "Error generating narrative using GPT-4 Vision API."

# Generate ONLY Narrative 3: create_narrative3 is called once per row of df.
print("\nStarting Narrative 3 Processing...")
df['narrative 3'] = df.apply(create_narrative3, axis=1)

# Save only 'filename' and the new 'narrative 3' column to a CSV file
# (the other columns of df are left out of the export).
out_df = df[['filename', 'narrative 3']]
out_df.to_csv('updated_data.csv', index=False)

# Hand the saved file to google.colab's files.download helper
files.download('updated_data.csv')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processing Narrative 3 for image: gsv-oradell-1941-SurfaceProblem_png.rf.394b67ad77297691cd4b8ea16e636774.txt...
im here
Sending request to OpenAI for Narrative 3 for image: gsv-oradell-1941-SurfaceProblem_png.rf.394b67ad77297691cd4b8ea16e636774.txt...
Successfully generated Narrative 3 for gsv-oradell-1941-SurfaceProblem_png.rf.394b67ad77297691cd4b8ea16e636774.txt.
Processing Narrative 3 for image: gsv-oradell-1941-SurfaceProblem_png.rf.394b67ad77297691cd4b8ea16e636774.txt...
im here
Sending request to OpenAI for Narrative 3 for image: gsv-oradell-1941-SurfaceProblem_png.rf.394b67ad77297691cd4b8ea16e636774.txt...
Successfully generated Narrative 3 for gsv-oradell-1941-SurfaceProblem_png.rf.394b67ad77297691cd4b8ea16e636774.txt.
Processing Narrative 3 for image: gsv-oradell-4818-SurfaceProblem_png.rf.199f36dff40e6180067e4d5d16b1818d.txt...
im here
Sending request to OpenAI for Narrative 3 for image: gsv-oradell-4818-Surfac

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>