## Required Installs

In [None]:
# Installs OpenAI directly into the current kernel (not subprocess)
!{sys.executable} -m pip install --upgrade openai

In [None]:
# Installs computer vision directly into the current kernel
!{sys.executable} -m pip install azure-cognitiveservices-vision-computervision --upgrade

In [None]:
pip install azure-ai-inference

## Required Imports

In [None]:
import json
import openai
import os
import re
import sys
from azureml.core import Workspace, Datastore
from typing import Dict, Any, List
from azure.ai.inference.models import SystemMessage, UserMessage

# Azure OpenAI client
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

# Computer Vision Client
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials


In [None]:
# Report which OpenAI SDK release the kernel imported
print(f"OpenAI SDK version: {openai.__version__}")

## Download JSON template locally from datastore

In [None]:
# Connect to the workspace
ws = Workspace.from_config()

# Get the datastore; stop the cell early when the lookup yields nothing
datastore = Datastore.get(ws, datastore_name='testdata')
if datastore is None:
    print("Datastore not found.")
    sys.exit(1)

print(f"Found the datastore: {datastore}")
file_path = 'data/jsons/email_template.json'  # path inside the datastore

# Download the JSON file to a local folder (created on demand)
print("Preparing to download JSON file from datastore...")
download_dir = './downloaded_jsons'
os.makedirs(download_dir, exist_ok=True)
try:
    print(f"Downloading {file_path} to {download_dir}...")
    datastore.download(target_path=download_dir, prefix=file_path, overwrite=True)
    print(f"Download complete. File saved to {download_dir}.")
except Exception as e:
    # Any failure here leaves nothing to read below, so report it and stop
    print(f"Error during download: {e}")
    sys.exit(1)

# The file is expected at <download_dir>/<file_path>.
# Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying
# on the platform's default text encoding.
local_file_path = os.path.join(download_dir, file_path)
with open(local_file_path, 'r', encoding='utf-8') as f:
    email_json = json.load(f)

# Print the data
print(json.dumps(email_json, indent=2))

## Download images locally from datastore

In [None]:
# Datastore prefix that holds the images, and the local folder that receives them
image_dir_path = 'data/images'
temp_image_dir = './downloaded_images'
os.makedirs(temp_image_dir, exist_ok=True)

# Fetch everything stored under the image prefix
datastore.download(target_path=temp_image_dir, prefix=image_dir_path, overwrite=True)

# Gather the path of every file found anywhere below the download folder
image_files = []
for root, _, files in os.walk(temp_image_dir):
    image_files.extend(os.path.join(root, name) for name in files)

print(f"Found {len(image_files)} image files.")

## Setup Azure OpenAI Client

In [None]:
# Initialize the chat-completions client that the GPT-4o helper functions call.
# The API key is read from the AZURE_AI_CHAT_KEY environment variable when it is
# set, so a real secret never has to be pasted into the notebook; the "KEY_HERE"
# placeholder remains the fallback.
openai_client = ChatCompletionsClient(
    endpoint="https://aisgenaipoctlnx.services.ai.azure.com/models",
    credential=AzureKeyCredential(os.environ.get("AZURE_AI_CHAT_KEY", "KEY_HERE")),
)

## Setup Azure Computer Vision Client

In [None]:
# Initialize the Azure Computer Vision client used to analyze the downloaded images.
# The subscription key is read from the VISION_KEY environment variable when it is
# set, so a real secret never has to be pasted into the notebook; the "KEY_HERE"
# placeholder remains the fallback.
vision_client = ComputerVisionClient(
    endpoint="https://aisgenaipoctlnx.cognitiveservices.azure.com/",
    credentials=CognitiveServicesCredentials(os.environ.get("VISION_KEY", "KEY_HERE")),
)

## Process, map images and generate new JSON with mapped images

In [None]:
# Base URL placed in front of the matched image filename when image URLs are built.
# The filename is appended as-is (no separator is inserted), so the value should end with "/".
IMAGE_BASE_URL = "<REPLACE WITH YOUR IMAGE BASE URL>"  # e.g., "https://yourstorageaccount.blob.core.windows.net/images/"

def load_json(file_path: str) -> Dict[str, Any]:
    """
    Read the JSON document at file_path and return the parsed object.

    The file is decoded explicitly as UTF-8 (the standard JSON encoding) so the
    result does not depend on the platform's default text encoding.

    Raises whatever open() or json.load() raise for a missing or malformed file.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

def list_images(image_directory: str) -> List[str]:
    """
    Return the names of the regular files directly inside image_directory
    whose extension identifies them as an image.

    Extensions are compared case-insensitively. Sub-directories are skipped,
    even when their name ends in an image extension. Names are returned in the
    order os.listdir yields them.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff')

    image_names = []
    for entry in os.listdir(image_directory):
        # Only plain files qualify
        if not os.path.isfile(os.path.join(image_directory, entry)):
            continue
        _, suffix = os.path.splitext(entry)
        if suffix.lower() in image_suffixes:
            image_names.append(entry)
    return image_names

def analyze_image(image_path: str) -> Dict[str, Any]:
    """
    Send one image file to Azure Computer Vision and condense the response.

    Returns a dict with the keys "description" (text of the first caption, or ""
    when there are no captions), "tags", "brands", "objects" (lists of names)
    and "color" (the dominant colors). The categories feature is requested from
    the service but is not part of the returned dict.
    """
    requested_features = [
        VisualFeatureTypes.description,
        VisualFeatureTypes.tags,
        VisualFeatureTypes.objects,
        VisualFeatureTypes.brands,
        VisualFeatureTypes.color,
        VisualFeatureTypes.categories,
    ]

    # The image is streamed to the service straight from disk
    with open(image_path, "rb") as stream:
        analysis = vision_client.analyze_image_in_stream(
            stream,
            visual_features=requested_features,
        )

    captions = analysis.description.captions
    if captions:
        caption_text = captions[0].text
    else:
        caption_text = ""

    tag_names = [tag.name for tag in analysis.tags]
    brand_names = [brand.name for brand in analysis.brands]
    object_names = [obj.object_property for obj in analysis.objects]

    return {
        "description": caption_text,
        "tags": tag_names,
        "brands": brand_names,
        "objects": object_names,
        "color": analysis.color.dominant_colors,
    }

def extract_image_features(image_directory: str) -> Dict[str, Dict[str, Any]]:
    """
    Analyze every image file in image_directory.

    Returns a dict keyed by image filename whose values are the feature dicts
    produced by analyze_image. A progress line is printed for each image.
    """
    features_by_name: Dict[str, Dict[str, Any]] = {}
    for name in list_images(image_directory):
        print(f"Analyzing image: {name}")
        features_by_name[name] = analyze_image(os.path.join(image_directory, name))
    return features_by_name

def match_image(section_description: str, section_tags: List[str], image_features: Dict[str, Dict[str, Any]]) -> str:
    """
    Ask GPT-4o which available image best fits a section's description and tags.

    The prompt lists the description, the comma-joined tags and the JSON dump of
    image_features, and asks for a filename only. Returns the model's reply with
    surrounding whitespace stripped; the reply is also printed.
    """
    prompt_parts = [
        "You are an image matching assistant. Your task is to match an image with the following description and tags.\n",
        f"Description: {section_description}\n",
        f"Tags: {', '.join(section_tags)}\n",
        "Below is the list of available images with their extracted features.\n\n",
        f"{json.dumps(image_features, indent=2)}\n\n",
        "Identify the most relevant image filename and return it. Only return the filename.",
    ]
    prompt = "".join(prompt_parts)

    chat_messages = [
        SystemMessage(content="You assist with mapping descriptions to appropriate images."),
        UserMessage(content=prompt),
    ]
    response = openai_client.complete(
        messages=chat_messages,
        model="gpt-4o",
        temperature=0.2
    )

    reply = response.choices[0].message["content"]
    result = reply.strip()
    print(f"** Matched Image: {result}")
    return result

def map_images_to_sections(sections: Dict[str, Any], image_features: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
    """
    Build a new template JSON in which every image entry carries an 'image_url'.

    An image entry is a dict stored under a key that is exactly 'image' or ends
    with '_image' (e.g. 'logo_image', 'qr_image'). For each such entry that has
    'tags' and/or a '*_description' value, match_image is asked for the best
    filename and IMAGE_BASE_URL + filename is stored under 'image_url'.

    The whole section is traversed recursively (nested dicts and lists, including
    rows and columns), so image entries are found at any depth.

    Args:
        sections: Mapping of section name to section content.
        image_features: Mapping of image filename to its extracted features;
            passed through to match_image.

    Returns:
        {"email_template": {"sections": {...}}} holding deep copies of the
        dict-valued sections, with image URLs added. Sections whose content is
        not a dict are omitted. The input mapping is never modified.
    """
    import copy  # local import: deepcopy is only needed by this function

    new_json = {"email_template": {"sections": {}}}
    description_pattern = re.compile(r'.*_description$')  # Matches keys ending with '_description'
    image_key_pattern = re.compile(r'.*(_image|^image)$')  # Matches keys ending with '_image' and 'image'

    def attach_image_url(key, image_entry):
        """Match one image entry against the available images and store its URL in place."""
        print(f"Processing Image Key: {key}")
        found_tags = image_entry.get('tags', [])
        found_description = None

        # The first key ending in '_description' supplies the description
        for sub_key, sub_value in image_entry.items():
            if description_pattern.match(sub_key):
                found_description = sub_value
                break

        # Without tags or a description there is nothing to match on
        if not (found_tags or found_description):
            return

        matched_image = match_image(found_description or "", found_tags or [], image_features)
        if not matched_image:
            return

        # Clean up the image name (the model may wrap it in quotes) and build the URL
        matched_image = matched_image.strip().replace('"', '').replace("\\", "")
        image_url = f"{IMAGE_BASE_URL}{matched_image}"
        image_entry['image_url'] = image_url
        print(f"Mapped image '{matched_image}' to '{key}' with URL: {image_url}")

    def process_section_content(node):
        """Walk dicts and lists recursively, attaching URLs to every image entry found."""
        if isinstance(node, list):
            for item in node:
                process_section_content(item)
            return
        if not isinstance(node, dict):
            return  # scalars carry no image entries

        for key, value in node.items():
            # Only dict values can hold tags/description; anything else
            # (e.g. a plain filename string) is left untouched instead of crashing.
            if isinstance(value, dict) and image_key_pattern.match(key):
                attach_image_url(key, value)
            process_section_content(value)

    # Process each section in the original JSON
    for section_name, section_content in sections.items():
        if not isinstance(section_content, dict):
            continue
        print(f"\nProcessing Section: {section_name}")
        # Deep copy: URLs are written into nested dicts, which a shallow copy
        # would still share with the caller's original JSON.
        new_section_content = copy.deepcopy(section_content)
        process_section_content(new_section_content)
        new_json["email_template"]["sections"][section_name] = new_section_content

    return new_json


# ------ Main Execution ------ #

# Load the email template JSON from its local download location
json_data = load_json("./downloaded_jsons/data/jsons/email_template.json")

# Extract image features using Azure Computer Vision (one analysis per image file in the folder)
image_directory = "./downloaded_images/data/images"
image_features = extract_image_features(image_directory)

# Pull out the sections mapping; falls back to {} when either key is missing
original_sections = json_data.get("email_template", {}).get("sections", {})

# Map images to JSON sections
new_json_with_images = map_images_to_sections(original_sections, image_features)

# Save the new JSON file with image mappings (this file is the input of the HTML generation step)
new_json_path = "./generated_template_with_images.json"
with open(new_json_path, "w") as f:
    json.dump(new_json_with_images, f, indent=2)
    print(f"New JSON file with image mappings saved to: {new_json_path}")

## Generate final HTML email template

In [None]:
def load_json(file_path: str) -> Dict[str, Any]:
    """
    Read the JSON document at file_path and return the parsed object.

    The file is decoded explicitly as UTF-8 (the standard JSON encoding) so the
    result does not depend on the platform's default text encoding.

    Raises whatever open() or json.load() raise for a missing or malformed file.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

def split_json_into_sections(json_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return the mapping stored under json_data["email_template"]["sections"].

    An empty dict is returned when either key is absent.
    """
    template = json_data.get("email_template", {})
    return template.get("sections", {})

def generate_prompt(section_name: str, section_content: Dict[str, Any]) -> str:
    """
    Build the GPT-4o prompt that asks for the HTML of a single template section.

    The prompt consists of a fixed set of generation rules followed by the
    section name and the section content serialized as indented JSON.
    """
    # Fixed instructions shared by every section
    rules = "".join((
        "You are an expert HTML template generator.\n",
        "The following JSON structure defines the layout, sections, and content of an HTML email template.\n",
        "Generate a clean, mobile-responsive HTML email based on this JSON.\n",
        "Do NOT include <!DOCTYPE html>, <html>, <head>, or <body> tags.\n",
        "Generate only the HTML content for the section, without any code block markers or markdown formatting.\n",
        "Ensure all text, images, and structural components are included exactly as defined in the JSON.\n",
        "IMPORTANT:\n",
        "- If an image section has an 'image_url', only include the <img> tag with that URL.\n",
        "- Ignore all 'description', 'tags', or other metadata related to the image if 'image_url' is present.\n",
        "- Maintain the structural integrity of rows and columns:\n",
        "  - Each row can contain multiple columns.\n",
        "  - Each column may contain a combination of images, titles, and paragraph text.\n",
        "  - Ensure that all elements within a column are grouped together within the same column div.\n",
        "  - Do NOT separate titles, paragraphs, or images that are part of the same column.\n",
        "  - Respect the layout hierarchy and ensure that content from a single column is not split into multiple columns.\n",
        "- Do NOT generate HTML content for 'description' or 'tags'.\n",
    ))

    # Section-specific tail
    section_json = json.dumps(section_content, indent=2)
    return rules + f"Section Name: {section_name}\n" + f"JSON Data:\n{section_json}"

def call_gpt4o(prompt: str) -> str:
    """
    Send prompt to the GPT-4o deployment and return the reply text.

    The request carries a fixed system message and a temperature of 0.2; the
    content of the first choice is returned with surrounding whitespace removed.
    """
    chat_messages = [
        SystemMessage(content="You generate responsive HTML email templates."),
        UserMessage(content=prompt),
    ]
    completion = openai_client.complete(
        messages=chat_messages,
        model="gpt-4o",
        temperature=0.2
    )
    reply_text = completion.choices[0].message["content"]
    return reply_text.strip()

def process_sections(sections: Dict[str, Any]) -> Dict[str, str]:
    """
    Generate the HTML for every section with GPT-4o.

    Returns a dict mapping each section name to its generated HTML, with
    document-level wrapper tags removed and surrounding whitespace stripped.
    Progress lines are printed before and after each section.
    """
    # Document-level tags the model was told not to emit; dropped if they show up anyway
    wrapper_tags = (
        "<!DOCTYPE html>", "<html>", "</html>",
        "<head>", "</head>", "<body>", "</body>",
    )

    html_sections = {}
    for section_name, section_content in sections.items():
        print(f"Processing section: {section_name}")

        html_output = call_gpt4o(generate_prompt(section_name, section_content))
        for tag in wrapper_tags:
            html_output = html_output.replace(tag, "")

        html_sections[section_name] = html_output.strip()
        print(f"Completed section: {section_name}\n")

    return html_sections

def combine_html_sections(html_sections: Dict[str, str]) -> str:
    """
    Wrap the generated section fragments in a complete HTML document.

    The fragments are concatenated in the dict's iteration order, printed, and
    placed inside the container div of a fixed page skeleton.
    """
    body_markup = "".join(html_sections.values())
    print(body_markup)

    return f"""
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Email Template</title>
        <style>
            body {{ font-family: Arial, sans-serif; }}
            .container {{ width: 100%; max-width: 600px; margin: auto; }}
        </style>
    </head>
    <body>
        <div class="container">
            {body_markup}
        </div>
    </body>
    </html>
    """

def save_html(html_content: str, output_file: str) -> None:
    """
    Write html_content to output_file and print a confirmation line.

    The file is written as UTF-8 so its bytes agree with the charset="UTF-8"
    declaration in the generated template, independent of the platform's
    default text encoding. An existing file is overwritten.
    """
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)
    print(f"HTML email template saved to {output_file}")

# ------ Main Execution ------ #

# Load the template JSON that already carries the mapped image URLs
json_data = load_json("./generated_template_with_images.json")

# Split JSON into sections (section name -> section content)
sections = split_json_into_sections(json_data)

# Process each section and generate HTML (one GPT-4o call per section)
html_sections = process_sections(sections)

# Combine all sections into a single HTML template
final_html = combine_html_sections(html_sections)

# Save to file (path is relative to the current working directory)
save_html(final_html, "generated_email.html")


## Display HTML Template

In [None]:
# Render the generated email inline in the notebook output
from IPython.display import HTML, display
file_path = './generated_email.html'
# NOTE(review): the file is opened without an explicit encoding, so it is decoded
# with the platform default — confirm this matches how the file was written.
with open(file_path, 'r') as html_file:
    html_content = html_file.read()

display(HTML(html_content))