## Step 0: Setup Paths

In [None]:
"""
STEP 0: Define Folder and File Paths

This step sets up all required paths so they don't need to be manually entered later.
"""
import os

def setup_paths(base_folder=r"D:\folder\alien tattoos"):
    """Build the file and folder paths shared by every later step.

    Args:
        base_folder: Folder that holds the input CSV and receives the
            downloaded images and generated files. Defaults to the
            project folder this notebook was written for, so calling
            ``setup_paths()`` with no argument behaves as before.

    Returns:
        dict: Maps "csv_path", "image_folder", "metadata_file" and
        "html_output" to their locations under ``base_folder``.
    """
    # Define required file paths based on the base folder
    paths = {
        "csv_path": os.path.join(base_folder, "alien tattoos.csv"),
        "image_folder": base_folder,
        "metadata_file": os.path.join(base_folder, "metadata.json"),
        "html_output": os.path.join(base_folder, "listicle_blog_post_minimal_content.html"),
    }

    # Print paths for verification
    print("\n===== CONFIGURED PATHS =====")
    for key, value in paths.items():
        print(f"{key}: {value}")
    print("===========================\n")

    return paths

# Run setup and store paths globally
paths = setup_paths()

## Step 1: Download Images from Instagram

In [None]:
import csv
import os
import instaloader
import pandas as pd
from urllib.parse import urlparse, urlunparse
import random

def clean_instagram_url(url):
    """
    Reduce an Instagram post URL to its scheme, host and path.

    Params, query string and fragment are discarded. The original and the
    cleaned URL are printed for debugging, and the cleaned URL is returned.
    """
    parts = urlparse(url)
    path_only, _, _ = parts.path.partition("?")  # keep only what precedes any '?'
    stripped = parts._replace(path=path_only, params='', query='', fragment='')
    cleaned_url = urlunparse(stripped)
    print(f"Original URL: {url} -> Cleaned URL: {cleaned_url}")  # Debugging line
    return cleaned_url

def _unused_file_stem(taken_stems):
    """Return a random 5-digit file stem not in ``taken_stems`` and record it there."""
    while True:
        stem = str(random.randint(10000, 99999))
        if stem not in taken_stems:
            taken_stems.add(stem)
            return stem


def download_instagram_images(insta_url, save_folder, indexes=None):
    """
    Download Instagram images from a public post using Instaloader.

    Args:
        insta_url: URL of the post; it is cleaned with clean_instagram_url
            and the last path segment is used as the shortcode.
        save_folder: Folder the images are written to (created if missing).
        indexes: Optional list of 1-based positions to take from a carousel
            post. When empty or None only the first carousel image is taken.
            Ignored for single-image posts.

    Returns:
        str: "Success" when the selected images were handed to Instaloader,
        otherwise a human-readable error message. Errors are reported through
        the return value and printed; they are not raised.
    """
    loader = instaloader.Instaloader(save_metadata=False)

    # Clean the URL
    insta_url = clean_instagram_url(insta_url)
    shortcode = insta_url.strip("/").split("/")[-1]

    try:
        # Fetch the post
        post = instaloader.Post.from_shortcode(loader.context, shortcode)

        # Ensure the save folder exists
        os.makedirs(save_folder, exist_ok=True)

        # Work out which image URLs to fetch
        if post.typename == "GraphSidecar":  # Carousel post
            all_nodes = list(post.get_sidecar_nodes())
            if indexes:
                image_urls = [
                    all_nodes[i - 1].display_url
                    for i in indexes
                    if 0 < i <= len(all_nodes)
                ]
                if not image_urls:
                    # Every requested index was out of range: report it instead
                    # of claiming success with nothing downloaded.
                    error_msg = (
                        f"Error: No valid indexes in {indexes} for {insta_url} "
                        f"(post has {len(all_nodes)} images)"
                    )
                    print(error_msg)
                    return error_msg
            else:
                image_urls = [all_nodes[0].display_url]  # Default to first image only when indexes are empty
        else:  # Single image
            image_urls = [post.url]

        # File names are random 5-digit stems (Instaloader adds the extension).
        # Track the stems already present so a repeated random number cannot
        # clash with a file from an earlier download.
        taken_stems = {os.path.splitext(name)[0] for name in os.listdir(save_folder)}

        for image_url in image_urls:
            file_path = os.path.join(save_folder, _unused_file_stem(taken_stems))  # No extension in filename
            loader.download_pic(file_path, image_url, post.date_utc)
            print(f"Saved: {file_path}")
        return "Success"

    except instaloader.exceptions.QueryReturnedNotFoundException:
        error_msg = f"Error: Post not found - {insta_url}"
        print(error_msg)
        return error_msg
    except instaloader.exceptions.ConnectionException:
        error_msg = f"Error: Connection issue with {insta_url}"
        print(error_msg)
        return error_msg
    except Exception as e:
        error_msg = f"Unexpected error: {e}"
        print(error_msg)
        return error_msg

def process_instagram_csv(csv_file, save_folder):
    """
    Process a CSV file with Instagram URLs and optional indexes.

    The CSV must have a "URL" column and may have an "Indexes" column holding
    comma-separated 1-based carousel positions. Each row is passed to
    download_instagram_images, and the per-row outcome is written to
    "download_log.csv" inside ``save_folder``.

    Args:
        csv_file: Path of the input CSV.
        save_folder: Folder that receives the images and the download log.
    """
    data = []
    # "utf-8-sig" drops a leading BOM (common in spreadsheet exports) that would
    # otherwise turn the first header into "\ufeffURL"; newline="" is what the
    # csv module asks for when it is given a file object.
    with open(csv_file, "r", encoding="utf-8-sig", newline="") as file:
        reader = csv.DictReader(file)
        print("CSV Headers:", reader.fieldnames)  # Debugging line
        for row in reader:
            try:
                url = (row["URL"] or "").strip()
                # DictReader fills missing trailing fields with None, so a row
                # without an Indexes value must not be .strip()-ed directly.
                raw_indexes = (row.get("Indexes") or "").strip()
                if raw_indexes:
                    tokens = (part.strip() for part in raw_indexes.split(","))
                    indexes = [int(token) for token in tokens if token.isdigit()]
                else:
                    indexes = None  # No indexes provided

                print(f"Processing URL: {url} with indexes: {indexes}")
                status = download_instagram_images(url, save_folder, indexes)
                data.append({"URL": url, "Indexes": indexes, "Status": status})

            except KeyError as e:
                error_msg = f"KeyError: Missing expected column in CSV - {e}"
                print(error_msg)
                data.append({"URL": row.get("URL", ""), "Indexes": row.get("Indexes", ""), "Status": error_msg})
            except Exception as e:
                error_msg = f"Unexpected error: {e}"
                print(error_msg)
                data.append({"URL": row.get("URL", ""), "Indexes": row.get("Indexes", ""), "Status": error_msg})

    # Save the log to a DataFrame
    df = pd.DataFrame(data)
    # The folder may not exist yet when no row reached the download step.
    os.makedirs(save_folder, exist_ok=True)
    log_file = os.path.join(save_folder, "download_log.csv")
    df.to_csv(log_file, index=False)
    print(f"Download log saved to {log_file}")

if __name__ == "__main__":
    # Use predefined paths from Step 0
    process_instagram_csv(paths["csv_path"], paths["image_folder"])

## Step 2: Process Images (ChatGPT Content Generation)

### Images Metadata

In [None]:
import base64
import os
import json
import random  # Import random for generating random numbers
from openai import OpenAI

# Initialize OpenAI client.
# The key is taken from the OPENAI_API_KEY environment variable when it is set,
# so the real secret does not have to be saved inside the notebook; the "xxx"
# placeholder remains the fallback.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY") or "xxx")

def encode_image(image_path):
    """
    Read the file at image_path and return its contents as a base64 text string.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "utf-8")

# Data-URL MIME type for each image extension analyze_images accepts.
_IMAGE_MIME_TYPES = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".webp": "image/webp",
}


def analyze_images():
    """
    Analyze images in a folder and generate metadata with the gpt-4o-mini model.

    Every .jpg/.jpeg/.png/.webp file in paths["image_folder"] is sent to the
    chat completions API together with a fixed prompt and a JSON schema. The
    parsed reply gets a random 3-digit number appended to "generated_filename"
    and its "original_filename" overwritten with the real file name. A refusal
    is recorded as an entry with "error" and "details" keys. Files whose
    request or read fails are reported on stdout and left out of the result.
    The collected entries are written to paths["metadata_file"].
    """
    image_folder = paths["image_folder"]
    output_file = paths["metadata_file"]

    # JSON schema you want to enforce
    schema = {
        "type": "object",
        "description": "Schema for generating metadata for images",
        "properties": {
            "original_filename": {"type": "string"},
            "generated_filename": {"type": "string"},
            "blog_h2": {"type": "string"},
            "blog_description": {"type": "string"},
            "pinterest_title": {"type": "string"},
            "pinterest_description": {"type": "string"},
        },
        "required": [
            "original_filename",
            "generated_filename",
            "blog_h2",
            "blog_description",
            "pinterest_title",
            "pinterest_description",
        ],
        "additionalProperties": False,
    }

    # The prompt does not depend on the image, so build it once.
    prompt_text = (
        "I'm creating content around the topic [alien tattoos] in a listicle and I need your help to describe "
        "the tattoos through text. I'll give you images to analyze and describe and explain. Analyze the "
        "following image and generate metadata in JSON format. "
        "\nGenerated Filename: Create a descriptive filename for the image using 4-5 relevant words separated by "
        "underscores, without any file extension. "
        "\nBlog H2: Write a descriptive text (8 words max) that explains the tattoos and is unique to this specific image."
        "\nBlog Description: Provide an informative, concise, and simple explanation of the tattoos and its vibes. "
        "Focus on being descriptive and describing the image casually. 2 sentences total. "
        "\nPinterest Title: Write a descriptive and SEO-optimized title for the image using relevant keywords and terms. "
        "\nPinterest Description: Write a 3-sentence description with these goals: "
        "\n1. Provide context for the pin using informative keywords. "
        "\n2. Explain the benefit of visiting the webpage or content. "
        "\n3. Encourage users to save the pin for later reference. "
    )

    data = []

    for img_file in os.listdir(image_folder):
        lowered_name = img_file.lower()
        mime_type = next(
            (mime for ext, mime in _IMAGE_MIME_TYPES.items() if lowered_name.endswith(ext)),
            None,
        )
        if mime_type is None:
            continue  # not a supported image

        img_path = os.path.join(image_folder, img_file)

        try:
            # Reading the file is inside the try so one unreadable image does
            # not abort the run before the collected metadata is saved.
            base64_image = encode_image(img_path)

            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt_text},
                            {
                                "type": "image_url",
                                "image_url": {
                                    # Label the data URL with the file's real
                                    # type rather than always claiming JPEG.
                                    "url": f"data:{mime_type};base64,{base64_image}",
                                    "detail": "low",
                                },
                            },
                        ],
                    }
                ],
                response_format={
                    "type": "json_schema",
                    "json_schema": {
                        "name": "image_metadata",
                        "schema": schema,
                    },
                },
                max_tokens=1200,
            )

            chat_msg = response.choices[0].message
            refusal = getattr(chat_msg, "refusal", None)

            if refusal:
                data.append({
                    "original_filename": img_file,
                    "error": "Model refused to produce structured output.",
                    "details": refusal
                })
                print(f"Refusal for {img_file}")
            else:
                parsed_json = json.loads(chat_msg.content)

                if "generated_filename" in parsed_json:
                    # Random suffix to reduce clashes between similar names.
                    random_number = random.randint(100, 999)
                    parsed_json["generated_filename"] += str(random_number)

                # Trust the real file name over whatever the model echoed back.
                parsed_json["original_filename"] = img_file
                data.append(parsed_json)
                print(f"Processed: {img_file}")

        except Exception as e:
            print(f"Error generating metadata for {img_file}: {e}")

    try:
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=4, ensure_ascii=False)
        print(f"Metadata saved to {output_file}")
    except PermissionError:
        print(f"Permission denied: Unable to write to {output_file}.")

if __name__ == "__main__":
    analyze_images()


### Post Metadata

In [None]:
import base64
import os
import json
import random  # Import random for generating random numbers
from openai import OpenAI

# Initialize OpenAI client.
# The key is taken from the OPENAI_API_KEY environment variable when it is set,
# so the real secret does not have to be saved inside the notebook; the "xxx"
# placeholder remains the fallback.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY") or "xxx")

def generate_post_metadata():
    """
    Ask the chat model for the post title, meta description and introduction.

    Each answer is stripped of surrounding whitespace and double quotes and
    stored under its key; a failed request stores a fixed error text instead.
    The result is written to "post_metadata.json" in paths["image_folder"].
    """
    output_file = os.path.join(paths["image_folder"], "post_metadata.json")

    prompts = {
        "post_title": "Generate blog post title for the post alien tattoos, in listicle format like... XX alien tattoos xyz..",
        "post_meta_description": "Write a compelling meta description for an article about alien tattoos. Keep it under 140 characters.",
        "post_introduction": "Write a short, engaging introduction for a blog post about alien tattoos. Keep it under 90 words and use simple words."
    }

    data = {}

    for key in prompts:
        try:
            user_message = {"role": "user", "content": prompts[key]}
            completion = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[user_message],
                max_tokens=300
            )

            raw_answer = completion.choices[0].message.content
            chat_msg = raw_answer.strip().strip('"')
            data[key] = chat_msg
            print(f"Generated {key}: {chat_msg}")

        except Exception as e:
            print(f"Error generating {key}: {e}")
            data[key] = "Error generating content."

    # Persist the collected answers as JSON
    try:
        with open(output_file, "w", encoding="utf-8") as out:
            json.dump(data, out, indent=4, ensure_ascii=False)
        print(f"Post metadata saved to {output_file}")
    except PermissionError:
        print(f"Permission denied: Unable to write to {output_file}.")

if __name__ == "__main__":
    generate_post_metadata()

## Step 3: Rename Images

In [None]:
import os
import json

def rename_images():
    """
    Rename image files according to the metadata JSON.

    For each entry, the file named by "original_filename" in
    paths["image_folder"] is renamed to "generated_filename" plus the
    original extension. Entries with missing fields, a missing source file or
    an already existing target are skipped and counted as failures. A summary
    is printed at the end.
    """
    json_path = paths["metadata_file"]
    image_folder = paths["image_folder"]

    # Both inputs must exist before anything is touched
    if not os.path.isfile(json_path):
        print(f"Error: JSON file not found at '{json_path}'.")
        return

    if not os.path.isdir(image_folder):
        print(f"Error: Image folder '{image_folder}' does not exist.")
        return

    try:
        with open(json_path, 'r', encoding='utf-8') as handle:
            data = json.load(handle)
    except json.JSONDecodeError as e:
        print(f"Error: Failed to parse JSON file. {e}")
        return

    total = len(data)
    tally = {"success": 0, "failed": 0}

    for entry in data:
        original = entry.get("original_filename")
        generated = entry.get("generated_filename")

        if not (original and generated):
            print(f"Skipping entry with missing fields: {entry}")
            tally["failed"] += 1
            continue

        original_path = os.path.join(image_folder, original)
        _, original_ext = os.path.splitext(original)  # extension keeps its dot, e.g. '.jpg'
        new_filename = generated + original_ext
        new_path = os.path.join(image_folder, new_filename)

        # Source must exist
        if not os.path.isfile(original_path):
            print(f"Error: Original file '{original}' not found. Skipping.")
            tally["failed"] += 1
            continue

        # Never overwrite an existing target
        if os.path.isfile(new_path):
            print(f"Error: Target filename '{new_filename}' already exists. Skipping.")
            tally["failed"] += 1
            continue

        try:
            os.rename(original_path, new_path)
        except OSError as e:
            print(f"Error: Failed to rename '{original}' to '{new_filename}'. {e}")
            tally["failed"] += 1
        else:
            print(f"Renamed: '{original}' -> '{new_filename}'")
            tally["success"] += 1

    # Summary
    print("\n=== Renaming Summary ===")
    print(f"Total entries processed: {total}")
    print(f"Successfully renamed: {tally['success']}")
    print(f"Failed to rename: {tally['failed']}")

if __name__ == "__main__":
    rename_images()

## Step 4: Create and Upload Post

### Uploading Images

In [None]:
import os
import json
import re
import subprocess
import base64

# --- Configuration ---
WP_URL = "https://larajose.com"  # Your WordPress site URL (ensure https://)
WP_USER = "admin"  # Your WordPress username
# Your Application Password. Taken from the WP_APP_PASSWORD environment variable
# when it is set, so the real secret does not have to be saved in the notebook;
# the "xxx" placeholder remains the fallback.
WP_APP_PASSWORD = os.environ.get("WP_APP_PASSWORD") or "xxx"

# Supported image extensions
SUPPORTED_EXTS = ('.jpg', '.jpeg', '.png', '.webp', '.gif')

def generate_alt_text(filename_without_ext):
    """
    Derive alt text from a file stem.

    The trailing run of digits is removed, then trailing '-', '_' and space
    characters, then any surrounding whitespace.
    """
    trailing_digits = re.compile(r'\d+$')
    without_number = trailing_digits.sub('', filename_without_ext)
    without_separators = without_number.rstrip('-_ ')
    return without_separators.strip()

def upload_image_to_wp(file_path):
    """
    Upload an image to the WordPress media library using cURL.

    Args:
        file_path: Path of the image file to upload.

    Returns:
        The media URL taken from the response's guid.rendered field, or None
        when cURL fails, the response is not valid JSON, or the response does
        not describe an uploaded media item (for example an authentication
        error returned by WordPress).
    """
    filename = os.path.basename(file_path)
    alt_text = generate_alt_text(os.path.splitext(filename)[0])

    # Encode credentials for Basic Auth
    auth = base64.b64encode(f"{WP_USER}:{WP_APP_PASSWORD}".encode('utf-8')).decode('utf-8')

    # cURL command to upload the image.
    # No explicit Content-Type header: -F makes curl send multipart/form-data
    # with the boundary parameter the server needs to parse the body.
    curl_command = [
        "curl",
        "-sS",  # no progress meter, but still report errors on stderr
        "-X", "POST",
        f"{WP_URL}/wp-json/wp/v2/media",
        "-H", f"Authorization: Basic {auth}",
        "-H", f'Content-Disposition: attachment; filename="{filename}"',
        "-F", f"file=@{file_path}",
    ]
    if alt_text:
        # Send the alt text that the success message reports. --form-string
        # passes the value literally (no special handling of '@', '<' or ';').
        curl_command += ["--form-string", f"alt_text={alt_text}"]

    # Run cURL command and capture the output
    try:
        result = subprocess.run(curl_command, capture_output=True, text=True)

        if result.returncode != 0:
            print(f"Failed to upload {filename}. Error Code: {result.returncode}, Output: {result.stderr}")
            return None

        # curl exits with 0 for HTTP error statuses too, so the body has to be
        # inspected to know whether the upload really happened.
        response = json.loads(result.stdout)
        guid = response.get('guid') if isinstance(response, dict) else None
        media_url = guid.get('rendered') if isinstance(guid, dict) else None
        if not media_url:
            detail = response.get('message') if isinstance(response, dict) else None
            print(f"Failed to upload {filename}. Server response: {detail or result.stdout}")
            return None

        print(f"Uploaded {filename} with alt text '{alt_text}': {media_url}")
        return media_url
    except Exception as e:
        print(f"Error uploading {filename}: {e}")
        return None

def upload_images_and_update_metadata():
    """
    Upload every supported image in the image folder and record its URL.

    Files are matched to metadata entries by comparing the file name without
    extension to "generated_filename". A successful upload stores the returned
    URL under "wordpress_url"; the metadata file is then rewritten and a
    summary printed.
    """
    image_folder = paths["image_folder"]
    metadata_path = paths["metadata_file"]

    if not os.path.isdir(image_folder):
        print(f"Error: The specified image folder does not exist: {image_folder}")
        return

    if not os.path.isfile(metadata_path):
        print(f"Error: metadata.json not found at '{metadata_path}'. Please provide a valid path.")
        return

    # Read the current metadata
    try:
        with open(metadata_path, 'r', encoding='utf-8') as source:
            metadata = json.load(source)
    except json.JSONDecodeError as e:
        print(f"Error: Failed to parse metadata.json. {e}")
        return

    # Index entries by generated_filename for lookup by file stem
    metadata_map = {}
    for item in metadata:
        if 'generated_filename' in item:
            metadata_map[item['generated_filename']] = item

    uploads_completed = 0
    uploads_failed = 0

    for filename in os.listdir(image_folder):
        if not filename.lower().endswith(SUPPORTED_EXTS):
            continue

        stem = os.path.splitext(filename)[0]
        entry = metadata_map.get(stem)
        if not entry:
            print(f"Warning: No metadata entry found for '{filename}'. Skipping upload.")
            uploads_failed += 1
            continue

        media_url = upload_image_to_wp(os.path.join(image_folder, filename))
        if not media_url:
            uploads_failed += 1
            continue

        entry['wordpress_url'] = media_url
        uploads_completed += 1

    # Write the metadata back, now carrying the WordPress URLs
    try:
        with open(metadata_path, 'w', encoding='utf-8') as target:
            json.dump(metadata, target, indent=4, ensure_ascii=False)
        print(f"Metadata updated with WordPress URLs and saved to '{metadata_path}'.")
    except Exception as e:
        print(f"Error: Failed to write to metadata.json. {e}")

    # Summary
    print("\n=== Upload Summary ===")
    print(f"Successfully uploaded: {uploads_completed}")
    print(f"Failed to upload: {uploads_failed}")

if __name__ == "__main__":
    upload_images_and_update_metadata()

### Creating the HTML File

In [None]:
import os
import json

def generate_minimal_html():
    """
    Generate minimal HTML content for a WordPress post from metadata.json using WordPress image URLs.

    The output starts with the post introduction from post_metadata.json in a
    <p> element, followed by one numbered <h2>/<img>/<p> group per metadata
    entry that has a "wordpress_url". Entries without one are skipped with a
    warning and do not consume a number. All text taken from the JSON files is
    HTML-escaped before it is inserted. The result is written to
    paths["html_output"].
    """
    # Local import: this function is the only user of the html module here.
    import html

    metadata_path = paths["metadata_file"]
    post_metadata_path = os.path.join(paths["image_folder"], "post_metadata.json")
    output_html_path = paths["html_output"]

    # Check if metadata.json exists
    if not os.path.isfile(metadata_path):
        print(f"Error: metadata.json not found at '{metadata_path}'.")
        return

    # Check if post_metadata.json exists
    if not os.path.isfile(post_metadata_path):
        print(f"Error: post_metadata.json not found at '{post_metadata_path}'.")
        return

    # Load metadata
    try:
        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error: Failed to parse metadata.json. {e}")
        return

    # Load post metadata
    try:
        with open(post_metadata_path, 'r', encoding='utf-8') as f:
            post_metadata = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error: Failed to parse post_metadata.json. {e}")
        return

    post_intro = html.escape(str(post_metadata.get("post_introduction", "")), quote=False)

    # Collect the pieces and join once at the end instead of growing a string.
    sections = [f"\n    <p>{post_intro}</p>\n    "]

    item_number = 0  # counts only the entries that are actually written
    for entry in metadata:
        wordpress_url = entry.get("wordpress_url")
        blog_h2 = str(entry.get("blog_h2", "No Title"))
        blog_description = str(entry.get("blog_description", "No Description"))

        if not wordpress_url:
            print(f"Warning: 'wordpress_url' missing for '{entry.get('generated_filename')}'. Skipping this entry.")
            continue

        item_number += 1

        # Generated text may contain quotes, '<' or '&'; escape it so it cannot
        # break the markup (quote=True for values placed inside attributes).
        heading_text = html.escape(blog_h2, quote=False)
        alt_attr = html.escape(blog_h2, quote=True)
        image_src = html.escape(str(wordpress_url), quote=True)
        description_text = html.escape(blog_description, quote=False)

        # Build numbered h2 elements
        sections.append(
            f"\n    <h2>{item_number}. {heading_text}</h2>"
            f"\n    <img src=\"{image_src}\" alt=\"{alt_attr}\">"
            f"\n    <p>{description_text}</p>\n"
        )

    html_content = "".join(sections)

    # Save HTML content to file
    try:
        with open(output_html_path, 'w', encoding='utf-8') as f:
            f.write(html_content)
        print(f"Minimal HTML content successfully saved to '{output_html_path}'.")
    except Exception as e:
        print(f"Error: Failed to write HTML file. {e}")

if __name__ == "__main__":
    generate_minimal_html()

### Uploading the Post (HTML)

In [None]:
import subprocess
import base64
import json
import os

def create_post_curl_with_html():
    """
    Create a new draft post with HTML content using cURL.

    The post body is read from paths["html_output"] and the title from
    "post_metadata.json" in paths["image_folder"] ("Untitled Post" when the
    title cannot be read). The request is reported as successful only when
    the server answers with a 2xx HTTP status; otherwise the status and the
    response body are printed.
    """
    wp_url = "https://larajose.com"
    username = "admin"
    app_password = "xxx"
    html_file_path = paths["html_output"]
    post_metadata_path = os.path.join(paths["image_folder"], "post_metadata.json")

    endpoint = f"{wp_url}/wp-json/wp/v2/posts"

    # Encode credentials for Basic Auth
    auth = base64.b64encode(f"{username}:{app_password}".encode('utf-8')).decode('utf-8')

    # Read HTML content from the file
    try:
        with open(html_file_path, "r", encoding="utf-8") as html_file:
            html_content = html_file.read()
    except Exception as e:
        print(f"Error reading HTML file: {e}")
        return

    # Read post metadata to get the post title
    try:
        with open(post_metadata_path, "r", encoding="utf-8") as metadata_file:
            post_metadata = json.load(metadata_file)
            post_title = post_metadata.get("post_title", "Untitled Post")
    except Exception as e:
        print(f"Error reading post metadata: {e}")
        post_title = "Untitled Post"

    # Post data (json.dumps escapes non-ASCII, so the payload is plain ASCII)
    data = json.dumps({
        "title": post_title,
        "content": html_content,
        "status": "draft"
    })

    # cURL command
    curl_command = [
        "curl",
        "-sS",  # no progress meter, but still report errors on stderr
        "-X", "POST",
        endpoint,
        "-H", f"Authorization: Basic {auth}",
        "-H", "Content-Type: application/json",
        "-H", "Accept: application/json",
        # Read the body from stdin: a long post passed as a command-line
        # argument can exceed the operating system's argument length limit.
        "--data-binary", "@-",
        # Append the HTTP status on its own last line; curl's exit code is 0
        # even when the server rejects the request.
        "-w", r"\n%{http_code}",
    ]

    # Run cURL command and capture the output
    try:
        result = subprocess.run(curl_command, input=data, capture_output=True, text=True)

        if result.returncode != 0:
            print("Failed to create post.")
            print("Error Code:", result.returncode)
            print("Error Output:", result.stderr)
            return

        response, _, status_text = result.stdout.rpartition("\n")
        status_text = status_text.strip()
        http_status = int(status_text) if status_text.isdigit() else 0

        if 200 <= http_status < 300:
            print("Post created successfully.")
            print("Response:", response)
        else:
            print("Failed to create post.")
            print("HTTP Status:", status_text)
            print("Response:", response)
    except Exception as e:
        print(f"An error occurred while creating the post: {e}")

if __name__ == "__main__":
    create_post_curl_with_html()