In [None]:
import requests
from bs4 import BeautifulSoup
import re
from urllib.parse import urlparse

# NOTE(security): this API key is committed in plain text. Prefer loading it
# from an environment variable or a secrets store, and rotate the key if this
# file has ever been shared.
ZENROWS_API_KEY = "08d0a56ec34e95b3cd4f6458558356ceb36596d1"  # Replace with your key
PRODUCT_URL = "https://www.walmart.com/ip/Gucci-Flora-Gorgeous-Magnolia-for-Women-3-3-oz-EDP-Spray-New-Tester/14918266812?classType=REGULAR"  # Replace with your product URL

# Upper bound, in seconds, on how long to wait for the scraping API. Without a
# timeout requests.get() can block forever; the value is generous because a
# JS-rendered fetch through a proxy is slow.
REQUEST_TIMEOUT_SECONDS = 180

# Query parameters for the ZenRows endpoint. Both flags are sent as the
# lowercase string "true": requests would serialise the Python bool True as
# "True", which did not match the spelling already used for premium_proxy.
params = {
    "url": PRODUCT_URL,
    "apikey": ZENROWS_API_KEY,
    "js_render": "true",
    "premium_proxy": "true",
}

# Fetch the product page through ZenRows; the status code is checked by the
# reporting code further down.
response = requests.get(
    "https://api.zenrows.com/v1/",
    params=params,
    timeout=REQUEST_TIMEOUT_SECONDS,
)

def extract_site_name(url):
    """Identify which supported marketplace a product URL belongs to.

    Args:
        url: Product page URL. The scheme may be omitted, e.g.
            ``"www.ebay.com/itm/1"``.

    Returns:
        ``"amazon"``, ``"ebay"`` or ``"alibaba"`` when that name occurs in the
        URL's host (checked in that order), otherwise ``"unknown"``.
    """
    parts = urlparse(url)
    if not parts.scheme and not parts.netloc:
        # Without a scheme urlparse() files the host under the path; re-parse
        # the text as a network-location reference so the host is found.
        parts = urlparse("//" + url)
    # .hostname is lower-cased and excludes userinfo and port, so the match is
    # case-insensitive and looks only at the host itself.
    host = parts.hostname or ""
    for site in ("amazon", "ebay", "alibaba"):
        if site in host:
            return site
    return "unknown"

def extract_title(soup, site):
    """Return the product title from a parsed product page.

    Args:
        soup: Parsed page (a BeautifulSoup document).
        site: Site key: ``"amazon"``, ``"ebay"`` or ``"alibaba"``; any other
            value falls back to the page's ``<title>`` element.

    Returns:
        The whitespace-stripped title text, or ``"Title not found"`` when no
        matching element exists.
    """
    if site == "amazon":
        tag = soup.find("span", id="productTitle")
    elif site == "ebay":
        tag = soup.find("h1", id="itemTitle") or soup.find("h1", class_=re.compile("mainTitle"))
    elif site == "alibaba":
        tag = soup.find("h1", class_=re.compile("ma-title")) or soup.find("h1")
    else:
        tag = soup.find("title")

    if not tag:
        return "Title not found"

    text = tag.get_text(strip=True)
    if site == "ebay":
        # The eBay heading can start with a hidden "Details about  \xa0"
        # label. get_text(strip=True) strips each text node separately, so the
        # label usually arrives as a bare "Details about" glued to the title;
        # remove it together with whatever whitespace (including \xa0) is left.
        text = re.sub(r"^Details about\s*", "", text)
    return text

def extract_price(soup, site):
    """Return the displayed price from a parsed product page.

    Args:
        soup: Parsed page (a BeautifulSoup document).
        site: Site key: ``"amazon"``, ``"alibaba"`` or ``"ebay"``. Any other
            value yields ``"Price not found"`` without inspecting ``soup``.

    Returns:
        The price text, or ``"Price not found"`` when nothing matches.
    """
    if site == "amazon":
        # Amazon splits the price into a whole-part and a fraction span.
        whole = soup.find("span", class_="a-price-whole")
        if whole:
            fraction = soup.find("span", class_="a-price-fraction")
            # The whole-part span typically nests the decimal separator, so
            # its text ends in "." (or ","); drop it to avoid "19..99".
            whole_text = whole.get_text(strip=True).rstrip(".,")
            fraction_text = fraction.get_text(strip=True) if fraction else "00"
            return f"{whole_text}.{fraction_text}"

    elif site == "alibaba":
        # Take the first div with a price-like class whose text contains "$".
        # Every candidate is scanned because the first such div may not hold
        # the amount itself.
        for tag in soup.find_all("div", class_=re.compile("price", re.IGNORECASE)):
            text = tag.get_text(strip=True)
            if "$" in text:
                return text

    elif site == "ebay":
        # Take the first ux-textspans span whose text looks like a dollar amount.
        for tag in soup.find_all("span", class_="ux-textspans"):
            text = tag.get_text(strip=True)
            if re.search(r"\$\d", text):
                return text

    return "Price not found"


def extract_images(soup):
    """Collect the unique JPEG image URLs referenced by <img> elements.

    Each image's ``src`` is used, falling back to ``data-src``. URLs that do
    not contain ".jpg" (case-insensitive) are skipped, protocol-relative URLs
    get an ``https:`` prefix, and size-modifier segments such as
    ``._AC_SX679_`` are removed.

    Returns:
        The cleaned URLs in first-seen order, without duplicates.
    """
    unique_urls = {}  # used as an ordered set: keys keep insertion order
    for node in soup.find_all("img"):
        url = node.get("src") or node.get("data-src")
        if not url or ".jpg" not in url.lower():
            continue
        if url.startswith("//"):
            url = "https:" + url
        cleaned = re.sub(r'\._[A-Z0-9,_]+\_', '', url)
        unique_urls.setdefault(cleaned, None)
    return list(unique_urls)

def extract_videos(soup):
    """Collect the unique MP4 URLs referenced by <video> and <source> elements.

    Only the ``src`` attribute is read. URLs that do not contain ".mp4"
    (case-insensitive) are skipped and protocol-relative URLs get an
    ``https:`` prefix.

    Returns:
        The URLs without duplicates, <video> matches first and <source>
        matches after, each group in document order.
    """
    videos = []
    for tag_name in ("video", "source"):
        for tag in soup.find_all(tag_name):
            src = tag.get("src")
            if src and ".mp4" in src.lower():
                videos.append("https:" + src if src.startswith("//") else src)
    # dict.fromkeys() drops duplicates while keeping first-seen order; a set
    # would hand the URLs back in an arbitrary, run-dependent order.
    return list(dict.fromkeys(videos))

# Report the scraped fields on success; otherwise dump the failing response so
# the cause can be diagnosed.
if response.status_code != 200:
    print("❌ Failed to fetch data")
    print("Status Code:", response.status_code)
    print("Response:", response.text)
else:
    # Parse the returned HTML once and run every extractor against it.
    soup = BeautifulSoup(response.text, "html.parser")
    site = extract_site_name(PRODUCT_URL)

    title = extract_title(soup, site)
    price = extract_price(soup, site)
    images = extract_images(soup)
    videos = extract_videos(soup)

    # ✅ Output
    print("🔹 Site:", site)
    print("🛒 Title:", title)
    print("💰 Price:", price)
    print("🖼️ Images (Top 5):", images[:5])
    print("🎥 Video URLs:", videos or "No video found")
