In [21]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from skimage.metrics import structural_similarity as ssim

import cv2
import numpy as np
import requests

In [22]:
URL='https://es.pinterest.com/search/pins/?q=personas&rs=typed'
IMAGE_REFERENCE_URL='photo4.jpeg'
DEPTH_SCRAPPING=1

In [23]:
def go_to_url(url):
    driver.get(url)
    driver.implicitly_wait(10)

In [24]:
def get_images_urls(url):
    try:
        images = driver.find_elements(By.TAG_NAME, 'img')
        image_urls = []
        for img in images:
            try:
                src = img.get_attribute('src')
                if src and (src.endswith('.png') or src.endswith('.jpeg') or src.endswith('.jpg') or src.endswith('.webp')):
                    image_urls.append(src)
            except Exception as e:
                print(f"Error obteniendo datos de una imagen: {e}")
        return image_urls
    finally:
        print(f"Se obtuvieron {len(image_urls)} imágenes de: {url}")

In [25]:
def get_anchor_urls(url):
    try:
        anchors = driver.find_elements(By.TAG_NAME, 'a')
        anchor_urls = []
        for anchor in anchors:
            try:
                href = anchor.get_attribute('href')
                if href:
                    anchor_urls.append(href)
            except Exception as e:
                print(f"Error obteniendo datos de un anchor: {e}")
        return anchor_urls
    finally:
        print(f"Se obtuvieron {len(anchor_urls)} anclas de: {url}")

In [26]:
def download_and_format_image(url):
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        img_array = np.asarray(bytearray(response.content), dtype=np.uint8)
        image = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        return image
    else:
        raise Exception(f"No se pudo descargar la imagen desde la URL: {url}")

In [27]:
def compare_images(reference_image_url, image_url):
    image_local = cv2.imread(reference_image_url)
    if image_local is None:
        raise Exception(f"No se pudo cargar la imagen local: {reference_image_url}")

    image_url = download_and_format_image(image_url)
    
    image_local_resized = cv2.resize(image_local, (300, 300))
    image_url_resized = cv2.resize(image_url, (300, 300))
    
    gray_local = cv2.cvtColor(image_local_resized, cv2.COLOR_BGR2GRAY)
    gray_url = cv2.cvtColor(image_url_resized, cv2.COLOR_BGR2GRAY)
    
    score, _ = ssim(gray_local, gray_url, full=True)
    return score


In [28]:
def scrap_and_analyze(url):
    go_to_url(URL)
    images = get_images_urls(URL)
    anchors = get_anchor_urls(URL)
    driver.quit()

    for image_url in images:
        try:
            similarity_score = compare_images(IMAGE_REFERENCE_URL, image_url)
            #print(f"Puntuación de similitud (SSIM): {similarity_score}")
            if similarity_score > 0.9:
                print(f"Coincidencia encontrada! : {image_url}")
        except Exception as e:
            print(f"Error: {e}") 

In [29]:
def scrape_and_analyze_recursive(url, depth=0, max_depth=2):
    if depth > max_depth:
        return
    
    print(f"Accediendo a {url} (nivel {depth})")
    go_to_url(url)
    images = get_images_urls(url)
    anchors = get_anchor_urls(url)

    for image_url in images:
        try:
            similarity_score = compare_images(IMAGE_REFERENCE_URL, image_url)
            if similarity_score > 0.8:
                print(f"Coincidencia encontrada en {image_url} con puntuación SSIM: {similarity_score}")
        except Exception as e:
            print(f"Error comparando imagen: {e}")
    
    # Llamada recursiva para seguir los enlaces encontrados
    for anchor_url in anchors:
        if anchor_url not in visited_urls:
            visited_urls.add(anchor_url)
            scrape_and_analyze_recursive(anchor_url, depth + 1, max_depth)

In [30]:
driver = webdriver.Chrome()
visited_urls = set()
visited_urls.add(URL)

try:
    scrape_and_analyze_recursive(URL, depth=0, max_depth=DEPTH_SCRAPPING)
except Exception as e:
    print(f"Error en el rastreo: {e}")
finally:
    driver.quit()

Accediendo a https://es.pinterest.com/search/pins/?q=personas&rs=typed (nivel 0)
Se obtuvieron 16 imágenes de: https://es.pinterest.com/search/pins/?q=personas&rs=typed
Se obtuvieron 28 anclas de: https://es.pinterest.com/search/pins/?q=personas&rs=typed
Coincidencia encontrada en https://i.pinimg.com/236x/2d/3f/aa/2d3faaec52fbda21fcdf790f8d3cbcbb.jpg con puntuación SSIM: 0.8882315847731976
Accediendo a https://es.pinterest.com/ (nivel 1)
Se obtuvieron 190 imágenes de: https://es.pinterest.com/
Se obtuvieron 29 anclas de: https://es.pinterest.com/


KeyboardInterrupt: 