In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from skimage.metrics import structural_similarity as ssim

import cv2
import numpy as np
import requests
import os

In [2]:
# Search-results page that the Selenium driver opens and scrapes.
URL = "https://pixabay.com/images/search/man/"

# Despite the name this is a local file path: it is handed to cv2.imread as the
# reference picture that every scraped image is compared against.
IMAGE_REFERENCE_URL = "photo_reference.jpg"

# NOTE(review): not referenced by the visible cells - presumably how many link
# levels to follow from URL; confirm before relying on it.
DEPTH_SCRAPPING = 1

# NOTE(review): not referenced by the visible cells - presumably the directory
# where matches are meant to be stored; confirm before relying on it.
SAVE_FOLDER = "results"

In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By

# Start a Chrome session and load the search page. `driver` is module-level
# state: the scraping helpers in this notebook read from it directly instead
# of receiving it as an argument.
driver = webdriver.Chrome()
driver.get(URL)
# Implicit wait (10 s): element lookups keep polling for up to this long
# before giving up when nothing matches.
driver.implicitly_wait(10)

def get_images_urls(url):
    """Collect image URLs from the page currently loaded in the global ``driver``.

    Every ``<img>`` element is inspected and its ``src`` attribute is kept when
    it ends in ``.png``, ``.jpg`` or ``.webp``. A summary line with the number
    of URLs found is always printed, even when the lookup fails.

    NOTE(review): ``url`` is only used in the summary message. This function
    does not navigate to it, so the caller must already have opened the page
    with ``driver.get``.

    Args:
        url: Address of the page being scraped (reported in the summary line).

    Returns:
        list[str]: The matching ``src`` values, in the order the elements were
        returned by the driver.

    Raises:
        Exception: Whatever ``driver.find_elements`` raises is propagated
            unchanged. Errors on a single element are printed and skipped.
    """
    accepted_suffixes = ('.png', '.jpg', '.webp')
    # Bound before the try block so the finally clause can always report a
    # count; otherwise a failing find_elements would surface as an
    # UnboundLocalError that hides the real error.
    image_urls = []
    try:
        for img in driver.find_elements(By.TAG_NAME, 'img'):
            try:
                src = img.get_attribute('src')
                if src and src.endswith(accepted_suffixes):
                    image_urls.append(src)
            except Exception as e:
                # Best effort: one unreadable element must not abort the scan.
                print(f"Error obteniendo datos de una imagen: {e}")
        return image_urls
    finally:
        print(f"Se obtuvieron {len(image_urls)} imágenes de: {url}")

# Scrape the image URLs once; `images` is reused later as the list of
# candidates compared against the reference picture.
images = get_images_urls(URL)

# Print each URL on its own line for a quick visual check.
for image_url in images:
    print(image_url)


Se obtuvieron 21 imágenes de: https://pixabay.com/images/search/man/
https://cdn.pixabay.com/photo/2016/11/21/12/42/beard-1845166_1280.jpg
https://cdn.pixabay.com/photo/2015/01/27/09/58/man-613601_640.jpg
https://cdn.pixabay.com/photo/2020/05/12/17/04/man-5163992_640.jpg
https://cdn.pixabay.com/photo/2022/09/02/20/03/man-7428290_640.jpg
https://cdn.pixabay.com/photo/2022/05/21/06/52/standup-paddleboarding-7210815_640.jpg
https://cdn.pixabay.com/photo/2020/09/02/20/52/dock-5539524_640.jpg
https://cdn.pixabay.com/photo/2023/03/02/11/38/man-7825138_640.jpg
https://cdn.pixabay.com/photo/2020/01/07/23/01/sketch-4748895_640.jpg
https://cdn.pixabay.com/photo/2015/03/03/20/42/man-657869_640.jpg
https://cdn.pixabay.com/photo/2016/06/20/04/30/asian-man-1468032_640.jpg
https://cdn.pixabay.com/photo/2019/08/06/08/46/old-man-4387774_640.png
https://cdn.pixabay.com/photo/2023/07/20/04/45/leva-8138344_640.jpg
https://cdn.pixabay.com/photo/2019/12/04/09/30/man-4672229_640.jpg
https://cdn.pixabay.com/p

In [4]:
def get_anchor_urls(url):
    """Collect link targets from the page currently loaded in the global ``driver``.

    Every ``<a>`` element is inspected and its ``href`` attribute is kept when
    it is non-empty. A summary line with the number of links found is always
    printed, even when the lookup fails.

    NOTE(review): ``url`` is only used in the summary message. This function
    does not navigate to it, so the caller must already have opened the page
    with ``driver.get``.

    Args:
        url: Address of the page being scraped (reported in the summary line).

    Returns:
        list[str]: The non-empty ``href`` values, in the order the elements
        were returned by the driver. Duplicates are not removed.

    Raises:
        Exception: Whatever ``driver.find_elements`` raises is propagated
            unchanged. Errors on a single element are printed and skipped.
    """
    # Bound before the try block so the finally clause can always report a
    # count; otherwise a failing find_elements would surface as an
    # UnboundLocalError that hides the real error.
    anchor_urls = []
    try:
        for anchor in driver.find_elements(By.TAG_NAME, 'a'):
            try:
                href = anchor.get_attribute('href')
                if href:
                    anchor_urls.append(href)
            except Exception as e:
                # Best effort: one unreadable element must not abort the scan.
                print(f"Error obteniendo datos de un anchor: {e}")
        return anchor_urls
    finally:
        print(f"Se obtuvieron {len(anchor_urls)} anclas de: {url}")

# Gather every link target found on the loaded page.
anchors = get_anchor_urls(URL)

# Print each link on its own line for a quick visual check.
for anchor_url in anchors:
    print(anchor_url)


Se obtuvieron 619 anclas de: https://pixabay.com/images/search/man/
https://pixabay.com/
https://pixabay.com/blog/posts/advanced-image-search-on-pixabay-46/
https://pixabay.com/photos/
https://pixabay.com/illustrations/
https://pixabay.com/vectors/
https://pixabay.com/videos/
https://pixabay.com/music/
https://pixabay.com/sound-effects/
https://pixabay.com/gifs/
https://pixabay.com/editors_choice/
https://pixabay.com/collections/
https://pixabay.com/playlists/
https://pixabay.com/images/search/
https://pixabay.com/videos/search/
https://pixabay.com/music/search/
https://pixabay.com/popular-searches/
https://pixabay.com/images/search/christmas/
https://pixabay.com/images/search/thanksgiving/
https://pixabay.com/images/search/happy%20birthday/
https://pixabay.com/images/search/good%20morning/
https://pixabay.com/images/search/cartoon/
https://pixabay.com/users/
https://pixabay.com/forum/
https://pixabay.com/blog/
https://pixabay.com/cameras/
https://pixabay.com/service/about/
https://pix

In [5]:
def download_and_format_image(url, timeout=10):
    """Download an image over HTTP and decode it with OpenCV.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds passed to ``requests.get`` so a stalled server cannot
            block the notebook forever. Defaults to 10.

    Returns:
        numpy.ndarray: The image decoded with ``cv2.IMREAD_COLOR``.

    Raises:
        Exception: If the server does not answer with status 200, or if the
            payload is empty or cannot be decoded as an image.
        requests.exceptions.RequestException: Propagated from ``requests.get``
            on connection problems or timeouts.
    """
    # The body is read in full below, so streaming would add nothing and would
    # keep the connection open on the error path.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"No se pudo descargar la imagen desde la URL: {url}")

    # Zero-copy view of the payload as the byte vector cv2.imdecode expects.
    img_array = np.frombuffer(response.content, dtype=np.uint8)

    # imdecode signals "not an image" by returning None; an empty buffer is
    # rejected up front. Both cases are reported explicitly so callers never
    # receive None.
    image = cv2.imdecode(img_array, cv2.IMREAD_COLOR) if img_array.size else None
    if image is None:
        raise Exception(f"No se pudo decodificar la imagen desde la URL: {url}")
    return image

In [6]:
def compare_images(reference_image_url, image_url, size=(300, 300)):
    """Score how similar a remote image is to a local reference picture.

    Both pictures are resized to ``size``, converted to grayscale and compared
    with scikit-image's ``structural_similarity`` (SSIM).

    Args:
        reference_image_url: Path of the local reference image; despite the
            name it is read from disk with ``cv2.imread``.
        image_url: Address of the image to download and compare.
        size: ``(width, height)`` both images are resized to before the
            comparison. Defaults to ``(300, 300)``.

    Returns:
        The mean SSIM score returned by ``structural_similarity``.

    Raises:
        Exception: If the reference image cannot be read, or if the remote
            image cannot be downloaded or decoded.
    """
    image_local = cv2.imread(reference_image_url)
    if image_local is None:
        raise Exception(f"No se pudo cargar la imagen local: {reference_image_url}")

    # Kept under its own name instead of rebinding the `image_url` argument,
    # so the URL is still available for the error message.
    image_remote = download_and_format_image(image_url)
    if image_remote is None:
        # Undecodable payloads would otherwise fail later inside cv2.resize
        # with an error that does not mention the URL.
        raise Exception(f"No se pudo decodificar la imagen descargada: {image_url}")

    # Same preprocessing for both inputs: common size first, then grayscale.
    gray_local, gray_remote = (
        cv2.cvtColor(cv2.resize(picture, size), cv2.COLOR_BGR2GRAY)
        for picture in (image_local, image_remote)
    )

    # Only the scalar score is needed, so the full SSIM map is not requested.
    return ssim(gray_local, gray_remote)


In [None]:
# Compare every scraped image against the local reference picture and report
# those whose similarity score is above 0.9.
for image_url in images:
    try:
        similarity_score = compare_images(IMAGE_REFERENCE_URL, image_url)
        # Debug aid, disabled: uncomment the next line to log every score.
        # NOTE(review): the output recorded under this cell still shows these
        # score lines, so it comes from a run made with the print enabled.
        #print(f"Puntuación de similitud (SSIM): {similarity_score}")
        if similarity_score > 0.9:
            print(f"Coincidencia encontrada! : {image_url}")
    except Exception as e:
        # Best effort: a failure on one image is reported and the loop
        # continues with the next candidate.
        print(f"Error: {e}")



Puntuación de similitud (SSIM): 1.0
Coincidencia encontrada! : https://cdn.pixabay.com/photo/2016/11/21/12/42/beard-1845166_1280.jpg
Puntuación de similitud (SSIM): 0.08649627068673969
Puntuación de similitud (SSIM): 0.22506548556596978
Puntuación de similitud (SSIM): 0.1297923444546372
Puntuación de similitud (SSIM): 0.16587200288334594
Puntuación de similitud (SSIM): 0.34964930345872347
Puntuación de similitud (SSIM): 0.17926031666364467
Puntuación de similitud (SSIM): 0.11868848760978037
Puntuación de similitud (SSIM): 0.13775134717981916
Puntuación de similitud (SSIM): 0.2548178434606286
Puntuación de similitud (SSIM): 0.03551579042181091
Puntuación de similitud (SSIM): 0.10749616709683757
Puntuación de similitud (SSIM): 0.014341388475105004
Puntuación de similitud (SSIM): 0.12753145116406747
Puntuación de similitud (SSIM): 0.15491232571968588
Puntuación de similitud (SSIM): 0.20974267261336524
Puntuación de similitud (SSIM): 0.12361448237676853
Puntuación de similitud (SSIM): 0.13