In [6]:
import requests
from bs4 import BeautifulSoup
import os
import time

# 要爬取的目標網址
url = 'https://www.reallygood.com.tw/newExam/inside?str=9159FE2044FF163D03CB37F9F63CE3CD'

# 發送 GET 請求
response = requests.get(url)
response.raise_for_status()  # 確保請求成功

# 解析 HTML
soup = BeautifulSoup(response.text, 'html.parser')

# 找到包含目標圖片的部分，這裡假設 class 是 poster
sections = soup.find_all('div', class_='poster')

# 創建圖片保存資料夾
if not os.path.exists('images'):
    os.makedirs('images')

image_count = 0

for section in sections:
    # 找到所有圖片標籤
    img_tags = section.find_all('img')
    
    if img_tags:
        print("Image tags found in section")
        
        # 下載並保存圖片
        for img in img_tags:
            img_url = img.get('src')
            if img_url:
                # 若圖片 URL 是相對路徑，則補全為絕對路徑
                if not img_url.startswith('http'):
                    img_url = requests.compat.urljoin(url, img_url)
                
                # 如果圖片 URL 指向 Imgur 網頁，轉換為指向圖片文件
                if 'imgur.com' in img_url and not img_url.endswith(('.jpg', '.png', '.gif')):
                    img_url = img_url + '.jpg'
                
                # 確保圖片 URL 是合法的並能夠訪問
                try:
                    img_response = requests.get(img_url)
                    img_response.raise_for_status()  # 確保請求成功
                    
                    # 保存圖片
                    img_path = os.path.join('images', f'image_{image_count + 1}.jpg')
                    with open(img_path, 'wb') as f:
                        f.write(img_response.content)
                    
                    image_count += 1
                    print(f'Image {image_count} downloaded: {img_url}')
                
                except requests.exceptions.RequestException as e:
                    print(f"Failed to download image {img_url}: {e}")
                    # 添加延遲以避免過多請求
                    time.sleep(1)
    else:
        print("No image tags found in this section")

print('All images have been downloaded.')


Image tags found in section
Image 1 downloaded: https://img.reallygood.com.tw/p20240115RG001_1100_380/toPic.action
Image tags found in section
Failed to download image https://imgur.com/lBSfQz9.jpg: 429 Client Error: Unknown Error for url: https://imgur.com/lBSfQz9.jpg
Image tags found in section
Failed to download image https://imgur.com/m9DDygS.jpg: 429 Client Error: Unknown Error for url: https://imgur.com/m9DDygS.jpg
All images have been downloaded.
