## Downloading Images from URLs Listed in a CSV File

In [19]:
import os
import re

import pandas as pd
import requests

# Load the CSV file into a DataFrame. The path has no directory component,
# so it is resolved relative to the current working directory.
csv_file = 'Final_Output_Articles_Mr_Henry_Data.csv'  # Change this to your actual CSV file path
# `df` is the frame whose rows the download loop below iterates over.
df = pd.read_csv(csv_file)

# Function to download one image and save it to disk
def download_image(url, folder, img_name, timeout=30):
    """Download ``url`` and write the response body to ``folder/img_name``.

    Failures are reported with ``print`` rather than raised, so a single bad
    link does not abort a batch run.

    Args:
        url: Address of the image to fetch.
        folder: Directory the file is written into; it must already exist.
        img_name: File name to use inside ``folder``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection, which would hang the whole download loop.

    Returns:
        None.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            img_path = os.path.join(folder, img_name)
            with open(img_path, 'wb') as f:
                f.write(response.content)
            print(f"Downloaded: {img_name}")
        else:
            print(f"Failed to download: {url}")
    except Exception as e:
        # Deliberately broad: malformed URLs, network errors, timeouts and
        # file-system errors are all logged and skipped.
        print(f"Error downloading {url}: {e}")

# Function to extract image URLs from the string in column
def extract_image_urls(image_string):
    """Return the ``src`` attribute value of every image tag in ``image_string``.

    The whole string is scanned for ``src="..."`` attributes instead of
    splitting on ``", "`` and taking the first quoted value of each piece.
    That older approach returned the wrong attribute whenever ``src`` was not
    the first one in the tag (e.g. ``alt="..." src="..."``) and broke tags
    whose attribute text itself contained a comma.

    Args:
        image_string: Cell value expected to hold one or more img tags.
            Anything that is not a ``str`` (for example the NaN pandas uses
            for empty cells) yields an empty list.

    Returns:
        A list of URL strings in the order they appear; empty if none found.
    """
    # Only attempt to parse if image_string is a valid string
    if not isinstance(image_string, str):
        return []
    # The lookbehind keeps attributes such as data-src="..." from matching.
    return re.findall(r'(?<![\w-])src="([^"]*)"', image_string)

# Loop through the CSV rows, skipping the first START_ROW rows
# (iloc[10:] begins at position 10, i.e. the 11th data row).
START_ROW = 10     # zero-based position of the first row to process
IMAGE_COLUMN = 3   # zero-based position of the column holding the <img> tags (4th column)

for index, row in df.iloc[START_ROW:].iterrows():
    folder_name = f"EN{str(index + 1).zfill(3)}"  # Naming convention EN001, EN002, etc.

    # Create the folder if it doesn't exist; this happens before the URLs are
    # extracted, so rows without images still get an (empty) folder.
    os.makedirs(folder_name, exist_ok=True)

    # Positional access must go through .iloc: plain row[3] on a Series with
    # string labels relies on a fallback that pandas has deprecated.
    img_data = row.iloc[IMAGE_COLUMN]
    image_urls = extract_image_urls(img_data)

    # Download each image and save it with the desired naming convention
    for i, img_url in enumerate(image_urls, start=1):
        img_name = f"{folder_name}-{i}.jpg"  # Naming format EN001-1, EN001-2, etc.
        download_image(img_url, folder_name, img_name)


Downloaded: EN012-1.jpg
Downloaded: EN012-2.jpg
Downloaded: EN012-3.jpg
Downloaded: EN012-4.jpg
Downloaded: EN012-5.jpg
Downloaded: EN012-6.jpg
Downloaded: EN013-1.jpg
Downloaded: EN013-2.jpg
Downloaded: EN013-3.jpg
Downloaded: EN013-4.jpg
Downloaded: EN013-5.jpg
Downloaded: EN013-6.jpg
Downloaded: EN013-7.jpg
Downloaded: EN014-1.jpg
Downloaded: EN014-2.jpg
Downloaded: EN014-3.jpg
Downloaded: EN014-4.jpg
Downloaded: EN014-5.jpg
Downloaded: EN014-6.jpg
Downloaded: EN015-1.jpg
Downloaded: EN015-2.jpg
Downloaded: EN015-3.jpg
Downloaded: EN015-4.jpg
Downloaded: EN015-5.jpg
Downloaded: EN015-6.jpg
Downloaded: EN015-7.jpg
Downloaded: EN015-8.jpg
Downloaded: EN015-9.jpg
Downloaded: EN015-10.jpg
Downloaded: EN015-11.jpg
Downloaded: EN015-12.jpg
Downloaded: EN017-1.jpg
Downloaded: EN017-2.jpg
Downloaded: EN017-3.jpg
Downloaded: EN017-4.jpg
Downloaded: EN017-5.jpg
Downloaded: EN017-6.jpg
Downloaded: EN017-7.jpg
Downloaded: EN018-1.jpg
Downloaded: EN018-2.jpg
Downloaded: EN018-3.jpg
Downloaded: E

Downloaded: EN082-1.jpg
Downloaded: EN083-1.jpg
Downloaded: EN083-2.jpg
Downloaded: EN083-3.jpg
Downloaded: EN083-4.jpg
Downloaded: EN083-5.jpg
Downloaded: EN083-6.jpg
Downloaded: EN083-7.jpg
Downloaded: EN083-8.jpg
Downloaded: EN083-9.jpg
Downloaded: EN084-1.jpg
Downloaded: EN084-2.jpg
Downloaded: EN084-3.jpg
Downloaded: EN084-4.jpg
Downloaded: EN084-5.jpg
Downloaded: EN084-6.jpg
Downloaded: EN094-1.jpg
Downloaded: EN094-2.jpg
Downloaded: EN094-3.jpg
Downloaded: EN094-4.jpg
Error downloading .hk/NewImages/3/News/July/WAS00071.JPG: Invalid URL '.hk/NewImages/3/News/July/WAS00071.JPG': No scheme supplied. Perhaps you meant https://.hk/NewImages/3/News/July/WAS00071.JPG?
Downloaded: EN095-1.jpg
Downloaded: EN095-2.jpg
Downloaded: EN095-3.jpg
Downloaded: EN095-4.jpg
Downloaded: EN095-5.jpg
Downloaded: EN095-6.jpg
Downloaded: EN095-7.jpg
Downloaded: EN095-8.jpg
Downloaded: EN095-9.jpg
Downloaded: EN095-10.jpg
Downloaded: EN095-11.jpg
Downloaded: EN095-12.jpg
Downloaded: EN095-13.jpg
Downlo

## Zipping the Image Folders

In [20]:
import os
import shutil

# Create a parent folder 'EN' for the ZIP files. exist_ok=True makes the cell
# safe to re-run and raises immediately if 'EN' exists but is not a directory.
parent_folder = 'EN'
os.makedirs(parent_folder, exist_ok=True)

# Function to compress a folder into a ZIP file stored in the parent folder
def compress_folder(folder_name, parent_folder):
    """Zip the contents of ``folder_name`` into ``parent_folder/<folder_name>.zip``.

    The source folder is only archived; it is left in place afterwards.

    Args:
        folder_name: Directory to compress. It is also used as the archive's
            base name.
        parent_folder: Directory that receives the ZIP file.

    Returns:
        The archive path reported by ``shutil.make_archive``, or ``None`` when
        ``folder_name`` is not an existing directory (the folder is skipped
        with a message instead of aborting a batch run).
    """
    if not os.path.isdir(folder_name):
        print(f"{folder_name} does not exist, skipping")
        return None
    output_zip_name = os.path.join(parent_folder, folder_name)  # make_archive appends '.zip'
    archive_path = shutil.make_archive(output_zip_name, 'zip', folder_name)  # Compress the folder
    print(f"{folder_name} has been compressed and moved to {parent_folder}")
    return archive_path

# Compress every existing image folder (EN001, EN002, ...) in the working
# directory. Discovering the folders avoids a hard-coded range, which fails on
# a missing folder and ignores any folder numbered above the range.
folders_to_compress = sorted(
    (
        name for name in os.listdir('.')
        # 'EN' followed by at least three digits; this excludes the 'EN' parent folder itself.
        if name.startswith('EN') and len(name) >= 5 and name[2:].isdigit() and os.path.isdir(name)
    ),
    key=lambda name: (len(name), name),  # keeps EN1000 after EN999
)
for folder_to_compress in folders_to_compress:
    compress_folder(folder_to_compress, parent_folder)

print("All folders have been compressed and moved to the EN folder.")


EN001 has been compressed and moved to EN
EN002 has been compressed and moved to EN
EN003 has been compressed and moved to EN
EN004 has been compressed and moved to EN
EN005 has been compressed and moved to EN
EN006 has been compressed and moved to EN
EN007 has been compressed and moved to EN
EN008 has been compressed and moved to EN
EN009 has been compressed and moved to EN
EN010 has been compressed and moved to EN
EN011 has been compressed and moved to EN
EN012 has been compressed and moved to EN
EN013 has been compressed and moved to EN
EN014 has been compressed and moved to EN
EN015 has been compressed and moved to EN
EN016 has been compressed and moved to EN
EN017 has been compressed and moved to EN
EN018 has been compressed and moved to EN
EN019 has been compressed and moved to EN
EN020 has been compressed and moved to EN
EN021 has been compressed and moved to EN
EN022 has been compressed and moved to EN
EN023 has been compressed and moved to EN
EN024 has been compressed and move