In [6]:
import requests
import re

import json

import os

# Load the article numbers from an excel file
# Using pandas for this task
import pandas as pd

import math

import shutil

import urllib.request

from concurrent.futures import ThreadPoolExecutor

In [7]:
def remove_trailing_commas(json_str):
    """Drop every comma that is followed only by whitespace and a closing
    ``}`` or ``]``.

    JavaScript object literals tolerate such trailing commas, but
    ``json.loads`` does not, so they have to be removed before parsing.

    The substitution is purely textual: it does not track string literals,
    so a comma-then-closer sequence inside a quoted value is rewritten too.

    Args:
        json_str: Text of the JSON-like literal.

    Returns:
        A new string with the trailing commas removed.
    """
    # Capture the closer so a single pass covers both the brace and the
    # bracket case; the comma and any whitespace before it are dropped.
    return re.sub(r',\s*([}\]])', r'\1', json_str)

In [8]:
# Make sure the 'images' directory exists.
# exist_ok=True makes the call idempotent (safe to re-run the cell) and avoids
# the gap between a separate existence check and the creation call.
os.makedirs('images', exist_ok=True)

In [9]:
def _extract_article_details(html):
    """Parse the ``productArticleDetails`` JavaScript object found in ``html``.

    The object literal is not valid JSON as served, so it is cleaned up
    textually before being handed to ``json.loads``.

    Args:
        html: Text of the product page.

    Returns:
        The parsed object, or ``None`` when no ``productArticleDetails``
        assignment is present in ``html``.

    Raises:
        json.JSONDecodeError: If the cleaned-up text is still not valid JSON.
    """
    match = re.search(r'productArticleDetails\s*=\s*({.*?})\s*;', html, re.DOTALL)
    if match is None:
        return None

    json_str = match.group(1)
    # NOTE: every single quote becomes a double quote, including apostrophes
    # inside values; CRLF line breaks and tabs are dropped.
    json_str = json_str.replace("'", '"').replace('\r\n', '').replace('\t', '')

    # Collapse the JS ternary `isDesktop ? "a" : "b"` to its first branch.
    json_str = re.sub(r'isDesktop \? "(.*?)" : "(.*?)"', r'"\1"', json_str)

    # Squeeze runs of spaces into a single space.
    json_str = re.sub(' +', ' ', json_str)

    # JSON does not allow trailing commas.
    json_str = remove_trailing_commas(json_str)

    return json.loads(json_str)


def process_article(article_number, timeout=30):
    """Download every image listed on the product page of one article.

    The page ``https://www2.hm.com/en_my/productpage.<article_number>.html``
    is fetched, its ``productArticleDetails`` object is parsed, and each
    entry of ``data[str(article_number)]['images']`` is saved as
    ``images/<article_number>/image_<n>.jpg`` (``n`` starts at 1).

    Failures are reported with ``print`` instead of being raised, so one bad
    article or image does not stop the remaining work.

    Args:
        article_number: Article identifier; interpolated into the URL and
            used (as a string) as the key into the parsed details.
        timeout: Seconds to wait for the product-page request. It is not
            applied to the image downloads.

    Returns:
        None.
    """
    url = f'https://www2.hm.com/en_my/productpage.{article_number}.html'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # Without a timeout a stalled connection would block this worker forever.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f'Request failed for article {article_number}: {exc}')
        return

    if response.status_code != 200:
        print(f'Unexpected HTTP status {response.status_code} for article {article_number}')
        return

    try:
        data = _extract_article_details(response.text)
    except json.JSONDecodeError as exc:
        print(f'Could not parse product details for article {article_number}: {exc}')
        return

    if data is None:
        print(f'Product Article not Found: {article_number}')
        return

    key = str(article_number)
    try:
        images = [image['image'] for image in data[key]['images']]
    except (KeyError, TypeError) as exc:
        print(f'Unexpected product details layout for article {article_number}: {exc!r}')
        return

    dir_path = os.path.join("images", key)
    # exist_ok avoids a check-then-create race between worker threads.
    os.makedirs(dir_path, exist_ok=True)

    for i, image in enumerate(images, start=1):
        # The scraped image value has no scheme, so one is prepended here.
        image_url = f"https:{image}"
        print(f"Downloading image {image_url}...")
        try:
            urllib.request.urlretrieve(image_url, os.path.join(dir_path, f"image_{i}.jpg"))
        except OSError as exc:
            # urllib's URLError/HTTPError are OSError subclasses.
            print(f"Failed to download {image_url}: {exc}")
            continue
        print(f"Saved as image_{i}.jpg")


# Using threads to make the requests faster.
# Jobs are submitted individually (rather than via executor.map, whose result
# iterator was never consumed) so that an exception raised inside a worker is
# reported below instead of being silently discarded.
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = {
        executor.submit(process_article, article_number): article_number
        for article_number in article_numbers
    }

# Leaving the `with` block waits for all workers, so every future is done here.
for future, article_number in futures.items():
    error = future.exception()
    if error is not None:
        print(f'Article {article_number} failed: {error!r}')

Product Article not Found: 751471096
Product Article not Found: 684021184
Product Article not Found: 751471094
Product Article not Found: 685816182
Product Article not Found: 751471072
Product Article not Found: 751471097
Product Article not Found: 763275050
Product Article not Found: 928133041
Product Article not Found: 930126068
Product Article not Found: 932232024
Product Article not Found: 941811012
Product Article not Found: 970819054
Product Article not Found: 956308014
Product Article not Found: 970818051
Product Article not Found: 970819060
Product Article not Found: 990413001
Product Article not Found: 973277037
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F41%2F26%2F4126aa36a820f55e977f3cba0e9105ecc4c3340e.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Product Article not Found: 1009953017
Saved as image_1.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=qual

Saved as image_5.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F7c%2F4d%2F7c4d820a55e63adb2d0acc5396f370472b034800.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5Bmen_shirts_casual%5D%2Ctype%5BDESCRIPTIVEDETAIL%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Saved as image_1.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2Fa0%2F70%2Fa070dfdc0306f1fc1c0396e8d9493017dd8d7ce6.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Saved as image_6.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F20%2F83%2F2083aef8c0ec1798d16a0559213030fe8d51687c.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BDESCRIPTIVEDETAIL%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Saved as image_2.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F3a%2F1b%2F3a1bf98d8fb6318dcfb33

Saved as image_1.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F6b%2F13%2F6b137909aae8fb2f471bf5b3bad168c5882f1700.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5Bkids_baby_girl_setsoutfits%5D%2Ctype%5BDESCRIPTIVEDETAIL%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Saved as image_5.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F24%2F8a%2F248a874d9d8fba3fd5932cc90cfd137c7cda6c7f.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BDESCRIPTIVESTILLLIFE%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Saved as image_5.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F7d%2F3a%2F7d3a339111b6f6ce7e4c1976e83fb8480b799b90.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BDESCRIPTIVESTILLLIFE%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Saved as image_2.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2Ffe%2Fc2

Product Article not Found: 1119685001
Saved as image_1.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F1e%2F8e%2F1e8e36aeb6624ccab7d6f2d0a31c6ad967ed4402.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Saved as image_1.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F17%2F4e%2F174ebd2afcbdc2be0f2095005e7a92aa302a12ee.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5Bkids_boys_clothing_tshirtsshirts_tshirts%5D%2Ctype%5BDESCRIPTIVESTILLLIFE%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F67%2F71%2F677186d7ebecc7116767f268c0b52c2b7071a5ca.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Saved as image_2.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B

Saved as image_2.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F7c%2F05%2F7c05e699217cc2bbd25629494feadc5bd0435960.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Saved as image_5.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F62%2F3a%2F623a93ea4d0db7ec2fdfb47c6a6c07ddd504e2b2.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BDESCRIPTIVESTILLLIFE%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2Fbb%2F99%2Fbb99aaee4c267fe46191667ba2075d614dbc3afc.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Saved as image_3.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2Fc2%2F0b%2Fc20be91863cb1e0a232e29d2d59321c24a4ee77e.jpg%5D%2Corigin%5Bdam%5D

Saved as image_3.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F47%2F6d%2F476d249be33a4f34153f645ebd6524bd6ac9ef43.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BDESCRIPTIVEDETAIL%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Saved as image_2.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2Fe7%2F37%2Fe7371ab25a2a7c0c07503ffac8e3f93be27bf57a.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Saved as image_1.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F6d%2F6d%2F6d6d479275cd5b44f350b2a737450ee653029c91.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Saved as image_4.jpg
Saved as image_3.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2Fce%2F9b%2Fce9baf657ebbe9e4bf6a12ed2b

Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F44%2F3b%2F443b67a0f4866d8c48627fd5831043f84ddd6cca.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Saved as image_5.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2Fc0%2F09%2Fc0099dbb8b5a39f5bb64940c37f916ed9b43edbf.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BDESCRIPTIVESTILLLIFE%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Saved as image_1.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F62%2F69%2F62693c7995bc7ea5d4b9fc167e5f621d7800886c.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F17%2F59%2F1759a18954a4a5ab7b61bbddc8180d0c5adc3eda.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ct

Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F36%2F07%2F3607c5b310be194994ed7b43b135de8c7714d698.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Saved as image_2.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F72%2F4f%2F724f9866f71a2e3f839c7252a7725521e38ae188.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BDESCRIPTIVESTILLLIFE%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Saved as image_5.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F6b%2F9c%2F6b9cab88cbb180343b954ee36bc90f4cfa9df153.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BDESCRIPTIVESTILLLIFE%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Saved as image_3.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2Fd1%2F0a%2Fd10a397aab665c4e0166a2080ccfeb7ba2bff3f1.jpg%5D%2Cori

Saved as image_1.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F4e%2F2d%2F4e2d7f6f769bf9b2031c035ed5bbf147c4ff4712.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BDESCRIPTIVEDETAIL%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Saved as image_3.jpg
Saved as image_4.jpgSaved as image_1.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F07%2F3e%2F073e0ad2b999d0aaa99e2f2298dbc0b7dc4fcea1.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...

Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F06%2Ff9%2F06f95b7eba87c473b87918b6e5b1b97c3c707a94.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Saved as image_4.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F40%2F58%2F4058d9fbc1fe5b61e0a9d5e896

Saved as image_7.jpg
Saved as image_2.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2Fd1%2F50%2Fd150cb77d716768afa56d30c7b21d32ab42cdcc2.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Saved as image_3.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2Fb1%2Fef%2Fb1ef33ca86146a27ec0fff69450478e3b24b99ba.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BLOOKBOOK%5D%2Cres%5Bm%5D%2Chmver%5B1%5D&call=url[file:/product/main]...
Saved as image_5.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2Fab%2F51%2Fab517ee29453e259856ba71888dd0f7fc70c6cd6.jpg%5D%2Corigin%5Bdam%5D%2Ccategory%5B%5D%2Ctype%5BDESCRIPTIVEDETAIL%5D%2Cres%5Bm%5D%2Chmver%5B2%5D&call=url[file:/product/main]...
Saved as image_4.jpg
Downloading image https://lp2.hm.com/hmgoepprod?set=quality%5B79%5D%2Csource%5B%2F73%2F8c%2F738cd98be2fff24779977dca35