In [33]:
import time
import os
import requests
import json
from datetime import datetime 

In [34]:
# Category-page API address (query selects dispCtgr3No=1150431).
# NOTE(review): no code visible in this notebook reads `url`; main() starts
# from its own address instead — confirm whether this cell is still needed.
url = "https://apis.11st.co.kr/pui/v2/page?pageId=APCCATEGORY&dispCtgr3No=1150431&benefit=MBSHP_PRD&blckSn=7933&pageMode=END"

In [35]:
# Function to fetch and parse product data from a URL
def fetch_product_data(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return its decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled host.

    Returns:
        The parsed JSON payload, or ``None`` when the request cannot be
        completed, the status code is not 200, or the body is not valid
        JSON. A message is printed for every failure case.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, timeouts, invalid URLs, ... follow the same
        # "report and return None" contract as a bad status code.
        print(f"Failed to fetch data from {url}, Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data from {url}, Status Code: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # JSON decoding failures raised by response.json() are ValueErrors.
        print(f"Failed to decode JSON from {url}, Error: {exc}")
        return None

In [36]:
# Function to extract product data from the JSON response
def extract_products_from_json(json_data):
    """Build a list of product records from one page's JSON payload.

    Only the first entry of ``json_data['data']`` is inspected. Inside its
    ``blockList``, blocks whose ``type`` equals
    ``'Acme_PC_ProductBox_Category_Col4_Small'`` are scanned, and every item
    of their ``list`` that has a ``prdNm`` key becomes one record.

    Container fields (``data``, ``blockList``, ``list``, ``review``,
    ``promotionFlags``) may be missing or explicitly null; both are treated
    as empty. Blocks without a ``type`` key are skipped.

    Args:
        json_data: Decoded JSON payload (a dict), or ``None``.

    Returns:
        A list of dicts with the keys ``'Image URL'``, ``'Product Name'``,
        ``'Original Price'``, ``'Discounted Price'``, ``'Discount Rate'``,
        ``'Link'``, ``'Review Count'``, ``'Review Score'``, ``'Is Sold Out'``
        and ``'Promotion Texts'``. Absent scalar fields are reported as
        ``'N/A'``. ``'Promotion Texts'`` is the comma-separated text of all
        promotion flags that carry a non-empty text.
    """
    products = []

    # `or` fallbacks make an explicit JSON null behave like a missing key.
    data_entries = (json_data or {}).get('data') or []
    if not data_entries:
        return products

    first_entry = data_entries[0] or {}
    for block in first_entry.get('blockList') or []:
        # .get() so a block without a 'type' field is skipped, not a KeyError.
        if block.get('type') != 'Acme_PC_ProductBox_Category_Col4_Small':
            continue

        for item in block.get('list') or []:
            # Items without a product name are not products.
            if 'prdNm' not in item:
                continue

            # Join the texts of all promotion flags, ignoring flags that have
            # no text so the result never contains a dangling separator.
            promotion_flags = item.get('promotionFlags') or []
            promotion_texts = ', '.join(
                str(flag.get('text')) for flag in promotion_flags if flag.get('text')
            )

            # Format the rate with an f-string so numeric rates work, and
            # only append the unit when there actually is a rate.
            rate = item.get('discountRate')
            if rate is None:
                discount_rate = 'N/A'
            else:
                discount_rate = f"{rate}{item.get('discountRateUnitText') or ''}"

            review = item.get('review') or {}

            products.append({
                'Image URL': item.get('imageUrl', 'N/A'),
                'Product Name': item.get('prdNm', 'N/A'),
                'Original Price': item.get('sellPrice', 'N/A'),
                'Discounted Price': item.get('finalDscPrice', 'N/A'),
                'Discount Rate': discount_rate,
                'Link': item.get('linkUrl', 'N/A'),
                'Review Count': review.get('count', 'N/A'),
                'Review Score': review.get('point', 'N/A'),
                'Is Sold Out': item.get('isSoldOut', 'N/A'),
                'Promotion Texts': promotion_texts,
            })

    return products

In [None]:
# Function to find 'nextDataUrl' in the paging block of a JSON response
def find_next_data_url(json_data):
    """Return the URL of the next page advertised by a page's JSON payload.

    Only the first entry of ``json_data['data']`` is inspected. Every block
    in its ``blockList`` whose ``type`` is ``'Function_Paging'`` is scanned
    and the ``nextDataUrl`` of its ``list`` items is read. When several
    items carry a URL the last one wins; items without a URL never
    overwrite a URL that was already found.

    Missing or null ``data`` / ``blockList`` / ``list`` containers and
    blocks without a ``type`` key are skipped.

    Args:
        json_data: Decoded JSON payload (a dict), or ``None``.

    Returns:
        The next page URL, or ``None`` when the payload advertises none.
    """
    next_url = None

    # `or` fallbacks make an explicit JSON null behave like a missing key.
    data_entries = (json_data or {}).get('data') or []
    if not data_entries:
        return next_url

    first_entry = data_entries[0] or {}
    for block in first_entry.get('blockList') or []:
        # .get() so a block without a 'type' field is skipped, not a KeyError.
        if block.get('type') != 'Function_Paging':
            continue

        for item in block.get('list') or []:
            candidate = item.get('nextDataUrl')
            # Keep the previous hit when this item has no usable URL.
            if candidate:
                next_url = candidate

    return next_url

                    
                    


In [37]:
# Main function to generate and save product data
def main(
    base_url='https://apis.11st.co.kr/pui/v2/page?pageId=APCCATEGORY&dispCtgr2No=1149914&benefit=MBSHP_PRD&blckSn=7933&pageMode=END',
    output_directory='json',
):
    """Collect products from every page of a listing and save them as JSON.

    Starting at ``base_url``, each page is fetched, its products are
    extracted, and the page's next-page URL is followed until no further URL
    is advertised, a fetch fails, or a URL repeats. Whatever was collected up
    to that point is written to ``<output_directory>/products_<YYYY-MM-DD>.json``
    (overwriting a file written earlier the same day) and then printed.

    Args:
        base_url: Address of the first page to fetch.
        output_directory: Directory for the JSON file; created if missing.
    """
    all_products = []
    # Remember visited pages so a repeated next-page URL cannot loop forever.
    visited_urls = set()

    current_url = base_url
    while current_url:
        if current_url in visited_urls:
            print(f"Stopping pagination, URL already visited: {current_url}")
            break
        visited_urls.add(current_url)

        # Fetch the JSON data from the current URL; stop on failure but keep
        # the products gathered so far.
        json_data = fetch_product_data(current_url)
        if not json_data:
            break

        all_products.extend(extract_products_from_json(json_data))

        # Follow the paging block to the next page, if there is one.
        current_url = find_next_data_url(json_data)

    # Save the collected product data as a JSON file named after today's date.
    date_str = datetime.now().strftime('%Y-%m-%d')
    os.makedirs(output_directory, exist_ok=True)
    file_name = os.path.join(output_directory, f'products_{date_str}.json')

    # ensure_ascii=False keeps non-ASCII product names readable in the file.
    with open(file_name, 'w', encoding='utf-8') as file:
        json.dump(all_products, file, ensure_ascii=False, indent=4)

    print(f"Product data saved: {file_name}")

    # Print the extracted product information for verification
    for product in all_products:
        print(product)


# Entry point: call main() only when this code runs as the top-level module,
# not when it is imported from elsewhere.
if __name__ == '__main__':
    main()

Fetching next page: https://apis.11st.co.kr/pui/v2/page?pageId=APCCATEGORY&dispCtgr2No=1149914&benefit=MBSHP_PRD&blckSn=7936&pageMode=NEXT&pageNo=2
Fetching next page: https://apis.11st.co.kr/pui/v2/page?pageId=APCCATEGORY&dispCtgr2No=1149914&benefit=MBSHP_PRD&blckSn=7936&pageMode=NEXT&pageNo=3
Fetching next page: https://apis.11st.co.kr/pui/v2/page?pageId=APCCATEGORY&dispCtgr2No=1149914&benefit=MBSHP_PRD&blckSn=7936&pageMode=NEXT&pageNo=4
Fetching next page: https://apis.11st.co.kr/pui/v2/page?pageId=APCCATEGORY&dispCtgr2No=1149914&benefit=MBSHP_PRD&blckSn=7936&pageMode=NEXT&pageNo=5
Fetching next page: https://apis.11st.co.kr/pui/v2/page?pageId=APCCATEGORY&dispCtgr2No=1149914&benefit=MBSHP_PRD&blckSn=7936&pageMode=NEXT&pageNo=6
Fetching next page: https://apis.11st.co.kr/pui/v2/page?pageId=APCCATEGORY&dispCtgr2No=1149914&benefit=MBSHP_PRD&blckSn=7936&pageMode=NEXT&pageNo=7
Fetching next page: https://apis.11st.co.kr/pui/v2/page?pageId=APCCATEGORY&dispCtgr2No=1149914&benefit=MBSHP_PRD

KeyError: 'data'

NameError: name 'all_products' is not defined