# Directions
Run all the code cells in order, from top to bottom, to scrape the SkinCeuticals product information from Dermstore and export it to an Excel file.

## Get Product URLs

In [None]:
import requests
from bs4 import BeautifulSoup

def scrape_product_links(url, timeout=30):
    """Collect the product-page URLs advertised on one listing page.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        A set of absolute product URLs (str). The set is empty when the page
        has no ``div.productListProducts`` container.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
    """
    # Function-scope import: standard library, keeps this cell self-contained.
    from urllib.parse import urljoin

    response = requests.get(url, timeout=timeout)
    # Fail loudly: an error page would otherwise look like an empty listing.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    product_list_div = soup.find("div", class_="productListProducts")
    if product_list_div is None:
        return set()

    result_set = set()
    for link in product_list_div.find_all("a", class_="productBlock_link"):
        href = link.get("href")
        if not href:
            # An anchor without an href has nothing to crawl.
            continue
        # urljoin copes with relative, root-relative and already-absolute hrefs.
        result_set.add(urljoin("https://www.dermstore.com", href))

    return result_set

def get_all_product_links(first_page=1, last_page=2):
    """Gather product URLs from a range of SkinCeuticals listing pages.

    Args:
        first_page: Number of the first listing page to scrape (inclusive).
        last_page: Number of the last listing page to scrape (inclusive).
            The defaults cover pages 1 and 2.

    Returns:
        A list of product URLs without duplicates. Pages are visited in
        ascending order and the links of each page are sorted, so the result
        is the same on every run for the same page contents.
    """
    base_url = "https://www.dermstore.com/brands/skinceuticals.list?pageNumber="
    # A dict is used as an insertion-ordered set: a product listed on more
    # than one page is kept once, so it is not crawled twice later.
    unique_links = {}

    for page_number in range(first_page, last_page + 1):
        url = f"{base_url}{page_number}#mainContent"
        # The per-page result is a set, whose iteration order is arbitrary;
        # sorting makes the final list reproducible.
        unique_links.update(dict.fromkeys(sorted(scrape_product_links(url))))

    return list(unique_links)

# Runs the listing scrape when this cell executes; the later crawling and
# cleaning cells read the result through the name `all_urls`.
all_urls = get_all_product_links()

In [None]:
# Show the collected URLs, then how many there are, one item per line.
print(all_urls, len(all_urls), sep="\n")

['https://www.dermstore.com/skinceuticals-clarifying-clay-masque/11289610.html', 'https://www.dermstore.com/skinceuticals-silymarin-cf-serum-1-fl.-oz/12771530.html', 'https://www.dermstore.com/skinceuticals-purifying-cleanser-200ml/11535233.html', 'https://www.dermstore.com/skinceuticals-physical-matte-uv-defense-spf-50/11289628.html', 'https://www.dermstore.com/skinceuticals-discoloration-defense-dark-spot-serum-30ml/11679046.html', 'https://www.dermstore.com/skinceuticals-a.g.e.-advanced-eye-cream-15ml/14919781.html', 'https://www.dermstore.com/skinceuticals-physical-fusion-uv-defense-spf50-sunscreen-various-sizes/12941160.html', 'https://www.dermstore.com/skinceuticals-soothing-cleanser-150ml/14917034.html', 'https://www.dermstore.com/skinceuticals-hydrating-b5-hyaluronic-acid-gel-moisturizer-30ml/11289619.html', 'https://www.dermstore.com/skinceuticals-conditioning-toner-200ml/11289612.html', 'https://www.dermstore.com/skinceuticals-lha-toner/11289622.html', 'https://www.dermstore.

## Get Product Information

In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

def _synopsis_in_section(soup, section_id):
    """Return the synopsis <div> nested in the <div> with id *section_id*, or None."""
    section = soup.find('div', id=section_id)
    if section is None:
        return None
    return section.find('div', class_='athenaProductPageSynopsisContent')


def crawl_product_info(url, timeout=30):
    """Scrape one product page into a flat record.

    Each field falls back to a "No ..." placeholder string when the element
    it is read from is not present in the page.

    Args:
        url: Address of the product page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        dict with the keys 'Product Title', 'Product Price',
        'Key Ingredients', 'All Ingredients', 'How to use' and 'Skin Types';
        every value is a str.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    dom = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(dom, 'html.parser')

    # Get Product Title
    title_tag = soup.find('h1', class_="productName_title")
    product_title = title_tag.text.strip() if title_tag else 'No Title'

    # Get Product Price
    price_tag = soup.find('p', class_="productPrice_price")
    price = price_tag.text.strip() if price_tag else "No Price"

    # Get Key Ingredients
    # NOTE(review): this takes the first synopsis block anywhere on the page,
    # which may not be the key-ingredients section -- confirm the selector.
    first_synopsis = soup.find('div', class_="athenaProductPageSynopsisContent")
    ingredients = first_synopsis.text.strip() if first_synopsis else "No Ingredients"

    # Get All Ingredients
    full_list_div = _synopsis_in_section(soup, 'product-description-content-lg-7')
    all_ingredients = (
        full_list_div.get_text(strip=True) if full_list_div else "No all ingredients"
    )

    # Get How to use: one line per paragraph, each terminated by '\n'.
    howtouse = ''
    usage_div = _synopsis_in_section(soup, 'product-description-content-15')
    if usage_div:
        howtouse = ''.join(
            paragraph.get_text(strip=True) + '\n'
            for paragraph in usage_div.find_all('p')
        )
    # Also covers a synopsis block that holds no (or only empty) paragraphs,
    # which would otherwise yield a blank cell instead of the placeholder.
    if not howtouse.strip():
        howtouse = 'No How to Use'

    # Get Skin type
    # NOTE(review): unlike the other section ids, this one ends in a bare
    # hyphen with no suffix; it may be truncated -- confirm against the page.
    description_div = soup.find('div', id='product-description-content-')
    skin_type = description_div.text.strip() if description_div else 'No Skin Type'

    return {
        'Product Title': product_title,
        'Product Price': price,
        'Key Ingredients': ingredients,
        'All Ingredients': all_ingredients,
        'How to use': howtouse,
        'Skin Types': skin_type,
    }

def main():
    """Crawl every URL in the module-level ``all_urls`` and tabulate the results.

    Returns:
        pandas.DataFrame with one row per URL, in the order of ``all_urls``;
        the columns are the keys of the dicts returned by
        ``crawl_product_info``.
    """
    records = [crawl_product_info(product_url) for product_url in all_urls]
    return pd.DataFrame(records)

# Build the product table when this code runs as the top-level program;
# later cells read the result through the name `df`.
if __name__ == '__main__':
    df = main()

In [None]:
# Plain-text rendering of the scraped product table.
print(df)

                                        Product Title Product Price  \
0    SkinCeuticals Clarifying Clay Mask (2.4 fl. oz.)        $70.00   
1                          SkinCeuticals Silymarin CF       $182.00   
2      SkinCeuticals Purifying Cleanser (6.8 fl. oz.)        $39.00   
3   SkinCeuticals Physical Matte UV Defense SPF 50...        $40.00   
4                 SkinCeuticals Discoloration Defense       $110.00   
..                                                ...           ...   
67  SkinCeuticals Physical UV Defense SPF 30 Miner...        $42.00   
68  SkinCeuticals Biocellulose Restorative Sheet M...       $130.00   
69  SkinCeuticals Advanced Scar Control Skin Prote...       $114.00   
70  SkinCeuticals NEW Clarifying Adult Acne Skin S...       $213.00   
71  SkinCeuticals Hydra Balm Moisturizer Ointment ...        $25.00   

                                      Key Ingredients  \
0   SkinCeuticals Clarifying Clay Mask uses natura...   
1   SkinCeuticals Silymarin CF is

## Data Cleaning

In [None]:
# Data Cleaning
# Work on a copy so the scraped frame `df` stays untouched and this cell can
# be re-run without compounding its own changes.
df2 = df.copy()
# Drop the brand prefix from each title (literal match, not a regex).
df2['Product Title'] = df2['Product Title'].str.replace('SkinCeuticals ', '', regex=False)
# '$' is a regex metacharacter (end-of-string anchor), so ask for a literal
# match explicitly instead of relying on the pandas version's default.
df2['Product Price'] = df2['Product Price'].str.replace('$', '', regex=False)
# Positional assignment: the rows of `df` were built by iterating `all_urls`,
# so row i belongs to all_urls[i]. This only holds if `all_urls` has not been
# regenerated since `df` was built.
df2['Product URLs'] = all_urls

## Download Excel file

In [None]:
# Download Excel file
output_file = 'Skinceuticals_products.xlsx'
# Write the workbook first so it exists even when the Colab download helper
# cannot be imported.
df2.to_excel(output_file, index=False)

try:
    from google.colab import files
except ImportError:
    # Not running in Google Colab: there is no browser download to trigger,
    # the workbook simply stays where it was written.
    print(f"Saved {output_file}")
else:
    files.download(output_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>