In [1]:
import requests
import re
import pandas as pd
import smtplib
from bs4 import BeautifulSoup as soup
from datetime import date
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

today = date.today()
date = today.strftime("%m/%d/%y")

# grabbing the pages and parsing into html
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36', "Upgrade-Insecure-Requests": "1","DNT": "1","Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8","Accept-Language": "en-US,en;q=0.5","Accept-Encoding": "gzip, deflate"}

links = open('Abercrombie_Links.txt', 'r')
types = links.readlines()
links.close()

# reading in data frame
df = pd.read_csv("Abercrombie_Data.csv")

for type in types:
    html = requests.get(type,headers=headers)
    page_soup = soup(html.text,"html.parser")

    # grabs each product on page
    containers = page_soup.findAll("div",{"class": "product-template ds-override"})

    # general info for all products on the page
    promotion_container = page_soup.findAll("span",{"class":"desktop"})
    promotion = promotion_container[0].text.strip()

    category_text = page_soup.title.text.split('|')
    category = category_text[0].strip()

    # specific product details
    for container in containers:
        # product html detail
        product_container = container.findAll("a",{"class":"product-card__name"})
        product_name = product_container[0].text.strip()

        if("Icon" in product_name or "Logo" in product_name or "Embossed" in product_name):
            continue
            
        sku_container = container.findAll("button",{"class":"button ds-override overlay-button product-card__button product-card__button--l product-card__button--save"})
        sku = sku_container[0]["data-product-id"]
        sku = int(sku)
    
        color_container = container.findAll("a",{"class":"product-card__image-link"})
        product_color = color_container[0].img["alt"].split(',')
        color = product_color[1].title().strip()

        price_container = container.findAll("span",{"class":"product-price-text ds-override"})
        if(len(price_container)>1):
            sale_price = price_container[1].text.strip()
            original_price = price_container[0].text.strip()
            sale_price = float(sale_price[1:])
            current_lowest = sale_price
            original_price = float(original_price[1:])
        else:
            original_price = price_container[0].text.strip()
            sale_price = ""
            original_price = float(original_price[1:])
            current_lowest = original_price
        
        extra_promo_container = container.findAll("span",{"class":"promo-badge"})
        if(len(extra_promo_container)>0):
            extra_promo = extra_promo_container[0].text.strip()
            if("Off In Bag" in extra_promo):
                extra_promo_percent = re.findall('\d+',extra_promo)
                extra_promo_percent = int(extra_promo_percent[0])
            else:
                extra_promo = ""
                extra_promo_percent = 0
        else:
            extra_promo = ""
            extra_promo_percent = 0
            
        # calculations
        price_after_promos = current_lowest * ((100 - extra_promo_percent) / 100)
        price_after_promos = round(price_after_promos,2)
        
        perc_off_orig = 100 - (price_after_promos / original_price) * 100
        perc_off_orig = round(perc_off_orig,2)
        
        df_temp = df.loc[df["ID"] == sku]
        average_price = round(df_temp["Price After Promo"].mean(),2)
        
        perc_from_avg = ((price_after_promos - average_price) / price_after_promos) * 100
        perc_from_avg = round(perc_from_avg,2)

        df = df.append({"Date": date, "Category": category, "Product Name": product_name, 
                        "Product Color": color, "ID": sku, "Original Price": original_price, 
                        "Sale Price": sale_price,"Current Lowest Price": current_lowest, "Promotion": promotion, 
                        "Extra Promotion": extra_promo, "Extra Promotion %": extra_promo_percent,
                        "Price After Promo": price_after_promos, "% Off From Original": perc_off_orig,
                        "Average Price": average_price, "% from Average": perc_from_avg}, 
                        ignore_index = True)
        
df.to_csv("Abercrombie_Data.csv", index=False)

# generating report statistics, dropping and sorting into html
df = pd.read_csv("Abercrombie_Data.csv")
df = df[df["Date"] == date]
df = df.sort_values(["Category","% Off From Original"], ascending=[True, False])
df = df.drop_duplicates(subset="Product Name", keep="first")
df = df.groupby("Category").head(3)
indexNames = df[df["% Off From Original"]==0].index
df.drop(indexNames , inplace=True)
promotion = df.iloc[0]["Promotion"]
df = df.drop(columns=["Date", "Product Color", "ID", "Original Price", "Sale Price", "Current Lowest Price",
                 "Promotion", "Extra Promotion %", "Average Price", "% from Average"])

df_html = df.to_html(index=False, justify="left")

# emailing daily report
fromx = "xujon97python@gmail.com"
to  = "xujon97@gmail.com"
msg = MIMEMultipart("Test")
msg["Subject"] = date + " Daily Report"
msg["From"] = fromx
msg["To"] = to
part_1 = MIMEText("Today's Promotion: " + promotion, "plain")
part_2 = MIMEText(df_html, "html")
msg.attach(part_1)
msg.attach(part_2)

server = smtplib.SMTP("smtp.gmail.com:587")
server.starttls()
server.ehlo()
server.login("xujon97python@gmail.com", "Chicken65%")
server.sendmail(fromx, to, msg.as_string())
server.quit()

(221, b'2.0.0 closing connection m137sm4267133ywd.108 - gsmtp')