In [155]:
"""
This script parses the HTML of the Dick's Sporting Goods clearance page, finds the items with the largest discount
relative to their original retail price, and ranks them by that discount. The idea is that items bought at the
largest discount can be resold at a profit on sites such as Poshmark, eBay or other retail websites.
"""

import requests 
import re
import lxml.html as lh
import pandas as pd
from bs4 import BeautifulSoup
import urllib.request
pd.set_option('display.max_rows', 500)

In [156]:
# Clearance listing URL (men's apparel, brand filter and page size are encoded in the query string)
url1=('https://www.dickssportinggoods.com/f/clearance-mens-apparel?pageNumber=0&filterFacets=facetStore%3ABOPIS%2CSHIP%3BX_BRAND%3ANike%2Cadidas%2CPatagonia%2CThe%20North%20Face%2CWalter%20Hagen%2CDSG&pageSize=144')

#Download the page. Without a timeout requests waits forever on a stalled connection,
#which would hang the kernel; 30 seconds is a generous upper bound for one page.
page1 = requests.get(url1, timeout=30)

#Fail fast on an HTTP error (4xx/5xx) instead of silently parsing an error page
page1.raise_for_status()

#Store the parsed contents of the website under soup
soup = BeautifulSoup(page1.text,'html.parser')

In [157]:
# Run one <div> query per CSS class; the order of the class strings matches the
# order of the names on the left-hand side.
low_price, high_price, desc, rate = (
    soup.find_all('div', class_ = css_class)
    for css_class in (
        'rs_item_price',                        # the new resale price
        'rs-fplp-wasprice rs-was-item-price',   # the original retail price
        'rs_product_description d-block',       # product description
        'rs_rating_container',                  # product rating from other purchasers
    )
)
low_price[0] #checks to see if scrape was successful

<div class="rs_item_price"><span class="rs_final_price">$12.97 - $17.97</span></div>

In [158]:
def splitter(x):
    """Parse the new resale price out of a price element's HTML.

    Parameters
    ----------
    x : str or any object whose ``str()`` is the element's HTML
        The argument is converted to text once, so the branch checks and the
        extraction always look at the same string (previously the checks ran
        on the raw argument while the extraction ran on ``str(x)``).

    Returns
    -------
    str
        * the lower bound when the HTML holds a price range (``' - '``),
        * ``'-'`` when the HTML says ``'See Price In Cart'``,
        * otherwise the text between the first ``'$'`` and ``'</span'``.

    Raises
    ------
    IndexError
        If the expected start marker is not present in the HTML.
    """
    text = str(x)
    if ' - ' in text:
        # Range such as "$12.97 - $17.97": keep what sits between the span opener and " -"
        return text.split('"rs_final_price">$')[1].split(' -')[0]
    if 'See Price In Cart' in text:
        # No visible price; '-' is the placeholder for such items
        return '-'
    return text.split('$')[1].split('</span')[0]
def splitter1(x):
    """Parse the original retail price out of a was-price element's HTML.

    Parameters
    ----------
    x : str or any object whose ``str()`` is the element's HTML
        The argument is converted to text once, so the range check and the
        extraction always look at the same string (previously the check ran
        on the raw argument while the extraction ran on ``str(x)``).

    Returns
    -------
    str
        The text after the first ``'$'``, cut at ``' -'`` when the HTML holds
        a price range (``' - '``) and at ``'*</div>'`` otherwise.

    Raises
    ------
    IndexError
        If the HTML contains no ``'$'``.
    """
    text = str(x)
    # For a range only the lower bound is kept; a single price ends at "*</div>"
    end = ' -' if ' - ' in text else '*</div>'
    return text.split('$')[1].split(end)[0]
def splitter2(x):
    """Parse the product description out of a description element's HTML.

    Returns the text found between the first ``'-1">'`` marker and the
    following ``'</a></div>'``, with surrounding whitespace stripped.
    Raises IndexError when the ``'-1">'`` marker is missing.
    """
    html = str(x)
    after_marker = html.split('-1">')[1]
    raw_description = after_marker.split('</a></div>')[0]
    return raw_description.strip()
def splitter3(x):
    """Parse the product rating out of a rating element's HTML.

    Returns the text found between the first ``'rating '`` marker and the
    following ``'" href='``. Raises IndexError when ``'rating '`` is missing.
    """
    html = str(x)
    after_marker = html.split('rating ')[1]
    return after_marker.split('" href=')[0]


In [159]:
# This cell and the three after it run the matching parser over each scraped element.
# Sale price per item, as text ('-' where no price is shown).
low = [splitter(str(price_tag)) for price_tag in low_price]


In [160]:
# Original retail price per item, as text.
high = [splitter1(str(was_tag)) for was_tag in high_price]

In [161]:
# Product description per item.
descrip = [splitter2(str(desc_tag)) for desc_tag in desc]

In [162]:
# Customer rating per item, as text.
ratings = [splitter3(str(rating_tag)) for rating_tag in rate]


In [166]:
#Creates Table for analysis
# NOTE(review): zip() stops at the shortest list and pairs entries purely by position,
# so this assumes the four lists are index-aligned (every product has a sale price,
# a was-price, a description and a rating) - confirm against the page.
df = pd.DataFrame(list(zip(descrip,low,high,ratings)), columns = ['Description','Low','High','Ratings'])

# Drop rows whose sale price contains '-' (the placeholder splitter() returns for
# 'See Price In Cart'). regex=False matches the character literally, and .copy()
# makes the filtered frame independent so the column assignments below do not
# raise SettingWithCopyWarning.
df = df[~df['Low'].str.contains('-', regex=False)].copy()

df['Low'] = df['Low'].astype(float)
df['High'] = df['High'].astype(float)
# Rating rescaled from a 0-5 value to 0-100
df['Ratings'] = (df['Ratings'].astype(float)/5)*100
df['Discount'] = df['High'] - df['Low']
# Discount as a fraction of the original price (0-1, not multiplied by 100)
df['Discount%'] = (df['Discount']/df['High'])
# Heuristic resale price: purchase price marked up by 20% plus half the discount fraction
df['Estimated Sale'] = (df['Low']*((df['Discount%']/2)+0.2))+df['Low']
df['Estimated Profit %'] = (df['Estimated Sale'] - df['Low'])/df['Estimated Sale']
# Column name spelling ('Proft') is kept unchanged so existing references keep working
df['Estimated Proft'] = (df['Estimated Sale']*df['Estimated Profit %'])

In [167]:
# Show the table ranked by discount fraction, largest first (df itself is left unsorted)
df.sort_values(by='Discount%', ascending=False)

Unnamed: 0,Description,Low,High,Ratings,Discount,Discount%,Estimated Sale,Estimated Profit %,Estimated Proft
35,Walter Hagen Men's 11 Majors Championship Stri...,13.97,65.0,94.584,51.03,0.785077,22.247762,0.372072,8.277762
33,Walter Hagen Men's Essential Texture Stripe Go...,12.97,55.0,94.34,42.03,0.764182,20.519719,0.367925,7.549719
23,Nike Men's Legend 2.0 V-Neck T-Shirt,8.97,25.0,89.432,16.03,0.6412,13.639782,0.342365,4.669782
25,adidas Men's Drive Heather Block Golf Polo,19.97,55.0,97.778,35.03,0.636909,30.323537,0.341436,10.353537
10,adidas Men's Drive Novelty Solid Golf Polo,19.97,55.0,92.222,35.03,0.636909,30.323537,0.341436,10.353537
9,adidas Men's Tiro 19 Training Pants (Regular a...,16.97,45.0,93.932,28.03,0.622889,25.649212,0.338381,8.679212
19,adidas Men's FreeLift Sport T-Shirt,9.47,25.0,90.98,15.53,0.6212,14.305382,0.338011,4.835382
34,Nike Men's Dri-FIT Elite Basketball Shorts,13.97,35.0,85.0,21.03,0.600857,20.960987,0.333524,6.990987
21,adidas Men's Drive Novelty Heather Golf Polo,21.97,55.0,93.334,33.03,0.600545,32.960992,0.333455,10.990992
30,Nike Men's Solid Dry Victory Golf Polo,21.97,55.0,81.818,33.03,0.600545,32.960992,0.333455,10.990992
