In [42]:
from src import *

In [None]:
# Retailer page used as the starting point for the brand scrape.
url = "https://www.brotherswestand.com/pages/the-best-of-ethical-menswear"

In [43]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import numpy as np


# Get all brand names and corresponding links to sub-pages.

def retailer_scraping(url, timeout=30):
    """Scrape the retailer's brand listing page into a DataFrame.

    Args:
        url: Address of the page to download and parse.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled server.

    Returns:
        A DataFrame with three columns:
        ``Brands`` (text of every ``h4`` with class ``u-textUpper``),
        ``Description`` (stripped text of every ``div`` with class
        ``page-content``) and ``Links`` (``href`` of the first ``<a>`` inside
        every ``div`` matched by class ``o-grid o-grid--item``).

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
        AttributeError: If a matched grid ``div`` contains no ``<a>`` tag.
        ValueError: If the three extracted lists differ in length (raised by
            the DataFrame constructor).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    brand_tags = soup.find_all("h4", {"class": "u-textUpper"})
    brands = [tag.getText() for tag in brand_tags]

    description_tags = soup.find_all("div", {"class": "page-content"})
    description = [tag.getText().strip() for tag in description_tags]

    link_containers = soup.find_all("div", {"class": "o-grid o-grid--item"})
    links = [container.find('a').get('href') for container in link_containers]

    retailer_brands = {
        'Brands': brands,
        'Description': description,
        'Links': links,
    }

    return pd.DataFrame(retailer_brands)


# Prepare dataframe model for single brand.

def single_brand(url, timeout=30):
    """Scrape one brand page into a DataFrame of its products.

    Args:
        url: Address of the brand page to download and parse.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled server.

    Returns:
        A DataFrame with three columns:
        ``Product`` (stripped text of every ``p`` with class ``block__p``),
        ``Selling price`` (text of every ``strong`` matched by class
        ``price-item price-item--sale``, with ``£`` removed and whitespace
        stripped; still a string) and ``Brand`` (text of every ``h4`` matched
        by class ``block__title block__title--small``).

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
        ValueError: If the three extracted lists differ in length (raised by
            the DataFrame constructor).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    product_tags = soup.find_all("p", {"class": "block__p"})
    product = [tag.getText().strip() for tag in product_tags]

    price_tags = soup.find_all("strong", {"class": "price-item price-item--sale"})
    price_sale = [tag.getText().replace('£', '').strip() for tag in price_tags]

    brand_tags = soup.find_all("h4", {"class": "block__title block__title--small"})
    brand = [tag.getText() for tag in brand_tags]

    brand_products = {
        'Product': product,
        'Selling price': price_sale,
        'Brand': brand,
    }

    return pd.DataFrame(brand_products)


# Apply single brand dataframe to all brands (and their corresponding links), and join everything in a new dataframe.

def all_brands(column_retailer):
    """Scrape every brand page and stack the results into one DataFrame.

    Args:
        column_retailer: Iterable (typically a pandas Series) of brand page
            links. Site-relative links are resolved against the retailer's
            base URL; absolute links are used unchanged. Missing entries
            (``None``/NaN) are skipped.

    Returns:
        A DataFrame with a fresh 0..n-1 index holding the rows returned by
        ``single_brand`` for each link, in input order. An empty DataFrame is
        returned when there are no links to scrape.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    base_url = "https://www.brotherswestand.com"

    # Collect all frames first and concatenate once: calling pd.concat inside
    # the loop re-copies the accumulated rows on every iteration.
    frames = [
        single_brand(urljoin(base_url, link))
        for link in column_retailer
        if not pd.isna(link)
    ]

    if not frames:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)


# Get info on:
    # all brand names
    # number of products per brand
    # averages on prices
    
# Convert price type to integer.
# Store result as a new dataframe.

def brand_analysis(df):
    """Return per-brand descriptive statistics of the selling price.

    Args:
        df: A products DataFrame with ``Selling price`` and ``Brand``
            columns. When it is anything else (for example the function
            object that existing callers pass), the products are scraped via
            ``all_brands(retailer_scraping(url).Links)`` using the
            module-level ``url``.

    Returns:
        The ``describe()`` table (count, mean, std, min, quartiles, max) of
        the integer selling price, indexed by brand. The input DataFrame is
        not modified.

    Raises:
        ValueError: If a selling price cannot be parsed as a number.
    """
    required_columns = {'Selling price', 'Brand'}
    if isinstance(df, pd.DataFrame) and required_columns.issubset(df.columns):
        retailers = df
    else:
        retailers = all_brands(retailer_scraping(url).Links)

    # Prices arrive as strings such as "12.00"; go through float so the
    # decimal part parses, then truncate to an integer.
    prices = retailers['Selling price'].apply(lambda x: int(float(x)))

    return prices.groupby(retailers['Brand']).describe()


# Add a new column with a price-point label for each brand, based on the 75th-percentile selling price of its products.

def price_point_brands(df):
    """Label each brand with a price point based on its 75th-percentile price.

    Args:
        df: A per-brand statistics DataFrame containing a ``75%`` column
            (the shape produced by ``describe()``). When it is anything else
            (for example the function object that existing callers pass),
            the statistics are obtained from ``brand_analysis``.

    Returns:
        The statistics table with an added ``Price-point`` column:
        ``low-end`` for values up to 40, ``mid-end`` for values above 40 and
        below 99, ``high-end`` otherwise. A supplied DataFrame is copied, not
        modified.
    """
    if isinstance(df, pd.DataFrame) and '75%' in df.columns:
        result = df.copy()
    else:
        result = brand_analysis(all_brands)

    def label(value):
        # Percentiles are floats, so the bands must be contiguous: the
        # earlier "41 < x" lower bound sent values in (40, 41] to 'high-end'.
        if value <= 40:
            return 'low-end'
        if value < 99:
            return 'mid-end'
        return 'high-end'

    result['Price-point'] = result['75%'].apply(label)

    return result


In [44]:
# Run the full pipeline and display the per-brand price statistics with their price-point label.
# NOTE: the function object passed here is not read as data by price_point_brands.
price_point_brands (brand_analysis)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,Price-point
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Brava Fabrics,46.0,87.652174,31.640493,40.0,65.25,92.5,114.0,165.0,high-end
Elvis and Kresse,19.0,121.842105,99.210362,35.0,66.0,73.0,181.0,310.0,high-end
Idioma,23.0,53.086957,19.51669,35.0,38.0,45.0,64.0,94.0,mid-end
Jollie's,1.0,12.0,,12.0,12.0,12.0,12.0,12.0,low-end
Knowledge Cotton Apparel,48.0,70.833333,35.794528,25.0,32.5,80.0,95.0,138.0,mid-end
Level Collective,20.0,41.0,12.31174,35.0,35.0,35.0,35.0,65.0,low-end
Mud Jeans,35.0,105.142857,13.05676,80.0,104.0,109.0,109.0,130.0,high-end
NWHR,19.0,46.789474,9.852123,32.0,41.0,41.0,54.0,70.0,mid-end
Riz,7.0,107.857143,9.06327,95.0,100.0,115.0,115.0,115.0,high-end
Silverstick,48.0,44.583333,15.605282,22.0,28.0,50.0,55.0,65.0,mid-end
