In [2]:
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Scraping

In [128]:
# Download the "design-sofa" listing (the query string requests
# product_list_limit=all) and collect the href of every product-photo anchor.
LISTING_URL = "https://no.sofacompany.com/design-sofa?product_list_limit=all"
search = requests.get(LISTING_URL)
soup = BeautifulSoup(search.content, 'html.parser')
product_anchors = soup.find_all("a", attrs={'class': "product photo product-item-photo"})
links = [anchor['href'] for anchor in product_anchors]
    

# Specification labels as they appear on the product pages, mapped to the
# column names used in the resulting table. Labels that are not listed here
# are kept verbatim by the scraper (it uses trans.get(label, label)).
trans = {
    # overall dimensions
    'Bredde': 'width',
    'Maks bredde': 'max_width',
    'Høyde': 'height',
    'Lengde': 'length',
    'Dybde': 'depth',
    'Maks. dybde': 'max_depth',
    'Min. dybde': 'min_depth',
    # seat dimensions (several spellings map to the same column)
    'Sittehøyde': 'sit_height',
    'Setehøyde': 'sit_height',
    'Sete høyde': 'sit_height',
    'Setedybde': 'sit_depth',
    'Sete dybde': 'sit_depth',
    'Maks setedybde': 'max_sit_depth',
    'Min. setedybde': 'min_sit_depth',
    'Sete bredde': 'sit_width',
    'innvendig bredde': 'sit_width',
    # bed dimensions
    'Sengebredde': 'bed_width',
    'Sengelengde': 'bed_length',
    # arms, legs and clearance
    'Armlenebredde': 'arm_width',
    'Ben': 'legs',
    'Fri høyde under møbler': 'clearance',
}
    
# A number (optionally with a decimal part) directly followed by "cm" at the
# very end of the text, e.g. the "16" in "Oak, B, 16 cm".
_TRAILING_CM = re.compile(r'(\d+(?:[.,]\d+)?)\s*cm$')


def _meta_content(soup, prop):
    """Return the ``content`` attribute of the ``<meta property=prop>`` tag."""
    return soup.find('meta', attrs={'property': prop}).attrs['content']


def _clearance_from_legs(legs_text):
    """Return the trailing "<number> cm" figure of a legs description, or None."""
    match = _TRAILING_CM.search(legs_text)
    if match is None:
        return None
    # Use "." as the decimal separator so the value parses as a number later.
    return match.group(1).replace(',', '.')


def scrap_sofacompany(url):
    """Scrape one sofacompany product page into a flat dict.

    Downloads ``url``, reads the ``og:*`` / ``product:price:*`` ``<meta>``
    tags and every ``<li>`` inside the ``product-specifications__list`` div.

    Args:
        url: Address of the product page to fetch.

    Returns:
        dict with the keys ``source``, ``model``, ``image``, ``description``,
        ``url`` and ``price`` ("<amount> <currency>"), plus one key per
        specification entry. Specification labels are translated through
        ``trans`` when known and kept verbatim otherwise; values are the
        whitespace-stripped text of the entry. When the ``legs`` value ends
        in "<number> cm", that number is also stored under ``clearance``.
        ``sleep`` is always ``False``.

    Raises:
        AttributeError, KeyError: if an expected tag or attribute is missing
            from the page (missing elements are not handled).
    """
    content = requests.get(url).content
    soup = BeautifulSoup(content, 'html.parser')

    price_amount = _meta_content(soup, 'product:price:amount')
    price_currency = _meta_content(soup, 'product:price:currency')

    item = {
        'source': 'sofacompany',
        'model': _meta_content(soup, 'og:title'),
        'image': _meta_content(soup, 'og:image'),
        'description': _meta_content(soup, 'og:description'),
        'url': _meta_content(soup, 'og:url'),
        'price': f'{price_amount} {price_currency}',
    }

    spec_list = soup.find('div', attrs={'class': "product-specifications__list"})
    for entry in spec_list.find_all('li'):
        # The label text looks like "Name: ..."; only the part before ':' is used.
        label = entry.find('label').text.split(':')[0].strip()
        label = trans.get(label, label)
        # Strip so surrounding whitespace neither leaks into the stored value
        # nor hides a trailing "cm" from the clearance pattern.
        val = entry.find('span').text.strip()
        if label == "legs":
            clearance = _clearance_from_legs(val)
            if clearance is not None:
                item['clearance'] = clearance
        item[label] = val

    item['sleep'] = False
    return item

# Scrape every product page now and keep the results in a list. A generator
# would be exhausted by the first pd.DataFrame(items) call, so re-running the
# next cell would silently build an empty frame.
items = [scrap_sofacompany(url) for url in links]

In [129]:
# One row per scraped product dict; keys missing from a product become NaN.
df = pd.DataFrame(data=items)
def max_depth(model):
    """Return the hard-coded maximum depth for a known model name.

    Only 'Nelson' and 'Vilmar' are listed; any other value yields ``None``.
    NOTE(review): the unit is not stated here (presumably cm, like the
    scraped dimensions) - confirm.
    """
    for known_model, depth in (('Nelson', 198), ('Vilmar', 163)):
        if model == known_model:
            return depth
    return None
    
# Hard-coded maximum depth per model (None for models that are not listed).
model_max_depth = df['model'].apply(max_depth)
df['max_depth'] = model_max_depth

# Drop the last two characters of the width text (the "cm" in e.g. "264cm")
# and store the remainder as an integer column.
width_digits = df['width'].str.slice(stop=-2)
df['width_i'] = width_digits.astype(int)

# NOTE(review): to_csv writes the index as an unnamed first column by default,
# which comes back as "Unnamed: 0" when the file is read again.
df.to_csv("./data/sofacompany.csv")

# Start here

In [3]:
# Load the previously scraped table from the published CSV so the analysis
# below can run without repeating the scrape.
SOFACOMPANY_CSV_URL = "https://raw.githubusercontent.com/veonua/norske_sofaer/master/data/sofacompany.csv"
df = pd.read_csv(SOFACOMPANY_CSV_URL)
len(df)

167

In [131]:
from IPython.display import HTML
def image_formatter(im):
    """Render a cell value as an HTML ``<img>`` tag whose ``src`` is that value.

    The value is HTML-escaped before interpolation: it is used as a
    ``to_html`` formatter together with ``escape=False``, so unescaped text
    (e.g. a stray double quote) would otherwise be injected into the page
    as markup.
    """
    from html import escape  # local import keeps this cell runnable on its own
    return f'<img src="{escape(str(im), quote=True)}">'
def a_formatter(url):
    """Render a cell value as an HTML link that shows the URL as its own text.

    The value is HTML-escaped before interpolation: it is used as a
    ``to_html`` formatter together with ``escape=False``, so unescaped text
    would otherwise be injected into the page as markup.
    """
    from html import escape  # local import keeps this cell runnable on its own
    safe_url = escape(str(url), quote=True)
    return f'<a href="{safe_url}"> {safe_url}</a>'

# Keep the rows whose numeric width is between 250 and 284
# (Series.between includes both bounds by default).
width_in_range = df['width_i'].between(250, 284)
disp = df[width_in_range]

# Render as an HTML table; escape=False lets the formatter-generated
# <img>/<a> markup through, and also leaves every other column unescaped.
table_html = disp.to_html(formatters={'image': image_formatter, 'url': a_formatter}, escape=False)
HTML(table_html)

Unnamed: 0,source,model,image,description,url,price,height,width,depth,sit_depth,sit_width,sit_height,legs,SKU,sleep,clearance,max_depth,width_i
18,sofacompany,Nelson,,,https://no.sofacompany.com/nelson-cornersofa-olena-light-grey-oak-b-16-cm-150307177016,14999 NOK,81cm,264cm,93cm,56cm,227cm,44cm,"Oak, B, 16 cm",150307177016,n,16.0,198.0,264
19,sofacompany,Nelson,,,https://no.sofacompany.com/nelson-cornersofa-olena-antracit-oak-soap-b-16-cm-150307184164,14999 NOK,81cm,264cm,93cm,56cm,227cm,44cm,"Oak Soap, B, 16 cm",150307184164,n,16.0,198.0,264
25,sofacompany,Nelson,,,https://no.sofacompany.com/nelson-cornersofa-olena-light-grey-oak-b-16-cm-150308177016,14999 NOK,81cm,264cm,93cm,56cm,227cm,44cm,"Oak, B, 16 cm",150308177016,n,16.0,198.0,264
27,sofacompany,Nelson,,,https://no.sofacompany.com/nelson-cornersofa-olena-antracit-oak-soap-b-16-cm-150308184164,14999 NOK,81cm,264cm,93cm,56cm,227cm,44cm,"Oak Soap, B, 16 cm",150308184164,n,16.0,198.0,264
56,sofacompany,Vilmar,,,https://no.sofacompany.com/vilmar-chaise-longue-sofa-talent-cool-grey-smoked-oak-e-17-cm-140809153008,13999 NOK,85cm,256cm,98cm,61cm,232cm,46cm,"Smoked Oak, E, 17 cm",140809153008,n,17.0,163.0,256
58,sofacompany,Vilmar,,,https://no.sofacompany.com/vilmar-chaise-longue-sofa-velour-lux-navy-smoked-oak-e-17-cm-140809166008,13999 NOK,85cm,256cm,98cm,61cm,232cm,46cm,"Smoked Oak, E, 17 cm",140809166008,n,17.0,163.0,256
60,sofacompany,Vilmar,,,https://no.sofacompany.com/vilmar-chaise-longue-sofa-olena-antracit-oak-soap-e-17-cm-140809184163,13999 NOK,85cm,256cm,98cm,61cm,232cm,46cm,"Oak Soap, E, 17 cm",140809184163,n,,163.0,256
62,sofacompany,Vilmar,,,https://no.sofacompany.com/vilmar-chaise-longue-sofa-talent-cool-grey-smoked-oak-e-17-cm-140810153008,13999 NOK,85cm,256cm,98cm,61cm,232cm,46cm,"Smoked Oak, E, 17 cm",140810153008,n,17.0,163.0,256
63,sofacompany,Vilmar,,,https://no.sofacompany.com/vilmar-chaise-longue-sofa-velour-lux-navy-smoked-oak-e-17-cm-140810166008,13999 NOK,85cm,256cm,98cm,61cm,232cm,46cm,"Smoked Oak, E, 17 cm",140810166008,n,17.0,163.0,256
64,sofacompany,Vilmar,,,https://no.sofacompany.com/vilmar-chaise-longue-sofa-olena-antracit-oak-soap-e-17-cm-140810184163,13999 NOK,85cm,256cm,98cm,61cm,232cm,46cm,"Oak Soap, E, 17 cm",140810184163,n,,163.0,256
