In [1]:
# Fetch full catalog

import json

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Fetch the full catalog.
# The catalog is tree-like and hard to parse: it contains not just products
# but also supplementary items such as handles and fabrics.

# Download the catalog feed and keep its 'resources' list.
ikea_cat = requests.get("https://www.ikea.com/no/no/meta-data/products/catalog-feed/index.json")
ikea_cat_json = ikea_cat.json()
ikea_resources = ikea_cat_json['resources']

# The main sofa category is the resource whose URL is the sofa landing page
# (raises StopIteration if no resource has that URL).
sofas_main = next(
    res for res in ikea_resources
    if res['url'] == 'https://www.ikea.com/no/no/cat/sofa-fu003/'
)

# Every resource whose key is listed among the main category's sub-categories.
sofas = (res for res in ikea_resources if res['key'] in sofas_main['subCategories'])
sofas_list = list(sofas)

def make_a_link(pid):
    """Build the product page URL for the product id *pid*.

    The last three characters of *pid* form the path segment that precedes
    the id itself.
    """
    return f'https://www.ikea.com/no/no/products/{pid[-3:]}/{pid}.html'

# Number of entries stored under 'productAndGprData' for the main sofa category.
len(sofas_main['productAndGprData'])

171

# Scraping

In [None]:
# Query the search backend for category fu003; size=9999 is presumably large
# enough to get every product in one response (TODO confirm).
ikea_search_response = requests.get(
    "https://sik.search.blue.cdtapps.com/no/no/product-list-page?sessionId=1ab42ab9-983c-470d-bb61-4e8e675c443d&category=fu003&size=9999&c=lf&v=20200617",
    timeout=30,  # do not hang the notebook forever on a stalled connection
)
# Fail loudly on an HTTP error instead of trying to decode an error page as JSON.
ikea_search_response.raise_for_status()
ikea_search = ikea_search_response.json()

# Maps the dimension labels found on the product page to the column names
# used in the data set. Labels without an entry here are kept verbatim.
trans = {
    # overall size
    'Bredde': 'width',
    'Maks bredde': 'max_width',
    'Høyde': 'height',
    'Lengde': 'length',
    'Dybde': 'depth',
    'Maks. dybde': 'max_depth',
    'Min. dybde': 'min_depth',
    # seat (two different labels map to the same 'sit_height' column)
    'Sittehøyde': 'sit_height',
    'Setehøyde': 'sit_height',
    'Setedybde': 'sit_depth',
    'Maks setedybde': 'max_sit_depth',
    'Min. setedybde': 'min_sit_depth',
    'Sete bredde': 'sit_width',
    # bed
    'Sengebredde': 'bed_width',
    'Sengelengde': 'bed_length',
    # other
    'Armlenebredde': 'arm_width',
    'Fri høyde under møbler': 'clearance',
}


def ikea(ik, timeout=30):
    """Scrape one IKEA product page and flatten it into a dict.

    Parameters
    ----------
    ik : dict
        One product entry from the search response. The keys read here are
        'pipUrl', 'currencyCode', 'mainImageUrl', 'typeName',
        'onlineSellable' and 'gprDescription' (which must contain 'colors').
    timeout : float, optional
        Seconds to wait for the product page before giving up (default 30).

    Returns
    -------
    dict or None
        The scraped item. Each dimension row found on the page is added under
        the name given by ``trans``; labels without a translation keep their
        original text as the key. Returns None when the page could not be
        fetched or parsed far enough to build the item. If a failure happens
        later, while reading the dimensions, the item collected so far is
        returned.

    Notes
    -----
    Failures are reported with ``print`` instead of being raised, so a single
    bad page does not abort a whole scraping run.
    """
    url = None   # pre-bound so the error report works even when 'pipUrl' is missing
    item = None  # pre-bound so an early failure returns None instead of raising UnboundLocalError
    try:
        url = ik['pipUrl']
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # report HTTP errors directly rather than as a parse failure
        soup = BeautifulSoup(response.content, 'html.parser')

        # The price widget carries its data as JSON in the 'data-initial-props' attribute.
        price_box = soup.find('div', attrs={'class': "js-price-package range-revamp-pip-price-package"})
        js1 = json.loads(price_box.attrs['data-initial-props'])

        price_amount = js1['price']['mainPriceProps']['price']['integer']
        price_currency = ik['currencyCode']

        def meta(prop):
            # Content of the <meta property="..."> tag named `prop`.
            return soup.find('meta', attrs={'property': prop}).attrs['content']

        item = {
            'source': 'IKEA',
            # Drops the last 7 characters of og:title -- presumably a
            # trailing " - IKEA" suffix; TODO confirm.
            'title': meta('og:title')[:-7],
            'image': ik['mainImageUrl'],
            'description': meta('og:description'),
            'url': meta('og:url'),
            'price': f'{price_amount} {price_currency}',
            'typeName': ik['typeName'],
            'onlineSellable': ik['onlineSellable'],
            'other_colors': ik['gprDescription']['colors'],
            'brand': js1['productName'],
        }

        dims = soup.find('dl', attrs={'class': 'range-revamp-product-dimensions__list'})
        if dims is not None:
            for dim in dims.find_all('div', attrs={'class': 'range-revamp-product-dimensions__list-container'}):
                # Only the text before the first ':' of the <dt> is the label.
                label = dim.find('dt').text.split(':')[0]
                label = trans.get(label, label)
                item[label] = dim.find('dd').text
    except Exception as e:
        # Best effort: report the failing page and carry on.
        print(f'{url} {e}')

    return item

# Lazily scrape every product of the search result; nothing is fetched until
# the generator is consumed below.
ikea_iter = (ikea(ik) for ik in ikea_search['productListPage']['productWindow'])

# Build the frame that gets saved from the scraped items. Without this step
# `ikea_pd` is undefined on a fresh kernel (or stale from an earlier run).
# Entries for which nothing could be scraped (None) are skipped.
ikea_pd = pd.DataFrame([item for item in ikea_iter if item is not None])
ikea_pd.to_csv('./data/ikea.csv')

# Start here

In [1]:
# Load the previously scraped data set from the repository and show its row count.
ikea_pd = pd.read_csv('https://raw.githubusercontent.com/veonua/norske_sofaer/master/data/ikea.csv')
ikea_pd.shape[0]

461

In [2]:
def _first_cm(r, columns):
    """Return the first usable measurement among *columns* of row *r* as an int.

    Columns are tried in the given order. A column that is absent from the
    row, or whose value is missing, empty or zero, is skipped. A trailing
    "cm" unit (with or without a space before it) is removed before the
    conversion. Returns 0 when no column yields a value.

    Raises
    ------
    ValueError
        If the selected value is not a whole number once the unit is removed.
    """
    for column in columns:
        # .get(): the dimension columns come from scraped labels, so any of
        # them may be absent from a given data set.
        val = r.get(column)
        if val is None or pd.isna(val) or not val:
            continue
        text = str(val).strip()
        if text.endswith('cm'):
            text = text[:-2].strip()
        return int(text)
    return 0


def max_depth(r):
    """Maximum depth of row *r* as an int, or 0 when unknown.

    Uses the first usable value of 'max_depth', 'Bredde venstre',
    'Dybde sjeselong', 'Setebredde venstre' and 'depth', in that order.
    """
    return _first_cm(r, ('max_depth', 'Bredde venstre', 'Dybde sjeselong', 'Setebredde venstre', 'depth'))


def min_depth(r):
    """Minimum depth of row *r* as an int, or 0 when unknown.

    Uses the first usable value of 'min_depth' and 'depth', in that order.
    """
    return _first_cm(r, ('min_depth', 'depth'))


def width(r):
    """Width of row *r* as an int, or 0 when unknown.

    Uses the first usable value of 'width', 'Bredde høyre' and
    'Setebredde høyre', in that order.
    """
    return _first_cm(r, ('width', 'Bredde høyre', 'Setebredde høyre'))

# Add the integer helper columns, each computed row by row from the textual
# dimension columns.
for derived_column, extractor in (
    ('max_depth_i', max_depth),
    ('min_depth_i', min_depth),
    ('width_i', width),
):
    ikea_pd[derived_column] = ikea_pd.apply(extractor, axis=1)

In [4]:
# Columns shown in the result table.
shown_columns = ['image', 'price', 'brand', 'max_depth_i', 'width_i', 'clearance',
                 'min_depth_i', 'Bredde venstre', 'Bredde høyre', 'url']

# Width must lie in 260..285 (inclusive).
fw = ikea_pd['width_i'].between(260, 285)
# Max depth must lie in 170..223 (inclusive); rows where it is 0 are kept too.
fd = ikea_pd['max_depth_i'].between(170, 223) | ikea_pd['max_depth_i'].eq(0)

# Rows passing both conditions, restricted to the shown columns, ordered by brand.
filt = ikea_pd.loc[fd & fw, shown_columns].sort_values('brand')

In [5]:
from IPython.display import HTML
def image_formatter(im):
    """Render the image URL *im* as an HTML ``<img>`` tag 350px wide.

    The value is HTML-escaped (quotes included) because the table is rendered
    with ``escape=False``; characters such as ``&`` or ``"`` in scraped data
    can therefore not break out of the ``src`` attribute.
    """
    from html import escape  # local import keeps this helper self-contained

    return f'<img src="{escape(str(im), quote=True)}" width="350px">'
def a_formatter(url):
    """Render *url* as an HTML link whose text is the URL itself.

    The value is HTML-escaped (quotes included) because the table is rendered
    with ``escape=False``; scraped text can therefore not inject markup via
    the ``href`` attribute or the link text.
    """
    from html import escape  # local import keeps this helper self-contained

    safe = escape(str(url), quote=True)
    return f'<a href="{safe}"> {safe}</a>'

# Render the filtered frame as an HTML table. escape=False lets the <img> and
# <a> markup produced by the two formatters through unchanged.
disp = filt
table_html = disp.to_html(
    escape=False,
    formatters={'image': image_formatter, 'url': a_formatter},
)
HTML(table_html)

Unnamed: 0,image,price,brand,max_depth_i,width_i,clearance,min_depth_i,Bredde venstre,Bredde høyre,url
408,,13.940 NOK,LIDHULT,205,275,7 cm,98,205 cm,275 cm,https://www.ikea.com/no/no/p/lidhult-hjornesofa-4-seters-lejde-beige-brun-s29257467/
441,,19.890 NOK,LIDHULT,205,275,7 cm,98,205 cm,275 cm,https://www.ikea.com/no/no/p/lidhult-hjornesofa-4-seters-grann-bomstad-gyllenbrun-s89257426/
7,,8.999 NOK,NOCKEBY,175,277,15 cm,97,,,https://www.ikea.com/no/no/p/nockeby-3-seters-sofa-med-sjeselong-hoyre-tallmyra-tallmyra-tre-hvit-svart-tre-s49129266/
21,,8.999 NOK,NOCKEBY,175,277,15 cm,97,,,https://www.ikea.com/no/no/p/nockeby-3-seters-sofa-med-sjeselong-venstre-tallmyra-tallmyra-tre-hvit-svart-tre-s39129257/
137,,15.440 NOK,VALLENTUNA,193,266,,93,193 cm,266 cm,https://www.ikea.com/no/no/p/vallentuna-3-seters-modulhjornesofa-seng-og-oppbevaring-hillared-lys-bla-s49277978/
194,,11.835 NOK,VALLENTUNA,193,266,,93,193 cm,266 cm,https://www.ikea.com/no/no/p/vallentuna-3-seters-modulhjornesofa-med-oppbevaring-orrsta-lys-gra-s29277960/
