# Comparing Amazon Prices across Europe

In [2]:
# import necessary packages
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import sys

In [3]:
# Base data
# reference: https://www.hagglezon.com
countries = ['es', 'de', 'it', 'fr'] # EUR only

In [8]:
# turn HTML into numeric value as object
def to_numeric(x):
    if len(x):
        return re.sub(',','.',re.sub('<\D*>|\xa0€|\.','',str(x[0])))
    else:
        return np.nan
    
def getAmazonPrice(productUrl):
    # send with headers to bypass 1 block
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
    }
    response = requests.get(productUrl, headers=headers)
    #display(response.status_code)
    soup = BeautifulSoup(response.content) #, features = 'lxml')
    # check the price data
    price_html = soup.find_all('span', attrs = {'id' : 'priceblock_ourprice'})
    return price_html

# shipping costs defined to Portugal
def ship_cost(country):
    if country == 'es':
        return 3.99
    else:
        return 8.8


def strip_id(url_in):
    return re.sub('/.*','',re.sub('^.*/dp/','',url_in))

while True:
    url_input = str(input("Copy Amazon product URL here: "))
    match = re.search('^.*/dp/.*$', url_input)
    if match:
        print('Product page detected, checking for prices ...')
        break
    else:
        print('No Product page detected! Try again ...')
        continue

product_id = strip_id(url_input)
print(product_id)
prices_country = pd.DataFrame(columns = ['country', 'price', 'url'])
for country in countries:
    print(country)
    url = 'https://www.amazon.'+country+'/dp/'+product_id
    #print(url)
    price = getAmazonPrice(url)
    #print(price)
    prices_country = prices_country.append({'country':country,'price':price,'url':url}, ignore_index = True)    
# turn html price into numeric number
prices_country['price_num'] = prices_country['price'].apply(to_numeric)
prices_country['price_num'] = prices_country['price_num'].astype('float64')
# add shipping cost to Portugal
prices_country['shipping_costs'] = prices_country['country'].apply(ship_cost)
# calculate total price and sort
prices_country['price_tot'] = prices_country['price_num'] + prices_country['shipping_costs']
#prices_country.sort_values(by=['price_tot'])
prices_country_sorted = prices_country.sort_values('price_tot', ascending=True)
highest_spread_abs = prices_country['price_tot'].max()-prices_country['price_tot'].min()
highest_spread_rel = prices_country['price_tot'].max()/prices_country['price_tot'].min()-1
country_to_buy = prices_country_sorted.iloc[0][0]
#display(prices_country_sorted['country', 'price_tot', 'price_num', 'shipping_costs', 'url'])
print('You should by at amazon.'+country_to_buy)
print('Maximum Spread: '+str(highest_spread_abs)+'EUR ('+str(highest_spread_rel*100)+'%)')
prices_country_sorted[['country', 'price_tot', 'price_num', 'shipping_costs', 'url']]

Copy Amazon product URL here: https://www.amazon.it/dp/B00GZKMI8G?tag=haggle-web-it-21&linkCode=ogi&th=1&psc=1
Product page detected, checking for prices ...
B00GZKMI8G?tag=haggle-web-it-21&linkCode=ogi&th=1&psc=1
es
de
it
fr
You should by at amazon.es
Maximum Spread: 252.69999999999993EUR (60.3260999307694%)


Unnamed: 0,country,price_tot,price_num,shipping_costs,url
0,es,418.89,414.9,3.99,https://www.amazon.es/dp/B00GZKMI8G?tag=haggle...
1,de,488.7,479.9,8.8,https://www.amazon.de/dp/B00GZKMI8G?tag=haggle...
2,it,658.07,649.27,8.8,https://www.amazon.it/dp/B00GZKMI8G?tag=haggle...
3,fr,671.59,662.79,8.8,https://www.amazon.fr/dp/B00GZKMI8G?tag=haggle...


In [None]:
# shipping costs
#es https://www.amazon.es/gp/help/customer/display.html/ref=help_search_1-1?ie=UTF8&nodeId=201910820&qid=1603838216&sr=1-1
#de https://www.amazon.de/-/en/gp/help/customer/display.html/ref=help_search_1-2?ie=UTF8&nodeId=201910850&qid=1603837867&sr=1-2#GUID-94F272E0-F280-4956-87FB-E99C2528462A__SECTION_6D1620B0D0594CE5AF499E782094954A
#it https://www.amazon.it/gp/help/customer/display.html/ref=help_search_1-4?ie=UTF8&nodeId=201910820&qid=1603838098&sr=1-4#GUID-94F104D8-4201-496D-B68B-D10DC9864CF2__SECTION_A8C2956E48AA4A11A1489B916D809CCE
#fr https://www.amazon.fr/gp/help/customer/display.html/ref=help_search_1-3?ie=UTF8&nodeId=201910820&qid=1603838006&sr=1-3