name
size
units


price
in_stock

size

capacity
waist_high
waist_low

backing
brand
name
notes
retailer
shipping
tapes
units
url

total_price = price + shipping
unit_price = total_price / units
ml_per_unit_price = capacity / unit_price

backing: str = "plastic",
brand: str = "",
capacity: int = 0,
in_stock: str = "Maybe",
name: str = "",
notes: str = "",
price: float = 0.00,
retailer: str = "",
shipping: float = 0.0,
size_label: str = "",
tapes: str = "",
units: int = 0,
url: str = "",
waist_high: int = 0,
waist_low: int = 0


In [137]:
# Imports

import requests
import re
import json
import yaml
from enum import Enum
from bs4 import BeautifulSoup, Tag
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep
from typing import Tuple

In [2]:
# Urls

urls = [
	'https://us.abuniverse.com/product/agz/',
	'https://us.abuniverse.com/product/bny4/',
	'https://us.abuniverse.com/product/cbc/',
	'https://us.abuniverse.com/product/cus/',
	'https://us.abuniverse.com/product/dno/',
	'https://us.abuniverse.com/product/lkg/',
	'https://us.abuniverse.com/product/pwz/',
	'https://us.abuniverse.com/product/pab/',
	'https://us.abuniverse.com/product/prp/',
	'https://us.abuniverse.com/product/prs/',
	'https://us.abuniverse.com/product/whi/',
	'https://us.abuniverse.com/product/siu/',
	'https://us.abuniverse.com/product/kdo/',
	'https://us.abuniverse.com/product/spc/',
	'https://us.abuniverse.com/product/sdk/',
]

In [199]:
# Products

with open('products.yml', 'r') as file:
	products = yaml.safe_load(file)

In [207]:
# Driver

driver = None

def init_driver():
	global driver
	if driver == None:
		driver = webdriver.Firefox()

def driver_to(url):
	global driver
	init_driver()
	if driver.current_url != url:
		driver.get(url)

def driver_to_reset(url):
	global driver
	init_driver()
	driver.get(url)

In [138]:
# Helpers

def get_response(url):
	with requests.get(url) as response:
		if response.status_code != 200:
			print(f'Error: {response.status_code} for {url}')
			return
		return response

def get_soup(url):
	return BeautifulSoup(get_page(url), 'html.parser')

def get_page(url):
	return get_response(url).text

def get_data(url):
	return get_response(url).json()

def await_price_data(old_price_data, get_price_data) -> Tuple[bool, float]:
		timeout = 0
		while timeout < 10:
			timeout += 1
			price_data = get_price_data()
			if price_data != old_price_data:
				return (False, price_data)
			sleep(0.1 * timeout)
		return (True, old_price_data)

def calculate_derived_info(info: dict) -> dict:
    unit_price: float = info['price'] / info['units']
    total_price: float = info['price'] + info['shipping']
    ml_per_unit_price: int = int(info['capacity'] / unit_price)
    return info | {
        "ml_per_unit_price": ml_per_unit_price,
        "total_price": total_price,
        "unit_price": unit_price,
    }

In [189]:
# ABU
ABU_QUANTITIES = {
    10: 'abu_quantity_1',
    40: 'abu_quantity_2',
    80: 'abu_quantity_3',
}

ABU_SIZES = {
    'xs': 'XSmall',
    's': 'Small',
    's_m': 'SMedium',
    'm': 'Medium',
    'l': 'Large',
    'xl': 'XLarge',
    'xl-plus': 'XXLarge',
}


def abu(url, product) -> list[dict]:
    rows = []
	id = product['id']
    info = product['info']
    sizes = product['sizes']
	soup = get_soup(url)

    variations = get_data(
        f'https://us.abuniverse.com/wp-json/wc/store/products/{id}')['variations']
    for variation in variations:
        attributes = variation['attributes']

        # Skip samples and scented variations
        is_sample = attributes[0]['value'] == 'sample'
        is_scented = attributes[2]['value'] == 'no-scent'
        if is_sample or is_scented:
            continue

        size = ABU_SIZES[attributes[1]['value']]

        # Get stock data from variation json
        variation_id = variation['id']
        variation_data = get_data(
            f'https://us.abuniverse.com/wp-json/wc/store/products/{variation_id}'
        )

        for units, price_ident in ABU_QUANTITIES.items():
            rows.append(calculate_derived_info(info | {
                'price': float(re.search('\d*\.\d*', soup.find('label', {'for': price_ident}).string).group()),
                'in_stock': 'Yes' if variation_data['is_in_stock'] else 'No',
                'size': size,
                'waist_high': sizes[size]['waist_high'],
                'waist_low': sizes[size]['waist_low'],
                'units': units,
            }))

    return rows


def test_abu():
    url = 'https://us.abuniverse.com/product/agz/'
    result = abu(url, products[url])
    print(*result, sep='\n')


test_abu()


{'backing': 'Cloth', 'brand': 'ABU', 'capacity': 7500, 'name': 'AlphaGatorZ', 'notes': None, 'retailer': 'ABU', 'shipping': 0.0, 'tapes': 4, 'url': 'https://us.abuniverse.com/product/agz/', 'price': 43.99, 'in_stock': 'No', 'size': 'Medium', 'waist_high': 36, 'waist_low': 31, 'units': 10, 'ml_per_unit_price': 1704, 'total_price': 43.99, 'unit_price': 4.399}
{'backing': 'Cloth', 'brand': 'ABU', 'capacity': 7500, 'name': 'AlphaGatorZ', 'notes': None, 'retailer': 'ABU', 'shipping': 0.0, 'tapes': 4, 'url': 'https://us.abuniverse.com/product/agz/', 'price': 143.99, 'in_stock': 'No', 'size': 'Medium', 'waist_high': 36, 'waist_low': 31, 'units': 40, 'ml_per_unit_price': 2083, 'total_price': 143.99, 'unit_price': 3.5997500000000002}
{'backing': 'Cloth', 'brand': 'ABU', 'capacity': 7500, 'name': 'AlphaGatorZ', 'notes': None, 'retailer': 'ABU', 'shipping': 0.0, 'tapes': 4, 'url': 'https://us.abuniverse.com/product/agz/', 'price': 279.99, 'in_stock': 'No', 'size': 'Medium', 'waist_high': 36, 'wai

In [7]:
# Amazon

def amazon(url, product) -> dict:
	info = product['info']
	driver_to(url)
	soup = BeautifulSoup(driver.page_source, 'html.parser')

	price: float = float(soup.find('span', {'class': 'apexPriceToPay'}).find('span', {'class': 'a-offscreen'}).string[1:])
	in_stock: str = 'Yes' if soup.find('span', {'class': 'a-color-success'}) else 'No'

	return calculate_derived_info(info | {
		'price': price,
		'in_stock': in_stock
	})

def test_amazon():
	url = 'https://www.amazon.com/dp/B08GL65JKT'
	print(amazon(url, products[url]))

In [23]:
# Bambino

BAMBINO_SIZES = {
    'XS': 'XSmall',
    'S': 'Small',
    'M': 'Medium',
    'L': 'Large',
    'XL': 'XLarge',
    'XXL': 'XXLarge',
}

def bambino(url, product) -> list[dict]:
	rows = []
	info = product['info']
	data = get_data(url + '.json')['product']

	for variant in data['variants']:

		# Skip samples
		if variant['option2'].startswith('1 Sample'):
			continue

		size_data = variant['option1']
		size_options = re.search(f"([S|M|L|XL])+\/?([S|M|L|XL]+)?", size_data).groups()
		waist_options = re.search(f"(\d+)\\\"-(\d+)", size_data).groups()

		for size_tag in size_options:
			size = BAMBINO_SIZES[size_tag]

			rows.append(calculate_derived_info(info | {
				'price': variant['price'],
				# TODO: Add in stock selection for Bambino
                'in_stock': 'Maybe',
                'size': size,
                'waist_high': waist_options[1],
                'waist_low': waist_options[0],
                'units': int(variant['option2'][-2:]),
			}))

	return rows

def test_bambino():
	url = 'https://bambinodiapers.com/products/x-plus-ultrastretch-all-white-diapers'
	result = bambino(url, products[url])
	print(*result, sep='\n')

{'backing': 'plastic', 'brand': 'Bambino', 'capacity': 5000, 'name': 'Cloudee', 'notes': None, 'retailer': 'Bambino', 'shipping': 0.0, 'tapes': 4, 'url': 'https://bambinodiapers.com/products/x-plus-ultrastretch-all-white-diapers', 'price': '40.50', 'in_stock': 'Maybe', 'size': 'Small', 'waist_high': '46', 'waist_low': '26', 'units': 10}
{'backing': 'plastic', 'brand': 'Bambino', 'capacity': 5000, 'name': 'Cloudee', 'notes': None, 'retailer': 'Bambino', 'shipping': 0.0, 'tapes': 4, 'url': 'https://bambinodiapers.com/products/x-plus-ultrastretch-all-white-diapers', 'price': '40.50', 'in_stock': 'Maybe', 'size': 'Medium', 'waist_high': '46', 'waist_low': '26', 'units': 10}
{'backing': 'plastic', 'brand': 'Bambino', 'capacity': 5000, 'name': 'Cloudee', 'notes': None, 'retailer': 'Bambino', 'shipping': 0.0, 'tapes': 4, 'url': 'https://bambinodiapers.com/products/x-plus-ultrastretch-all-white-diapers', 'price': '133.00', 'in_stock': 'Maybe', 'size': 'Small', 'waist_high': '46', 'waist_low': 

In [190]:
#InControl

INCONTROL_SIZES = {
	'XS (Youth)': 'XSmall',
	'Small': 'Small',
	'Medium': 'Medium',
	'Large': 'Large',
	'X-Large': 'XLarge',
}

INCONTROL_UNITS = [12, 36]

def incontrol(url, product) -> list[dict]:
	def get_price_data():
		return driver.find_element(By.CLASS_NAME, 'price--withoutTax').text

	info = product['info']
	sizes = product['sizes']
	rows = []
	driver_to_reset(url)
	soup = BeautifulSoup(driver.page_source, 'html.parser')

	# Setup buttons
	buttons = driver.find_elements(By.CLASS_NAME, 'form-option')
	size_buttons = []
	quantity_buttons = []
	for button in buttons:
		if button.text.startswith('Sample'):
			continue
		elif button.text.startswith(('Bag', 'Case')):
			quantity_buttons.append(button)
		else:
			size_buttons.append(button) 
	
	for size_button in size_buttons:
		size = INCONTROL_SIZES[size_button.text]

		price_data = get_price_data()
		size_button.click()
		err, _ = await_price_data(price_data, get_price_data)
		if err:
			print(f'Timed out: {url}')
		
		for quantity_button in quantity_buttons:
			units = int(re.search('\d+', quantity_button.text).group())

			price_data = get_price_data()
			quantity_button.click()
			err, price_data = await_price_data(price_data, get_price_data)
			if err:
				print(f'Timed out: {url}')
			price = float(re.search('\d+\.\d+', price_data).group())

			# TODO: Add in stock selection for InControl
			in_stock = 'Maybe'

			rows.append(calculate_derived_info(info | {
				'price': price,
                'in_stock': in_stock,
                'size': size,
                'waist_high': sizes[size]['waist_high'],
                'waist_low': sizes[size]['waist_low'],
                'units': units,
			}))

	return rows

def test_incontrol():
	url = 'https://incontroldiapers.com/incontrol-premium-nights-briefs-with-whiff-x-technology/'
	results = incontrol(url, products[url])
	print(*results, sep='\n')

In [191]:
# LittleForBig

LITTLE_FOR_BIG_SIZES = {
	'M': 'Medium',
	'L': 'Large',
}

def little_for_big(url, product) -> list[dict]:
	rows = []
	info = product['info']
	sizes = product['sizes']
	data = get_data(url)

	for variant in data['variations']:
		size, units = re.search('([M|L])(?:[a-zA-z ]*)?(\d*)', variant['attributes'][0]['value']).groups()
		size = LITTLE_FOR_BIG_SIZES[size]

		variant_data = get_data(
			'https://www.littleforbig.com/wp-json/wc/store/products/{0}'.format(variant['id'])
		)

		price = variant_data['prices']['price']
		price = float('{0}.{1}'.format(price[:-2], price[-2:]))

		rows.append(calculate_derived_info(info | {
			'price': price,
            'in_stock': variant_data['is_in_stock'],
            'size': size,
            'waist_high': sizes[size]['waist_high'],
            'waist_low': sizes[size]['waist_low'],
            'units': int(units),
		}))

	return rows

def test_little_for_big():
	url = 'https://www.littleforbig.com/wp-json/wc/store/products/155255'
	result = little_for_big(url, products[url])
	print(*result, sep='\n')

{'backing': 'Plastic', 'brand': 'LittleForBig', 'capacity': 5352, 'name': 'Blushing Baby', 'notes': None, 'retailer': 'LittleForBig', 'shipping': 0.0, 'tapes': 4, 'url': 'https://www.littleforbig.com/product/blushing-baby-adult-diapers-10-pieces-packm-l/', 'price': 139.99, 'in_stock': True, 'size': 'Medium', 'waist_high': 38, 'waist_low': 28, 'units': 40, 'ml_per_unit_price': 1529, 'total_price': 139.99, 'unit_price': 3.49975}
{'backing': 'Plastic', 'brand': 'LittleForBig', 'capacity': 5352, 'name': 'Blushing Baby', 'notes': None, 'retailer': 'LittleForBig', 'shipping': 0.0, 'tapes': 4, 'url': 'https://www.littleforbig.com/product/blushing-baby-adult-diapers-10-pieces-packm-l/', 'price': 248.99, 'in_stock': True, 'size': 'Medium', 'waist_high': 38, 'waist_low': 28, 'units': 80, 'ml_per_unit_price': 1719, 'total_price': 248.99, 'unit_price': 3.112375}
{'backing': 'Plastic', 'brand': 'LittleForBig', 'capacity': 5352, 'name': 'Blushing Baby', 'notes': None, 'retailer': 'LittleForBig', 'sh

In [194]:
#MyInnerbaby

MY_INNER_BABY_SIZES = {
	'Medium (M)': 'Medium',
	'Large (L)': 'Large',
}

def my_inner_baby(url, product) -> list[dict]:
	rows = []
	info = product['info']
	sizes = product['sizes']
	data = get_data(url)['product']

	for variant in data['variants']:
		if variant['option2'].endswith('Sample'):
			continue

		size = MY_INNER_BABY_SIZES[variant['option1']]

		rows.append(calculate_derived_info(info | {
			'price': float(variant['price']),
			# TODO: Add in stock selection for MyInnerBaby
            'in_stock': 'Maybe',
            'size': size,
            'waist_high': sizes[size]['waist_high'],
            'waist_low': sizes[size]['waist_low'],
            'units': int(re.search('\d+', variant['option2']).group()),
		}))
		
	return rows

def test_my_inner_baby():
	url = 'https://myinnerbaby.com/products/ageplay-outfitters-seaside-princess-printed-adult-diaper.json'
	result = my_inner_baby(url, products[url])
	print(*result, sep='\n')

test_my_inner_baby()


{'backing': 'Plastic', 'brand': 'MyInnerBaby', 'capacity': 5000, 'name': 'Seaside Princess', 'notes': None, 'retailer': 'MyInnerBaby', 'shipping': 0.0, 'tapes': 4, 'url': 'https://myinnerbaby.com/products/ageplay-outfitters-seaside-princess-printed-adult-diaper', 'price': 39.95, 'in_stock': 'Maybe', 'size': 'Medium', 'waist_high': 40, 'waist_low': 32, 'units': 10, 'ml_per_unit_price': 1251, 'total_price': 39.95, 'unit_price': 3.995}
{'backing': 'Plastic', 'brand': 'MyInnerBaby', 'capacity': 5000, 'name': 'Seaside Princess', 'notes': None, 'retailer': 'MyInnerBaby', 'shipping': 0.0, 'tapes': 4, 'url': 'https://myinnerbaby.com/products/ageplay-outfitters-seaside-princess-printed-adult-diaper', 'price': 109.95, 'in_stock': 'Maybe', 'size': 'Medium', 'waist_high': 40, 'waist_low': 32, 'units': 40, 'ml_per_unit_price': 1819, 'total_price': 109.95, 'unit_price': 2.7487500000000002}
{'backing': 'Plastic', 'brand': 'MyInnerBaby', 'capacity': 5000, 'name': 'Seaside Princess', 'notes': None, 're

In [224]:
#NorthShore

NORTHSHORE_SIZES = {
	'X-Small': 'XSMall',
	'Small': 'Small',
	'Medium': 'Medium',
	'Large': 'Large',
	'X-Large': 'XLarge',
}

def northshore(url, product) -> dict:
	info = product['info']
	driver_to(url)
	soup = BeautifulSoup(driver.page_source, 'html.parser')

	size_info, units_info = list(map(lambda el: el.text, soup.find_all('span', {'class': 'value'})))[0:3:2]
	
	size, waist_low, waist_high = re.search('(.*), (\d+) - (\d+)', size_info).groups()
	size = NORTHSHORE_SIZES[size]
	units = int(re.search('(?:Case|Pack)\/(\d+)', units_info).groups()[0])
	return calculate_derived_info(info | {
		'price': float(soup.find('span', {'class', 'product-details__price_highlight'}).string[1:]),
		'in_stock': 'Yes' if soup.find('span', {'class', 'icon icon-check'}) else 'No',
		'size': size,
		'waist_high': waist_high,
		'waist_low': waist_low,
		'units': units,
	})

def test_northshore():
	url = 'https://www.northshorecare.com/adult-diapers/adult-diapers-with-tabs/northshore-megamax-tab-style-briefs/northshore-megamax-tab-style-briefs-medium-case40-410s'
	result = northshore(url, products[url])
	print(result)

test_northshore()

{'backing': 'Plastic', 'brand': 'NorthShore', 'capacity': 6500, 'name': 'MEGAMAX', 'size': 'Medium', 'notes': None, 'retailer': 'NorthShore', 'shipping': 0.0, 'tapes': 4, 'units': 40, 'url': 'https://www.northshorecare.com/adult-diapers/adult-diapers-with-tabs/northshore-megamax-tab-style-briefs/northshore-megamax-tab-style-briefs-medium-case40-410s', 'waist_high': '44', 'waist_low': '32', 'price': 114.99, 'in_stock': 'Yes', 'ml_per_unit_price': 2261, 'total_price': 114.99, 'unit_price': 2.8747499999999997}


In [15]:
#Rearz
url = 'https://rearz.ca/rearz-alpaca-overnight-diapers/'
driver.get(url)
sleep(4)
rows = []
stored_product = stored_products[url]
base_items = stored_product['template']



for units, units_id in stored_product['units'].items():
	units = int(units)
	driver.find_element(By.XPATH, units_id).click()
	for size in stored_product['sizes']:
		driver.find_element(By.XPATH, size['attribute']).click()
		sleep(4)
		size_label = size['label']
		waist_high = size['waist_high']
		waist_low = size['waist_low']
		price_element = driver.find_element(By.XPATH, '/html/body/div[3]/div[1]/div[1]/div/div[1]/div[2]/div[2]/div/div[3]/div[3]/span')
		price = float(re.search('\d+.\d+', price_element.text).group())
		in_stock = 'Maybe'
		unit_price = float(price / units)
		ml_per_unit_price = int(base_items['capacity'] / unit_price)
		total_price = float(price + base_items['shipping'])

		additional_items = {
			'size_label': size_label,
			'units': units,
			'waist_low': waist_low,
			'waist_high': waist_high,
			'price': price,
			'in_stock': in_stock,
			'ml_per_unit_price': ml_per_unit_price,
			'total_price': total_price,
			'unit_price': unit_price,
		}

		rows.append(base_items | additional_items)
		
print(*rows, sep='\n')

{'backing': 'plastic', 'brand': 'Rearz', 'capacity': 5800, 'name': 'Alpaca', 'notes': '', 'retailer': 'Rearz', 'shipping': 0.0, 'tapes': '4', 'url': 'https://rearz.ca/rearz-alpaca-overnight-diapers/', 'size_label': 'Large', 'units': 12, 'waist_low': 33, 'waist_high': 42, 'price': 45.99, 'in_stock': 'Maybe', 'ml_per_unit_price': 1513, 'total_price': 45.99, 'unit_price': 3.8325}
{'backing': 'plastic', 'brand': 'Rearz', 'capacity': 5800, 'name': 'Alpaca', 'notes': '', 'retailer': 'Rearz', 'shipping': 0.0, 'tapes': '4', 'url': 'https://rearz.ca/rearz-alpaca-overnight-diapers/', 'size_label': 'Medium', 'units': 12, 'waist_low': 30, 'waist_high': 40, 'price': 41.99, 'in_stock': 'Maybe', 'ml_per_unit_price': 1657, 'total_price': 41.99, 'unit_price': 3.499166666666667}
{'backing': 'plastic', 'brand': 'Rearz', 'capacity': 5800, 'name': 'Alpaca', 'notes': '', 'retailer': 'Rearz', 'shipping': 0.0, 'tapes': '4', 'url': 'https://rearz.ca/rearz-alpaca-overnight-diapers/', 'size_label': 'XLarge', 'un

In [16]:
# Tykables
url = 'https://tykables.com/products/str8up-pink.json'
rows = []
product = get_data(url)['product']
stored_product = stored_products[url]
base_items = stored_product['template']

for variant in product['variants']:
	if variant['option3'] != 'None' or variant['option2'] == '1 Sample':
		continue

	size = variant['option1']
	stored_size = stored_product['sizes'][size]
	size_label = stored_size['label']
	waist_low = int(stored_size['waist_low'])
	waist_high = int(stored_size['waist_high'])
	units = int(variant['option2'][:2])
	price = float(variant['price'])
	in_stock = 'Yes' if variant['inventory_quantity'] > 0 else 'No'
	unit_price = float(price / units)
	ml_per_unit_price = int(base_items['capacity'] / unit_price)
	total_price = float(price + base_items['shipping'])

	additional_items = {
		'size_label': size_label,
		'units': units,
		'waist_low': waist_low,
		'waist_high': waist_high,
		'price': price,
		'in_stock': in_stock,
		'ml_per_unit_price': ml_per_unit_price,
		'total_price': total_price,
		'unit_price': unit_price,
	}
	
	rows.append(base_items | additional_items)

print(*rows, sep='\n')

{'backing': 'plastic', 'brand': 'Tykables', 'capacity': 8000, 'name': 'Str8up Pink', 'notes': '', 'retailer': 'Tykables', 'shipping': 0.0, 'tapes': '4', 'url': 'https://tykables.com/products/str8up-pink', 'size_label': 'Medium', 'units': 10, 'waist_low': 28, 'waist_high': 36, 'price': 47.0, 'in_stock': 'Yes', 'ml_per_unit_price': 1702, 'total_price': 47.0, 'unit_price': 4.7}
{'backing': 'plastic', 'brand': 'Tykables', 'capacity': 8000, 'name': 'Str8up Pink', 'notes': '', 'retailer': 'Tykables', 'shipping': 0.0, 'tapes': '4', 'url': 'https://tykables.com/products/str8up-pink', 'size_label': 'Medium', 'units': 40, 'waist_low': 28, 'waist_high': 36, 'price': 125.0, 'in_stock': 'Yes', 'ml_per_unit_price': 2560, 'total_price': 125.0, 'unit_price': 3.125}
{'backing': 'plastic', 'brand': 'Tykables', 'capacity': 8000, 'name': 'Str8up Pink', 'notes': '', 'retailer': 'Tykables', 'shipping': 0.0, 'tapes': '4', 'url': 'https://tykables.com/products/str8up-pink', 'size_label': 'Medium', 'units': 80

In [None]:
# All urls

rows = []
for url in urls:
	row = abu(url)
	rows.extend(row)

with open('sheet.json', 'w') as file:
	json.dump(rows, file, sort_keys=True, indent=4, separators=(',', ': '))