In [1]:
import pickle
import sys

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Linux prerequisite (run once in a shell): sudo apt-get install chromium-chromedriver

In [2]:
# Choose the chromedriver binary for the current operating system and start
# a Chrome session.
#
# `sys.platform` is compared explicitly: the `in` operator on two strings is
# a substring test, not a platform check.
if sys.platform.startswith("linux"):
    service = Service(executable_path="/usr/lib/chromium-browser/chromedriver")
elif sys.platform == "win32":
    # Expects chromedriver.exe in the current working directory.
    service = Service(executable_path="chromedriver.exe")
else:
    # Any other platform (e.g. macOS): no hard-coded location is known, so let
    # Selenium resolve the driver itself instead of leaving `service`
    # undefined and failing with a NameError below.
    service = Service()

driver = webdriver.Chrome(service=service)

In [3]:
# Start page of the engine configurator; this is the address the browser
# session is pointed at before the step-by-step selection begins.
url = "https://www.wartsila.com/marine/engine-configurator"

In [4]:
# Option labels for each configurator step. The strings are matched against
# the text shown on the page, and `selection` picks one entry per list by its
# position, so both spelling and order matter.

# Step: engine application (positions 0-4)
engine_applications = [
    "Generating set auxiliary",
    "Mechanical propulsion & CPP",
    "Mechanical propulsion & FPP",
    "Diesel electric",
    "Dredger",
]

# Step: electrical frequency
frequencies = [
    "50 Hz",  # position 0
    "60 Hz",  # position 1
]

# Step: fuel type (positions 0-3)
fuel_types = [
    "Marine diesel oil",
    "Heavy fuel oil",
    "Gas - LNG",
    "Future fuels",
]

# Step: emission level
emissions = [
    "IMO Tier 2",  # position 0
    "IMO Tier 3",  # position 1
]

In [5]:
# Chosen option per configurator step, as list positions in this order:
#   [engine_applications, frequencies, fuel_types, emissions]
# With the lists defined above, [3, 0, 0, 1] means
#   "Diesel electric", "50 Hz", "Marine diesel oil", "IMO Tier 3".
selection = [3, 0, 0, 1]

In [6]:
# Result container filled by the scraping cells below:
#   "engine_models" -> list of engine model names read from the results table
#   <model name>    -> DataFrame with "description" and "values" columns
Wartsila = {}

In [7]:
# Open the configurator start page in the Chrome session.
driver.get(url)

In [8]:
# Dismiss the cookie consent banner.
# The lookup runs right after navigation, so poll (up to 10 s) until the
# button is actually present and clickable instead of failing immediately
# with NoSuchElementException when the banner has not rendered yet.
accept_cookies = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, "//*[contains(text(), 'Accept all cookies')]")
    )
)
accept_cookies.click()

In [9]:
# Step 1: choose the engine application given by selection[0], then advance.
# Each lookup polls (up to 10 s) until the element is clickable, because the
# page content may not be rendered yet when this cell starts executing.
wait = WebDriverWait(driver, 10)

engine_application = wait.until(
    EC.element_to_be_clickable(
        (By.XPATH, f"//*[contains(text(), '{engine_applications[selection[0]]}')]")
    )
)
engine_application.click()

Next = wait.until(
    EC.element_to_be_clickable((By.XPATH, "//*[contains(text(), 'Next')]"))
)
Next.click()

In [10]:
# Step 2: choose the frequency given by selection[1], then advance.
wait = WebDriverWait(driver, 10)

# Candidate frequency buttons, located by their generated CSS classes.
# Poll until the step has rendered; an immediate find_elements() would return
# an empty list and the loop below would silently select nothing.
# (CSS selector form of the compound class name "sc-jDwBTQ.eCADnn".)
frequency = wait.until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".sc-jDwBTQ.eCADnn"))
)

wanted_frequency = frequencies[selection[1]]

for hz in frequency:
    # Exact comparison: a substring test (`hz.text in wanted`) is also true
    # for elements whose text is empty, which would then be clicked as well.
    if hz.text.strip() == wanted_frequency:
        # Replace the element's class before clicking.
        # NOTE(review): 'kiwTZd' is presumably the "selected" style class of
        # the page's generated CSS - confirm it is still required/valid.
        driver.execute_script(
            "arguments[0].setAttribute('class', 'sc-jDwBTQ kiwTZd')", hz
        )
        hz.click()

Next = wait.until(
    EC.element_to_be_clickable((By.XPATH, "//*[contains(text(), 'Next')]"))
)
Next.click()

In [11]:
# Step 3: choose the fuel type given by selection[2], then advance.
# Lookups poll (up to 10 s) until the element is clickable, since the new
# step may still be rendering right after the previous "Next" click.
wait = WebDriverWait(driver, 10)

fuel_type = wait.until(
    EC.element_to_be_clickable(
        (By.XPATH, f"//*[contains(text(), '{fuel_types[selection[2]]}')]")
    )
)
fuel_type.click()

Next = wait.until(
    EC.element_to_be_clickable((By.XPATH, "//*[contains(text(), 'Next')]"))
)
Next.click()

In [13]:
# Intermediate step: nothing is selected here, the page is only advanced.
# NOTE(review): confirm which configurator step this skips and that keeping
# its default choice is intended.
# Poll (up to 10 s) until the button is clickable rather than looking it up
# once while the step may still be rendering.
Next = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, "//*[contains(text(), 'Next')]"))
)
Next.click()

In [14]:
# Emission step: choose the level given by selection[3], then advance.
# Lookups poll (up to 10 s) until the element is clickable, since the new
# step may still be rendering right after the previous "Next" click.
wait = WebDriverWait(driver, 10)

emission = wait.until(
    EC.element_to_be_clickable(
        (By.XPATH, f"//*[contains(text(), '{emissions[selection[3]]}')]")
    )
)
emission.click()

Next = wait.until(
    EC.element_to_be_clickable((By.XPATH, "//*[contains(text(), 'Next')]"))
)
Next.click()

In [15]:
# Read the engine model names from the header row of the results table.
# Poll (up to 10 s) until the header cells exist: an immediate find_elements()
# returns an empty list while the table is still rendering, which would leave
# `engine` empty and make the scraping loop do nothing without any error.
Header = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located(
        (By.XPATH, '//table[@class="sc-kgoBCf cQrLZs"]/thead/tr/th')
    )
)

# Fetch each cell's text once and drop header cells without text.
header_texts = [h.text for h in Header]
engine = [text for text in header_texts if text]

Wartsila["engine_models"] = engine

In [16]:
# For every engine model: open its detail page, copy the specification table
# into a DataFrame stored under the model name, then return to the model list.
# Every lookup after a click polls (up to 10 s), because the next view may not
# be rendered yet when the following statement runs.
wait = WebDriverWait(driver, 10)

for model in engine:
    Engines = wait.until(
        EC.element_to_be_clickable((By.XPATH, f"//*[contains(text(), '{model}')]"))
    )
    Engines.click()

    Next = wait.until(
        EC.element_to_be_clickable((By.XPATH, "//*[contains(text(), 'Next')]"))
    )
    Next.click()

    data = wait.until(
        EC.presence_of_element_located(
            (By.XPATH, '//table[@class="sc-brqgnP fkDYJi"]/tbody')
        )
    )

    spec_name = []
    spec_item = []

    for row in data.find_elements(By.XPATH, ".//tr"):
        cells = row.find_elements(By.XPATH, "td")

        # A row without any <td> has nothing to record; indexing cells[0]
        # on it would raise IndexError.
        if not cells:
            continue

        # textContent is read instead of .text, as in the original code.
        spec_name.append(cells[0].get_attribute("textContent"))
        # Single-cell rows carry a description only; their value stays empty.
        spec_item.append(
            cells[1].get_attribute("textContent") if len(cells) > 1 else ""
        )

    Wartsila[model] = pd.DataFrame({"description": spec_name, "values": spec_item})

    back = wait.until(
        EC.element_to_be_clickable((By.XPATH, "//*[contains(text(), 'Back')]"))
    )
    back.click()

In [17]:
# Persist the scraped results (model list plus one DataFrame per model) as a
# pickle file in the current working directory.
with open(file="Wartsila_engines.pkl", mode="wb") as f:
    pickle.dump(obj=Wartsila, file=f)