In [1]:
# basic libraries
import random
import time
import pickle
import os

# selenium web driver and related tools
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains

# OCR tools
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import torch
import re

  from .autonotebook import tqdm as notebook_tqdm


Load the TrOCR processor and model used to read the CAPTCHA image, and select the compute device.

In [2]:
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Load the TrOCR processor (image preprocessing + tokenizer) and the
# vision encoder-decoder OCR model from the pretrained checkpoint.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")

# Set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model weights to the selected device. The CAPTCHA solver sends its
# input tensors to `device`, so leaving the model on the CPU would make
# `generate` fail with a device mismatch whenever CUDA is available.
# (Assigning the result also keeps the notebook from printing the model repr.)
model = model.to(device)

Config of the encoder: <class 'transformers.models.vit.modeling_vit.ViTModel'> is overwritten by shared encoder config: ViTConfig {
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "image_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "qkv_bias": false,
  "transformers_version": "4.46.0"
}

Config of the decoder: <class 'transformers.models.trocr.modeling_trocr.TrOCRForCausalLM'> is overwritten by shared decoder config: TrOCRConfig {
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_cross_attention": true,
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": 0.0,
  "cross_attention_hidden_size": 768,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder

In [3]:
def setup_driver():
    """Create a Chrome WebDriver with a randomly chosen user agent and window size.

    Returns:
        The `webdriver.Chrome` instance built from the randomized options.
        The caller is responsible for calling `quit()` on it.
    """
    user_agents = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        # NOTE(review): this entry advertises Firefox while the driver is Chrome
        # -- confirm that the mismatch is intended.
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Firefox/89.0",
    ]
    # Chrome's --window-size switch takes "width,height"; the "1920x1080"
    # form is not parsed, so the requested size would never be applied.
    window_sizes = ["1920,1080", "1366,768", "1280,720"]

    options = Options()
    options.add_argument(f'user-agent={random.choice(user_agents)}')
    options.add_argument(f'--window-size={random.choice(window_sizes)}')
    # options.add_argument('--headless')  # Run in headless mode if required

    driver = webdriver.Chrome(options=options)
    return driver

In [4]:
def get_cookies(
    max_attempts = 20,
    timeout = 10,
    url = 'https://www.rdv-prefecture.interieur.gouv.fr/rdvpref/reservation/demarche/4443/cgu/',
    expected_url = "https://www.rdv-prefecture.interieur.gouv.fr/rdvpref/reservation/demarche/4443/creneau/",
    manual_timeout = 300,
):
    """Pass the CAPTCHA page in a fresh browser and save the session cookies.

    The first ``max_attempts // 2`` attempts are automatic: the CAPTCHA image
    is screenshotted, read with the TrOCR model and submitted. The remaining
    attempts refresh the page and wait for a human to solve the CAPTCHA in the
    browser window. Both phases stop as soon as the browser is on
    ``expected_url``.

    Args:
        max_attempts: Total number of attempts (automatic + manual).
        timeout: Seconds to wait for page loads and page elements.
        url: Page that shows the CAPTCHA form.
        expected_url: URL the browser is on once the CAPTCHA has been accepted.
        manual_timeout: Seconds to wait for a human during each manual attempt.

    Returns:
        True if ``expected_url`` was reached and the cookies were written to
        ``cookies.pkl``; False otherwise (nothing is written in that case).
    """
    def _page_loaded(drv):
        return drv.execute_script('return document.readyState') == 'complete'

    # element.screenshot() cannot create missing directories.
    os.makedirs("captchas", exist_ok=True)

    driver = setup_driver()
    try:
        driver.get(url)
        # Wait so that the content can be fully loaded
        wait = WebDriverWait(driver, timeout)
        wait.until(_page_loaded)
        # Mimicking human behavior by waiting a random time
        time.sleep(random.randint(2,5))

        attempt = 0
        automatic_attempts = max_attempts // 2

        # Phase 1: automatic OCR attempts. The URL is checked before every
        # attempt so the loop stops right after a successful submission.
        while attempt < automatic_attempts and driver.current_url != expected_url:
            attempt += 1
            try:
                image_path = f"captchas/screenshot_{attempt}.png"
                captcha_image = WebDriverWait(driver, timeout).until(
                    EC.presence_of_element_located((By.ID, "captchaFR_CaptchaImage"))
                )
                captcha_image.screenshot(image_path)
                with Image.open(image_path) as raw_image:
                    image = raw_image.convert("RGB")
                pixel_values = processor(images=image, return_tensors="pt").pixel_values
                # Follow the model's actual device so inputs and weights always match.
                pixel_values = pixel_values.to(model.device)
                with torch.no_grad():
                    outputs = model.generate(pixel_values, max_length=9, min_length=6, num_beams=5)
                decoded_text = processor.decode(outputs[0], skip_special_tokens=True)
                # Keep only uppercase letters and digits from the OCR output.
                recognized_text = re.sub(r"[^A-Z0-9]", "", decoded_text)
                print(f"Recognized Text {attempt}: {recognized_text}")
                captcha_input = driver.find_element(By.ID, "captchaFormulaireExtInput")
                # Drop any leftover text from a previous attempt before typing.
                captcha_input.clear()
                captcha_input.send_keys(recognized_text)
                button = WebDriverWait(driver, timeout).until(
                    EC.element_to_be_clickable((By.XPATH, "//button[.//span[text()='Suivant']]"))
                )
                button.click()
                # Wait until the next page is loaded
                wait.until(_page_loaded)
            except Exception as e:
                # Best effort: log and retry. KeyboardInterrupt is not an
                # Exception subclass, so the user can still stop the loop.
                print(f"Solving CAPTCHA {attempt} failed with error: {e}")

        # Phase 2: manual attempts. The driver stays open on failure so the
        # next attempt (and the final URL check) can still use it; the outer
        # `finally` is the only place that quits it.
        while attempt < max_attempts and driver.current_url != expected_url:
            try:
                print(f"Machine failed to solve CAPTCHA {attempt}, trying manual intervention.")
                driver.refresh()
                WebDriverWait(driver, timeout).until(
                    EC.presence_of_element_located((By.ID, "captchaFR_CaptchaImage"))
                )
                WebDriverWait(driver, manual_timeout).until(EC.url_to_be(expected_url))
            except Exception as e:
                print(f"Failed to solve CAPTCHA {attempt}, manual intervention failed with error: {e}")
            attempt += 1

        if driver.current_url != expected_url:
            # Do not overwrite cookies.pkl with cookies from a session that
            # never got past the CAPTCHA.
            print(f"CAPTCHA not solved after {attempt} attempts; cookies were not saved.")
            return False

        cookies = driver.get_cookies()
        with open("cookies.pkl", "wb") as file:
            pickle.dump(cookies, file)
        print("Cookies sauvegardés.")
        return True
    finally:
        driver.quit()

Connect the Pushover app: define the notification helper and load the API credentials from `config.json`.

In [5]:
import json
from pushover_complete import PushoverAPI

def send_pushover_notification(api_token, user_key, title, message):
    """Send one Pushover message and print a confirmation.

    Args:
        api_token: Token passed to `PushoverAPI` to build the client.
        user_key: Recipient key passed to `send_message`.
        title: Title of the notification.
        message: Body of the notification.
    """
    PushoverAPI(api_token).send_message(user_key, message, title=title)
    print("Notification sent successfully!")

# Read the Pushover credentials from the local JSON config file.
with open("config.json", "r") as file:
    config = json.load(file)

# A missing key yields None here (dict.get), not an exception.
api_token, user_key = config.get("API_KEY"), config.get("USER_KEY")

In [6]:
def drive_with_cookies(driver, url, expected_url, timeout=10):
    """Open `url`, inject the cookies stored in cookies.pkl, then open `expected_url`.

    If cookies.pkl does not exist yet (e.g. on the very first run), no cookie
    is injected and the navigation to `expected_url` is still attempted, so
    the caller can detect the failure from `driver.current_url` and obtain
    fresh cookies.

    Args:
        driver: Selenium WebDriver to steer.
        url: Page loaded first; cookies are added while the browser is on it.
        expected_url: Page requested after the cookies have been added.
        timeout: Seconds to wait for each page to finish loading.
    """
    def _page_loaded(drv):
        return drv.execute_script('return document.readyState') == 'complete'

    driver.get(url)
    wait = WebDriverWait(driver, timeout) # It's safer to wait due to the dynamic nature of the page
    wait.until(_page_loaded)

    try:
        # NOTE(security): pickle.load can execute arbitrary code; only load a
        # cookies.pkl that was written by this notebook.
        with open("cookies.pkl", "rb") as file:
            cookies = pickle.load(file)
    except FileNotFoundError:
        print("cookies.pkl not found; continuing without stored cookies.")
        cookies = []

    for cookie in cookies:
        driver.add_cookie(cookie)
    driver.get(expected_url)
    wait.until(_page_loaded)

Putting it all together...

In [7]:
# Check once whether an appointment slot can be requested, and notify via
# Pushover when the "Suivant" button on the slot page can be clicked.
driver = setup_driver()
timeout = 10
url = 'https://www.rdv-prefecture.interieur.gouv.fr/rdvpref/reservation/demarche/4443/cgu/'
expected_url = "https://www.rdv-prefecture.interieur.gouv.fr/rdvpref/reservation/demarche/4443/creneau/"
# Upper bound on CAPTCHA-solving rounds, so the cell cannot loop forever when
# the slot page stays unreachable.
max_cookie_refreshes = 3
try:
    # First try with the cookies saved by a previous run.
    drive_with_cookies(driver, url, expected_url, timeout)

    cookie_refreshes = 0
    while driver.current_url != expected_url and cookie_refreshes < max_cookie_refreshes:
        cookie_refreshes += 1
        # Solve the CAPTCHA in a separate browser to refresh cookies.pkl,
        # then retry the slot page in this browser with the new cookies.
        get_cookies(timeout=timeout, url=url, expected_url=expected_url)
        drive_with_cookies(driver, url, expected_url, timeout)

    if driver.current_url == expected_url:
        submit_button = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, "//button[span/span[text()='Suivant']]"))
        )
        if submit_button.is_enabled():
            print("Button is enabled. Attempting to click...")
            # Click the button
            ActionChains(driver).move_to_element(submit_button).click(submit_button).perform()
            # Wait to observe the result of the click
            WebDriverWait(driver, timeout).until(
                EC.url_changes(driver.current_url)  # Wait for URL change
            )
            print(f"Form submitted successfully. New URL: {driver.current_url}")
            # Send a Pushover notification
            title = "RDV Préfecture Disponible"
            message = expected_url
            send_pushover_notification(api_token, user_key, title, message)
        else:
            print("Button is disabled, cannot click. !!!Aucun créneau disponible!!!")

    else:
        raise Exception("Failed to reach the expected URL, the cookies may be invalid.")
finally:
    driver.quit()

Button is disabled, cannot click. !!!Aucun créneau disponible!!!
