Merge pull request #3 from ubertidavide/develop

Develop
ubertidavide · Nov 20, 2023 · 73eb03b · 73eb03b
2 parents ca25f63 + 9577f99
commit 73eb03b
Show file tree

Hide file tree

Showing 6 changed files with 109 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -130,6 +130,16 @@ BOT_RETRY_DELAY=10 #sec default
 When the task is failed the library store the screenshot and the html of the page in the debug folder, useful for debug.
 It will store also all the logs in the log.log file.
 
+### Page Url Check (Automatic)
+Every defined page must have a page url; when the page is instantiated and reached by the bot, the library checks that the 
+url specified in the config matches the url actually reached during the navigation, to reduce navigation errors.  
+If you want to disable this function see the Global Wait Section below.
+
+### File Download Wait (Functions)
+This library has the bot.wait_downloaded_file_path(file_extension, new_file_name=None) method, which can be used after a download button click in order
+to wait for and get the path of the downloaded file; it also gives the ability to rename the file.  
+The extension is used to check that the downloaded file is the correct one and is not corrupted.
+
 ### Download Folder and other Folders (Optional)
 ```ini
 -- settings.ini
@@ -150,6 +160,7 @@ The default configured wait are showed below:
 SELENIUM_GLOBAL_IMPLICIT_WAIT=5 #sec default
 SELENIUM_EXPECTED_URL_TIMEOUT=5 #sec default
 SELENIUM_DEFAULT_WAIT=5 #sec default
+SELENIUM_FILE_DOWNLOAD_TIMEOUT=20 #sec default
 
 SELENIUM_EXPECTED_URL_CHECK=False #disable the automatic page url check, the default value it's True
 ```

diff --git a/fastbots/bot.py b/fastbots/bot.py
@@ -17,8 +17,8 @@
 from selenium.common.exceptions import TimeoutException
 from selenium.webdriver.remote.webdriver import WebDriver
 
-from fastbots import config
-from fastbots.exceptions import ExpectedUrlError
+from fastbots import config, logger
+from fastbots.exceptions import ExpectedUrlError, DownloadFileError
 
 
 logger = logging.getLogger(__name__)
@@ -40,10 +40,13 @@ def __init__(self) -> None:
         super().__init__()
 
         # use a temporary directory as default download folder
+        self._temp_dir: str = tempfile.mkdtemp()
+
+        # official downloaded file folder
         if config.BOT_DOWNLOAD_FOLDER_PATH != 'None':
-            self._temp_dir: str = tempfile.mkdtemp(dir=config.BOT_DOWNLOAD_FOLDER_PATH)
+            self._download_dir: str = tempfile.mkdtemp(dir=config.BOT_DOWNLOAD_FOLDER_PATH)
         else:
-            self._temp_dir: str = tempfile.mkdtemp()
+            self._download_dir: str = tempfile.mkdtemp()
 
         # load all the locators
         self._locators: ConfigParser = self.__load_locators__()
@@ -118,10 +121,60 @@ def locator(self, page_name: str, locator_name: str) -> str:
         """
         if not self._locators.has_section(page_name):
             raise ValueError(f'The specified page_name: {page_name} is not declared in locators config.')
+
         if not self._locators.has_option(page_name, locator_name):
             raise ValueError(f'The specified locator_name: {locator_name} is not declared in locators config.')
+
         return self._locators.get(page_name, locator_name)
 
+    def wait_downloaded_file_path(self, file_extension: str, new_file_name: str | None = None) -> str:
+        """
+        Wait Downloaded File Path
+
+        Wait until exactly one file with the given extension is present in the temporary
+        download folder, then move it to the download folder and return its final path.
+        The extension is used as the completion signal because the partial file cached by
+        the browser usually has a specific extension that is not the final one.
+
+        Args:
+            file_extension: extension without the dot "." (ex .png become png).
+            new_file_name: optional new name, without extension, given to the moved file.
+
+        Returns:
+            The absolute path of the moved file, as a string.
+
+        Raises:
+            DownloadFileError: if the wait times out (for example no file, or more than
+                one file, with that extension is found in the temporary folder).
+        """
+        pattern: str = f'*.{file_extension}'
+
+        try:
+            # poll the temporary folder until exactly one file with the final extension is present
+            WebDriverWait(driver=self._driver, timeout=config.SELENIUM_FILE_DOWNLOAD_TIMEOUT, poll_frequency=1).until(
+                lambda driver: len(list(Path(self._temp_dir).glob(pattern))) == 1
+            )
+        except TimeoutException as te:
+            # count the matching files to report why the wait failed
+            file_count: int = len(list(Path(self._temp_dir).glob(pattern)))
+
+            # error string based on the specific error
+            if file_count == 0:
+                raise DownloadFileError('File not founded in the download folder, an error with the download occurs.') from te
+            elif file_count > 1:
+                raise DownloadFileError(f'Too many downloaded files founded, files number : {file_count}.') from te
+
+            raise DownloadFileError() from te
+
+        # get the latest downloaded file
+        latest_file: Path = max(Path(self._temp_dir).glob(pattern), key=lambda x: x.stat().st_ctime)
+
+        # BOT_DOWNLOAD_FOLDER_PATH holds the string 'None' when it is not configured; in that case
+        # fall back to the bot's own download directory instead of a literal "None" folder
+        if config.BOT_DOWNLOAD_FOLDER_PATH != 'None':
+            download_folder: Path = Path(config.BOT_DOWNLOAD_FOLDER_PATH)
+        else:
+            download_folder = Path(self._download_dir)
+
+        # keep the original file name unless a new one is requested
+        file_name: str = latest_file.name if new_file_name is None else f'{new_file_name}.{file_extension}'
+        downloaded_file_path: Path = download_folder / file_name
+
+        # shutil.move already removes the source file, so it must not be unlinked afterwards
+        shutil.move(src=str(latest_file.absolute()), dst=str(downloaded_file_path.absolute()))
+
+        # return the path and filename as string
+        return str(downloaded_file_path.absolute())
+
+
     def save_screenshot(self):
         """
         Save Screenshot

diff --git a/fastbots/config.py b/fastbots/config.py
@@ -49,4 +49,5 @@ class DriverType(Enum):
 SELENIUM_EXPECTED_URL_CHECK: bool = config('SELENIUM_EXPECTED_URL_CHECK', default=True, cast=bool)
 SELENIUM_EXPECTED_URL_TIMEOUT: int = config('SELENIUM_EXPECTED_URL_TIMEOUT', default=5, cast=int)
 SELENIUM_DEFAULT_WAIT: int = config('SELENIUM_DEFAULT_WAIT', default=5, cast=int)
+SELENIUM_FILE_DOWNLOAD_TIMEOUT: int = config('SELENIUM_FILE_DOWNLOAD_TIMEOUT', default=20, cast=int)
 SELENIUM_LOCATORS_FILE: str = config('SELENIUM_LOCATORS_FILE', default='locators.ini', cast=str)
diff --git a/fastbots/exceptions.py b/fastbots/exceptions.py
@@ -23,3 +23,17 @@ def __init__(self, current_url: str, expected_url: str) -> None:
 
     def __str__(self) -> str:
         return self.message
+
+class DownloadFileError(GenericError):
+    """
+    Download File Error
+
+    Raised when something goes wrong while a file is being downloaded.
+    The text passed at construction is stored in ``message`` and is what
+    ``str()`` returns for the exception.
+    """
+
+    def __init__(self, message: str = 'Download File Error') -> None:
+        # store the text on the instance first, then hand the same text to the base class
+        self.message: str = message
+        super().__init__(message)
+
+    def __str__(self) -> str:
+        return self.message
diff --git a/fastbots/page.py b/fastbots/page.py
@@ -1,6 +1,6 @@
 import logging
 from abc import ABC, abstractmethod
-from typing import Type, Union
+from typing import Type, Union, Dict
 
 from selenium.webdriver.common.by import By
 from selenium.webdriver.remote.webelement import WebElement
@@ -58,7 +58,29 @@ def __locator__(self, locator_name: str) -> tuple:
 
         """
         # load the locators from file and interprete that as code
-        return eval(self._bot.locator(self._page_name, locator_name))
+        full_locator: str = self._bot.locator(self._page_name, locator_name)
+
+        if not full_locator.startswith('(') or not full_locator.endswith(')'):
+            raise ValueError('The locator must be enclosed in round brackets.')
+
+        # declared locators
+        locator_list: Dict[str, By] = ['By.ID', 'By.XPATH', 'By.NAME', 'By.CLASS_NAME', 'By.CSS_SELECTOR', 
+                                       'By.LINK_TEXT', 'By.PARTIAL_LINK_TEXT', 'By.TAG_NAME']
+
+        # check the used locator
+        parsed_locator: tuple = None
+        for locator in locator_list:
+            # check that the first characters are them of the locators and the next one of the comma 
+            if full_locator[1:-1].strip().startswith(locator) and full_locator[1:-1].strip()[len(locator):].strip().startswith(','):
+                # extract the tuple required as locator
+                parsed_locator = (eval(locator), full_locator[1:-1].strip()[len(locator):].strip()[1:].strip()[1:-1])
+
+                logging.info(f'{parsed_locator}')
+
+                return parsed_locator
+
+        else:
+            raise ValueError('The specified locator is unknown or worng, check by, brackets and commas.')
 
     @abstractmethod
     def forward(self) -> Union[Type['Page'], None]:

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "fastbots"
-version = "0.1.3"
-description = "A simple library for bot development using selenium and the POM (Page Object Model) design."
+version = "0.1.4"
+description = "A simple library for fast bot and scraper development using selenium and the POM (Page Object Model) design."
 authors = ["Uberti Davide <24529587+ubertidavide@users.noreply.github.com>"]
 license = "LICENSE"
 readme = "README.md"