Skip to content

Commit

Permalink
Remove PhantomJS functions since it is no longer being actively suppo…
Browse files Browse the repository at this point in the history
…rted.
  • Loading branch information
lordjabez committed Mar 14, 2023
1 parent 276b4bf commit 1310011
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 55 deletions.
1 change: 0 additions & 1 deletion .gitignore
Expand Up @@ -105,4 +105,3 @@ ENV/

# webdrivers
chromedriver
phantomjs
6 changes: 3 additions & 3 deletions README.md
Expand Up @@ -18,14 +18,14 @@ Read more about the motivation behind creating this library in this [blogpost](ht
- Improves Selenium's handling of dynamically loading elements.
- Makes cookie handling more flexible in Selenium.
- Makes clicking elements in Selenium more reliable.
- Supports Chrome and PhantomJS.
- Supports Chromedriver natively, and lets you plug in a custom webdriver.

## Installation
```bash
pip install requestium
```

You should then download your preferred Selenium webdriver if you plan to use the Selenium part of Requestium: [Chromedriver](https://sites.google.com/a/chromium.org/chromedriver/) or [PhantomJS](http://phantomjs.org)
You should then download your preferred Selenium webdriver if you plan to use the Selenium part of Requestium, such as [Chromedriver](https://sites.google.com/a/chromium.org/chromedriver/).

## Usage
First create a session as you would with Requests, and optionally add arguments for the web-driver if you plan to use one.
Expand Down Expand Up @@ -179,7 +179,7 @@ s.copy_user_agent_from_driver()
```
Take into account that doing this will launch a browser process.

Note: The Selenium Chrome webdriver doesn't support automatic transfer of proxies from the Session to the Webdriver at the moment. The PhantomJS driver does though.
Note: The Selenium Chrome webdriver doesn't support automatic transfer of proxies from the Session to the Webdriver at the moment.

## Comparison with Requests + Selenium + lxml
A silly working example of a script that runs on Reddit. We'll then show how it compares to using Requests + Selenium + lxml instead of Requestium.
Expand Down
54 changes: 3 additions & 51 deletions requestium/requestium.py
Expand Up @@ -37,14 +37,12 @@ def __init__(self, webdriver_path=None, browser=None, default_timeout=5, webdriv
self._last_requests_url = None

if self._driver is None:
if browser == 'phantomjs':
self._driver_initializer = self._start_phantomjs_browser
elif browser == 'chrome':
if browser == 'chrome':
self._driver_initializer = self._start_chrome_browser
elif browser == 'chrome-headless':
self._driver_initializer = self._start_chrome_headless_browser
else:
raise ValueError('Invalid Argument: browser must be chrome or phantomjs, not: "{}"'.format(browser))
raise ValueError('Invalid Argument: browser must be chrome or chrome-headless, not: "{}"'.format(browser))
else:
for name in DriverMixin.__dict__:
name_private = name.startswith('__') and name.endswith('__')
Expand All @@ -61,34 +59,6 @@ def driver(self):
self._driver = self._driver_initializer()
return self._driver

def _start_phantomjs_browser(self):
    """Create a PhantomJS driver that mirrors this session's headers and proxy.

    Every session header except ``Accept-Encoding`` is registered as a
    PhantomJS custom page header. If the session has a proxy configured
    (``https`` preferred over ``http``), it is forwarded to the PhantomJS
    process through the ``--proxy`` and, when credentials are present,
    ``--proxy-auth`` service arguments.

    Returns:
        A ``RequestiumPhantomJS`` instance created with
        ``self.webdriver_path`` and ``self.default_timeout``.
    """
    # Add headers to driver.
    # NOTE(review): this writes into the capabilities mapping reached through
    # the webdriver module, so presumably the headers stay set for drivers
    # created later in the same process -- confirm whether that is intended.
    for key, value in self.headers.items():
        # Manually setting Accept-Encoding to anything breaks it for some reason, so we skip it
        if key == 'Accept-Encoding':
            continue

        webdriver.DesiredCapabilities.PHANTOMJS[
            'phantomjs.page.customHeaders.{}'.format(key)] = value

    # Set browser options
    service_args = ['--load-images=no', '--disk-cache=true']

    # Add proxies to driver
    if self.proxies:
        # .get() so that a session with only one scheme configured does not
        # raise KeyError (assumes self.proxies is dict-like, as self.headers is).
        session_proxy = self.proxies.get('https') or self.proxies.get('http')
        if session_proxy:
            # Drop the scheme if there is one, then separate the optional
            # "user:pass" prefix from the "host:port" part. rpartition keeps
            # working when the proxy URL carries no credentials at all.
            proxy_location = session_proxy.split('://', 1)[-1]
            proxy_user_and_pass, has_credentials, proxy_ip_address = \
                proxy_location.rpartition('@')
            service_args.append('--proxy=' + proxy_ip_address)
            if has_credentials:
                service_args.append('--proxy-auth=' + proxy_user_and_pass)

    # Create driver process; the ghostdriver log goes to the system temp dir
    service_log_filename = os.path.join(tempfile.gettempdir(), 'ghostdriver.log')
    return RequestiumPhantomJS(executable_path=self.webdriver_path,
                               service_log_path=service_log_filename,
                               service_args=service_args,
                               default_timeout=self.default_timeout)

def _start_chrome_browser(self):
# TODO transfer of proxies and headers: Not supported by chromedriver atm.
# Choosing not to use plug-ins for this as I don't want to worry about the
Expand Down Expand Up @@ -127,9 +97,8 @@ def _start_chrome_browser(self):
default_timeout=self.default_timeout)

def _start_chrome_headless_browser(self):
self.webdriver_options['arguments']
headless_arguments = [
'headless',
'headless', # TODO use headless=new and get rid of other stuff (after writing some tests)
'disable-infobars',
'disable-gpu', # So we don't need libosmesa.so in our deploy package
'homedir=/tmp', # Ensures we have write permissions in all environments
Expand Down Expand Up @@ -225,15 +194,6 @@ def re_first(self, *args, **kwargs):

class DriverMixin(object):
"""Provides helper methods to our driver classes
This is a temporary solution.
When Chrome headless is finally stable, and we therefore stop using Phantomjs,
it will make sense to stop having this as a mixin and just add these methods to
the RequestiumChrome class, as it will be our only driver class.
(We plan to stop supporting Phantomjs because the developer stated he won't be
maintaining the project any longer)
"""

def __init__(self, *args, **kwargs):
Expand Down Expand Up @@ -290,10 +250,6 @@ def ensure_add_cookie(self, cookie, override_domain=None):
# same cookies and not have a request mid-session with no cookies
self.get('http://' + cookie_domain)

# Fixes phantomjs bug, all domains must start with a period
if self.name == "phantomjs":
cookie['domain'] = '.' + cookie['domain']

cookie_added = self.try_add_cookie(cookie)

# If we fail adding the cookie, retry with a more permissive domain
Expand Down Expand Up @@ -460,7 +416,3 @@ def _ensure_click(self):
exception_message
)
)


# PhantomJS-backed driver class: combines DriverMixin with Selenium's
# webdriver.PhantomJS and adds no behavior of its own (the body is just `pass`).
class RequestiumPhantomJS(DriverMixin, webdriver.PhantomJS):
pass

0 comments on commit 1310011

Please sign in to comment.