Skip to content

Commit

Permalink
Merge pull request #6 from vladcalin/rework-config
Browse files Browse the repository at this point in the history
Rework config
  • Loading branch information
vladcalin committed Jan 1, 2018
2 parents 2816a27 + 4daea09 commit 1ce7485
Show file tree
Hide file tree
Showing 17 changed files with 332 additions and 172 deletions.
20 changes: 10 additions & 10 deletions README.rst
Expand Up @@ -65,15 +65,6 @@ from crawlster.handlers import JsonLinesHandler
::

class MyCrawler(crawlster.Crawlster):
# We define some parameters here
config = crawlster.Configuration({
# the start pages
'core.start_urls': ['https://www.python.org/'],
# the method that will process the start pages
'core.start_step': 'step_start',
# to see in-depth what happens
'log.level': 'debug'
})
# items will be saved to items.jsonl
item_handler = JsonLinesHandler('items.jsonl')

Expand All @@ -96,8 +87,17 @@ from crawlster.handlers import JsonLinesHandler


if __name__ == '__main__':
# defining the configuration
config = crawlster.Configuration({
# the start pages
'core.start_urls': ['https://www.python.org/'],
# the method that will process the start pages
'core.start_step': 'step_start',
# to see in-depth what happens
'log.level': 'debug'
})
# starting the crawler
crawler = MyCrawler()
crawler = MyCrawler(config)
# this will block until everything finishes
crawler.start()
# printing some run stats, such as the number of requests, how many items
Expand Down
3 changes: 2 additions & 1 deletion crawlster/config/__init__.py
@@ -1,10 +1,11 @@
from .config import Configuration
from .config import Configuration, JsonConfiguration
from .options import (ConfigOption, Required,
NumberOption, StringOption, ListOption, ChoiceOption,
UrlOption)

__all__ = [
'Configuration',
'JsonConfiguration',

'ConfigOption',
'Required',
Expand Down
128 changes: 55 additions & 73 deletions crawlster/config/config.py
@@ -1,14 +1,14 @@
import json
import os

from crawlster.config.options import StringOption, ListOption, NumberOption
from crawlster.validators import ValidationError
from crawlster.exceptions import ConfigurationError, OptionNotDefinedError, \
from crawlster.config.options import ListOption, NumberOption
from crawlster.exceptions import OptionNotDefinedError, \
MissingValueError

#: The core options used by the framework
from crawlster.validators import ValidationError

CORE_OPTIONS = {
'core.start_step': StringOption(required=True),
'core.start_urls': ListOption(required=True),
'core.workers': NumberOption(default=os.cpu_count())
}
Expand All @@ -17,90 +17,72 @@
class Configuration(object):
"""Configuration object that stores key-value pairs of options"""

def __init__(self, options):
"""Initializes the values of the configuration object
Args:
options (dict):
the values of the configuration object
"""
self.provided_options = options
self.defined_options = CORE_OPTIONS

def register_options(self, options_dict):
"""Registers multiple option declarations in the current config """
self.defined_options.update(options_dict)

def validate_options(self):
"""Validates the options.
Returns a mapping of option name - list of errors
"""
errors = {}
for option_key in self.defined_options:
op_errors = self.validate_single_option(option_key)
if op_errors:
errors[option_key] = op_errors
if errors:
raise ConfigurationError(errors)
def __init__(self, options=None):
    """Initializes the defined options and the provided values

    Args:
        options (dict):
            The explicitly provided values, keyed by option name. When
            omitted (or empty) the configuration starts with no values.
    """
    # Work on a private copy: register_options() updates this mapping in
    # place, and sharing CORE_OPTIONS itself would leak every registered
    # option into the module-level defaults and into all other instances.
    self.defined_opts = dict(CORE_OPTIONS)
    self.values = options or {}

def validate_single_option(self, option_name):
"""Validates a single option given its name
def register_options(self, options):
    """Adds option definitions to the ones known by this configuration

    Args:
        options (dict):
            Mapping of option name to option definition. A name that is
            already defined gets its definition replaced.
    """
    known_opts = self.defined_opts
    known_opts.update(options)

Runs the validators for a single value.
def get(self, key):
"""Retrieves a value from this configuration, if available
Raises:
OptionNotDefinedError:
when the option_name is not defined in the defined_options
Returns:
A list of error messages from the validators
When the option key is not defined by any helper
MissingValueError:
When the option key is defined but its value could not be
determined
ValidationError:
When the provided value fails validation
"""
errors = []
option_spec = self.defined_options.get(option_name)
option_value = self.get(option_name)
for validator in option_spec.validators:
try:
validator(option_value)
except ValidationError as e:
errors.append(str(e))
return errors

def get(self, key, *, raise_if_not_defined=True):
"""Retrieves the value of the specified option
if key not in self.defined_opts:
raise OptionNotDefinedError(
'Option "{}" is not defined'.format(key))
opt_specs = self.defined_opts[key]
if key not in self:
if opt_specs.required:
raise MissingValueError(
'Option {} is required but its value '
'could not be determined'.format(key))
else:
return opt_specs.get_default_value()
value = self[key]
opt_specs.validate(value)
return value

The returned value is the one passed in the config initialization or
the default value.
def __contains__(self, item):
    """Tells whether a value for ``item`` was explicitly provided

    Only the provided values are consulted here.
    """
    provided = self.values
    return item in provided

Args:
key (str):
The key of the option for which the value must be returned
raise_if_not_defined (bool):
Whether to raise an exception if the required option is not
defined. If False and the option is not defined, None is
returned.
def __getitem__(self, item):
"""Directly retrieves the value.
Raises:
OptionNotDefinedError:
When the specified key is not defined and raise_if_not_defined
is True
Raises KeyError if the value is not provided
"""
option_spec = self.defined_options.get(key)
if not option_spec:
if raise_if_not_defined:
raise OptionNotDefinedError(key)
else:
return
if key not in self.provided_options and option_spec.required:
raise MissingValueError(
'{} is required but not provided'.format(key))
return self.provided_options.get(key, option_spec.get_default_value())
return self.values[item]

def validate_options(self):
    """Runs the validation of every defined option

    Each defined key is resolved through ``get``. Only a validation
    failure is allowed to escape; keys whose value is missing or whose
    definition cannot be found are skipped.

    Raises:
        ValidationError:
            When ``get`` reports a validation failure for any option
    """
    for option_name in self.defined_opts:
        try:
            self.get(option_name)
        except ValidationError:
            # a failing validator is the one condition reported to callers
            raise
        except (MissingValueError, OptionNotDefinedError):
            # an absent value or an unknown option is not a validation
            # failure, so move on to the next key
            continue


class JsonConfiguration(Configuration):
    """Reads the configuration from a json file"""

    def __init__(self, file_path):
        """Loads the values from a json file

        Args:
            file_path (str):
                Path of the JSON file whose top-level value becomes the
                provided values of this configuration.

        Raises:
            OSError:
                When the file cannot be opened
            ValueError:
                When the file content is not valid JSON
        """
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default text encoding.
        with open(file_path, 'r', encoding='utf-8') as fp:
            options = json.load(fp)
        # Initialize the base class exactly once, with the loaded values, so
        # its handling of an empty document applies here as well.
        super(JsonConfiguration, self).__init__(options)
21 changes: 15 additions & 6 deletions crawlster/config/options.py
Expand Up @@ -34,6 +34,19 @@ def get_default_value(self):
else:
return self.default

def validate(self, value):
    """Checks a value against every validator of this option

    The validators are called in order, each one receiving ``value``;
    whatever a validator raises propagates to the caller.

    Raises:
        ValidationError:
            When the validation fails

    Returns:
        None when validation succeeds
    """
    for check in self.validators:
        check(value)


#: alias for the config option as being an optional value
Optional = ConfigOption
Expand Down Expand Up @@ -95,10 +108,6 @@ def __init__(self, choices, default=None, required=False,
super(ChoiceOption, self).__init__(validators, default, required)


class UrlOption(ConfigOption):
class UrlOption(OptionWithDefaultValidators):
"""An option whose value must be a valid URL"""

def __init__(self, default=None, required=False,
extra_validators=None):
validators = [is_url] + (extra_validators or [])
super(UrlOption, self).__init__(validators, default, required)
default_validators = [is_url]

0 comments on commit 1ce7485

Please sign in to comment.