Skip to content

Commit

Permalink
Bump rebulk 0.6.1 and enhance title guessing
Browse files Browse the repository at this point in the history
  • Loading branch information
Toilal committed Nov 11, 2015
1 parent 6733b78 commit ca469a9
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 36 deletions.
3 changes: 2 additions & 1 deletion HISTORY.rst
Expand Up @@ -4,7 +4,8 @@ History
2.0b1 (unreleased)
------------------

- Nothing changed yet.
- Enhance title guessing.
- Bump rebulk to ``0.6.1`` to use deep copy instead of shallow copy inside Match crop/split methods.


2.0a4 (2015-11-09)
Expand Down
10 changes: 5 additions & 5 deletions guessit/rules/common/numeral.py
Expand Up @@ -108,7 +108,7 @@ def __parse_word(value):
return word_list.index(value.lower())
except ValueError:
pass
raise ValueError
raise ValueError # pragma: no cover


_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')
Expand Down Expand Up @@ -159,9 +159,9 @@ def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True
for word in value.split():
try:
return __parse_word(word)
except ValueError:
except ValueError: # pragma: no cover
pass
return __parse_word(value)
except ValueError:
return __parse_word(value) # pragma: no cover
except ValueError: # pragma: no cover
pass
raise ValueError('Invalid numeral: ' + value)
raise ValueError('Invalid numeral: ' + value) # pragma: no cover
11 changes: 0 additions & 11 deletions guessit/rules/common/words.py
Expand Up @@ -10,17 +10,6 @@
_words_rexp = re.compile(r'\w+', re.UNICODE)


def find_words(string):
    """
    Return every word found in a string.

    Underscores are replaced by spaces before matching, so they act as
    word separators.

    :param string: text to search for words
    :type string: str
    :return: list of word
    :rtype: list[str]
    """
    normalized = string.replace('_', ' ')
    return _words_rexp.findall(normalized)


def iter_words(string):
"""
Iterate on all words in a string
Expand Down
39 changes: 26 additions & 13 deletions guessit/rules/processors.py
Expand Up @@ -9,9 +9,10 @@
import copy

import six
from guessit.rules.common.words import iter_words

from rebulk import Rebulk, Rule, CustomRule, POST_PROCESS, PRE_PROCESS, AppendMatch, RemoveMatch
from .common.formatters import strip
from .common.formatters import cleanup
from .common.comparators import marker_sorted
from .common.date import valid_year

Expand Down Expand Up @@ -66,12 +67,14 @@ def when(self, matches, context):
new_matches = []

for filepath in marker_sorted(matches.markers.named('path'), matches):
holes = matches.holes(start=filepath.start, end=filepath.end, formatter=strip)
holes = matches.holes(start=filepath.start, end=filepath.end, formatter=cleanup)
for name in matches.names:
for hole in list(holes):
for current_match in matches.named(name):
if isinstance(current_match.value, six.string_types) and \
hole.value.lower() == current_match.value.lower():
if 'equivalent-ignore' in current_match.tags:
continue
new_value = _preferred_string(hole.value, current_match.value)
if hole.value != new_value:
hole.value = new_value
Expand Down Expand Up @@ -151,23 +154,33 @@ def _preferred_string(value1, value2): # pylint:disable=too-many-return-stateme
:return: The preferred title
:rtype: str
"""
if value1 and not value2:
return value1
if value2 and not value1:
return value2
if value1 == value2:
return value1
if value1.istitle() and not value2.istitle():
return value1
if value2.istitle() and not value1.istitle():
return value2
if value1[0].isupper() and not value1[0].isupper():
if not value1.isupper() and value2.isupper():
return value1
if not value1.isupper() and value1[0].isupper() and not value2[0].isupper():
return value1
if _count_title_words(value1) > _count_title_words(value2):
return value1
if value2[0].isupper() and not value1[0].isupper():
return value2
return value1
return value2


def _count_title_words(value):
    """
    Count how many words in value are title-cased.

    :param value: string whose words are inspected
    :type value: str
    :return: number of words for which ``istitle()`` is true
    :rtype: int
    """
    return sum(1 for word in iter_words(value) if word.group(0).istitle())

class SeasonYear(Rule):
"""
If a season is a valid year and no year was found, create a match with year.
Expand Down Expand Up @@ -195,7 +208,7 @@ class Processors(CustomRule):
def when(self, matches, context):
pass

def then(self, matches, when_response, context):
def then(self, matches, when_response, context): # pragma: no cover
pass


Expand Down
9 changes: 6 additions & 3 deletions guessit/rules/properties/title.py
Expand Up @@ -8,7 +8,7 @@
from guessit.rules.properties.film import FilmTitleRule
from guessit.rules.properties.language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, \
SubtitleExtensionRule
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
from rebulk.formatters import formatters

from ..common.formatters import cleanup, reorder_title
Expand Down Expand Up @@ -288,7 +288,7 @@ class PreferTitleWithYear(Rule):
Prefer title where filepart contains year.
"""
dependency = TitleFromPosition
consequence = RemoveMatch
consequence = [RemoveMatch, AppendTags(['equivalent-ignore'])]

properties = {'title': [None]}

Expand All @@ -311,15 +311,18 @@ def when(self, matches, context):
else:
with_year.append(title_match)

to_tag = []
if with_year_in_group:
title_values = set([title_match.value for title_match in with_year_in_group])
to_tag.extend(with_year_in_group)
elif with_year:
title_values = set([title_match.value for title_match in with_year])
to_tag.extend(with_year)
else:
title_values = set([title_match.value for title_match in titles])

to_remove = []
for title_match in titles:
if title_match.value not in title_values:
to_remove.append(title_match)
return to_remove
return to_remove, to_tag
2 changes: 1 addition & 1 deletion guessit/test/episodes.yml
Expand Up @@ -361,7 +361,7 @@
release_group: CtrlHD

? /home/disaster/Videos/TV/Merlin/merlin_2008.5x02.arthurs_bane_part_two.repack.720p_hdtv_x264-fov.mkv
: title: Merlin
: title: merlin
season: 5
episode: 2
part: 2
Expand Down
16 changes: 16 additions & 0 deletions guessit/test/rules/title.yml
Expand Up @@ -14,3 +14,19 @@
? title_only.mkv
: title: title only

? Some Title/some.title.mkv
? some.title/Some.Title.mkv
: title: Some Title

? SOME TITLE/Some.title.mkv
? Some.title/SOME TITLE.mkv
: title: Some title

? some title/Some.title.mkv
? Some.title/some title.mkv
: title: Some title

? Some other title/Some.Other.title.mkv
? Some.Other title/Some other title.mkv
: title: Some Other title

6 changes: 5 additions & 1 deletion guessit/test/test_yml.py
Expand Up @@ -185,7 +185,11 @@ def check(self, string, expected):
negates, global_, string = self.parse_token_options(string)

options = expected.get('options')
result = guessit(string, options)
try:
result = guessit(string, options)
except Exception as exc:
logger.error('[' + string + '] Exception: ' + str(exc))
raise exc

entry = EntryResult(string, negates)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -11,7 +11,7 @@
README = open(os.path.join(here, 'README.rst')).read()
HISTORY = open(os.path.join(here, 'HISTORY.rst')).read()

install_requires = ['rebulk>=0.6.0', 'regex', 'babelfish>=0.5.5', 'python-dateutil']
install_requires = ['rebulk>=0.6.1', 'regex', 'babelfish>=0.5.5', 'python-dateutil']

setup_requires = ['pytest-runner']

Expand Down

0 comments on commit ca469a9

Please sign in to comment.