Skip to content

Commit

Permalink
Add expected-title option and refactor title/episodeTitle guessing
Browse files Browse the repository at this point in the history
  • Loading branch information
Toilal committed Nov 1, 2015
1 parent da98979 commit eeb5fe1
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 58 deletions.
2 changes: 2 additions & 0 deletions guessit/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def build_argument_parser():
default=False,
help='Guess "serie.213.avi" as the episodeNumber 213. Without this option, '
'it will be guessed as season 2, episodeNumber 13')
naming_opts.add_argument('-T', '--expected-title', action='append', dest='expected_title',
help='Expected title to parse (can be used multiple times)')

output_opts = opts.add_argument_group("Output")
output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=False,
Expand Down
61 changes: 33 additions & 28 deletions guessit/rules/properties/episode_title.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@
"""
from collections import defaultdict
from guessit.rules.common import seps, title_seps
from guessit.rules.properties.title import TitleFromPosition
from guessit.rules.properties.title import TitleFromPosition, TitleBaseRule
from rebulk import Rebulk, Rule, AppendMatch, RenameMatch
from rebulk.formatters import formatters

from ..common.formatters import cleanup, reorder_title
from ..common.formatters import cleanup


class TitleToEpisodeTitle(Rule):
Expand Down Expand Up @@ -46,42 +45,48 @@ def then(self, matches, when_response, context):
matches.append(episode_title)


class EpisodeTitleFromPosition(Rule):
class EpisodeTitleFromPosition(TitleBaseRule):
"""
Add episode title match in existing matches
Must run after TitleFromPosition rule.
"""
dependency = TitleToEpisodeTitle
consequence = AppendMatch

def when(self, matches, context):
if matches.named('episodeTitle'):
return
def hole_filter(self, hole, matches):
episode = matches.previous(hole,
lambda previous: any(name in previous.names
for name in ['episodeNumber', 'episodeDetails',
'episodeCount', 'season', 'seasonCount',
'date', 'title']),
0)

crc32 = matches.named('crc32')

filename = matches.markers.named('path', -1)
start, end = filename.span
return episode or crc32

holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
predicate=lambda hole: hole.value)
def filepart_filter(self, filepart, matches):
    """
    Accept only a filepart in which a title match was found.

    :param filepart: filepart marker being considered; its start/end bound the lookup.
    :param matches: matches container, queried through range().
    :return: True when at least one match named 'title' lies in the filepart range.
    :rtype: bool
    """
    title_matches = matches.range(filepart.start, filepart.end, lambda match: match.name == 'title')
    return bool(title_matches)

for hole in holes:
episode = matches.previous(hole,
lambda previous: any(name in previous.names
for name in ['episodeNumber', 'episodeDetails',
'episodeCount', 'season', 'seasonCount',
'date']),
0)
def is_ignored(self, match):
    """
    Ignore matches when scanning for the episode title (hole).

    :param match: match to test.
    :return: True for an 'episodeDetails' match, otherwise whatever the parent rule decides.
    """
    return match.name == 'episodeDetails' or super(EpisodeTitleFromPosition, self).is_ignored(match)

crc32 = matches.named('crc32')
def should_keep(self, match, to_keep, matches, filepart, hole):
    """
    Check if this match should be accepted when ending or starting a hole.

    :param match: ignored match found at the edge of the hole.
    :param to_keep: matches already kept.
    :param matches: matches container, queried through previous().
    :param filepart: filepart being scanned.
    :param hole: hole being scanned.
    :return: (True, False) for an 'episodeDetails' match with no previous 'season' match,
        otherwise the parent rule's decision.
    """
    if match.name == 'episodeDetails':
        season_before = matches.previous(match, lambda previous: previous.name == 'season')
        if not season_before:
            return True, False  # Keep episodeDetails, but don't crop title.
    return super(EpisodeTitleFromPosition, self).should_keep(match, to_keep, matches, filepart, hole)

if episode or crc32:
group_markers = matches.markers.named('group')
title = hole.crop(group_markers, index=0)
def __init__(self):
    """Configure the base rule to emit 'episodeTitle' matches tagged 'title'."""
    super(EpisodeTitleFromPosition, self).__init__(match_name='episodeTitle', match_tags=['title'])

if title and title.value:
title.name = 'episodeTitle'
title.tags = ['title']
return title
def when(self, matches, context):
    """
    Run the title lookup only when no episode title exists yet.

    :param matches: matches container.
    :param context: options mapping, forwarded to the parent rule.
    :return: None when an 'episodeTitle' match is already present, otherwise the
        parent rule's result.
    """
    if matches.named('episodeTitle'):
        return None
    return super(EpisodeTitleFromPosition, self).when(matches, context)


class AlternativeTitleReplace(Rule):
Expand All @@ -104,7 +109,7 @@ def when(self, matches, context):
lambda previous: any(name in previous.names
for name in ['episodeNumber', 'episodeDetails',
'episodeCount', 'season', 'seasonCount',
'date']),
'date', 'title']),
0)

crc32 = matches.named('crc32')
Expand Down
5 changes: 3 additions & 2 deletions guessit/rules/properties/episodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from ..common.numeral import numeral, parse_numeral

EPISODES = Rebulk()
EPISODES.regex_defaults(flags=re.IGNORECASE)
EPISODES.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)

# 01x02, 01x02x03x04
EPISODES.regex(r'(?P<season>\d+)x(?P<episodeNumber>\d+)' +
Expand All @@ -35,7 +35,8 @@

# episodeDetails property
for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
EPISODES.string(episode_detail, name='episodeDetails')
EPISODES.string(episode_detail, value=episode_detail, name='episodeDetails',
conflict_solver=lambda match, other: None)
EPISODES.regex(r'Extras?', name='episodeDetails', value='Extras')

EPISODES.defaults(validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)
Expand Down
158 changes: 130 additions & 28 deletions guessit/rules/properties/title.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,69 @@
from ..common.formatters import cleanup, reorder_title
from ..common.comparators import marker_sorted
from ..common import seps, title_seps
from rebulk.utils import find_all


class TitleFromPosition(Rule):
class TitleBaseRule(Rule):
"""
Add title match in existing matches
"""
#pylint:disable=no-self-use,unused-argument
consequence = [AppendMatch, RemoveMatch]
dependency = [FilmTitleRule, SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule]

@staticmethod
def is_ignored(match):
def __init__(self, match_name, match_tags=None, alternative_match_name=None):
    """
    Store the naming used for the matches this rule creates.

    :param match_name: name given to the title match.
    :param match_tags: tags given to the title match.
    :param alternative_match_name: name given to the extra parts obtained when the title
        is split on title separators; when falsy, the title is not split.
    """
    super(TitleBaseRule, self).__init__()
    self.match_name = match_name
    self.match_tags = match_tags
    self.alternative_match_name = alternative_match_name

def hole_filter(self, hole, matches):
    """
    Filter holes for titles.

    The base implementation accepts every hole; subclasses can override it to be selective.

    :param hole: candidate hole.
    :param matches: matches container.
    :return: True to keep the hole as a title candidate.
    :rtype: bool
    """
    return True

def filepart_filter(self, filepart, matches):
    """
    Filter fileparts for titles.

    The base implementation accepts every filepart; subclasses can override it to be selective.

    :param filepart: candidate filepart.
    :param matches: matches container.
    :return: True to scan the filepart for a title.
    :rtype: bool
    """
    return True

def holes_process(self, holes, matches):
    """
    Process holes before they are scanned for titles.

    Each hole is cropped with the 'group' markers (hole.crop) and the resulting
    pieces are gathered, in order, into a single flat list.

    :param holes: iterable of holes to process.
    :param matches: matches container; its markers provide the 'group' markers.
    :return: cropped holes.
    :rtype: list
    """
    # The group markers do not depend on the hole, so look them up once.
    group_markers = matches.markers.named('group')
    cropped_holes = []
    for hole in holes:
        cropped_holes.extend(hole.crop(group_markers))
    return cropped_holes

def is_ignored(self, match):
    """
    Ignore matches when scanning for title (hole).

    :param match: match to test.
    :return: True when the match is named 'language' or 'country'.
    :rtype: bool
    """
    return match.name in ('language', 'country')

@staticmethod
def should_keep(match, to_keep, matches, filepart, hole):
def should_keep(self, match, to_keep, matches, filepart, hole):
"""
Check if this match should be accepted when ending or starting a hole.
:param match:
Expand Down Expand Up @@ -60,58 +105,81 @@ def should_keep(match, to_keep, matches, filepart, hole):

return False

@staticmethod
def check_titles_in_filepart(filepart, matches): # pylint: disable=too-many-locals
def check_titles_in_filepart(self, filepart, matches):
"""
Find title in filepart (ignoring language)
"""
# pylint:disable=too-many-locals,too-many-branches
start, end = filepart.span

holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
ignore=TitleFromPosition.is_ignored,
ignore=self.is_ignored,
predicate=lambda hole: hole.value)

holes = self.holes_process(holes, matches)

for hole in holes:
# pylint:disable=cell-var-from-loop
if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
continue

to_remove = []
to_keep = []

ignored_matches = matches.range(hole.start, hole.end, TitleFromPosition.is_ignored)
ignored_matches = matches.range(hole.start, hole.end, self.is_ignored)

if ignored_matches:
for ignored_match in reversed(ignored_matches):
# pylint:disable=undefined-loop-variable
trailing = matches.chain_before(hole.end, seps, predicate=lambda match: match == ignored_match)
if trailing and TitleFromPosition.should_keep(ignored_match, to_keep, matches, filepart, hole):
to_keep.append(ignored_match)
hole.end = ignored_match.start
if trailing:
should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole)
if should_keep:
# pylint:disable=unpacking-non-sequence
try:
append, crop = should_keep
except TypeError:
append, crop = should_keep, should_keep
if append:
to_keep.append(ignored_match)
if crop:
hole.end = ignored_match.start

for ignored_match in ignored_matches:
if ignored_match not in to_keep:
starting = matches.chain_after(hole.start, seps, predicate=lambda match: match == ignored_match)
if starting and TitleFromPosition.should_keep(ignored_match, to_keep, matches, filepart, hole):
to_keep.append(ignored_match)
hole.start = ignored_match.end
if starting:
should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole)
if should_keep:
# pylint:disable=unpacking-non-sequence
try:
append, crop = should_keep
except TypeError:
append, crop = should_keep, should_keep
if append:
to_keep.append(ignored_match)
if crop:
hole.start = ignored_match.end

to_remove.extend(ignored_matches)
for keep_match in to_keep:
to_remove.remove(keep_match)

group_markers = matches.markers.named('group')
hole = hole.crop(group_markers, index=0)

if hole and hole.value:
hole.name = 'title'
hole.tags = ['title']
# Split and keep values that can be a title
titles = hole.split(title_seps, lambda match: match.value)
for title in titles[1:]:
title.name = 'alternativeTitle'
hole.name = self.match_name
hole.tags = self.match_tags
if self.alternative_match_name:
# Split and keep values that can be a title
titles = hole.split(title_seps, lambda match: match.value)
for title in titles[1:]:
title.name = self.alternative_match_name
else:
titles = [hole]
return titles, to_remove

def when(self, matches, context):
fileparts = list(marker_sorted(matches.markers.named('path'), matches))
fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
if not self.filepart_filter or self.filepart_filter(filepart, matches)]

to_remove = []

Expand All @@ -128,7 +196,7 @@ def when(self, matches, context):
years_fileparts.remove(filepart)
except ValueError:
pass
titles = TitleFromPosition.check_titles_in_filepart(filepart, matches)
titles = self.check_titles_in_filepart(filepart, matches)
if titles:
titles, to_remove_c = titles
ret.extend(titles)
Expand All @@ -137,22 +205,39 @@ def when(self, matches, context):

# Add title match in all fileparts containing the year.
for filepart in years_fileparts:
titles = TitleFromPosition.check_titles_in_filepart(filepart, matches)
titles = self.check_titles_in_filepart(filepart, matches)
if titles:
# pylint:disable=unbalanced-tuple-unpacking
titles, to_remove_c = titles
ret.extend(titles)
to_remove.extend(to_remove_c)

return ret, to_remove


class TitleFromPosition(TitleBaseRule):
    """
    Add title match in existing matches.

    Configures the base rule to emit 'title' matches tagged 'title', with
    'alternativeTitle' as the alternative match name.
    """
    dependency = [FilmTitleRule, SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule]

    def __init__(self):
        super(TitleFromPosition, self).__init__(match_name='title',
                                                match_tags=['title'],
                                                alternative_match_name='alternativeTitle')

    def enabled(self, context):
        """
        Tell whether this rule should run.

        :param context: options mapping.
        :return: True unless the context carries a truthy 'expected_title' value.
        :rtype: bool
        """
        return not context.get('expected_title')


class PreferTitleWithYear(Rule):
"""
Prefer title where filepart contains year.
"""
dependency = TitleFromPosition
consequence = RemoveMatch

def enabled(self, context):
    """
    Tell whether this rule should run.

    :param context: options mapping.
    :return: True unless the context carries a truthy 'expected_title' value.
    :rtype: bool
    """
    has_expected_title = bool(context.get('expected_title'))
    return not has_expected_title

def when(self, matches, context):
with_year = []
titles = matches.named('title')
Expand All @@ -177,3 +262,20 @@ def when(self, matches, context):


TITLE = Rebulk().rules(TitleFromPosition, PreferTitleWithYear)


def expected_title(input_string, context):
    """
    Expected title functional pattern.

    Looks for each title listed under context['expected_title'] in the input string,
    ignoring case, and reports the first occurrence found.

    :param input_string: string to search.
    :type input_string: str
    :param context: options mapping; its 'expected_title' entry is the iterable of titles to look for.
    :type context: dict
    :return: (start, end) span of the first occurrence, or None when nothing is found.
    :rtype: tuple[int, int] or None
    """
    for search in context.get('expected_title') or ():
        for start in find_all(input_string, search, ignore_case=True):
            # A functional pattern reports a span, so the second item is the end
            # offset (start + length), not the bare length of the searched title.
            return start, start + len(search)
    return None

# Register expected_title as a functional pattern producing matches named 'title'; the
# pattern is disabled whenever the context carries no truthy 'expected_title' value.
TITLE.functional(expected_title, name='title', disabled=lambda context: not context.get('expected_title'))
7 changes: 7 additions & 0 deletions guessit/test/series.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1037,3 +1037,10 @@
? FooBar 360
: screenSize: 360p
title: FooBar

? BarFood christmas special HDTV
: options: --expected-title BarFood
format: HDTV
title: BarFood
episodeTitle: christmas special
episodeDetails: Special

0 comments on commit eeb5fe1

Please sign in to comment.