Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add format, screenSize, videoCodec, audioCodec, website and year patterns
- Loading branch information
Showing
23 changed files
with
939 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
""" | ||
audioCodec property | ||
""" | ||
from rebulk import Rebulk | ||
|
||
from .common import dash | ||
from .common.validators import seps_surround | ||
|
||
import regex as re | ||
|
||
# Rebulk instance holding the audioCodec patterns. Regexes registered on it | ||
# are compiled case-insensitively and receive the `dash` abbreviation | ||
# (presumably so that a literal "-" in a pattern also matches another | ||
# separator or nothing at all -- confirm against the rebulk documentation). | ||
AUDIO_CODEC = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) | ||
# Every pattern below is named "audioCodec" and validated with seps_surround. | ||
AUDIO_CODEC.defaults(name="audioCodec", validator=seps_surround) | ||
|
||
# Each call lists alternative spellings that all report the same `value`. | ||
# The third MP3 alternative is "LAME" followed by digits, a dash and digits. | ||
AUDIO_CODEC.regex("MP3", "LAME", r"LAME(?:\d)+-(?:\d)+", value="MP3") | ||
AUDIO_CODEC.regex("DolbyDigital", "DD", value="DD") | ||
AUDIO_CODEC.regex("AAC", value="AAC") | ||
AUDIO_CODEC.regex("AC3", value="AC3") | ||
AUDIO_CODEC.regex("Flac", value="FLAC") | ||
AUDIO_CODEC.regex("DTS", value="DTS") # TODO: LeftValidator | ||
AUDIO_CODEC.regex("True-HD", value="True-HD") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Common module | ||
""" | ||
|
||
# Characters considered separators between tags/words. This is a raw string, | ||
# so the backslash before the final dot is itself one of the characters. | ||
seps = r' [](){},:;!?+*|&=%§-_~#/\.' # list of tags/words separators | ||
|
||
# Pair of ("-", regex fragment); the fragment matches at most one non-word | ||
# character or underscore. Presumably rebulk substitutes the fragment for "-" | ||
# inside patterns that list this abbreviation -- confirm against rebulk docs. | ||
dash = ("-", r"[\W_]?") # abbreviation used by many rebulk objects. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Validators | ||
""" | ||
|
||
from rebulk.validators import chars_before, chars_after, chars_surround | ||
|
||
from . import seps | ||
|
||
from functools import partial | ||
|
||
|
||
# rebulk's chars_before / chars_after / chars_surround validators with `seps` | ||
# pre-bound as their first positional argument. | ||
seps_before = partial(chars_before, seps) | ||
seps_after = partial(chars_after, seps) | ||
seps_surround = partial(chars_surround, seps) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
""" | ||
format property | ||
""" | ||
from rebulk import Rebulk | ||
|
||
from .common import dash | ||
|
||
import regex as re | ||
from .common.validators import seps_surround | ||
|
||
# Rebulk instance holding the format patterns. Regexes registered on it are | ||
# compiled case-insensitively and receive the `dash` abbreviation. | ||
FORMAT = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) | ||
# Every pattern below is named "format" and validated with seps_surround. | ||
FORMAT.defaults(name="format", validator=seps_surround) | ||
|
||
# Each call lists alternative spellings that all report the same `value`. | ||
FORMAT.regex("VHS", "VHS-Rip", value="VHS") | ||
FORMAT.regex("CAM", "CAM-Rip", "HD-CAM", value="Cam") | ||
FORMAT.regex("TELESYNC", "TS", "HD-TS", value="Telesync") | ||
FORMAT.regex("WORKPRINT", "WP", value="Workprint") | ||
FORMAT.regex("TELECINE", "TC", value="Telecine") | ||
FORMAT.regex("PPV", "PPV-Rip", value="PPV") # Pay Per View | ||
# A bare "TV" is deliberately absent from the alternatives on the next line. | ||
FORMAT.regex("SD-TV", "SD-TV-Rip", "Rip-SD-TV", "TV-Rip", "Rip-TV", value="TV") # TV is too common to allow matching | ||
FORMAT.regex("DVB-Rip", "DVB", "PD-TV", value="DVB") | ||
FORMAT.regex("DVD", "DVD-Rip", "VIDEO-TS", "DVD-R", "DVD-9", "DVD-5", value="DVD") | ||
FORMAT.regex("HD-TV", "TV-RIP-HD", "HD-TV-RIP", "HD-RIP", value="HDTV") | ||
FORMAT.regex("VOD", "VOD-Rip", value="VOD") | ||
# "WEB-Rip" reports "WEBRip"; the remaining WEB spellings report "WEB-DL". | ||
FORMAT.regex("WEB-Rip", value="WEBRip") | ||
FORMAT.regex("WEB-DL", "WEB-HD", "WEB", value="WEB-DL") | ||
FORMAT.regex("HD-DVD-Rip", "HD-DVD", value="HD-DVD") | ||
# "B[DR]" covers both "BD" and "BR"; BD5/BD9/BD25/BD50 are also accepted. | ||
FORMAT.regex("Blu-ray(?:-Rip)?", "B[DR]", "B[DR]-Rip", "BD[59]", "BD25", "BD50", value="BluRay") |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
""" | ||
screenSize property | ||
""" | ||
from rebulk import Rebulk | ||
|
||
import regex as re | ||
from .common.validators import seps_surround | ||
|
||
# Rebulk instance holding the screenSize patterns, compiled case-insensitively. | ||
# No abbreviation is configured here, unlike the codec/format modules. | ||
SCREEN_SIZE = Rebulk().regex_defaults(flags=re.IGNORECASE) | ||
# Every pattern below is named "screenSize" and validated with seps_surround. | ||
SCREEN_SIZE.defaults(name="screenSize", validator=seps_surround) | ||
|
||
# Shape shared by the patterns: an optional width prefix (3 or more digits | ||
# followed by one of "\", "/", "x" or "*"), then the height, then either "i" | ||
# or an optional "p" followed by an optional "x". | ||
SCREEN_SIZE.regex(r"(?:\d{3,}(?:\\|\/|x|\*))?360(?:i|p?x?)", value="360p") | ||
SCREEN_SIZE.regex(r"(?:\d{3,}(?:\\|\/|x|\*))?368(?:i|p?x?)", value="368p") | ||
SCREEN_SIZE.regex(r"(?:\d{3,}(?:\\|\/|x|\*))?480(?:i|p?x?)", value="480p") | ||
SCREEN_SIZE.regex(r"(?:\d{3,}(?:\\|\/|x|\*))?576(?:i|p?x?)", value="576p") | ||
SCREEN_SIZE.regex(r"(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)", value="720p") | ||
SCREEN_SIZE.regex(r"(?:\d{3,}(?:\\|\/|x|\*))?900(?:i|p?x?)", value="900p") | ||
# 1080 is the only height whose interlaced form reports its own value. | ||
SCREEN_SIZE.regex(r"(?:\d{3,}(?:\\|\/|x|\*))?1080i", value="1080i") | ||
SCREEN_SIZE.regex(r"(?:\d{3,}(?:\\|\/|x|\*))?1080p?x?", value="1080p") | ||
# NOTE(review): the width prefix here is limited to 3-4 digits, whereas the | ||
# patterns above accept 3 or more -- confirm whether that is intentional. | ||
SCREEN_SIZE.regex(r"(?:\d{3,4}(?:\\|\/|x|\*))?2160(?:i|p?x?)", value="4K") | ||
|
||
# TODO: implement validators from guessit 1 | ||
# validator=ChainedValidator(DefaultValidator(), OnlyOneValidator())) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
""" | ||
videoCodec property | ||
""" | ||
from rebulk import Rebulk | ||
|
||
from .common import dash | ||
from .common.validators import seps_surround | ||
|
||
import regex as re | ||
|
||
# Rebulk instance holding the videoCodec patterns. Regexes registered on it | ||
# are compiled case-insensitively and receive the `dash` abbreviation. | ||
VIDEO_CODEC = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) | ||
# Every pattern below is named "videoCodec" and validated with seps_surround. | ||
VIDEO_CODEC.defaults(name="videoCodec", validator=seps_surround) | ||
|
||
# "Rv" followed by exactly two digits reports "Real". | ||
VIDEO_CODEC.regex(r"Rv\d{2}", value="Real") | ||
VIDEO_CODEC.regex("Mpeg2", value="Mpeg2") | ||
VIDEO_CODEC.regex("DVDivX", "DivX", value="DivX") | ||
VIDEO_CODEC.regex("XviD", value="XviD") | ||
# NOTE(review): the "-AVC" suffix is optional after [hx]-264 but mandatory | ||
# after MPEG-4 (no trailing "?") -- presumably deliberate; confirm. | ||
VIDEO_CODEC.regex("[hx]-264(?:-AVC)?", "MPEG-4(?:-AVC)", value="h264") | ||
VIDEO_CODEC.regex("[hx]-265(?:-HEVC)?", "HEVC", value="h265") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Website property. | ||
""" | ||
|
||
from pkg_resources import resource_stream # @UnresolvedImport | ||
|
||
from rebulk import Rebulk | ||
|
||
import regex as re | ||
|
||
# Rebulk instance holding the website patterns, compiled case-insensitively. | ||
WEBSITE = Rebulk().regex_defaults(flags=re.IGNORECASE) | ||
# Matches are named "website"; no separator validator is configured here. | ||
WEBSITE.defaults(name="website") | ||
|
||
# Domain extensions read from the 'tlds-alpha-by-domain.txt' resource of the
# 'guessit' package.
_tlds_stream = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
try:
    # Lines containing b'--' are skipped, then the first remaining entry is
    # dropped by the [1:] slice (presumably the data file's header line --
    # confirm against the bundled file).
    TLDS = [line.strip().decode('utf-8')
            for line in _tlds_stream.readlines()
            if b'--' not in line][1:]  # All registered domain extension
finally:
    # Close the resource stream explicitly instead of leaving it open for the
    # lifetime of the process.
    _tlds_stream.close()
|
||
SAFE_TLDS = ['com', 'org', 'net'] # For sure a website extension | ||
SAFE_SUBDOMAINS = ['www'] # For sure a website subdomain | ||
SAFE_PREFIX = ['co', 'com', 'org', 'net'] # Those words before a tlds are sure | ||
|
||
# \L<name> is the `regex` module's named-list syntax; each list is supplied | ||
# through the keyword argument of the same name. All three patterns require a | ||
# non-alphanumeric character (or the string boundary) on both sides and | ||
# capture the website itself in their only capturing group. | ||
# NOTE(review): children=True presumably makes that captured group, rather | ||
# than the whole match with its delimiters, the reported value -- confirm. | ||
# 1) One or more safe subdomains, one or more "label." parts, then any TLD. | ||
WEBSITE.regex(r'(?:[^a-z0-9]|^)((?:\L<safe_subdomains>\.)+(?:[a-z-]+\.)+(?:\L<tlds>))(?:[^a-z0-9]|$)', | ||
safe_subdomains=SAFE_SUBDOMAINS, tlds=TLDS, children=True) | ||
# 2) Optional safe subdomains, a single label, then one of the safe TLDs. | ||
WEBSITE.regex(r'(?:[^a-z0-9]|^)((?:\L<safe_subdomains>\.)*[a-z-]+\.(?:\L<safe_tlds>))(?:[^a-z0-9]|$)', | ||
safe_subdomains=SAFE_SUBDOMAINS, safe_tlds=SAFE_TLDS, children=True) | ||
# 3) Optional safe subdomains, a single label, one or more safe prefixes, | ||
# then any TLD. | ||
WEBSITE.regex(r'(?:[^a-z0-9]|^)((?:\L<safe_subdomains>\.)*[a-z-]+\.(?:\L<safe_prefix>\.)+(?:\L<tlds>))(?:[^a-z0-9]|$)', | ||
safe_subdomains=SAFE_SUBDOMAINS, safe_prefix=SAFE_PREFIX, tlds=TLDS, children=True) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
""" | ||
year property | ||
""" | ||
from rebulk import Rebulk | ||
|
||
from .common.validators import seps_surround | ||
|
||
# Rebulk instance holding the year pattern (no regex defaults are set). | ||
YEAR = Rebulk() | ||
# Default name and validator for patterns registered on YEAR. | ||
YEAR.defaults(name="year", validator=seps_surround) | ||
|
||
|
||
def validate_year(match):
    """
    Tell whether a match holds an acceptable year.

    :param match: match to check; only its ``value`` attribute is read
    :type match: object exposing a ``value`` attribute
    :return: True when ``1920 <= match.value < 2030``, False otherwise
    :rtype: bool
    """
    year = match.value
    # Reject anything below the lower bound first, then test the upper bound.
    if not 1920 <= year:
        return False
    return year < 2030
|
||
|
||
# Any run of four digits is a candidate; it is converted with int() and kept
# only when validate_year accepts it.
# NOTE(review): validator= is passed here while YEAR.defaults also sets
# seps_surround -- confirm whether this replaces the default validator.
YEAR.regex(r"\d{4}", formatter=int, validator=validate_year)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv | ||
: #title: Fear and Loathing in Las Vegas | ||
year: 1998 | ||
screenSize: 720p | ||
format: HD-DVD | ||
audioCodec: DTS | ||
videoCodec: h264 | ||
#releaseGroup: ESiR |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Multiple input strings having same expected results can be chained. | ||
# Use - marker to check inputs that should not match results. | ||
|
||
|
||
? +MP3 | ||
? +lame | ||
? +lame3.12 | ||
? +lame3.100 | ||
: audioCodec: MP3 | ||
|
||
? +DolbyDigital | ||
? +DD | ||
? -Dolby Digital | ||
: audioCodec: DD | ||
|
||
? +AAC | ||
: audioCodec: AAC | ||
|
||
? +AC3 | ||
: audioCodec: AC3 | ||
|
||
? +Flac | ||
: audioCodec: FLAC | ||
|
||
? +DTS | ||
: audioCodec: DTS | ||
|
||
? +True-HD | ||
? +trueHD | ||
: audioCodec: True-HD |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,17 @@ | ||
# Multiple input strings having same expected results can be chained. | ||
# Use - marker to check inputs that should not match results. | ||
? 2x5 | ||
? 2X5 | ||
? 02x05 | ||
? 2X05 | ||
? 02x5 | ||
? +2x5 | ||
? +2X5 | ||
? +02x05 | ||
? +2X05 | ||
? +02x5 | ||
? S02E05 | ||
? s02e05 | ||
? s02e5 | ||
? s2e05 | ||
? $s03e05 | ||
? $s02e06 | ||
? $3x05 | ||
? $2x06 | ||
? -s03e05 | ||
? -s02e06 | ||
? -3x05 | ||
? -2x06 | ||
: season: 2 | ||
episodeNumber: 5 |
Oops, something went wrong.