Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ie] Add new fields with proper support for multiple values #8917

Merged
merged 28 commits into from Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
071326c
[ie] Add new fields with proper support for multiple values
llistochek Jan 3, 2024
698199b
Apply suggestions from code review
pukkandan Jan 3, 2024
a691696
Apply suggestions from code review
pukkandan Jan 3, 2024
d60ad19
Update yt_dlp/extractor/common.py
pukkandan Jan 3, 2024
ac52bf0
Update yt_dlp/YoutubeDL.py
pukkandan Jan 3, 2024
265e0f7
Rename new fields
llistochek Jan 3, 2024
41c3dab
Revert unrelated changes
llistochek Jan 3, 2024
c3fe956
Revert unrelated change
llistochek Jan 3, 2024
c624659
Update README
llistochek Jan 3, 2024
dca6384
Update README and fix IE documentation typo
llistochek Jan 3, 2024
2598790
Revert MutagenMetadataPP
llistochek Jan 3, 2024
84c89c3
Better backward compatibility
llistochek Jan 8, 2024
4bfd8ed
Update README to reflect changes in FFMpegMetadataPP
llistochek Jan 8, 2024
482a971
Fix linting
llistochek Jan 8, 2024
5bed30d
Future-proof
pukkandan Jan 12, 2024
afccd2d
We weren't able to deprecate
pukkandan Jan 12, 2024
b817457
Cleanup
pukkandan Jan 12, 2024
916acca
Add `creators`
pukkandan Jan 12, 2024
9e76a7e
typo
pukkandan Jan 12, 2024
1531f4f
Stricter Splitting
pukkandan Jan 12, 2024
694da35
Handle when both fields are returned
pukkandan Jan 12, 2024
75a6541
[test] Test only new fields
pukkandan Jan 12, 2024
af8e0c8
Replace comma with unicode
pukkandan Jan 12, 2024
7f3a69a
[ie/youtube] Migrate `artist`
pukkandan Jan 12, 2024
5ced986
Clean docs
pukkandan Jan 12, 2024
6aa45a9
More robust warning
pukkandan Jan 12, 2024
b40e1e7
oops
pukkandan Jan 12, 2024
b2230a6
[ie/youtube] Fix handling of 'artists' field
llistochek Feb 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 15 additions & 3 deletions yt_dlp/YoutubeDL.py
Expand Up @@ -1735,6 +1735,7 @@ def __extract_info(self, url, ie, download, extra_info, process):
'_type': 'compat_list',
'entries': ie_result,
}
self.fix_deprecated_fields(ie_result)
if extra_info.get('original_url'):
ie_result.setdefault('original_url', extra_info['original_url'])
self.add_default_extra_info(ie_result, ie, url)
Expand All @@ -1744,6 +1745,18 @@ def __extract_info(self, url, ie, download, extra_info, process):
else:
return ie_result

def fix_deprecated_fields(self, ie_result):
    """Fill the list-valued metadata fields from their deprecated string forms.

    For each deprecated comma-separated field ('artist', 'composer',
    'album_artist', 'genre') that holds a non-empty string, the matching
    '*_list' key is set to the value split on ', ' or ','.

    The mapping is skipped for a field when:
      * the deprecated field is missing, None, empty, or not a string
        (re.split would raise TypeError on a non-string), or
      * the '*_list' field is already populated, so a list supplied
        directly by the extractor is never overwritten.

    @param ie_result    The info dict to update; it is modified in place.
    @returns            None
    """
    deprecated_multivalue_fields = {
        'artist': 'artist_list',
        'composer': 'composer_list',
        'album_artist': 'album_artist_list',
        'genre': 'genre_list',
    }
    for deprecated_field, new_field in deprecated_multivalue_fields.items():
        old_value = ie_result.get(deprecated_field)
        # Only a real, non-empty string can be split into a list
        if not isinstance(old_value, str) or not old_value:
            continue
        # Prefer the list the extractor already provided
        if ie_result.get(new_field) is not None:
            continue
        ie_result[new_field] = re.split(r', ?', old_value)
pukkandan marked this conversation as resolved.
Show resolved Hide resolved

def add_default_extra_info(self, ie_result, ie, url):
if url is not None:
self.add_extra_info(ie_result, {
Expand Down Expand Up @@ -3918,10 +3931,9 @@ def print_debug_header(self):

# These imports can be slow. So import them only as needed
from .extractor.extractors import _LAZY_LOADER
from .extractor.extractors import (
_PLUGIN_CLASSES as plugin_ies,
from .extractor.extractors import _PLUGIN_CLASSES as plugin_ies
from .extractor.extractors import \
_PLUGIN_OVERRIDES as plugin_ie_overrides
)
llistochek marked this conversation as resolved.
Show resolved Hide resolved

def get_encoding(stream):
ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
Expand Down
5 changes: 5 additions & 0 deletions yt_dlp/__init__.py
Expand Up @@ -670,6 +670,11 @@ def get_postprocessors(opts):
'add_metadata': opts.addmetadata,
'add_infojson': opts.embed_infojson,
}
# MutagenMetadata must run after FFmpegMetadata
if opts.addmetadata:
yield {
'key': 'MutagenMetadata',
}
# Deprecated
# This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment
# but must be below EmbedSubtitle and FFmpegMetadata
Expand Down
20 changes: 14 additions & 6 deletions yt_dlp/extractor/common.py
Expand Up @@ -422,16 +422,24 @@ class InfoExtractor:
track_number: Number of the track within an album or a disc, as an integer.
track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
as a unicode string.
artist: Artist(s) of the track.
genre: Genre(s) of the track.
artist_list: List of artists of the track.
composer_list: List of composers of the piece
genre_list: List of genres of the track.
album: Title of the album the track belongs to.
album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
album_artist: List of all artists appeared on the album (e.g.
"Ash Borer / Fell Voices" or "Various Artists", useful for splits
and compilations).
album_artist_list: List of all artists that appeared on the album. E.g.
["Ash Borer", "Fell Voices"] or ["Various Artists"].
pukkandan marked this conversation as resolved.
Show resolved Hide resolved
Useful for splits and compilations.
disc_number: Number of the disc or other physical medium the track belongs to,
as an integer.
composer: Composer of the piece
composer: Deprecated; use "composer_list" instead.
Composer(s) of the piece, comma-separated.
artist: Deprecated; use "artist_list" instead.
Artist(s) of the track, comma-separated.
genre: Deprecated; use "genre_list" instead.
Genre(s) of the track, comma-separated.
album_artist: Deprecated; use "album_artist_list" instead.
All artists that appeared on the album, comma-separated.

The following fields should only be set for clips that should be cut from the original video:

Expand Down
1 change: 1 addition & 0 deletions yt_dlp/postprocessor/__init__.py
Expand Up @@ -30,6 +30,7 @@
)
from .modify_chapters import ModifyChaptersPP
from .movefilesafterdownload import MoveFilesAfterDownloadPP
from .mutagenmetadata import MutagenMetadataPP
from .sponskrub import SponSkrubPP
from .sponsorblock import SponsorBlockPP
from .xattrpp import XAttrMetadataPP
Expand Down
10 changes: 6 additions & 4 deletions yt_dlp/postprocessor/ffmpeg.py
Expand Up @@ -738,9 +738,10 @@ def _get_metadata_opts(self, info):

def add(meta_list, info_list=None):
value = next((
str(info[key]) for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
info[key] for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
if info.get(key) is not None), None)
if value not in ('', None):
value = ', '.join(map(str, variadic(value)))
value = value.replace('\0', '') # nul character cannot be passed in command line
metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)})

Expand All @@ -754,10 +755,11 @@ def add(meta_list, info_list=None):
add(('description', 'synopsis'), 'description')
add(('purl', 'comment'), 'webpage_url')
add('track', 'track_number')
add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
add('genre')
add('artist', ('artist_list', 'creator', 'uploader', 'uploader_id'))
add('composer', 'composer_list')
add('genre', 'genre_list')
add('album')
add('album_artist')
add('album_artist', 'album_artist_list')
llistochek marked this conversation as resolved.
Show resolved Hide resolved
add('disc', 'disc_number')
add('show', 'series')
add('season_number')
Expand Down
42 changes: 42 additions & 0 deletions yt_dlp/postprocessor/mutagenmetadata.py
@@ -0,0 +1,42 @@
from .common import PostProcessor
from ..dependencies import mutagen

if mutagen:
from mutagen.easymp4 import EasyMP4
from mutagen.flac import FLAC
from mutagen.mp3 import EasyMP3
from mutagen.musepack import Musepack
from mutagen.oggopus import OggOpus
from mutagen.oggvorbis import OggVorbis


class MutagenMetadataPP(PostProcessor):
    """Post-processor that writes multi-value tags to the output file with mutagen.

    The list-valued info fields 'artist_list', 'album_artist_list',
    'genre_list' and 'composer_list' are written to the 'artist',
    'albumartist', 'genre' and 'composer' tags respectively.
    If the optional mutagen module is unavailable, nothing is written.
    """

    def __init__(self, downloader):
        PostProcessor.__init__(self, downloader)

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """Write the multi-value tags of `information` into its file.

        @param information  Info dict; 'ext' and 'filepath' are read, along
                            with the '*_list' fields named in the class docstring.
        @returns            The tuple ([], information). The same value is
                            returned when mutagen is missing or when
                            mutagen.File() gives a falsy result for the file.
        """
        extension = information['ext']
        ret = [], information
        if not mutagen:
            # The other entries, and the 'ext' value compared here, carry no
            # leading dot, so Musepack must be listed as 'mpc' to ever match
            if extension in ('mp3', 'm4a', 'ogg', 'opus', 'flac', 'mpc'):
                self.report_warning(
                    'module mutagen was not found. '
                    'Tags with multiple values (e.g. artist, album artist and genre) may be set incorrectly. '
                    'Please install using `python -m pip install mutagen`')
            return ret

        tag_mapping = {
            'artist': 'artist_list',
            'albumartist': 'album_artist_list',
            'genre': 'genre_list',
            'composer': 'composer_list',
        }
        # Restrict detection to the container types handled here
        supported_formats = [EasyMP3, EasyMP4, OggVorbis, OggOpus, FLAC, Musepack]
        file = mutagen.File(information['filepath'], supported_formats)
        if not file:
            return ret

        if isinstance(file, EasyMP4):
            # Map the 'composer' key onto the '\251wrt' atom before assigning it
            # NOTE(review): presumably EasyMP4 has no 'composer' key by default - confirm
            file.RegisterTextKey('composer', '\251wrt')

        for tag_key, info_key in tag_mapping.items():
            value = information.get(info_key)
            if value:
                file[tag_key] = value
        file.save()
        return ret