In [2]:
import yt_dlp
from youtube_search import YoutubeSearch
import multiprocessing
import urllib.request
from mutagen.mp3 import MP3
from mutagen.id3 import ID3, APIC, error
import os

In [4]:
def find_and_download_songs(reference_file: str):
    TOTAL_ATTEMPTS = 20
    with open(reference_file, "r", encoding="utf-8") as file:
        for line in file:
            temp = line.split(",")
            name, artist, album_art_url = temp[0], temp[1], temp[3]
            text_to_search = artist + " - " + name
            best_url = None
            attempts_left = TOTAL_ATTEMPTS
            while attempts_left > 0:
                try:
                    results_list = YoutubeSearch(
                        text_to_search, max_results=1
                    ).to_dict()
                    best_url = "https://www.youtube.com{}".format(
                        results_list[0]["url_suffix"]
                    )
                    break
                except IndexError:
                    attempts_left -= 1
                    print(
                        "No valid URLs found for {}, trying again ({} attempts left).".format(
                            text_to_search, attempts_left
                        )
                    )
            if best_url is None:
                print(
                    "No valid URLs found for {}, skipping track.".format(text_to_search)
                )
                continue

            print("Initiating download for Image {}.".format(album_art_url))
            f = open("{}.jpg".format(name), "wb")
            f.write(urllib.request.urlopen(album_art_url).read())
            f.close()

            # Run you-get to fetch and download the link's audio
            print("Initiating download for {}.".format(text_to_search))
            ydl_opts = {
                "format": "bestaudio/best",
                "outtmpl": "%(title)s",  # name the file the ID of the video
                "embedthumbnail": True,
                "postprocessors": [
                    {
                        "key": "FFmpegExtractAudio",
                        "preferredcodec": "mp3",
                        "preferredquality": "192",
                    },
                    {
                        "key": "FFmpegMetadata",
                    },
                ],
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info_dict = ydl.extract_info([best_url][0], download=True)

                # extract the name of the downloaded file from the info_dict
            filename = ydl.prepare_filename(info_dict)
            print(f"The downloaded file name is: {filename}")

            print("AddingCoverImage ...")
            audio = MP3(f"{filename}" + ".mp3", ID3=ID3)
            try:
                audio.add_tags()
            except error:
                pass

            audio.tags.add(
                APIC(
                    encoding=3,  # 3 is for utf-8
                    mime="image/jpeg",  # can be image/jpeg or image/png
                    type=3,  # 3 is for the cover image
                    desc="Cover",
                    data=open("{}.jpg".format(name), mode="rb").read(),
                )
            )
            audio.save()
            os.remove("{}.jpg".format(name))

In [13]:
results_list = YoutubeSearch(
    "Mình đã giảm 3x lần thời gian code với Github Copilot thế nào？", max_results=1
).to_dict()

In [14]:
results_list

[{'id': 'aGbfU4GiI9s',
  'thumbnails': ['https://i.ytimg.com/vi/aGbfU4GiI9s/hq720.jpg?sqp=-oaymwEjCOgCEMoBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLAzeRIPl3RHTluRtfC9UfCrclzyKw',
   'https://i.ytimg.com/vi/aGbfU4GiI9s/hq720.jpg?sqp=-oaymwEXCNAFEJQDSFryq4qpAwkIARUAAIhCGAE=&rs=AOn4CLCQhXwrT7oYbgjsJPV7PRB02eirag'],
  'title': 'Dùng Github Copilot, tôi mất niềm tin vào Khả Năng Code và Ngành Dev',
  'long_desc': None,
  'channel': 'Phạm Huy Hoàng',
  'duration': '12:34',
  'views': '108.979 lượt xem',
  'publish_time': '3 tháng trước',
  'url_suffix': '/watch?v=aGbfU4GiI9s&pp=ygVOTcOsbmggxJHDoyBnaeG6o20gM3ggbOG6p24gdGjhu51pIGdpYW4gY29kZSB24bubaSBHaXRodWIgQ29waWxvdCB0aOG6vyBuw6Bv77yf'}]

In [12]:
ydl_opts = {
    "format": "bestaudio/best",
    "outtmpl": "%(title)s",  # name the file the ID of the video
    "embedthumbnail": True,
    "postprocessors": [
        {
            "key": "FFmpegExtractAudio",
            "preferredcodec": "mp3",
            "preferredquality": "192",
        },
        {
            "key": "FFmpegMetadata",
        },
    ],
}
best_url = "https://www.youtube.com/watch?v=2Vv-BfVoq4g"
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info_dict = ydl.extract_info(
        [best_url][0], download=True, extra_info={"embedthumbnail": True}
    )
filename = ydl.prepare_filename(info_dict)
print("Initiating download for Image {}.".format(filename))
f = open("{}.jpg".format(filename), "wb")
f.write(urllib.request.urlopen(info_dict["thumbnails"][-2]["url"]).read())
f.close()
print("AddingCoverImage ...")
audio = MP3(f"{filename}" + ".mp3", ID3=ID3)
try:
    audio.add_tags()
except error:
    pass

audio.tags.add(
    APIC(
        encoding=3,  # 3 is for utf-8
        mime="image/jpeg",  # can be image/jpeg or image/png
        type=3,  # 3 is for the cover image
        desc="Cover",
        data=open("{}.jpg".format(filename), mode="rb").read(),
    )
)
audio.save()
os.remove("{}.jpg".format(filename))

[youtube] Extracting URL: https://www.youtube.com/watch?v=2Vv-BfVoq4g
[youtube] 2Vv-BfVoq4g: Downloading webpage
[youtube] 2Vv-BfVoq4g: Downloading ios player API JSON
[youtube] 2Vv-BfVoq4g: Downloading android player API JSON




[youtube] 2Vv-BfVoq4g: Downloading m3u8 information
[info] 2Vv-BfVoq4g: Downloading 1 format(s): 251
[download] Destination: Ed Sheeran - Perfect (Official Music Video)
[download] 100% of    4.52MiB in 00:00:00 at 4.82MiB/s   
[ExtractAudio] Destination: Ed Sheeran - Perfect (Official Music Video).mp3
Deleting original file Ed Sheeran - Perfect (Official Music Video) (pass -k to keep)
[Metadata] Adding metadata to "Ed Sheeran - Perfect (Official Music Video).mp3"
Initiating download for Image Ed Sheeran - Perfect (Official Music Video).


HTTPError: HTTP Error 404: Not Found

In [7]:
print("Initiating download for Image {}.".format(filename))
f = open("{}.jpg".format(filename), "wb")
f.write(urllib.request.urlopen(info_dict["thumbnails"][-2]["url"]).read())
f.close()

Initiating download for Image SƠN TÙNG M-TP ｜ THERE’S NO ONE AT ALL (ANOTHER VERSION) ｜ OFFICIAL MUSIC VIDEO.


In [10]:
print("AddingCoverImage ...")
audio = MP3(f"{filename}" + ".mp3", ID3=ID3)
try:
    audio.add_tags()
except error:
    pass

audio.tags.add(
    APIC(
        # encoding=3,  # 3 is for utf-8
        mime="image/jpeg",  # can be image/jpeg or image/png
        type=3,  # 3 is for the cover image
        desc="Cover",
        data=open("{}.jpg".format(filename), mode="rb").read(),
    )
)
audio.save()
os.remove("{}.jpg".format(filename))

AddingCoverImage ...


In [18]:
info_dict["thumbnails"]

[{'url': 'https://i.ytimg.com/vi/2Vv-BfVoq4g/3.jpg',
  'preference': -37,
  'id': '0'},
 {'url': 'https://i.ytimg.com/vi_webp/2Vv-BfVoq4g/3.webp',
  'preference': -36,
  'id': '1'},
 {'url': 'https://i.ytimg.com/vi/2Vv-BfVoq4g/2.jpg',
  'preference': -35,
  'id': '2'},
 {'url': 'https://i.ytimg.com/vi_webp/2Vv-BfVoq4g/2.webp',
  'preference': -34,
  'id': '3'},
 {'url': 'https://i.ytimg.com/vi/2Vv-BfVoq4g/1.jpg',
  'preference': -33,
  'id': '4'},
 {'url': 'https://i.ytimg.com/vi_webp/2Vv-BfVoq4g/1.webp',
  'preference': -32,
  'id': '5'},
 {'url': 'https://i.ytimg.com/vi/2Vv-BfVoq4g/mq3.jpg',
  'preference': -31,
  'id': '6'},
 {'url': 'https://i.ytimg.com/vi_webp/2Vv-BfVoq4g/mq3.webp',
  'preference': -30,
  'id': '7'},
 {'url': 'https://i.ytimg.com/vi/2Vv-BfVoq4g/mq2.jpg',
  'preference': -29,
  'id': '8'},
 {'url': 'https://i.ytimg.com/vi_webp/2Vv-BfVoq4g/mq2.webp',
  'preference': -28,
  'id': '9'},
 {'url': 'https://i.ytimg.com/vi/2Vv-BfVoq4g/mq1.jpg',
  'preference': -27,
  'id'