From 6e5ff7b0b8a0d953ea9cc161af51556771c12336 Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Tue, 4 Jul 2023 22:40:08 -0700 Subject: [PATCH] Various search fixes (#877) --- nowplaying/artistextras/discogs.py | 16 +++++++++---- nowplaying/artistextras/theaudiodb.py | 11 +++++---- nowplaying/imagecache.py | 11 +++------ nowplaying/metadata.py | 2 ++ nowplaying/musicbrainz.py | 4 +++- nowplaying/recognition/acoustidmb.py | 30 ++++++++++++++++-------- nowplaying/trackrequests.py | 26 ++++++++++----------- nowplaying/utils.py | 33 +++++++++++++++++---------- tests/test_artistextras.py | 30 ++++++++++++++++++++++++ tests/test_utils.py | 8 +++++++ 10 files changed, 118 insertions(+), 53 deletions(-) diff --git a/nowplaying/artistextras/discogs.py b/nowplaying/artistextras/discogs.py index 3bdda65d..56034a50 100755 --- a/nowplaying/artistextras/discogs.py +++ b/nowplaying/artistextras/discogs.py @@ -2,8 +2,6 @@ ''' start of support of discogs ''' import logging -import logging.config -import logging.handlers import socket import requests.exceptions @@ -22,7 +20,6 @@ def __init__(self, config=None, qsettings=None): super().__init__(config=config, qsettings=qsettings) self.displayname = "Discogs" self.client = None - self.version = config.version def _get_apikey(self): apikey = self.config.cparser.value('discogs/apikey') @@ -56,9 +53,20 @@ def _find_discogs_artist_releaselist(self, metadata): if len(discogs_website) == 1: artistnum = discogs_website[0].split('/')[-1] artist = self.client.artist(artistnum) - artistname = artist.name + artistname = str(artist.name) logging.debug('Found a singular discogs artist URL using %s instead of %s', artistname, metadata['artist']) + elif len(discogs_website) > 1: + for website in discogs_website: + artistnum = website.split('/')[-1] + artist = self.client.artist(artistnum) + webartistname = str(artist.name) + if nowplaying.utils.normalize(webartistname) == nowplaying.utils.normalize( + metadata['artist']): + logging.debug( + 
'Found near exact match discogs artist URL %s using %s instead of %s', + website, webartistname, metadata['artist']) + artistname = webartistname try: logging.debug('Fetching %s - %s', artistname, metadata['album']) diff --git a/nowplaying/artistextras/theaudiodb.py b/nowplaying/artistextras/theaudiodb.py index dc72e5bf..2d453084 100755 --- a/nowplaying/artistextras/theaudiodb.py +++ b/nowplaying/artistextras/theaudiodb.py @@ -51,12 +51,15 @@ def _check_artist(self, artdata): ''' is this actually the artist we are looking for? ''' for fieldname in ['strArtist', 'strArtistAlternate']: if artdata.get(fieldname) and self.fnstr: - normalized = nowplaying.utils.normalize(artdata[fieldname]) + normalized = nowplaying.utils.normalize(artdata[fieldname], + sizecheck=4, + nospaces=True) if normalized and normalized in self.fnstr: logging.debug('theaudiodb Trusting %s: %s', fieldname, artdata[fieldname]) return True - logging.debug('theaudiodb not Trusting %s vs. %s', self.fnstr, - nowplaying.utils.normalize(artdata.get(fieldname))) + logging.debug( + 'theaudiodb not Trusting %s vs. %s', self.fnstr, + nowplaying.utils.normalize(artdata.get(fieldname), sizecheck=4, nospaces=True)) return False def _handle_extradata(self, extradata, metadata, imagecache): # pylint: disable=too-many-branches @@ -140,7 +143,7 @@ def download(self, metadata=None, imagecache=None): # pylint: disable=too-many- return None extradata = [] - self.fnstr = nowplaying.utils.normalize(metadata['artist']) + self.fnstr = nowplaying.utils.normalize(metadata['artist'], sizecheck=4, nospaces=True) # if musicbrainz lookup fails, then there likely isn't # data in theaudiodb that matches. 
diff --git a/nowplaying/imagecache.py b/nowplaying/imagecache.py index c10cf03c..a9acde56 100644 --- a/nowplaying/imagecache.py +++ b/nowplaying/imagecache.py @@ -18,7 +18,6 @@ import aiosqlite import diskcache -import normality import requests_cache from PySide6.QtCore import QStandardPaths # pylint: disable=no-name-in-module @@ -67,10 +66,6 @@ def __init__(self, sizelimit=1, initialize=False, cachedir=None, stopevent=None) self.logpath = None self.stopevent = stopevent - @staticmethod - def _normalize_artist(artist): - return normality.normalize(artist).replace(' ', '') - def setup_sql(self, initialize=False): ''' create the database ''' @@ -99,7 +94,7 @@ def setup_sql(self, initialize=False): def random_fetch(self, artist, imagetype): ''' fetch a random row from a cache for the artist ''' - normalartist = self._normalize_artist(artist) + normalartist = nowplaying.utils.normalize(artist, sizecheck=0, nospaces=True) data = None if not self.databasefile.exists(): self.setup_sql() @@ -225,7 +220,7 @@ def fill_queue(self, config, artist, imagetype, urllist): logging.debug('Putting %s unfiltered for %s/%s', min(len(urllist), maxart), imagetype, artist) - normalartist = self._normalize_artist(artist) + normalartist = nowplaying.utils.normalize(artist, sizecheck=0, nospaces=True) for url in random.sample(urllist, min(len(urllist), maxart)): self.put_db_url(artist=normalartist, imagetype=imagetype, url=url) @@ -281,7 +276,7 @@ def put_db_cachekey(self, artist, url, imagetype, cachekey=None): logging.error('imagecache does not exist yet?') return - normalartist = self._normalize_artist(artist) + normalartist = nowplaying.utils.normalize(artist, sizecheck=0, nospaces=True) with sqlite3.connect(self.databasefile, timeout=30) as connection: connection.row_factory = sqlite3.Row cursor = connection.cursor() diff --git a/nowplaying/metadata.py b/nowplaying/metadata.py index c734341e..83e1fa81 100755 --- a/nowplaying/metadata.py +++ b/nowplaying/metadata.py @@ -128,6 +128,8 @@ 
def _uniqlists(self): if self.metadata.get('artistwebsites'): newlist = [] for url in self.metadata['artistwebsites']: + if 'wikidata' in url: + continue if 'http:' not in url: newlist.append(url) continue diff --git a/nowplaying/musicbrainz.py b/nowplaying/musicbrainz.py index a441c1cf..d1721577 100755 --- a/nowplaying/musicbrainz.py +++ b/nowplaying/musicbrainz.py @@ -318,6 +318,7 @@ def _websites(self, idlist): 'official homepage': 'homepage', 'last.fm': 'lastfm', 'discogs': 'discogs', + 'wikidata': 'wikidata' } for urlrel in webdata['artist']['url-relation-list']: @@ -327,11 +328,12 @@ def _websites(self, idlist): type=bool) and urlrel['type'] == 'discogs': sitelist.append(urlrel['target']) logging.debug('placed %s', dest) + elif urlrel['type'] == 'wikidata': + sitelist.append(urlrel['target']) elif urlrel['type'] == src and self.config.cparser.value(f'acoustidmb/{dest}', type=bool): sitelist.append(urlrel['target']) logging.debug('placed %s', dest) - return sitelist def providerinfo(self): # pylint: disable=no-self-use diff --git a/nowplaying/recognition/acoustidmb.py b/nowplaying/recognition/acoustidmb.py index 40b01dd3..e4887b64 100755 --- a/nowplaying/recognition/acoustidmb.py +++ b/nowplaying/recognition/acoustidmb.py @@ -109,18 +109,19 @@ def _fetch_from_acoustid(self, apikey, fingerprint, duration): return results['results'] def _read_acoustid_tuples(self, metadata, results): # pylint: disable=too-many-branches, too-many-statements, too-many-locals + fnstr = '' if metadata.get('filename'): - fnstr = nowplaying.utils.normalize(metadata['filename']) - else: - fnstr = '' + fnstr = nowplaying.utils.normalize(metadata['filename'], sizecheck=4, nospaces=True) + if not fnstr: + fnstr = '' artist = metadata.get('artist') title = metadata.get('title') if artist and title and artist in title and len(artist) > 3: title = title.replace(artist, '') title = nowplaying.utils.titlestripper_basic(title=title) - artistnstr = nowplaying.utils.normalize(artist) - titlenstr 
= nowplaying.utils.normalize(title) + artistnstr = nowplaying.utils.normalize(artist, sizecheck=4, nospaces=True) + titlenstr = nowplaying.utils.normalize(title, sizecheck=4, nospaces=True) if not artistnstr: artistnstr = '' @@ -160,10 +161,12 @@ def _read_acoustid_tuples(self, metadata, results): # pylint: disable=too-many- albumartist = artist['name'] elif isinstance(artist, str): albumartist = artist + else: + albumartist = '' if albumartist == 'Various Artists': score = score - .10 elif albumartist and nowplaying.utils.normalize( - albumartist) in completenstr: + albumartist, sizecheck=4, nospaces=True) in completenstr: score = score + .20 title = release['mediums'][0]['tracks'][0]['title'] @@ -171,7 +174,8 @@ def _read_acoustid_tuples(self, metadata, results): # pylint: disable=too-many- album = release['title'] else: album = None - if title and nowplaying.utils.normalize(title) in completenstr: + if title and nowplaying.utils.normalize(title, sizecheck=4, + nospaces=True) in completenstr: score = score + .10 artistlist = [] artistidlist = [] @@ -182,11 +186,13 @@ def _read_acoustid_tuples(self, metadata, results): # pylint: disable=too-many- elif isinstance(trackartist, str): artistlist.append(trackartist) if trackartist and artistnstr: - if nowplaying.utils.normalize(trackartist) == artistnstr: + if nowplaying.utils.normalize(trackartist, sizecheck=4, + nospaces=True) == artistnstr: score = score + .30 else: score = score - .50 - if trackartist and nowplaying.utils.normalize(trackartist) in completenstr: + if trackartist and nowplaying.utils.normalize( + trackartist, sizecheck=4, nospaces=True) in completenstr: score = score + .10 artist = ' & '.join(artistlist) @@ -223,10 +229,14 @@ def _configure_fpcalc(self, fpcalcexe=None): # pylint: disable=too-many-return- if fpcalcexe and not os.environ.get("FPCALC"): os.environ.setdefault("FPCALC", fpcalcexe) os.environ["FPCALC"] = fpcalcexe + elif sys.platform == 'linux': + if 
pathlib.Path('/usr/bin/fpcalc').exists(): + os.environ.setdefault("FPCALC", '/usr/bin/fpcalc') + os.environ["FPCALC"] = '/usr/bin/fpcalc' try: fpcalcexe = os.environ["FPCALC"] - except NameError: + except (NameError, KeyError): logging.error('fpcalc is not configured') return False diff --git a/nowplaying/trackrequests.py b/nowplaying/trackrequests.py index 0c3f59a6..8bf0e354 100644 --- a/nowplaying/trackrequests.py +++ b/nowplaying/trackrequests.py @@ -7,10 +7,10 @@ import re import sqlite3 import traceback +import typing as t import aiohttp import aiosqlite #pylint: disable=import-error -import normality #pylint: disable=import-error from PySide6.QtCore import Slot, QFile, QFileSystemWatcher, QStandardPaths # pylint: disable=import-error, no-name-in-module from PySide6.QtWidgets import QComboBox, QHeaderView, QTableWidgetItem # pylint: disable=import-error, no-name-in-module @@ -19,7 +19,7 @@ import nowplaying.db import nowplaying.metadata from nowplaying.exceptions import PluginVerifyError -from nowplaying.utils import TRANSPARENT_PNG_BIN +import nowplaying.utils USERREQUEST_TEXT = [ 'artist', 'title', 'displayname', 'type', 'playlist', 'username', 'filename', 'user_input', @@ -185,12 +185,10 @@ async def get_roulette_dupe_list(self, playlist=None): return dataset @staticmethod - def normalize(crazystring): - ''' user input needs to be normalized for best case matches ''' - if not crazystring: - return '' - if text := normality.normalize(crazystring): - return text.replace(' ', '') + def _normalize(text: t.Optional[str]) -> str: + ''' db normalize ''' + if text := nowplaying.utils.normalize(text, sizecheck=0, nospaces=True): + return text return '' async def add_to_db(self, data): @@ -199,8 +197,8 @@ async def add_to_db(self, data): logging.error('%s does not exist, refusing to add.', self.databasefile) return - data['normalizedartist'] = self.normalize(data.get('artist', '')) - data['normalizedtitle'] = self.normalize(data.get('title', '')) + 
data['normalizedartist'] = self._normalize(data.get('artist', '')) + data['normalizedtitle'] = self._normalize(data.get('title', '')) if data.get('reqid'): reqid = data['reqid'] @@ -392,8 +390,8 @@ async def _request_lookup_by_artist_title(self, artist='', title=''): newdata = await self._get_and_del_request_lookup(sql, datatuple) if not newdata: - artist = self.normalize(artist) - title = self.normalize(title) + artist = self._normalize(artist) + title = self._normalize(title) logging.debug('trying normalized artist >%s< / title >%s<', artist, title) sql = 'SELECT * FROM userrequest WHERE normalizedartist=? AND normalizedtitle=?' datatuple = artist, title @@ -428,7 +426,7 @@ async def get_request(self, metadata): return None if not newdata.get('requesterimageraw'): - newdata['requesterimageraw'] = TRANSPARENT_PNG_BIN + newdata['requesterimageraw'] = nowplaying.utils.TRANSPARENT_PNG_BIN return newdata @@ -557,7 +555,7 @@ async def _tenor_request(self, search_terms): content = { 'imageurl': None, - 'image': TRANSPARENT_PNG_BIN, + 'image': nowplaying.utils.TRANSPARENT_PNG_BIN, 'keywords': search_terms, } diff --git a/nowplaying/utils.py b/nowplaying/utils.py index 327d4a89..3655be34 100755 --- a/nowplaying/utils.py +++ b/nowplaying/utils.py @@ -11,6 +11,7 @@ import re import time import traceback +import typing as t import jinja2 import normality @@ -108,7 +109,7 @@ def generate(self, metadatadict=None): rendertext = self.template.render(**metadatadict) else: rendertext = self.template.render() - except: #pylint: disable=bare-except + except Exception: # pylint: disable=broad-except for line in traceback.format_exc().splitlines(): logging.error(line) return rendertext @@ -184,9 +185,7 @@ def songpathsubst(config, filename): newname = filename if songin := config.cparser.value('quirks/filesubstin'): - songout = config.cparser.value('quirks/filesubstout') - if not songout: - songout = '' + songout = config.cparser.value('quirks/filesubstout') or '' try: newname = 
filename.replace(songin, songout) @@ -199,16 +198,26 @@ def songpathsubst(config, filename): return newname -def normalize(crazystring): - ''' take a string and genericize it ''' - if not crazystring: +def normalize_text(text: t.Optional[str]) -> t.Optional[str]: + ''' take a string and genericize it ''' + if not text: return None - if len(crazystring) < 4: + transtext = text.translate(CUSTOM_TRANSLATE) + if normal := normality.normalize(transtext): + return normal + return transtext + + +def normalize(text: t.Optional[str], sizecheck: int = 0, nospaces: bool = False) -> t.Optional[str]: + ''' genericize string, optionally strip spaces, do a size check ''' + if not text: + return None + if len(text) < sizecheck: return 'TEXT IS TOO SMALL IGNORE' - text = crazystring.translate(CUSTOM_TRANSLATE) - if text := normality.normalize(text): - return text.replace(' ', '') - return None + normaltext = normalize_text(text) or text + if nospaces: + return normaltext.replace(' ', '') + return normaltext def titlestripper_basic(title=None, title_regex_list=None): diff --git a/tests/test_artistextras.py b/tests/test_artistextras.py index 486ad1b5..5d7f5a5b 100644 --- a/tests/test_artistextras.py +++ b/tests/test_artistextras.py @@ -160,6 +160,36 @@ def test_discogs_note_stripping(bootstrap): # pylint: disable=redefined-outer-n assert 'Note:' not in mpproc.metadata['artistshortbio'] +def test_discogs_weblocation1(bootstrap): # pylint: disable=redefined-outer-name + ''' noimagecache ''' + + config = bootstrap + if 'discogs' in PLUGINS: + configuresettings('discogs', config.cparser) + config.cparser.setValue('discogs/apikey', os.environ['DISCOGS_API_KEY']) + imagecaches, plugins = configureplugins(config) # pylint: disable=unused-variable + for pluginname in PLUGINS: + if 'discogs' not in pluginname: + continue + logging.debug('Testing %s', pluginname) + data = plugins[pluginname].download( + { + 'title': + 'Computer Blue', + 'album': + 'Purple Rain', + 'artist': + 'Prince and The 
Revolution', + 'artistwebsites': [ + 'https://www.discogs.com/artist/271351', 'https://www.discogs.com/artist/28795', + 'https://www.discogs.com/artist/293637', + 'https://www.discogs.com/artist/342899', 'https://www.discogs.com/artist/79903', + 'https://www.discogs.com/artist/571633', 'https://www.discogs.com/artist/96774' + ] + }, + imagecache=None) + assert 'NOTE: If The Revolution are credited without Prince' in data['artistlongbio'] + def test_missingallartistdata(getconfiguredplugin): # pylint: disable=redefined-outer-name ''' missing all artist data ''' imagecaches, plugins = getconfiguredplugin diff --git a/tests/test_utils.py b/tests/test_utils.py index 1d97f172..f20bc351 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -251,3 +251,11 @@ def test_artist_variations8(): assert namelist[0] == "ultra naté" assert namelist[1] == "ultra nate" assert len(namelist) == 2 + + +def test_artist_variations9(): + ''' verify artist variation ''' + namelist = nowplaying.utils.artist_name_variations("A★Teens") + assert namelist[0] == "a★teens" + assert namelist[1] == "a teens" # less than ideal + assert len(namelist) == 2