Update broken music/musicvideo scrapers #1135

Merged
4 commits merged into from Jul 8, 2012
Jump to file
+1,701 −678
Split
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!-- Add-on manifest for the Universal Album Scraper: declares the add-on id and version, the scraper libraries it imports, the album scraper extension point (library albumuniversal.xml) and the user-facing summary and description. -->
+<addon id="metadata.album.universal"
+ name="Universal Album Scraper"
+ version="1.2.2"
+ provider-name="Olympia, Team XBMC">
+ <requires>
+ <import addon="xbmc.metadata" version="1.0"/>
+ <import addon="metadata.common.last.fm" version="1.3.2"/>
+ <import addon="metadata.common.allmusic.com" version="2.3.1"/>
+ <import addon="metadata.common.musicbrainz.org" version="1.2.3"/>
+ <import addon="metadata.common.fanart.tv" version="1.0.1"/>
+ <import addon="metadata.common.amazon.de" version="1.0.0"/>
+ </requires>
+ <extension point="xbmc.metadata.scraper.albums"
+ language="en"
+ library="albumuniversal.xml"/>
+ <extension point="xbmc.addon.metadata">
+ <summary lang="en">Universal Scraper for Albums</summary>
+ <description lang="en">This scraper collects information from the following supported sites: MusicBrainz, last.fm, allmusic.com and amazon.de, while grabs artwork from: fanart.tv, last.fm and allmusic.com. It can be set field by field that from which site you want that specific information.
+
+The initial search is always done on MusicBrainz. In case allmusic and/or amazon.de links are not added on the MusicBrainz site, fields from allmusic.com and/or amazon.de cannot be fetched (very easy to add those missing links though).</description>
+ <platform>all</platform>
+ </extension>
+</addon>
@@ -0,0 +1,166 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<scraper framework="1.1" date="2012-06-09">
+ <!-- NfoUrl: looks for "release/" followed by an id in buffer 1 and emits a url element pointing at the MusicBrainz ws/2 release lookup for that id, with recordings, release-groups, artists, labels and ratings included. -->
+ <NfoUrl dest="3">
+ <RegExp input="$$1" output="&lt;url&gt;http://musicbrainz.org/ws/2/release/\1?inc=recordings+release-groups+artists+labels+ratings&lt;/url&gt;" dest="3">
+ <expression>release/(.+)</expression>
+ </RegExp>
+ </NfoUrl>
+ <!-- CreateAlbumSearchUrl: builds the MusicBrainz release search URL. Buffer 1 supplies the release title and buffer 2 the artist as passed in. -->
+ <!-- Buffer 4 holds the artist actually searched for: all of buffer 2 by default, or only the part before a Ft. / Feat. / " and " separator when one is present (first credited artist). -->
+ <CreateAlbumSearchUrl dest="3">
+ <!-- The query interpolates buffer 4, not buffer 2, so that the artist splitter below takes effect. -->
+ <RegExp input="$$1" output="&lt;url&gt;http://search.musicbrainz.org/ws/2/release/?fmt=xml&amp;query=release:&quot;\1&quot;%20AND%20artist:&quot;$$4&quot;&lt;/url&gt;" dest="3">
+ <!-- Default: search for the artist exactly as given. -->
+ <RegExp input="$$2" output="\1" dest="4">
+ <expression noclean="1">(.+)</expression>
+ </RegExp>
+ <!-- Splitter: capture everything before a featuring/and separator into buffer 6. -->
+ <RegExp input="$$2" output="\1" dest="6">
+ <expression noclean="1">(.+)(?:Ft%2e|Feat%2e|Ft.|Feat.|%20and%20)</expression>
+ </RegExp>
+ <!-- If the splitter produced anything, it replaces the default in buffer 4. -->
+ <RegExp input="$$6" output="\1" dest="4">
+ <expression>(.+)</expression>
+ </RegExp>
+ <expression/>
+ </RegExp>
+ </CreateAlbumSearchUrl>
+ <!-- GetAlbumSearchResults: converts the MusicBrainz release search response in buffer 1 into entity elements collected in buffer 5, then wraps buffer 5 in a results element. -->
+ <!-- Each entity's year field is filled with \5-\4-T#\6: the text captured after the optional country tag, the four-digit year from the date, and the track-list count. -->
+ <GetAlbumSearchResults dest="8">
+ <RegExp input="$$5" output="&lt;results&gt;\1&lt;/results&gt;" dest="8">
+ <!-- Pass 1: releases with status Official whose release-group has type="Album"; written to buffer 5 first so they head the list. -->
+ <RegExp input="$$1" output="&lt;entity&gt;&lt;year&gt;\5-\4-T#\6&lt;/year&gt;&lt;artist&gt;\3&lt;/artist&gt;&lt;title&gt;\2&lt;/title&gt;&lt;url cache=&quot;mb-\1-album.xml&quot;&gt;http://musicbrainz.org/ws/2/release/\1?inc=recordings+release-groups+artists+labels+ratings&lt;/url&gt;&lt;/entity&gt;" dest="5">
+ <expression repeat="yes">id=&quot;([^&quot;]*)&quot;&gt;&lt;title&gt;([^&lt;]*)&lt;/title&gt;&lt;status&gt;Official&lt;/status&gt;&lt;text-representation&gt;&lt;language&gt;[^&lt;]*&lt;/language&gt;&lt;script&gt;[^&lt;]*&lt;/script&gt;&lt;/text-representation&gt;&lt;artist-credit&gt;&lt;name-credit(?:&gt;)*(?:\sjoinphrase=&quot;[^&quot;]*&quot;&gt;)*(?:&lt;name&gt;[^&lt;]*&lt;/name)*(?:&gt;)*&lt;artist\sid=&quot;[^&quot;]*&quot;&gt;&lt;name&gt;([^&lt;]*)&lt;/name&gt;(?:&lt;sort-name&gt;[^&lt;]*&lt;/sort-name&gt;)*(?:&lt;disambiguation&gt;[^&lt;]*&lt;/disambiguation&gt;)*(?:&lt;alias-list&gt;.*?&lt;/alias-list&gt;)*&lt;/artist&gt;&lt;/name-credit&gt;(?:&lt;name-credit(?:&gt;)*(?:\sjoinphrase=&quot;[^&quot;]*&quot;&gt;)*(?:&lt;name&gt;[^&lt;]*&lt;/name)*(?:&gt;)*&lt;artist\sid=&quot;[^&quot;]*&quot;&gt;&lt;name&gt;[^&lt;]*&lt;/name&gt;(?:&lt;sort-name&gt;[^&lt;]*&lt;/sort-name&gt;)*(?:&lt;disambiguation&gt;[^&lt;]*&lt;/disambiguation&gt;)*(?:&lt;alias-list&gt;.*?&lt;/alias-list&gt;)*&lt;/artist&gt;&lt;/name-credit&gt;)*&lt;/artist-credit&gt;&lt;release-group\stype=&quot;Album&quot;\sid=&quot;[^&quot;]*&quot;&gt;(?:&lt;primary-type&gt;[^&lt;]*&lt;/primary-type&gt;)*(?:&lt;secondary-type-list&gt;&lt;secondary-type&gt;[^&lt;]*&lt;/secondary-type&gt;&lt;/secondary-type-list&gt;)*&lt;/release-group&gt;&lt;date&gt;(\d{4})[^&lt;]*&lt;/date&gt;(?:&lt;country&gt;)*([^&lt;]*)?.*?&lt;track-list\scount=&quot;(\d+)</expression>
+ </RegExp>
+ <!-- Pass 2: same pattern but with the release-group type attribute optional and unconstrained; appended to buffer 5 (dest="5+"). A release matched by pass 1 also matches here. -->
+ <RegExp input="$$1" output="&lt;entity&gt;&lt;year&gt;\5-\4-T#\6&lt;/year&gt;&lt;artist&gt;\3&lt;/artist&gt;&lt;title&gt;\2&lt;/title&gt;&lt;url cache=&quot;mb-\1-album.xml&quot;&gt;http://musicbrainz.org/ws/2/release/\1?inc=recordings+release-groups+artists+labels+ratings&lt;/url&gt;&lt;/entity&gt;" dest="5+">
+ <expression repeat="yes">id=&quot;([^&quot;]*)&quot;&gt;&lt;title&gt;([^&lt;]*)&lt;/title&gt;&lt;status&gt;Official&lt;/status&gt;&lt;text-representation&gt;&lt;language&gt;[^&lt;]*&lt;/language&gt;&lt;script&gt;[^&lt;]*&lt;/script&gt;&lt;/text-representation&gt;&lt;artist-credit&gt;&lt;name-credit(?:&gt;)*(?:\sjoinphrase=&quot;[^&quot;]*&quot;&gt;)*(?:&lt;name&gt;[^&lt;]*&lt;/name)*(?:&gt;)*&lt;artist\sid=&quot;[^&quot;]*&quot;&gt;&lt;name&gt;([^&lt;]*)&lt;/name&gt;(?:&lt;sort-name&gt;[^&lt;]*&lt;/sort-name&gt;)*(?:&lt;disambiguation&gt;[^&lt;]*&lt;/disambiguation&gt;)*(?:&lt;alias-list&gt;.*?&lt;/alias-list&gt;)*&lt;/artist&gt;&lt;/name-credit&gt;(?:&lt;name-credit(?:&gt;)*(?:\sjoinphrase=&quot;[^&quot;]*&quot;&gt;)*(?:&lt;name&gt;[^&lt;]*&lt;/name)*(?:&gt;)*&lt;artist\sid=&quot;[^&quot;]*&quot;&gt;&lt;name&gt;[^&lt;]*&lt;/name&gt;(?:&lt;sort-name&gt;[^&lt;]*&lt;/sort-name&gt;)*(?:&lt;disambiguation&gt;[^&lt;]*&lt;/disambiguation&gt;)*(?:&lt;alias-list&gt;.*?&lt;/alias-list&gt;)*&lt;/artist&gt;&lt;/name-credit&gt;)*&lt;/artist-credit&gt;&lt;release-group(?:\stype=&quot;[^&quot;]*&quot;)*\sid=&quot;[^&quot;]*&quot;&gt;(?:&lt;primary-type&gt;[^&lt;]*&lt;/primary-type&gt;)*(?:&lt;secondary-type-list&gt;&lt;secondary-type&gt;[^&lt;]*&lt;/secondary-type&gt;&lt;/secondary-type-list&gt;)*&lt;/release-group&gt;&lt;date&gt;(\d{4})[^&lt;]*&lt;/date&gt;(?:&lt;country&gt;)*([^&lt;]*)?.*?&lt;track-list\scount=&quot;(\d+)</expression>
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAlbumSearchResults>
+ <!-- GetAlbumDetails: reads the MusicBrainz release response in buffer 1, extracts the ids and names needed below, then collects chain and url elements in buffer 5 and returns them wrapped in a details element. -->
+ <!-- Buffers: 3 = release MBID, 4 = release-group MBID, 7 = album title, 8 = artist name, 5 = accumulated output. -->
+ <GetAlbumDetails dest="3">
+ <RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3">
+ <!--MBID - release-->
+ <RegExp input="$$1" output="\1" dest="3">
+ <expression>id=&quot;([^&quot;]*)</expression>
+ </RegExp>
+ <!--MBID - release group-->
+ <RegExp input="$$1" output="\1" dest="4">
+ <expression>&lt;release-group type=&quot;[^&quot;]*&quot; id=&quot;([^&quot;]*)&quot;</expression>
+ </RegExp>
+ <!--Album title-->
+ <RegExp input="$$1" output="\1" dest="7">
+ <expression trim="1">&lt;release id=&quot;[^&quot;]*&quot;&gt;&lt;title&gt;([^&lt;]*)&lt;</expression>
+ </RegExp>
+ <!--Artist name-->
+ <RegExp input="$$1" output="\1" dest="8">
+ <expression trim="1">&lt;release id=&quot;[^&quot;]*&quot;&gt;&lt;title&gt;[^&lt;]*&lt;.*?&lt;artist id=&quot;[^&quot;]*&quot;&gt;&lt;name&gt;([^&lt;]*)&lt;</expression>
+ </RegExp>
+ <!-- Always emitted: chained MusicBrainz lookups keyed by the release MBID in buffer 3. The first one starts buffer 5, the rest append to it. -->
+ <RegExp input="$$3" output="&lt;chain function=&quot;GetMBAlbumTitleByMBID&quot;&gt;\1&lt;/chain&gt;" dest="5">
+ <expression noclean="1">(.+)</expression>
+ </RegExp>
+ <RegExp input="$$3" output="&lt;chain function=&quot;GetMBAlbumArtistByMBID&quot;&gt;\1&lt;/chain&gt;" dest="5+">
+ <expression noclean="1">(.+)</expression>
+ </RegExp>
+ <RegExp input="$$3" output="&lt;chain function=&quot;GetMBAlbumLabelByMBID&quot;&gt;\1&lt;/chain&gt;" dest="5+">
+ <expression noclean="1">(.+)</expression>
+ </RegExp>
+ <RegExp input="$$3" output="&lt;chain function=&quot;GetMBAlbumDateByMBID&quot;&gt;\1&lt;/chain&gt;" dest="5+">
+ <expression noclean="1">(.+)</expression>
+ </RegExp>
+ <RegExp input="$$3" output="&lt;chain function=&quot;GetMBAlbumTracksByMBID&quot;&gt;\1&lt;/chain&gt;" dest="5+">
+ <expression noclean="1">(.+)</expression>
+ </RegExp>
+ <!-- Setting-driven sources: each entry is emitted only when the named setting's value matches the expression. -->
+ <!-- Every url entry below requests the same release-group url-rels document, so they all share the cache file mb-(release-group MBID)-rg.xml. -->
+ <RegExp input="$INFO[albumreviewsource]" output="&lt;url function=&quot;GetAMGAlbumReview&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+">
+ <expression>allmusic.com</expression>
+ </RegExp>
+ <RegExp input="$INFO[albumreviewsource]" output="&lt;chain function=&quot;GetLastFMAlbumReviewByAlbumAndArtist&quot;&gt;$$7::$$8::$$7::$$8&lt;/chain&gt;" dest="5+">
+ <expression>last.fm</expression>
+ </RegExp>
+ <!-- Uses the shared release-group cache like the AMG entries, avoiding a second request for the identical URL. -->
+ <RegExp input="$INFO[albumreviewsource]" output="&lt;url function=&quot;GetAmazonDEAlbumReview&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+">
+ <expression>amazon.de</expression>
+ </RegExp>
+ <RegExp input="$INFO[albumratingsource]" output="&lt;chain function=&quot;GetMBAlbumRatingByMBID&quot;&gt;$$3&lt;/chain&gt;" dest="5+">
+ <expression>MusicBrainz</expression>
+ </RegExp>
+ <RegExp input="$INFO[albumratingsource]" output="&lt;url function=&quot;GetAMGAlbumRating&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+">
+ <expression>allmusic.com</expression>
+ </RegExp>
+ <RegExp input="$INFO[albumstylessource]" output="&lt;url function=&quot;GetAMGAlbumStyles&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+">
+ <expression>allmusic.com</expression>
+ </RegExp>
+ <RegExp input="$INFO[albummoodssource]" output="&lt;url function=&quot;GetAMGAlbumMoods&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+">
+ <expression>allmusic.com</expression>
+ </RegExp>
+ <RegExp input="$INFO[albumthemessource]" output="&lt;url function=&quot;GetAMGAlbumThemes&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+">
+ <expression>allmusic.com</expression>
+ </RegExp>
+ <!-- Artwork: each entry is gated by the boolean setting named in its conditional attribute. -->
+ <RegExp conditional="fanarttvalbumthumbs" input="$$1" output="&lt;chain function=&quot;GetFanartTvAlbumThumbsByMBID&quot;&gt;$$4&lt;/chain&gt;" dest="5+">
+ <expression noclean="1" />
+ </RegExp>
+ <RegExp conditional="allmusicalbumthumbs" input="$$1" output="&lt;url function=&quot;GetAMGAlbumThumbs&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+">
+ <expression noclean="1" />
+ </RegExp>
+ <RegExp conditional="lastfmalbumthumbs" input="$$1" output="&lt;chain function=&quot;GetLastFMAlbumThumbs&quot;&gt;$$7::$$8::$$7::$$8&lt;/chain&gt;" dest="5+">
+ <expression noclean="1" />
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAlbumDetails>
+
+ <!-- GetAMGAlbumReview: requested by GetAlbumDetails with the MusicBrainz release-group url-rels URL. Captures the text following "allmusic.com/album/" in buffer 1 and passes it to the chained GetAMGAlbumReviewByAMGID function; the chain element is returned wrapped in details. -->
+ <GetAMGAlbumReview dest="5">
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAMGAlbumReviewByAMGID&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">allmusic.com/album/([^&lt;]*)</expression>
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAMGAlbumReview>
+
+ <!-- GetAMGAlbumRating: requested by GetAlbumDetails with the MusicBrainz release-group url-rels URL. Captures the text following "allmusic.com/album/" in buffer 1 and passes it to the chained GetAMGAlbumRatingByAMGID function; the chain element is returned wrapped in details. -->
+ <GetAMGAlbumRating dest="5">
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAMGAlbumRatingByAMGID&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">allmusic.com/album/([^&lt;]*)</expression>
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAMGAlbumRating>
+
+ <!-- GetAMGAlbumStyles: requested by GetAlbumDetails with the MusicBrainz release-group url-rels URL. Captures the text following "allmusic.com/album/" in buffer 1 and passes it to the chained GetAMGAlbumStylesByAMGID function; the chain element is returned wrapped in details. -->
+ <GetAMGAlbumStyles dest="5">
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAMGAlbumStylesByAMGID&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">allmusic.com/album/([^&lt;]*)</expression>
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAMGAlbumStyles>
+
+ <!-- GetAMGAlbumMoods: requested by GetAlbumDetails with the MusicBrainz release-group url-rels URL. Captures the text following "allmusic.com/album/" in buffer 1 and passes it to the chained GetAMGAlbumMoodsByAMGID function; the chain element is returned wrapped in details. -->
+ <GetAMGAlbumMoods dest="5">
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAMGAlbumMoodsByAMGID&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">allmusic.com/album/([^&lt;]*)</expression>
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAMGAlbumMoods>
+
+ <!-- GetAMGAlbumThemes: requested by GetAlbumDetails with the MusicBrainz release-group url-rels URL. Captures the text following "allmusic.com/album/" in buffer 1 and passes it to the chained GetAMGAlbumThemesByAMGID function; the chain element is returned wrapped in details. -->
+ <GetAMGAlbumThemes dest="5">
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAMGAlbumThemesByAMGID&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">allmusic.com/album/([^&lt;]*)</expression>
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAMGAlbumThemes>
+
+ <!-- GetAMGAlbumThumbs: requested by GetAlbumDetails with the MusicBrainz release-group url-rels URL. Captures the text following "allmusic.com/album/" in buffer 1 and passes it to the chained GetAMGAlbumThumbsByAMGID function; the chain element is returned wrapped in details. -->
+ <GetAMGAlbumThumbs dest="5">
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAMGAlbumThumbsByAMGID&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">allmusic.com/album/([^&lt;]*)</expression>
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAMGAlbumThumbs>
+
+ <!-- GetAmazonDEAlbumReview: requested by GetAlbumDetails with the MusicBrainz release-group url-rels URL. Captures the text following "http://www.amazon.de/gp/product/" in buffer 1 and passes it to the chained GetAmazonDEAlbumReviewByASIN function; the chain element is returned wrapped in details. -->
+ <GetAmazonDEAlbumReview dest="5">
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAmazonDEAlbumReviewByASIN&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">http://www.amazon.de/gp/product/([^&lt;]*)</expression>
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAmazonDEAlbumReview>
+
+</scraper>
@@ -0,0 +1,36 @@
+[B]1.2.2[/B]
+Fixed: accommodate new Frodo style URL encoding
+
+[B]1.2.1[/B]
+Removed: '&' sign from artist splitter
+
+[B]1.2.0[/B]
+Added: scraping album review from amazon.de (if the link exists on MusicBrainz)
+
+[B]1.1.4[/B]
+Fixed: typo blocks preferring album in search result
+
+[B]1.1.3[/B]
+Fixed: won't find tracks without recording id
+
+[B]1.1.2[/B]
+Fixed: won't find tracks without duration
+
+[B]1.1.1[/B]
+Fixed: artists with 'and' in their name were not found
+
+[B]1.1.0[/B]
+Added: trying to get album info when multiple artists are credited (first artist will be used)
+Fixed: will not find some albums
+
+[B]1.0.3[/B]
+Fixed: Some Album releases were not found
+
+[B]1.0.2[/B]
+Fixed: Track Duration from MusicBrainz was wrong in certain cases. Credits to scudlee!
+
+[B]1.0.1[/B]
+Fixed: scraping moods from allmusic.com
+
+[B]1.0.0[/B]
+Initial version
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Label strings for the scraper settings; the ids 30000 to 30009 are the values used by the label attributes in settings.xml. -->
+<strings>
+ <string id="30000">Grab Album Thumbs from fanart.tv</string>
+ <string id="30001">Grab Album Thumbs from Last.fm</string>
+ <string id="30002">Get Album Review from</string>
+ <string id="30003"> Preferred Language</string>
+ <string id="30004">Get Album Rating from</string>
+ <string id="30005">Get Album Styles from</string>
+ <string id="30006">Get Album Moods from</string>
+ <string id="30007">Get Album Themes from</string>
+ <string id="30008">Grab Album Thumbs from allmusic.com</string>
+ <string id="30009">Artwork</string>
+</strings>
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- User settings for the scraper. In albumuniversal.xml the four album*source ids are read through $INFO[...] inputs and the three *albumthumbs ids through conditional attributes. -->
+<settings>
+ <category label="128">
+ <setting type="lsep" label="General Settings"/>
+ <!-- NOTE(review): GetAlbumDetails in albumuniversal.xml also tests albumreviewsource for "allmusic.com", which is not offered in this value list; confirm whether the omission is intentional. -->
+ <setting label="30002" type="labelenum" values="last.fm|amazon.de|None" id="albumreviewsource" default="last.fm"/>
+ <!-- visible="eq(-1,0)" ties this entry to the setting directly above it; presumably it is shown only while last.fm is selected there. TODO confirm. -->
+ <setting label="30003" type="labelenum" values="en|de|es|fr|it|jp|pl|pt|ru|sv|tr|zh" id="lastfmlanguage" default="en" visible="eq(-1,0)"/>
+ <setting type="sep"/>
+ <setting label="30004" type="labelenum" values="MusicBrainz|allmusic.com|None" id="albumratingsource" default="MusicBrainz"/>
+ <setting label="30005" type="labelenum" values="allmusic.com|None" id="albumstylessource" default="allmusic.com"/>
+ <setting label="30006" type="labelenum" values="allmusic.com|None" id="albummoodssource" default="allmusic.com"/>
+ <setting label="30007" type="labelenum" values="allmusic.com|None" id="albumthemessource" default="allmusic.com"/>
+ </category>
+
+ <!-- Artwork toggles; all three default to true. -->
+ <category label="30009">
+ <setting type="lsep" label="Artwork Settings"/>
+ <setting label="30000" type="bool" id="fanarttvalbumthumbs" default="true"/>
+ <setting label="30001" type="bool" id="lastfmalbumthumbs" default="true"/>
+ <setting label="30008" type="bool" id="allmusicalbumthumbs" default="true"/>
+ </category>
+</settings>
Oops, something went wrong.