Permalink
Browse files

add artists/album universal scrapers for music and last.fm for musicv…

…ideos
  • Loading branch information...
MartijnKaijser committed Jul 8, 2012
1 parent 4c4ccf0 commit cfb94b4d1baae0b3c0cf3909f247e73666f12b35
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<addon id="metadata.album.universal"
+ name="Universal Album Scraper"
+ version="1.2.2"
+ provider-name="Olympia, Team XBMC"> <!-- Add-on manifest for the Universal Album Scraper: lists its imports, the scraper definition file and the user-facing description. -->
+ <requires> <!-- Add-ons this scraper imports, each with the version requested. -->
+ <import addon="xbmc.metadata" version="1.0"/>
+ <import addon="metadata.common.last.fm" version="1.3.2"/>
+ <import addon="metadata.common.allmusic.com" version="2.3.1"/>
+ <import addon="metadata.common.musicbrainz.org" version="1.2.3"/>
+ <import addon="metadata.common.fanart.tv" version="1.0.1"/>
+ <import addon="metadata.common.amazon.de" version="1.0.0"/>
+ </requires>
+ <extension point="xbmc.metadata.scraper.albums"
+ language="en"
+ library="albumuniversal.xml"/> <!-- Album scraper extension point; albumuniversal.xml holds the scraper functions. -->
+ <extension point="xbmc.addon.metadata"> <!-- Summary, description and platform shown for this add-on. -->
+ <summary lang="en">Universal Scraper for Albums</summary>
+ <description lang="en">This scraper collects information from the following supported sites: MusicBrainz, last.fm, allmusic.com and amazon.de, while grabs artwork from: fanart.tv, last.fm and allmusic.com. It can be set field by field that from which site you want that specific information.
+
+The initial search is always done on MusicBrainz. In case allmusic and/or amazon.de links are not added on the MusicBrainz site, fields from allmusic.com and/or amazon.de cannot be fetched (very easy to add those missing links though).</description>
+ <platform>all</platform>
+ </extension>
+</addon>
@@ -0,0 +1,166 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<scraper framework="1.1" date="2012-06-09">
+ <NfoUrl dest="3"> <!-- Turns a "release/ID" reference found in the input buffer $$1 (presumably the NFO text) into a MusicBrainz release lookup URL. -->
+ <RegExp input="$$1" output="&lt;url&gt;http://musicbrainz.org/ws/2/release/\1?inc=recordings+release-groups+artists+labels+ratings&lt;/url&gt;" dest="3">
+ <expression>release/(.+)</expression> <!-- \1 = everything after "release/"; it is inserted into the URL as the release id. -->
+ </RegExp>
+ </NfoUrl>
+ <CreateAlbumSearchUrl dest="3"> <!-- Builds the MusicBrainz release search URL from the album title ($$1) and the artist ($$2); when several artists are credited only the part before the separator is searched. -->
+ <RegExp input="$$1" output="&lt;url&gt;http://search.musicbrainz.org/ws/2/release/?fmt=xml&amp;query=release:&quot;\1&quot;%20AND%20artist:&quot;$$4&quot;&lt;/url&gt;" dest="3"> <!-- The query uses $$4 (artist after splitting) instead of the raw $$2, so the nested expressions below take effect. -->
+ <RegExp input="$$2" output="\1" dest="4"> <!-- Default: $$4 is the artist string unchanged. -->
+ <expression noclean="1">(.+)</expression>
+ </RegExp>
+ <RegExp input="$$2" output="\1" dest="6"> <!-- $$6 receives the text in front of a Ft./Feat./" and " separator, if one is present. -->
+ <expression noclean="1">(.+)(?:Ft%2e|Feat%2e|Ft\.|Feat\.|%20and%20)</expression> <!-- Dots are escaped so that only a literal "." matches; an unescaped "." would also accept e.g. the "%" following "ft" in "Daft%20Punk" if matching ignores case. -->
+ </RegExp>
+ <RegExp input="$$6" output="\1" dest="4"> <!-- If a separator was found ($$6 is non-empty) the shortened artist replaces $$4. -->
+ <expression>(.+)</expression>
+ </RegExp>
+ <expression/> <!-- Empty expression: \1 in the output stands for the album title from $$1 (assumes the parser's match-all default; TODO confirm). -->
+ </RegExp>
+ </CreateAlbumSearchUrl>
+ <GetAlbumSearchResults dest="8"> <!-- Converts the MusicBrainz release search response in $$1 into a results list of entity entries (year, artist, title, release URL). -->
+ <RegExp input="$$5" output="&lt;results&gt;\1&lt;/results&gt;" dest="8"> <!-- Wraps the entities collected in $$5 by the two nested passes. -->
+ <RegExp input="$$1" output="&lt;entity&gt;&lt;year&gt;\5-\4-T#\6&lt;/year&gt;&lt;artist&gt;\3&lt;/artist&gt;&lt;title&gt;\2&lt;/title&gt;&lt;url cache=&quot;mb-\1-album.xml&quot;&gt;http://musicbrainz.org/ws/2/release/\1?inc=recordings+release-groups+artists+labels+ratings&lt;/url&gt;&lt;/entity&gt;" dest="5"> <!-- First pass: only "Official" releases whose release-group has type="Album"; written to $$5 so these entries come first. -->
+ <expression repeat="yes">id=&quot;([^&quot;]*)&quot;&gt;&lt;title&gt;([^&lt;]*)&lt;/title&gt;&lt;status&gt;Official&lt;/status&gt;&lt;text-representation&gt;&lt;language&gt;[^&lt;]*&lt;/language&gt;&lt;script&gt;[^&lt;]*&lt;/script&gt;&lt;/text-representation&gt;&lt;artist-credit&gt;&lt;name-credit(?:&gt;)*(?:\sjoinphrase=&quot;[^&quot;]*&quot;&gt;)*(?:&lt;name&gt;[^&lt;]*&lt;/name)*(?:&gt;)*&lt;artist\sid=&quot;[^&quot;]*&quot;&gt;&lt;name&gt;([^&lt;]*)&lt;/name&gt;(?:&lt;sort-name&gt;[^&lt;]*&lt;/sort-name&gt;)*(?:&lt;disambiguation&gt;[^&lt;]*&lt;/disambiguation&gt;)*(?:&lt;alias-list&gt;.*?&lt;/alias-list&gt;)*&lt;/artist&gt;&lt;/name-credit&gt;(?:&lt;name-credit(?:&gt;)*(?:\sjoinphrase=&quot;[^&quot;]*&quot;&gt;)*(?:&lt;name&gt;[^&lt;]*&lt;/name)*(?:&gt;)*&lt;artist\sid=&quot;[^&quot;]*&quot;&gt;&lt;name&gt;[^&lt;]*&lt;/name&gt;(?:&lt;sort-name&gt;[^&lt;]*&lt;/sort-name&gt;)*(?:&lt;disambiguation&gt;[^&lt;]*&lt;/disambiguation&gt;)*(?:&lt;alias-list&gt;.*?&lt;/alias-list&gt;)*&lt;/artist&gt;&lt;/name-credit&gt;)*&lt;/artist-credit&gt;&lt;release-group\stype=&quot;Album&quot;\sid=&quot;[^&quot;]*&quot;&gt;(?:&lt;primary-type&gt;[^&lt;]*&lt;/primary-type&gt;)*(?:&lt;secondary-type-list&gt;&lt;secondary-type&gt;[^&lt;]*&lt;/secondary-type&gt;&lt;/secondary-type-list&gt;)*&lt;/release-group&gt;&lt;date&gt;(\d{4})[^&lt;]*&lt;/date&gt;(?:&lt;country&gt;)*([^&lt;]*)?.*?&lt;track-list\scount=&quot;(\d+)</expression> <!-- Groups: \1 release id, \2 title, \3 name of the first credited artist, \4 four-digit year, \5 text after the optional country tag, \6 track-list count. -->
+ </RegExp>
+ <RegExp input="$$1" output="&lt;entity&gt;&lt;year&gt;\5-\4-T#\6&lt;/year&gt;&lt;artist&gt;\3&lt;/artist&gt;&lt;title&gt;\2&lt;/title&gt;&lt;url cache=&quot;mb-\1-album.xml&quot;&gt;http://musicbrainz.org/ws/2/release/\1?inc=recordings+release-groups+artists+labels+ratings&lt;/url&gt;&lt;/entity&gt;" dest="5+"> <!-- Second pass: identical output, but the release-group type may be anything or absent; results are added to $$5 via dest="5+", so Album releases matched above are listed again here. -->
+ <expression repeat="yes">id=&quot;([^&quot;]*)&quot;&gt;&lt;title&gt;([^&lt;]*)&lt;/title&gt;&lt;status&gt;Official&lt;/status&gt;&lt;text-representation&gt;&lt;language&gt;[^&lt;]*&lt;/language&gt;&lt;script&gt;[^&lt;]*&lt;/script&gt;&lt;/text-representation&gt;&lt;artist-credit&gt;&lt;name-credit(?:&gt;)*(?:\sjoinphrase=&quot;[^&quot;]*&quot;&gt;)*(?:&lt;name&gt;[^&lt;]*&lt;/name)*(?:&gt;)*&lt;artist\sid=&quot;[^&quot;]*&quot;&gt;&lt;name&gt;([^&lt;]*)&lt;/name&gt;(?:&lt;sort-name&gt;[^&lt;]*&lt;/sort-name&gt;)*(?:&lt;disambiguation&gt;[^&lt;]*&lt;/disambiguation&gt;)*(?:&lt;alias-list&gt;.*?&lt;/alias-list&gt;)*&lt;/artist&gt;&lt;/name-credit&gt;(?:&lt;name-credit(?:&gt;)*(?:\sjoinphrase=&quot;[^&quot;]*&quot;&gt;)*(?:&lt;name&gt;[^&lt;]*&lt;/name)*(?:&gt;)*&lt;artist\sid=&quot;[^&quot;]*&quot;&gt;&lt;name&gt;[^&lt;]*&lt;/name&gt;(?:&lt;sort-name&gt;[^&lt;]*&lt;/sort-name&gt;)*(?:&lt;disambiguation&gt;[^&lt;]*&lt;/disambiguation&gt;)*(?:&lt;alias-list&gt;.*?&lt;/alias-list&gt;)*&lt;/artist&gt;&lt;/name-credit&gt;)*&lt;/artist-credit&gt;&lt;release-group(?:\stype=&quot;[^&quot;]*&quot;)*\sid=&quot;[^&quot;]*&quot;&gt;(?:&lt;primary-type&gt;[^&lt;]*&lt;/primary-type&gt;)*(?:&lt;secondary-type-list&gt;&lt;secondary-type&gt;[^&lt;]*&lt;/secondary-type&gt;&lt;/secondary-type-list&gt;)*&lt;/release-group&gt;&lt;date&gt;(\d{4})[^&lt;]*&lt;/date&gt;(?:&lt;country&gt;)*([^&lt;]*)?.*?&lt;track-list\scount=&quot;(\d+)</expression> <!-- Same capture groups as the first pass. -->
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAlbumSearchResults>
+ <GetAlbumDetails dest="3"> <!-- Reads the MusicBrainz release XML in $$1, extracts the ids, title and artist, then emits a details block of chained lookups chosen by the add-on settings. -->
+ <RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3"> <!-- $$5 accumulates every chain/url entry produced by the nested expressions below. -->
+ <!--MBID - release-->
+ <RegExp input="$$1" output="\1" dest="3">
+ <expression>id=&quot;([^&quot;]*)</expression>
+ </RegExp>
+ <!--MBID - release group-->
+ <RegExp input="$$1" output="\1" dest="4">
+ <expression>&lt;release-group type=&quot;[^&quot;]*&quot; id=&quot;([^&quot;]*)&quot;</expression> <!-- NOTE(review): requires a type attribute before id, whereas the search-result pattern treats type as optional; confirm release groups without a type are not expected here. -->
+ </RegExp>
+ <!--Album title-->
+ <RegExp input="$$1" output="\1" dest="7">
+ <expression trim="1">&lt;release id=&quot;[^&quot;]*&quot;&gt;&lt;title&gt;([^&lt;]*)&lt;</expression>
+ </RegExp>
+ <!--Artist name-->
+ <RegExp input="$$1" output="\1" dest="8">
+ <expression trim="1">&lt;release id=&quot;[^&quot;]*&quot;&gt;&lt;title&gt;[^&lt;]*&lt;.*?&lt;artist id=&quot;[^&quot;]*&quot;&gt;&lt;name&gt;([^&lt;]*)&lt;</expression>
+ </RegExp>
+ <RegExp input="$$3" output="&lt;chain function=&quot;GetMBAlbumTitleByMBID&quot;&gt;\1&lt;/chain&gt;" dest="5"> <!-- The five MusicBrainz chains below are always emitted and each receives the release MBID ($$3); this first one writes $$5, the rest add to it. -->
+ <expression noclean="1">(.+)</expression>
+ </RegExp>
+ <RegExp input="$$3" output="&lt;chain function=&quot;GetMBAlbumArtistByMBID&quot;&gt;\1&lt;/chain&gt;" dest="5+">
+ <expression noclean="1">(.+)</expression>
+ </RegExp>
+ <RegExp input="$$3" output="&lt;chain function=&quot;GetMBAlbumLabelByMBID&quot;&gt;\1&lt;/chain&gt;" dest="5+">
+ <expression noclean="1">(.+)</expression>
+ </RegExp>
+ <RegExp input="$$3" output="&lt;chain function=&quot;GetMBAlbumDateByMBID&quot;&gt;\1&lt;/chain&gt;" dest="5+">
+ <expression noclean="1">(.+)</expression>
+ </RegExp>
+ <RegExp input="$$3" output="&lt;chain function=&quot;GetMBAlbumTracksByMBID&quot;&gt;\1&lt;/chain&gt;" dest="5+">
+ <expression noclean="1">(.+)</expression>
+ </RegExp>
+ <RegExp input="$INFO[albumreviewsource]" output="&lt;url function=&quot;GetAMGAlbumReview&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+"> <!-- Review source: one of the next three entries is emitted depending on the albumreviewsource setting. -->
+ <expression>allmusic.com</expression>
+ </RegExp>
+ <RegExp input="$INFO[albumreviewsource]" output="&lt;chain function=&quot;GetLastFMAlbumReviewByAlbumAndArtist&quot;&gt;$$7::$$8::$$7::$$8&lt;/chain&gt;" dest="5+"> <!-- Passes title ($$7) and artist ($$8) twice, separated by "::". -->
+ <expression>last.fm</expression>
+ </RegExp>
+ <RegExp input="$INFO[albumreviewsource]" output="&lt;url function=&quot;GetAmazonDEAlbumReview&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+"> <!-- Same release-group URL as the allmusic entries, but without a cache attribute. -->
+ <expression>amazon.de</expression>
+ </RegExp>
+ <RegExp input="$INFO[albumratingsource]" output="&lt;chain function=&quot;GetMBAlbumRatingByMBID&quot;&gt;$$3&lt;/chain&gt;" dest="5+"> <!-- Rating source: MusicBrainz (by release MBID) or allmusic.com (via the release-group relations). -->
+ <expression>MusicBrainz</expression>
+ </RegExp>
+ <RegExp input="$INFO[albumratingsource]" output="&lt;url function=&quot;GetAMGAlbumRating&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+">
+ <expression>allmusic.com</expression>
+ </RegExp>
+ <RegExp input="$INFO[albumstylessource]" output="&lt;url function=&quot;GetAMGAlbumStyles&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+"> <!-- Styles, moods and themes are only requested when their setting contains "allmusic.com". -->
+ <expression>allmusic.com</expression>
+ </RegExp>
+ <RegExp input="$INFO[albummoodssource]" output="&lt;url function=&quot;GetAMGAlbumMoods&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+">
+ <expression>allmusic.com</expression>
+ </RegExp>
+ <RegExp input="$INFO[albumthemessource]" output="&lt;url function=&quot;GetAMGAlbumThemes&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+">
+ <expression>allmusic.com</expression>
+ </RegExp>
+ <RegExp conditional="fanarttvalbumthumbs" input="$$1" output="&lt;chain function=&quot;GetFanartTvAlbumThumbsByMBID&quot;&gt;$$4&lt;/chain&gt;" dest="5+"> <!-- Artwork: the conditional names match the bool settings of the same id; presumably each entry is emitted only when that setting is enabled (confirm). fanart.tv receives the release-group MBID ($$4). -->
+ <expression noclean="1" />
+ </RegExp>
+ <RegExp conditional="allmusicalbumthumbs" input="$$1" output="&lt;url function=&quot;GetAMGAlbumThumbs&quot; cache=&quot;mb-$$4-rg.xml&quot;&gt;http://musicbrainz.org/ws/2/release-group/$$4?inc=url-rels&lt;/url&gt;" dest="5+">
+ <expression noclean="1" />
+ </RegExp>
+ <RegExp conditional="lastfmalbumthumbs" input="$$1" output="&lt;chain function=&quot;GetLastFMAlbumThumbs&quot;&gt;$$7::$$8::$$7::$$8&lt;/chain&gt;" dest="5+">
+ <expression noclean="1" />
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAlbumDetails>
+
+ <GetAMGAlbumReview dest="5"> <!-- Looks for an allmusic.com album link in $$1 (presumably the MusicBrainz release-group relations requested by GetAlbumDetails) and chains to GetAMGAlbumReviewByAMGID with it. -->
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5"> <!-- Wraps the chain entry that the nested expression stored in $$2. -->
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAMGAlbumReviewByAMGID&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">allmusic.com/album/([^&lt;]*)</expression> <!-- \1 = text after "allmusic.com/album/" up to the next "<". -->
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAMGAlbumReview>
+
+ <GetAMGAlbumRating dest="5"> <!-- Looks for an allmusic.com album link in $$1 and chains to GetAMGAlbumRatingByAMGID with it. -->
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5"> <!-- Wraps the chain entry that the nested expression stored in $$2. -->
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAMGAlbumRatingByAMGID&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">allmusic.com/album/([^&lt;]*)</expression> <!-- \1 = text after "allmusic.com/album/" up to the next "<". -->
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAMGAlbumRating>
+
+ <GetAMGAlbumStyles dest="5"> <!-- Looks for an allmusic.com album link in $$1 and chains to GetAMGAlbumStylesByAMGID with it. -->
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5"> <!-- Wraps the chain entry that the nested expression stored in $$2. -->
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAMGAlbumStylesByAMGID&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">allmusic.com/album/([^&lt;]*)</expression> <!-- \1 = text after "allmusic.com/album/" up to the next "<". -->
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAMGAlbumStyles>
+
+ <GetAMGAlbumMoods dest="5"> <!-- Looks for an allmusic.com album link in $$1 and chains to GetAMGAlbumMoodsByAMGID with it. -->
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5"> <!-- Wraps the chain entry that the nested expression stored in $$2. -->
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAMGAlbumMoodsByAMGID&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">allmusic.com/album/([^&lt;]*)</expression> <!-- \1 = text after "allmusic.com/album/" up to the next "<". -->
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAMGAlbumMoods>
+
+ <GetAMGAlbumThemes dest="5"> <!-- Looks for an allmusic.com album link in $$1 and chains to GetAMGAlbumThemesByAMGID with it. -->
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5"> <!-- Wraps the chain entry that the nested expression stored in $$2. -->
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAMGAlbumThemesByAMGID&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">allmusic.com/album/([^&lt;]*)</expression> <!-- \1 = text after "allmusic.com/album/" up to the next "<". -->
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAMGAlbumThemes>
+
+ <GetAMGAlbumThumbs dest="5"> <!-- Looks for an allmusic.com album link in $$1 and chains to GetAMGAlbumThumbsByAMGID with it. -->
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5"> <!-- Wraps the chain entry that the nested expression stored in $$2. -->
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAMGAlbumThumbsByAMGID&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">allmusic.com/album/([^&lt;]*)</expression> <!-- \1 = text after "allmusic.com/album/" up to the next "<". -->
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAMGAlbumThumbs>
+
+ <GetAmazonDEAlbumReview dest="5"> <!-- Looks for an amazon.de product link in $$1 and chains to GetAmazonDEAlbumReviewByASIN with it. -->
+ <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5"> <!-- Wraps the chain entry that the nested expression stored in $$2. -->
+ <RegExp input="$$1" output="&lt;chain function=&quot;GetAmazonDEAlbumReviewByASIN&quot;&gt;\1&lt;/chain&gt;" dest="2">
+ <expression noclean="1">http://www.amazon.de/gp/product/([^&lt;]*)</expression> <!-- \1 = text after ".../gp/product/" up to the next "<"; only this exact http://www.amazon.de/gp/product/ link form is recognised. -->
+ </RegExp>
+ <expression noclean="1" />
+ </RegExp>
+ </GetAmazonDEAlbumReview>
+
+</scraper>
@@ -0,0 +1,36 @@
+[B]1.2.2[/B]
+Fixed: accommodate new Frodo style URL encoding
+
+[B]1.2.1[/B]
+Removed: '&' sign from artist splitter
+
+[B]1.2.0[/B]
+Added: scraping album review from amazon.de (if a link exists on MusicBrainz)
+
+[B]1.1.4[/B]
+Fixed: typo blocks preferring album in search result
+
+[B]1.1.3[/B]
+Fixed: won't find tracks without recording id
+
+[B]1.1.2[/B]
+Fixed: won't find tracks without duration
+
+[B]1.1.1[/B]
+Fixed: artists with 'and' in their name were not found
+
+[B]1.1.0[/B]
+Added: trying to get album info when multiple artists are credited (first artist will be used)
+Fixed: will not find some albums
+
+[B]1.0.3[/B]
+Fixed: Some Album releases were not found
+
+[B]1.0.2[/B]
+Fixed: Track Duration from MusicBrainz was wrong in certain cases. Credits to scudlee!
+
+[B]1.0.1[/B]
+Fixed: scraping moods from allmusic.com
+
+[B]1.0.0[/B]
+Initial version
Binary file not shown.
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<strings> <!-- Label texts for the album scraper settings; the settings definition refers to them by id (30000 to 30009). -->
+ <string id="30000">Grab Album Thumbs from fanart.tv</string>
+ <string id="30001">Grab Album Thumbs from Last.fm</string>
+ <string id="30002">Get Album Review from</string>
+ <string id="30003"> Preferred Language</string> <!-- NOTE(review): the leading space is part of the label, possibly to indent this dependent option; confirm before trimming. -->
+ <string id="30004">Get Album Rating from</string>
+ <string id="30005">Get Album Styles from</string>
+ <string id="30006">Get Album Moods from</string>
+ <string id="30007">Get Album Themes from</string>
+ <string id="30008">Grab Album Thumbs from allmusic.com</string>
+ <string id="30009">Artwork</string>
+</strings>
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<settings> <!-- User settings of the album scraper; the ids are the names the scraper reads through $INFO[...] and conditional="...". -->
+ <category label="128"> <!-- 128 is not among this add-on's own strings (30000 to 30009); presumably an application string id (confirm). -->
+ <setting type="lsep" label="General Settings"/>
+ <setting label="30002" type="labelenum" values="last.fm|amazon.de|None" id="albumreviewsource" default="last.fm"/> <!-- NOTE(review): GetAlbumDetails also handles albumreviewsource containing "allmusic.com", but that value is not offered here; confirm this is intentional. -->
+ <setting label="30003" type="labelenum" values="en|de|es|fr|it|jp|pl|pt|ru|sv|tr|zh" id="lastfmlanguage" default="en" visible="eq(-1,0)"/> <!-- visible refers to the setting one position above, index 0 (last.fm); presumably the language choice is shown only for last.fm reviews (confirm). Keep this directly below albumreviewsource. -->
+ <setting type="sep"/>
+ <setting label="30004" type="labelenum" values="MusicBrainz|allmusic.com|None" id="albumratingsource" default="MusicBrainz"/>
+ <setting label="30005" type="labelenum" values="allmusic.com|None" id="albumstylessource" default="allmusic.com"/>
+ <setting label="30006" type="labelenum" values="allmusic.com|None" id="albummoodssource" default="allmusic.com"/>
+ <setting label="30007" type="labelenum" values="allmusic.com|None" id="albumthemessource" default="allmusic.com"/>
+ </category>
+
+ <category label="30009"> <!-- Artwork sources; each bool id matches a conditional attribute used in GetAlbumDetails. -->
+ <setting type="lsep" label="Artwork Settings"/>
+ <setting label="30000" type="bool" id="fanarttvalbumthumbs" default="true"/>
+ <setting label="30001" type="bool" id="lastfmalbumthumbs" default="true"/>
+ <setting label="30008" type="bool" id="allmusicalbumthumbs" default="true"/>
+ </category>
+</settings>
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<addon id="metadata.artists.universal"
+ name="Universal Artist Scraper"
+ version="2.0.6"
+ provider-name="Team XBMC"> <!-- Add-on manifest for the Universal Artist Scraper: lists its imports, the scraper definition file and the user-facing description. -->
+ <requires> <!-- Add-ons this scraper imports, each with the version requested. -->
+ <import addon="xbmc.metadata" version="1.0"/>
+ <import addon="metadata.common.last.fm" version="1.4.1"/>
+ <import addon="metadata.common.allmusic.com" version="2.4.1"/>
+ <import addon="metadata.common.musicbrainz.org" version="1.2.1"/>
+ <import addon="metadata.common.htbackdrops.com" version="1.2.0"/>
+ <import addon="metadata.common.fanart.tv" version="1.1.0"/>
+ </requires>
+ <extension point="xbmc.metadata.scraper.artists"
+ language="en"
+ library="artistuniversal.xml"/> <!-- Artist scraper extension point; artistuniversal.xml holds the scraper functions. -->
+ <extension point="xbmc.addon.metadata"> <!-- Summary, description and platform shown for this add-on. -->
+ <summary lang="en">Universal Scraper for Artists</summary>
+ <description lang="en">This scraper collects information from the following supported sites: MusicBrainz, last.fm, and allmusic.com, while grabs artwork from: fanart.tv, htbackdrops.com, last.fm and allmusic.com. It can be set field by field that from which site you want that specific information.
+
+The initial search is always done on MusicBrainz. In case allmusic link is not added on the MusicBrainz site fields from allmusic.com cannot be fetched (very easy to add those missing links though).</description>
+ <platform>all</platform>
+ </extension>
+</addon>
Oops, something went wrong.

0 comments on commit cfb94b4

Please sign in to comment.