Skip to content

Commit

Permalink
[utils] clean_podcast_url: Handle more trackers (#7556)
Browse files: browse the repository at this point in the history
Authored by: mabdelfattah, bashonly
Closes #7544
  • Loading branch information
mabdelfattah committed Jul 11, 2023
1 parent 325191d commit 2af4eeb
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
2 changes: 2 additions & 0 deletions test/test_utils.py
Expand Up @@ -1835,6 +1835,8 @@ def test_iri_to_uri(self):
# Each case passes a podcast URL wrapped in one or more tracking prefixes and expects the bare media URL back.
def test_clean_podcast_url(self):
# www.podtrac.com/pts/redirect.mp3 followed by chtbl.com/track/<id> is removed.
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
# play.podtrac.com/<id> is removed.
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
# A chain of pdst.fm/e, 2.gum.fm, chtbl.com/track, chrt.fm/track/<id> and pscrb.fm/rss/p is removed; the query string is kept.
self.assertEqual(clean_podcast_url('https://pdst.fm/e/2.gum.fm/chtbl.com/track/chrt.fm/track/34D33/pscrb.fm/rss/p/traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661'), 'https://traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661')
# A second https:// scheme nested after pdst.fm/e/ collapses to a single scheme, and mgln.ai/e/<id> is removed.
self.assertEqual(clean_podcast_url('https://pdst.fm/e/https://mgln.ai/e/441/www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3'), 'https://www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3')

def test_LazyList(self):
it = list(range(10))
Expand Down
10 changes: 7 additions & 3 deletions yt_dlp/utils/_utils.py
Expand Up @@ -5123,14 +5123,18 @@ def clean_podcast_url(url):
(?:
chtbl\.com/track|
media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
play\.podtrac\.com
)/[^/]+|
play\.podtrac\.com|
chrt\.fm/track|
mgln\.ai/e
)(?:/[^/.]+)?|
(?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
flex\.acast\.com|
pd(?:
cn\.co| # https://podcorn.com/analytics-prefix/
st\.fm # https://podsights.com/docs/
)/e
)/e|
[0-9]\.gum\.fm|
pscrb\.fm/rss/p
)/''', '', url)
return re.sub(r'^\w+://(\w+://)', r'\1', url)

Expand Down

1 comment on commit 2af4eeb

@gamer191
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was unaware this existed. Why not just use https://github.com/AdguardTeam/FiltersRegistry/blob/master/filters/filter_17_TrackParam/filter.txt? Licensing issues?

Please sign in to comment.