Skip to content

Commit

Permalink
add html5 audio/video <source> tag to html content scraper
Browse files Browse the repository at this point in the history
- <source src=.. type=..> tag content is added to embed collection
  • Loading branch information
reger committed Apr 28, 2014
1 parent a9963d5 commit 2eb7682
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion source/net/yacy/document/parser/html/ContentScraper.java
Expand Up @@ -97,6 +97,7 @@ public enum TagName {
embed(TagType.singleton), //added by [MN]
param(TagType.singleton), //added by [MN]
iframe(TagType.singleton), // scraped as singleton to get such iframes that have no closing tag
source(TagType.singleton), // html5 (part of <video> <audio>) - scraped like embed

a(TagType.pair),
h1(TagType.pair),
Expand Down Expand Up @@ -463,7 +464,7 @@ public void scrapeTag0(Tag tag) {
this.anchors.add(newLink);
}
}
} else if(tag.name.equalsIgnoreCase("embed")) {
} else if(tag.name.equalsIgnoreCase("embed") || tag.name.equalsIgnoreCase("source")) { //html5 tag
final String src = tag.opts.getProperty("src", EMPTY_STRING);
try {
if (src.length() > 0) {
Expand Down

0 comments on commit 2eb7682

Please sign in to comment.