Skip to content
Permalink
Browse files Browse the repository at this point in the history
XWIKI-19671: Filter the feed's content in the RSS macro
  • Loading branch information
michitux committed Jul 7, 2022
1 parent 4f64320 commit 5c7ebe4
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 10 deletions.
Expand Up @@ -50,6 +50,11 @@
<artifactId>xwiki-rendering-macro-box</artifactId>
<version>${rendering.version}</version>
</dependency>
<dependency>
<groupId>org.xwiki.commons</groupId>
<artifactId>xwiki-commons-xml</artifactId>
<version>${commons.version}</version>
</dependency>
<dependency>
<groupId>org.xwiki.platform</groupId>
<artifactId>xwiki-platform-bridge</artifactId>
Expand Down
Expand Up @@ -22,14 +22,17 @@
import java.io.StringReader;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.inject.Inject;
import javax.inject.Named;
import javax.inject.Singleton;

import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Document;
import org.xwiki.bridge.SkinAccessBridge;
import org.xwiki.component.annotation.Component;
import org.xwiki.context.Execution;
Expand All @@ -49,6 +52,9 @@
import org.xwiki.rendering.parser.Parser;
import org.xwiki.rendering.syntax.Syntax;
import org.xwiki.rendering.transformation.MacroTransformationContext;
import org.xwiki.xml.html.HTMLCleaner;
import org.xwiki.xml.html.HTMLCleanerConfiguration;
import org.xwiki.xml.html.HTMLUtils;

import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndFeed;
Expand Down Expand Up @@ -102,6 +108,12 @@ public class RssMacro extends AbstractBoxMacro<RssMacroParameters>
@Inject
private Execution execution;

/**
* To clean the HTML content.
*/
@Inject
private HTMLCleaner htmlCleaner;

/**
* Create a Feed object from a feed specified as a URL.
*/
Expand Down Expand Up @@ -278,13 +290,41 @@ private void generateEntries(Block parentBlock, SyndFeed feed, RssMacroParameter
// A case where doing this might hurt is a feed that declares "text" but contains XML that should not be
// interpreted as markup and should be displayed as is instead. But this is certainly too rare
// compared to malformed feeds that say text while they mean HTML.
Block html = new RawBlock(entry.getDescription().getValue(), Syntax.XHTML_1_0);
Block html = new RawBlock(cleanHTML(entry.getDescription().getValue()), Syntax.HTML_5_0);
parentBlock.addChild(new GroupBlock(Arrays.asList(html), Collections.singletonMap(CLASS_ATTRIBUTE,
"rssitemdescription")));
}
}
}

/**
 * Clean the HTML of a feed entry description before it is emitted as raw HTML.
 * <p>
 * The content is passed through the HTML cleaner configured for HTML 5 in restricted mode (the feed is
 * remote content that is not trusted), the HTML envelope is stripped and the enclosing {@code <html>}
 * element is removed from the serialized result so that only a fragment is returned.
 *
 * @param content the HTML content taken from the feed, may be {@code null}
 * @return the cleaned HTML fragment, or an empty string when there is no content to output
 */
private String cleanHTML(String content)
{
    // A feed entry may carry a description without any value; StringReader rejects null with an NPE.
    if (content == null) {
        return "";
    }

    HTMLCleanerConfiguration cleanerConfiguration = this.htmlCleaner.getDefaultConfiguration();
    // Copy the default parameters so that the overrides below are applied on top of them.
    Map<String, String> parameters = new HashMap<>(cleanerConfiguration.getParameters());

    // Just always use HTML 5 as this is what browsers parse.
    parameters.put(HTMLCleanerConfiguration.HTML_VERSION, "5");
    // Don't trust remote content.
    parameters.put(HTMLCleanerConfiguration.RESTRICTED, "true");

    cleanerConfiguration.setParameters(parameters);

    Document document = this.htmlCleaner.clean(new StringReader(content), cleanerConfiguration);

    // Remove the HTML envelope since this macro is only a fragment of a page which will already have an
    // HTML envelope when rendered. We remove it so that the HTML <head> tag isn't output.
    HTMLUtils.stripHTMLEnvelope(document);

    // Don't print the XML declaration nor the XHTML DocType.
    String cleanedContent = HTMLUtils.toString(document, true, true);

    // Don't print the top level html element (which is always present and at the same location
    // since it's been normalized by the HTML cleaner).
    // Note: the serialized form is expected to start with a new line (generated by HTMLUtils.toString()
    // since the doctype is printed on a line by itself followed by a new line) followed by "<html>" and
    // to end with "</html>" followed by a new line. The offsets are derived from those literals rather
    // than hard-coded.
    final int prefixLength = "\n<html>".length();
    final int suffixLength = "</html>\n".length();

    // Guard against an unexpectedly short result instead of failing with an index out of bounds error.
    if (cleanedContent.length() < prefixLength + suffixLength) {
        return "";
    }

    return cleanedContent.substring(prefixLength, cleanedContent.length() - suffixLength);
}

/**
* @param romeFeedFactory a custom implementation to use instead of the default, useful for tests
*/
Expand All @@ -295,7 +335,7 @@ protected void setFeedFactory(RomeFeedFactory romeFeedFactory)

/**
* Convenience method to not have to handle exceptions in several places.
*
*
* @param content the content to parse as plain text
* @return the parsed Blocks
* @since 2.0M3
Expand Down
@@ -0,0 +1,34 @@
<?xml version="1.0"?>

<!--
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
-->

<rss version="2.0">
<channel>
<title>Test Feed 3</title>
<link>http://localhost/feed.xml</link>
<description>A pseudo-feed to test the RSS macro</description>
<item>
<title>Item1</title>
<link>http://localhost/blog/item1</link>
<description>An item with &lt;a href="javascript:alert(1)"&gt;dangerous&lt;/a&gt; &lt;b&gt;HTML&lt;/b&gt;markup.</description>
</item>
</channel>
</rss>
Expand Up @@ -31,7 +31,7 @@ onWord [City]
endLink [Typed = [true] Type = [url] Reference = [http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp]] [true]
endParagraph [[class]=[rssitemtitle]]
beginGroup [[class]=[rssitemdescription]]
onRawText [How do Americans get ready to work with Russians aboard the International Space Station?] [xhtml/1.0]
onRawText [<p>How do Americans get ready to work with Russians aboard the International Space Station?</p>] [html/5.0]
endGroup [[class]=[rssitemdescription]]
beginParagraph [[class]=[rssitemtitle]]
beginLink [Typed = [true] Type = [url] Reference = [http://liftoff.msfc.nasa.gov/]] [true]
Expand All @@ -41,12 +41,12 @@ onWord [Exploration]
endLink [Typed = [true] Type = [url] Reference = [http://liftoff.msfc.nasa.gov/]] [true]
endParagraph [[class]=[rssitemtitle]]
beginGroup [[class]=[rssitemdescription]]
onRawText [Sky watchers in Europe, Asia, and parts of Alaska and Canada.] [xhtml/1.0]
onRawText [<p>Sky watchers in Europe, Asia, and parts of Alaska and Canada.</p>] [html/5.0]
endGroup [[class]=[rssitemdescription]]
endGroup [[class]=[box rssfeed]]
endMacroMarkerStandalone [testrss] [feed=file://feed1.xml|content=true|count=2|image=true]
endDocument
.#-----------------------------------------------------
.expect|xhtml/1.0
.#-----------------------------------------------------
<div class="box rssfeed"><img src="http://www.w3schools.com/images/logo.gif" class="wikimodel-freestanding" alt="http://www.w3schools.com/images/logo.gif"/><br/><p class="rsschanneltitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://liftoff.msfc.nasa.gov/">Lift Off News</a></span><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://liftoff.msfc.nasa.gov/"><img src="/xwiki/resources/icons/silk/feed.png" alt="/xwiki/resources/icons/silk/feed.png"/></a></span></p><p class="rssitemtitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp">Star City</a></span></p><div class="rssitemdescription">How do Americans get ready to work with Russians aboard the International Space Station?</div><p class="rssitemtitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://liftoff.msfc.nasa.gov/">Space Exploration</a></span></p><div class="rssitemdescription">Sky watchers in Europe, Asia, and parts of Alaska and Canada.</div></div>
<div class="box rssfeed"><img src="http://www.w3schools.com/images/logo.gif" class="wikimodel-freestanding" alt="http://www.w3schools.com/images/logo.gif"/><br/><p class="rsschanneltitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://liftoff.msfc.nasa.gov/">Lift Off News</a></span><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://liftoff.msfc.nasa.gov/"><img src="/xwiki/resources/icons/silk/feed.png" alt="/xwiki/resources/icons/silk/feed.png"/></a></span></p><p class="rssitemtitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp">Star City</a></span></p><div class="rssitemdescription"><p>How do Americans get ready to work with Russians aboard the International Space Station?</p></div><p class="rssitemtitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://liftoff.msfc.nasa.gov/">Space Exploration</a></span></p><div class="rssitemdescription"><p>Sky watchers in Europe, Asia, and parts of Alaska and Canada.</p></div></div>
Expand Up @@ -28,12 +28,12 @@ onWord [Item1]
endLink [Typed = [true] Type = [url] Reference = [http://localhost/blog/item1]] [true]
endParagraph [[class]=[rssitemtitle]]
beginGroup [[class]=[rssitemdescription]]
onRawText [An item with<b>HTML</b>markup.] [xhtml/1.0]
onRawText [<p>An item with<b>HTML</b>markup.</p>] [html/5.0]
endGroup [[class]=[rssitemdescription]]
endGroup [[class]=[box rssfeed]]
endMacroMarkerStandalone [testrss] [feed=file://feed2.xml|content=true|count=1|image=false]
endDocument
.#-----------------------------------------------------
.expect|xhtml/1.0
.#-----------------------------------------------------
<div class="box rssfeed"><p class="rsschanneltitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://localhost/feed.xml">Test Feed 2</a></span><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://localhost/feed.xml"><img src="/xwiki/resources/icons/silk/feed.png" alt="/xwiki/resources/icons/silk/feed.png"/></a></span></p><p class="rssitemtitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://localhost/blog/item1">Item1</a></span></p><div class="rssitemdescription">An item with<b>HTML</b>markup.</div></div>
<div class="box rssfeed"><p class="rsschanneltitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://localhost/feed.xml">Test Feed 2</a></span><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://localhost/feed.xml"><img src="/xwiki/resources/icons/silk/feed.png" alt="/xwiki/resources/icons/silk/feed.png"/></a></span></p><p class="rssitemtitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://localhost/blog/item1">Item1</a></span></p><div class="rssitemdescription"><p>An item with<b>HTML</b>markup.</p></div></div>
Expand Up @@ -17,7 +17,7 @@ onWord [City]
endLink [Typed = [true] Type = [url] Reference = [http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp]] [true]
endParagraph [[class]=[rssitemtitle]]
beginGroup [[class]=[rssitemdescription]]
onRawText [How do Americans get ready to work with Russians aboard the International Space Station?] [xhtml/1.0]
onRawText [<p>How do Americans get ready to work with Russians aboard the International Space Station?</p>] [html/5.0]
endGroup [[class]=[rssitemdescription]]
beginParagraph [[class]=[rssitemtitle]]
beginLink [Typed = [true] Type = [url] Reference = [http://liftoff.msfc.nasa.gov/]] [true]
Expand All @@ -27,12 +27,12 @@ onWord [Exploration]
endLink [Typed = [true] Type = [url] Reference = [http://liftoff.msfc.nasa.gov/]] [true]
endParagraph [[class]=[rssitemtitle]]
beginGroup [[class]=[rssitemdescription]]
onRawText [Sky watchers in Europe, Asia, and parts of Alaska and Canada.] [xhtml/1.0]
onRawText [<p>Sky watchers in Europe, Asia, and parts of Alaska and Canada.</p>] [html/5.0]
endGroup [[class]=[rssitemdescription]]
endGroup [[class]=[rssfeed]]
endMacroMarkerStandalone [testrss] [feed=file://feed1.xml|content=true|count=2|decoration=false|encoding=UTF-8]
endDocument
.#-----------------------------------------------------
.expect|xhtml/1.0
.#-----------------------------------------------------
<div class="rssfeed"><p class="rssitemtitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp">Star City</a></span></p><div class="rssitemdescription">How do Americans get ready to work with Russians aboard the International Space Station?</div><p class="rssitemtitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://liftoff.msfc.nasa.gov/">Space Exploration</a></span></p><div class="rssitemdescription">Sky watchers in Europe, Asia, and parts of Alaska and Canada.</div></div>
<div class="rssfeed"><p class="rssitemtitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp">Star City</a></span></p><div class="rssitemdescription"><p>How do Americans get ready to work with Russians aboard the International Space Station?</p></div><p class="rssitemtitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://liftoff.msfc.nasa.gov/">Space Exploration</a></span></p><div class="rssitemdescription"><p>Sky watchers in Europe, Asia, and parts of Alaska and Canada.</p></div></div>
@@ -0,0 +1,27 @@
.runTransformations
.#-----------------------------------------------------
.input|xwiki/2.0
.# Verify HTML filtering works.
.#-----------------------------------------------------
{{testrss feed="file://feed3.xml" content="true" count="2" decoration="false" encoding="UTF-8"/}}
.#-----------------------------------------------------
.expect|event/1.0
.#-----------------------------------------------------
beginDocument
beginMacroMarkerStandalone [testrss] [feed=file://feed3.xml|content=true|count=2|decoration=false|encoding=UTF-8]
beginGroup [[class]=[rssfeed]]
beginParagraph [[class]=[rssitemtitle]]
beginLink [Typed = [true] Type = [url] Reference = [http://localhost/blog/item1]] [true]
onWord [Item1]
endLink [Typed = [true] Type = [url] Reference = [http://localhost/blog/item1]] [true]
endParagraph [[class]=[rssitemtitle]]
beginGroup [[class]=[rssitemdescription]]
onRawText [<p>An item with <a>dangerous</a> <b>HTML</b>markup.</p>] [html/5.0]
endGroup [[class]=[rssitemdescription]]
endGroup [[class]=[rssfeed]]
endMacroMarkerStandalone [testrss] [feed=file://feed3.xml|content=true|count=2|decoration=false|encoding=UTF-8]
endDocument
.#-----------------------------------------------------
.expect|xhtml/1.0
.#-----------------------------------------------------
<div class="rssfeed"><p class="rssitemtitle"><span class="wikiexternallink"><a class="wikimodel-freestanding" href="http://localhost/blog/item1">Item1</a></span></p><div class="rssitemdescription"><p>An item with <a>dangerous</a> <b>HTML</b>markup.</p></div></div>

0 comments on commit 5c7ebe4

Please sign in to comment.