Permalink
Browse files

Fixed endless loop case in wikicode processing.

Detected when importing recent MediaWiki dumps in which some pages
contain script content in plain text format (see the Scribunto extension:
https://www.mediawiki.org/wiki/Extension:Scribunto).

Possible further improvement: modify the MediawikiImporter so that it skips
revisions whose <model> is not wikitext.
  • Loading branch information...
luccioman committed Apr 12, 2017
1 parent 0bc868a commit 23775e76e2901f2e303e0d4ffea2fb5b216bf97a
Showing with 57 additions and 3 deletions.
  1. +10 −3 source/net/yacy/data/wiki/WikiCode.java
  2. +47 −0 test/java/net/yacy/data/wiki/WikiCodeTest.java
@@ -589,8 +589,9 @@ private static String processLinksAndImages(final String hostport, String line)
int p;
int positionOfOpeningTag;
int positionOfClosingTag;
int fromIndex = 0;
// internal links and images
while ((positionOfOpeningTag = line.indexOf(WIKI_OPEN_LINK)) >= 0) {
while ((positionOfOpeningTag = line.indexOf(WIKI_OPEN_LINK, fromIndex)) >= 0) {
positionOfClosingTag = line.indexOf(WIKI_CLOSE_LINK, positionOfOpeningTag + LEN_WIKI_OPEN_LINK);
if (positionOfClosingTag <= positionOfOpeningTag) {
break;
@@ -640,16 +641,19 @@ private static String processLinksAndImages(final String hostport, String line)
}
line = line.substring(0, positionOfOpeningTag) + "<img src=\"" + kl + "\"" + align + alt + ">" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_LINK);
fromIndex = positionOfClosingTag + LEN_WIKI_CLOSE_LINK;
}
// this is the part of the code that is responsible for Youtube video links supporting only the video ID as parameter
else if (kl.startsWith(WIKI_VIDEO_YOUTUBE)) {
kl = kl.substring(LEN_WIKI_VIDEO_YOUTUBE);
line = line.substring(0, positionOfOpeningTag) + "" + "<object width=\"425\" height=\"350\"><param name=\"movie\" value=\"http://www.youtube.com/v/" + kl + "\"></param><param name=\"wmode\" value=\"transparent\"></param><embed src=\"http://www.youtube.com/v/" + kl + "\" type=\"application/x-shockwave-flash\" wmode=\"transparent\" width=\"425\" height=\"350\"></embed></object>";
break;
}
// this is the part of the code that is responsible for Vimeo video links supporting only the video ID as parameter
else if (kl.startsWith(WIKI_VIDEO_VIMEO)) {
kl = kl.substring(LEN_WIKI_VIDEO_VIMEO);
line = line.substring(0, positionOfOpeningTag) + "" + "<iframe src=\"http://player.vimeo.com/video/" + kl + "\" width=\"425\" height=\"350\" frameborder=\"0\" webkitAllowFullScreen mozallowfullscreen allowFullScreen></iframe>";
break;
}
// if it's no image, it might be an internal link
else {
@@ -660,11 +664,13 @@ else if (kl.startsWith(WIKI_VIDEO_VIMEO)) {
kv = kl;
}
line = line.substring(0, positionOfOpeningTag) + "<a class=\"known\" href=\"Wiki.html?page=" + kl + "\">" + kv + "</a>" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_LINK); // oob exception in append() !
fromIndex = positionOfClosingTag + LEN_WIKI_CLOSE_LINK;
}
}
fromIndex = 0;
// external links
while ((positionOfOpeningTag = line.indexOf(WIKI_OPEN_EXTERNAL_LINK)) >= 0) {
while ((positionOfOpeningTag = line.indexOf(WIKI_OPEN_EXTERNAL_LINK, fromIndex)) >= 0) {
positionOfClosingTag = line.indexOf(WIKI_CLOSE_EXTERNAL_LINK, positionOfOpeningTag + LEN_WIKI_OPEN_EXTERNAL_LINK);
if (positionOfClosingTag <= positionOfOpeningTag) {
break;
@@ -686,6 +692,7 @@ else if (kl.startsWith(WIKI_VIDEO_VIMEO)) {
kl = "http://" + hostport + "/" + kl;
}
line = line.substring(0, positionOfOpeningTag) + "<a class=\"extern\" href=\"" + kl + "\">" + kv + "</a>" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_EXTERNAL_LINK);
fromIndex = positionOfClosingTag + LEN_WIKI_CLOSE_EXTERNAL_LINK;
}
return line;
}
@@ -54,4 +54,51 @@ public void testProcessLineOfWikiCode() {
assertFalse("no header tag expected:"+erg, erg.contains("<h1>"));
}
}
/**
* Test internal link markup processing
*/
@Test
public void testInternalLink() {
WikiCode wc = new WikiCode();
/* Link to another wiki article */
String result = wc.transform("http://wiki:8080", "[[article]]");
assertTrue(result.contains("<a"));
assertTrue(result.contains("href=\"Wiki.html?page=article\""));
/* Renamed link */
result = wc.transform("http://wiki:8080", "[[article|renamed article]]");
assertTrue(result.contains("<a"));
assertTrue(result.contains("href=\"Wiki.html?page=article\""));
assertTrue(result.contains(">renamed article<"));
/* Multiple links on the same line */
result = wc.transform("http://wiki:8080", "[[article1]] [[article2]]");
assertTrue(result.contains("<a"));
assertTrue(result.contains("href=\"Wiki.html?page=article1\""));
assertTrue(result.contains("href=\"Wiki.html?page=article2\""));
}
/**
* Test external link markup processing
*/
@Test
public void testExternalLink() {
WikiCode wc = new WikiCode();
/* Unamed link */
String result = wc.transform("http://wiki:8080", "[http://yacy.net]");
assertTrue(result.contains("<a"));
assertTrue(result.contains("href=\"http://yacy.net\""));
/* Named link */
result = wc.transform("http://wiki:8080", "[http://yacy.net YaCy]");
assertTrue(result.contains("<a"));
assertTrue(result.contains("href=\"http://yacy.net\""));
assertTrue(result.contains(">YaCy<"));
/* Lua Script array parameter : should not crash the transform process */
result = wc.transform("http://wiki:8080", "'[[[[2,1],[4,3],[6,5],[2,1]],[[12,11],[14,13],[16,15],[12,11]]]]'");
}
}

0 comments on commit 23775e7

Please sign in to comment.