Permalink
Browse files

Adjust mergeDocuments to keep youngest last-modified date of document

collection
  • Loading branch information...
reger24 committed May 9, 2017
1 parent 6548015 commit 077d062be37eae08bd52a783ffdb7f4be464fd42
Showing with 100 additions and 2 deletions.
  1. +2 −2 source/net/yacy/document/Document.java
  2. +98 −0 test/java/net/yacy/document/DocumentTest.java
@@ -924,7 +924,7 @@ public static Document mergeDocuments(final DigestURL location, final String glo
final Set<String> languages = new HashSet<>();
double lon = 0.0d, lat = 0.0d;
boolean indexingDenied = false;
Date date = new Date();
Date date = null;
String charset = null;
int mindepth = 999;
@@ -968,7 +968,7 @@ public static Document mergeDocuments(final DigestURL location, final String glo
rss.putAll(doc.getRSS());
images.putAll(doc.getImages());
if (doc.lon() != 0.0 && doc.lat() != 0.0) { lon = doc.lon(); lat = doc.lat(); }
if (doc.lastModified.before(date)) date = doc.lastModified;
if (date == null || doc.lastModified.after(date)) date = doc.lastModified; // set youngest lastModified date from doc collection
if (doc.getDepth() < mindepth) mindepth = doc.getDepth();
if (doc.dc_language() != null) languages.add(doc.dc_language());
@@ -0,0 +1,98 @@
/**
* DocumentTest
* part of YaCy
* Copyright 2017 by reger24; https://github.com/reger24
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.util.Date;
import net.yacy.cora.document.id.DigestURL;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Unit tests for the {@link Document} class.
 */
public class DocumentTest {

    /** Message attached to the last-modified assertions. */
    private static final String LASTMOD_MESSAGE = "merged last-modified Date";

    /**
     * Verifies that {@link Document#mergeDocuments} reports the newest
     * last-modified date of the merged documents when the documents are
     * supplied oldest first.
     *
     * @throws MalformedURLException if the fixed test URL cannot be parsed
     */
    @Test
    public void testMergeDocuments_lastModified() throws MalformedURLException {
        final Date lastmodDateMin = new Date(10 * 1000); // min test date
        final Date lastmodDateMax = new Date(20 * 1000); // max test date

        final DigestURL location = new DigestURL("http://localhost/test.html");
        final String mimeType = "test/html";
        final String charset = Charset.defaultCharset().name();

        final Document[] docs = new Document[] {
            createDocument(location, mimeType, charset, lastmodDateMin),
            createDocument(location, mimeType, charset, lastmodDateMax)
        };

        // NOTE(review): the original passed the charset as second argument. mergeDocuments
        // keeps its charset in a local variable, so the second parameter is presumably the
        // global MIME type - confirm against the Document.mergeDocuments signature.
        final Document result = Document.mergeDocuments(location, mimeType, docs);

        // expected to be the newest test date
        assertEquals(LASTMOD_MESSAGE, lastmodDateMax.getTime(), result.getLastModified().getTime());
    }

    /**
     * Verifies that the merged last-modified date does not depend on the
     * order of the documents: with the newest document first, the result
     * must still be the newest date (guards against an implementation that
     * simply keeps the date of the last document).
     *
     * @throws MalformedURLException if the fixed test URL cannot be parsed
     */
    @Test
    public void testMergeDocuments_lastModifiedNewestFirst() throws MalformedURLException {
        final Date lastmodDateMin = new Date(10 * 1000); // min test date
        final Date lastmodDateMax = new Date(20 * 1000); // max test date

        final DigestURL location = new DigestURL("http://localhost/test.html");
        final String mimeType = "test/html";
        final String charset = Charset.defaultCharset().name();

        final Document[] docs = new Document[] {
            createDocument(location, mimeType, charset, lastmodDateMax),
            createDocument(location, mimeType, charset, lastmodDateMin)
        };

        final Document result = Document.mergeDocuments(location, mimeType, docs);

        assertEquals(LASTMOD_MESSAGE, lastmodDateMax.getTime(), result.getLastModified().getTime());
    }

    /**
     * Builds a minimal document whose only distinguishing property is its
     * last-modified date; every optional constructor argument is left
     * {@code null} (or zero / {@code false}).
     *
     * @param location     URL of the document, also used for the host and token arguments
     * @param mimeType     MIME type handed to the constructor
     * @param charset      charset name handed to the constructor
     * @param lastModified last-modified date of the document
     * @return the newly created document
     */
    private static Document createDocument(final DigestURL location, final String mimeType,
            final String charset, final Date lastModified) {
        return new Document(
                location,
                mimeType,
                charset,
                null,
                null,
                null,
                null, // title
                null, // author
                location.getHost(),
                null,
                null,
                0.0d, 0.0d,
                location.toTokens(),
                null,
                null,
                null,
                false,
                lastModified);
    }
}

0 comments on commit 077d062

Please sign in to comment.