Skip to content

Commit

Permalink
add extracted description/subject to docParser
Browse files Browse the repository at this point in the history
  • Loading branch information
reger committed Feb 15, 2015
1 parent f0a5188 commit 7e35518
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions source/net/yacy/document/parser/docParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
package net.yacy.document.parser;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.util.CommonPattern;
Expand Down Expand Up @@ -80,7 +82,7 @@ public Document[] parse(final AnchorURL location, final String mimeType,
throw new Parser.Failure("error in docParser, getText: " + e.getMessage(), location);
}
String title = (contents.length() > 240) ? contents.substring(0,240) : contents.toString().trim();
title.replaceAll("\r"," ").replaceAll("\n"," ").replaceAll("\t"," ").trim();
title = title.replaceAll("\r"," ").replaceAll("\n"," ").replaceAll("\t"," ").trim();
if (title.length() > 80) title = title.substring(0, 80);
int l = title.length();
while (true) {
Expand All @@ -97,6 +99,10 @@ public Document[] parse(final AnchorURL location, final String mimeType,
keywlist = null;
}

final String subject = extractor.getSummaryInformation().getSubject();
List<String> descriptions = new ArrayList<String>();
if (subject != null && !subject.isEmpty()) descriptions.add(subject);

Document[] docs;
docs = new Document[]{new Document(
location,
Expand All @@ -109,7 +115,7 @@ public Document[] parse(final AnchorURL location, final String mimeType,
extractor.getSummaryInformation().getAuthor(), // constructor can handle null
extractor.getDocSummaryInformation().getCompany(), // publisher
null,
null,
descriptions,
0.0f, 0.0f,
contents.toString(),
null,
Expand Down

0 comments on commit 7e35518

Please sign in to comment.