Skip to content

Commit

Permalink
more bugfixes to date parser
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6864 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed May 11, 2010
1 parent cf43bdc commit f23cbd2
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 1 deletion.
1 change: 1 addition & 0 deletions source/net/yacy/document/content/DCEntry.java
Expand Up @@ -96,6 +96,7 @@ public Date getDate() {
String d = this.get("docdatetime");
if (d == null) d = this.get("dc:date");
if (d == null) return null;
if (d.length() == 0) return null;
try {
return DateFormatter.parseISO8601(d);
} catch (ParseException e) {
Expand Down
3 changes: 3 additions & 0 deletions source/net/yacy/document/content/SurrogateReader.java
Expand Up @@ -42,6 +42,7 @@

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;


Expand Down Expand Up @@ -78,6 +79,8 @@ public SurrogateReader(final InputStream stream, int queueSize) throws IOExcepti
public void run() {
try {
this.saxParser.parse(this.stream, this);
} catch (SAXParseException e) {
Log.logException(e);
} catch (SAXException e) {
Log.logException(e);
} catch (IOException e) {
Expand Down
6 changes: 5 additions & 1 deletion source/net/yacy/kelondro/util/DateFormatter.java
Expand Up @@ -185,12 +185,16 @@ public static final String formatRFC1123(final Date date) {
public static Date parseISO8601(String s) throws ParseException {
// do some lazy checks here
s = s.trim();
while (s.length() > 0 && s.endsWith("?")) s = s.substring(0, s.length() - 1); // sometimes used if the writer is not sure about the date
if (s.startsWith("{")) s = s.substring(1);
if (s.endsWith("}")) s = s.substring(0, s.length() - 1);
if (s.startsWith("[")) s = s.substring(1);
if (s.endsWith("]")) s = s.substring(0, s.length() - 1);
while (s.charAt(0) > '9' || s.charAt(0) < '0') s = s.substring(1);
while (s.length() > 0 && (s.charAt(0) > '9' || s.charAt(0) < '0')) s = s.substring(1);
if (s.endsWith("--")) s = s.substring(0, s.length() - 2) + "00";
int p = s.indexOf(';'); if (p >= 0) s = s.substring(0, p); // a semicolon may be used to separate two dates from each other; then we take the first
p = s.indexOf(','); if (p >= 0) s = s.substring(0, p); // a comma may be used to separate two dates from each other; then we take the first
while (s.length() > 0 && s.endsWith("?")) s = s.substring(0, s.length() - 1); // sometimes used if the writer is not sure about the date

// now go for exact parsing
final Calendar cal = Calendar.getInstance(TZ_GMT, Locale.US);
Expand Down

0 comments on commit f23cbd2

Please sign in to comment.