Skip to content

Commit

Permalink
*) Trying to be more tolerant of wrong charset names
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2760 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
theli committed Oct 13, 2006
1 parent e9afe39 commit decb09d
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions source/de/anomic/plasma/plasmaParser.java
Expand Up @@ -352,6 +352,18 @@ public static String getRealCharsetEncoding(String encoding) {
else if ((c >= '0') && (c <= '9')) encoding = "windows-" + encoding.substring(7);
}

if (encoding.toLowerCase().startsWith("iso") && encoding.length() > 3) {
char c = encoding.charAt(3);
if (c == '_') encoding = "ISO-" + encoding.substring(4);
else if ((c >= '0') && (c <= '9')) encoding = "ISO-" + encoding.substring(3);
}

if (encoding.toLowerCase().startsWith("iso") && encoding.length() > 8) {
char c = encoding.charAt(8);
if (c == '_') encoding = encoding.substring(0,8) + "-" + encoding.substring(9);
else if ((c >= '0') && (c <= '9')) encoding = encoding.substring(0,8) + "-" + encoding.substring(8);
}

return encoding;
}

Expand Down

0 comments on commit decb09d

Please sign in to comment.