Skip to content

Commit

Permalink
Made mime type and extension normalization locale independent.
Browse files Browse the repository at this point in the history
Previously, an upper-cased MIME type was incorrectly normalized when the
default locale was Turkish.
  • Loading branch information
luccioman committed Jun 26, 2017
1 parent 319231a commit 286f301
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 4 deletions.
18 changes: 14 additions & 4 deletions source/net/yacy/document/TextParser.java
Expand Up @@ -29,6 +29,7 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
Expand Down Expand Up @@ -128,6 +129,15 @@ public static Set<Parser> parsers() {
for (Set<Parser> pl: mime2parser.values()) c.addAll(pl);
return c;
}

/**
 * Lists the mime types that currently have an entry in the mime-to-parser registry.
 *
 * @return a newly allocated set holding a copy of the registry keys; changes made
 *         to the returned set do not affect the registry
 */
public static Set<String> supportedMimeTypes() {
    // Copy-construct so callers receive a snapshot rather than the live key view.
    return new HashSet<>(mime2parser.keySet());
}

private static void initParser(final Parser parser) {
String prototypeMime = null;
Expand All @@ -145,15 +155,15 @@ private static void initParser(final Parser parser) {
}

if (prototypeMime != null) for (String ext: parser.supportedExtensions()) {
ext = ext.toLowerCase();
ext = ext.toLowerCase(Locale.ROOT);
final String s = ext2mime.get(ext);
if (s != null && !s.equals(prototypeMime)) AbstractParser.log.info("Parser for extension '" + ext + "' was set to mime '" + s + "', overwriting with new mime '" + prototypeMime + "'.");
ext2mime.put(ext, prototypeMime);
}

for (String ext: parser.supportedExtensions()) {
// process the extensions
ext = ext.toLowerCase();
ext = ext.toLowerCase(Locale.ROOT);
LinkedHashSet<Parser> p0 = ext2parser.get(ext);
if (p0 == null) {
p0 = new LinkedHashSet<Parser>();
Expand Down Expand Up @@ -518,12 +528,12 @@ public static String mimeOf(final MultiProtocolURL url) {
}

/**
 * Looks up the mime type registered for a file extension.
 *
 * @param ext the file extension, in any letter case; must not be null
 * @return the value stored in {@code ext2mime} for the lower-cased extension,
 *         or null when no entry exists for it
 * @throws NullPointerException when ext is null
 */
public static String mimeOf(final String ext) {
    // Locale.ROOT keeps the lookup key stable whatever the default locale is:
    // with a Turkish default locale, a plain toLowerCase() maps 'I' to the
    // dotless 'ı' and the lookup would miss the registered extension.
    return ext2mime.get(ext.toLowerCase(Locale.ROOT));
}

/**
 * Normalizes a raw mime type value: lower-cases it independently of the default
 * locale, drops any parameters following the first ';' and trims surrounding
 * whitespace.
 *
 * @param mimeType the raw mime type, possibly with parameters (for example
 *                 "TEXT/HTML; charset=UTF-8"); may be null
 * @return the normalized mime type, or "application/octet-stream" when
 *         mimeType is null
 */
private static String normalizeMimeType(String mimeType) {
    if (mimeType == null) return "application/octet-stream";
    // Lower-case with Locale.ROOT only: a preceding locale-sensitive toLowerCase()
    // would already have turned 'I' into the dotless 'ı' under a Turkish default
    // locale, and that cannot be undone afterwards.
    mimeType = mimeType.toLowerCase(Locale.ROOT);
    final int pos = mimeType.indexOf(';');
    return ((pos < 0) ? mimeType.trim() : mimeType.substring(0, pos).trim());
}
Expand Down
55 changes: 55 additions & 0 deletions test/java/net/yacy/document/TextParserTest.java
@@ -0,0 +1,55 @@
// TextParserTest.java
// ---------------------------
// Copyright 2017 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package net.yacy.document;

import static org.junit.Assert.*;

import java.util.Locale;

import org.junit.Test;

/**
* Unit tests for the {@link TextParser} class.
*
* @author luccioman
*
*/
public class TextParserTest {

    /**
     * Test the TextParser.supportsMime() consistency with available locales.
     * Possible failure case: with the Turkish ("tr") language, 'I' lower cased
     * does not become 'i' but '\u005Cu0131' (the latin small letter 'ı'
     * character).
     */
    @Test
    public void testSupportsMimeLocaleConsistency() {
        // The default locale is JVM-wide state: remember it so that this test
        // does not leak the last iterated locale into tests running afterwards.
        final Locale initialDefault = Locale.getDefault();
        try {
            for (Locale locale : Locale.getAvailableLocales()) {
                Locale.setDefault(locale);
                for (String mimeType : TextParser.supportedMimeTypes()) {
                    // The test expects a null result for every registered mime type,
                    // even when it is passed upper-cased.
                    assertNull(locale + " " + mimeType, TextParser.supportsMime(mimeType.toUpperCase(Locale.ROOT)));
                }
            }
        } finally {
            Locale.setDefault(initialDefault);
        }
    }

}

0 comments on commit 286f301

Please sign in to comment.