Permalink
Browse files

Do locale neutral case conversions in Classification

Required for people using Turkish language as their default system
locale, as with this locale the 'i' character has different upper and
lower case flavors than with other locales.
  • Loading branch information...
luccioman committed Nov 20, 2017
1 parent bab5f04 commit 9531b835988f971986b466dbb3e095a4355ec896
@@ -24,6 +24,7 @@
import java.io.File;
import java.io.FileInputStream;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
@@ -105,47 +106,47 @@ public String toString() {
/**
 * Splits {@code extString} with {@code CommonPattern.COMMA} and adds every
 * resulting token, lower-cased and trimmed, to {@code set}.
 * <p>
 * Lower-casing is done with {@link Locale#ROOT} only. A conversion with the
 * default locale would, under a Turkish default locale, turn 'I' into the
 * dotless 'ı' and register extensions under a spelling that never matches
 * the locale-neutral lookups.
 *
 * @param set the set receiving the normalized tokens
 * @param extString the string to split; nothing is added when null or empty
 */
private static void addSet(Set<String> set, final String extString) {
    if ((extString == null) || (extString.isEmpty())) return;
    for (final String token : CommonPattern.COMMA.split(extString, 0)) {
        set.add(token.toLowerCase(Locale.ROOT).trim());
    }
}
/**
 * Tells whether the given extension is contained in {@code textExtSet}.
 * The argument is trimmed and lower-cased with {@link Locale#ROOT} before
 * the lookup, so the result does not depend on the default system locale.
 *
 * @param textExt the extension to check, may be null
 * @return true when the normalized extension is in {@code textExtSet};
 *         false otherwise, including for a null argument
 */
public static boolean isTextExtension(String textExt) {
    if (textExt == null) return false;
    return textExtSet.contains(textExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether the given extension is contained in {@code mediaExtSet}.
 * The argument is trimmed and lower-cased with {@link Locale#ROOT} before
 * the lookup, so the result does not depend on the default system locale.
 *
 * @param mediaExt the extension to check, may be null
 * @return true when the normalized extension is in {@code mediaExtSet};
 *         false otherwise, including for a null argument
 */
public static boolean isMediaExtension(String mediaExt) {
    if (mediaExt == null) return false;
    return mediaExtSet.contains(mediaExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether the given extension is contained in {@code imageExtSet}.
 * The argument is trimmed and lower-cased with {@link Locale#ROOT} before
 * the lookup, so the result does not depend on the default system locale.
 *
 * @param imageExt the extension to check, may be null
 * @return true when the normalized extension is in {@code imageExtSet};
 *         false otherwise, including for a null argument
 */
public static boolean isImageExtension(final String imageExt) {
    if (imageExt == null) return false;
    return imageExtSet.contains(imageExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether the given extension is contained in {@code audioExtSet}.
 * The argument is trimmed and lower-cased with {@link Locale#ROOT} before
 * the lookup, so the result does not depend on the default system locale.
 *
 * @param audioExt the extension to check, may be null
 * @return true when the normalized extension is in {@code audioExtSet};
 *         false otherwise, including for a null argument
 */
public static boolean isAudioExtension(final String audioExt) {
    if (audioExt == null) return false;
    return audioExtSet.contains(audioExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether the given extension is contained in {@code videoExtSet}.
 * The argument is trimmed and lower-cased with {@link Locale#ROOT} before
 * the lookup, so the result does not depend on the default system locale.
 *
 * @param videoExt the extension to check, may be null
 * @return true when the normalized extension is in {@code videoExtSet};
 *         false otherwise, including for a null argument
 */
public static boolean isVideoExtension(final String videoExt) {
    if (videoExt == null) return false;
    return videoExtSet.contains(videoExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether the given extension is contained in {@code appsExtSet}.
 * The argument is trimmed and lower-cased with {@link Locale#ROOT} before
 * the lookup, so the result does not depend on the default system locale.
 *
 * @param appsExt the extension to check, may be null
 * @return true when the normalized extension is in {@code appsExtSet};
 *         false otherwise, including for a null argument
 */
public static boolean isApplicationExtension(final String appsExt) {
    if (appsExt == null) return false;
    return appsExtSet.contains(appsExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether the given extension is contained in {@code ctrlExtSet}.
 * The argument is trimmed and lower-cased with {@link Locale#ROOT} before
 * the lookup, so the result does not depend on the default system locale.
 *
 * @param ctrlExt the extension to check, may be null
 * @return true when the normalized extension is in {@code ctrlExtSet};
 *         false otherwise, including for a null argument
 */
public static boolean isControlExtension(final String ctrlExt) {
    if (ctrlExt == null) return false;
    return ctrlExtSet.contains(ctrlExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether the given extension is contained in {@code textExtSet},
 * {@code mediaExtSet} or {@code ctrlExtSet}.
 * <p>
 * The argument is normalized exactly once, with {@link Locale#ROOT}. A prior
 * conversion with the default locale must not happen: under a Turkish
 * default locale it would already have replaced 'I' by the dotless 'ı',
 * which a later locale-neutral conversion cannot undo.
 *
 * @param ext the extension to check, may be null
 * @return true when the normalized extension is in one of the three sets;
 *         false otherwise, including for a null argument
 */
public static boolean isAnyKnownExtension(String ext) {
    if (ext == null) return false;
    final String normalized = ext.trim().toLowerCase(Locale.ROOT);
    return textExtSet.contains(normalized) || mediaExtSet.contains(normalized) || ctrlExtSet.contains(normalized);
}
@@ -182,7 +183,7 @@ public static ContentDomain getContentDomainFromMime(final String mime) {
/**
 * Tells whether the given MIME type string starts with "IMAGE", ignoring
 * case.
 * <p>
 * The upper-casing is done with {@link Locale#ROOT}: with the default locale
 * set to Turkish, {@code "image".toUpperCase()} yields "İMAGE" (dotted
 * capital I) and the prefix check would fail for lower-case input.
 *
 * @param mimeType the MIME type to check, may be null
 * @return true when the upper-cased value starts with "IMAGE"; false
 *         otherwise, including for a null argument
 */
public static boolean isPictureMime(final String mimeType) {
    if (mimeType == null) return false;
    return mimeType.toUpperCase(Locale.ROOT).startsWith("IMAGE");
}
/**
 * Table whose entries are read as (file extension, MIME type) string pairs.
 * The ext2mime methods query it with the lower-cased extension as key.
 */
private static final Properties mimeTable = new Properties();
@@ -202,10 +203,10 @@ public static void init(final File mimeFile) {
for (Entry<Object, Object> entry: mimeTable.entrySet()) {
String ext = (String) entry.getKey();
String mime = (String) entry.getValue();
if (mime.startsWith("text/")) textExtSet.add(ext.toLowerCase());
if (mime.startsWith("audio/")) audioExtSet.add(ext.toLowerCase());
if (mime.startsWith("video/")) videoExtSet.add(ext.toLowerCase());
if (mime.startsWith("application/")) appsExtSet.add(ext.toLowerCase());
if (mime.startsWith("text/")) textExtSet.add(ext.toLowerCase(Locale.ROOT));
if (mime.startsWith("audio/")) audioExtSet.add(ext.toLowerCase(Locale.ROOT));
if (mime.startsWith("video/")) videoExtSet.add(ext.toLowerCase(Locale.ROOT));
if (mime.startsWith("application/")) appsExtSet.add(ext.toLowerCase(Locale.ROOT));
}
}
@@ -214,11 +215,11 @@ public static int countMimes() {
}
/**
 * Looks up the MIME type registered in {@code mimeTable} for an extension.
 * The key is the extension lower-cased with {@link Locale#ROOT}, so the
 * lookup does not depend on the default system locale.
 *
 * @param ext the file extension, may be null
 * @return the registered MIME type; "application/octet-stream" when
 *         {@code ext} is null, or when it is empty and has no entry;
 *         otherwise "application/" followed by {@code ext} as given when
 *         no entry exists
 */
public static String ext2mime(final String ext) {
    if (ext == null) return "application/octet-stream";
    // The fallback keeps the caller's original casing of the extension.
    final String fallback = "application/" + (ext.length() == 0 ? "octet-stream" : ext);
    return mimeTable.getProperty(ext.toLowerCase(Locale.ROOT), fallback);
}
/**
 * Looks up the MIME type registered in {@code mimeTable} for an extension,
 * with a caller-supplied default. The key is the extension lower-cased with
 * {@link Locale#ROOT}, so the lookup does not depend on the default system
 * locale.
 *
 * @param ext the file extension, may be null
 * @param dfltMime the value returned when {@code ext} is null or has no entry
 * @return the registered MIME type, or {@code dfltMime}
 */
public static String ext2mime(final String ext, final String dfltMime) {
    if (ext == null) return dfltMime;
    return mimeTable.getProperty(ext.toLowerCase(Locale.ROOT), dfltMime);
}
public static String url2mime(final MultiProtocolURL url, final String dfltMime) {
@@ -20,6 +20,7 @@
package net.yacy.cora.document.analysis;
import java.io.File;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
@@ -42,9 +43,55 @@ public static void setUpClass() {
*/
@Test
public void testExt2mime_String() {
    // Upper and lower case variants must resolve to the same media type.
    assertEquals("application/x-compress", Classification.ext2mime("Z"));
    assertEquals("application/x-compress", Classification.ext2mime("z"));
    // "TIFF" contains an 'I', whose case conversion is locale-dependent.
    assertEquals("image/tiff", Classification.ext2mime("TIFF"));
    assertEquals("image/tiff", Classification.ext2mime("tiff"));
    // The default differs from the expected value on purpose: otherwise the
    // assertion would also pass when the table lookup misses.
    assertEquals("image/tiff", Classification.ext2mime("TIFF", "application/octet-stream"));
    assertEquals("image/tiff", Classification.ext2mime("tiff", "application/octet-stream"));
}
/**
 * Test of the isNNNExtension methods with lower and upper case samples,
 * notably containing the 'i' character, whose case conversion is different
 * with the Turkish locale. The default locale is switched to Turkish for the
 * duration of the test so that the locale-dependent case is exercised on
 * every system, and it is restored afterwards.
 */
@Test
public void testIsExtension() {
    final java.util.Locale initialLocale = java.util.Locale.getDefault();
    try {
        java.util.Locale.setDefault(java.util.Locale.forLanguageTag("tr-TR"));

        assertTrue(Classification.isApplicationExtension("ISO"));
        assertTrue(Classification.isApplicationExtension("iso"));

        assertTrue(Classification.isAudioExtension("AIF"));
        assertTrue(Classification.isAudioExtension("aif"));

        assertTrue(Classification.isVideoExtension("AVI"));
        assertTrue(Classification.isVideoExtension("avi"));

        assertTrue(Classification.isImageExtension("GIF"));
        assertTrue(Classification.isImageExtension("gif"));

        assertTrue(Classification.isControlExtension("SHA1"));
        assertTrue(Classification.isControlExtension("sha1"));

        assertTrue(Classification.isMediaExtension("GIF"));
        assertTrue(Classification.isMediaExtension("gif"));

        assertTrue(Classification.isAnyKnownExtension("GIF"));
        assertTrue(Classification.isAnyKnownExtension("gif"));
    } finally {
        // Never leak the modified default locale to other tests.
        java.util.Locale.setDefault(initialLocale);
    }
}
/**
 * Checks isPictureMime against sample media types in both lower and upper
 * case: image types must be accepted, other types rejected.
 */
@Test
public void testIsPictureMime() {
    final String[] pictureTypes = { "image/jpeg", "IMAGE/JPEG" };
    for (final String mediaType : pictureTypes) {
        assertTrue(Classification.isPictureMime(mediaType));
    }

    final String[] otherTypes = { "text/html", "TEXT/HTML" };
    for (final String mediaType : otherTypes) {
        assertFalse(Classification.isPictureMime(mediaType));
    }
}
}

0 comments on commit 9531b83

Please sign in to comment.