Skip to content

Commit

Permalink
* added some conversions to and from UTF-8
Browse files Browse the repository at this point in the history
* this might fix problems on Windows systems
  (like http://forum.yacy-websuche.de/viewtopic.php?f=6&t=1824)


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5574 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
f1ori committed Feb 5, 2009
1 parent 65a1de6 commit 76cdc59
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 23 deletions.
35 changes: 28 additions & 7 deletions source/de/anomic/kelondro/blob/BLOBTree.java
Expand Up @@ -39,6 +39,7 @@

import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;

import de.anomic.kelondro.index.Row;
Expand Down Expand Up @@ -179,14 +180,22 @@ private byte[] elementKey(String key, final int record) {
if (key.length() > keylen) throw new RuntimeException("key len (" + key.length() + ") out of limit (" + keylen + "): '" + key + "'");
while (key.length() < keylen) key = key + fillChar;
key = key + counter(record);
return key.getBytes();
try {
return key.getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
return key.getBytes();
}
}

/**
 * Recovers the key text from a raw element key by cutting off the trailing
 * fill characters (and anything stored beyond the first {@code keylen} bytes).
 *
 * @param rawKey the raw key bytes, decoded as UTF-8
 * @return the decoded key with trailing fill characters removed; the first
 *         byte is always kept, even if it is itself a fill character
 */
String origKey(final byte[] rawKey) {
    // start at the last byte of the key part, clamped to the array length
    int n = keylen - 1;
    if (n >= rawKey.length) n = rawKey.length - 1;
    // walk backwards over the padding; index 0 is never stripped
    while ((n > 0) && (rawKey[n] == (byte) fillChar)) n--;
    try {
        return new String(rawKey, 0, n + 1, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // UTF-8 is a mandatory charset, so this is only a last-resort fallback
        return new String(rawKey, 0, n + 1);
    }
}

public class keyIterator implements CloneableIterator<byte[]> {
Expand All @@ -210,7 +219,11 @@ public boolean hasNext() {
/**
 * Returns the pending key and advances the iterator to the following one.
 *
 * The pending key is passed through {@code origKey} so that the caller
 * receives the key without its fill characters, encoded as UTF-8.
 *
 * @return the UTF-8 bytes of the next key
 * @throws NullPointerException if no key is pending, i.e. {@code n()} returned
 *         {@code null} on the previous advance
 */
public byte[] next() {
    final String result = nextKey;
    // fetch the successor before returning, so nextKey always holds the upcoming key
    nextKey = n();
    try {
        return origKey(result.getBytes("UTF-8")).getBytes("UTF-8");
    } catch (UnsupportedEncodingException e) {
        // UTF-8 is a mandatory charset, so this is only a last-resort fallback
        return origKey(result.getBytes()).getBytes();
    }
}

public void remove() {
Expand All @@ -228,8 +241,16 @@ private String n() {
if (nt == null) return null;
g = nt.getColBytes(0);
if (g == null) return null;
k = new String(g, 0, keylen);
v = new String(g, keylen, counterlen);
try {
k = new String(g, 0, keylen, "UTF-8");
} catch (UnsupportedEncodingException e1) {
k = new String(g, 0, keylen);
}
try {
v = new String(g, keylen, counterlen, "UTF-8");
} catch (UnsupportedEncodingException e1) {
v = new String(g, keylen, counterlen);
}
try {
c = Integer.parseInt(v, 16);
} catch (final NumberFormatException e) {
Expand Down Expand Up @@ -287,7 +308,7 @@ synchronized int get(final String key, final int pos) throws IOException {
}

/**
 * Reads the complete content stored under the given key.
 *
 * @param key the key as UTF-8 encoded bytes; {@code null} yields {@code null}
 * @return the bytes returned by {@code readFully()} on the record accessor, or
 *         {@code null} if the key is {@code null} or {@code getRA} finds nothing
 * @throws IOException if the underlying access fails
 */
public synchronized byte[] get(final byte[] key) throws IOException {
    // same null tolerance as remove(byte[])
    if (key == null) return null;
    // UnsupportedEncodingException is an IOException and is covered by the throws clause
    final RandomAccessInterface ra = getRA(new String(key, "UTF-8"));
    if (ra == null) return null;
    return ra.readFully();
}
Expand Down Expand Up @@ -398,7 +419,7 @@ public synchronized void remove(final byte[] key) throws IOException {
if (key == null) return;
int recpos = 0;
byte[] k;
while (index.get(k = elementKey(new String(key), recpos)) != null) {
while (index.get(k = elementKey(new String(key, "UTF-8"), recpos)) != null) {
index.remove(k);
buffer.remove(k);
recpos++;
Expand Down
6 changes: 5 additions & 1 deletion source/de/anomic/kelondro/util/ByteArray.java
Expand Up @@ -143,7 +143,11 @@ public byte[] readBytes(final int from_pos, final int length) {
}

/**
 * Decodes a slice of the buffer as a UTF-8 string.
 *
 * @param from_pos start position of the slice, relative to this array's offset
 * @param length number of bytes to decode
 * @return the decoded string
 */
public String readString(final int from_pos, final int length) {
    try {
        return new String(buffer, this.offset + from_pos, length, "UTF-8");
    } catch (final UnsupportedEncodingException e) {
        // UTF-8 is a mandatory charset; if it is ever missing, decode with the
        // platform default rather than silently returning an empty string
        return new String(buffer, this.offset + from_pos, length);
    }
}

public String readString(final int from_pos, final int length, final String encoding) {
Expand Down
14 changes: 11 additions & 3 deletions source/de/anomic/kelondro/util/ByteBuffer.java
Expand Up @@ -188,7 +188,7 @@ public ByteBuffer append(final byte[] bb, final int of, final int le) {
}

/**
 * Appends the UTF-8 encoding of the given string.
 *
 * @param s the text to append
 * @return the result of the underlying {@code append(byte[])} call
 */
public ByteBuffer append(final String s) {
    try {
        return append(s.getBytes("UTF-8"));
    } catch (final UnsupportedEncodingException e) {
        // getBytes(String) throws a checked exception this method does not declare;
        // UTF-8 is a mandatory charset, so fall back to the platform default
        return append(s.getBytes());
    }
}

public ByteBuffer append(final String s, final String charset) throws UnsupportedEncodingException {
Expand Down Expand Up @@ -376,7 +376,11 @@ public int whitespaceEnd(final boolean includeNonLetterBytes) {


/**
 * Decodes the buffer content ({@code length} bytes starting at {@code offset})
 * as a UTF-8 string.
 *
 * @return the decoded content
 */
@Override
public String toString() {
    try {
        return new String(buffer, offset, length, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // UTF-8 is a mandatory charset, so this is only a last-resort fallback
        return new String(buffer, offset, length);
    }
}

public String toString(final String charsetName) {
Expand All @@ -388,7 +392,11 @@ public String toString(final String charsetName) {
}

/**
 * Decodes a slice of the buffer content as a UTF-8 string.
 *
 * @param left start of the slice, relative to {@code offset} (inclusive)
 * @param rightbound end of the slice, relative to {@code offset} (exclusive)
 * @return the decoded slice of {@code rightbound - left} bytes
 */
public String toString(final int left, final int rightbound) {
    try {
        return new String(buffer, offset + left, rightbound - left, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // the fallback must decode the same slice, not the whole buffer
        return new String(buffer, offset + left, rightbound - left);
    }
}

public Properties propParser(final String charset) {
Expand Down
16 changes: 8 additions & 8 deletions source/de/anomic/kelondro/util/FileUtils.java
Expand Up @@ -381,7 +381,7 @@ public static Map<String, String> loadMap(final File f) {
// load props
try {
final byte[] b = read(f);
return table(strings(b));
return table(strings(b, "UTF-8"));
} catch (final IOException e2) {
System.err.println("ERROR: " + f.toString() + " not found in settings path");
return null;
Expand All @@ -391,7 +391,7 @@ public static Map<String, String> loadMap(final File f) {
public static void saveMap(final File file, final Map<String, String> props, final String comment) throws IOException {
PrintWriter pw = null;
final File tf = new File(file.toString() + "." + (System.currentTimeMillis() % 1000));
pw = new PrintWriter(new BufferedOutputStream(new FileOutputStream(tf)));
pw = new PrintWriter(tf, "UTF-8");
pw.println("# " + comment);
String key, value;
for (final Map.Entry<String, String> entry: props.entrySet()) {
Expand All @@ -412,7 +412,7 @@ public static Set<String> loadSet(final File file, final int chunksize, final bo
final Set<String> set = (tree) ? (Set<String>) new TreeSet<String>() : (Set<String>) new HashSet<String>();
final byte[] b = read(file);
for (int i = 0; (i + chunksize) <= b.length; i++) {
set.add(new String(b, i, chunksize));
set.add(new String(b, i, chunksize, "UTF-8"));
}
return set;
}
Expand Down Expand Up @@ -443,8 +443,8 @@ public static void saveSet(final File file, final String format, final Set<Strin
}
if(os != null) {
for (final Iterator<String> i = set.iterator(); i.hasNext(); ) {
os.write((i.next()).getBytes());
if (sep != null) os.write(sep.getBytes());
os.write((i.next()).getBytes("UTF-8"));
if (sep != null) os.write(sep.getBytes("UTF-8"));
}
os.close();
}
Expand All @@ -470,12 +470,12 @@ public static void saveSet(final File file, final String format, final RowSet se
String key;
if (i.hasNext()) {
key = new String(i.next().getColBytes(0));
os.write(key.getBytes());
os.write(key.getBytes("UTF-8"));
}
while (i.hasNext()) {
key = new String((i.next()).getColBytes(0));
if (sep != null) os.write(sep.getBytes());
os.write(key.getBytes());
if (sep != null) os.write(sep.getBytes("UTF-8"));
os.write(key.getBytes("UTF-8"));
}
os.close();
}
Expand Down
9 changes: 6 additions & 3 deletions source/de/anomic/net/natLib.java
Expand Up @@ -46,7 +46,7 @@ public static String getDI604(final String password) {
rm status.htm
*/
try {
ArrayList<String> x = FileUtils.strings(HttpClient.wget("http://admin:"+password+"@192.168.0.1:80/status.htm", null, 10000));
ArrayList<String> x = FileUtils.strings(HttpClient.wget("http://admin:"+password+"@192.168.0.1:80/status.htm", null, 10000), "UTF-8");
x = nxTools.grep(x, 1, "IP Address");
if ((x == null) || (x.size() == 0)) return null;
final String line = nxTools.tail1(x);
Expand All @@ -58,7 +58,8 @@ public static String getDI604(final String password) {

private static String getWhatIsMyIP() {
try {
ArrayList<String> x = FileUtils.strings(HttpClient.wget("http://www.whatismyip.com/", null, 10000));
ArrayList<String> x = FileUtils.strings(
HttpClient.wget("http://www.whatismyip.com/", null, 10000), "UTF-8");
x = nxTools.grep(x, 0, "Your IP is");
final String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 4);
Expand All @@ -69,7 +70,9 @@ private static String getWhatIsMyIP() {

private static String getStanford() {
try {
ArrayList<String> x = FileUtils.strings(HttpClient.wget("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl", null, 10000));
ArrayList<String> x = FileUtils.strings(
HttpClient.wget("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl", null, 10000),
"UTF-8");
x = nxTools.grep(x, 0, "firewall protecting your browser");
final String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 7);
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/tools/loaderThreads.java
Expand Up @@ -178,7 +178,7 @@ public synchronized void feed(final byte[] v) {
int line = 0;
String s, key, value;
int p;
final ArrayList<String> lines = FileUtils.strings(v);
final ArrayList<String> lines = FileUtils.strings(v, "UTF-8");
try {
while ((this.run) && (line < lines.size())) {
// parse line and construct a property
Expand Down

0 comments on commit 76cdc59

Please sign in to comment.