Skip to content

Commit

Permalink
avoid string conversion
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7584 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Mar 11, 2011
1 parent 694fa3a commit dc0db35
Show file tree
Hide file tree
Showing 10 changed files with 23 additions and 30 deletions.
2 changes: 1 addition & 1 deletion htroot/api/yacydoc.java
Expand Up @@ -102,7 +102,7 @@ public static serverObjects respond(final RequestHeader header, final serverObje
prop.putXML("dc_date", entry.moddate().toString());
prop.putXML("dc_type", String.valueOf(entry.doctype()));
prop.putXML("dc_identifier", metadata.url().toNormalform(false, true));
prop.putXML("dc_language", entry.language());
prop.putXML("dc_language", UTF8.String(entry.language()));

prop.put("yacy_urlhash", metadata.url().hash());
prop.putXML("yacy_loaddate", entry.loaddate().toString());
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/crawler/ResultURLs.java
Expand Up @@ -221,7 +221,7 @@ public static boolean remove(final String urlHash) {
public static void main(final String[] args) {
try {
final DigestURI url = new DigestURI("http", "www.yacy.net", 80, "/");
final URIMetadataRow urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), "de", 0, 0, 0, 0, 0, 0);
final URIMetadataRow urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), "de".getBytes(), 0, 0, 0, 0, 0, 0);
EventOrigin stackNo = EventOrigin.LOCAL_CRAWLING;
System.out.println("valid test:\n=======");
// add
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/data/YMarkTables.java
Expand Up @@ -429,7 +429,7 @@ public static EnumMap<METADATA, String> getMetadata(final byte[] urlHash, final
metadata.put(METADATA.SNIPPET, String.valueOf(urlEntry.snippet()));
metadata.put(METADATA.WORDCOUNT, String.valueOf(urlEntry.wordCount()));
metadata.put(METADATA.MIMETYPE, String.valueOf(urlEntry.doctype()));
metadata.put(METADATA.LANGUAGE, urlEntry.language());
metadata.put(METADATA.LANGUAGE, UTF8.String(urlEntry.language()));

final URIMetadataRow.Components meta = urlEntry.metadata();
if (meta != null) {
Expand Down
8 changes: 4 additions & 4 deletions source/de/anomic/search/Segment.java
Expand Up @@ -218,13 +218,13 @@ private int addPageIndex(
Map.Entry<String, Word> wentry;
String word;
int len = (document == null) ? urlLength : document.dc_title().length();
WordReferenceRow ientry = new WordReferenceRow(UTF8.String(url.hash()),
WordReferenceRow ientry = new WordReferenceRow(url.hash(),
urlLength, urlComps, len,
condenser.RESULT_NUMB_WORDS,
condenser.RESULT_NUMB_SENTENCES,
urlModified.getTime(),
System.currentTimeMillis(),
language,
UTF8.getBytes(language),
doctype,
outlinksSame, outlinksOther);
Word wprop;
Expand All @@ -247,10 +247,10 @@ private int addPageIndex(
try {
container = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhash, 1);
container.add(ientry);
rankingProcess.add(container, true, sourceName, -1);
} catch (RowSpaceExceededException e) {
continue;
}
rankingProcess.add(container, true, sourceName, -1);
}
}
if (rankingProcess != null) rankingProcess.oneFeederTerminated();
Expand Down Expand Up @@ -339,7 +339,7 @@ public URIMetadataRow storeDocument(
condenser.RESULT_NUMB_WORDS, // word count
Response.docType(document.dc_format()), // doctype
condenser.RESULT_FLAGS, // flags
language, // language
UTF8.getBytes(language), // language
document.inboundLinks(), // inbound links
document.outboundLinks(), // outbound links
document.getAudiolinks().size(), // laudio
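Expand Down
4 changes: 2 additions & 2 deletions source/net/yacy/kelondro/data/citation/CitationReferenceRow.java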
Expand Up @@ -73,7 +73,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ {
private final Row.Entry entry;

public CitationReferenceRow(
final String urlHash,
final byte[] urlHash,
final long lastmodified, // last-modified time of the document where word appears
final long updatetime, // update time
final int posintext, // occurrence of url; counts the url
Expand All @@ -83,7 +83,7 @@ public CitationReferenceRow(
final int urlComps, // number of path components
final byte typeofurl // outlinks to same domain
) {
assert (urlHash.length() == 12) : "urlhash = " + urlHash;
assert (urlHash.length == 12) : "urlhash = " + UTF8.String(urlHash);
this.entry = citationRow.newEntry();
final int mddlm = MicroDate.microDateDays(lastmodified);
final int mddct = MicroDate.microDateDays(updatetime);
Expand Down
4 changes: 2 additions & 2 deletions source/net/yacy/kelondro/data/image/ImageReferenceRow.java
Expand Up @@ -147,7 +147,7 @@ public ImageReferenceRow(final byte[] urlHash,
this.entry.setCol(col_reserve2, 0);
}

public ImageReferenceRow(final String urlHash,
public ImageReferenceRow(final byte[] urlHash,
final int urlLength, // byte-length of complete URL
final int urlComps, // number of path components
final int titleLength, // length of description/length (longer are better?)
Expand All @@ -160,7 +160,7 @@ public ImageReferenceRow(final String urlHash,
final int outlinksSame, // outlinks to same domain
final int outlinksOther // outlinks to other domain
) {
assert (urlHash.length() == 12) : "urlhash = " + urlHash;
assert (urlHash.length == 12) : "urlhash = " + UTF8.String(urlHash);
this.entry = urlEntryRow.newEntry();
final int mddlm = MicroDate.microDateDays(lastmodified);
final int mddct = MicroDate.microDateDays(updatetime);
Expand Down
2 changes: 1 addition & 1 deletion source/net/yacy/kelondro/data/meta/URIMetadata.java
Expand Up @@ -54,7 +54,7 @@ public interface URIMetadata {

public char doctype();

public String language();
public byte[] language();

public int size();

Expand Down
10 changes: 5 additions & 5 deletions source/net/yacy/kelondro/data/meta/URIMetadataRow.java
Expand Up @@ -68,7 +68,7 @@ public class URIMetadataRow implements URIMetadata {
"Cardinal wc-3 {b256}, " + // size of file by number of words; for video and audio: seconds
"byte[] dt-1, " + // doctype, taken from extension or any other heuristic
"Bitfield flags-4, " + // flags; any stuff (see Word-Entity definition)
"String lang-2, " + // language
"byte[] lang-2, " + // language
"Cardinal llocal-2 {b256}, " + // # of outlinks to same domain; for video and image: width
"Cardinal lother-2 {b256}, " + // # of outlinks to outside domain; for video and image: height
"Cardinal limage-2 {b256}, " + // # of embedded image links
Expand Down Expand Up @@ -130,7 +130,7 @@ public URIMetadataRow(
final int wc,
final char dt,
final Bitfield flags,
final String lang,
final byte[] lang,
final int llocal,
final int lother,
final int laudio,
Expand All @@ -150,7 +150,7 @@ public URIMetadataRow(
this.entry.setCol(col_wc, wc);
this.entry.setCol(col_dt, new byte[]{(byte) dt});
this.entry.setCol(col_flags, flags.bytes());
this.entry.setCol(col_lang, UTF8.getBytes(lang));
this.entry.setCol(col_lang, lang);
this.entry.setCol(col_llocal, llocal);
this.entry.setCol(col_lother, lother);
this.entry.setCol(col_limage, limage);
Expand Down Expand Up @@ -409,8 +409,8 @@ public char doctype() {
return (char) entry.getColByte(col_dt);
}

public String language() {
return this.entry.getColString(col_lang);
public byte[] language() {
return this.entry.getColBytes(col_lang, true);
}

public int size() {
Expand Down
13 changes: 4 additions & 9 deletions source/net/yacy/kelondro/data/word/WordReferenceRow.java
Expand Up @@ -162,20 +162,20 @@ public WordReferenceRow(
this.entry.setCol(col_reserve2, 0);
}

public WordReferenceRow(final String urlHash,
public WordReferenceRow(final byte[] urlHash,
final int urlLength, // byte-length of complete URL
final int urlComps, // number of path components
final int titleLength, // length of description/length (longer are better?)
final int wordcount, // total number of words
final int phrasecount, // total number of phrases
final long lastmodified, // last-modified time of the document where word appears
final long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short
final String language, // (guessed) language of document
final byte[] language, // (guessed) language of document
final char doctype, // type of document
final int outlinksSame, // outlinks to same domain
final int outlinksOther // outlinks to other domain
) {
assert (urlHash.length() == 12) : "urlhash = " + urlHash;
assert (urlHash.length == 12) : "urlhash = " + UTF8.String(urlHash);
this.entry = urlEntryRow.newEntry();
final int mddlm = MicroDate.microDateDays(lastmodified);
final int mddct = MicroDate.microDateDays(updatetime);
Expand All @@ -186,7 +186,7 @@ public WordReferenceRow(final String urlHash,
this.entry.setCol(col_wordsInText, wordcount);
this.entry.setCol(col_phrasesInText, phrasecount);
this.entry.setCol(col_doctype, new byte[]{(byte) doctype});
this.entry.setCol(col_language, ((language == null) || (language.length() != urlEntryRow.width(col_language))) ? "uk" : language);
this.entry.setCol(col_language, ((language == null) || (language.length != urlEntryRow.width(col_language))) ? WordReferenceVars.default_language : language);
this.entry.setCol(col_llocal, outlinksSame);
this.entry.setCol(col_lother, outlinksOther);
this.entry.setCol(col_urlLength, urlLength);
Expand All @@ -204,11 +204,6 @@ public void setWord(final Word word) {
this.entry.setCol(col_posofphrase, word.numOfPhrase);
}

public WordReferenceRow(final String urlHash, final String code) {
// the code is the external form of the row minus the leading urlHash entry
this.entry = urlEntryRow.newEntry(UTF8.getBytes((urlHash + code)));
}

/**
 * Creates a word reference row from a single string.
 * The string is handed unchanged to {@code urlEntryRow.newEntry(external, true)},
 * and the resulting entry becomes this row's backing entry.
 * NOTE(review): the meaning of the {@code true} argument is not visible here;
 * confirm against the {@code newEntry(String, boolean)} declaration.
 *
 * @param external presumably the row in its external (string) form, as the
 *                 parameter name suggests; verify against callers
 */
public WordReferenceRow(final String external) {
this.entry = urlEntryRow.newEntry(external, true);
}
Expand Down
6 changes: 2 additions & 4 deletions source/net/yacy/kelondro/index/Row.java
Expand Up @@ -478,10 +478,8 @@ public final long incCol(final int column, final long c) {
}

public final String getColString(final int column) {
return getColString(colstart[column], row[column].cellwidth);
}

private final String getColString(final int clstrt, int length) {
final int clstrt = colstart[column];
int length = row[column].cellwidth;
if (rowinstance[offset + clstrt] == 0) return null;
assert length <= rowinstance.length - offset - clstrt;
if (length > rowinstance.length - offset - clstrt) length = rowinstance.length - offset - clstrt;
Expand Down

0 comments on commit dc0db35

Please sign in to comment.