Skip to content

Commit

Permalink
move cutUrlText to nxTools,
Browse files Browse the repository at this point in the history
maximum length of URLs (title) on the search page is now 120 chars


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1060 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
borg-0300 committed Nov 11, 2005
1 parent 9158845 commit 5778428
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 41 deletions.
36 changes: 2 additions & 34 deletions source/de/anomic/plasma/plasmaCrawlLURL.java
Expand Up @@ -68,6 +68,7 @@
import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;

Expand Down Expand Up @@ -301,39 +302,6 @@ private static String daydate(Date date) {
}
}

/**
 * Shortens a URL-like string to at most <code>len</code> characters for display.<br>
 *
 * Only strings that contain <code>"://"</code> and are longer than <code>len</code>
 * are changed; everything else is returned as is.<br>
 *
 * Example returns:<br>
 * <dl><dt>normal domain:</dt><dd>http://domain.net/leftpath..rightpath</dd>
 * <dt>long domain:</dt><dd>http://very_very_long_domain.net/le..</dd></dl>
 *
 * @param url the text to shorten, usually a URL; <code>null</code> is returned unchanged
 * @param len the maximum length of the returned string; if it is smaller than 2
 *            there is no room for the ".." marker and the URL is simply truncated
 * @return the shortened string, or the original string if no shortening applies
 */
public static String cutUrlText(String url, int len) {
    // This is contributed by Thomas Quella (borg-0300)
    if (url == null) {
        return null; // nothing to shorten
    }
    int la = url.length();
    if (la > len) {
        int cpos = url.indexOf("://");
        if (cpos >= 0) {
            if (len < 2) {
                // ".." does not fit; substring(0, len - 2) would throw here
                url = url.substring(0, (len > 0) ? len : 0);
            } else {
                cpos = url.indexOf("/", cpos + 3); // first slash after the host part
                if ((cpos >= 0) && (cpos < len - (len / 3))) { // at least 1/3 characters for the path
                    final int lb = ((len - cpos) / 2) - 1;
                    if (lb * 2 + 2 + cpos < len) { la--; } // if smaller(odd), half right path + 1
                    url = url.substring(0, cpos + lb).concat("..").concat(url.substring(la - lb));
                } else {
                    // no slash at end, very crazy domain or very short len
                    url = url.substring(0, len - 2).concat("..");
                }
            }
        } // NO URL !?
    } // URL < len
    return url;
}

public serverObjects genTableProps(int tabletype, int lines, boolean showInit, boolean showExec, String dfltInit, String dfltExec, String feedbackpage, boolean makeLink) {
serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps tabletype=" + tabletype + " lines=" + lines +
" showInit=" + showInit + " showExec=" + showExec +
Expand Down Expand Up @@ -387,7 +355,7 @@ public serverObjects genTableProps(int tabletype, int lines, boolean showInit, b
serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps Remove ':80' URL=" + txt);
}

txt = cutUrlText(txt, 72); // shorten the string text like a URL
txt = nxTools.cutUrlText(txt, 72); // shorten the string text like a URL

cachepath = (urle.url() == null) ? "-not-cached-" : url.substring(url.indexOf("://") + 3);
if (cachepath.endsWith("/")) cachepath = cachepath + "ndx";
Expand Down
14 changes: 8 additions & 6 deletions source/de/anomic/plasma/plasmaSwitchboard.java
Expand Up @@ -5,9 +5,9 @@
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004, 2005
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
// $LastChangedDate:$
// $LastChangedRevision:$
// $LastChangedBy:$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -139,6 +139,7 @@ this class is also the core of the http crawling.
import de.anomic.server.logging.serverLog;
import de.anomic.tools.bitfield;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyClient;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
Expand Down Expand Up @@ -607,7 +608,8 @@ private void initProfiles() throws IOException {
(getConfig(STR_REMOTEPROFILE, "").length() == 0) ||
(profiles.getEntry(getConfig(STR_REMOTEPROFILE, "")) == null)) {
// generate new default entry for remote crawling
defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, false, true, true, false, true, true, false);
// defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, false, true, true, false, true, true, false);
defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, true, true, true, true, true, true, true, false);
setConfig(STR_REMOTEPROFILE, defaultRemoteProfile.handle());
} else {
defaultRemoteProfile = profiles.getEntry(getConfig(STR_REMOTEPROFILE, ""));
Expand Down Expand Up @@ -1657,7 +1659,7 @@ public serverObjects searchFromLocal(plasmaSearchQuery query) {
prop.put("results_" + i + "_description", descr);
prop.put("results_" + i + "_url", urlstring);
prop.put("results_" + i + "_urlhash", urlhash);
prop.put("results_" + i + "_urlname", urlname);
prop.put("results_" + i + "_urlname", nxTools.cutUrlText(urlname, 120));
prop.put("results_" + i + "_date", dateString(urlentry.moddate()));
prop.put("results_" + i + "_size", Long.toString(urlentry.size()));
prop.put("results_" + i + "_words",URLEncoder.encode(query.queryWords.toString(),"UTF-8"));
Expand All @@ -1667,7 +1669,7 @@ public serverObjects searchFromLocal(plasmaSearchQuery query) {
prop.put("results_" + i + "_snippet_text", "");
} else {
prop.put("results_" + i + "_snippet", 1);
prop.put("results_" + i + "_snippet_text", snippet.line.toString().trim());
prop.put("results_" + i + "_snippet_text", snippet.line.trim());
}
i++;
}
Expand Down
38 changes: 37 additions & 1 deletion source/de/anomic/tools/nxTools.java
Expand Up @@ -3,7 +3,10 @@
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
// last major change: 04.05.2004
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -141,4 +144,37 @@ public static String awk(String sentence, String separator, int count) {
return null;
}

/**
 * Shortens a URL string to at most <code>len</code> characters for display.<br>
 *
 * Strings that are not longer than <code>len</code>, or that do not contain
 * <code>"://"</code>, are returned unchanged.<br>
 *
 * Example returns:<br>
 * <dl><dt>normal domain:</dt><dd>http://domain.net/leftpath..rightpath</dd>
 * <dt>long domain:</dt><dd>http://very_very_long_domain.net/le..</dd></dl>
 *
 * @param url the text to shorten, usually a URL; <code>null</code> is returned unchanged
 * @param len the maximum length of the returned string; below 2 the ".." marker
 *            does not fit, so the URL is truncated without it
 * @return the shortened string, or the original string if no shortening applies
 */
public static String cutUrlText(String url, int len) {
    // This is contributed by Thomas Quella (borg-0300)
    if (url == null) {
        return null; // nothing to shorten
    }
    final int la = url.length();
    if (la <= len) {
        return url; // already short enough
    }
    final int schemeEnd = url.indexOf("://");
    if (schemeEnd < 0) {
        return url; // NO URL !?
    }
    if (len < 2) {
        // no room for ".."; substring(0, len - 2) would throw for these lengths
        return url.substring(0, (len > 0) ? len : 0);
    }
    final int pathStart = url.indexOf("/", schemeEnd + 3); // first slash after the host part
    if ((pathStart >= 0) && (pathStart < len - (len / 3))) { // at least 1/3 characters for the path
        final int lb = ((len - pathStart) / 2) - 1;
        // if smaller(odd), the right half of the path gets one character more
        final int tail = (lb * 2 + 2 + pathStart < len) ? lb + 1 : lb;
        return url.substring(0, pathStart + lb).concat("..").concat(url.substring(la - tail));
    }
    // no slash at end, very crazy domain or very short len
    return url.substring(0, len - 2).concat("..");
}

}

0 comments on commit 5778428

Please sign in to comment.