Skip to content

Commit

Permalink
- added source to J7Zip-modified.jar and its license (changelog is st…
Browse files Browse the repository at this point in the history
…ill to come)

- moved HTML-*replace-methods from wikiCode to de.anomic.data.htmlTools
- prepared use of different wiki parsers as suggested here: http://www.yacy-forum.de/viewtopic.php?p=34444#34444

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3741 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
karlchenofhell committed May 20, 2007
1 parent 0a64047 commit 601fc7d
Show file tree
Hide file tree
Showing 30 changed files with 1,188 additions and 604 deletions.
2 changes: 1 addition & 1 deletion htroot/Bookmarks.java
Expand Up @@ -256,7 +256,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
while(count<max_count && it.hasNext()){
bookmark=switchboard.bookmarksDB.getBookmark((String)it.next());
if(bookmark!=null){
prop.put("bookmarks_"+count+"_link", de.anomic.data.wikiCode.replaceXMLEntities(bookmark.getUrl()));
prop.put("bookmarks_"+count+"_link", de.anomic.data.htmlTools.replaceXMLEntities(bookmark.getUrl()));
prop.put("bookmarks_"+count+"_title", bookmark.getTitle());
prop.put("bookmarks_"+count+"_description", bookmark.getDescription());
prop.put("bookmarks_"+count+"_date", serverDate.dateToiso8601(new Date(bookmark.getTimeStamp())));
Expand Down
8 changes: 4 additions & 4 deletions htroot/CacheAdmin_p.java
Expand Up @@ -159,7 +159,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
formatAnchor(prop, document.getEmaillinks(), "email");

prop.put("info_type_text",
de.anomic.data.wikiCode.replaceXMLEntities(new String(scraper.getText())));
de.anomic.data.htmlTools.replaceXMLEntities(new String(scraper.getText())));

i = 0;
final Iterator sentences = document.getSentences(false);
Expand Down Expand Up @@ -266,9 +266,9 @@ private static void formatAnchor(serverObjects prop, Map anchor, String extensio
descr = ((String) entry.getValue()).trim();
if (descr.length() == 0) { descr = "-"; }
prop.put("info_type_use." + extension + "_" + extension + "_" + i + "_name",
de.anomic.data.wikiCode.replaceXMLEntities(descr.replaceAll("\n", "").trim()));
de.anomic.data.htmlTools.replaceXMLEntities(descr.replaceAll("\n", "").trim()));
prop.put("info_type_use." + extension + "_" + extension + "_" + i + "_link",
de.anomic.data.wikiCode.replaceXMLEntities(entry.getKey().toString()));
de.anomic.data.htmlTools.replaceXMLEntities(entry.getKey().toString()));
i++;
}
prop.put("info_type_use." + extension, (i == 0) ? 0 : 1);
Expand All @@ -283,7 +283,7 @@ private static void formatImageAnchor(serverObjects prop, TreeSet anchor) {
ie = (htmlFilterImageEntry) iter.next();
prop.put("info_type_use.images_images_" + i + "_name", ie.alt().replaceAll("\n", "").trim());
prop.put("info_type_use.images_images_" + i + "_link",
de.anomic.data.wikiCode.replaceXMLEntities(ie.url().toNormalform()));
de.anomic.data.htmlTools.replaceXMLEntities(ie.url().toNormalform()));
i++;
}
prop.put("info_type_use.images", (i == 0) ? 0 : 1);
Expand Down
2 changes: 1 addition & 1 deletion htroot/CrawlProfileEditor_p.java
Expand Up @@ -152,7 +152,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("edit_entries_" + count + "_readonly_label", ee.label);
prop.put("edit_entries_" + count + "_readonly_type", ee.type);
if (ee.type == eentry.BOOLEAN) {
prop.put("edit_entries_" + count + "_readonly_type_checked", Boolean.getBoolean((String)val) ? 1 : 0);
prop.put("edit_entries_" + count + "_readonly_type_checked", Boolean.valueOf((String)val).booleanValue() ? 1 : 0);
} else {
prop.put("edit_entries_" + count + "_readonly_type_value", val);
}
Expand Down
14 changes: 7 additions & 7 deletions htroot/IndexCreateIndexingQueue_p.java
Expand Up @@ -47,7 +47,7 @@
import java.text.DecimalFormat;
import java.util.ArrayList;

import de.anomic.data.wikiCode;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlZURL;
Expand Down Expand Up @@ -151,11 +151,11 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
totalSize += entrySize;
initiator = yacyCore.seedDB.getConnected(pcentry.initiator());
prop.put("indexing-queue_list_"+entryCount+"_dark", (inProcess)? 2: ((dark) ? 1 : 0));
prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : wikiCode.replaceHTML(initiator.getName())));
prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : htmlTools.replaceHTML(initiator.getName())));
prop.put("indexing-queue_list_"+entryCount+"_depth", pcentry.depth());
prop.put("indexing-queue_list_"+entryCount+"_modified", pcentry.getModificationDate());
prop.put("indexing-queue_list_"+entryCount+"_anchor", (pcentry.anchorName()==null)?"":wikiCode.replaceHTML(pcentry.anchorName()));
prop.put("indexing-queue_list_"+entryCount+"_url", wikiCode.replaceHTML(pcentry.normalizedURLString()));
prop.put("indexing-queue_list_"+entryCount+"_anchor", (pcentry.anchorName()==null)?"":htmlTools.replaceHTML(pcentry.anchorName()));
prop.put("indexing-queue_list_"+entryCount+"_url", htmlTools.replaceHTML(pcentry.normalizedURLString()));
prop.put("indexing-queue_list_"+entryCount+"_size", bytesToString(entrySize));
prop.put("indexing-queue_list_"+entryCount+"_inProcess", (inProcess)?1:0);
prop.put("indexing-queue_list_"+entryCount+"_inProcess_hash", pcentry.urlHash());
Expand Down Expand Up @@ -199,9 +199,9 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
executorHash = entry.executor();
initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash);
executorSeed = yacyCore.seedDB.getConnected(executorHash);
prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : wikiCode.replaceHTML(initiatorSeed.getName())));
prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : wikiCode.replaceHTML(executorSeed.getName())));
prop.put("rejected_list_"+j+"_url", wikiCode.replaceHTML(url.toString()));
prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : htmlTools.replaceHTML(initiatorSeed.getName())));
prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : htmlTools.replaceHTML(executorSeed.getName())));
prop.put("rejected_list_"+j+"_url", htmlTools.replaceHTML(url.toString()));
prop.put("rejected_list_"+j+"_failreason", entry.anycause());
prop.put("rejected_list_"+j+"_dark", ((dark) ? 1 : 0));
dark = !dark;
Expand Down
6 changes: 3 additions & 3 deletions htroot/IndexCreateLoaderQueue_p.java
Expand Up @@ -43,7 +43,7 @@
// javac -classpath .:../classes IndexCreate_p.java
// if the shell's current path is HTROOT

import de.anomic.data.wikiCode;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlLoaderMessage;
import de.anomic.plasma.plasmaSwitchboard;
Expand Down Expand Up @@ -80,9 +80,9 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve

initiator = yacyCore.seedDB.getConnected(theMsg.initiator);
prop.put("loader-set_list_"+count+"_dark", ((dark) ? 1 : 0) );
prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : wikiCode.replaceHTML(initiator.getName())) );
prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : htmlTools.replaceHTML(initiator.getName())) );
prop.put("loader-set_list_"+count+"_depth", theMsg.depth );
prop.put("loader-set_list_"+count+"_url", wikiCode.replaceHTML(theMsg.url.toString())); // null pointer exception here !!! maybe url = null; check reason.
prop.put("loader-set_list_"+count+"_url", htmlTools.replaceHTML(theMsg.url.toString())); // null pointer exception here !!! maybe url = null; check reason.
dark = !dark;
count++;
}
Expand Down
10 changes: 5 additions & 5 deletions htroot/IndexCreateWWWGlobalQueue_p.java
Expand Up @@ -47,7 +47,7 @@
import java.util.Date;
import java.util.Locale;

import de.anomic.data.wikiCode;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlEntry;
import de.anomic.plasma.plasmaCrawlNURL;
Expand Down Expand Up @@ -120,12 +120,12 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
profileHandle = urle.profileHandle();
profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle);
prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) );
prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : wikiCode.replaceHTML(initiator.getName())) );
prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : wikiCode.replaceHTML(profileEntry.name())));
prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : htmlTools.replaceHTML(initiator.getName())) );
prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : htmlTools.replaceHTML(profileEntry.name())));
prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) );
prop.put("crawler-queue_list_"+showNum+"_anchor", wikiCode.replaceHTML(urle.name()));
prop.put("crawler-queue_list_"+showNum+"_url", wikiCode.replaceHTML(urle.url().toString()));
prop.put("crawler-queue_list_"+showNum+"_anchor", htmlTools.replaceHTML(urle.name()));
prop.put("crawler-queue_list_"+showNum+"_url", htmlTools.replaceHTML(urle.url().toString()));
prop.put("crawler-queue_list_"+showNum+"_hash", urle.urlhash());
dark = !dark;
showNum++;
Expand Down
10 changes: 5 additions & 5 deletions htroot/IndexCreateWWWLocalQueue_p.java
Expand Up @@ -51,7 +51,7 @@
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import de.anomic.data.wikiCode;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlEntry;
import de.anomic.plasma.plasmaCrawlNURL;
Expand Down Expand Up @@ -124,7 +124,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
} else if ((option.equals("Depth"))) {
value = Integer.toString(entry.depth());
} else if ((option.equals("Initiator"))) {
value = (entry.initiator()==null)?"proxy":wikiCode.replaceHTML(entry.initiator());
value = (entry.initiator()==null)?"proxy":htmlTools.replaceHTML(entry.initiator());
} else if ((option.equals("ModifiedDate"))) {
value = daydate(entry.loaddate());
}
Expand Down Expand Up @@ -172,12 +172,12 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
profileHandle = urle.profileHandle();
profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle);
prop.put("crawler-queue_list_"+showNum+"_dark", ((dark) ? 1 : 0) );
prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : wikiCode.replaceHTML(initiator.getName())) );
prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : htmlTools.replaceHTML(initiator.getName())) );
prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) );
prop.put("crawler-queue_list_"+showNum+"_anchor", wikiCode.replaceHTML(urle.name()));
prop.put("crawler-queue_list_"+showNum+"_url", wikiCode.replaceHTML(urle.url().toString()));
prop.put("crawler-queue_list_"+showNum+"_anchor", htmlTools.replaceHTML(urle.name()));
prop.put("crawler-queue_list_"+showNum+"_url", htmlTools.replaceHTML(urle.url().toString()));
prop.put("crawler-queue_list_"+showNum+"_hash", urle.urlhash());
dark = !dark;
showNum++;
Expand Down
2 changes: 1 addition & 1 deletion htroot/Surftips.java
Expand Up @@ -150,7 +150,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("surftips_results_" + i + "_authorized_recommend_showScore", (showScore ? 1 : 0));

prop.put("surftips_results_" + i + "_authorized_urlhash", urlhash);
prop.put("surftips_results_" + i + "_url", de.anomic.data.wikiCode.replaceXMLEntities(url));
prop.put("surftips_results_" + i + "_url", de.anomic.data.htmlTools.replaceXMLEntities(url));
prop.put("surftips_results_" + i + "_urlname", nxTools.shortenURLString(url, 60));
prop.put("surftips_results_" + i + "_urlhash", urlhash);
prop.put("surftips_results_" + i + "_title", (showScore) ? ("(" + ranking.getScore(urlhash) + ") " + title) : title);
Expand Down
4 changes: 2 additions & 2 deletions htroot/ViewFile.java
Expand Up @@ -53,7 +53,7 @@
import java.util.Map;
import java.util.TreeSet;

import de.anomic.data.wikiCode;
import de.anomic.data.htmlTools;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
Expand Down Expand Up @@ -385,7 +385,7 @@ private static final String[] wordArray(String words) {
}

private static final String markup(String[] wordArray, String message) {
message = wikiCode.replaceXMLEntities(message);
message = htmlTools.replaceXMLEntities(message);
if (wordArray != null)
for (int j = 0; j < wordArray.length; j++) {
String currentWord = wordArray[j].trim();
Expand Down
6 changes: 3 additions & 3 deletions htroot/xml/config_p.java
Expand Up @@ -27,7 +27,7 @@
import java.util.Iterator;
import java.util.List;

import de.anomic.data.wikiCode;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
Expand Down Expand Up @@ -62,8 +62,8 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
int count=0;
while(keys.hasNext()){
key = (String) keys.next();
prop.put("options_"+count+"_key", wikiCode.replaceXMLEntities(key));
prop.put("options_"+count+"_value", wikiCode.replaceXMLEntities(env.getConfig(key, "ERROR")));
prop.put("options_"+count+"_key", htmlTools.replaceXMLEntities(key));
prop.put("options_"+count+"_value", htmlTools.replaceXMLEntities(env.getConfig(key, "ERROR")));
count++;
}
prop.put("options", count);
Expand Down
6 changes: 3 additions & 3 deletions htroot/xml/queues_p.java
Expand Up @@ -52,7 +52,7 @@
import java.util.Date;
import java.util.Locale;

import de.anomic.data.wikiCode;
import de.anomic.data.htmlTools;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlEntry;
import de.anomic.plasma.plasmaCrawlLoaderMessage;
Expand Down Expand Up @@ -123,10 +123,10 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
totalSize += entrySize;
initiator = yacyCore.seedDB.getConnected(pcentry.initiator());
prop.put("list-indexing_"+i+"_profile", (pcentry.profile() != null) ? pcentry.profile().name() : "deleted");
prop.putSafeXML("list-indexing_"+i+"_initiator", ((initiator == null) ? "proxy" : wikiCode.replaceHTML(initiator.getName())));
prop.putSafeXML("list-indexing_"+i+"_initiator", ((initiator == null) ? "proxy" : htmlTools.replaceHTML(initiator.getName())));
prop.put("list-indexing_"+i+"_depth", pcentry.depth());
prop.put("list-indexing_"+i+"_modified", pcentry.getModificationDate());
prop.putSafeXML("list-indexing_"+i+"_anchor", (pcentry.anchorName()==null)?"":wikiCode.replaceHTML(pcentry.anchorName()));
prop.putSafeXML("list-indexing_"+i+"_anchor", (pcentry.anchorName()==null)?"":htmlTools.replaceHTML(pcentry.anchorName()));
prop.putSafeXML("list-indexing_"+i+"_url", pcentry.normalizedURLString());
prop.put("list-indexing_"+i+"_size", entrySize);
prop.put("list-indexing_"+i+"_inProcess", (inProcess)?1:0);
Expand Down
4 changes: 2 additions & 2 deletions htroot/xml/util/getpageinfo_p.java
Expand Up @@ -50,8 +50,8 @@
import java.util.ArrayList;
import java.util.Iterator;

import de.anomic.data.htmlTools;
import de.anomic.data.robotsParser;
import de.anomic.data.wikiCode;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.net.URL;
Expand Down Expand Up @@ -92,7 +92,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
"<title>") + 7, line.toLowerCase().indexOf(
"</title>"));
// de-replace html entities
title = wikiCode.deReplaceHTML(title);
title = htmlTools.deReplaceHTML(title);
prop.put("title", title);
} catch (IndexOutOfBoundsException e) {
}
Expand Down
4 changes: 2 additions & 2 deletions htroot/yacy/list.java
Expand Up @@ -53,8 +53,8 @@
import java.io.File;

import de.anomic.data.URLFetcherStack;
import de.anomic.data.htmlTools;
import de.anomic.data.listManager;
import de.anomic.data.wikiCode;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
Expand Down Expand Up @@ -122,7 +122,7 @@ else if (col.length() == 0 && post.get("list", "").equals("queueUrls")) {
int cnt = 0;
for (int i=0; i<count; i++) {
if ((url = db.pop()) == null) continue;
b.append(wikiCode.deReplaceHTMLEntities(url.toNormalform())).append("\n");
b.append(htmlTools.deReplaceHTMLEntities(url.toNormalform())).append("\n");
cnt++;
}
prop.put("list", b);
Expand Down
Binary file modified libx/J7Zip-modified.jar
Binary file not shown.

0 comments on commit 601fc7d

Please sign in to comment.