Skip to content

Commit

Permalink
refactoring of wget string list generation
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2692 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Oct 2, 2006
1 parent dbc2e03 commit 5a40ea7
Show file tree
Hide file tree
Showing 12 changed files with 51 additions and 37 deletions.
3 changes: 2 additions & 1 deletion htroot/ConfigLanguage_p.java
Expand Up @@ -62,6 +62,7 @@
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools;


public class ConfigLanguage_p {
Expand Down Expand Up @@ -97,7 +98,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
ArrayList langVector;
try{
URL u = new URL(url);
langVector = httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig);
langVector = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig));
}catch(IOException e){
prop.put("status", 1);//unable to get url
prop.put("status_url", url);
Expand Down
3 changes: 2 additions & 1 deletion htroot/ConfigSkins_p.java
Expand Up @@ -61,6 +61,7 @@
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools;

public class ConfigSkins_p {

Expand Down Expand Up @@ -126,7 +127,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
ArrayList skinVector;
try{
URL u = new URL(url);
skinVector = httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig);
skinVector = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig));
}catch(IOException e){
prop.put("status", 1);//unable to get URL
prop.put("status_url", url);
Expand Down
5 changes: 3 additions & 2 deletions htroot/sharedBlacklist_p.java
Expand Up @@ -62,6 +62,7 @@
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;

Expand Down Expand Up @@ -130,7 +131,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve

// get List
URL u = new URL(downloadURL);
otherBlacklist = httpc.wget(u, u.getHost(), 12000, null, null, switchboard.remoteProxyConfig,reqHeader);
otherBlacklist = nxTools.strings(httpc.wget(u, u.getHost(), 12000, null, null, switchboard.remoteProxyConfig,reqHeader));
} catch (Exception e) {
prop.put("status", STATUS_PEER_UNKNOWN);
prop.put("page", 1);
Expand All @@ -146,7 +147,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve

try {
URL u = new URL(downloadURL);
otherBlacklist = httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig); //get List
otherBlacklist = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig)); //get List
} catch (Exception e) {
prop.put("status", STATUS_URL_PROBLEM);
prop.put("status_address",downloadURL);
Expand Down
3 changes: 2 additions & 1 deletion htroot/xml/util/getpageinfo_p.java
Expand Up @@ -56,6 +56,7 @@
import de.anomic.net.URL;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools;

public class getpageinfo_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
Expand All @@ -77,7 +78,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
}
if (actions.indexOf("title")>=0) {
try {
content = httpc.wget(new URL(url));
content = nxTools.strings(httpc.wget(new URL(url)));

Iterator it = content.iterator();
String line;
Expand Down
19 changes: 6 additions & 13 deletions source/de/anomic/http/httpc.java
Expand Up @@ -91,6 +91,7 @@
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.nxTools;

/**
* This class implements an http client. While http access is built-in in java
Expand Down Expand Up @@ -1280,7 +1281,7 @@ public static byte[] singlePOST(
}
}

public static ArrayList wget(
public static byte[] wget(
URL url,
String vhost,
int timeout,
Expand All @@ -1291,11 +1292,11 @@ public static ArrayList wget(
return wget(url, vhost,timeout,user,password,theRemoteProxyConfig,null);
}

public static ArrayList wget(URL url) throws IOException{
public static byte[] wget(URL url) throws IOException{
return wget(url, url.getHost(), 6000, null, null, plasmaSwitchboard.getSwitchboard().remoteProxyConfig, null);
}

public static ArrayList wget(
public static byte[] wget(
URL url,
String vhost,
int timeout,
Expand Down Expand Up @@ -1352,15 +1353,7 @@ public static ArrayList wget(
}
}

int s = 0;
int e;
ArrayList v = new ArrayList();
while (s < a.length) {
e = s; while (e < a.length) if (a[e++] < 32) {e--; break;}
v.add(new String(a, s, e - s));
s = e; while (s < a.length) if (a[s++] >= 32) {s--; break;}
}
return v;
return a;
}

public static httpHeader whead(
Expand Down Expand Up @@ -1466,7 +1459,7 @@ public static void main(String[] args) {
httpRemoteProxyConfig theRemoteProxyConfig = httpRemoteProxyConfig.init(proxyHost,proxyPort);
try {
URL u = new URL(url);
text = wget(u, u.getHost(), timeout, null, null, theRemoteProxyConfig);
text = nxTools.strings(wget(u, u.getHost(), timeout, null, null, theRemoteProxyConfig));
} catch (MalformedURLException e) {
System.out.println("The url '" + url + "' is wrong.");
} catch (IOException e) {
Expand Down
8 changes: 4 additions & 4 deletions source/de/anomic/net/natLib.java
Expand Up @@ -63,7 +63,7 @@ public static String getDI604(String password) {
rm status.htm
*/
try {
ArrayList x = httpc.wget(new URL("http://192.168.0.1:80/status.htm"), "192.168.0.1", 5000, "admin", password, null);
ArrayList x = nxTools.strings(httpc.wget(new URL("http://192.168.0.1:80/status.htm"), "192.168.0.1", 5000, "admin", password, null));
x = nxTools.grep(x, 1, "IP Address");
if ((x == null) || (x.size() == 0)) return null;
String line = nxTools.tail1(x);
Expand All @@ -75,7 +75,7 @@ public static String getDI604(String password) {

private static String getWhatIsMyIP() {
try {
ArrayList x = httpc.wget(new URL("http://www.whatismyip.com/"), "www.whatsmyip.com", 5000, null, null, null);
ArrayList x = nxTools.strings(httpc.wget(new URL("http://www.whatismyip.com/"), "www.whatsmyip.com", 5000, null, null, null));
x = nxTools.grep(x, 0, "Your IP is");
String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 4);
Expand All @@ -86,7 +86,7 @@ private static String getWhatIsMyIP() {

private static String getStanford() {
try {
ArrayList x = httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), "www.slac.stanford.edu", 5000, null, null, null);
ArrayList x = nxTools.strings(httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), "www.slac.stanford.edu", 5000, null, null, null));
x = nxTools.grep(x, 0, "firewall protecting your browser");
String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 7);
Expand All @@ -97,7 +97,7 @@ private static String getStanford() {

private static String getIPID() {
try {
ArrayList x = httpc.wget(new URL("http://ipid.shat.net/"), "ipid.shat.net", 5000, null, null, null);
ArrayList x = nxTools.strings(httpc.wget(new URL("http://ipid.shat.net/"), "ipid.shat.net", 5000, null, null, null));
x = nxTools.grep(x, 2, "Your IP address");
String line = nxTools.tail1(x);
return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1);
Expand Down
3 changes: 1 addition & 2 deletions source/de/anomic/tools/loaderCore.java
Expand Up @@ -41,7 +41,6 @@

package de.anomic.tools;

import java.util.ArrayList;
import java.util.Properties;

public abstract class loaderCore implements loaderProcess {
Expand All @@ -63,7 +62,7 @@ public abstract class loaderCore implements loaderProcess {
protected int completion = 0;

// steering methods
public abstract void feed(ArrayList v); // returns true if process was successful; should be always synchronized
public abstract void feed(byte[] a); // returns true if process was successful; should be always synchronized

public void terminate() {
// if terminated before completion, completed() shows x < 100
Expand Down
3 changes: 1 addition & 2 deletions source/de/anomic/tools/loaderProcess.java
Expand Up @@ -41,13 +41,12 @@

package de.anomic.tools;

import java.util.ArrayList;
import java.util.Properties;

public interface loaderProcess {

// steering methods
public void feed(ArrayList v); // returns true if process was successful; should be always synchronized
public void feed(byte[] v); // returns true if process was successful; should be always synchronized
public void terminate(); // if terminated before completion, completed() shows x < 100

// feed-back methods
Expand Down
13 changes: 7 additions & 6 deletions source/de/anomic/tools/loaderThreads.java
Expand Up @@ -133,7 +133,7 @@ protected class loaderThread extends Thread {
private URL url;
private Exception error;
private loaderProcess process;
private ArrayList page;
private byte[] page;
private boolean loaded;

public loaderThread(URL url, loaderProcess process) {
Expand Down Expand Up @@ -193,26 +193,27 @@ public propLoader() {
this.status = STATUS_READY;
}

public synchronized void feed(ArrayList v) {
public synchronized void feed(byte[] v) {
this.status = STATUS_RUNNING;
this.completion = 1;
int line = 0;
String s, key, value;
int p;
ArrayList lines = nxTools.strings(v);
try {
while ((this.run) && (line < v.size())) {
while ((this.run) && (line < lines.size())) {
// parse line and construct a property
s = (String) v.get(line);
s = (String) lines.get(line);
if ((s != null) && ((p = s.indexOf('=')) > 0)) {
key = s.substring(0, p).trim();
value = s.substring(p + 1).trim();
if (key.length() > 0) result.put(key, value);
}
// update thread information
line++;
this.completion = 100 * line / v.size();
this.completion = 100 * line / lines.size();
}
if (line == v.size()) {
if (line == lines.size()) {
this.status = STATUS_COMPLETED;
return;
} else {
Expand Down
18 changes: 17 additions & 1 deletion source/de/anomic/tools/nxTools.java
Expand Up @@ -66,6 +66,10 @@ public static HashMap table(Vector list) {
return props;
}

/**
 * Convenience overload: builds the property table directly from a raw
 * byte array by first splitting it into strings with {@link #strings(byte[])}
 * and then handing the resulting list to {@link #table(ArrayList)}.
 *
 * @param a the raw bytes to split and parse
 * @return the table produced by {@code table(ArrayList)} for the split lines
 */
public static HashMap table(byte[] a) {
    final ArrayList lines = strings(a);
    return table(lines);
}

public static HashMap table(ArrayList list) {
Iterator i = list.iterator();
int pos;
Expand All @@ -78,7 +82,7 @@ public static HashMap table(ArrayList list) {
if (pos > 0) props.put(line.substring(0, pos).trim(), line.substring(pos + 1).trim());
}
return props;
}
}

public static Vector grep(Vector list, int afterContext, String pattern) {
Enumeration i = list.elements();
Expand Down Expand Up @@ -144,6 +148,18 @@ public static String awk(String sentence, String separator, int count) {
return null;
}

/**
 * Splits a raw byte array into the text runs it contains.
 * <p>
 * A run is a maximal sequence of bytes whose <em>unsigned</em> value is
 * 32 or greater; any sequence of bytes below 32 (CR, LF, TAB and other
 * control codes) acts as a single separator. If the array starts with a
 * separator, the first element of the result is an empty string; trailing
 * separators do not produce an element.
 * <p>
 * Bytes are compared as unsigned values. Java's {@code byte} is signed, so
 * a plain {@code a[i] < 32} test would classify every byte in the range
 * 0x80..0xFF as a separator and tear non-ASCII text (for example UTF-8
 * multi-byte sequences) apart.
 * <p>
 * Each run is decoded with the platform default charset.
 *
 * @param a the bytes to split; may be {@code null}
 * @return a list of {@code String} objects, one per run, in order of
 *         appearance; an empty list for an empty array; {@code null} if
 *         {@code a} is {@code null}
 */
public static ArrayList strings(byte[] a) {
    // some callers test the result for null, so pass a missing input through
    if (a == null) return null;

    ArrayList v = new ArrayList();
    int s = 0;
    int e;
    while (s < a.length) {
        // advance e to the first separator at or after s (or to the end)
        e = s;
        while ((e < a.length) && ((a[e] & 0xFF) >= 32)) e++;
        v.add(new String(a, s, e - s));

        // skip the whole separator sequence to find the start of the next run
        s = e;
        while ((s < a.length) && ((a[s] & 0xFF) < 32)) s++;
    }
    return v;
}

/**
* This function shortens URL strings<br>
*
Expand Down
5 changes: 3 additions & 2 deletions source/de/anomic/yacy/yacyPeerActions.java
Expand Up @@ -61,6 +61,7 @@
import de.anomic.server.serverCore;
import de.anomic.server.serverDate;
import de.anomic.tools.disorderSet;
import de.anomic.tools.nxTools;

public class yacyPeerActions {

Expand Down Expand Up @@ -200,7 +201,7 @@ public void loadSeedLists() {
yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)");
} else {
ssc++;
seedList = httpc.wget(url, url.getHost(), this.bootstrapLoadTimeout, null, null, this.sb.remoteProxyConfig,reqHeader);
seedList = nxTools.strings(httpc.wget(url, url.getHost(), this.bootstrapLoadTimeout, null, null, this.sb.remoteProxyConfig,reqHeader));
enu = seedList.iterator();
lc = 0;
while (enu.hasNext()) {
Expand Down Expand Up @@ -254,7 +255,7 @@ private disorderSet loadSuperseed(File local, String url) {
// read in remote file from url
try {
URL u = new URL(url);
ArrayList remote = httpc.wget(u, u.getHost(), 5000, null, null, this.sb.remoteProxyConfig);
ArrayList remote = nxTools.strings(httpc.wget(u, u.getHost(), 5000, null, null, this.sb.remoteProxyConfig));
if ((remote != null) && (remote.size() > 0)) {
Iterator e = remote.iterator();
while (e.hasNext()) {
Expand Down
5 changes: 3 additions & 2 deletions source/de/anomic/yacy/yacySeedDB.java
Expand Up @@ -71,6 +71,7 @@
import de.anomic.server.serverCore;
import de.anomic.server.serverSwitch;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.nxTools;

public final class yacySeedDB {

Expand Down Expand Up @@ -713,15 +714,15 @@ private boolean checkCache(ArrayList uv, URL seedURL) throws IOException {
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.PRAGMA, "no-cache");
reqHeader.put(httpHeader.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary?
ArrayList check = httpc.wget(
ArrayList check = nxTools.strings(httpc.wget(
seedURL,
seedURL.getHost(),
10000,
null,
null,
sb.remoteProxyConfig,
reqHeader
);
));

if (check == null) {
serverLog.logFine("YACY","SaveSeedList: Testing download failed ...");
Expand Down

0 comments on commit 5a40ea7

Please sign in to comment.