Skip to content

Commit

Permalink
some generalization of remote proxy configuration and setting handlin…
Browse files Browse the repository at this point in the history
…g in httpc

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4023 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Aug 2, 2007
1 parent fac8de6 commit 57a5b6f
Show file tree
Hide file tree
Showing 13 changed files with 51 additions and 123 deletions.
2 changes: 1 addition & 1 deletion build.properties
Expand Up @@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4

# Release Configuration
releaseVersion=0.54
releaseVersion=0.541
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFileParentDir=yacy
Expand Down
3 changes: 2 additions & 1 deletion htroot/CrawlURLFetch_p.java
Expand Up @@ -512,7 +512,8 @@ private String[] getURLs(URL url) {
url.getHost(),
url.getPort(),
15000,
url.getProtocol().equals("https"));
url.getProtocol().equals("https"),
plasmaSwitchboard.getSwitchboard().remoteProxyConfig);

httpHeader header = new httpHeader();
header.put(httpHeader.ACCEPT_ENCODING, "US-ASCII");
Expand Down
4 changes: 2 additions & 2 deletions htroot/opensearchdescription.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
<OpenSearchDescription xmlns="http://www.opensearch.org/Specifications/OpenSearch/1.1">
<ShortName>YaCy/#[clientname]#</ShortName>
<LongName>YaCy.net - #[SearchPageGreeting]#</LongName>
<Image type="image/gif">http://#[thisaddress]#/env/grafics/yacy.gif</Image>
Expand All @@ -15,6 +15,6 @@
<Query role="example" searchTerms="yacy" />
<Tags>YaCy P2P Web Search</Tags>
<Contact>See http://#[thisaddress]#/ViewProfile.html?hash=localhash</Contact>
<Attribution>YaCy Software &amp;copy; 2004-2006 by Michael Christen et al., YaCy.net; Content: ask peer owner</Attribution>
<Attribution>YaCy Software &amp;copy; 2004-2007 by Michael Christen et al., YaCy.net; Content: ask peer owner</Attribution>
<SyndicationRight>open</SyndicationRight>
</OpenSearchDescription>
3 changes: 2 additions & 1 deletion source/de/anomic/data/SitemapParser.java
Expand Up @@ -180,7 +180,8 @@ public void parse() {
this.siteMapURL.getHost(),
this.siteMapURL.getPort(),
5000,
this.siteMapURL.getProtocol().equalsIgnoreCase("https"));
this.siteMapURL.getProtocol().equalsIgnoreCase("https"),
switchboard.remoteProxyConfig);

httpc.response res = remote.GET(this.siteMapURL.getFile(), null);
if (res.statusCode != 200) {
Expand Down
10 changes: 1 addition & 9 deletions source/de/anomic/data/robotsParser.java
Expand Up @@ -386,15 +386,7 @@ static Object[] downloadRobotsTxt(URL robotsURL, int redirectionCount, plasmaCra
downloadStart = System.currentTimeMillis();
plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard();
//TODO: add traffic statistics for the robots.txt download?
if (
(sb == null) ||
(sb.remoteProxyConfig == null) ||
(!sb.remoteProxyConfig.useProxy())
) {
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"));
} else {
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"), sb.remoteProxyConfig);
}
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"), sb.remoteProxyConfig);

// if we previously have downloaded this robots.txt then we can set the if-modified-since header
httpHeader reqHeaders = new httpHeader();
Expand Down
101 changes: 28 additions & 73 deletions source/de/anomic/http/httpc.java
Expand Up @@ -308,62 +308,7 @@ public static httpc getInstance(
boolean ssl,
httpRemoteProxyConfig remoteProxyConfig
) throws IOException {
if (remoteProxyConfig == null) throw new NullPointerException("Proxy object must not be null.");

return getInstance(server,vhost,port,timeout,ssl,remoteProxyConfig,null,null);
}

public static httpc getInstance(
String server,
String vhost,
int port,
int timeout,
boolean ssl
) throws IOException {
return getInstance(server,vhost,port,timeout,ssl,null,null);
}


/**
* This method gets a new httpc instance from the object pool and
* initializes it with the given parameters.
*
* @param server
* @param port
* @param timeout
* @param ssl
* @throws IOException
* @see httpc#init
*/
public static httpc getInstance(
String server,
String vhost,
int port,
int timeout,
boolean ssl,
String incomingByteCountAccounting,
String outgoingByteCountAccounting
) throws IOException {

httpc newHttpc = null;
// fetching a new httpc from the object pool
try {
newHttpc = (httpc) httpc.theHttpcPool.borrowObject();

} catch (Exception e) {
throw new IOException("Unable to fetch a new httpc from pool. " + e.getMessage());
}

// initialize it
try {
newHttpc.init(server,vhost,port,timeout,ssl,incomingByteCountAccounting,outgoingByteCountAccounting);
} catch (IOException e) {
try{ httpc.theHttpcPool.returnObject(newHttpc); } catch (Exception e1) {}
throw e;
}
return newHttpc;


return getInstance(server,vhost,port,timeout,ssl,remoteProxyConfig,null,null);
}

/**
Expand Down Expand Up @@ -439,7 +384,7 @@ public static Date nowDate() {
* @param remoteProxyPort
* @throws IOException
*/
void init(
private void init(
String server,
String vhost,
int port,
Expand All @@ -450,14 +395,35 @@ void init(
String outgoingByteCountAccounting
) throws IOException {

if ((theRemoteProxyConfig == null) ||
(!theRemoteProxyConfig.useProxy())) {
initN(
server,
vhost,
port,
timeout,
ssl,
incomingByteCountAccounting,
outgoingByteCountAccounting
);
return;
}

if (port == -1) {
port = (ssl)? 443 : 80;
}

String remoteProxyHost = theRemoteProxyConfig.getProxyHost();
int remoteProxyPort = theRemoteProxyConfig.getProxyPort();

this.init(remoteProxyHost, vhost, remoteProxyPort, timeout, ssl,incomingByteCountAccounting,outgoingByteCountAccounting);
this.initN(
remoteProxyHost,
vhost,
remoteProxyPort,
timeout,
ssl,
incomingByteCountAccounting,
outgoingByteCountAccounting);

this.remoteProxyUse = true;
this.adressed_host = server;
Expand All @@ -476,7 +442,7 @@ void init(
* @param ssl Whether we should use SSL.
* @throws IOException
*/
void init(
private void initN(
String server,
String vhost,
int port,
Expand Down Expand Up @@ -968,11 +934,7 @@ public static byte[] singleGET(

httpc con = null;
try {
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) {
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl);
} else {
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
}
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);

httpc.response res = con.GET(path, requestHeader);
if (res.status.startsWith("2")) {
Expand Down Expand Up @@ -1036,11 +998,7 @@ public static byte[] singlePOST(

httpc con = null;
try {
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) {
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl);
} else {
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
}
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
httpc.response res = con.POST(path, requestHeader, props, files);

//System.out.println("response=" + res.toString());
Expand Down Expand Up @@ -1198,10 +1156,7 @@ public static httpHeader whead(
// start connection
httpc con = null;
try {
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy()))
con = httpc.getInstance(realhost, vhost, port, timeout, ssl);
else con = httpc.getInstance(realhost, vhost, port, timeout, ssl, theRemoteProxyConfig);

con = httpc.getInstance(realhost, vhost, port, timeout, ssl, theRemoteProxyConfig);
httpc.response res = con.HEAD(path, requestHeader);
if (res.status.startsWith("2")) {
// success
Expand Down
25 changes: 7 additions & 18 deletions source/de/anomic/http/httpdProxyHandler.java
Expand Up @@ -1265,15 +1265,13 @@ private httpc newhttpc(String server, int port, int timeout) throws IOException
httpRemoteProxyConfig remProxyConfig = switchboard.remoteProxyConfig;

// a new httpc connection, combined with possible remote proxy
boolean useProxy = (remProxyConfig!=null)&&(remProxyConfig.useProxy());

// check no-proxy rule
if (
(switchboard.remoteProxyConfig != null) &&
(switchboard.remoteProxyConfig.useProxy()) &&
(!(switchboard.remoteProxyConfig.remoteProxyAllowProxySet.contains(server)))) {
if (switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.contains(server)) {
useProxy = false;
(remProxyConfig != null) &&
(remProxyConfig.useProxy()) &&
(!(remProxyConfig.remoteProxyAllowProxySet.contains(server)))) {
if (remProxyConfig.remoteProxyDisallowProxySet.contains(server)) {
remProxyConfig = null;
} else {
// analyse remoteProxyNoProxy;
// set either remoteProxyAllowProxySet or remoteProxyDisallowProxySet accordingly
Expand All @@ -1282,7 +1280,7 @@ private httpc newhttpc(String server, int port, int timeout) throws IOException
if (server.matches(remProxyConfig.getProxyNoProxyPatterns()[i])) {
// disallow proxy for this server
switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.add(server);
useProxy = false;
remProxyConfig = null;
break;
}
i++;
Expand All @@ -1295,23 +1293,14 @@ private httpc newhttpc(String server, int port, int timeout) throws IOException
}

// branch to server/proxy
if (useProxy) {
return httpc.getInstance(
return httpc.getInstance(
server,
server,
port,
timeout,
false,
remProxyConfig
);
}
return httpc.getInstance(
server,
server,
port,
timeout,
false
);
}

private httpc newhttpc(String address, int timeout) throws IOException {
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/kelondro/kelondroMSetTools.java
Expand Up @@ -117,7 +117,7 @@ public static TreeMap joinConstructive(Collection maps, boolean concatStrings) {
k = (Long) orderMap.firstKey(); // the next smallest...
mapA = joinResult;
mapB = (TreeMap) orderMap.remove(k);
joinResult = joinConstructiveByTest(mapA, mapB, concatStrings);
joinResult = joinConstructiveByTest(mapA, mapB, concatStrings); // TODO: better with enumeration?
// free resources
mapA = null;
mapB = null;
Expand Down
4 changes: 1 addition & 3 deletions source/de/anomic/plasma/crawler/http/CrawlWorker.java
Expand Up @@ -196,9 +196,7 @@ private plasmaHTCache.Entry load(int crawlingRetryCount) throws IOException {
requestHeader.put(httpHeader.ACCEPT_ENCODING, this.acceptEncoding);

// open the connection
remote = ((this.remoteProxyConfig != null) && (this.remoteProxyConfig.useProxy()))
? httpc.getInstance(host, host, port, this.socketTimeout, ssl, this.remoteProxyConfig,"CRAWLER",null)
: httpc.getInstance(host, host, port, this.socketTimeout, ssl, "CRAWLER",null);
remote = httpc.getInstance(host, host, port, this.socketTimeout, ssl, this.remoteProxyConfig,"CRAWLER",null);

// specifying if content encoding is allowed
remote.setAllowContentEncoding((this.acceptEncoding != null && this.acceptEncoding.length() > 0));
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/plasma/plasmaCrawlLURL.java
Expand Up @@ -424,7 +424,7 @@ public void urldbcleanup() {
URL newUrl = new URL(newUrlStr);

// doing a http head request to test if the url is correct
theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getHost(), newUrl.getPort(), 30000, false);
theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getHost(), newUrl.getPort(), 30000, false, plasmaSwitchboard.getSwitchboard().remoteProxyConfig);
response res = theHttpc.HEAD(newUrl.getPath(), null);

if (res.statusCode == 200) {
Expand Down
3 changes: 2 additions & 1 deletion source/de/anomic/plasma/plasmaParser.java
Expand Up @@ -919,7 +919,8 @@ public static void main(String[] args) {
contentURL.getHost(),
contentURL.getPort(),
5000,
contentURL.getProtocol().equalsIgnoreCase("https"));
contentURL.getProtocol().equalsIgnoreCase("https"),
null);

httpc.response res = remote.GET(contentURL.getFile(), null);
if (res.statusCode != 200) {
Expand Down
13 changes: 2 additions & 11 deletions source/de/anomic/yacy/yacySeedDB.java
Expand Up @@ -763,22 +763,13 @@ private ArrayList downloadSeedFile(URL seedURL) throws IOException {
httpc remote = null;
try {
// init httpc
if ((sb.remoteProxyConfig == null)||(!sb.remoteProxyConfig.useProxy())) {
remote = httpc.getInstance(
seedURL.getHost(),
seedURL.getHost(),
seedURL.getPort(),
10000,
seedURL.getProtocol().equalsIgnoreCase("https"));
} else {
remote = httpc.getInstance(
remote = httpc.getInstance(
seedURL.getHost(),
seedURL.getHost(),
seedURL.getPort(),
10000,
seedURL.getProtocol().equalsIgnoreCase("https"),
sb.remoteProxyConfig);
}
sb.remoteProxyConfig);

// Configure http headers
httpHeader reqHeader = new httpHeader();
Expand Down
2 changes: 1 addition & 1 deletion source/yacy.java
Expand Up @@ -514,7 +514,7 @@ static void shutdown(String homePath) {
httpHeader requestHeader = new httpHeader();
requestHeader.put("Authorization", "realm=" + encodedPassword); // for http-authentify
try {
httpc con = httpc.getInstance("localhost", "localhost", port, 10000, false);
httpc con = httpc.getInstance("localhost", "localhost", port, 10000, false, null);
httpc.response res = con.GET("Steering.html?shutdown=", requestHeader);

// read response
Expand Down

0 comments on commit 57a5b6f

Please sign in to comment.