Skip to content

Commit

Permalink
adapted for isListed()
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1942 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
borg-0300 committed Mar 21, 2006
1 parent 8b14a0c commit 77f3237
Showing 1 changed file with 49 additions and 42 deletions.
91 changes: 49 additions & 42 deletions source/de/anomic/http/httpdProxyHandler.java
@@ -1,12 +1,13 @@
// httpdProxyHandler.java
// httpdProxyHandler.java
// -----------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
//
// last major change: $LastChangedDate$ by $LastChangedBy$
// Revision: $LastChangedRevision$
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -323,29 +324,30 @@ public void handleIncomingCookies(httpHeader respondHeader, String serverhost, S
* @see de.anomic.http.httpdHandler#doGet(java.util.Properties, de.anomic.http.httpHeader, java.io.OutputStream)
*/
public void doGet(Properties conProp, httpHeader requestHeader, OutputStream respond) throws IOException {

this.connectionProperties = conProp;

try {
// remembering the starting time of the request
final Date requestDate = new Date(); // remember the time...
this.connectionProperties.put(httpHeader.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime()));
if (yacyTrigger) de.anomic.yacy.yacyCore.triggerOnlineAction();
switchboard.proxyLastAccess = System.currentTimeMillis();

// using a ByteCount OutputStream to count the sent bytes (needed for the logfile)
respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpHeader.CONNECTION_PROP_REQUESTLINE).length() + 2);
String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
final String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); // always starts with leading '/'
final String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
final String ip = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer

String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); // always starts with leading '/'
final String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
final String ip = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer
int pos=0;
int port=0;

URL url = null;
try {
url = httpHeader.getRequestURL(conProp);

//redirector
if (redirectorEnabled){
synchronized(redirectorProcess){
Expand Down Expand Up @@ -377,7 +379,7 @@ public void doGet(Properties conProp, httpHeader requestHeader, OutputStream res
port = Integer.parseInt(host.substring(pos + 1));
host = host.substring(0, pos);
}

String ext;
if ((pos = path.lastIndexOf('.')) < 0) {
ext = "";
Expand All @@ -389,16 +391,17 @@ public void doGet(Properties conProp, httpHeader requestHeader, OutputStream res
// blacklist idea inspired by [AS]:
// respond with a 403 for all AGIS ("all you get is shit") servers
String hostlow = host.toLowerCase();
if (args != null) { path = path + "?" + args; }
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
httpd.sendRespondError(conProp,respond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
return;
}

// handle outgoing cookies
handleOutgoingCookies(requestHeader, host, ip);

// set another userAgent, if not yellowlisted
if ((yellowList != null) && (!(yellowList.contains(domain(hostlow))))) {
// change the User-Agent
Expand Down Expand Up @@ -441,7 +444,7 @@ public void doGet(Properties conProp, httpHeader requestHeader, OutputStream res
requestDate, // init date
0, // crawling depth
url, // url
"", // name of the url is unknown
"", // name of the url is unknown
requestHeader, // request headers
"200 OK", // request status
cachedResponseHeader, // response headers
Expand Down Expand Up @@ -646,9 +649,9 @@ private void fulfillRequestFromWeb(Properties conProp, URL url,String ext, httpH
// ok, we don't actually write into a file here, only to RAM, and schedule writing the file.
byte[] cacheArray = res.writeContent(hfos);
this.theLogger.logFine("writeContent of " + url + " produced cacheArray = " + ((cacheArray == null) ? "null" : ("size=" + cacheArray.length)));

if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize();

if (sizeBeforeDelete == -1) {
// totally fresh file
//cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert
Expand All @@ -667,7 +670,7 @@ private void fulfillRequestFromWeb(Properties conProp, URL url,String ext, httpH
cacheEntry.cacheArray = cacheArray;
cacheManager.push(cacheEntry); // necessary update, write response header to cache
conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS");
}
}
} else {
// the file is too big to be cached in RAM, or its size is unknown
// write to file right here.
Expand Down Expand Up @@ -701,7 +704,7 @@ private void fulfillRequestFromWeb(Properties conProp, URL url,String ext, httpH
" StoreError=" + ((storeError==null)?"None":storeError) +
" StoreHTCache=" + storeHTCache +
" SupportetContent=" + isSupportedContent);

res.writeContent(hfos, null);
if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize();
if (sizeBeforeDelete == -1) {
Expand All @@ -715,7 +718,7 @@ private void fulfillRequestFromWeb(Properties conProp, URL url,String ext, httpH
}
conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
}

if (gzippedOut != null) {
gzippedOut.finish();
}
Expand Down Expand Up @@ -908,7 +911,11 @@ public void doHead(Properties conProp, httpHeader requestHeader, OutputStream re

// check the blacklist, inspired by [AS]: respond with a 403 for all AGIS (all you get is shit) servers
String hostlow = host.toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {

// re-calc the url path
String remotePath = (args == null) ? path : (path + "?" + args);

if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, remotePath)) {
httpd.sendRespondError(conProp,respond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
Expand All @@ -927,9 +934,6 @@ public void doHead(Properties conProp, httpHeader requestHeader, OutputStream re
// resolve yacy and yacyh domains
String yAddress = yacyCore.seedDB.resolveYacyAddress(host);

// re-calc the url path
String remotePath = (args == null) ? path : (path + "?" + args);

// attach possible yacy-sublevel-domain
if ((yAddress != null) && ((pos = yAddress.indexOf("/")) >= 0)) remotePath = yAddress.substring(pos) + remotePath;

Expand Down Expand Up @@ -987,7 +991,7 @@ public void doPost(Properties conProp, httpHeader requestHeader, OutputStream re
String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH);
String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
String httpVer = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);

int port, pos;
if ((pos = host.indexOf(":")) < 0) {
port = 80;
Expand Down Expand Up @@ -1101,46 +1105,49 @@ public void doPost(Properties conProp, httpHeader requestHeader, OutputStream re
public void doConnect(Properties conProp, de.anomic.http.httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException {
this.connectionProperties = conProp;
switchboard.proxyLastAccess = System.currentTimeMillis();

String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
String httpVersion = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);

String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH);
final String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS);
if (args != null) { path = path + "?" + args; }

int port, pos;
if ((pos = host.indexOf(":")) < 0) {
port = 80;
} else {
port = Integer.parseInt(host.substring(pos + 1));
host = host.substring(0, pos);
}
}

// check the blacklist
// blacklist idea inspired by [AS]:
// respond with a 403 for all AGIS ("all you get is shit") servers
String hostlow = host.toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, "/")) {
final String hostlow = host.toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
httpd.sendRespondError(conProp,clientOut,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
forceConnectionClose();
return;
}

// possibly branch into PROXY-PROXY connection
if (
(switchboard.remoteProxyConfig != null) &&
(switchboard.remoteProxyConfig != null) &&
(switchboard.remoteProxyConfig.useProxy()) &&
(switchboard.remoteProxyConfig.useProxy4SSL())
) {
httpc remoteProxy = null;
try {
remoteProxy = httpc.getInstance(
host,
port,
timeout,
false,
host,
port,
timeout,
false,
switchboard.remoteProxyConfig
);

httpc.response response = remoteProxy.CONNECT(host, port, requestHeader);
response.print();
if (response.success()) {
Expand All @@ -1160,8 +1167,8 @@ public void doConnect(Properties conProp, de.anomic.http.httpHeader requestHeade
} finally {
if (remoteProxy != null) httpc.returnInstance(remoteProxy);
}
}
}

// try to establish connection to remote host
Socket sslSocket = new Socket(host, port);
sslSocket.setSoTimeout(timeout); // waiting time for write
Expand Down

0 comments on commit 77f3237

Please sign in to comment.