Expand Up
@@ -14,9 +14,11 @@
from zope .interface import implements
from twisted .python import log
from twisted .python .failure import Failure
from twisted .web import http
from twisted .internet import defer , protocol , task , reactor
from twisted .internet .interfaces import IProtocol
from twisted .internet .endpoints import TCP4ClientEndpoint , SSL4ClientEndpoint
from twisted .python import failure
from twisted .python .util import InsensitiveDict
from twisted .python .components import proxyForInterface
Expand Down
Expand Up
@@ -522,6 +524,23 @@ def pageEnd(self):
class _URL(tuple):
    """
    A parsed URL, usable both as a C{(scheme, host, port, path)} tuple and
    through attributes of the same names.

    At some point this should be replaced with a better URL implementation.

    @ivar scheme: The first element of the tuple.
    @ivar host: The second element of the tuple.
    @ivar port: The third element of the tuple.
    @ivar path: The fourth element of the tuple.
    """

    def __new__(cls, scheme, host, port, path):
        # Build the tuple as C{cls} rather than a hard-coded L{_URL}: when
        # __new__ returns an object that is not an instance of the class
        # being called, Python skips __init__, which would leave a subclass
        # instance without any of its attributes.
        return tuple.__new__(cls, (scheme, host, port, path))

    def __init__(self, scheme, host, port, path):
        self.scheme = scheme
        self.host = host
        self.port = port
        self.path = path
def _parse (url , defaultPort = None ):
"""
Split the given URL into the scheme, host, port, and path.
Expand Down
Expand Up
@@ -558,7 +577,7 @@ def _parse(url, defaultPort=None):
if path == '' :
path = '/'
return scheme , host , port , path
return _URL ( scheme , host , port , path )
def _makeGetterFactory (url , factoryFactory , contextFactory = None ,
Expand Down
Expand Up
@@ -625,10 +644,9 @@ def downloadPage(url, file, contextFactory=None, *args, **kwargs):
# should be significantly better than anything above, though it is not yet
# feature equivalent.
from twisted .internet .protocol import ClientCreator
from twisted .web .error import SchemeNotSupported
from twisted .web ._newclient import ResponseDone , Request , HTTP11ClientProtocol
from twisted .web ._newclient import Response , ResponseFailed
from twisted .web ._newclient import Request , Response , HTTP11ClientProtocol
from twisted .web ._newclient import ResponseDone , ResponseFailed
try :
from twisted .internet .ssl import ClientContextFactory
Expand Down
Expand Up
@@ -791,36 +809,168 @@ def resumeProducing(self):
class _AgentMixin ( object ):
class _HTTP11ClientFactory(protocol.Factory):
    """
    Builds L{HTTP11ClientProtocol} instances on behalf of
    L{HTTPConnectionPool}.

    @ivar _quiescentCallback: The callable handed to every protocol this
        factory builds; the protocol uses it as its quiescent callback, which
        is how it gets returned to the connection pool.

    @since: 11.1
    """

    def __init__(self, quiescentCallback):
        """
        @param quiescentCallback: The callable to pass to each protocol
            created by L{buildProtocol}.
        """
        self._quiescentCallback = quiescentCallback


    def buildProtocol(self, addr):
        """
        Create a new L{HTTP11ClientProtocol} wired to the stored quiescent
        callback.

        @param addr: The address being connected to; not used here.

        @return: The newly created protocol instance.
        """
        newProtocol = HTTP11ClientProtocol(self._quiescentCallback)
        return newProtocol
class HTTPConnectionPool (object ):
"""
A pool of persistent HTTP connections.
Features:
- Cached connections will eventually time out.
- Limits on maximum number of persistent connections.
Connections are stored using keys, which should be chosen such that any
connections stored under a given key can be used interchangeably.
@ivar persistent: Boolean indicating whether connections should be
persistent.
@ivar maxPersistentPerHost: The maximum number of cached persistent
connections for a C{host:port} destination.
@type maxPersistentPerHost: C{int}
@ivar cachedConnectionTimeout: Number of seconds a cached persistent
connection will stay open before disconnecting.
@ivar _factory: The factory class instantiated (with the quiescent callback) and passed to the endpoint whenever a new connection is opened.
@ivar _connections: Map (scheme, host, port) to lists of
L{HTTP11ClientProtocol} instances.
@ivar _timeouts: Map L{HTTP11ClientProtocol} instances to a C{IDelayedCall}
instance of their timeout.
@since: 12.1
"""
_factory = _HTTP11ClientFactory
maxPersistentPerHost = 2
cachedConnectionTimeout = 240
def __init__(self, reactor, persistent=True):
    """
    Set up an empty pool.

    @param reactor: Stored as C{_reactor}; its C{callLater} is what the pool
        uses to schedule the timeout of each cached connection.
    @param persistent: Stored as C{persistent}; defaults to C{True}.
    """
    self._reactor, self.persistent = reactor, persistent
    # Nothing is cached yet, so both bookkeeping mappings start out empty.
    self._connections = dict()
    self._timeouts = dict()
def _connectAndRequest ( self , method , uri , headers , bodyProducer ,
requestPath = None ):
def getConnection ( self , key , endpoint ):
"""
Internal helper to make the request.
Retrieve a connection, either new or cached, to be used for a HTTP
request.
If a cached connection is returned, it will not be used for other
requests until it is put back (which will happen automatically), since
we do not support pipelined requests. If no cached connection is
available, the passed in endpoint is used to create the connection.
If the connection doesn't disconnect at the end of its request, it
will be returned to this pool automatically. As such, only a single
request should be sent using the returned connection.
@param requestPath: If specified, the path to use for the request
instead of the path extracted from C{uri} .
@return: A C{Deferred} that will fire with L{HTTP11ClientProtocol}
that can be used to send a single HTTP request .
"""
scheme , host , port , path = _parse (uri )
if requestPath is None :
requestPath = path
d = self ._connect (scheme , host , port )
if headers is None :
headers = Headers ()
if not headers .hasHeader ('host' ):
headers = headers .copy ()
headers .addRawHeader (
'host' , self ._computeHostValue (scheme , host , port ))
def cbConnected (proto ):
return proto .request (
Request (method , requestPath , headers , bodyProducer ))
d .addCallback (cbConnected )
return d
# Try to get cached version:
connections = self ._connections .get (key )
while connections :
connection = connections .pop (0 )
# Cancel timeout:
self ._timeouts [connection ].cancel ()
del self ._timeouts [connection ]
if connection .state == "QUIESCENT" :
return defer .succeed (connection )
def quiescentCallback (protocol ):
self ._putConnection (key , protocol )
factory = self ._factory (quiescentCallback )
return endpoint .connect (factory )
def _removeConnection(self, key, connection):
    """
    Disconnect a cached connection and drop the pool's records of it.

    The transport is told to lose its connection first; the connection is
    then taken out of the list cached under C{key} and its entry in the
    timeout mapping is deleted (the delayed call itself is not cancelled
    here).

    @param key: The key the connection is cached under.
    @param connection: The cached protocol instance to discard.
    """
    connection.transport.loseConnection()
    cachedForKey = self._connections[key]
    cachedForKey.remove(connection)
    self._timeouts.pop(connection)
def _putConnection(self, key, connection):
    """
    Return a persistent connection to the pool. This will be called by
    L{HTTP11ClientProtocol} when the connection becomes quiescent.

    A connection whose C{state} is not C{"QUIESCENT"} is refused: the problem
    is logged with a traceback and the pool is left untouched. Otherwise the
    oldest connections cached under C{key} are disconnected until there is
    room for one more, the new connection is appended, and a delayed call is
    scheduled to remove it again after C{cachedConnectionTimeout} seconds.

    @param key: The key the connection is cached under.
    @param connection: The protocol instance to cache.
    """
    if connection.state != "QUIESCENT":
        # Raise and immediately catch so that log.err() has a traceback to
        # report for debugging purposes.
        try:
            raise RuntimeError(
                "BUG: Non-quiescent protocol added to connection pool.")
        except RuntimeError:
            log.err()
        return

    connections = self._connections.setdefault(key, [])
    # Evict with ">=" in a loop rather than a single "==" check: if
    # maxPersistentPerHost is lowered while connections are cached, an
    # equality test would never match again and the list would grow without
    # bound.  The emptiness guard keeps a limit of zero from popping an empty
    # list; in that case the newest connection is still cached.
    while connections and len(connections) >= self.maxPersistentPerHost:
        dropped = connections.pop(0)
        dropped.transport.loseConnection()
        self._timeouts[dropped].cancel()
        del self._timeouts[dropped]
    connections.append(connection)
    self._timeouts[connection] = self._reactor.callLater(
        self.cachedConnectionTimeout, self._removeConnection,
        key, connection)
def closeCachedConnections(self):
    """
    Close all persistent connections and remove them from the pool.

    Every cached protocol is aborted, every pending timeout is cancelled,
    and both bookkeeping mappings are reset to empty.

    @return: L{defer.Deferred} that fires with C{None} once the results of
        all the C{abort()} calls have been gathered, i.e. when all
        connections have been closed.
    """
    # values() rather than itervalues(): the latter exists only on Python 2
    # dictionaries, while values() iterates the same lists on either version.
    results = [
        p.abort()
        for protocols in self._connections.values()
        for p in protocols]
    self._connections = {}
    for dc in self._timeouts.values():
        dc.cancel()
    self._timeouts = {}
    # Callers only care about completion, so discard the gathered results.
    return defer.gatherResults(results).addCallback(lambda ign: None)
class _AgentBase (object ):
"""
Base class offering common facilities for L{Agent}-type classes.
@ivar _reactor: The C{IReactorTime} implementation which will be used by
the pool, and perhaps by subclasses as well.
@ivar _pool: The L{HTTPConnectionPool} used to manage HTTP connections.
"""
def __init__(self, reactor, pool):
    """
    @param reactor: Stored as C{_reactor}, and handed to the fallback pool
        when one has to be created.
    @param pool: The L{HTTPConnectionPool} to use, or C{None} to have a
        non-persistent L{HTTPConnectionPool} created for this agent.
    """
    chosenPool = pool
    if chosenPool is None:
        # No pool supplied: fall back to one with persistence switched off.
        chosenPool = HTTPConnectionPool(reactor, False)
    self._reactor = reactor
    self._pool = chosenPool
def _computeHostValue (self , scheme , host , port ):
Expand All
@@ -833,15 +983,38 @@ def _computeHostValue(self, scheme, host, port):
return '%s:%d' % (host , port )
def _requestWithEndpoint (self , key , endpoint , method , parsedURI ,
headers , bodyProducer , requestPath ):
"""
Issue a new request, given the endpoint and the path sent as part of
the request.
"""
# Create minimal headers, if necessary:
if headers is None :
headers = Headers ()
if not headers .hasHeader ('host' ):
headers = headers .copy ()
headers .addRawHeader (
'host' , self ._computeHostValue (parsedURI .scheme , parsedURI .host ,
parsedURI .port ))
class Agent (_AgentMixin ):
d = self ._pool .getConnection (key , endpoint )
def cbConnected (proto ):
return proto .request (
Request (method , requestPath , headers , bodyProducer ,
persistent = self ._pool .persistent ))
d .addCallback (cbConnected )
return d
class Agent (_AgentBase ):
"""
L{Agent} is a very basic HTTP client. It supports I{HTTP} and I{HTTPS}
scheme URIs (but performs no certificate checking by default). It does not
support persistent connections.
scheme URIs (but performs no certificate checking by default).
@ivar _reactor: The L{IReactorTCP} and L{IReactorSSL} implementation which
will be used to set up connections over which to issue requests .
@param pool: A L{HTTPConnectionPool} instance, or C{None}, in which case a
non-persistent L{HTTPConnectionPool} instance will be created .
@ivar _contextFactory: A web context factory which will be used to create
SSL context objects for any SSL connections the agent needs to make.
Expand All
@@ -854,11 +1027,11 @@ class Agent(_AgentMixin):
@since: 9.0
"""
_protocol = HTTP11ClientProtocol
def __init__ (self , reactor , contextFactory = WebClientContextFactory (),
connectTimeout = None , bindAddress = None ):
self ._reactor = reactor
connectTimeout = None , bindAddress = None ,
pool = None ):
_AgentBase .__init__ (self , reactor , pool )
self ._contextFactory = contextFactory
self ._connectTimeout = connectTimeout
self ._bindAddress = bindAddress
Expand All
@@ -882,10 +1055,10 @@ def _wrapContextFactory(self, host, port):
return _WebToNormalContextFactory (self ._contextFactory , host , port )
def _connect (self , scheme , host , port ):
def _getEndpoint (self , scheme , host , port ):
"""
Connect to the given host and port, using a transport selected based on
scheme.
Get an endpoint for the given host and port, using a transport
selected based on scheme.
@param scheme: A string like C{'http'} or C{'https'} (the only two
supported values) to use to determine how to establish the
Expand All
@@ -897,23 +1070,20 @@ def _connect(self, scheme, host, port):
@param port: An C{int} giving the port number the connection will be
on.
@return: A L{Deferred} which fires with a connected instance of
C{self._protocol}.
@return: An endpoint which can be used to connect to given address.
"""
cc = ClientCreator (self ._reactor , self ._protocol )
kwargs = {}
if self ._connectTimeout is not None :
kwargs ['timeout' ] = self ._connectTimeout
kwargs ['bindAddress' ] = self ._bindAddress
if scheme == 'http' :
d = cc . connectTCP ( host , port , ** kwargs )
return TCP4ClientEndpoint ( self . _reactor , host , port , ** kwargs )
elif scheme == 'https' :
d = cc .connectSSL (host , port , self ._wrapContextFactory (host , port ),
** kwargs )
return SSL4ClientEndpoint (self ._reactor , host , port ,
self ._wrapContextFactory (host , port ),
** kwargs )
else :
d = defer .fail (SchemeNotSupported (
"Unsupported scheme: %r" % (scheme ,)))
return d
raise SchemeNotSupported ("Unsupported scheme: %r" % (scheme ,))
def request (self , method , uri , headers = None , bodyProducer = None ):
Expand Down
Expand Up
@@ -941,52 +1111,48 @@ def request(self, method, uri, headers=None, bodyProducer=None):
given URI is not supported.
@rtype: L{Deferred}
"""
return self ._connectAndRequest (method , uri , headers , bodyProducer )
class _HTTP11ClientFactory (protocol .ClientFactory ):
"""
A simple factory for L{HTTP11ClientProtocol}, used by L{ProxyAgent}.
@since: 11.1
"""
protocol = HTTP11ClientProtocol
parsedURI = _parse (uri )
try :
endpoint = self ._getEndpoint (parsedURI .scheme , parsedURI .host ,
parsedURI .port )
except SchemeNotSupported :
return defer .fail (Failure ())
key = (parsedURI .scheme , parsedURI .host , parsedURI .port )
return self ._requestWithEndpoint (key , endpoint , method , parsedURI ,
headers , bodyProducer , parsedURI .path )
class ProxyAgent (_AgentMixin ):
class ProxyAgent (_AgentBase ):
"""
An HTTP agent able to cross HTTP proxies.
@ivar _factory: The factory used to connect to the proxy.
@ivar _proxyEndpoint: The endpoint used to connect to the proxy, passing
the factory.
@ivar _proxyEndpoint: The endpoint used to connect to the proxy.
@since: 11.1
"""
_factory = _HTTP11ClientFactory
def __init__ (self , endpoint ):
def __init__(self, endpoint, reactor=None, pool=None):
    """
    @param endpoint: The endpoint used to connect to the proxy; stored as
        C{_proxyEndpoint}.
    @param reactor: Passed on to the base class initialiser. When C{None},
        the global reactor from L{twisted.internet} is used.
    @param pool: Passed on to the base class initialiser unchanged.
    """
    if reactor is None:
        # Only import the global reactor when the caller did not supply one.
        from twisted.internet import reactor
    _AgentBase.__init__(self, reactor, pool)
    self._proxyEndpoint = endpoint
def _connect (self , scheme , host , port ):
"""
Ignore the connection to the expected host, and connect to the proxy
instead.
"""
return self ._proxyEndpoint .connect (self ._factory ())
def request (self , method , uri , headers = None , bodyProducer = None ):
"""
Issue a new request via the configured proxy.
"""
return self ._connectAndRequest (method , uri , headers , bodyProducer ,
requestPath = uri )
# Cache *all* connections under the same key, since we are only
# connecting to a single destination, the proxy:
key = ("http-proxy" , self ._proxyEndpoint )
# To support proxying HTTPS via CONNECT, we will use key
# ("http-proxy-CONNECT", scheme, host, port), and an endpoint that
# wraps _proxyEndpoint with an additional callback to do the CONNECT.
return self ._requestWithEndpoint (key , self ._proxyEndpoint , method ,
_parse (uri ), headers , bodyProducer ,
uri )
Expand Down
Expand Up
@@ -1345,4 +1511,5 @@ def _handleResponse(self, response, method, uri, headers, redirectCount):
'PartialDownloadError' , 'HTTPPageGetter' , 'HTTPPageDownloader' ,
'HTTPClientFactory' , 'HTTPDownloader' , 'getPage' , 'downloadPage' ,
'ResponseDone' , 'Response' , 'ResponseFailed' , 'Agent' , 'CookieAgent' ,
'ProxyAgent' , 'ContentDecoderAgent' , 'GzipDecoder' , 'RedirectAgent' ]
'ProxyAgent' , 'ContentDecoderAgent' , 'GzipDecoder' , 'RedirectAgent' ,
'HTTPConnectionPool' ]