Permalink
Browse files

HTTP GET URL size limit workaround

This is a workaround for the size limit on the HTTP GETs that range uses.
Very long requests fail, so there is a certain type of query that
cannot be made.  This change updates the library to automatically split
the query and make multiple requests when it is larger than 7500 characters.
Apache is generally configured for 8190 characters, but with the FQDN
plus the other bits of the header, 7500 is a safer limit.
  • Loading branch information...
grierj committed Mar 7, 2012
1 parent af9c755 commit 7b2f33a9e39fa3fe82e3db1bd7b18b111f05a203
Showing with 68 additions and 1 deletion.
  1. +68 −1 python_seco_range/source/seco/range.py
@@ -17,14 +17,24 @@ def __str__(self):
return repr(self.value)
class Range(object):
- def __init__(self, host, user_agent=None):
def __init__(self, host, user_agent=None, max_char=7500):
    '''
    Remember which range server to talk to and how to talk to it.

    host       -- host part used when building request URLs
    user_agent -- optional value handed to get_user_agent() to build
                  the User-Agent header
    max_char   -- longest expression, in characters, that is sent as
                  a single request before the query gets split up
    '''
    self.host, self.max_char = host, max_char
    # The headers dict exists before get_user_agent() is called, exactly
    # as in the original ordering.
    self.headers = {}
    agent = self.get_user_agent(user_agent)
    self.headers['User-Agent'] = agent
def expand(self, expr, ret_list=True):
if isinstance(expr, list):
expr = ','.join(expr)
+
+ # If the query is too large for a single query, send it off to
+ # split functions
+ if len(expr) > self.max_char:
+ if ret_list:
+ return self.split_query(expr, ret_list)
+ else:
+ return self.split_collapse(expr)
+
if ret_list:
url = 'http://%s/range/list?%s' % (self.host, urllib2.quote(expr))
else:
@@ -52,8 +62,65 @@ def expand(self, expr, ret_list=True):
return req.read()
def collapse(self, expr):
    '''
    Shortcut for expand() with ret_list=False, i.e. ask for the
    collapsed form of expr rather than a list of individual entries.
    '''
    collapsed = self.expand(expr, ret_list=False)
    return collapsed
def split_query(self, expr, ret_list):
    '''
    Run a query that is too long for a single request.

    Range queries are GETs, which have a URL limit of 8190 on
    apache systems.  The expression is broken up with
    build_split_list(), every chunk is sent through expand(), and the
    per-chunk results are merged into one flat list.

    expr     -- the over-long expression (anything build_split_list()
                accepts)
    ret_list -- passed straight through to expand()

    Returns a list.  A chunk result that is itself a list (presumably
    the ret_list=True case) contributes its elements; any other result,
    such as a collapsed string, is added as a single entry.

    This is, admittedly, a total hack. Should fix range to accept PUT
    for queries.
    '''
    merged = []
    for short_expr in self.build_split_list(expr):
        result = self.expand(short_expr, ret_list=ret_list)
        if isinstance(result, list):
            # Appending here would nest one list per chunk instead of
            # producing the same flat shape a short query returns.
            merged.extend(result)
        else:
            merged.append(result)
    return merged
+
def split_collapse(self, expr):
    '''
    Collapse an expression that is too long for a single request.

    The pieces returned by split_query() are re-joined with commas
    (stray leading/trailing commas removed) and fed back in, over and
    over, until a pass produces exactly the expression it was given.
    That stable expression is returned.
    '''
    # An empty expression is already stable; no query is made for it.
    if '' == expr:
        return expr

    current = expr
    while True:
        pieces = self.split_query(current, ret_list=False)
        collapsed = ','.join(pieces).strip(',')
        if collapsed == current:
            return collapsed
        current = collapsed
+
def build_split_list(self, expr):
    '''
    Break an expression into chunks that respect self.max_char.

    expr may be a comma separated string or a list of items.  The items
    are sorted (a list passed in by the caller is left untouched) and
    packed, in order, into sub-lists.  Every item is charged its length
    plus one for the comma that joins it, and a new chunk is opened as
    soon as adding an item would push the running charge past
    self.max_char.  An item that is over the limit on its own still
    gets a chunk to itself.

    NOTE(review): the string form is split on every comma, so this
    assumes no single item contains a comma -- confirm against callers.

    Returns a list of lists of items.
    '''
    if isinstance(expr, str):
        expr = expr.split(',')
    items = sorted(expr)

    chunks = []
    used = 0
    for item in items:
        cost = len(item) + 1
        if chunks and used + cost <= self.max_char:
            chunks[-1].append(item)
            used += cost
        else:
            # Charge the item that opens the new chunk; starting the
            # count from zero would let later chunks overshoot the limit.
            chunks.append([item])
            used = cost
    return chunks
+
def get_user_agent(self, provided_agent):
"""
Build a verbose User-Agent for sending to the range server.

0 comments on commit 7b2f33a

Please sign in to comment.