Changeset 3337

Show
Ignore:
Timestamp:
01/09/10 04:01:12 (10 years ago)
Author:
cemeyer
Message:

httpretrieve: Clean up exceptions.

Location:
seattle/trunk/seattlelib
Files:
1 removed
1 modified

Legend:

Unmodified
Added
Removed
  • seattle/trunk/seattlelib/httpretrieve.repy

    r3335 r3337  
    2020include urlparse.repy 
    2121include sockettimeout.repy 
    22 include http_hierarchy_error.repy 
    2322include urllib.repy 
    2423 
     
    2625 
    2726 
    28  
    29 def httpretrieve_open(url, http_query=None, http_post=None, \ 
    30     http_header=None, header_timeout=30, content_timeout=30, \ 
    31     httpheader_limit=8192, httpcontent_limit=4194304): 
     27class HttpConnectionError: 
     28  """ 
     29  Error indicating that the web server has unexpectedly dropped the 
     30  connection. 
     31  """ 
     32 
     33 
     34 
     35 
     36class HttpBrokenServerError: 
     37  """ 
     38  Error indicating that the web server has sent us complete garbage instead 
     39  of something resembling HTTP. 
     40  """ 
     41 
     42 
     43 
     44 
     45def httpretrieve_open(url, postdata=None, querydata=None, \ 
     46    httpheaders=None, timeout=None): 
    3247  """ 
    3348  <Purpose> 
    3449     Returns a file-like object that can be used to read the content from 
    35      an HTTP server. 
     50     an HTTP server. Follows 3xx redirects. 
    3651 
    3752  <Arguments> 
    3853    url: 
    3954           The URL to perform a GET or POST request on. 
    40     http_post (optional): 
    41            A dictionary of form data to POST to the server. Passing 
    42            a non-None value results in a POST request being sent to the 
    43            server. 
    44     http_query (optional): 
    45            A dictionary of form data to send as a GET request's query 
     55    postdata (optional): 
     56           A dictionary of form data or a string to POST to the server. 
     57           Passing a non-None value results in a POST request being sent 
     58           to the server. 
     59    querydata (optional): 
     60           A dictionary of form data or a string to send as the query 
    4661           string to the server. 
    4762 
    48            If http_post is omitted, the URL is retrieved with GET. If 
    49            both http_post and http_query are omitted, there is no query 
     63           If postdata is omitted, the URL is retrieved with GET. If 
     64           both postdata and querydata are omitted, there is no query 
    5065           string sent in the request. 
    51     http_header (optional): 
     66 
     67           For both querydata and postdata, strings are sent *unmodified*. 
     68           This means you probably should encode them first, with 
     69           urllib_quote(). 
     70    httpheaders (optional): 
    5271           A dictionary of supplemental HTTP request headers to add to the 
    5372           request. 
    54     header_timeout (optional): 
    55            A timeout for receiving the HTTP response headers from the 
    56            server. Defaults to 30 seconds. 
    57     content_timeout (optional): 
    58            A timeout for receiving the body of the HTTP response from the 
    59            server. Defaults to 30 seconds. 
    60     httpheader_limit (optional): 
    61            An optional limit on the quantity of HTTP response headers to 
    62            accept from the server. Defaults to 8 kiB. 
    63     httpcontent_limit (optional): 
    64            An optional limit on the quantity of the HTTP response's body. 
    65            Defaults to 4 MiB. 
     73    timeout (optional): 
     74           A timeout for establishing a connection to the web server, 
     75           sending headers, and reading the response headers. 
     76 
     77           If excluded or None, never times out. 
    6678 
    6779  <Exceptions> 
    68     HttpUserInputError if given an invalid URL, or malformed limit / 
    69       timeout values. This is also raised if the user attempts to call 
    70       a method on the file-like object after closing it. 
    71  
    72     HttpConnectionError if opening the connection or sending the HTTP 
    73       request fails. 
    74  
    75     HttpHeaderReceivingError if the timeout for recieving headers is 
    76       exceeded or the limit on header data size is exceeded. 
    77  
    78     HttpHeaderFormatError if the header data size is exceeded, the 
    79       response is malformed, the Content-length response header is 
    80       negative, or the Location response header is malformed. 
    81  
    82     HttpContentReceivingError if the server fails to send the content, 
    83       if the timeout for receiving is exceeded, or if the server 
    84       connection fails for any reason while receiving content. 
    85  
    86     HttpContentLengthError if the Content-length header returned by the 
    87       server exceeds httpcontent_limit, or if the Content-length header 
    88       doesn't match the amount of data sent by the server. 
    89  
    90     HttpStatuscodeError if the status code isn't 2xx or 3xx. 
     80    ValueError if given an invalid URL, or malformed limit or timeout 
     81      values. This is also raised if the user attempts to call a method 
     82      on the file-like object after closing it. 
     83 
     84    HttpConnectionError if opening the connection fails, or if the 
     85      connection is closed by the server before we expect. 
     86 
     87    SocketTimeoutError if the timeout is exceeded. 
     88 
     89    HttpBrokenServerError if the response or the Location response header 
     90      is malformed. 
    9191 
    9292  <Side Effects> 
     
    9595  <Returns> 
    9696    Returns a file-like object which can be used to read the body of 
    97     the response from the web server. 
    98   """ 
     97    the response from the web server. The protocol version spoken by the 
     98    server, status code, and response headers are available as members of 
     99    the object. 
     100  """ 
     101 
     102  starttime = getruntime() 
    99103 
    100104  # Check if the URL is valid and get host, path, port and query 
    101   (host, port, path, url_query) = _httpretrieve_parse_given_url(url) 
     105  (host, port, path) = _httpretrieve_parse_given_url(url) 
    102106 
    103107  # Open connection to the web server 
     
    106110 
    107111  except Exception, e: 
    108     raise HttpConnectionError('Error: opening a connection failed with given http server, Given: ' + str(url) + ' ' + str(e)) 
     112    if repr(e).startswith("timeout("): 
     113      raise HttpConnectionError("Socket timed out connecting to host/port.") 
     114    raise 
    109115 
    110116  # build an HTTP request using the given port, host, path and query 
    111   httpheader = _httpretrieve_buildhttprequest(http_header, port, host, \ 
    112       path, url_query, http_query, http_post) 
     117  httpheader = _httpretrieve_buildhttprequest(httpheaders, port, host, \ 
     118      path, querydata, postdata) 
    113119 
    114120  # send HTTP request to the web server 
    115   _httpretrieve_sendhttprequest(sock, httpheader) 
     121  sock.send(httpheader) 
    116122 
    117123  # receive the header lines from the web server 
    118124  httpheaderlines = _httpretrieve_receive_httpheader(sock, \ 
    119       header_timeout, httpheader_limit) 
     125      timeout, getruntime() - starttime) 
    120126 
    121127  # get the status code and status message from the HTTP response 
     
    125131  if http_status_number == '200': 
    126132    contentlength = _httpretrieve_get_contentlength(httpheaderlines) 
    127     return _httpretrieve_filelikeobject(sock, contentlength, \ 
    128         httpcontent_limit, content_timeout) 
     133    return _httpretrieve_filelikeobject(sock, contentlength) 
    129134 
    130135  elif http_status_number == '301' or http_status_number == '302': 
     
    139144    contentlength = _httpretrieve_get_contentlength(httpheaderlines) 
    140145    http_errorcontent = \ 
    141         _httpretrieve_receive_httperror_content(sock, contentlength) 
    142     _httpretrieve_raise_httpstatuscode_error(http_status_number, \ 
    143         http_status_msg, http_errorcontent) 
    144  
    145  
    146  
    147  
    148  
    149 def httpretrieve_save_file(url, filename, http_query=None, http_post=None, \ 
    150     http_header=None, header_timeout=30, content_timeout=30, \ 
    151     httpheader_limit=8192, httpcontent_limit=4194304): 
     146        _httpretrieve_receive_httperror_content(sock) 
     147 
     148 
     149 
     150 
     151def httpretrieve_save_file(url, filename, querydata=None, postdata=None, \ 
     152    httpheaders=None, timeout=None): 
    152153  """ 
    153154  <Purpose> 
    154     Performs an HTTP request, and saves the content of the response to a 
     155    Perform an HTTP request, and save the content of the response to a 
    155156    file. 
    156157 
     
    162163 
    163164  <Exceptions> 
    164     HttpRetrieveClientError if we cannot create the file. 
    165  
    166     This function will all raise any exception raised by httpretrieve_open(), 
    167     for the same reasons. 
     165    This function will raise any exception raised by Repy file objects 
     166    in opening, writing to, and closing the file. 
     167 
     168    This function will also raise any exception raised by 
     169    httpretrieve_open(), for the same reasons. 
    168170 
    169171  <Side Effects> 
     
    175177 
    176178  httpcontent = '' 
    177   try: 
    178     newfile = open(filename, 'w') 
    179   except Exception, e: 
    180     raise HttpRetrieveClientError( \ 
    181         'Error on creating a file to saving http content' + str(e)) 
    182  
    183   http_obj = httpretrieve_open(url, http_query, http_post, http_header, \ 
    184       header_timeout, content_timeout, httpheader_limit, httpcontent_limit) 
     179  newfile = open(filename, 'w') 
     180 
     181  http_obj = httpretrieve_open(url, querydata, postdata, httpheaders, \ 
     182      timeout) 
    185183 
    186184  # Read from the file-like HTTP object into our file. 
     
    196194 
    197195 
    198 def httpretrieve_get_string(url, http_query=None, http_post=None, \ 
    199     http_header=None, header_timeout=30, content_timeout=30, \ 
    200     httpheader_limit=8192, httpcontent_limit=4194304): 
     196 
     197def httpretrieve_get_string(url, querydata=None, postdata=None, \ 
     198    httpheaders=None, timeout=30): 
    201199  """ 
    202200  <Purpose> 
     
    218216  """ 
    219217 
    220   http_obj = httpretrieve_open(url, http_query, http_post, http_header, \ 
    221       header_timeout, content_timeout, httpheader_limit, httpcontent_limit) 
     218  http_obj = httpretrieve_open(url, querydata, postdata, httpheaders, \ 
     219      timeout) 
    222220  httpcontent = http_obj.read() 
    223221  http_obj.close() 
     
    226224 
    227225 
     226 
    228227class _httpretrieve_filelikeobject: 
    229228  # This class implements a file-like object used for performing HTTP 
    230229  # requests and retrieving responses. 
    231230 
    232   def __init__(self, sock, contentlength, httpcontent_limit, content_timeout): 
     231  def __init__(self, sock, contentlength): 
    233232    self.sock = sock 
    234233    if contentlength == None: 
     
    237236      self.contentlengthisknown = True 
    238237      self.contentlength = contentlength 
    239     self.httpcontent_limit = httpcontent_limit 
    240     self.content_timeout = content_timeout 
     238    self.timeout = timeout 
    241239    self.fileobjclosed = False 
    242240    self.totalcontentisreceived = False 
     
    245243 
    246244 
    247   def read(self, limit = None): 
     245  def read(self, limit=None, timeout=None): 
    248246    """ 
    249247    <Purpose> 
     
    268266 
    269267    if self.fileobjclosed == True: 
    270       raise HttpUserInputError('Http Error: filelikeobj is closed') 
     268      raise ValueError("I/O operation on closed file") 
    271269 
    272270    if self.totalcontentisreceived: 
     
    277275      left_to_read = 1024 
    278276    else: 
    279       if not type(webpage_content) == int: 
    280         raise HttpUserInputError( \ 
    281             'User input Error: given a none int to receive' + str(e)) 
     277      # Sanity check type/value of limit 
     278      if type(limit) is not int: 
     279        raise TypeError("Expected an integer for limit") 
    282280      elif limit < 0: 
    283         raise HttpUserInputError( \ 
    284             'User input Error: given a negative number to receive, given: ' + \ 
    285             str(limit)) 
     281        raise ValueError("Expected a non-negative integer for limit") 
     282 
    286283      readhaslimit = True 
    287284      left_to_read = limit 
    288285 
    289     self.sock.settimeout(self.content_timeout) 
     286    if timeout is None: 
     287      self.sock.settimeout(0) 
     288    else: 
     289      self.sock.settimeout(timeout) 
    290290 
    291291    # Try to read up to limit, or until there is nothing left. 
    292292    httpcontent = '' 
    293293    while True: 
    294       try: 
    295         content = self.sock.recv(left_to_read) 
    296  
    297       except SocketTimeoutError: 
    298         self.sock.close() 
    299         raise HttpContentReceivingError( \ 
    300             'Timeout Error on receiving content: server taking too long to send content') 
    301  
    302       except Exception, e: 
    303         if 'Socket closed' not in str(e): 
    304           self.sock.close() 
    305           raise HttpContentReceivingError('Error on receiving content:' + str(e)) 
    306  
    307         self.totalcontentisreceived = True 
    308         break 
    309  
    310       else: 
    311         if len(content) >= self.httpcontent_limit: 
    312           raise HttpContentLengthError('content length exceeded ' + \ 
    313               self.httpcontent_limit) 
    314  
    315         httpcontent += content 
    316         if readhaslimit: 
    317           self.totalread += len(content) 
    318           if len(content) == left_to_read: 
    319             break 
    320           else: 
    321             left_to_read -= len(content) 
    322  
    323     # Check if there was an error receiving the HTTP response. 
    324     self._check_recieving_error(readhaslimit, httpcontent) 
     294      content = self.sock.recv(left_to_read) 
     295 
     296      httpcontent += content 
     297      if readhaslimit: 
     298        self.totalread += len(content) 
     299        if len(content) == left_to_read: 
     300          break 
     301        else: 
     302          left_to_read -= len(content) 
    325303 
    326304    return httpcontent 
     
    350328 
    351329 
    352   def _check_recieving_error(self, readhaslimit, httpcontent): 
    353     if len(httpcontent) == 0: 
    354       self.sock.close() 
    355       raise HttpContentLengthError('Error on recieving content: ' + \ 
    356           'received a http header but didnt receive any http content') 
    357  
    358     if self.contentlengthisknown: 
    359       if readhaslimit and self.totalcontentisreceived: 
    360         if self.totalread != self.contentlength: 
    361           self.sock.close() 
    362           raise HttpContentLengthError('Total length read with limit ' + \ 
    363               'did not match the content length: total read: ' + \ 
    364               str(self.totalread) + ' content length: ' + \ 
    365               str(self.contentlength)) 
    366  
    367       if readhaslimit == False: 
    368         if len(httpcontent) != self.contentlength: 
    369           self.sock.close() 
    370           raise HttpContentLengthError('Total received length did not ' + \ 
    371               'match the content length: received: ' + \ 
    372               str(len(httpcontent)) + ' content length : ' + \ 
    373               str(self.contentlength)) 
    374  
    375  
    376  
    377330 
    378331def _httpretrieve_parse_given_url(url): 
    379332  # Checks that the URL is in the right format and returns a tuple of host, 
    380333  # port, path and query. 
    381   try: 
    382     urlparse = urlparse_urlsplit(url) 
    383   except Exception, e: 
    384     raise HttpUserInputError('Given URL error: ' + str(e)) 
    385   else: 
    386     if urlparse['scheme'] != 'http': 
    387       raise HttpUserInputError('Given URL error: the given protocol ' + \ 
    388           urlparse['scheme'] + ' isnt supported') 
    389     if urlparse['hostname'] == None: 
    390       raise HttpUserInputError('Given URL error: host name is not given') 
    391  
    392     host = urlparse['hostname'] 
    393     path = urlparse['path'] 
    394     query = urlparse['query'] 
    395     port = urlparse.get('port', 80) 
    396  
    397     return (host, port, path, query) 
    398  
    399  
    400  
    401  
    402 def _httpretrieve_buildhttprequest(http_header, port, host, path, url_query, 
    403     dict_query, http_post): 
    404   # Sends the HTTP request. 
    405  
    406   if http_post != None: 
     334  urlparse = urlparse_urlsplit(url) 
     335  if urlparse['scheme'] != 'http': 
     336    raise ValueError("URL doesn't seem to be for the HTTP protocol.") 
     337  if urlparse['hostname'] == None: 
     338    raise ValueError("Missing hostname.") 
     339  if urlparse['query'] is not None and urlparse['query'] != "": 
     340    raise ValueError("URL cannot include a query string.") 
     341 
     342  host = urlparse['hostname'] 
     343  path = urlparse['path'] 
     344  port = urlparse.get('port', 80) 
     345 
     346  return (host, port, path) 
     347 
     348 
     349 
     350 
     351def _httpretrieve_buildhttprequest(httpheaders, port, host, path, \ 
     352    querydata, postdata): 
     353  # Builds the HTTP request. 
     354 
     355  if postdata != None: 
    407356    # There is a posted data, use HTTP POST. 
    408357 
    409     if not type(http_post) == dict: 
    410       raise HttpUserInputError('The given http_post is not a ' + \ 
    411           'dictionary, given: ' + str(type(http_post))) 
    412  
    413     # Convert the dictionary of form values into a POST message body. 
    414     try: 
    415       http_post = urllib_quote_parameters(http_post) 
    416     except Exception, e: 
    417       raise HttpUserInputError('Error encoding the given http post ' + \ 
    418           'dictionary ' + str(http_post) + str(e)) 
     358    if type(postdata) is dict: 
     359      postdata = urllib_quote_parameters(postdata) 
     360 
     361    if type(postdata) is not str: 
     362      raise TypeError("postdata should be a dict of form data or string") 
    419363 
    420364    # Build the minimal HTTP request header -- includes only the request 
    421365    # and the Host field. 
    422     httpheader = _httpretrieve_httprequestmain_header('POST', url_query, \ 
    423         dict_query, path, host, port) 
     366    httpheader = _httpretrieve_httprequestmain_header('POST', querydata, \ 
     367        path, host, port) 
    424368 
    425369    # Build the rest of the request. 
    426     httpheader += _httpretrieve_parse_clienthttpheader(http_header) 
    427     httpheader += 'Content-Length: ' + str(len(http_post)) + '\r\n' 
     370    httpheader += _httpretrieve_parse_clienthttpheader(httpheaders) 
     371    httpheader += 'Content-Length: ' + str(len(postdata)) + '\r\n' 
    428372    httpheader += '\r\n' 
    429     httpheader += http_post 
     373    httpheader += postdata 
    430374 
    431375  else: 
    432376    # There is no posted data, use HTTP GET. 
    433     httpheader = _httpretrieve_httprequestmain_header('GET', url_query, \ 
    434         dict_query, path, host, port) 
    435     httpheader += _httpretrieve_parse_clienthttpheader(http_header) 
     377    httpheader = _httpretrieve_httprequestmain_header('GET', querydata, \ 
     378        path, host, port) 
     379    httpheader += _httpretrieve_parse_clienthttpheader(httpheaders) 
    436380    httpheader += '\r\n' 
    437381 
    438   # return header with a new line which is signal for http header is done 
    439382  return httpheader 
    440383 
     
    442385 
    443386 
    444 def _httpretrieve_httprequestmain_header(http_command, url_query, \ 
    445     dict_query, path, host, port): 
     387def _httpretrieve_httprequestmain_header(http_command, querydata, path, \ 
     388    host, port): 
    446389  # Builds a minimal HTTP request, returning it as a string. 
    447390 
    448   # Sanity check -- the user should have only given us one set of data. 
    449   if url_query != '' and dict_query != None: 
    450     raise HttpUserInputError('Cant input a http query with the url and ' + \ 
    451         'an extra parameter dictionary with a http query') 
    452  
    453   elif dict_query != None: 
    454     # Send form data via GET. 
    455     try: 
    456       encoded_query = '?' + urllib_quote_parameters(dict_query) 
    457     except Exception, e: 
    458       raise HttpUserInputError('Error encoding the given http ' + \ 
    459           'query dictionary ' + str(dict_query) + str(e)) 
    460  
    461   elif url_query != '': 
    462     # Send an arbitrary string via GET. 
     391  if type(querydata) is dict: 
     392    querydata = urllib_quote_parameters(querydata) 
     393 
     394  if type(querydata) is str and querydata != '': 
    463395    encoded_query = '?' + url_query 
    464396  else: 
     
    484416 
    485417 
    486 def _httpretrieve_parse_clienthttpheader(http_header): 
     418def _httpretrieve_parse_clienthttpheader(httpheaders): 
    487419  # Converts a dictionary of HTTP request headers into a string. 
    488420 
    489   if http_header == None: 
     421  if httpheaders is None: 
    490422    return '' 
    491423 
    492   elif not type(http_header) == dict: 
    493     raise HttpUserInputError('The given http_post is not a dictionary, ' + \ 
    494         'given: ' + str(type(http_header))) 
     424  elif type(httpheaders) is not dict: 
     425    raise TypeError("Expected HTTP headers as a dictionary.") 
    495426 
    496427  else: 
    497428    clienthttpheader = '' 
    498     for key, val in http_header.items(): 
    499       clienthttpheader += key + ' : ' + val + '\r\n' 
     429    for key, val in httpheaders.items(): 
     430      clienthttpheader += key + ': ' + val + '\r\n' 
    500431    return clienthttpheader 
    501432 
     
    503434 
    504435 
    505 def _httpretrieve_sendhttprequest(sock, httpheader): 
    506   # Send the HTTP request; raise an exception on error. 
    507   try: 
    508     sock.send(httpheader) 
    509   except Exception, e: 
    510     sock.close() 
    511     raise HttpConnectionError('Connection error: on sending http ' + \ 
    512         'request to server ' + str(e)) 
    513  
    514  
    515  
    516  
    517 def _httpretrieve_receive_httpheader(sock, header_timeout, httpheader_limit): 
     436def _httpretrieve_receive_httpheader(sock, timeout, currentruntime): 
    518437  # Receives the HTTP headers only. Returns them as a list of strings. 
    519438 
    520   sock.settimeout(header_timeout) 
     439  if timeout is None: 
     440    sock.settimeout(0) 
     441  elif timeout - currentruntime <= 0: 
     442    raise SocketTimeoutError("Timed out") 
     443  else: 
     444    sock.settimeout(timeout - currentruntime) 
    521445 
    522446  httpheader_received = 0 
     
    524448  while True: 
    525449    # CRLFCRLF separates the HTTP headers from the body of the response. 
    526     if '\r\n\r\n' in httpheader: 
     450    if httpheader.endswith('\r\n\r\n'): 
    527451      return httpheader.split('\r\n') 
    528452 
    529     # Against the HTTP spec, we also accept LFLF as a mark of reaching the 
    530     # end of the headers. 
    531     if '\n\n' in httpheader: 
    532       return httpheader.split('\n') 
    533  
    534     if httpheader_limit == httpheader_received: 
    535       sock.close() 
    536       raise HttpHeaderFormatError('Http header length Error: The http ' + \ 
    537           'header is too long, exceeded 8 kb') 
    538  
    539     try: 
    540       content = sock.recv(1) 
    541       httpheader_received += 1 
    542  
    543     except SocketTimeoutError: 
    544       raise HttpHeaderReceivingError('Timeout Error on receiving ' + \ 
    545           'header: server taking too long to send http header') 
    546  
    547     except Exception, e: 
    548       sock.close() 
    549       raise HttpHeaderReceivingError('Error on recieving http ' + \ 
    550           'header: ' + str(e)) 
    551  
    552     else: 
    553       httpheader += content 
     453    content = sock.recv(1) 
     454    httpheader_received += 1 
     455    httpheader += content 
    554456 
    555457 
     
    565467 
    566468  if len(headersplit) != 3: 
    567     raise HttpHeaderFormatError('Invalid Http header status code ' + \ 
    568         'format: Correct format is HTTP<version> http_status_number ' + \ 
    569         'http_status_msg: Given '  + httpstatusheader) 
     469    raise HttpBrokenServerError("Server returned garbage for HTTP response.") 
    570470  if not httpstatusheader.startswith('HTTP'): 
    571     raise HttpHeaderFormatError('Invalid Http header status code ' + \ 
    572         'format: Http header status code should start of with ' + \ 
    573         'HTTP<version> but given: '  + httpstatusheader) 
    574  
    575   http_version = headersplit[0] 
     471    raise HttpBrokenServerError("Server returned garbage for HTTP response.") 
     472 
     473  http_status_msg = headersplit[2] 
    576474 
    577475  try: 
    578     int(headersplit[1]) 
     476    return (int(headersplit[1]), http_status_msg) 
    579477  except ValueError, e: 
    580     raise HttpHeaderFormatError('Invalid Http header status code ' + \ 
    581         'format: Status number should be a int, Given: ' + \ 
    582         str(headersplit[1]) + str(e)) 
    583   else: 
    584     http_status_number = headersplit[1] 
    585  
    586   http_status_msg = headersplit[2] 
    587   return http_status_number, http_status_msg 
    588  
    589  
    590  
    591  
    592 def _httpretrieve_receive_httperror_content(sock, contentlength): 
     478    raise HttpBrokenServerError("Server returned garbage for HTTP response.") 
     479 
     480 
     481 
     482 
     483def _httpretrieve_receive_httperror_content(sock): 
    593484  # Receive the error message (this is called when the server returns an 
    594485  # 'error' response). 
     
    596487  httperror_content = '' 
    597488  while True: 
    598     try: 
    599       content = sock.recv(1024) 
    600  
    601     except SocketTimeoutError: 
    602       raise HttpContentReceivingError('Timeout Error on receiving http ' + \ 
    603           'error content: server taking too long to send http error content') 
    604     except Exception, e: 
    605       if 'Socket closed' not in str(e): 
    606         sock.close() 
    607         raise HttpContentReceivingError('Error on receiving http error ' + \ 
    608             'content: ' + str(e)) 
    609       break 
    610  
    611     else: 
    612       httperror_content += content 
    613  
    614   if contentlength != None: 
    615     if contentlength != len(httperror_content): 
    616       raise HttpContentLengthError('Error on receiving http error ' + \ 
    617           'content: received conent length: ' + \ 
    618           str(len(httperror_content)) + ' actual content length: ' + \ 
    619           str(contentlength)) 
     489    content = sock.recv(1024) 
     490    httperror_content += content 
     491 
    620492  return httperror_content 
    621  
    622  
    623  
    624  
    625 def _httpretrieve_raise_httpstatuscode_error(http_status_number, http_status_msg, http_errorcontent): 
    626   # Raises an exception for the status code. 
    627  
    628   # Arbitrarily chosen individual status codes: 
    629   if http_status_number == '202': 
    630     raise HttpError202('Http response error: ' + http_status_number + ' ' + http_status_msg +  ' http proccesing not responding. Http error content: ' + http_errorcontent) 
    631   elif http_status_number == '204': 
    632     raise HttpError204('Http response error: ' + http_status_number + ' ' + http_status_msg +  ' thier is no http body content. Http error content: ' + http_errorcontent) 
    633   elif http_status_number == '300': 
    634     raise HttpError300('Http response error: ' + http_status_number + ' ' + http_status_msg +  ' multiple redirect isnt suported. Http error content: ' + http_errorcontent) 
    635   elif http_status_number == '404': 
    636     raise HttpError404('Http response error: ' + http_status_number + ' ' + http_status_msg +  ' cant find anything matching the given url. Http error content: ' + http_errorcontent) 
    637   elif http_status_number == '403': 
    638     raise HttpError403('Http response error: ' + http_status_number + ' ' + http_status_msg +  ' the request was illegal. Http error content: ' + http_errorcontent) 
    639   elif http_status_number == '400': 
    640     raise HttpError400('Http response error: ' + http_status_number + ' ' + http_status_msg +  ' the request contians bad syntex. Http error content: ' + http_errorcontent) 
    641   elif http_status_number == '500': 
    642     raise HttpError500('Http response error: ' + http_status_number + ' ' + http_status_msg +  ' The server encountered an unexpected condition. Http error content: ' + http_errorcontent) 
    643   elif http_status_number == '502': 
    644     raise HttpError502('Http response error: ' + http_status_number + ' ' + http_status_msg +  ' acting like a gateway received an invalid response. Http error content: ' + http_errorcontent) 
    645  
    646   # Ranges: 
    647   elif http_status_number >= '100' and http_status_number < '200': 
    648     raise HttpError1xx('Http response error: Information ' + http_status_number + ' ' + http_status_msg + '.Http error content: ' + http_errorcontent) 
    649   elif http_status_number > '200' and http_status_number < '300': 
    650     raise HttpError2xx('Http response error: success error ' + http_status_number + ' ' + http_status_msg + '.Http error content: ' + http_errorcontent) 
    651   elif http_status_number >= '300' and http_status_number < '400': 
    652     raise HttpError3xx('Http response error: Redirection error' + http_status_number + ' ' + http_status_msg + '.Http error content: ' + http_errorcontent) 
    653   elif http_status_number >= '400' and http_status_number < '500': 
    654     raise HttpError4xx('Http response error: client error ' + http_status_number + ' ' + http_status_msg + '.Http error content: ' + http_errorcontent) 
    655   elif http_status_number >= '500' and http_status_number < '600': 
    656     raise HttpError5xx('Http response error: server error: ' + http_status_number + ' ' + http_status_msg + '.Http error content: ' + http_errorcontent) 
    657   else: 
    658     raise HttpStatusCodeError('Http response error: invalid http status response, given ' + http_status_number + '.Http error content: ' + http_errorcontent) 
    659493 
    660494 
     
    666500  for headerline in httpheaderlines: 
    667501    if headerline.startswith('Location: '): 
    668       redirect = headerline[len('Location: '):] 
    669  
    670       if len(redirect) == 0: 
    671         raise HttpHeaderFormatError('Http header redirection format ' + \ 
    672             'error: http server gave a redierect location with no URL') 
    673       return redirect 
    674  
    675   raise HttpHeaderFormatError('Http header redirection format error: ' + \ 
    676       'http redirect header didnt include the location') 
     502      return headerline[len('Location: '):] 
     503 
     504  raise HttpBrokenServerError("HTTP server indicated a redirect and did " + \ 
     505      "not send a Location header.") 
    677506 
    678507 
     
    684513  for headerline in httpheaderlines: 
    685514    if headerline.startswith('Content-Length: '): 
    686       try: 
    687         contentlength = int(headerline[len('Content-Length: '):]) 
    688       except ValueError, e: 
    689         raise HttpHeaderFormatError('Http header Content-Length format ' + \ 
    690             'error: http server provided content length that isnt a int ' + \ 
    691             str(e)) 
    692  
    693       if contentlength <= 0: 
    694         raise HttpHeaderFormatError('Http header Content-Length format ' + \ 
    695             'error: provided content length with invalid number ' + \ 
    696             str(contentlength)) 
    697       else: 
    698         return contentlength 
     515      return int(headerline[len('Content-Length: '):]) 
    699516 
    700517  return None