You cannot select more than 25 topics.
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					436 lines
				
				14 KiB
			
		
		
			
		
	
	
					436 lines
				
				14 KiB
			| 
								 
											3 years ago
										 
									 | 
							
								from __future__ import absolute_import
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import re
							 | 
						||
| 
								 | 
							
								from collections import namedtuple
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from ..exceptions import LocationParseError
							 | 
						||
| 
								 | 
							
								from ..packages import six
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								url_attrs = ["scheme", "auth", "host", "port", "path", "query", "fragment"]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# We only want to normalize urls with an HTTP(S) scheme.
							 | 
						||
| 
								 | 
							
								# urllib3 infers URLs without a scheme (None) to be http.
							 | 
						||
| 
								 | 
							
								NORMALIZABLE_SCHEMES = ("http", "https", None)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Almost all of these patterns were derived from the
							 | 
						||
| 
								 | 
							
								# 'rfc3986' module: https://github.com/python-hyper/rfc3986
							 | 
						||
| 
								 | 
							
								PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}")
							 | 
						||
| 
								 | 
							
								SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)")
							 | 
						||
| 
								 | 
							
								URI_RE = re.compile(
							 | 
						||
| 
								 | 
							
								    r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?"
							 | 
						||
| 
								 | 
							
								    r"(?://([^\\/?#]*))?"
							 | 
						||
| 
								 | 
							
								    r"([^?#]*)"
							 | 
						||
| 
								 | 
							
								    r"(?:\?([^#]*))?"
							 | 
						||
| 
								 | 
							
								    r"(?:#(.*))?$",
							 | 
						||
| 
								 | 
							
								    re.UNICODE | re.DOTALL,
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
							 | 
						||
| 
								 | 
							
								HEX_PAT = "[0-9A-Fa-f]{1,4}"
							 | 
						||
| 
								 | 
							
								LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=HEX_PAT, ipv4=IPV4_PAT)
							 | 
						||
| 
								 | 
							
								_subs = {"hex": HEX_PAT, "ls32": LS32_PAT}
							 | 
						||
| 
								 | 
							
								_variations = [
							 | 
						||
| 
								 | 
							
								    #                            6( h16 ":" ) ls32
							 | 
						||
| 
								 | 
							
								    "(?:%(hex)s:){6}%(ls32)s",
							 | 
						||
| 
								 | 
							
								    #                       "::" 5( h16 ":" ) ls32
							 | 
						||
| 
								 | 
							
								    "::(?:%(hex)s:){5}%(ls32)s",
							 | 
						||
| 
								 | 
							
								    # [               h16 ] "::" 4( h16 ":" ) ls32
							 | 
						||
| 
								 | 
							
								    "(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s",
							 | 
						||
| 
								 | 
							
								    # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
							 | 
						||
| 
								 | 
							
								    "(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s",
							 | 
						||
| 
								 | 
							
								    # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
							 | 
						||
| 
								 | 
							
								    "(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s",
							 | 
						||
| 
								 | 
							
								    # [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
							 | 
						||
| 
								 | 
							
								    "(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
							 | 
						||
| 
								 | 
							
								    # [ *4( h16 ":" ) h16 ] "::"              ls32
							 | 
						||
| 
								 | 
							
								    "(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
							 | 
						||
| 
								 | 
							
								    # [ *5( h16 ":" ) h16 ] "::"              h16
							 | 
						||
| 
								 | 
							
								    "(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s",
							 | 
						||
| 
								 | 
							
								    # [ *6( h16 ":" ) h16 ] "::"
							 | 
						||
| 
								 | 
							
								    "(?:(?:%(hex)s:){0,6}%(hex)s)?::",
							 | 
						||
| 
								 | 
							
								]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\-~"
							 | 
						||
| 
								 | 
							
								IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
							 | 
						||
| 
								 | 
							
								ZONE_ID_PAT = "(?:%25|%)(?:[" + UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
							 | 
						||
| 
								 | 
							
								IPV6_ADDRZ_PAT = r"\[" + IPV6_PAT + r"(?:" + ZONE_ID_PAT + r")?\]"
							 | 
						||
| 
								 | 
							
								REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
							 | 
						||
| 
								 | 
							
								TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								IPV4_RE = re.compile("^" + IPV4_PAT + "$")
							 | 
						||
| 
								 | 
							
								IPV6_RE = re.compile("^" + IPV6_PAT + "$")
							 | 
						||
| 
								 | 
							
								IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
							 | 
						||
| 
								 | 
							
								BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
							 | 
						||
| 
								 | 
							
								ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % (
							 | 
						||
| 
								 | 
							
								    REG_NAME_PAT,
							 | 
						||
| 
								 | 
							
								    IPV4_PAT,
							 | 
						||
| 
								 | 
							
								    IPV6_ADDRZ_PAT,
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								_HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								UNRESERVED_CHARS = set(
							 | 
						||
| 
								 | 
							
								    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								SUB_DELIM_CHARS = set("!$&'()*+,;=")
							 | 
						||
| 
								 | 
							
								USERINFO_CHARS = UNRESERVED_CHARS | SUB_DELIM_CHARS | {":"}
							 | 
						||
| 
								 | 
							
								PATH_CHARS = USERINFO_CHARS | {"@", "/"}
							 | 
						||
| 
								 | 
							
								QUERY_CHARS = FRAGMENT_CHARS = PATH_CHARS | {"?"}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class Url(namedtuple("Url", url_attrs)):
							 | 
						||
| 
								 | 
							
								    """
							 | 
						||
| 
								 | 
							
								    Data structure for representing an HTTP URL. Used as a return value for
							 | 
						||
| 
								 | 
							
								    :func:`parse_url`. Both the scheme and host are normalized as they are
							 | 
						||
| 
								 | 
							
								    both case-insensitive according to RFC 3986.
							 | 
						||
| 
								 | 
							
								    """
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    __slots__ = ()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def __new__(
							 | 
						||
| 
								 | 
							
								        cls,
							 | 
						||
| 
								 | 
							
								        scheme=None,
							 | 
						||
| 
								 | 
							
								        auth=None,
							 | 
						||
| 
								 | 
							
								        host=None,
							 | 
						||
| 
								 | 
							
								        port=None,
							 | 
						||
| 
								 | 
							
								        path=None,
							 | 
						||
| 
								 | 
							
								        query=None,
							 | 
						||
| 
								 | 
							
								        fragment=None,
							 | 
						||
| 
								 | 
							
								    ):
							 | 
						||
| 
								 | 
							
								        if path and not path.startswith("/"):
							 | 
						||
| 
								 | 
							
								            path = "/" + path
							 | 
						||
| 
								 | 
							
								        if scheme is not None:
							 | 
						||
| 
								 | 
							
								            scheme = scheme.lower()
							 | 
						||
| 
								 | 
							
								        return super(Url, cls).__new__(
							 | 
						||
| 
								 | 
							
								            cls, scheme, auth, host, port, path, query, fragment
							 | 
						||
| 
								 | 
							
								        )
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    @property
							 | 
						||
| 
								 | 
							
								    def hostname(self):
							 | 
						||
| 
								 | 
							
								        """For backwards-compatibility with urlparse. We're nice like that."""
							 | 
						||
| 
								 | 
							
								        return self.host
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    @property
							 | 
						||
| 
								 | 
							
								    def request_uri(self):
							 | 
						||
| 
								 | 
							
								        """Absolute path including the query string."""
							 | 
						||
| 
								 | 
							
								        uri = self.path or "/"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        if self.query is not None:
							 | 
						||
| 
								 | 
							
								            uri += "?" + self.query
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        return uri
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    @property
							 | 
						||
| 
								 | 
							
								    def netloc(self):
							 | 
						||
| 
								 | 
							
								        """Network location including host and port"""
							 | 
						||
| 
								 | 
							
								        if self.port:
							 | 
						||
| 
								 | 
							
								            return "%s:%d" % (self.host, self.port)
							 | 
						||
| 
								 | 
							
								        return self.host
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    @property
							 | 
						||
| 
								 | 
							
								    def url(self):
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        Convert self into a url
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        This function should more or less round-trip with :func:`.parse_url`. The
							 | 
						||
| 
								 | 
							
								        returned url may not be exactly the same as the url inputted to
							 | 
						||
| 
								 | 
							
								        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
							 | 
						||
| 
								 | 
							
								        with a blank port will have : removed).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        Example: ::
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            >>> U = parse_url('http://google.com/mail/')
							 | 
						||
| 
								 | 
							
								            >>> U.url
							 | 
						||
| 
								 | 
							
								            'http://google.com/mail/'
							 | 
						||
| 
								 | 
							
								            >>> Url('http', 'username:password', 'host.com', 80,
							 | 
						||
| 
								 | 
							
								            ... '/path', 'query', 'fragment').url
							 | 
						||
| 
								 | 
							
								            'http://username:password@host.com:80/path?query#fragment'
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        scheme, auth, host, port, path, query, fragment = self
							 | 
						||
| 
								 | 
							
								        url = u""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        # We use "is not None" we want things to happen with empty strings (or 0 port)
							 | 
						||
| 
								 | 
							
								        if scheme is not None:
							 | 
						||
| 
								 | 
							
								            url += scheme + u"://"
							 | 
						||
| 
								 | 
							
								        if auth is not None:
							 | 
						||
| 
								 | 
							
								            url += auth + u"@"
							 | 
						||
| 
								 | 
							
								        if host is not None:
							 | 
						||
| 
								 | 
							
								            url += host
							 | 
						||
| 
								 | 
							
								        if port is not None:
							 | 
						||
| 
								 | 
							
								            url += u":" + str(port)
							 | 
						||
| 
								 | 
							
								        if path is not None:
							 | 
						||
| 
								 | 
							
								            url += path
							 | 
						||
| 
								 | 
							
								        if query is not None:
							 | 
						||
| 
								 | 
							
								            url += u"?" + query
							 | 
						||
| 
								 | 
							
								        if fragment is not None:
							 | 
						||
| 
								 | 
							
								            url += u"#" + fragment
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        return url
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def __str__(self):
							 | 
						||
| 
								 | 
							
								        return self.url
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def split_first(s, delims):
							 | 
						||
| 
								 | 
							
								    """
							 | 
						||
| 
								 | 
							
								    .. deprecated:: 1.25
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Given a string and an iterable of delimiters, split on the first found
							 | 
						||
| 
								 | 
							
								    delimiter. Return two split parts and the matched delimiter.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    If not found, then the first part is the full input string.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Example::
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        >>> split_first('foo/bar?baz', '?/=')
							 | 
						||
| 
								 | 
							
								        ('foo', 'bar?baz', '/')
							 | 
						||
| 
								 | 
							
								        >>> split_first('foo/bar?baz', '123')
							 | 
						||
| 
								 | 
							
								        ('foo/bar?baz', '', None)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Scales linearly with number of delims. Not ideal for large number of delims.
							 | 
						||
| 
								 | 
							
								    """
							 | 
						||
| 
								 | 
							
								    min_idx = None
							 | 
						||
| 
								 | 
							
								    min_delim = None
							 | 
						||
| 
								 | 
							
								    for d in delims:
							 | 
						||
| 
								 | 
							
								        idx = s.find(d)
							 | 
						||
| 
								 | 
							
								        if idx < 0:
							 | 
						||
| 
								 | 
							
								            continue
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        if min_idx is None or idx < min_idx:
							 | 
						||
| 
								 | 
							
								            min_idx = idx
							 | 
						||
| 
								 | 
							
								            min_delim = d
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    if min_idx is None or min_idx < 0:
							 | 
						||
| 
								 | 
							
								        return s, "", None
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    return s[:min_idx], s[min_idx + 1 :], min_delim
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):
							 | 
						||
| 
								 | 
							
								    """Percent-encodes a URI component without reapplying
							 | 
						||
| 
								 | 
							
								    onto an already percent-encoded component.
							 | 
						||
| 
								 | 
							
								    """
							 | 
						||
| 
								 | 
							
								    if component is None:
							 | 
						||
| 
								 | 
							
								        return component
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    component = six.ensure_text(component)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    # Normalize existing percent-encoded bytes.
							 | 
						||
| 
								 | 
							
								    # Try to see if the component we're encoding is already percent-encoded
							 | 
						||
| 
								 | 
							
								    # so we can skip all '%' characters but still encode all others.
							 | 
						||
| 
								 | 
							
								    component, percent_encodings = PERCENT_RE.subn(
							 | 
						||
| 
								 | 
							
								        lambda match: match.group(0).upper(), component
							 | 
						||
| 
								 | 
							
								    )
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    uri_bytes = component.encode("utf-8", "surrogatepass")
							 | 
						||
| 
								 | 
							
								    is_percent_encoded = percent_encodings == uri_bytes.count(b"%")
							 | 
						||
| 
								 | 
							
								    encoded_component = bytearray()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    for i in range(0, len(uri_bytes)):
							 | 
						||
| 
								 | 
							
								        # Will return a single character bytestring on both Python 2 & 3
							 | 
						||
| 
								 | 
							
								        byte = uri_bytes[i : i + 1]
							 | 
						||
| 
								 | 
							
								        byte_ord = ord(byte)
							 | 
						||
| 
								 | 
							
								        if (is_percent_encoded and byte == b"%") or (
							 | 
						||
| 
								 | 
							
								            byte_ord < 128 and byte.decode() in allowed_chars
							 | 
						||
| 
								 | 
							
								        ):
							 | 
						||
| 
								 | 
							
								            encoded_component += byte
							 | 
						||
| 
								 | 
							
								            continue
							 | 
						||
| 
								 | 
							
								        encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper()))
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    return encoded_component.decode(encoding)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def _remove_path_dot_segments(path):
							 | 
						||
| 
								 | 
							
								    # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
							 | 
						||
| 
								 | 
							
								    segments = path.split("/")  # Turn the path into a list of segments
							 | 
						||
| 
								 | 
							
								    output = []  # Initialize the variable to use to store output
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    for segment in segments:
							 | 
						||
| 
								 | 
							
								        # '.' is the current directory, so ignore it, it is superfluous
							 | 
						||
| 
								 | 
							
								        if segment == ".":
							 | 
						||
| 
								 | 
							
								            continue
							 | 
						||
| 
								 | 
							
								        # Anything other than '..', should be appended to the output
							 | 
						||
| 
								 | 
							
								        elif segment != "..":
							 | 
						||
| 
								 | 
							
								            output.append(segment)
							 | 
						||
| 
								 | 
							
								        # In this case segment == '..', if we can, we should pop the last
							 | 
						||
| 
								 | 
							
								        # element
							 | 
						||
| 
								 | 
							
								        elif output:
							 | 
						||
| 
								 | 
							
								            output.pop()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    # If the path starts with '/' and the output is empty or the first string
							 | 
						||
| 
								 | 
							
								    # is non-empty
							 | 
						||
| 
								 | 
							
								    if path.startswith("/") and (not output or output[0]):
							 | 
						||
| 
								 | 
							
								        output.insert(0, "")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    # If the path starts with '/.' or '/..' ensure we add one more empty
							 | 
						||
| 
								 | 
							
								    # string to add a trailing '/'
							 | 
						||
| 
								 | 
							
								    if path.endswith(("/.", "/..")):
							 | 
						||
| 
								 | 
							
								        output.append("")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    return "/".join(output)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def _normalize_host(host, scheme):
							 | 
						||
| 
								 | 
							
								    if host:
							 | 
						||
| 
								 | 
							
								        if isinstance(host, six.binary_type):
							 | 
						||
| 
								 | 
							
								            host = six.ensure_str(host)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        if scheme in NORMALIZABLE_SCHEMES:
							 | 
						||
| 
								 | 
							
								            is_ipv6 = IPV6_ADDRZ_RE.match(host)
							 | 
						||
| 
								 | 
							
								            if is_ipv6:
							 | 
						||
| 
								 | 
							
								                # IPv6 hosts of the form 'a::b%zone' are encoded in a URL as
							 | 
						||
| 
								 | 
							
								                # such per RFC 6874: 'a::b%25zone'. Unquote the ZoneID
							 | 
						||
| 
								 | 
							
								                # separator as necessary to return a valid RFC 4007 scoped IP.
							 | 
						||
| 
								 | 
							
								                match = ZONE_ID_RE.search(host)
							 | 
						||
| 
								 | 
							
								                if match:
							 | 
						||
| 
								 | 
							
								                    start, end = match.span(1)
							 | 
						||
| 
								 | 
							
								                    zone_id = host[start:end]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								                    if zone_id.startswith("%25") and zone_id != "%25":
							 | 
						||
| 
								 | 
							
								                        zone_id = zone_id[3:]
							 | 
						||
| 
								 | 
							
								                    else:
							 | 
						||
| 
								 | 
							
								                        zone_id = zone_id[1:]
							 | 
						||
| 
								 | 
							
								                    zone_id = "%" + _encode_invalid_chars(zone_id, UNRESERVED_CHARS)
							 | 
						||
| 
								 | 
							
								                    return host[:start].lower() + zone_id + host[end:]
							 | 
						||
| 
								 | 
							
								                else:
							 | 
						||
| 
								 | 
							
								                    return host.lower()
							 | 
						||
| 
								 | 
							
								            elif not IPV4_RE.match(host):
							 | 
						||
| 
								 | 
							
								                return six.ensure_str(
							 | 
						||
| 
								 | 
							
								                    b".".join([_idna_encode(label) for label in host.split(".")])
							 | 
						||
| 
								 | 
							
								                )
							 | 
						||
| 
								 | 
							
								    return host
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def _idna_encode(name):
							 | 
						||
| 
								 | 
							
								    if name and any(ord(x) >= 128 for x in name):
							 | 
						||
| 
								 | 
							
								        try:
							 | 
						||
| 
								 | 
							
								            import idna
							 | 
						||
| 
								 | 
							
								        except ImportError:
							 | 
						||
| 
								 | 
							
								            six.raise_from(
							 | 
						||
| 
								 | 
							
								                LocationParseError("Unable to parse URL without the 'idna' module"),
							 | 
						||
| 
								 | 
							
								                None,
							 | 
						||
| 
								 | 
							
								            )
							 | 
						||
| 
								 | 
							
								        try:
							 | 
						||
| 
								 | 
							
								            return idna.encode(name.lower(), strict=True, std3_rules=True)
							 | 
						||
| 
								 | 
							
								        except idna.IDNAError:
							 | 
						||
| 
								 | 
							
								            six.raise_from(
							 | 
						||
| 
								 | 
							
								                LocationParseError(u"Name '%s' is not a valid IDNA label" % name), None
							 | 
						||
| 
								 | 
							
								            )
							 | 
						||
| 
								 | 
							
								    return name.lower().encode("ascii")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def _encode_target(target):
							 | 
						||
| 
								 | 
							
								    """Percent-encodes a request target so that there are no invalid characters"""
							 | 
						||
| 
								 | 
							
								    path, query = TARGET_RE.match(target).groups()
							 | 
						||
| 
								 | 
							
								    target = _encode_invalid_chars(path, PATH_CHARS)
							 | 
						||
| 
								 | 
							
								    query = _encode_invalid_chars(query, QUERY_CHARS)
							 | 
						||
| 
								 | 
							
								    if query is not None:
							 | 
						||
| 
								 | 
							
								        target += "?" + query
							 | 
						||
| 
								 | 
							
								    return target
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def parse_url(url):
							 | 
						||
| 
								 | 
							
								    """
							 | 
						||
| 
								 | 
							
								    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
							 | 
						||
| 
								 | 
							
								    performed to parse incomplete urls. Fields not provided will be None.
							 | 
						||
| 
								 | 
							
								    This parser is RFC 3986 and RFC 6874 compliant.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    The parser logic and helper functions are based heavily on
							 | 
						||
| 
								 | 
							
								    work done in the ``rfc3986`` module.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    :param str url: URL to parse into a :class:`.Url` namedtuple.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Partly backwards-compatible with :mod:`urlparse`.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Example::
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        >>> parse_url('http://google.com/mail/')
							 | 
						||
| 
								 | 
							
								        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
							 | 
						||
| 
								 | 
							
								        >>> parse_url('google.com:80')
							 | 
						||
| 
								 | 
							
								        Url(scheme=None, host='google.com', port=80, path=None, ...)
							 | 
						||
| 
								 | 
							
								        >>> parse_url('/foo?bar')
							 | 
						||
| 
								 | 
							
								        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
							 | 
						||
| 
								 | 
							
								    """
							 | 
						||
| 
								 | 
							
								    if not url:
							 | 
						||
| 
								 | 
							
								        # Empty
							 | 
						||
| 
								 | 
							
								        return Url()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    source_url = url
							 | 
						||
| 
								 | 
							
								    if not SCHEME_RE.search(url):
							 | 
						||
| 
								 | 
							
								        url = "//" + url
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    try:
							 | 
						||
| 
								 | 
							
								        scheme, authority, path, query, fragment = URI_RE.match(url).groups()
							 | 
						||
| 
								 | 
							
								        normalize_uri = scheme is None or scheme.lower() in NORMALIZABLE_SCHEMES
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        if scheme:
							 | 
						||
| 
								 | 
							
								            scheme = scheme.lower()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        if authority:
							 | 
						||
| 
								 | 
							
								            auth, _, host_port = authority.rpartition("@")
							 | 
						||
| 
								 | 
							
								            auth = auth or None
							 | 
						||
| 
								 | 
							
								            host, port = _HOST_PORT_RE.match(host_port).groups()
							 | 
						||
| 
								 | 
							
								            if auth and normalize_uri:
							 | 
						||
| 
								 | 
							
								                auth = _encode_invalid_chars(auth, USERINFO_CHARS)
							 | 
						||
| 
								 | 
							
								            if port == "":
							 | 
						||
| 
								 | 
							
								                port = None
							 | 
						||
| 
								 | 
							
								        else:
							 | 
						||
| 
								 | 
							
								            auth, host, port = None, None, None
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        if port is not None:
							 | 
						||
| 
								 | 
							
								            port = int(port)
							 | 
						||
| 
								 | 
							
								            if not (0 <= port <= 65535):
							 | 
						||
| 
								 | 
							
								                raise LocationParseError(url)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        host = _normalize_host(host, scheme)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        if normalize_uri and path:
							 | 
						||
| 
								 | 
							
								            path = _remove_path_dot_segments(path)
							 | 
						||
| 
								 | 
							
								            path = _encode_invalid_chars(path, PATH_CHARS)
							 | 
						||
| 
								 | 
							
								        if normalize_uri and query:
							 | 
						||
| 
								 | 
							
								            query = _encode_invalid_chars(query, QUERY_CHARS)
							 | 
						||
| 
								 | 
							
								        if normalize_uri and fragment:
							 | 
						||
| 
								 | 
							
								            fragment = _encode_invalid_chars(fragment, FRAGMENT_CHARS)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    except (ValueError, AttributeError):
							 | 
						||
| 
								 | 
							
								        return six.raise_from(LocationParseError(source_url), None)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    # For the sake of backwards compatibility we put empty
							 | 
						||
| 
								 | 
							
								    # string values for path if there are any defined values
							 | 
						||
| 
								 | 
							
								    # beyond the path in the URL.
							 | 
						||
| 
								 | 
							
								    # TODO: Remove this when we break backwards compatibility.
							 | 
						||
| 
								 | 
							
								    if not path:
							 | 
						||
| 
								 | 
							
								        if query is not None or fragment is not None:
							 | 
						||
| 
								 | 
							
								            path = ""
							 | 
						||
| 
								 | 
							
								        else:
							 | 
						||
| 
								 | 
							
								            path = None
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    # Ensure that each part of the URL is a `str` for
							 | 
						||
| 
								 | 
							
								    # backwards compatibility.
							 | 
						||
| 
								 | 
							
								    if isinstance(url, six.text_type):
							 | 
						||
| 
								 | 
							
								        ensure_func = six.ensure_text
							 | 
						||
| 
								 | 
							
								    else:
							 | 
						||
| 
								 | 
							
								        ensure_func = six.ensure_str
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def ensure_type(x):
							 | 
						||
| 
								 | 
							
								        return x if x is None else ensure_func(x)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    return Url(
							 | 
						||
| 
								 | 
							
								        scheme=ensure_type(scheme),
							 | 
						||
| 
								 | 
							
								        auth=ensure_type(auth),
							 | 
						||
| 
								 | 
							
								        host=ensure_type(host),
							 | 
						||
| 
								 | 
							
								        port=port,
							 | 
						||
| 
								 | 
							
								        path=ensure_type(path),
							 | 
						||
| 
								 | 
							
								        query=ensure_type(query),
							 | 
						||
| 
								 | 
							
								        fragment=ensure_type(fragment),
							 | 
						||
| 
								 | 
							
								    )
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def get_host(url):
								    """
								    Deprecated. Use :func:`parse_url` instead.

								    Parses ``url`` with :func:`parse_url` and returns a
								    ``(scheme, hostname, port)`` tuple. When the parsed scheme is
								    falsy, ``"http"`` is substituted for it.
								    """
								    parsed = parse_url(url)
								    # A missing scheme falls back to plain HTTP.
								    scheme = parsed.scheme or "http"
								    return scheme, parsed.hostname, parsed.port
							 |