You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					225 lines
				
				6.4 KiB
			
		
		
			
		
	
	
					225 lines
				
				6.4 KiB
			| 
								 
											3 years ago
										 
									 | 
							
								import logging
							 | 
						||
| 
								 | 
							
								import mimetypes
							 | 
						||
| 
								 | 
							
								import os
							 | 
						||
| 
								 | 
							
								import pathlib
							 | 
						||
| 
								 | 
							
								from typing import Callable, Iterable, Optional, Tuple
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from pip._internal.models.candidate import InstallationCandidate
							 | 
						||
| 
								 | 
							
								from pip._internal.models.link import Link
							 | 
						||
| 
								 | 
							
								from pip._internal.utils.urls import path_to_url, url_to_path
							 | 
						||
| 
								 | 
							
								from pip._internal.vcs import is_url
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Module-level logger; ``build_source`` uses it to warn about ignored locations.
								logger = logging.getLogger(__name__)

								# Callback signatures handed to the link sources defined in this module.
								# ``PageValidator`` takes a link and answers with a bool.
								PageValidator = Callable[[Link], bool]
								# ``CandidatesFromPage`` takes a link and produces installation candidates.
								CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]

								# Return types of ``LinkSource.file_links`` and ``LinkSource.page_candidates``.
								FoundLinks = Iterable[Link]
								FoundCandidates = Iterable[InstallationCandidate]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class LinkSource:
								    """Common interface of the link sources defined in this module.

								    Every member raises ``NotImplementedError`` here; subclasses override
								    them with the real behaviour.
								    """

								    @property
								    def link(self) -> Optional[Link]:
								        """Return the link backing this source, or ``None`` if there is none."""
								        raise NotImplementedError

								    def page_candidates(self) -> FoundCandidates:
								        """Return candidates parsed from an archive listing HTML file."""
								        raise NotImplementedError

								    def file_links(self) -> FoundLinks:
								        """Return links that point at archives specified directly."""
								        raise NotImplementedError
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def _is_html_file(file_url: str) -> bool:
							 | 
						||
| 
								 | 
							
								    return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class _FlatDirectorySource(LinkSource):
								    """Link source specified by ``--find-links=<path-to-dir>``.

								    The entries of the directory are inspected and split into:

								    * ``page_candidates``: Candidates listed on each HTML file in the directory.
								    * ``file_links``: One link for every non-HTML entry in the directory.
								    """

								    def __init__(
								        self,
								        candidates_from_page: CandidatesFromPage,
								        path: str,
								    ) -> None:
								        """Remember the page parser and the canonical form of ``path``."""
								        self._candidates_from_page = candidates_from_page
								        # ``os.path.realpath`` canonicalises the path (symlinks resolved).
								        resolved = os.path.realpath(path)
								        self._path = pathlib.Path(resolved)

								    @property
								    def link(self) -> Optional[Link]:
								        """A flat directory has no single backing link, so this is ``None``."""
								        return None

								    def _entry_urls(self) -> Iterable[str]:
								        """Lazily yield the URL form of every entry in the directory."""
								        for entry in self._path.iterdir():
								            yield path_to_url(str(entry))

								    def page_candidates(self) -> FoundCandidates:
								        """Yield the candidates parsed from each HTML entry of the directory."""
								        for entry_url in self._entry_urls():
								            if _is_html_file(entry_url):
								                yield from self._candidates_from_page(Link(entry_url))

								    def file_links(self) -> FoundLinks:
								        """Yield a link for each directory entry that is not an HTML file."""
								        for entry_url in self._entry_urls():
								            if not _is_html_file(entry_url):
								                yield Link(entry_url)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class _LocalFileSource(LinkSource):
								    """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

								    If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
								    the option, it is converted to a URL first. Exactly one of the two
								    methods produces output, depending on the link's URL:

								    * ``page_candidates``: Candidates listed on the file, if it is HTML.
								    * ``file_links``: The link itself, if the file is not HTML.
								    """

								    def __init__(
								        self,
								        candidates_from_page: CandidatesFromPage,
								        link: Link,
								    ) -> None:
								        """Store the page parser and the link to the local file."""
								        self._link = link
								        self._candidates_from_page = candidates_from_page

								    @property
								    def link(self) -> Optional[Link]:
								        """Return the link this source was created with."""
								        return self._link

								    def page_candidates(self) -> FoundCandidates:
								        """Yield candidates parsed from the file when its URL looks like HTML."""
								        if _is_html_file(self._link.url):
								            yield from self._candidates_from_page(self._link)

								    def file_links(self) -> FoundLinks:
								        """Yield the link itself unless its URL looks like HTML."""
								        if not _is_html_file(self._link.url):
								            yield self._link
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class _RemoteFileSource(LinkSource):
								    """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

								    This returns:

								    * ``page_candidates``: Candidates listed on the page behind the link,
								      provided the page validator accepts the link.
								    * ``file_links``: The link itself, unconditionally.
								    """

								    def __init__(
								        self,
								        candidates_from_page: CandidatesFromPage,
								        page_validator: PageValidator,
								        link: Link,
								    ) -> None:
								        """Store the page parser, the validator predicate and the link."""
								        self._link = link
								        self._page_validator = page_validator
								        self._candidates_from_page = candidates_from_page

								    @property
								    def link(self) -> Optional[Link]:
								        """Return the link this source was created with."""
								        return self._link

								    def page_candidates(self) -> FoundCandidates:
								        """Yield candidates from the page if the validator accepts the link."""
								        if self._page_validator(self._link):
								            yield from self._candidates_from_page(self._link)

								    def file_links(self) -> FoundLinks:
								        """Yield the link itself; no HTML check is made here."""
								        yield self._link
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class _IndexDirectorySource(LinkSource):
								    """``--[extra-]index-url=<path-to-directory>``.

								    The directory is handled the same way as a remote URL: the link is passed
								    straight to ``candidates_from_page``, which contains the logic for this
								    case (appending ``index.html`` to the link). No direct file links are
								    ever reported.
								    """

								    def __init__(
								        self,
								        candidates_from_page: CandidatesFromPage,
								        link: Link,
								    ) -> None:
								        """Store the page parser and the link to the index directory."""
								        self._link = link
								        self._candidates_from_page = candidates_from_page

								    @property
								    def link(self) -> Optional[Link]:
								        """Return the link this source was created with."""
								        return self._link

								    def page_candidates(self) -> FoundCandidates:
								        """Yield whatever ``candidates_from_page`` produces for the link."""
								        index_link = self._link
								        yield from self._candidates_from_page(index_link)

								    def file_links(self) -> FoundLinks:
								        """Return an empty tuple: an index directory exposes no file links."""
								        return ()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def build_source(
								    location: str,
								    *,
								    candidates_from_page: CandidatesFromPage,
								    page_validator: PageValidator,
								    expand_dir: bool,
								    cache_link_parsing: bool,
								) -> Tuple[Optional[str], Optional[LinkSource]]:
								    """Turn a user-supplied location into a ``(url, source)`` pair.

								    ``location`` is classified, in this order, as an existing local path, a
								    ``file:`` URL, or anything else ``is_url`` accepts.

								    :param location: Path or URL to classify.
								    :param candidates_from_page: Page parser handed to the created source.
								    :param page_validator: Predicate handed to ``_RemoteFileSource`` only.
								    :param expand_dir: For a local directory, pick ``_FlatDirectorySource``
								        when true and ``_IndexDirectorySource`` otherwise.
								    :param cache_link_parsing: Forwarded to every ``Link`` created here.
								    :return: ``(None, None)`` if the location matches none of the three
								        categories; ``(url, None)`` if it maps to a local path that is
								        neither a file nor a directory; otherwise ``(url, source)``. A
								        warning is logged in both of the ``None``-source cases.
								    """
								    url: Optional[str]
								    local_path: Optional[str]
								    if os.path.exists(location):  # Is a local path.
								        url, local_path = path_to_url(location), location
								    elif location.startswith("file:"):  # A file: URL.
								        url, local_path = location, url_to_path(location)
								    elif is_url(location):
								        # No local path to inspect for any other kind of URL.
								        url, local_path = location, None
								    else:
								        logger.warning(
								            "Location '%s' is ignored: "
								            "it is either a non-existing path or lacks a specific scheme.",
								            location,
								        )
								        return (None, None)

								    def make_link() -> Link:
								        # Built on demand: the flat-directory source takes a path, not a link.
								        return Link(url, cache_link_parsing=cache_link_parsing)

								    source: Optional[LinkSource] = None
								    if local_path is None:
								        source = _RemoteFileSource(
								            candidates_from_page=candidates_from_page,
								            page_validator=page_validator,
								            link=make_link(),
								        )
								    elif os.path.isdir(local_path):
								        if expand_dir:
								            source = _FlatDirectorySource(
								                candidates_from_page=candidates_from_page,
								                path=local_path,
								            )
								        else:
								            source = _IndexDirectorySource(
								                candidates_from_page=candidates_from_page,
								                link=make_link(),
								            )
								    elif os.path.isfile(local_path):
								        source = _LocalFileSource(
								            candidates_from_page=candidates_from_page,
								            link=make_link(),
								        )
								    else:
								        logger.warning(
								            "Location '%s' is ignored: it is neither a file nor a directory.",
								            location,
								        )
								    return (url, source)
							 |