You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					265 lines
				
				9.2 KiB
			
		
		
			
		
	
	
					265 lines
				
				9.2 KiB
			| 
								 
											3 years ago
										 
									 | 
							
								"""Cache Management
							 | 
						||
| 
								 | 
							
								"""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import hashlib
							 | 
						||
| 
								 | 
							
								import json
							 | 
						||
| 
								 | 
							
								import logging
							 | 
						||
| 
								 | 
							
								import os
							 | 
						||
| 
								 | 
							
								from typing import Any, Dict, List, Optional, Set
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
							 | 
						||
| 
								 | 
							
								from pip._vendor.packaging.utils import canonicalize_name
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from pip._internal.exceptions import InvalidWheelFilename
							 | 
						||
| 
								 | 
							
								from pip._internal.models.format_control import FormatControl
							 | 
						||
| 
								 | 
							
								from pip._internal.models.link import Link
							 | 
						||
| 
								 | 
							
								from pip._internal.models.wheel import Wheel
							 | 
						||
| 
								 | 
							
								from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
							 | 
						||
| 
								 | 
							
								from pip._internal.utils.urls import path_to_url
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Module-level logger, named after this module so its messages can be
								# filtered per module.
								logger = logging.getLogger(__name__)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def _hash_dict(d: Dict[str, str]) -> str:
							 | 
						||
| 
								 | 
							
								    """Return a stable sha224 of a dictionary."""
							 | 
						||
| 
								 | 
							
								    s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
							 | 
						||
| 
								 | 
							
								    return hashlib.sha224(s.encode("ascii")).hexdigest()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class Cache:
								    """Base class that maps links onto directories inside a cache root.

								    Subclasses must implement ``get_path_for_link`` and ``get``; this class
								    only supplies the shared key hashing and directory listing helpers.

								    :param cache_dir: The root of the cache. Must be an absolute path; an
								        empty value is stored as ``None``.
								    :param format_control: An object of FormatControl class to limit
								        binaries being read from the cache.
								    :param allowed_formats: which formats of files the cache should store.
								        ('binary' and 'source' are the only allowed values)
								    """

								    def __init__(
								        self, cache_dir: str, format_control: FormatControl, allowed_formats: Set[str]
								    ) -> None:
								        super().__init__()
								        if cache_dir:
								            # Only absolute cache roots are accepted.
								            assert os.path.isabs(cache_dir)
								        self.cache_dir = cache_dir if cache_dir else None
								        self.format_control = format_control
								        self.allowed_formats = allowed_formats

								        # Every requested format has to be one of the two known kinds.
								        known_formats = {"source", "binary"}
								        assert self.allowed_formats.issubset(known_formats)

								    def _get_cache_path_parts(self, link: Link) -> List[str]:
								        """Return the path components to be os.path.joined with cache_dir."""

								        # The cache key is built from the URL without its fragment: the
								        # fragment may carry extra items that are irrelevant to the key.
								        # The hash and subdirectory are added back explicitly below.
								        key_fields = {"url": link.url_without_fragment}
								        if link.hash_name is not None and link.hash is not None:
								            key_fields[link.hash_name] = link.hash
								        if link.subdirectory_fragment:
								            key_fields["subdirectory"] = link.subdirectory_fragment

								        # The interpreter name and version are part of the key to cope
								        # with ill-behaved sdists that build a different wheel depending
								        # on the python version their setup.py is being run on, and don't
								        # encode the difference in compatibility tags.
								        # https://github.com/pypa/pip/issues/7296
								        key_fields["interpreter_name"] = interpreter_name()
								        key_fields["interpreter_version"] = interpreter_version()

								        # sha224 yields a shorter digest than sha256; the difference in
								        # strength does not matter for naming cache directories.
								        digest = _hash_dict(key_fields)

								        # Spread entries over three levels of two-character directories so
								        # that no single directory ends up with a huge number of children,
								        # which some filesystems cannot handle.
								        nested = [digest[start : start + 2] for start in (0, 2, 4)]
								        nested.append(digest[6:])
								        return nested

								    def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]:
								        """Return ``(file_name, directory)`` pairs cached for ``link``.

								        An empty list is returned when caching is not possible (no cache
								        directory, no package name or no link), when none of this cache's
								        formats is allowed for the package, or when the cache directory for
								        the link does not exist.
								        """
								        if not (self.cache_dir and canonical_package_name and link):
								            return []

								        permitted = self.format_control.get_allowed_formats(canonical_package_name)
								        if self.allowed_formats.isdisjoint(permitted):
								            return []

								        directory = self.get_path_for_link(link)
								        if not os.path.isdir(directory):
								            return []
								        return [(entry, directory) for entry in os.listdir(directory)]

								    def get_path_for_link(self, link: Link) -> str:
								        """Return a directory to store cached items in for link.

								        Must be provided by subclasses.
								        """
								        raise NotImplementedError()

								    def get(
								        self,
								        link: Link,
								        package_name: Optional[str],
								        supported_tags: List[Tag],
								    ) -> Link:
								        """Return a link to a cached item if one exists, else the passed link.

								        Must be provided by subclasses.
								        """
								        raise NotImplementedError()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class SimpleWheelCache(Cache):
								    """A cache of wheels for future installs."""

								    def __init__(self, cache_dir: str, format_control: FormatControl) -> None:
								        # A wheel cache only ever stores binary artifacts.
								        super().__init__(cache_dir, format_control, {"binary"})

								    def get_path_for_link(self, link: Link) -> str:
								        """Return a directory to store cached wheels for link

								        A single sdist can produce many wheels, so each link gets its own
								        directory; that directory is what gets consulted when looking for
								        cache hits.

								        We only insert things into the cache if they have plausible version
								        numbers, so that we don't contaminate the cache with things that were
								        not unique. E.g. ./package might have dozens of installs done for it
								        and build a version of 0.0...and if we built and cached a wheel, we'd
								        end up using the same wheel even if the source has been edited.

								        :param link: The link of the sdist for which this will cache wheels.
								        """
								        nested_dirs = self._get_cache_path_parts(link)
								        assert self.cache_dir
								        # All wheels live below "<cache_dir>/wheels".
								        wheels_root = os.path.join(self.cache_dir, "wheels")
								        return os.path.join(wheels_root, *nested_dirs)

								    def get(
								        self,
								        link: Link,
								        package_name: Optional[str],
								        supported_tags: List[Tag],
								    ) -> Link:
								        """Return a link to the best cached wheel for ``link``.

								        The passed link is returned unchanged when no package name is given
								        or when no cached wheel has a valid filename, the expected
								        distribution name and support for one of ``supported_tags``.
								        """
								        if not package_name:
								            return link

								        expected_name = canonicalize_name(package_name)
								        ranked = []
								        for wheel_name, wheel_dir in self._get_candidates(link, expected_name):
								            try:
								                wheel = Wheel(wheel_name)
								            except InvalidWheelFilename:
								                # Not a wheel file name at all; skip it.
								                continue

								            if canonicalize_name(wheel.name) != expected_name:
								                logger.debug(
								                    "Ignoring cached wheel %s for %s as it "
								                    "does not match the expected distribution name %s.",
								                    wheel_name,
								                    link,
								                    package_name,
								                )
								                continue

								            # Wheels built for a different python/arch/etc are left out.
								            if wheel.supported(supported_tags):
								                rank = wheel.support_index_min(supported_tags)
								                ranked.append((rank, wheel_name, wheel_dir))

								        if not ranked:
								            return link

								        # The smallest tuple wins: lowest tag index first, ties broken by
								        # file name and then directory.
								        best = min(ranked)
								        return Link(path_to_url(os.path.join(best[2], best[1])))
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class EphemWheelCache(SimpleWheelCache):
								    """A SimpleWheelCache that creates its own temporary cache directory"""

								    def __init__(self, format_control: FormatControl) -> None:
								        # Create the temporary directory first: its path becomes the
								        # cache root handed to SimpleWheelCache.
								        ephemeral_root = TempDirectory(
								            kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
								            globally_managed=True,
								        )
								        self._temp_dir = ephemeral_root

								        super().__init__(ephemeral_root.path, format_control)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class CacheEntry:
								    """Pairs a link with a flag saying how it was stored.

								    :param link: The link held by this entry.
								    :param persistent: Stored as-is on the ``persistent`` attribute.
								    """

								    def __init__(
								        self,
								        link: Link,
								        persistent: bool,
								    ):
								        """Store both arguments unchanged as attributes."""
								        self.persistent = persistent
								        self.link = link
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class WheelCache(Cache):
								    """Wraps EphemWheelCache and SimpleWheelCache into a single Cache

								    This Cache allows for graceful degradation: the simple wheel cache is
								    consulted first, and the ephem wheel cache is used when a link is not
								    found there.
								    """

								    def __init__(self, cache_dir: str, format_control: FormatControl) -> None:
								        super().__init__(cache_dir, format_control, {"binary"})
								        self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
								        self._ephem_cache = EphemWheelCache(format_control)

								    def get_path_for_link(self, link: Link) -> str:
								        """Return the directory for ``link`` in the simple wheel cache."""
								        return self._wheel_cache.get_path_for_link(link)

								    def get_ephem_path_for_link(self, link: Link) -> str:
								        """Return the directory for ``link`` in the ephem wheel cache."""
								        return self._ephem_cache.get_path_for_link(link)

								    def get(
								        self,
								        link: Link,
								        package_name: Optional[str],
								        supported_tags: List[Tag],
								    ) -> Link:
								        """Return a link to a cached item if it exists, else the passed link."""
								        entry = self.get_cache_entry(link, package_name, supported_tags)
								        return link if entry is None else entry.link

								    def get_cache_entry(
								        self,
								        link: Link,
								        package_name: Optional[str],
								        supported_tags: List[Tag],
								    ) -> Optional[CacheEntry]:
								        """Returns a CacheEntry with a link to a cached item if it exists or
								        None. The cache entry indicates if the item was found in the persistent
								        or ephemeral cache.
								        """
								        # The simple wheel cache is tried first and marked persistent; the
								        # ephem cache is the fallback.
								        lookup_order = (
								            (self._wheel_cache, True),
								            (self._ephem_cache, False),
								        )
								        for cache, is_persistent in lookup_order:
								            found = cache.get(
								                link=link,
								                package_name=package_name,
								                supported_tags=supported_tags,
								            )
								            # Each cache hands back the very same link object on a miss,
								            # so an identity check tells hit from miss.
								            if found is not link:
								                return CacheEntry(found, persistent=is_persistent)

								        return None
							 |