diff --git a/docs/html/topics/caching.md b/docs/html/topics/caching.md
index 954cebe402d..8d6c40f112d 100644
--- a/docs/html/topics/caching.md
+++ b/docs/html/topics/caching.md
@@ -27,6 +27,13 @@ While this cache attempts to minimize network activity, it does not prevent
network access altogether. If you want a local install solution that
circumvents accessing PyPI, see {ref}`Installing from local packages`.
+```{versionchanged} 23.3
+A new cache format is now used, stored in a directory called `http-v2` (see
+below for this directory's location). Previously this cache was stored in a
+directory called `http` in the main cache directory. If you have completely
+switched to newer versions of `pip`, you may wish to delete the old directory.
+```
+
(wheel-caching)=
### Locally built wheels
@@ -124,11 +131,11 @@ The {ref}`pip cache` command can be used to manage pip's cache.
### Removing a single package
-`pip cache remove setuptools` removes all wheel files related to setuptools from pip's cache.
+`pip cache remove setuptools` removes all wheel files related to setuptools from pip's cache. HTTP cache files are not removed at this time.
### Removing the cache
-`pip cache purge` will clear all wheel files from pip's cache.
+`pip cache purge` will clear all files from pip's wheel and HTTP caches.
### Listing cached files
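
Since the docs above suggest deleting the pre-23.3 `http` directory once you have fully switched, here is a hedged cleanup sketch (a hypothetical helper, not part of pip; it assumes `pip` is on `PATH` and uses the real `pip cache dir` subcommand to find the cache root):

```python
# Hypothetical cleanup helper, not part of pip: delete the legacy
# pre-23.3 "http" cache directory once you've fully moved to 23.3+.
import os
import shutil
import subprocess

# "pip cache dir" prints the root of pip's cache (e.g. ~/.cache/pip).
cache_root = subprocess.check_output(["pip", "cache", "dir"], text=True).strip()

legacy_http = os.path.join(cache_root, "http")  # old cache format
if os.path.isdir(legacy_http):
    shutil.rmtree(legacy_http)
```
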
diff --git a/news/2984.bugfix.rst b/news/2984.bugfix.rst
new file mode 100644
index 00000000000..cce561815c9
--- /dev/null
+++ b/news/2984.bugfix.rst
@@ -0,0 +1 @@
+pip uses less memory when caching large packages. As a result, there is a new on-disk cache format stored in a new directory ($PIP_CACHE_DIR/http-v2).
diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py
index 7a53d510586..02910604bbf 100644
--- a/src/pip/_internal/cli/req_command.py
+++ b/src/pip/_internal/cli/req_command.py
@@ -120,7 +120,7 @@ def _build_session(
ssl_context = None
session = PipSession(
- cache=os.path.join(cache_dir, "http") if cache_dir else None,
+ cache=os.path.join(cache_dir, "http-v2") if cache_dir else None,
retries=retries if retries is not None else options.retries,
trusted_hosts=options.trusted_hosts,
index_urls=self._get_index_urls(options),
diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py
index f6430980c36..1f3b5fe142b 100644
--- a/src/pip/_internal/commands/cache.py
+++ b/src/pip/_internal/commands/cache.py
@@ -93,24 +93,30 @@ def get_cache_info(self, options: Values, args: List[Any]) -> None:
num_http_files = len(self._find_http_files(options))
num_packages = len(self._find_wheels(options, "*"))
- http_cache_location = self._cache_dir(options, "http")
+ http_cache_location = self._cache_dir(options, "http-v2")
+ old_http_cache_location = self._cache_dir(options, "http")
wheels_cache_location = self._cache_dir(options, "wheels")
- http_cache_size = filesystem.format_directory_size(http_cache_location)
+ http_cache_size = filesystem.format_size(
+ filesystem.directory_size(http_cache_location)
+ + filesystem.directory_size(old_http_cache_location)
+ )
wheels_cache_size = filesystem.format_directory_size(wheels_cache_location)
message = (
textwrap.dedent(
"""
- Package index page cache location: {http_cache_location}
+ Package index page cache location (pip v23.3+): {http_cache_location}
+ Package index page cache location (older pips): {old_http_cache_location}
Package index page cache size: {http_cache_size}
Number of HTTP files: {num_http_files}
Locally built wheels location: {wheels_cache_location}
Locally built wheels size: {wheels_cache_size}
Number of locally built wheels: {package_count}
- """
+ """ # noqa: E501
)
.format(
http_cache_location=http_cache_location,
+ old_http_cache_location=old_http_cache_location,
http_cache_size=http_cache_size,
num_http_files=num_http_files,
wheels_cache_location=wheels_cache_location,
@@ -189,8 +195,11 @@ def _cache_dir(self, options: Values, subdir: str) -> str:
return os.path.join(options.cache_dir, subdir)
def _find_http_files(self, options: Values) -> List[str]:
- http_dir = self._cache_dir(options, "http")
- return filesystem.find_files(http_dir, "*")
+ old_http_dir = self._cache_dir(options, "http")
+ new_http_dir = self._cache_dir(options, "http-v2")
+ return filesystem.find_files(old_http_dir, "*") + filesystem.find_files(
+ new_http_dir, "*"
+ )
def _find_wheels(self, options: Values, pattern: str) -> List[str]:
wheel_dir = self._cache_dir(options, "wheels")
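
For reference, a self-contained sketch of the combined-size computation `pip cache info` now performs, re-implemented with the standard library rather than pip's `filesystem` helpers (the `~/.cache/pip` path is an assumption matching Linux defaults):

```python
# Standalone sketch of the size aggregation above: sum both the new
# "http-v2" and the legacy "http" cache directories.
import os

def directory_size(path: str) -> int:
    """Total size in bytes of all files under path (0 if missing)."""
    total = 0
    for root, _dirs, files in os.walk(path):
        for name in files:
            try:
                total += os.path.getsize(os.path.join(root, name))
            except OSError:
                pass  # file vanished or unreadable; skip it
    return total

cache_dir = os.path.expanduser("~/.cache/pip")  # assumed Linux default
combined = directory_size(os.path.join(cache_dir, "http-v2"))
combined += directory_size(os.path.join(cache_dir, "http"))
print(f"HTTP cache size: {combined} bytes")
```
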
diff --git a/src/pip/_internal/network/cache.py b/src/pip/_internal/network/cache.py
index a81a2398519..a4d13620532 100644
--- a/src/pip/_internal/network/cache.py
+++ b/src/pip/_internal/network/cache.py
@@ -3,10 +3,11 @@
import os
from contextlib import contextmanager
-from typing import Generator, Optional
+from datetime import datetime
+from typing import BinaryIO, Generator, Optional, Union
-from pip._vendor.cachecontrol.cache import BaseCache
-from pip._vendor.cachecontrol.caches import FileCache
+from pip._vendor.cachecontrol.cache import SeparateBodyBaseCache
+from pip._vendor.cachecontrol.caches import SeparateBodyFileCache
from pip._vendor.requests.models import Response
from pip._internal.utils.filesystem import adjacent_tmp_file, replace
@@ -28,7 +29,7 @@ def suppressed_cache_errors() -> Generator[None, None, None]:
pass
-class SafeFileCache(BaseCache):
+class SafeFileCache(SeparateBodyBaseCache):
"""
A file based cache which is safe to use even when the target directory may
not be accessible or writable.
@@ -43,7 +44,7 @@ def _get_cache_path(self, name: str) -> str:
# From cachecontrol.caches.file_cache.FileCache._fn, brought into our
# class for backwards-compatibility and to avoid using a non-public
# method.
- hashed = FileCache.encode(name)
+ hashed = SeparateBodyFileCache.encode(name)
parts = list(hashed[:5]) + [hashed]
return os.path.join(self.directory, *parts)
@@ -53,17 +54,33 @@ def get(self, key: str) -> Optional[bytes]:
with open(path, "rb") as f:
return f.read()
- def set(self, key: str, value: bytes, expires: Optional[int] = None) -> None:
- path = self._get_cache_path(key)
+ def _write(self, path: str, data: bytes) -> None:
with suppressed_cache_errors():
ensure_dir(os.path.dirname(path))
with adjacent_tmp_file(path) as f:
- f.write(value)
+ f.write(data)
replace(f.name, path)
+ def set(
+ self, key: str, value: bytes, expires: Union[int, datetime, None] = None
+ ) -> None:
+ path = self._get_cache_path(key)
+ self._write(path, value)
+
def delete(self, key: str) -> None:
path = self._get_cache_path(key)
with suppressed_cache_errors():
os.remove(path)
+ with suppressed_cache_errors():
+ os.remove(path + ".body")
+
+ def get_body(self, key: str) -> Optional[BinaryIO]:
+ path = self._get_cache_path(key) + ".body"
+ with suppressed_cache_errors():
+ return open(path, "rb")
+
+ def set_body(self, key: str, body: bytes) -> None:
+ path = self._get_cache_path(key) + ".body"
+ self._write(path, body)
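
A usage sketch of `SafeFileCache` with the new separate-body API, mirroring the unit test added at the bottom of this diff: metadata round-trips through `set()`/`get()`, the body through `set_body()`/`get_body()`.

```python
import tempfile

from pip._internal.network.cache import SafeFileCache

with tempfile.TemporaryDirectory() as tmp:
    cache = SafeFileCache(tmp)
    cache.set("key", b"serialized response metadata")
    cache.set_body("key", b"response body bytes")

    assert cache.get("key") == b"serialized response metadata"
    body = cache.get_body("key")
    assert body is not None
    with body:
        assert body.read() == b"response body bytes"

    cache.delete("key")  # removes both the metadata file and its ".body"
```
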
diff --git a/src/pip/_vendor/cachecontrol.pyi b/src/pip/_vendor/cachecontrol.pyi
deleted file mode 100644
index 636a66bacaf..00000000000
--- a/src/pip/_vendor/cachecontrol.pyi
+++ /dev/null
@@ -1 +0,0 @@
-from cachecontrol import *
\ No newline at end of file
diff --git a/src/pip/_vendor/cachecontrol/__init__.py b/src/pip/_vendor/cachecontrol/__init__.py
index f631ae6df47..4d20bc9b12a 100644
--- a/src/pip/_vendor/cachecontrol/__init__.py
+++ b/src/pip/_vendor/cachecontrol/__init__.py
@@ -8,11 +8,21 @@
"""
__author__ = "Eric Larson"
__email__ = "eric@ionrock.org"
-__version__ = "0.12.11"
+__version__ = "0.13.1"
-from .wrapper import CacheControl
-from .adapter import CacheControlAdapter
-from .controller import CacheController
+from pip._vendor.cachecontrol.adapter import CacheControlAdapter
+from pip._vendor.cachecontrol.controller import CacheController
+from pip._vendor.cachecontrol.wrapper import CacheControl
+
+__all__ = [
+ "__author__",
+ "__email__",
+ "__version__",
+ "CacheControlAdapter",
+ "CacheController",
+ "CacheControl",
+]
import logging
+
logging.getLogger(__name__).addHandler(logging.NullHandler())
diff --git a/src/pip/_vendor/cachecontrol/_cmd.py b/src/pip/_vendor/cachecontrol/_cmd.py
index 4266b5ee92a..2c84208a5d8 100644
--- a/src/pip/_vendor/cachecontrol/_cmd.py
+++ b/src/pip/_vendor/cachecontrol/_cmd.py
@@ -1,8 +1,11 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
import logging
+from argparse import ArgumentParser
+from typing import TYPE_CHECKING
from pip._vendor import requests
@@ -10,16 +13,19 @@
from pip._vendor.cachecontrol.cache import DictCache
from pip._vendor.cachecontrol.controller import logger
-from argparse import ArgumentParser
+if TYPE_CHECKING:
+ from argparse import Namespace
+ from pip._vendor.cachecontrol.controller import CacheController
-def setup_logging():
+
+def setup_logging() -> None:
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
logger.addHandler(handler)
-def get_session():
+def get_session() -> requests.Session:
adapter = CacheControlAdapter(
DictCache(), cache_etags=True, serializer=None, heuristic=None
)
@@ -27,17 +33,17 @@ def get_session():
sess.mount("http://", adapter)
sess.mount("https://", adapter)
- sess.cache_controller = adapter.controller
+ sess.cache_controller = adapter.controller # type: ignore[attr-defined]
return sess
-def get_args():
+def get_args() -> Namespace:
parser = ArgumentParser()
parser.add_argument("url", help="The URL to try and cache")
return parser.parse_args()
-def main(args=None):
+def main() -> None:
args = get_args()
sess = get_session()
@@ -48,10 +54,13 @@ def main(args=None):
setup_logging()
# try setting the cache
- sess.cache_controller.cache_response(resp.request, resp.raw)
+ cache_controller: CacheController = (
+ sess.cache_controller # type: ignore[attr-defined]
+ )
+ cache_controller.cache_response(resp.request, resp.raw)
# Now try to get it
- if sess.cache_controller.cached_request(resp.request):
+ if cache_controller.cached_request(resp.request):
print("Cached!")
else:
print("Not cached :(")
diff --git a/src/pip/_vendor/cachecontrol/adapter.py b/src/pip/_vendor/cachecontrol/adapter.py
index 94c75e1a05b..3e83e308dba 100644
--- a/src/pip/_vendor/cachecontrol/adapter.py
+++ b/src/pip/_vendor/cachecontrol/adapter.py
@@ -1,16 +1,26 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
-import types
import functools
+import types
import zlib
+from typing import TYPE_CHECKING, Any, Collection, Mapping
from pip._vendor.requests.adapters import HTTPAdapter
-from .controller import CacheController, PERMANENT_REDIRECT_STATUSES
-from .cache import DictCache
-from .filewrapper import CallbackFileWrapper
+from pip._vendor.cachecontrol.cache import DictCache
+from pip._vendor.cachecontrol.controller import PERMANENT_REDIRECT_STATUSES, CacheController
+from pip._vendor.cachecontrol.filewrapper import CallbackFileWrapper
+
+if TYPE_CHECKING:
+ from pip._vendor.requests import PreparedRequest, Response
+ from pip._vendor.urllib3 import HTTPResponse
+
+ from pip._vendor.cachecontrol.cache import BaseCache
+ from pip._vendor.cachecontrol.heuristics import BaseHeuristic
+ from pip._vendor.cachecontrol.serialize import Serializer
class CacheControlAdapter(HTTPAdapter):
@@ -18,16 +28,16 @@ class CacheControlAdapter(HTTPAdapter):
def __init__(
self,
- cache=None,
- cache_etags=True,
- controller_class=None,
- serializer=None,
- heuristic=None,
- cacheable_methods=None,
- *args,
- **kw
- ):
- super(CacheControlAdapter, self).__init__(*args, **kw)
+ cache: BaseCache | None = None,
+ cache_etags: bool = True,
+ controller_class: type[CacheController] | None = None,
+ serializer: Serializer | None = None,
+ heuristic: BaseHeuristic | None = None,
+ cacheable_methods: Collection[str] | None = None,
+ *args: Any,
+ **kw: Any,
+ ) -> None:
+ super().__init__(*args, **kw)
self.cache = DictCache() if cache is None else cache
self.heuristic = heuristic
self.cacheable_methods = cacheable_methods or ("GET",)
@@ -37,7 +47,16 @@ def __init__(
self.cache, cache_etags=cache_etags, serializer=serializer
)
- def send(self, request, cacheable_methods=None, **kw):
+ def send(
+ self,
+ request: PreparedRequest,
+ stream: bool = False,
+ timeout: None | float | tuple[float, float] | tuple[float, None] = None,
+ verify: bool | str = True,
+ cert: (None | bytes | str | tuple[bytes | str, bytes | str]) = None,
+ proxies: Mapping[str, str] | None = None,
+ cacheable_methods: Collection[str] | None = None,
+ ) -> Response:
"""
Send a request. Use the request information to see if it
exists in the cache and cache the response if we need to and can.
@@ -54,13 +73,17 @@ def send(self, request, cacheable_methods=None, **kw):
# check for etags and add headers if appropriate
request.headers.update(self.controller.conditional_headers(request))
- resp = super(CacheControlAdapter, self).send(request, **kw)
+ resp = super().send(request, stream, timeout, verify, cert, proxies)
return resp
def build_response(
- self, request, response, from_cache=False, cacheable_methods=None
- ):
+ self,
+ request: PreparedRequest,
+ response: HTTPResponse,
+ from_cache: bool = False,
+ cacheable_methods: Collection[str] | None = None,
+ ) -> Response:
"""
Build a response by making a request or using the cache.
@@ -102,36 +125,37 @@ def build_response(
else:
# Wrap the response file with a wrapper that will cache the
# response when the stream has been consumed.
- response._fp = CallbackFileWrapper(
- response._fp,
+ response._fp = CallbackFileWrapper( # type: ignore[attr-defined]
+ response._fp, # type: ignore[attr-defined]
functools.partial(
self.controller.cache_response, request, response
),
)
if response.chunked:
- super_update_chunk_length = response._update_chunk_length
+ super_update_chunk_length = response._update_chunk_length # type: ignore[attr-defined]
- def _update_chunk_length(self):
+ def _update_chunk_length(self: HTTPResponse) -> None:
super_update_chunk_length()
if self.chunk_left == 0:
- self._fp._close()
+ self._fp._close() # type: ignore[attr-defined]
- response._update_chunk_length = types.MethodType(
+ response._update_chunk_length = types.MethodType( # type: ignore[attr-defined]
_update_chunk_length, response
)
- resp = super(CacheControlAdapter, self).build_response(request, response)
+ resp: Response = super().build_response(request, response) # type: ignore[no-untyped-call]
# See if we should invalidate the cache.
if request.method in self.invalidating_methods and resp.ok:
+ assert request.url is not None
cache_url = self.controller.cache_url(request.url)
self.cache.delete(cache_url)
# Give the request a from_cache attr to let people use it
- resp.from_cache = from_cache
+ resp.from_cache = from_cache # type: ignore[attr-defined]
return resp
- def close(self):
+ def close(self) -> None:
self.cache.close()
- super(CacheControlAdapter, self).close()
+ super().close() # type: ignore[no-untyped-call]
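
A minimal mounting sketch, following the `get_session()` helper in `_cmd.py` above (run from a pip checkout so the `pip._vendor` imports resolve; `from_cache` is the attribute `build_response()` sets on every response):

```python
from pip._vendor import requests
from pip._vendor.cachecontrol import CacheControlAdapter
from pip._vendor.cachecontrol.cache import DictCache

sess = requests.Session()
adapter = CacheControlAdapter(DictCache(), cache_etags=True)
sess.mount("http://", adapter)
sess.mount("https://", adapter)

resp = sess.get("https://pypi.org/simple/")
print(resp.from_cache)  # set by build_response(); False on first fetch
```
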
diff --git a/src/pip/_vendor/cachecontrol/cache.py b/src/pip/_vendor/cachecontrol/cache.py
index 2a965f595ff..3293b0057c7 100644
--- a/src/pip/_vendor/cachecontrol/cache.py
+++ b/src/pip/_vendor/cachecontrol/cache.py
@@ -6,38 +6,46 @@
The cache object API for implementing caches. The default is a thread
safe in-memory dictionary.
"""
+from __future__ import annotations
+
from threading import Lock
+from typing import IO, TYPE_CHECKING, MutableMapping
+if TYPE_CHECKING:
+ from datetime import datetime
-class BaseCache(object):
- def get(self, key):
+class BaseCache:
+ def get(self, key: str) -> bytes | None:
raise NotImplementedError()
- def set(self, key, value, expires=None):
+ def set(
+ self, key: str, value: bytes, expires: int | datetime | None = None
+ ) -> None:
raise NotImplementedError()
- def delete(self, key):
+ def delete(self, key: str) -> None:
raise NotImplementedError()
- def close(self):
+ def close(self) -> None:
pass
class DictCache(BaseCache):
-
- def __init__(self, init_dict=None):
+ def __init__(self, init_dict: MutableMapping[str, bytes] | None = None) -> None:
self.lock = Lock()
self.data = init_dict or {}
- def get(self, key):
+ def get(self, key: str) -> bytes | None:
return self.data.get(key, None)
- def set(self, key, value, expires=None):
+ def set(
+ self, key: str, value: bytes, expires: int | datetime | None = None
+ ) -> None:
with self.lock:
self.data.update({key: value})
- def delete(self, key):
+ def delete(self, key: str) -> None:
with self.lock:
if key in self.data:
self.data.pop(key)
@@ -55,10 +63,11 @@ class SeparateBodyBaseCache(BaseCache):
Similarly, the body should be loaded separately via ``get_body()``.
"""
- def set_body(self, key, body):
+
+ def set_body(self, key: str, body: bytes) -> None:
raise NotImplementedError()
- def get_body(self, key):
+ def get_body(self, key: str) -> IO[bytes] | None:
"""
Return the body as file-like object.
"""
diff --git a/src/pip/_vendor/cachecontrol/caches/__init__.py b/src/pip/_vendor/cachecontrol/caches/__init__.py
index 37827291fb5..24ff469ff98 100644
--- a/src/pip/_vendor/cachecontrol/caches/__init__.py
+++ b/src/pip/_vendor/cachecontrol/caches/__init__.py
@@ -2,8 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0
-from .file_cache import FileCache, SeparateBodyFileCache
-from .redis_cache import RedisCache
-
+from pip._vendor.cachecontrol.caches.file_cache import FileCache, SeparateBodyFileCache
+from pip._vendor.cachecontrol.caches.redis_cache import RedisCache
__all__ = ["FileCache", "SeparateBodyFileCache", "RedisCache"]
diff --git a/src/pip/_vendor/cachecontrol/caches/file_cache.py b/src/pip/_vendor/cachecontrol/caches/file_cache.py
index f1ddb2ebdf9..1fd28013084 100644
--- a/src/pip/_vendor/cachecontrol/caches/file_cache.py
+++ b/src/pip/_vendor/cachecontrol/caches/file_cache.py
@@ -1,22 +1,23 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
import hashlib
import os
from textwrap import dedent
+from typing import IO, TYPE_CHECKING
-from ..cache import BaseCache, SeparateBodyBaseCache
-from ..controller import CacheController
+from pip._vendor.cachecontrol.cache import BaseCache, SeparateBodyBaseCache
+from pip._vendor.cachecontrol.controller import CacheController
-try:
- FileNotFoundError
-except NameError:
- # py2.X
- FileNotFoundError = (IOError, OSError)
+if TYPE_CHECKING:
+ from datetime import datetime
+ from filelock import BaseFileLock
-def _secure_open_write(filename, fmode):
+
+def _secure_open_write(filename: str, fmode: int) -> IO[bytes]:
# We only want to write to this file, so open it in write only mode
flags = os.O_WRONLY
@@ -39,7 +40,7 @@ def _secure_open_write(filename, fmode):
# there
try:
os.remove(filename)
- except (IOError, OSError):
+ except OSError:
# The file must not exist already, so we can just skip ahead to opening
pass
@@ -62,37 +63,27 @@ class _FileCacheMixin:
def __init__(
self,
- directory,
- forever=False,
- filemode=0o0600,
- dirmode=0o0700,
- use_dir_lock=None,
- lock_class=None,
- ):
-
- if use_dir_lock is not None and lock_class is not None:
- raise ValueError("Cannot use use_dir_lock and lock_class together")
-
+ directory: str,
+ forever: bool = False,
+ filemode: int = 0o0600,
+ dirmode: int = 0o0700,
+ lock_class: type[BaseFileLock] | None = None,
+ ) -> None:
try:
- from lockfile import LockFile
- from lockfile.mkdirlockfile import MkdirLockFile
+ if lock_class is None:
+ from filelock import FileLock
+
+ lock_class = FileLock
except ImportError:
notice = dedent(
"""
NOTE: In order to use the FileCache you must have
- lockfile installed. You can install it via pip:
- pip install lockfile
+ filelock installed. You can install it via pip:
+ pip install filelock
"""
)
raise ImportError(notice)
- else:
- if use_dir_lock:
- lock_class = MkdirLockFile
-
- elif lock_class is None:
- lock_class = LockFile
-
self.directory = directory
self.forever = forever
self.filemode = filemode
@@ -100,17 +91,17 @@ def __init__(
self.lock_class = lock_class
@staticmethod
- def encode(x):
+ def encode(x: str) -> str:
return hashlib.sha224(x.encode()).hexdigest()
- def _fn(self, name):
+ def _fn(self, name: str) -> str:
# NOTE: This method should not change as some may depend on it.
# See: https://github.com/ionrock/cachecontrol/issues/63
hashed = self.encode(name)
parts = list(hashed[:5]) + [hashed]
return os.path.join(self.directory, *parts)
- def get(self, key):
+ def get(self, key: str) -> bytes | None:
name = self._fn(key)
try:
with open(name, "rb") as fh:
@@ -119,26 +110,28 @@ def get(self, key):
except FileNotFoundError:
return None
- def set(self, key, value, expires=None):
+ def set(
+ self, key: str, value: bytes, expires: int | datetime | None = None
+ ) -> None:
name = self._fn(key)
self._write(name, value)
- def _write(self, path, data: bytes):
+ def _write(self, path: str, data: bytes) -> None:
"""
Safely write the data to the given path.
"""
# Make sure the directory exists
try:
os.makedirs(os.path.dirname(path), self.dirmode)
- except (IOError, OSError):
+ except OSError:
pass
- with self.lock_class(path) as lock:
+ with self.lock_class(path + ".lock"):
# Write our actual file
- with _secure_open_write(lock.path, self.filemode) as fh:
+ with _secure_open_write(path, self.filemode) as fh:
fh.write(data)
- def _delete(self, key, suffix):
+ def _delete(self, key: str, suffix: str) -> None:
name = self._fn(key) + suffix
if not self.forever:
try:
@@ -153,7 +146,7 @@ class FileCache(_FileCacheMixin, BaseCache):
downloads.
"""
- def delete(self, key):
+ def delete(self, key: str) -> None:
self._delete(key, "")
@@ -163,23 +156,23 @@ class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache):
peak memory usage.
"""
- def get_body(self, key):
+ def get_body(self, key: str) -> IO[bytes] | None:
name = self._fn(key) + ".body"
try:
return open(name, "rb")
except FileNotFoundError:
return None
- def set_body(self, key, body):
+ def set_body(self, key: str, body: bytes) -> None:
name = self._fn(key) + ".body"
self._write(name, body)
- def delete(self, key):
+ def delete(self, key: str) -> None:
self._delete(key, "")
self._delete(key, ".body")
-def url_to_file_path(url, filecache):
+def url_to_file_path(url: str, filecache: FileCache) -> str:
"""Return the file cache path based on the URL.
This does not ensure the file exists!
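
To make the on-disk layout concrete, here is the sharding scheme that `encode()`/`_fn()` above implement, in isolation:

```python
# Key-to-path sharding: sha224 the key, then use the digest's first
# five characters as nested directory levels.
import hashlib
import os

key = "https://pypi.org/simple/pip/"  # example cache key
hashed = hashlib.sha224(key.encode()).hexdigest()
parts = list(hashed[:5]) + [hashed]
print(os.path.join("cache-dir", *parts))
# -> cache-dir/<h0>/<h1>/<h2>/<h3>/<h4>/<full 56-char digest>
```
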
diff --git a/src/pip/_vendor/cachecontrol/caches/redis_cache.py b/src/pip/_vendor/cachecontrol/caches/redis_cache.py
index 2cba4b07080..f4f68c47bf6 100644
--- a/src/pip/_vendor/cachecontrol/caches/redis_cache.py
+++ b/src/pip/_vendor/cachecontrol/caches/redis_cache.py
@@ -1,39 +1,48 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
-from __future__ import division
-from datetime import datetime
+from datetime import datetime, timezone
+from typing import TYPE_CHECKING
+
from pip._vendor.cachecontrol.cache import BaseCache
+if TYPE_CHECKING:
+ from redis import Redis
-class RedisCache(BaseCache):
- def __init__(self, conn):
+class RedisCache(BaseCache):
+ def __init__(self, conn: Redis[bytes]) -> None:
self.conn = conn
- def get(self, key):
+ def get(self, key: str) -> bytes | None:
return self.conn.get(key)
- def set(self, key, value, expires=None):
+ def set(
+ self, key: str, value: bytes, expires: int | datetime | None = None
+ ) -> None:
if not expires:
self.conn.set(key, value)
elif isinstance(expires, datetime):
- expires = expires - datetime.utcnow()
- self.conn.setex(key, int(expires.total_seconds()), value)
+ now_utc = datetime.now(timezone.utc)
+ if expires.tzinfo is None:
+ now_utc = now_utc.replace(tzinfo=None)
+ delta = expires - now_utc
+ self.conn.setex(key, int(delta.total_seconds()), value)
else:
self.conn.setex(key, expires, value)
- def delete(self, key):
+ def delete(self, key: str) -> None:
self.conn.delete(key)
- def clear(self):
+ def clear(self) -> None:
"""Helper for clearing all the keys in a database. Use with
caution!"""
for key in self.conn.keys():
self.conn.delete(key)
- def close(self):
+ def close(self) -> None:
"""Redis uses connection pooling, no need to close the connection."""
pass
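
The new timezone handling in `set()` above, extracted into a standalone function for clarity: an aware deadline is compared against an aware "now", a naive one against a naive "now".

```python
from datetime import datetime, timedelta, timezone

def seconds_until(expires: datetime) -> int:
    now_utc = datetime.now(timezone.utc)
    if expires.tzinfo is None:
        # Naive deadline: compare against a naive UTC "now".
        now_utc = now_utc.replace(tzinfo=None)
    return int((expires - now_utc).total_seconds())

print(seconds_until(datetime.now(timezone.utc) + timedelta(minutes=5)))  # ~300
```
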
diff --git a/src/pip/_vendor/cachecontrol/compat.py b/src/pip/_vendor/cachecontrol/compat.py
deleted file mode 100644
index ccec9379dba..00000000000
--- a/src/pip/_vendor/cachecontrol/compat.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# SPDX-FileCopyrightText: 2015 Eric Larson
-#
-# SPDX-License-Identifier: Apache-2.0
-
-try:
- from urllib.parse import urljoin
-except ImportError:
- from urlparse import urljoin
-
-
-try:
- import cPickle as pickle
-except ImportError:
- import pickle
-
-# Handle the case where the requests module has been patched to not have
-# urllib3 bundled as part of its source.
-try:
- from pip._vendor.requests.packages.urllib3.response import HTTPResponse
-except ImportError:
- from pip._vendor.urllib3.response import HTTPResponse
-
-try:
- from pip._vendor.requests.packages.urllib3.util import is_fp_closed
-except ImportError:
- from pip._vendor.urllib3.util import is_fp_closed
-
-# Replicate some six behaviour
-try:
- text_type = unicode
-except NameError:
- text_type = str
diff --git a/src/pip/_vendor/cachecontrol/controller.py b/src/pip/_vendor/cachecontrol/controller.py
index 7f23529f115..586b9f97b80 100644
--- a/src/pip/_vendor/cachecontrol/controller.py
+++ b/src/pip/_vendor/cachecontrol/controller.py
@@ -5,17 +5,27 @@
"""
The httplib2 algorithms ported for use with requests.
"""
+from __future__ import annotations
+
+import calendar
import logging
import re
-import calendar
import time
from email.utils import parsedate_tz
+from typing import TYPE_CHECKING, Collection, Mapping
from pip._vendor.requests.structures import CaseInsensitiveDict
-from .cache import DictCache, SeparateBodyBaseCache
-from .serialize import Serializer
+from pip._vendor.cachecontrol.cache import DictCache, SeparateBodyBaseCache
+from pip._vendor.cachecontrol.serialize import Serializer
+
+if TYPE_CHECKING:
+ from typing import Literal
+
+ from pip._vendor.requests import PreparedRequest
+ from pip._vendor.urllib3 import HTTPResponse
+ from pip._vendor.cachecontrol.cache import BaseCache
logger = logging.getLogger(__name__)
@@ -24,20 +34,26 @@
PERMANENT_REDIRECT_STATUSES = (301, 308)
-def parse_uri(uri):
+def parse_uri(uri: str) -> tuple[str, str, str, str, str]:
"""Parses a URI using the regex given in Appendix B of RFC 3986.
(scheme, authority, path, query, fragment) = parse_uri(uri)
"""
- groups = URI.match(uri).groups()
+ match = URI.match(uri)
+ assert match is not None
+ groups = match.groups()
return (groups[1], groups[3], groups[4], groups[6], groups[8])
-class CacheController(object):
+class CacheController:
"""An interface to see if request should cached or not."""
def __init__(
- self, cache=None, cache_etags=True, serializer=None, status_codes=None
+ self,
+ cache: BaseCache | None = None,
+ cache_etags: bool = True,
+ serializer: Serializer | None = None,
+ status_codes: Collection[int] | None = None,
):
self.cache = DictCache() if cache is None else cache
self.cache_etags = cache_etags
@@ -45,7 +61,7 @@ def __init__(
self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308)
@classmethod
- def _urlnorm(cls, uri):
+ def _urlnorm(cls, uri: str) -> str:
"""Normalize the URL to create a safe key for the cache"""
(scheme, authority, path, query, fragment) = parse_uri(uri)
if not scheme or not authority:
@@ -65,10 +81,10 @@ def _urlnorm(cls, uri):
return defrag_uri
@classmethod
- def cache_url(cls, uri):
+ def cache_url(cls, uri: str) -> str:
return cls._urlnorm(uri)
- def parse_cache_control(self, headers):
+ def parse_cache_control(self, headers: Mapping[str, str]) -> dict[str, int | None]:
known_directives = {
# https://tools.ietf.org/html/rfc7234#section-5.2
"max-age": (int, True),
@@ -87,7 +103,7 @@ def parse_cache_control(self, headers):
cc_headers = headers.get("cache-control", headers.get("Cache-Control", ""))
- retval = {}
+ retval: dict[str, int | None] = {}
for cc_directive in cc_headers.split(","):
if not cc_directive.strip():
@@ -122,11 +138,33 @@ def parse_cache_control(self, headers):
return retval
- def cached_request(self, request):
+ def _load_from_cache(self, request: PreparedRequest) -> HTTPResponse | None:
+ """
+ Load a cached response, or return None if it's not available.
+ """
+ cache_url = request.url
+ assert cache_url is not None
+ cache_data = self.cache.get(cache_url)
+ if cache_data is None:
+ logger.debug("No cache entry available")
+ return None
+
+ if isinstance(self.cache, SeparateBodyBaseCache):
+ body_file = self.cache.get_body(cache_url)
+ else:
+ body_file = None
+
+ result = self.serializer.loads(request, cache_data, body_file)
+ if result is None:
+ logger.warning("Cache entry deserialization failed, entry ignored")
+ return result
+
+ def cached_request(self, request: PreparedRequest) -> HTTPResponse | Literal[False]:
"""
Return a cached response if it exists in the cache, otherwise
return False.
"""
+ assert request.url is not None
cache_url = self.cache_url(request.url)
logger.debug('Looking up "%s" in the cache', cache_url)
cc = self.parse_cache_control(request.headers)
@@ -140,21 +178,9 @@ def cached_request(self, request):
logger.debug('Request header has "max_age" as 0, cache bypassed')
return False
- # Request allows serving from the cache, let's see if we find something
- cache_data = self.cache.get(cache_url)
- if cache_data is None:
- logger.debug("No cache entry available")
- return False
-
- if isinstance(self.cache, SeparateBodyBaseCache):
- body_file = self.cache.get_body(cache_url)
- else:
- body_file = None
-
- # Check whether it can be deserialized
- resp = self.serializer.loads(request, cache_data, body_file)
+ # Check whether we can load the response from the cache:
+ resp = self._load_from_cache(request)
if not resp:
- logger.warning("Cache entry deserialization failed, entry ignored")
return False
# If we have a cached permanent redirect, return it immediately. We
@@ -174,7 +200,7 @@ def cached_request(self, request):
logger.debug(msg)
return resp
- headers = CaseInsensitiveDict(resp.headers)
+ headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers)
if not headers or "date" not in headers:
if "etag" not in headers:
# Without date or etag, the cached response can never be used
@@ -185,7 +211,9 @@ def cached_request(self, request):
return False
now = time.time()
- date = calendar.timegm(parsedate_tz(headers["date"]))
+ time_tuple = parsedate_tz(headers["date"])
+ assert time_tuple is not None
+ date = calendar.timegm(time_tuple[:6])
current_age = max(0, now - date)
logger.debug("Current age based on date: %i", current_age)
@@ -199,28 +227,30 @@ def cached_request(self, request):
freshness_lifetime = 0
# Check the max-age pragma in the cache control header
- if "max-age" in resp_cc:
- freshness_lifetime = resp_cc["max-age"]
+ max_age = resp_cc.get("max-age")
+ if max_age is not None:
+ freshness_lifetime = max_age
logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime)
# If there isn't a max-age, check for an expires header
elif "expires" in headers:
expires = parsedate_tz(headers["expires"])
if expires is not None:
- expire_time = calendar.timegm(expires) - date
+ expire_time = calendar.timegm(expires[:6]) - date
freshness_lifetime = max(0, expire_time)
logger.debug("Freshness lifetime from expires: %i", freshness_lifetime)
# Determine if we are setting freshness limit in the
# request. Note, this overrides what was in the response.
- if "max-age" in cc:
- freshness_lifetime = cc["max-age"]
+ max_age = cc.get("max-age")
+ if max_age is not None:
+ freshness_lifetime = max_age
logger.debug(
"Freshness lifetime from request max-age: %i", freshness_lifetime
)
- if "min-fresh" in cc:
- min_fresh = cc["min-fresh"]
+ min_fresh = cc.get("min-fresh")
+ if min_fresh is not None:
# adjust our current age by our min fresh
current_age += min_fresh
logger.debug("Adjusted current age from min-fresh: %i", current_age)
@@ -239,13 +269,12 @@ def cached_request(self, request):
# return the original handler
return False
- def conditional_headers(self, request):
- cache_url = self.cache_url(request.url)
- resp = self.serializer.loads(request, self.cache.get(cache_url))
+ def conditional_headers(self, request: PreparedRequest) -> dict[str, str]:
+ resp = self._load_from_cache(request)
new_headers = {}
if resp:
- headers = CaseInsensitiveDict(resp.headers)
+ headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers)
if "etag" in headers:
new_headers["If-None-Match"] = headers["ETag"]
@@ -255,7 +284,14 @@ def conditional_headers(self, request):
return new_headers
- def _cache_set(self, cache_url, request, response, body=None, expires_time=None):
+ def _cache_set(
+ self,
+ cache_url: str,
+ request: PreparedRequest,
+ response: HTTPResponse,
+ body: bytes | None = None,
+ expires_time: int | None = None,
+ ) -> None:
"""
Store the data in the cache.
"""
@@ -267,7 +303,10 @@ def _cache_set(self, cache_url, request, response, body=None, expires_time=None)
self.serializer.dumps(request, response, b""),
expires=expires_time,
)
- self.cache.set_body(cache_url, body)
+ # body is None can happen when, for example, we're only updating
+ # headers, as is the case in update_cached_response().
+ if body is not None:
+ self.cache.set_body(cache_url, body)
else:
self.cache.set(
cache_url,
@@ -275,7 +314,13 @@ def _cache_set(self, cache_url, request, response, body=None, expires_time=None)
expires=expires_time,
)
- def cache_response(self, request, response, body=None, status_codes=None):
+ def cache_response(
+ self,
+ request: PreparedRequest,
+ response: HTTPResponse,
+ body: bytes | None = None,
+ status_codes: Collection[int] | None = None,
+ ) -> None:
"""
Algorithm for caching requests.
@@ -290,10 +335,14 @@ def cache_response(self, request, response, body=None, status_codes=None):
)
return
- response_headers = CaseInsensitiveDict(response.headers)
+ response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
+ response.headers
+ )
if "date" in response_headers:
- date = calendar.timegm(parsedate_tz(response_headers["date"]))
+ time_tuple = parsedate_tz(response_headers["date"])
+ assert time_tuple is not None
+ date = calendar.timegm(time_tuple[:6])
else:
date = 0
@@ -312,6 +361,7 @@ def cache_response(self, request, response, body=None, status_codes=None):
cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(response_headers)
+ assert request.url is not None
cache_url = self.cache_url(request.url)
logger.debug('Updating cache with response from "%s"', cache_url)
@@ -344,11 +394,11 @@ def cache_response(self, request, response, body=None, status_codes=None):
if response_headers.get("expires"):
expires = parsedate_tz(response_headers["expires"])
if expires is not None:
- expires_time = calendar.timegm(expires) - date
+ expires_time = calendar.timegm(expires[:6]) - date
expires_time = max(expires_time, 14 * 86400)
- logger.debug("etag object cached for {0} seconds".format(expires_time))
+ logger.debug(f"etag object cached for {expires_time} seconds")
logger.debug("Caching due to etag")
self._cache_set(cache_url, request, response, body, expires_time)
@@ -362,11 +412,14 @@ def cache_response(self, request, response, body=None, status_codes=None):
# is no date header then we can't do anything about expiring
# the cache.
elif "date" in response_headers:
- date = calendar.timegm(parsedate_tz(response_headers["date"]))
+ time_tuple = parsedate_tz(response_headers["date"])
+ assert time_tuple is not None
+ date = calendar.timegm(time_tuple[:6])
# cache when there is a max-age > 0
- if "max-age" in cc and cc["max-age"] > 0:
+ max_age = cc.get("max-age")
+ if max_age is not None and max_age > 0:
logger.debug("Caching b/c date exists and max-age > 0")
- expires_time = cc["max-age"]
+ expires_time = max_age
self._cache_set(
cache_url,
request,
@@ -381,12 +434,12 @@ def cache_response(self, request, response, body=None, status_codes=None):
if response_headers["expires"]:
expires = parsedate_tz(response_headers["expires"])
if expires is not None:
- expires_time = calendar.timegm(expires) - date
+ expires_time = calendar.timegm(expires[:6]) - date
else:
expires_time = None
logger.debug(
- "Caching b/c of expires header. expires in {0} seconds".format(
+ "Caching b/c of expires header. expires in {} seconds".format(
expires_time
)
)
@@ -398,16 +451,18 @@ def cache_response(self, request, response, body=None, status_codes=None):
expires_time,
)
- def update_cached_response(self, request, response):
+ def update_cached_response(
+ self, request: PreparedRequest, response: HTTPResponse
+ ) -> HTTPResponse:
"""On a 304 we will get a new set of headers that we want to
update our cached value with, assuming we have one.
This should only ever be called when we've sent an ETag and
gotten a 304 as the response.
"""
+ assert request.url is not None
cache_url = self.cache_url(request.url)
-
- cached_response = self.serializer.loads(request, self.cache.get(cache_url))
+ cached_response = self._load_from_cache(request)
if not cached_response:
# we didn't have a cached response
@@ -423,11 +478,11 @@ def update_cached_response(self, request, response):
excluded_headers = ["content-length"]
cached_response.headers.update(
- dict(
- (k, v)
- for k, v in response.headers.items()
+ {
+ k: v
+ for k, v in response.headers.items() # type: ignore[no-untyped-call]
if k.lower() not in excluded_headers
- )
+ }
)
# we want a 200 b/c we have content via the cache
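
A worked example of the freshness arithmetic in `cached_request()` above, using only the standard library (the header values here are made up):

```python
# current_age = now - Date; the entry is fresh while
# freshness_lifetime > current_age.
import calendar
import time
from email.utils import parsedate_tz

headers = {
    "date": "Tue, 01 Aug 2023 00:00:00 GMT",
    "cache-control": "max-age=3600",
}

time_tuple = parsedate_tz(headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
current_age = max(0, time.time() - date)
freshness_lifetime = 3600  # parsed from max-age
print("fresh" if freshness_lifetime > current_age else "stale")
```
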
diff --git a/src/pip/_vendor/cachecontrol/filewrapper.py b/src/pip/_vendor/cachecontrol/filewrapper.py
index f5ed5f6f6ec..25143902a26 100644
--- a/src/pip/_vendor/cachecontrol/filewrapper.py
+++ b/src/pip/_vendor/cachecontrol/filewrapper.py
@@ -1,12 +1,17 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
-from tempfile import NamedTemporaryFile
import mmap
+from tempfile import NamedTemporaryFile
+from typing import TYPE_CHECKING, Any, Callable
+
+if TYPE_CHECKING:
+ from http.client import HTTPResponse
-class CallbackFileWrapper(object):
+class CallbackFileWrapper:
"""
    Small wrapper around an fp object which will tee everything read into a
buffer, and when that file is closed it will execute a callback with the
@@ -25,12 +30,14 @@ class CallbackFileWrapper(object):
performance impact.
"""
- def __init__(self, fp, callback):
+ def __init__(
+ self, fp: HTTPResponse, callback: Callable[[bytes], None] | None
+ ) -> None:
self.__buf = NamedTemporaryFile("rb+", delete=True)
self.__fp = fp
self.__callback = callback
- def __getattr__(self, name):
+ def __getattr__(self, name: str) -> Any:
        # The vagaries of garbage collection mean that self.__fp is
# not always set. By using __getattribute__ and the private
# name[0] allows looking up the attribute value and raising an
@@ -42,7 +49,7 @@ def __getattr__(self, name):
fp = self.__getattribute__("_CallbackFileWrapper__fp")
return getattr(fp, name)
- def __is_fp_closed(self):
+ def __is_fp_closed(self) -> bool:
try:
return self.__fp.fp is None
@@ -50,7 +57,8 @@ def __is_fp_closed(self):
pass
try:
- return self.__fp.closed
+ closed: bool = self.__fp.closed
+ return closed
except AttributeError:
pass
@@ -59,7 +67,7 @@ def __is_fp_closed(self):
# TODO: Add some logging here...
return False
- def _close(self):
+ def _close(self) -> None:
if self.__callback:
if self.__buf.tell() == 0:
# Empty file:
@@ -86,8 +94,8 @@ def _close(self):
# Important when caching big files.
self.__buf.close()
- def read(self, amt=None):
- data = self.__fp.read(amt)
+ def read(self, amt: int | None = None) -> bytes:
+ data: bytes = self.__fp.read(amt)
if data:
# We may be dealing with b'', a sign that things are over:
# it's passed e.g. after we've already closed self.__buf.
@@ -97,8 +105,8 @@ def read(self, amt=None):
return data
- def _safe_read(self, amt):
- data = self.__fp._safe_read(amt)
+ def _safe_read(self, amt: int) -> bytes:
+ data: bytes = self.__fp._safe_read(amt) # type: ignore[attr-defined]
if amt == 2 and data == b"\r\n":
# urllib executes this read to toss the CRLF at the end
# of the chunk.
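
The class above tees everything read into a buffer and fires a callback once the stream is exhausted; here is the idea reduced to a few lines (a standalone re-implementation, not the real class, which wraps an `http.client.HTTPResponse` and is driven by urllib3):

```python
import io

class TeeReader:
    def __init__(self, fp, callback):
        self._fp = fp
        self._buf = io.BytesIO()
        self._callback = callback

    def read(self, amt=None):
        data = self._fp.read(amt)
        if data:
            self._buf.write(data)
        elif self._callback is not None:
            # EOF: hand the accumulated bytes to the callback, once.
            self._callback(self._buf.getvalue())
            self._callback = None
        return data

captured = []
reader = TeeReader(io.BytesIO(b"response body"), captured.append)
while reader.read(4):
    pass
print(captured)  # [b'response body']
```
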
diff --git a/src/pip/_vendor/cachecontrol/heuristics.py b/src/pip/_vendor/cachecontrol/heuristics.py
index ebe4a96f589..b9d72ca4ac5 100644
--- a/src/pip/_vendor/cachecontrol/heuristics.py
+++ b/src/pip/_vendor/cachecontrol/heuristics.py
@@ -1,29 +1,31 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
import calendar
import time
-
+from datetime import datetime, timedelta, timezone
from email.utils import formatdate, parsedate, parsedate_tz
+from typing import TYPE_CHECKING, Any, Mapping
-from datetime import datetime, timedelta
+if TYPE_CHECKING:
+ from pip._vendor.urllib3 import HTTPResponse
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
-def expire_after(delta, date=None):
- date = date or datetime.utcnow()
+def expire_after(delta: timedelta, date: datetime | None = None) -> datetime:
+ date = date or datetime.now(timezone.utc)
return date + delta
-def datetime_to_header(dt):
+def datetime_to_header(dt: datetime) -> str:
return formatdate(calendar.timegm(dt.timetuple()))
-class BaseHeuristic(object):
-
- def warning(self, response):
+class BaseHeuristic:
+ def warning(self, response: HTTPResponse) -> str | None:
"""
Return a valid 1xx warning header value describing the cache
adjustments.
@@ -34,7 +36,7 @@ def warning(self, response):
"""
return '110 - "Response is Stale"'
- def update_headers(self, response):
+ def update_headers(self, response: HTTPResponse) -> dict[str, str]:
"""Update the response headers with any new headers.
NOTE: This SHOULD always include some Warning header to
@@ -43,7 +45,7 @@ def update_headers(self, response):
"""
return {}
- def apply(self, response):
+ def apply(self, response: HTTPResponse) -> HTTPResponse:
updated_headers = self.update_headers(response)
if updated_headers:
@@ -61,12 +63,12 @@ class OneDayCache(BaseHeuristic):
future.
"""
- def update_headers(self, response):
+ def update_headers(self, response: HTTPResponse) -> dict[str, str]:
headers = {}
if "expires" not in response.headers:
date = parsedate(response.headers["date"])
- expires = expire_after(timedelta(days=1), date=datetime(*date[:6]))
+ expires = expire_after(timedelta(days=1), date=datetime(*date[:6], tzinfo=timezone.utc)) # type: ignore[misc]
headers["expires"] = datetime_to_header(expires)
headers["cache-control"] = "public"
return headers
@@ -77,14 +79,14 @@ class ExpiresAfter(BaseHeuristic):
Cache **all** requests for a defined time period.
"""
- def __init__(self, **kw):
+ def __init__(self, **kw: Any) -> None:
self.delta = timedelta(**kw)
- def update_headers(self, response):
+ def update_headers(self, response: HTTPResponse) -> dict[str, str]:
expires = expire_after(self.delta)
return {"expires": datetime_to_header(expires), "cache-control": "public"}
- def warning(self, response):
+ def warning(self, response: HTTPResponse) -> str | None:
tmpl = "110 - Automatically cached for %s. Response might be stale"
return tmpl % self.delta
@@ -101,12 +103,23 @@ class LastModified(BaseHeuristic):
http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
Unlike mozilla we limit this to 24-hr.
"""
+
cacheable_by_default_statuses = {
- 200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
+ 200,
+ 203,
+ 204,
+ 206,
+ 300,
+ 301,
+ 404,
+ 405,
+ 410,
+ 414,
+ 501,
}
- def update_headers(self, resp):
- headers = resp.headers
+ def update_headers(self, resp: HTTPResponse) -> dict[str, str]:
+ headers: Mapping[str, str] = resp.headers
if "expires" in headers:
return {}
@@ -120,9 +133,11 @@ def update_headers(self, resp):
if "date" not in headers or "last-modified" not in headers:
return {}
- date = calendar.timegm(parsedate_tz(headers["date"]))
+ time_tuple = parsedate_tz(headers["date"])
+ assert time_tuple is not None
+ date = calendar.timegm(time_tuple[:6])
last_modified = parsedate(headers["last-modified"])
- if date is None or last_modified is None:
+ if last_modified is None:
return {}
now = time.time()
@@ -135,5 +150,5 @@ def update_headers(self, resp):
expires = date + freshness_lifetime
return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))}
- def warning(self, resp):
+ def warning(self, resp: HTTPResponse) -> str | None:
return None
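
For context, cachecontrol's documented way to apply a heuristic such as the `ExpiresAfter` class typed above (a sketch; pip itself passes `heuristic=None`):

```python
# Cache every response for one day, regardless of server headers.
from pip._vendor import requests
from pip._vendor.cachecontrol import CacheControl
from pip._vendor.cachecontrol.heuristics import ExpiresAfter

sess = CacheControl(requests.Session(), heuristic=ExpiresAfter(days=1))
```
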
diff --git a/src/pip/_vendor/cachecontrol/py.typed b/src/pip/_vendor/cachecontrol/py.typed
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/pip/_vendor/cachecontrol/serialize.py b/src/pip/_vendor/cachecontrol/serialize.py
index 7fe1a3e33a3..f9e967c3c34 100644
--- a/src/pip/_vendor/cachecontrol/serialize.py
+++ b/src/pip/_vendor/cachecontrol/serialize.py
@@ -1,78 +1,76 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
-import base64
import io
-import json
-import zlib
+from typing import IO, TYPE_CHECKING, Any, Mapping, cast
from pip._vendor import msgpack
from pip._vendor.requests.structures import CaseInsensitiveDict
+from pip._vendor.urllib3 import HTTPResponse
-from .compat import HTTPResponse, pickle, text_type
+if TYPE_CHECKING:
+ from pip._vendor.requests import PreparedRequest
-def _b64_decode_bytes(b):
- return base64.b64decode(b.encode("ascii"))
+class Serializer:
+ serde_version = "4"
-
-def _b64_decode_str(s):
- return _b64_decode_bytes(s).decode("utf8")
-
-
-_default_body_read = object()
-
-
-class Serializer(object):
- def dumps(self, request, response, body=None):
- response_headers = CaseInsensitiveDict(response.headers)
+ def dumps(
+ self,
+ request: PreparedRequest,
+ response: HTTPResponse,
+ body: bytes | None = None,
+ ) -> bytes:
+ response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
+ response.headers
+ )
if body is None:
# When a body isn't passed in, we'll read the response. We
# also update the response with a new file handler to be
# sure it acts as though it was never read.
body = response.read(decode_content=False)
- response._fp = io.BytesIO(body)
-
- # NOTE: This is all a bit weird, but it's really important that on
- # Python 2.x these objects are unicode and not str, even when
- # they contain only ascii. The problem here is that msgpack
- # understands the difference between unicode and bytes and we
- # have it set to differentiate between them, however Python 2
- # doesn't know the difference. Forcing these to unicode will be
- # enough to have msgpack know the difference.
+ response._fp = io.BytesIO(body) # type: ignore[attr-defined]
+ response.length_remaining = len(body)
+
data = {
- u"response": {
- u"body": body, # Empty bytestring if body is stored separately
- u"headers": dict(
- (text_type(k), text_type(v)) for k, v in response.headers.items()
- ),
- u"status": response.status,
- u"version": response.version,
- u"reason": text_type(response.reason),
- u"strict": response.strict,
- u"decode_content": response.decode_content,
+ "response": {
+ "body": body, # Empty bytestring if body is stored separately
+ "headers": {str(k): str(v) for k, v in response.headers.items()}, # type: ignore[no-untyped-call]
+ "status": response.status,
+ "version": response.version,
+ "reason": str(response.reason),
+ "decode_content": response.decode_content,
}
}
# Construct our vary headers
- data[u"vary"] = {}
- if u"vary" in response_headers:
- varied_headers = response_headers[u"vary"].split(",")
+ data["vary"] = {}
+ if "vary" in response_headers:
+ varied_headers = response_headers["vary"].split(",")
for header in varied_headers:
- header = text_type(header).strip()
+ header = str(header).strip()
header_value = request.headers.get(header, None)
if header_value is not None:
- header_value = text_type(header_value)
- data[u"vary"][header] = header_value
+ header_value = str(header_value)
+ data["vary"][header] = header_value
+
+ return b",".join([f"cc={self.serde_version}".encode(), self.serialize(data)])
- return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])
+ def serialize(self, data: dict[str, Any]) -> bytes:
+ return cast(bytes, msgpack.dumps(data, use_bin_type=True))
- def loads(self, request, data, body_file=None):
+ def loads(
+ self,
+ request: PreparedRequest,
+ data: bytes,
+ body_file: IO[bytes] | None = None,
+ ) -> HTTPResponse | None:
# Short circuit if we've been given an empty set of data
if not data:
- return
+ return None
# Determine what version of the serializer the data was serialized
# with
@@ -88,18 +86,23 @@ def loads(self, request, data, body_file=None):
ver = b"cc=0"
# Get the version number out of the cc=N
- ver = ver.split(b"=", 1)[-1].decode("ascii")
+ verstr = ver.split(b"=", 1)[-1].decode("ascii")
# Dispatch to the actual load method for the given version
try:
- return getattr(self, "_loads_v{}".format(ver))(request, data, body_file)
+ return getattr(self, f"_loads_v{verstr}")(request, data, body_file) # type: ignore[no-any-return]
except AttributeError:
# This is a version we don't have a loads function for, so we'll
# just treat it as a miss and return None
- return
-
- def prepare_response(self, request, cached, body_file=None):
+ return None
+
+ def prepare_response(
+ self,
+ request: PreparedRequest,
+ cached: Mapping[str, Any],
+ body_file: IO[bytes] | None = None,
+ ) -> HTTPResponse | None:
"""Verify our vary headers match and construct a real urllib3
HTTPResponse object.
"""
@@ -108,23 +111,26 @@ def prepare_response(self, request, cached, body_file=None):
# This case is also handled in the controller code when creating
# a cache entry, but is left here for backwards compatibility.
if "*" in cached.get("vary", {}):
- return
+ return None
# Ensure that the Vary headers for the cached response match our
# request
for header, value in cached.get("vary", {}).items():
if request.headers.get(header, None) != value:
- return
+ return None
body_raw = cached["response"].pop("body")
- headers = CaseInsensitiveDict(data=cached["response"]["headers"])
+ headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
+ data=cached["response"]["headers"]
+ )
if headers.get("transfer-encoding", "") == "chunked":
headers.pop("transfer-encoding")
cached["response"]["headers"] = headers
try:
+ body: IO[bytes]
if body_file is None:
body = io.BytesIO(body_raw)
else:
@@ -138,53 +144,63 @@ def prepare_response(self, request, cached, body_file=None):
# TypeError: 'str' does not support the buffer interface
body = io.BytesIO(body_raw.encode("utf8"))
+        # Discard any `strict` parameter serialized by older versions of cachecontrol.
+ cached["response"].pop("strict", None)
+
return HTTPResponse(body=body, preload_content=False, **cached["response"])
- def _loads_v0(self, request, data, body_file=None):
+ def _loads_v0(
+ self,
+ request: PreparedRequest,
+ data: bytes,
+ body_file: IO[bytes] | None = None,
+ ) -> None:
# The original legacy cache data. This doesn't contain enough
# information to construct everything we need, so we'll treat this as
# a miss.
- return
-
- def _loads_v1(self, request, data, body_file=None):
- try:
- cached = pickle.loads(data)
- except ValueError:
- return
-
- return self.prepare_response(request, cached, body_file)
-
- def _loads_v2(self, request, data, body_file=None):
- assert body_file is None
- try:
- cached = json.loads(zlib.decompress(data).decode("utf8"))
- except (ValueError, zlib.error):
- return
-
- # We need to decode the items that we've base64 encoded
- cached["response"]["body"] = _b64_decode_bytes(cached["response"]["body"])
- cached["response"]["headers"] = dict(
- (_b64_decode_str(k), _b64_decode_str(v))
- for k, v in cached["response"]["headers"].items()
- )
- cached["response"]["reason"] = _b64_decode_str(cached["response"]["reason"])
- cached["vary"] = dict(
- (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
- for k, v in cached["vary"].items()
- )
-
- return self.prepare_response(request, cached, body_file)
-
- def _loads_v3(self, request, data, body_file):
+ return None
+
+ def _loads_v1(
+ self,
+ request: PreparedRequest,
+ data: bytes,
+ body_file: IO[bytes] | None = None,
+ ) -> HTTPResponse | None:
+ # The "v1" pickled cache format. This is no longer supported
+ # for security reasons, so we treat it as a miss.
+ return None
+
+ def _loads_v2(
+ self,
+ request: PreparedRequest,
+ data: bytes,
+ body_file: IO[bytes] | None = None,
+ ) -> HTTPResponse | None:
+ # The "v2" compressed base64 cache format.
+ # This has been removed due to age and poor size/performance
+ # characteristics, so we treat it as a miss.
+ return None
+
+ def _loads_v3(
+ self,
+ request: PreparedRequest,
+ data: bytes,
+ body_file: IO[bytes] | None = None,
+ ) -> None:
# Due to Python 2 encoding issues, it's impossible to know for sure
# exactly how to load v3 entries, thus we'll treat these as a miss so
# that they get rewritten out as v4 entries.
- return
-
- def _loads_v4(self, request, data, body_file=None):
+ return None
+
+ def _loads_v4(
+ self,
+ request: PreparedRequest,
+ data: bytes,
+ body_file: IO[bytes] | None = None,
+ ) -> HTTPResponse | None:
try:
cached = msgpack.loads(data, raw=False)
except ValueError:
- return
+ return None
return self.prepare_response(request, cached, body_file)
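
A round-trip sketch of the v4 wire format that `dumps()`/`loads()` above implement: a `cc=<version>` prefix, a comma, then the msgpack payload (toy payload, not a real cached response):

```python
from pip._vendor import msgpack

payload = msgpack.dumps({"response": {"status": 200}}, use_bin_type=True)
record = b",".join([b"cc=4", payload])

ver, data = record.split(b",", 1)  # maxsplit=1: payload commas are safe
assert ver == b"cc=4"              # dispatches to _loads_v4
print(msgpack.loads(data, raw=False))  # {'response': {'status': 200}}
```
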
diff --git a/src/pip/_vendor/cachecontrol/wrapper.py b/src/pip/_vendor/cachecontrol/wrapper.py
index b6ee7f20398..f618bc363f1 100644
--- a/src/pip/_vendor/cachecontrol/wrapper.py
+++ b/src/pip/_vendor/cachecontrol/wrapper.py
@@ -1,22 +1,32 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
-from .adapter import CacheControlAdapter
-from .cache import DictCache
+from typing import TYPE_CHECKING, Collection
+from pip._vendor.cachecontrol.adapter import CacheControlAdapter
+from pip._vendor.cachecontrol.cache import DictCache
-def CacheControl(
- sess,
- cache=None,
- cache_etags=True,
- serializer=None,
- heuristic=None,
- controller_class=None,
- adapter_class=None,
- cacheable_methods=None,
-):
+if TYPE_CHECKING:
+ from pip._vendor import requests
+
+ from pip._vendor.cachecontrol.cache import BaseCache
+ from pip._vendor.cachecontrol.controller import CacheController
+ from pip._vendor.cachecontrol.heuristics import BaseHeuristic
+ from pip._vendor.cachecontrol.serialize import Serializer
+
+def CacheControl(
+ sess: requests.Session,
+ cache: BaseCache | None = None,
+ cache_etags: bool = True,
+ serializer: Serializer | None = None,
+ heuristic: BaseHeuristic | None = None,
+ controller_class: type[CacheController] | None = None,
+ adapter_class: type[CacheControlAdapter] | None = None,
+ cacheable_methods: Collection[str] | None = None,
+) -> requests.Session:
cache = DictCache() if cache is None else cache
adapter_class = adapter_class or CacheControlAdapter
adapter = adapter_class(
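
Finally, a sketch of a caching session whose bodies live in separate `.body` files, the low-memory design this PR adopts (requires the third-party `filelock` package, as the `ImportError` notice in `file_cache.py` above explains):

```python
from pip._vendor import requests
from pip._vendor.cachecontrol import CacheControl
from pip._vendor.cachecontrol.caches import SeparateBodyFileCache

sess = CacheControl(
    requests.Session(), cache=SeparateBodyFileCache("/tmp/web-cache")
)
resp = sess.get("https://pypi.org/simple/")
```
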
diff --git a/src/pip/_vendor/vendor.txt b/src/pip/_vendor/vendor.txt
index ade8512e25a..43ced2a4b89 100644
--- a/src/pip/_vendor/vendor.txt
+++ b/src/pip/_vendor/vendor.txt
@@ -1,4 +1,4 @@
-CacheControl==0.12.11 # Make sure to update the license in pyproject.toml for this.
+CacheControl==0.13.1 # Make sure to update the license in pyproject.toml for this.
colorama==0.4.6
distlib==0.3.6
distro==1.8.0
diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py
index 8bee7e4fc51..a744dbbb9bc 100644
--- a/tests/functional/test_cache.py
+++ b/tests/functional/test_cache.py
@@ -20,7 +20,7 @@ def cache_dir(script: PipTestEnvironment) -> str:
@pytest.fixture
def http_cache_dir(cache_dir: str) -> str:
- return os.path.normcase(os.path.join(cache_dir, "http"))
+ return os.path.normcase(os.path.join(cache_dir, "http-v2"))
@pytest.fixture
@@ -203,7 +203,10 @@ def test_cache_info(
) -> None:
result = script.pip("cache", "info")
- assert f"Package index page cache location: {http_cache_dir}" in result.stdout
+ assert (
+ f"Package index page cache location (pip v23.3+): {http_cache_dir}"
+ in result.stdout
+ )
assert f"Locally built wheels location: {wheel_cache_dir}" in result.stdout
num_wheels = len(wheel_cache_files)
assert f"Number of locally built wheels: {num_wheels}" in result.stdout
diff --git a/tests/unit/test_network_cache.py b/tests/unit/test_network_cache.py
index a5519864f4c..aa849f3b03a 100644
--- a/tests/unit/test_network_cache.py
+++ b/tests/unit/test_network_cache.py
@@ -31,6 +31,17 @@ def test_cache_roundtrip(self, cache_tmpdir: Path) -> None:
cache.delete("test key")
assert cache.get("test key") is None
+ def test_cache_roundtrip_body(self, cache_tmpdir: Path) -> None:
+ cache = SafeFileCache(os.fspath(cache_tmpdir))
+ assert cache.get_body("test key") is None
+ cache.set_body("test key", b"a test string")
+ body = cache.get_body("test key")
+ assert body is not None
+ with body:
+ assert body.read() == b"a test string"
+ cache.delete("test key")
+ assert cache.get_body("test key") is None
+
@pytest.mark.skipif("sys.platform == 'win32'")
def test_safe_get_no_perms(
self, cache_tmpdir: Path, monkeypatch: pytest.MonkeyPatch