Skip to content

Commit

Permalink
Add pull-through caching
Browse files Browse the repository at this point in the history
closes #507
  • Loading branch information
lubosmj committed Oct 24, 2023
1 parent 4df9e2e commit 426c400
Show file tree
Hide file tree
Showing 17 changed files with 1,426 additions and 122 deletions.
3 changes: 3 additions & 0 deletions CHANGES/507.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added support for pull-through caching. Users can now create a distribution with a remote pointing
to a remote registry without specifying the upstream name and Pulp automatically downloads missing
content and acts as a smart proxy.
35 changes: 35 additions & 0 deletions docs/workflows/host.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,38 @@ Docker Output::
In general, the automatic conversion cannot be performed when the content is not available
in the storage. Therefore, it may be successful only if the content was previously synced
with the ``immediate`` policy.


Pull-Through Caching
--------------------

The Pull-Through Caching feature offers an alternative way to host content by leveraging a **remote
registry** as the source of truth. This eliminates the need for repository synchronization, reducing
storage overhead, and ensuring up-to-date images. Pulp acts as a **caching proxy** and stores images
either in a repository (when all image layers are downloaded through Pulp) or as orphaned content.

Administering the caching::

# initialize a pull-through remote (the concept of upstream-name is not applicable here)
REMOTE_HREF=$(http ${BASE_ADDR}/pulp/api/v3/remotes/container/pull-through/ name=docker-cache url=https://registry-1.docker.io | jq -r ".pulp_href")

# create a specialized distribution linked to the initialized remote
http ${BASE_ADDR}/pulp/api/v3/distributions/container/pull-through/ remote=${REMOTE_HREF} name=docker-cache base_path=docker-cache

Downloading content::

podman pull localhost:24817/docker-cache/library/busybox

In the example above, the image "busybox" is pulled from the "docker-cache" distribution, acting as
a transparent caching layer.

By incorporating the Pull-Through Caching feature, administrators can **reduce external network
dependencies**, and ensure a more reliable and responsive container deployment system in production
environments.

.. note::
Pulp creates repositories that maintain a single repository version for user-pulled images.
Thus, only the latest repository version is retained. For instance, when pulling "debian:10,"
a "debian" repository with the "10" tag is established. Subsequent pulls such as "debian:11"
result in a new repository version that incorporates both tags while removing the previous
version. Repositories and their content remain manageable through standard API endpoints.
17 changes: 12 additions & 5 deletions pulp_container/app/cache.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from django.core.exceptions import ObjectDoesNotExist
from django.db.models import F, Value

from pulpcore.plugin.cache import CacheKeys, AsyncContentCache, SyncContentCache

from pulp_container.app.models import ContainerDistribution
from pulp_container.app.models import ContainerDistribution, ContainerPullThroughDistribution
from pulp_container.app.exceptions import RepositoryNotFound

ACCEPT_HEADER_KEY = "accept_header"
Expand Down Expand Up @@ -67,11 +68,17 @@ def find_base_path_cached(request, cached):
return path
else:
try:
distro = ContainerDistribution.objects.select_related(
"repository", "repository_version"
).get(base_path=path)
distro = ContainerDistribution.objects.get(base_path=path)
except ObjectDoesNotExist:
raise RepositoryNotFound(name=path)
distro = (
ContainerPullThroughDistribution.objects.annotate(path=Value(path))
.filter(path__startswith=F("base_path"))
.order_by("-base_path")
.first()
)
if not distro:
raise RepositoryNotFound(name=path)

return distro.base_path


Expand Down
18 changes: 17 additions & 1 deletion pulp_container/app/downloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re

from aiohttp.client_exceptions import ClientResponseError
from collections import namedtuple
from logging import getLogger
from multidict import MultiDict
from urllib import parse
Expand All @@ -15,6 +16,8 @@

log = getLogger(__name__)

InMemoryDownloadResult = namedtuple("InMemoryDownloadResult", ["data", "headers", "status_code"])


class RegistryAuthHttpDownloader(HttpDownloader):
"""
Expand All @@ -24,13 +27,14 @@ class RegistryAuthHttpDownloader(HttpDownloader):
"""

registry_auth = {"bearer": None, "basic": None}
token_lock = asyncio.Lock()

def __init__(self, *args, **kwargs):
"""
Initialize the downloader.
"""
self.remote = kwargs.pop("remote")
self.token_lock = asyncio.Lock()

super().__init__(*args, **kwargs)

async def _run(self, handle_401=True, extra_data=None):
Expand Down Expand Up @@ -95,7 +99,9 @@ async def _run(self, handle_401=True, extra_data=None):
return await self._run(handle_401=False, extra_data=extra_data)
else:
raise

to_return = await self._handle_response(response)

await response.release()
self.response_headers = response.headers

Expand Down Expand Up @@ -174,6 +180,16 @@ def auth_header(token, basic_auth):
return {}


class InMemoryDownloader(RegistryAuthHttpDownloader):
"""A downloader class suited for downloading data in-memory."""

async def _handle_response(self, response):
data = await response.text()
return InMemoryDownloadResult(
data=data, headers=response.headers, status_code=response.status
)


class NoAuthSignatureDownloader(HttpDownloader):
"""A downloader class suited for signature downloads."""

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Generated by Django 4.2.5 on 2023-09-06 11:49

from django.db import migrations, models
import django.db.models.deletion
import pulpcore.app.models.access_policy


class Migration(migrations.Migration):

dependencies = [
('core', '0109_contentartifact_relative_path_index'),
('container', '0036_containerpushrepository_pending_blobs_manifests'),
]

operations = [
migrations.CreateModel(
name='ContainerPullThroughDistribution',
fields=[
('distribution_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='core.distribution')),
],
options={
'permissions': [('manage_roles_containerpullthroughdistribution', 'Can manage role assignments on pull-through cache distribution')],
'default_related_name': '%(app_label)s_%(model_name)s',
},
bases=('core.distribution', pulpcore.app.models.access_policy.AutoAddObjPermsMixin),
),
migrations.CreateModel(
name='ContainerPullThroughRemote',
fields=[
('remote_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='core.remote')),
],
options={
'permissions': [('manage_roles_containerpullthroughremote', 'Can manage role assignments on pull-through container remote')],
'default_related_name': '%(app_label)s_%(model_name)s',
},
bases=('core.remote', pulpcore.app.models.access_policy.AutoAddObjPermsMixin),
),
migrations.AddField(
model_name='containerrepository',
name='pending_blobs',
field=models.ManyToManyField(related_name='pending_blobs', to='container.blob'),
),
migrations.AddField(
model_name='containerrepository',
name='pending_manifests',
field=models.ManyToManyField(to='container.manifest'),
),
migrations.AddField(
model_name='containerrepository',
name='pending_tags',
field=models.ManyToManyField(to='container.tag'),
),
migrations.AddField(
model_name='containerrepository',
name='remaining_blobs',
field=models.ManyToManyField(related_name='remaining_blobs', to='container.blob'),
),
migrations.AddField(
model_name='containerdistribution',
name='pull_through_distribution',
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='distributions', to='container.containerpullthroughdistribution'),
),
]
Loading

0 comments on commit 426c400

Please sign in to comment.