Stop pre-caching every resource in the system when importing assets

If an object is not in the cache when we call get_by_natural_key, do an actual filtered query for it and cache the results. This makes more API calls overall, but each one is smaller and they happen while we are importing, not upfront.
2026-05-08 01:47:35 -02:30 · 2024-04-22 14:49:47 -04:00
parent e4646ae611
commit fb237e3834
2 changed files with 34 additions and 11 deletions
--- a/awxkit/awxkit/api/pages/api.py
+++ b/awxkit/awxkit/api/pages/api.py
@@ -234,7 +234,7 @@ class ApiV2(base.Base):
        return endpoint.get(**{identifier: value}, all_pages=True)
    def export_assets(self, **kwargs):
-        self._cache = page.PageCache()
+        self._cache = page.PageCache(self.connection)
        # If no resource kwargs are explicitly used, export everything.
        all_resources = all(kwargs.get(resource) is None for resource in EXPORTABLE_RESOURCES)
@@ -335,7 +335,7 @@ class ApiV2(base.Base):
                if name == 'roles':
                    indexed_roles = defaultdict(list)
                    for role in S:
-                        if 'content_object' not in role:
+                        if role.get('content_object') is None:
                            continue
                        indexed_roles[role['content_object']['type']].append(role)
                    self._roles.append((_page, indexed_roles))
@@ -411,7 +411,7 @@ class ApiV2(base.Base):
            # FIXME: deal with pruning existing relations that do not match the import set
    def import_assets(self, data):
-        self._cache = page.PageCache()
+        self._cache = page.PageCache(self.connection)
        self._related = []
        self._roles = []
@@ -420,11 +420,8 @@ class ApiV2(base.Base):
        for resource in self._dependent_resources():
            endpoint = getattr(self, resource)
            # Load up existing objects, so that we can try to update or link to them
            self._cache.get_page(endpoint)
            imported = self._import_list(endpoint, data.get(resource) or [])
            changed = changed or imported
            # FIXME: should we delete existing unpatched assets?
        self._assign_related()
        self._assign_membership()
--- a/awxkit/awxkit/api/pages/page.py
+++ b/awxkit/awxkit/api/pages/page.py
@@ -11,6 +11,7 @@ from awxkit.utils import PseudoNamespace, is_relative_endpoint, are_same_endpoin
 from awxkit.api import utils
 from awxkit.api.client import Connection
 from awxkit.api.registry import URLRegistry
 from awxkit.api.resources import resources
 from awxkit.config import config
 import awxkit.exceptions as exc
@@ -493,10 +494,11 @@ class TentativePage(str):
 class PageCache(object):
-    def __init__(self):
+    def __init__(self, connection=None):
        self.options = {}
        self.pages_by_url = {}
        self.pages_by_natural_key = {}
        self.connection = connection or Connection(config.base_url, not config.assume_untrusted)
    def get_options(self, page):
        url = page.endpoint if isinstance(page, Page) else str(page)
@@ -550,7 +552,31 @@ class PageCache(object):
        return self.set_page(page)
    def get_by_natural_key(self, natural_key):
-        endpoint = self.pages_by_natural_key.get(utils.freeze(natural_key))
+        page = self.pages_by_natural_key.get(utils.freeze(natural_key))
-        log.debug("get_by_natural_key: %s, endpoint: %s", repr(natural_key), endpoint)
+        if page is None:
-        if endpoint:
+            # We need some way to get ahold of the top-level resource
-            return self.get_page(endpoint)
+            # list endpoint from the natural_key type.  The resources
            # object more or less has that for each of the detail
            # views.  Just chop off the /<id>/ bit.
            endpoint = getattr(resources, natural_key['type'], None)
            if endpoint is None:
                return
            endpoint = ''.join([endpoint.rsplit('/', 2)[0], '/'])
            page_type = get_registered_page(endpoint)
            kwargs = {}
            for k, v in natural_key.items():
                if isinstance(v, str) and k != 'type':
                    kwargs[k] = v
            # Do a filtered query against the list endpoint, usually
            # with the name of the object but sometimes more.
            list_page = page_type(self.connection, endpoint=endpoint).get(all_pages=True, **kwargs)
            if 'results' in list_page:
                for p in list_page.results:
                    self.set_page(p)
            page = self.pages_by_natural_key.get(utils.freeze(natural_key))
        log.debug("get_by_natural_key: %s, endpoint: %s", repr(natural_key), page)
        if page:
            return self.get_page(page)