Stop pre-caching every resource in the system upon import

If an object is not in the cache when get_by_natural_key is called, do
a filtered query for it and cache the results.  This makes more API
calls overall, but each one is smaller and they happen during the
import rather than all upfront.
This commit is contained in:
Jeff Bradberry 2024-04-22 14:49:47 -04:00
parent e4646ae611
commit fb237e3834
2 changed files with 34 additions and 11 deletions

View File

@ -234,7 +234,7 @@ class ApiV2(base.Base):
return endpoint.get(**{identifier: value}, all_pages=True)
def export_assets(self, **kwargs):
self._cache = page.PageCache()
self._cache = page.PageCache(self.connection)
# If no resource kwargs are explicitly used, export everything.
all_resources = all(kwargs.get(resource) is None for resource in EXPORTABLE_RESOURCES)
@ -335,7 +335,7 @@ class ApiV2(base.Base):
if name == 'roles':
indexed_roles = defaultdict(list)
for role in S:
if 'content_object' not in role:
if role.get('content_object') is None:
continue
indexed_roles[role['content_object']['type']].append(role)
self._roles.append((_page, indexed_roles))
@ -411,7 +411,7 @@ class ApiV2(base.Base):
# FIXME: deal with pruning existing relations that do not match the import set
def import_assets(self, data):
self._cache = page.PageCache()
self._cache = page.PageCache(self.connection)
self._related = []
self._roles = []
@ -420,11 +420,8 @@ class ApiV2(base.Base):
for resource in self._dependent_resources():
endpoint = getattr(self, resource)
# Load up existing objects, so that we can try to update or link to them
self._cache.get_page(endpoint)
imported = self._import_list(endpoint, data.get(resource) or [])
changed = changed or imported
# FIXME: should we delete existing unpatched assets?
self._assign_related()
self._assign_membership()

View File

@ -11,6 +11,7 @@ from awxkit.utils import PseudoNamespace, is_relative_endpoint, are_same_endpoin
from awxkit.api import utils
from awxkit.api.client import Connection
from awxkit.api.registry import URLRegistry
from awxkit.api.resources import resources
from awxkit.config import config
import awxkit.exceptions as exc
@ -493,10 +494,11 @@ class TentativePage(str):
class PageCache(object):
def __init__(self):
def __init__(self, connection=None):
self.options = {}
self.pages_by_url = {}
self.pages_by_natural_key = {}
self.connection = connection or Connection(config.base_url, not config.assume_untrusted)
def get_options(self, page):
url = page.endpoint if isinstance(page, Page) else str(page)
@ -550,7 +552,31 @@ class PageCache(object):
return self.set_page(page)
def get_by_natural_key(self, natural_key):
endpoint = self.pages_by_natural_key.get(utils.freeze(natural_key))
log.debug("get_by_natural_key: %s, endpoint: %s", repr(natural_key), endpoint)
if endpoint:
return self.get_page(endpoint)
page = self.pages_by_natural_key.get(utils.freeze(natural_key))
if page is None:
# We need some way to get ahold of the top-level resource
# list endpoint from the natural_key type. The resources
# object more or less has that for each of the detail
# views. Just chop off the /<id>/ bit.
endpoint = getattr(resources, natural_key['type'], None)
if endpoint is None:
return
endpoint = ''.join([endpoint.rsplit('/', 2)[0], '/'])
page_type = get_registered_page(endpoint)
kwargs = {}
for k, v in natural_key.items():
if isinstance(v, str) and k != 'type':
kwargs[k] = v
# Do a filtered query against the list endpoint, usually
# with the name of the object but sometimes more.
list_page = page_type(self.connection, endpoint=endpoint).get(all_pages=True, **kwargs)
if 'results' in list_page:
for p in list_page.results:
self.set_page(p)
page = self.pages_by_natural_key.get(utils.freeze(natural_key))
log.debug("get_by_natural_key: %s, endpoint: %s", repr(natural_key), page)
if page:
return self.get_page(page)