replace our memcached-based fact cache implementation with local files

see: https://github.com/ansible/ansible-tower/issues/7840
This commit is contained in:
Ryan Petrello
2018-01-12 17:27:54 -05:00
parent e0c04df1ee
commit 983b192a45
6 changed files with 198 additions and 336 deletions

View File

@@ -2,26 +2,26 @@
# All Rights Reserved.
# Python
import codecs
import datetime
import hashlib
import hmac
import logging
import os
import time
import json
import base64
from urlparse import urljoin
import six
# Django
from django.conf import settings
from django.db import models
#from django.core.cache import cache
import memcache
from django.db.models import Q, Count
from django.utils.dateparse import parse_datetime
from dateutil import parser
from dateutil.tz import tzutc
from django.utils.encoding import force_text, smart_str
from django.utils.timezone import utc
from django.utils.timezone import utc, now
from django.utils.translation import ugettext_lazy as _
from django.core.exceptions import ValidationError
@@ -714,85 +714,61 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
def get_notification_friendly_name(self):
return "Job"
@property
def memcached_fact_key(self):
return '{}'.format(self.inventory.id)
def memcached_fact_host_key(self, host_name):
return '{}-{}'.format(self.inventory.id, base64.b64encode(host_name.encode('utf-8')))
def memcached_fact_modified_key(self, host_name):
return '{}-{}-modified'.format(self.inventory.id, base64.b64encode(host_name.encode('utf-8')))
def _get_inventory_hosts(self, only=['name', 'ansible_facts', 'modified',]):
def _get_inventory_hosts(self, only=['name', 'ansible_facts', 'ansible_facts_modified', 'modified',]):
if not self.inventory:
return []
return self.inventory.hosts.only(*only)
def _get_memcache_connection(self):
return memcache.Client([settings.CACHES['default']['LOCATION']], debug=0)
def start_job_fact_cache(self):
if not self.inventory:
return
cache = self._get_memcache_connection()
host_names = []
for host in self._get_inventory_hosts():
host_key = self.memcached_fact_host_key(host.name)
modified_key = self.memcached_fact_modified_key(host.name)
if cache.get(modified_key) is None:
if host.ansible_facts_modified:
host_modified = host.ansible_facts_modified.replace(tzinfo=tzutc()).isoformat()
else:
host_modified = datetime.datetime.now(tzutc()).isoformat()
cache.set(host_key, json.dumps(host.ansible_facts))
cache.set(modified_key, host_modified)
host_names.append(host.name)
cache.set(self.memcached_fact_key, host_names)
def finish_job_fact_cache(self):
if not self.inventory:
return
cache = self._get_memcache_connection()
def start_job_fact_cache(self, destination, modification_times, timeout=None):
destination = os.path.join(destination, 'facts')
os.makedirs(destination, mode=0700)
hosts = self._get_inventory_hosts()
if timeout is None:
timeout = settings.ANSIBLE_FACT_CACHE_TIMEOUT
if timeout > 0:
# exclude hosts with fact data older than `settings.ANSIBLE_FACT_CACHE_TIMEOUT seconds`
timeout = now() - datetime.timedelta(seconds=timeout)
hosts = hosts.filter(ansible_facts_modified__gte=timeout)
for host in hosts:
host_key = self.memcached_fact_host_key(host.name)
modified_key = self.memcached_fact_modified_key(host.name)
filepath = os.sep.join(map(six.text_type, [destination, host.name]))
with codecs.open(filepath, 'w', encoding='utf-8') as f:
os.chmod(f.name, 0600)
json.dump(host.ansible_facts, f)
# make note of the time we wrote the file so we can check if it changed later
modification_times[filepath] = os.path.getmtime(filepath)
modified = cache.get(modified_key)
if modified is None:
cache.delete(host_key)
continue
# Save facts if cache is newer than DB
modified = parser.parse(modified, tzinfos=[tzutc()])
if not host.ansible_facts_modified or modified > host.ansible_facts_modified:
ansible_facts = cache.get(host_key)
try:
ansible_facts = json.loads(ansible_facts)
except Exception:
ansible_facts = None
if ansible_facts is None:
cache.delete(host_key)
continue
host.ansible_facts = ansible_facts
host.ansible_facts_modified = modified
if 'insights' in ansible_facts and 'system_id' in ansible_facts['insights']:
host.insights_system_id = ansible_facts['insights']['system_id']
host.save()
def finish_job_fact_cache(self, destination, modification_times):
destination = os.path.join(destination, 'facts')
for host in self._get_inventory_hosts():
filepath = os.sep.join(map(six.text_type, [destination, host.name]))
if os.path.exists(filepath):
# If the file changed since we wrote it pre-playbook run...
modified = os.path.getmtime(filepath)
if modified > modification_times.get(filepath, 0):
with codecs.open(filepath, 'r', encoding='utf-8') as f:
try:
ansible_facts = json.load(f)
except ValueError:
continue
host.ansible_facts = ansible_facts
host.ansible_facts_modified = now()
if 'insights' in ansible_facts and 'system_id' in ansible_facts['insights']:
host.insights_system_id = ansible_facts['insights']['system_id']
host.save()
system_tracking_logger.info(
'New fact for inventory {} host {}'.format(
smart_str(host.inventory.name), smart_str(host.name)),
extra=dict(inventory_id=host.inventory.id, host_name=host.name,
ansible_facts=host.ansible_facts,
ansible_facts_modified=host.ansible_facts_modified.isoformat()))
else:
# if the file goes missing, ansible removed it (likely via clear_facts)
host.ansible_facts = {}
host.ansible_facts_modified = now()
system_tracking_logger.info(
'New fact for inventory {} host {}'.format(
smart_str(host.inventory.name), smart_str(host.name)),
extra=dict(inventory_id=host.inventory.id, host_name=host.name,
ansible_facts=host.ansible_facts,
ansible_facts_modified=host.ansible_facts_modified.isoformat()))
'Facts cleared for inventory {} host {}'.format(
smart_str(host.inventory.name), smart_str(host.name)))
host.save()
class JobHostSummary(CreatedModifiedModel):

View File

@@ -812,6 +812,15 @@ class BaseTask(LogErrorsTask):
# Fetch ansible version once here to support version-dependent features.
kwargs['ansible_version'] = get_ansible_version()
kwargs['private_data_dir'] = self.build_private_data_dir(instance, **kwargs)
# Fetch "cached" fact data from prior runs and put on the disk
# where ansible expects to find it
if getattr(instance, 'use_fact_cache', False) and not kwargs.get('isolated'):
instance.start_job_fact_cache(
os.path.join(kwargs['private_data_dir']),
kwargs.setdefault('fact_modification_times', {})
)
# May have to serialize the value
kwargs['private_data_files'] = self.build_private_data_files(instance, **kwargs)
kwargs['passwords'] = self.build_passwords(instance, **kwargs)
@@ -1032,10 +1041,8 @@ class RunJob(BaseTask):
env['INVENTORY_ID'] = str(job.inventory.pk)
if job.use_fact_cache and not kwargs.get('isolated'):
env['ANSIBLE_LIBRARY'] = self.get_path_to('..', 'plugins', 'library')
env['ANSIBLE_CACHE_PLUGINS'] = self.get_path_to('..', 'plugins', 'fact_caching')
env['ANSIBLE_CACHE_PLUGIN'] = "awx"
env['ANSIBLE_CACHE_PLUGIN_TIMEOUT'] = str(settings.ANSIBLE_FACT_CACHE_TIMEOUT)
env['ANSIBLE_CACHE_PLUGIN_CONNECTION'] = settings.CACHES['default']['LOCATION'] if 'LOCATION' in settings.CACHES['default'] else ''
env['ANSIBLE_CACHE_PLUGIN'] = "jsonfile"
env['ANSIBLE_CACHE_PLUGIN_CONNECTION'] = os.path.join(kwargs['private_data_dir'], 'facts')
if job.project:
env['PROJECT_REVISION'] = job.project.scm_revision
env['ANSIBLE_RETRY_FILES_ENABLED'] = "False"
@@ -1286,14 +1293,14 @@ class RunJob(BaseTask):
('project_update', local_project_sync.name, local_project_sync.id)))
raise
if job.use_fact_cache and not kwargs.get('isolated'):
job.start_job_fact_cache()
def final_run_hook(self, job, status, **kwargs):
super(RunJob, self).final_run_hook(job, status, **kwargs)
if job.use_fact_cache and not kwargs.get('isolated'):
job.finish_job_fact_cache()
job.finish_job_fact_cache(
kwargs['private_data_dir'],
kwargs['fact_modification_times']
)
try:
inventory = job.inventory
except Inventory.DoesNotExist:

View File

@@ -1,4 +1,7 @@
# -*- coding: utf-8 -*-
import json
import os
import time
import pytest
@@ -8,51 +11,14 @@ from awx.main.models import (
Host,
)
import datetime
import json
import base64
from dateutil.tz import tzutc
class CacheMock(object):
def __init__(self):
self.d = dict()
def get(self, key):
if key not in self.d:
return None
return self.d[key]
def set(self, key, val):
self.d[key] = val
def delete(self, key):
del self.d[key]
@pytest.fixture
def old_time():
return (datetime.datetime.now(tzutc()) - datetime.timedelta(minutes=60))
@pytest.fixture()
def new_time():
return (datetime.datetime.now(tzutc()))
@pytest.fixture
def hosts(old_time, inventory):
def hosts(inventory):
return [
Host(name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=old_time, inventory=inventory),
Host(name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=old_time, inventory=inventory),
Host(name='host3', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=old_time, inventory=inventory),
]
@pytest.fixture
def hosts2(inventory):
return [
Host(name='host2', ansible_facts="foobar", ansible_facts_modified=old_time, inventory=inventory),
Host(name='host1', ansible_facts={"a": 1, "b": 2}, inventory=inventory),
Host(name='host2', ansible_facts={"a": 1, "b": 2}, inventory=inventory),
Host(name='host3', ansible_facts={"a": 1, "b": 2}, inventory=inventory),
Host(name=u'Iñtërnâtiônàlizætiøn', ansible_facts={"a": 1, "b": 2}, inventory=inventory),
]
@@ -62,87 +28,92 @@ def inventory():
@pytest.fixture
def mock_cache(mocker):
cache = CacheMock()
mocker.patch.object(cache, 'set', wraps=cache.set)
mocker.patch.object(cache, 'get', wraps=cache.get)
mocker.patch.object(cache, 'delete', wraps=cache.delete)
return cache
@pytest.fixture
def job(mocker, hosts, inventory, mock_cache):
def job(mocker, hosts, inventory):
j = Job(inventory=inventory, id=2)
j._get_inventory_hosts = mocker.Mock(return_value=hosts)
j._get_memcache_connection = mocker.Mock(return_value=mock_cache)
return j
@pytest.fixture
def job2(mocker, hosts2, inventory, mock_cache):
j = Job(inventory=inventory, id=3)
j._get_inventory_hosts = mocker.Mock(return_value=hosts2)
j._get_memcache_connection = mocker.Mock(return_value=mock_cache)
return j
def test_start_job_fact_cache(hosts, job, inventory, tmpdir):
fact_cache = str(tmpdir)
modified_times = {}
job.start_job_fact_cache(fact_cache, modified_times, 0)
for host in hosts:
filepath = os.path.join(fact_cache, 'facts', host.name)
assert os.path.exists(filepath)
with open(filepath, 'r') as f:
assert f.read() == json.dumps(host.ansible_facts)
assert filepath in modified_times
def test_start_job_fact_cache(hosts, job, inventory, mocker):
def test_finish_job_fact_cache_with_existing_data(job, hosts, inventory, mocker, tmpdir):
fact_cache = str(tmpdir)
modified_times = {}
job.start_job_fact_cache(fact_cache, modified_times, 0)
job.start_job_fact_cache()
job._get_memcache_connection().set.assert_any_call('5', [h.name for h in hosts])
for host in hosts:
job._get_memcache_connection().set.assert_any_call('{}-{}'.format(5, base64.b64encode(host.name)), json.dumps(host.ansible_facts))
job._get_memcache_connection().set.assert_any_call('{}-{}-modified'.format(5, base64.b64encode(host.name)), host.ansible_facts_modified.isoformat())
def test_start_job_fact_cache_existing_host(hosts, hosts2, job, job2, inventory, mocker):
job.start_job_fact_cache()
for host in hosts:
job._get_memcache_connection().set.assert_any_call('{}-{}'.format(5, base64.b64encode(host.name)), json.dumps(host.ansible_facts))
job._get_memcache_connection().set.assert_any_call('{}-{}-modified'.format(5, base64.b64encode(host.name)), host.ansible_facts_modified.isoformat())
job._get_memcache_connection().set.reset_mock()
job2.start_job_fact_cache()
# Ensure hosts2 ansible_facts didn't overwrite hosts ansible_facts
ansible_facts_cached = job._get_memcache_connection().get('{}-{}'.format(5, base64.b64encode(hosts2[0].name)))
assert ansible_facts_cached == json.dumps(hosts[1].ansible_facts)
def test_memcached_fact_host_key_unicode(job):
host_name = u'Iñtërnâtiônàlizætiøn'
host_key = job.memcached_fact_host_key(host_name)
assert host_key == '5-ScOxdMOrcm7DonRpw7Ruw6BsaXrDpnRpw7hu'
def test_memcached_fact_modified_key_unicode(job):
host_name = u'Iñtërnâtiônàlizætiøn'
host_key = job.memcached_fact_modified_key(host_name)
assert host_key == '5-ScOxdMOrcm7DonRpw7Ruw6BsaXrDpnRpw7hu-modified'
def test_finish_job_fact_cache(job, hosts, inventory, mocker, new_time):
job.start_job_fact_cache()
for h in hosts:
h.save = mocker.Mock()
host_key = job.memcached_fact_host_key(hosts[1].name)
modified_key = job.memcached_fact_modified_key(hosts[1].name)
ansible_facts_new = {"foo": "bar", "insights": {"system_id": "updated_by_scan"}}
job._get_memcache_connection().set(host_key, json.dumps(ansible_facts_new))
job._get_memcache_connection().set(modified_key, new_time.isoformat())
job.finish_job_fact_cache()
filepath = os.path.join(fact_cache, 'facts', hosts[1].name)
with open(filepath, 'w') as f:
f.write(json.dumps(ansible_facts_new))
f.flush()
# I feel kind of gross about calling `os.utime` by hand, but I noticed
# that in our container-based dev environment, the resolution for
# `os.stat()` after a file write was over a second, and I don't want to put
# a sleep() in this test
new_modification_time = time.time() + 3600
os.utime(filepath, (new_modification_time, new_modification_time))
hosts[0].save.assert_not_called()
hosts[2].save.assert_not_called()
job.finish_job_fact_cache(fact_cache, modified_times)
for host in (hosts[0], hosts[2], hosts[3]):
host.save.assert_not_called()
assert host.ansible_facts == {"a": 1, "b": 2}
assert host.ansible_facts_modified is None
assert hosts[1].ansible_facts == ansible_facts_new
assert hosts[1].insights_system_id == "updated_by_scan"
hosts[1].save.assert_called_once_with()
def test_finish_job_fact_cache_with_bad_data(job, hosts, inventory, mocker, tmpdir):
fact_cache = str(tmpdir)
modified_times = {}
job.start_job_fact_cache(fact_cache, modified_times, 0)
for h in hosts:
h.save = mocker.Mock()
for h in hosts:
filepath = os.path.join(fact_cache, 'facts', h.name)
with open(filepath, 'w') as f:
f.write('not valid json!')
f.flush()
new_modification_time = time.time() + 3600
os.utime(filepath, (new_modification_time, new_modification_time))
job.finish_job_fact_cache(fact_cache, modified_times)
for h in hosts:
h.save.assert_not_called()
def test_finish_job_fact_cache_clear(job, hosts, inventory, mocker, tmpdir):
fact_cache = str(tmpdir)
modified_times = {}
job.start_job_fact_cache(fact_cache, modified_times, 0)
for h in hosts:
h.save = mocker.Mock()
os.remove(os.path.join(fact_cache, 'facts', hosts[1].name))
job.finish_job_fact_cache(fact_cache, modified_times)
for host in (hosts[0], hosts[2], hosts[3]):
host.save.assert_not_called()
assert host.ansible_facts == {"a": 1, "b": 2}
assert host.ansible_facts_modified is None
assert hosts[1].ansible_facts == {}
hosts[1].save.assert_called_once_with()

View File

@@ -331,6 +331,23 @@ class TestGenericRun(TestJobExecution):
args, cwd, env, stdout = call_args
assert env['FOO'] == 'BAR'
def test_fact_cache_usage(self):
self.instance.use_fact_cache = True
start_mock = mock.Mock()
patch = mock.patch.object(Job, 'start_job_fact_cache', start_mock)
self.patches.append(patch)
patch.start()
self.task.run(self.pk)
call_args, _ = self.run_pexpect.call_args_list[0]
args, cwd, env, stdout = call_args
start_mock.assert_called_once()
tmpdir, _ = start_mock.call_args[0]
assert env['ANSIBLE_CACHE_PLUGIN'] == 'jsonfile'
assert env['ANSIBLE_CACHE_PLUGIN_CONNECTION'] == os.path.join(tmpdir, 'facts')
class TestAdhocRun(TestJobExecution):

View File

@@ -1,128 +0,0 @@
# Copyright (c) 2015 Ansible, Inc.
# This file is a utility Ansible plugin that is not part of the AWX or Ansible
# packages. It does not import any code from either package, nor does its
# license apply to Ansible or AWX.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# Neither the name of the <ORGANIZATION> nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import os
import memcache
import json
import datetime
import base64
from dateutil import parser
from dateutil.tz import tzutc
from ansible import constants as C
try:
from ansible.cache.base import BaseCacheModule
except Exception:
from ansible.plugins.cache.base import BaseCacheModule
class CacheModule(BaseCacheModule):
def __init__(self, *args, **kwargs):
self.mc = memcache.Client([C.CACHE_PLUGIN_CONNECTION], debug=0)
self._timeout = int(C.CACHE_PLUGIN_TIMEOUT)
self._inventory_id = os.environ['INVENTORY_ID']
@property
def host_names_key(self):
return '{}'.format(self._inventory_id)
def translate_host_key(self, host_name):
return '{}-{}'.format(self._inventory_id, base64.b64encode(host_name.encode('utf-8')))
def translate_modified_key(self, host_name):
return '{}-{}-modified'.format(self._inventory_id, base64.b64encode(host_name.encode('utf-8')))
def get(self, key):
host_key = self.translate_host_key(key)
modified_key = self.translate_modified_key(key)
'''
Cache entry expired
'''
modified = self.mc.get(modified_key)
if modified is None:
raise KeyError
modified = parser.parse(modified).replace(tzinfo=tzutc())
now_utc = datetime.datetime.now(tzutc())
if self._timeout != 0 and (modified + datetime.timedelta(seconds=self._timeout)) < now_utc:
raise KeyError
value_json = self.mc.get(host_key)
if value_json is None:
raise KeyError
try:
return json.loads(value_json)
# If cache entry is corrupt or bad, fail gracefully.
except (TypeError, ValueError):
self.delete(key)
raise KeyError
def set(self, key, value):
host_key = self.translate_host_key(key)
modified_key = self.translate_modified_key(key)
self.mc.set(host_key, json.dumps(value))
self.mc.set(modified_key, datetime.datetime.now(tzutc()).isoformat())
def keys(self):
return self.mc.get(self.host_names_key)
def contains(self, key):
try:
self.get(key)
return True
except KeyError:
return False
def delete(self, key):
self.set(key, {})
def flush(self):
host_names = self.mc.get(self.host_names_key)
if not host_names:
return
for k in host_names:
self.mc.delete(self.translate_host_key(k))
self.mc.delete(self.translate_modified_key(k))
def copy(self):
ret = dict()
host_names = self.mc.get(self.host_names_key)
if not host_names:
return
for k in host_names:
ret[k] = self.mc.get(self.translate_host_key(k))
return ret