Allow manually running a health check, and make other adjustments to the health check trigger (#11002)

* Fully finalize the planned work for health checks of execution nodes

* Implementation of instance health_check endpoint

* Also make the version conditional on node_type

* Do not use receptor mesh to check main cluster nodes health

* Fix bugs from testing health check of cluster nodes, add doc

* Add a few fields to health check serializer missed before

* Light refactoring of error field processing

* Fix bug in clearing errors, write more unit tests

* Update health check info in docs

* Bump migration of health check after rebase

* Mark string for translation

* Add related health_check link for system auditors too

* Handle health_check cluster node timeout, add errors for peer judgement
This commit is contained in:
Alan Rominger
2021-09-03 16:37:37 -04:00
committed by GitHub
parent 169c0f6642
commit 6a17e5b65b
15 changed files with 285 additions and 53 deletions

View File

@@ -324,6 +324,23 @@ def test_instance_group_capacity(instance_factory, instance_group_factory):
assert ig_single.capacity == 100
@pytest.mark.django_db
def test_health_check_clears_errors():
    """Saving health data with empty ``errors`` replaces a previous error.

    The instance starts out with a non-empty ``errors`` value; after
    ``save_health_data`` every reported field, including the now-empty
    ``errors``, must be readable back from the instance unchanged.
    """
    instance = Instance.objects.create(
        hostname='foo-1',
        enabled=True,
        capacity=0,
        errors='something went wrong',
    )
    reported = {
        'version': 'ansible-runner-4.2',
        'cpu': 782,
        'memory': 39_000_000_000,
        'uuid': 'asdfasdfasdfasdfasdf',
        'errors': '',
    }

    instance.save_health_data(**reported)

    # Each value handed to save_health_data should now sit on the instance.
    for field_name, expected in reported.items():
        assert getattr(instance, field_name) == expected
@pytest.mark.django_db
def test_health_check_oh_no():
    """A failing health report zeroes capacity and records the error text.

    The instance is created with non-zero capacity, cpu and memory; after
    ``save_health_data`` is called with empty/zero values and an error
    message, both ``capacity`` and ``cpu_capacity`` must be 0 and ``errors``
    must hold the message that was passed in.
    """
    failure_message = 'This it not a real instance!'
    instance = Instance.objects.create(
        hostname='foo-2',
        enabled=True,
        capacity=52,
        cpu=8,
        memory=int(40e9),
    )

    instance.save_health_data('', 0, 0, errors=failure_message)

    assert instance.capacity == instance.cpu_capacity
    assert instance.cpu_capacity == 0
    assert instance.errors == failure_message
@pytest.mark.django_db
class TestInstanceGroupOrdering:
def test_ad_hoc_instance_groups(self, instance_group_factory, inventory, default_instance_group):