disabled instance does not mean offline instance

* Disabling an instance is used to stop an instance from being the
target of new jobs to run.
* The instance should still perform its heartbeat so that it isn't
considered offline.
* If the instance were allowed to go offline on an OpenShift cluster, it
would be deleted from the database.
This commit is contained in:
chris meyers 2019-05-09 13:50:46 -04:00
parent 6feb58f76d
commit 8aa28092ff
3 changed files with 10 additions and 17 deletions

View File

@ -339,10 +339,7 @@ class InstanceDetail(RetrieveUpdateAPIView):
r = super(InstanceDetail, self).update(request, *args, **kwargs)
if status.is_success(r.status_code):
obj = self.get_object()
if obj.enabled:
obj.refresh_capacity()
else:
obj.capacity = 0
obj.refresh_capacity()
obj.save()
r.data = serializers.InstanceSerializer(obj, context=self.get_serializer_context()).to_representation(obj)
return r

View File

@ -143,7 +143,10 @@ class Instance(HasPolicyEditsMixin, BaseModel):
def refresh_capacity(self):
cpu = get_cpu_capacity()
mem = get_mem_capacity()
self.capacity = get_system_task_capacity(self.capacity_adjustment)
if self.enabled:
self.capacity = get_system_task_capacity(self.capacity_adjustment)
else:
self.capacity = 0
self.cpu = cpu[0]
self.memory = mem[0]
self.cpu_capacity = cpu[1]
@ -230,9 +233,7 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
def fit_task_to_most_remaining_capacity_instance(self, task):
instance_most_capacity = None
for i in self.instances.filter(capacity__gt=0).order_by('hostname'):
if not i.enabled:
continue
for i in self.instances.filter(capacity__gt=0, enabled=True).order_by('hostname'):
if i.remaining_capacity >= task.task_impact and \
(instance_most_capacity is None or
i.remaining_capacity > instance_most_capacity.remaining_capacity):
@ -241,7 +242,7 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
def find_largest_idle_instance(self):
largest_instance = None
for i in self.instances.filter(capacity__gt=0).order_by('hostname'):
for i in self.instances.filter(capacity__gt=0, enabled=True).order_by('hostname'):
if i.jobs_running == 0:
if largest_instance is None:
largest_instance = i
@ -252,7 +253,7 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
def choose_online_controller_node(self):
return random.choice(list(self.controller
.instances
.filter(capacity__gt=0)
.filter(capacity__gt=0, enabled=True)
.values_list('hostname', flat=True)))

View File

@ -396,14 +396,9 @@ def cluster_node_heartbeat():
instance_list.remove(inst)
if this_inst:
startup_event = this_inst.is_lost(ref_time=nowtime)
if this_inst.capacity == 0 and this_inst.enabled:
logger.warning('Rejoining the cluster as instance {}.'.format(this_inst.hostname))
if this_inst.enabled:
this_inst.refresh_capacity()
elif this_inst.capacity != 0 and not this_inst.enabled:
this_inst.capacity = 0
this_inst.save(update_fields=['capacity'])
this_inst.refresh_capacity()
if startup_event:
logger.warning('Rejoining the cluster as instance {}.'.format(this_inst.hostname))
return
else:
raise RuntimeError("Cluster Host Not Found: {}".format(settings.CLUSTER_HOST_ID))