Pin channels-redis to 4.3.1 to fix an async issue (#13348 )

Refs django/channels_redis#332 Refs #13313 Signed-off-by: Rick Elrod <rick@elrod.me>
Merge pull request #13352 from AlanCoding/dont_pass_subtasks
2026-02-05 11:34:43 -03:30 · 2022-12-20 17:05:44 -06:00 · 2022-12-20 16:25:39 -05:00 · 2022-12-20 16:06:25 -05:00 · 2022-12-19 16:02:51 -05:00 · 2022-12-19 14:16:05 -03:00
810 changed files with 32063 additions and 19225 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,3 +1,2 @@
-awx/ui/node_modules
 Dockerfile
 .git
--- a/.github/triage_replies.md
+++ b/.github/triage_replies.md
@@ -53,6 +53,16 @@ https://github.com/ansible/awx/#get-involved \
 Thank you once again for this and your interest in AWX!


+### Red Hat Support Team
+- Hi! \
+\
+It appears that you are using an RPM build for RHEL. Please reach out to the Red Hat support team and submit a ticket. \
+\
+Here is the link to do so: \
+\
+https://access.redhat.com/support \
+\
+Thank you for your submission and for supporting AWX!


 ## Common
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,6 +2,7 @@
 name: CI
 env:
  BRANCH: ${{ github.base_ref || 'devel' }}
+  LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
 on:
  pull_request:
 jobs:
@@ -27,6 +28,9 @@ jobs:
          - name: awx-collection
            command: /start_tests.sh test_collection_all
            label: Run Collection Tests
+          - name: awx-collection-sanity
+            command: /start_tests.sh test_collection_sanity
+            label: Run Ansible core Collection Sanity tests
          - name: api-schema
            label: Check API Schema
            command: /start_tests.sh detect-schema-change SCHEMA_DIFF_BASE_BRANCH=${{ github.event.pull_request.base.ref }}
--- a/.github/workflows/devel_images.yml
+++ b/.github/workflows/devel_images.yml
@@ -1,5 +1,7 @@
 ---
 name: Build/Push Development Images
+env:
+  LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
 on:
  push:
    branches:
--- a/.github/workflows/e2e_test.yml
+++ b/.github/workflows/e2e_test.yml
@@ -1,5 +1,8 @@
 ---
 name: E2E Tests
+env:
+  LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
+
 on:
  pull_request_target:
    types: [labeled]
--- a/.github/workflows/feature_branch_deletion.yml
+++ b/.github/workflows/feature_branch_deletion.yml
@@ -0,0 +1,26 @@
+---
+name: Feature branch deletion cleanup
+env:
+  LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
+on:
+  delete:
+    branches:
+      - feature_**
+jobs:
+  push:
+    if: github.event.ref_type == 'branch' && startsWith(github.event.ref, 'feature_') # explicit guard; do not rely on the filter above
+    runs-on: ubuntu-latest
+    permissions:
+      packages: write
+      contents: read
+    steps:
+      - name: Delete API Schema
+        env:
+          AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }}
+          AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }}
+          AWS_REGION: 'us-east-1'
+          DELETED_REF: ${{ github.event.ref }} # on delete events GITHUB_REF is the default branch, not the deleted one
+        run: |
+          ansible localhost -c local, -m command -a "{{ ansible_python_interpreter + ' -m pip install boto3'}}"
+          ansible localhost -c local -m aws_s3 \
+            -a "bucket=awx-public-ci-files object=${DELETED_REF##*/}/schema.json mode=delete permission=public-read"
--- a/.github/workflows/pr_body_check.yml
+++ b/.github/workflows/pr_body_check.yml
@@ -13,21 +13,13 @@ jobs:
      packages: write
      contents: read
    steps:
-      - name: Write PR body to a file
-        run: |
-          cat >> pr.body << __SOME_RANDOM_PR_EOF__
-          ${{ github.event.pull_request.body }}
-          __SOME_RANDOM_PR_EOF__
-
-      - name: Display the received body for troubleshooting
-        run: cat pr.body
-
-      # We want to write these out individually just incase the options were joined on a single line
      - name: Check for each of the lines
+        env:
+          PR_BODY: ${{ github.event.pull_request.body }}
        run: |
-          grep "Bug, Docs Fix or other nominal change" pr.body > Z
-          grep "New or Enhanced Feature" pr.body > Y
-          grep "Breaking Change" pr.body > X
+          printf '%s\n' "$PR_BODY" | grep "Bug, Docs Fix or other nominal change" > Z  # quoted: no word-splitting/globbing of the body
+          printf '%s\n' "$PR_BODY" | grep "New or Enhanced Feature" > Y
+          printf '%s\n' "$PR_BODY" | grep "Breaking Change" > X
          exit 0
        # We exit 0 and set the shell to prevent the returns from the greps from failing this step
        # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference
--- a/.github/workflows/promote.yml
+++ b/.github/workflows/promote.yml
@@ -1,5 +1,9 @@
 ---
 name: Promote Release
+
+env:
+  LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
+
 on:
  release:
    types: [published]
--- a/.github/workflows/stage.yml
+++ b/.github/workflows/stage.yml
@@ -1,5 +1,9 @@
 ---
 name: Stage Release
+
+env:
+  LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
+
 on:
  workflow_dispatch:
    inputs:
--- a/.github/workflows/update_dependabot_prs.yml
+++ b/.github/workflows/update_dependabot_prs.yml
@@ -19,8 +19,11 @@ jobs:
            GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
            OWNER: ${{ github.repository_owner }}
            REPO: ${{ github.event.repository.name }}
-            BRANCH: ${{github.event.pull_request.head.ref}}
-            PR: ${{github.event.pull_request}}
+            PR: ${{github.event.pull_request.number}}
+            PR_BODY: ${{github.event.pull_request.body}}
        run: |
-          gh pr checkout ${{ env.BRANCH }}
-          gh pr edit --body "${{ env.PR }}\nBug, Docs Fix or other nominal change"
+          gh pr checkout ${{ env.PR }}
+          printf '%s\n' "$PR_BODY" > my_pr_body.txt  # read the body from the env var; never interpolate it into the script text
+          echo "" >> my_pr_body.txt
+          echo "Bug, Docs Fix or other nominal change" >> my_pr_body.txt
+          gh pr edit ${{env.PR}} --body-file my_pr_body.txt
--- a/.github/workflows/upload_schema.yml
+++ b/.github/workflows/upload_schema.yml
@@ -1,10 +1,15 @@
 ---
 name: Upload API Schema
+
+env:
+  LC_ALL: "C.UTF-8" # prevent ERROR: Ansible could not initialize the preferred locale: unsupported locale setting
+
 on:
  push:
    branches:
      - devel
      - release_**
+      - feature_**
 jobs:
  push:
    runs-on: ubuntu-latest
--- a/.gitignore
+++ b/.gitignore
@@ -153,9 +153,6 @@ use_dev_supervisor.txt
 /sanity/
 /awx_collection_build/

-# Setup for metrics gathering
-tools/prometheus/prometheus.yml
-
 .idea/*
 *.unison.tmp
 *.#
--- a/.yamllint
+++ b/.yamllint
@@ -8,6 +8,8 @@ ignore: |
  awx/ui/test/e2e/tests/smoke-vars.yml
  awx/ui/node_modules
  tools/docker-compose/_sources
+  # django template files
+  awx/api/templates/instance_install_bundle/**

 extends: default

--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,7 +3,7 @@ recursive-include awx *.po
 recursive-include awx *.mo
 recursive-include awx/static *
 recursive-include awx/templates *.html
-recursive-include awx/api/templates *.md *.html
+recursive-include awx/api/templates *.md *.html *.yml
 recursive-include awx/ui/build *.html
 recursive-include awx/ui/build *
 recursive-include awx/playbooks *.yml
@@ -12,7 +12,7 @@ recursive-include awx/plugins *.ps1
 recursive-include requirements *.txt
 recursive-include requirements *.yml
 recursive-include config *
-recursive-include docs/licenses *
+recursive-include licenses *
 recursive-exclude awx devonly.py*
 recursive-exclude awx/api/tests *
 recursive-exclude awx/main/tests *
--- a/70
+++ b/70
@@ -6,7 +6,9 @@ CHROMIUM_BIN=/tmp/chrome-linux/chrome
 GIT_BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD)
 MANAGEMENT_COMMAND ?= awx-manage
 VERSION := $(shell $(PYTHON) tools/scripts/scm_version.py)
-COLLECTION_VERSION := $(shell $(PYTHON) tools/scripts/scm_version.py | cut -d . -f 1-3)
+
+# ansible-test requires a semver-compatible version, so we allow overrides to hack it
+COLLECTION_VERSION ?= $(shell $(PYTHON) tools/scripts/scm_version.py | cut -d . -f 1-3)

 # NOTE: This defaults the container image version to the branch that's active
 COMPOSE_TAG ?= $(GIT_BRANCH)
@@ -34,7 +36,7 @@ RECEPTOR_IMAGE ?= quay.io/ansible/receptor:devel
 SRC_ONLY_PKGS ?= cffi,pycparser,psycopg2,twilio
 # These should be upgraded in the AWX and Ansible venv before attempting
 # to install the actual requirements
-VENV_BOOTSTRAP ?= pip==21.2.4 setuptools==58.2.0 setuptools_scm[toml]==6.4.2 wheel==0.36.2
+VENV_BOOTSTRAP ?= pip==21.2.4 setuptools==65.6.3 setuptools_scm[toml]==7.0.5 wheel==0.38.4

 NAME ?= awx

@@ -85,6 +87,7 @@ clean: clean-ui clean-api clean-awxkit clean-dist

 clean-api:
 	rm -rf build $(NAME)-$(VERSION) *.egg-info
+	rm -rf .tox
 	find . -type f -regex ".*\.py[co]$$" -delete
 	find . -type d -name "__pycache__" -delete
 	rm -f awx/awx_test.sqlite3*
@@ -117,7 +120,7 @@ virtualenv_awx:
 		fi; \
 	fi

-## Install third-party requirements needed for AWX's environment. 
+## Install third-party requirements needed for AWX's environment.
 # this does not use system site packages intentionally
 requirements_awx: virtualenv_awx
 	if [[ "$(PIP_OPTIONS)" == *"--no-index"* ]]; then \
@@ -181,7 +184,7 @@ collectstatic:
 	@if [ "$(VENV_BASE)" ]; then \
 		. $(VENV_BASE)/awx/bin/activate; \
 	fi; \
-	mkdir -p awx/public/static && $(PYTHON) manage.py collectstatic --clear --noinput > /dev/null 2>&1
+	$(PYTHON) manage.py collectstatic --clear --noinput > /dev/null 2>&1

 DEV_RELOAD_COMMAND ?= supervisorctl restart tower-processes:*

@@ -299,7 +302,8 @@ test_collection:
 	if [ "$(VENV_BASE)" ]; then \
 		. $(VENV_BASE)/awx/bin/activate; \
 	fi && \
-	pip install ansible-core && \
+	if ! [ -x "$$(command -v ansible-playbook)" ]; then pip install ansible-core; fi && \
+	ansible --version && \
 	py.test $(COLLECTION_TEST_DIRS) -v
 	# The python path needs to be modified so that the tests can find Ansible within the container
 	# First we will use anything expility set as PYTHONPATH
@@ -329,8 +333,13 @@ install_collection: build_collection
 	rm -rf $(COLLECTION_INSTALL)
 	ansible-galaxy collection install awx_collection_build/$(COLLECTION_NAMESPACE)-$(COLLECTION_PACKAGE)-$(COLLECTION_VERSION).tar.gz

-test_collection_sanity: install_collection
-	cd $(COLLECTION_INSTALL) && ansible-test sanity
+test_collection_sanity:
+	rm -rf awx_collection_build/
+	rm -rf $(COLLECTION_INSTALL)
+	if ! [ -x "$(shell command -v ansible-test)" ]; then pip install ansible-core; fi
+	ansible --version
+	COLLECTION_VERSION=1.0.0 make install_collection
+	cd $(COLLECTION_INSTALL) && ansible-test sanity --exclude=plugins/modules/export.py

 test_collection_integration: install_collection
 	cd $(COLLECTION_INSTALL) && ansible-test integration $(COLLECTION_TEST_TARGET)
@@ -377,28 +386,29 @@ clean-ui:
 	rm -rf awx/ui/build
 	rm -rf awx/ui/src/locales/_build
 	rm -rf $(UI_BUILD_FLAG_FILE)
+        # the collectstatic command doesn't like it if this dir doesn't exist.
+	mkdir -p awx/ui/build/static

 awx/ui/node_modules:
-	NODE_OPTIONS=--max-old-space-size=6144 $(NPM_BIN) --prefix awx/ui --loglevel warn ci
+	NODE_OPTIONS=--max-old-space-size=6144 $(NPM_BIN) --prefix awx/ui --loglevel warn --force ci

-$(UI_BUILD_FLAG_FILE): awx/ui/node_modules
+$(UI_BUILD_FLAG_FILE):
+	$(MAKE) awx/ui/node_modules
 	$(PYTHON) tools/scripts/compilemessages.py
 	$(NPM_BIN) --prefix awx/ui --loglevel warn run compile-strings
 	$(NPM_BIN) --prefix awx/ui --loglevel warn run build
-	mkdir -p awx/public/static/css
-	mkdir -p awx/public/static/js
-	mkdir -p awx/public/static/media
-	cp -r awx/ui/build/static/css/* awx/public/static/css
-	cp -r awx/ui/build/static/js/* awx/public/static/js
-	cp -r awx/ui/build/static/media/* awx/public/static/media
 	touch $@

-
-
 ui-release: $(UI_BUILD_FLAG_FILE)

 ui-devel: awx/ui/node_modules
 	@$(MAKE) -B $(UI_BUILD_FLAG_FILE)
+	mkdir -p /var/lib/awx/public/static/css
+	mkdir -p /var/lib/awx/public/static/js
+	mkdir -p /var/lib/awx/public/static/media
+	cp -r awx/ui/build/static/css/* /var/lib/awx/public/static/css
+	cp -r awx/ui/build/static/js/* /var/lib/awx/public/static/js
+	cp -r awx/ui/build/static/media/* /var/lib/awx/public/static/media

 ui-devel-instrumented: awx/ui/node_modules
 	$(NPM_BIN) --prefix awx/ui --loglevel warn run start-instrumented
@@ -450,12 +460,18 @@ awx/projects:
 COMPOSE_UP_OPTS ?=
 COMPOSE_OPTS ?=
 CONTROL_PLANE_NODE_COUNT ?= 1
-EXECUTION_NODE_COUNT ?= 2
+EXECUTION_NODE_COUNT ?= 0
 MINIKUBE_CONTAINER_GROUP ?= false
+MINIKUBE_SETUP ?= false # if false, run minikube separately
+EXTRA_SOURCES_ANSIBLE_OPTS ?=
+
+ifneq ($(ADMIN_PASSWORD),)
+	EXTRA_SOURCES_ANSIBLE_OPTS := -e admin_password=$(ADMIN_PASSWORD) $(EXTRA_SOURCES_ANSIBLE_OPTS)
+endif

 docker-compose-sources: .git/hooks/pre-commit
 	@if [ $(MINIKUBE_CONTAINER_GROUP) = true ]; then\
-	    ansible-playbook -i tools/docker-compose/inventory tools/docker-compose-minikube/deploy.yml; \
+	    ansible-playbook -i tools/docker-compose/inventory -e minikube_setup=$(MINIKUBE_SETUP) tools/docker-compose-minikube/deploy.yml; \
 	fi;

 	ansible-playbook -i tools/docker-compose/inventory tools/docker-compose/ansible/sources.yml \
@@ -469,7 +485,8 @@ docker-compose-sources: .git/hooks/pre-commit
 	    -e enable_ldap=$(LDAP) \
 	    -e enable_splunk=$(SPLUNK) \
 	    -e enable_prometheus=$(PROMETHEUS) \
-	    -e enable_grafana=$(GRAFANA)
+	    -e enable_grafana=$(GRAFANA) $(EXTRA_SOURCES_ANSIBLE_OPTS)
+


 docker-compose: awx/projects docker-compose-sources
@@ -558,12 +575,20 @@ Dockerfile.kube-dev: tools/ansible/roles/dockerfile/templates/Dockerfile.j2
 	    -e template_dest=_build_kube_dev \
 	    -e receptor_image=$(RECEPTOR_IMAGE)

+## Build awx_kube_devel image for development on local Kubernetes environment.
 awx-kube-dev-build: Dockerfile.kube-dev
 	DOCKER_BUILDKIT=1 docker build -f Dockerfile.kube-dev \
 	    --build-arg BUILDKIT_INLINE_CACHE=1 \
 	    --cache-from=$(DEV_DOCKER_TAG_BASE)/awx_kube_devel:$(COMPOSE_TAG) \
 	    -t $(DEV_DOCKER_TAG_BASE)/awx_kube_devel:$(COMPOSE_TAG) .

+## Build awx image for deployment on Kubernetes environment.
+awx-kube-build: Dockerfile
+	DOCKER_BUILDKIT=1 docker build -f Dockerfile \
+		--build-arg VERSION=$(VERSION) \
+		--build-arg SETUPTOOLS_SCM_PRETEND_VERSION=$(VERSION) \
+		--build-arg HEADLESS=$(HEADLESS) \
+		-t $(DEV_DOCKER_TAG_BASE)/awx:$(COMPOSE_TAG) .

 # Translation TASKS
 # --------------------------------------
@@ -576,13 +601,12 @@ pot: $(UI_BUILD_FLAG_FILE)
 po: $(UI_BUILD_FLAG_FILE)
 	$(NPM_BIN) --prefix awx/ui --loglevel warn run extract-strings -- --clean

-LANG = "en-us"
 ## generate API django .pot .po
 messages:
 	@if [ "$(VENV_BASE)" ]; then \
 		. $(VENV_BASE)/awx/bin/activate; \
 	fi; \
-	$(PYTHON) manage.py makemessages -l $(LANG) --keep-pot
+	$(PYTHON) manage.py makemessages -l en_us --keep-pot

 print-%:
 	@echo $($*)
@@ -620,4 +644,4 @@ help/generate:
 		} \
 	} \
 	{ lastLine = $$0 }' $(MAKEFILE_LIST) | sort -u
-	@printf "\n"
+	@printf "\n"
--- a/awx/api/generics.py
+++ b/awx/api/generics.py
@@ -6,7 +6,6 @@ import inspect
 import logging
 import time
 import uuid
-import urllib.parse

 # Django
 from django.conf import settings
@@ -14,7 +13,7 @@ from django.contrib.auth import views as auth_views
 from django.contrib.contenttypes.models import ContentType
 from django.core.cache import cache
 from django.core.exceptions import FieldDoesNotExist
-from django.db import connection
+from django.db import connection, transaction
 from django.db.models.fields.related import OneToOneRel
 from django.http import QueryDict
 from django.shortcuts import get_object_or_404
@@ -30,7 +29,7 @@ from rest_framework.response import Response
 from rest_framework import status
 from rest_framework import views
 from rest_framework.permissions import AllowAny
-from rest_framework.renderers import StaticHTMLRenderer, JSONRenderer
+from rest_framework.renderers import StaticHTMLRenderer
 from rest_framework.negotiation import DefaultContentNegotiation

 # AWX
@@ -41,7 +40,7 @@ from awx.main.utils import camelcase_to_underscore, get_search_fields, getattrd,
 from awx.main.utils.db import get_all_field_names
 from awx.main.utils.licensing import server_product_name
 from awx.main.views import ApiErrorView
-from awx.api.serializers import ResourceAccessListElementSerializer, CopySerializer, UserSerializer
+from awx.api.serializers import ResourceAccessListElementSerializer, CopySerializer
 from awx.api.versioning import URLPathVersioning
 from awx.api.metadata import SublistAttachDetatchMetadata, Metadata
 from awx.conf import settings_registry
@@ -63,9 +62,9 @@ __all__ = [
    'SubDetailAPIView',
    'ResourceAccessList',
    'ParentMixin',
-    'DeleteLastUnattachLabelMixin',
    'SubListAttachDetachAPIView',
    'CopyAPIView',
+    'GenericCancelView',
    'BaseUsersList',
 ]

@@ -91,14 +90,9 @@ class LoggedLoginView(auth_views.LoginView):

    def post(self, request, *args, **kwargs):
        ret = super(LoggedLoginView, self).post(request, *args, **kwargs)
-        current_user = getattr(request, 'user', None)
        if request.user.is_authenticated:
            logger.info(smart_str(u"User {} logged in from {}".format(self.request.user.username, request.META.get('REMOTE_ADDR', None))))
            ret.set_cookie('userLoggedIn', 'true')
-            current_user = UserSerializer(self.request.user)
-            current_user = smart_str(JSONRenderer().render(current_user.data))
-            current_user = urllib.parse.quote('%s' % current_user, '')
-            ret.set_cookie('current_user', current_user, secure=settings.SESSION_COOKIE_SECURE or None)
            ret.setdefault('X-API-Session-Cookie-Name', getattr(settings, 'SESSION_COOKIE_NAME', 'awx_sessionid'))

            return ret
@@ -255,7 +249,7 @@ class APIView(views.APIView):
            response['X-API-Query-Time'] = '%0.3fs' % sum(q_times)

        if getattr(self, 'deprecated', False):
-            response['Warning'] = '299 awx "This resource has been deprecated and will be removed in a future release."'  # noqa
+            response['Warning'] = '299 awx "This resource has been deprecated and will be removed in a future release."'

        return response

@@ -775,28 +769,6 @@ class SubListAttachDetachAPIView(SubListCreateAttachDetachAPIView):
        return {'id': None}


-class DeleteLastUnattachLabelMixin(object):
-    """
-    Models for which you want the last instance to be deleted from the database
-    when the last disassociate is called should inherit from this class. Further,
-    the model should implement is_detached()
-    """
-
-    def unattach(self, request, *args, **kwargs):
-        (sub_id, res) = super(DeleteLastUnattachLabelMixin, self).unattach_validate(request)
-        if res:
-            return res
-
-        res = super(DeleteLastUnattachLabelMixin, self).unattach_by_id(request, sub_id)
-
-        obj = self.model.objects.get(id=sub_id)
-
-        if obj.is_detached():
-            obj.delete()
-
-        return res
-
-
 class SubDetailAPIView(ParentMixin, generics.RetrieveAPIView, GenericAPIView):
    pass

@@ -1014,6 +986,23 @@ class CopyAPIView(GenericAPIView):
        return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)


+class GenericCancelView(RetrieveAPIView):
+    # In subclass set model, serializer_class
+    obj_permission_type = 'cancel'
+
+    @transaction.non_atomic_requests
+    def dispatch(self, *args, **kwargs):
+        return super(GenericCancelView, self).dispatch(*args, **kwargs)
+
+    def post(self, request, *args, **kwargs):
+        obj = self.get_object()
+        if obj.can_cancel:
+            obj.cancel()
+            return Response(status=status.HTTP_202_ACCEPTED)
+        else:
+            return self.http_method_not_allowed(request, *args, **kwargs)
+
+
 class BaseUsersList(SubListCreateAttachDetachAPIView):
    def post(self, request, *args, **kwargs):
        ret = super(BaseUsersList, self).post(request, *args, **kwargs)
--- a/awx/api/permissions.py
+++ b/awx/api/permissions.py
@@ -24,7 +24,6 @@ __all__ = [
    'InventoryInventorySourcesUpdatePermission',
    'UserPermission',
    'IsSystemAdminOrAuditor',
-    'InstanceGroupTowerPermission',
    'WorkflowApprovalPermission',
 ]

--- a/awx/api/serializers.py
+++ b/awx/api/serializers.py
@@ -29,6 +29,7 @@ from django.utils.translation import gettext_lazy as _
 from django.utils.encoding import force_str
 from django.utils.text import capfirst
 from django.utils.timezone import now
+from django.core.validators import RegexValidator, MaxLengthValidator

 # Django REST Framework
 from rest_framework.exceptions import ValidationError, PermissionDenied
@@ -112,7 +113,7 @@ from awx.main.utils import (
 )
 from awx.main.utils.filters import SmartFilter
 from awx.main.utils.named_url_graph import reset_counters
-from awx.main.scheduler.task_manager_models import TaskManagerInstanceGroups, TaskManagerInstances
+from awx.main.scheduler.task_manager_models import TaskManagerModels
 from awx.main.redact import UriCleaner, REPLACE_STR

 from awx.main.validators import vars_validate_or_raise
@@ -120,6 +121,9 @@ from awx.main.validators import vars_validate_or_raise
 from awx.api.versioning import reverse
 from awx.api.fields import BooleanNullField, CharNullField, ChoiceNullField, VerbatimField, DeprecatedCredentialField

+# AWX Utils
+from awx.api.validators import HostnameRegexValidator
+
 logger = logging.getLogger('awx.api.serializers')

 # Fields that should be summarized regardless of object type.
@@ -154,6 +158,7 @@ SUMMARIZABLE_FK_FIELDS = {
    'source_project': DEFAULT_SUMMARY_FIELDS + ('status', 'scm_type'),
    'project_update': DEFAULT_SUMMARY_FIELDS + ('status', 'failed'),
    'credential': DEFAULT_SUMMARY_FIELDS + ('kind', 'cloud', 'kubernetes', 'credential_type_id'),
+    'signature_validation_credential': DEFAULT_SUMMARY_FIELDS + ('kind', 'credential_type_id'),
    'job': DEFAULT_SUMMARY_FIELDS + ('status', 'failed', 'elapsed', 'type', 'canceled_on'),
    'job_template': DEFAULT_SUMMARY_FIELDS,
    'workflow_job_template': DEFAULT_SUMMARY_FIELDS,
@@ -614,7 +619,7 @@ class BaseSerializer(serializers.ModelSerializer, metaclass=BaseSerializerMetacl
    def validate(self, attrs):
        attrs = super(BaseSerializer, self).validate(attrs)
        try:
-            # Create/update a model instance and run it's full_clean() method to
+            # Create/update a model instance and run its full_clean() method to
            # do any validation implemented on the model class.
            exclusions = self.get_validation_exclusions(self.instance)
            obj = self.instance or self.Meta.model()
@@ -1470,6 +1475,7 @@ class ProjectSerializer(UnifiedJobTemplateSerializer, ProjectOptionsSerializer):
            'allow_override',
            'custom_virtualenv',
            'default_environment',
+            'signature_validation_credential',
        ) + (
            'last_update_failed',
            'last_updated',
@@ -1678,6 +1684,7 @@ class InventorySerializer(LabelsListMixin, BaseSerializerWithVariables):
            'total_inventory_sources',
            'inventory_sources_with_failures',
            'pending_deletion',
+            'prevent_instance_group_fallback',
        )

    def get_related(self, obj):
@@ -2214,6 +2221,15 @@ class InventorySourceUpdateSerializer(InventorySourceSerializer):
    class Meta:
        fields = ('can_update',)

+    def validate(self, attrs):
+        project = self.instance.source_project
+        if project:
+            failed_reason = project.get_reason_if_failed()
+            if failed_reason:
+                raise serializers.ValidationError(failed_reason)
+
+        return super(InventorySourceUpdateSerializer, self).validate(attrs)
+

 class InventoryUpdateSerializer(UnifiedJobSerializer, InventorySourceOptionsSerializer):

@@ -2230,6 +2246,7 @@ class InventoryUpdateSerializer(UnifiedJobSerializer, InventorySourceOptionsSeri
            'source_project_update',
            'custom_virtualenv',
            'instance_group',
+            'scm_revision',
        )

    def get_related(self, obj):
@@ -2920,6 +2937,12 @@ class JobTemplateSerializer(JobTemplateMixin, UnifiedJobTemplateSerializer, JobO
            'ask_verbosity_on_launch',
            'ask_inventory_on_launch',
            'ask_credential_on_launch',
+            'ask_execution_environment_on_launch',
+            'ask_labels_on_launch',
+            'ask_forks_on_launch',
+            'ask_job_slice_count_on_launch',
+            'ask_timeout_on_launch',
+            'ask_instance_groups_on_launch',
            'survey_enabled',
            'become_enabled',
            'diff_mode',
@@ -2928,6 +2951,7 @@ class JobTemplateSerializer(JobTemplateMixin, UnifiedJobTemplateSerializer, JobO
            'job_slice_count',
            'webhook_service',
            'webhook_credential',
+            'prevent_instance_group_fallback',
        )
        read_only_fields = ('*', 'custom_virtualenv')

@@ -3182,7 +3206,7 @@ class JobRelaunchSerializer(BaseSerializer):
        return attrs


-class JobCreateScheduleSerializer(BaseSerializer):
+class JobCreateScheduleSerializer(LabelsListMixin, BaseSerializer):

    can_schedule = serializers.SerializerMethodField()
    prompts = serializers.SerializerMethodField()
@@ -3208,14 +3232,17 @@ class JobCreateScheduleSerializer(BaseSerializer):
        try:
            config = obj.launch_config
            ret = config.prompts_dict(display=True)
-            if 'inventory' in ret:
-                ret['inventory'] = self._summarize('inventory', ret['inventory'])
-            if 'credentials' in ret:
-                all_creds = [self._summarize('credential', cred) for cred in ret['credentials']]
-                ret['credentials'] = all_creds
+            for field_name in ('inventory', 'execution_environment'):
+                if field_name in ret:
+                    ret[field_name] = self._summarize(field_name, ret[field_name])
+            for field_name, singular in (('credentials', 'credential'), ('instance_groups', 'instance_group')):
+                if field_name in ret:
+                    ret[field_name] = [self._summarize(singular, obj) for obj in ret[field_name]]
+            if 'labels' in ret:
+                ret['labels'] = self._summary_field_labels(config)
            return ret
        except JobLaunchConfig.DoesNotExist:
-            return {'all': _('Unknown, job may have been ran before launch configurations were saved.')}
+            return {'all': _('Unknown, job may have been run before launch configurations were saved.')}


 class AdHocCommandSerializer(UnifiedJobSerializer):
@@ -3385,6 +3412,9 @@ class WorkflowJobTemplateSerializer(JobTemplateMixin, LabelsListMixin, UnifiedJo
    limit = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
    scm_branch = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)

+    skip_tags = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
+    job_tags = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
+
    class Meta:
        model = WorkflowJobTemplate
        fields = (
@@ -3403,6 +3433,11 @@ class WorkflowJobTemplateSerializer(JobTemplateMixin, LabelsListMixin, UnifiedJo
            'webhook_service',
            'webhook_credential',
            '-execution_environment',
+            'ask_labels_on_launch',
+            'ask_skip_tags_on_launch',
+            'ask_tags_on_launch',
+            'skip_tags',
+            'job_tags',
        )

    def get_related(self, obj):
@@ -3446,7 +3481,7 @@ class WorkflowJobTemplateSerializer(JobTemplateMixin, LabelsListMixin, UnifiedJo

        # process char_prompts, these are not direct fields on the model
        mock_obj = self.Meta.model()
-        for field_name in ('scm_branch', 'limit'):
+        for field_name in ('scm_branch', 'limit', 'skip_tags', 'job_tags'):
            if field_name in attrs:
                setattr(mock_obj, field_name, attrs[field_name])
                attrs.pop(field_name)
@@ -3472,6 +3507,9 @@ class WorkflowJobSerializer(LabelsListMixin, UnifiedJobSerializer):
    limit = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
    scm_branch = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)

+    skip_tags = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
+    job_tags = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
+
    class Meta:
        model = WorkflowJob
        fields = (
@@ -3491,6 +3529,8 @@ class WorkflowJobSerializer(LabelsListMixin, UnifiedJobSerializer):
            'webhook_service',
            'webhook_credential',
            'webhook_guid',
+            'skip_tags',
+            'job_tags',
        )

    def get_related(self, obj):
@@ -3607,6 +3647,9 @@ class LaunchConfigurationBaseSerializer(BaseSerializer):
    skip_tags = serializers.CharField(allow_blank=True, allow_null=True, required=False, default=None)
    diff_mode = serializers.BooleanField(required=False, allow_null=True, default=None)
    verbosity = serializers.ChoiceField(allow_null=True, required=False, default=None, choices=VERBOSITY_CHOICES)
+    forks = serializers.IntegerField(required=False, allow_null=True, min_value=0, default=None)
+    job_slice_count = serializers.IntegerField(required=False, allow_null=True, min_value=0, default=None)
+    timeout = serializers.IntegerField(required=False, allow_null=True, default=None)
    exclude_errors = ()

    class Meta:
@@ -3622,13 +3665,21 @@ class LaunchConfigurationBaseSerializer(BaseSerializer):
            'skip_tags',
            'diff_mode',
            'verbosity',
+            'execution_environment',
+            'forks',
+            'job_slice_count',
+            'timeout',
        )

    def get_related(self, obj):
        res = super(LaunchConfigurationBaseSerializer, self).get_related(obj)
        if obj.inventory_id:
            res['inventory'] = self.reverse('api:inventory_detail', kwargs={'pk': obj.inventory_id})
+        if obj.execution_environment_id:
+            res['execution_environment'] = self.reverse('api:execution_environment_detail', kwargs={'pk': obj.execution_environment_id})
+        res['labels'] = self.reverse('api:{}_labels_list'.format(get_type_for_model(self.Meta.model)), kwargs={'pk': obj.pk})
        res['credentials'] = self.reverse('api:{}_credentials_list'.format(get_type_for_model(self.Meta.model)), kwargs={'pk': obj.pk})
+        res['instance_groups'] = self.reverse('api:{}_instance_groups_list'.format(get_type_for_model(self.Meta.model)), kwargs={'pk': obj.pk})
        return res

    def _build_mock_obj(self, attrs):
@@ -3708,7 +3759,11 @@ class LaunchConfigurationBaseSerializer(BaseSerializer):

        # Build unsaved version of this config, use it to detect prompts errors
        mock_obj = self._build_mock_obj(attrs)
-        accepted, rejected, errors = ujt._accept_or_ignore_job_kwargs(_exclude_errors=self.exclude_errors, **mock_obj.prompts_dict())
+        if set(list(ujt.get_ask_mapping().keys()) + ['extra_data']) & set(attrs.keys()):
+            accepted, rejected, errors = ujt._accept_or_ignore_job_kwargs(_exclude_errors=self.exclude_errors, **mock_obj.prompts_dict())
+        else:
+            # Only perform validation of prompts if prompts fields are provided
+            errors = {}

        # Remove all unprocessed $encrypted$ strings, indicating default usage
        if 'extra_data' in attrs and password_dict:
@@ -4080,7 +4135,6 @@ class SystemJobEventSerializer(AdHocCommandEventSerializer):


 class JobLaunchSerializer(BaseSerializer):
-
    # Representational fields
    passwords_needed_to_start = serializers.ReadOnlyField()
    can_start_without_user_input = serializers.BooleanField(read_only=True)
@@ -4103,6 +4157,12 @@ class JobLaunchSerializer(BaseSerializer):
    skip_tags = serializers.CharField(required=False, write_only=True, allow_blank=True)
    limit = serializers.CharField(required=False, write_only=True, allow_blank=True)
    verbosity = serializers.ChoiceField(required=False, choices=VERBOSITY_CHOICES, write_only=True)
+    execution_environment = serializers.PrimaryKeyRelatedField(queryset=ExecutionEnvironment.objects.all(), required=False, write_only=True)
+    labels = serializers.PrimaryKeyRelatedField(many=True, queryset=Label.objects.all(), required=False, write_only=True)
+    forks = serializers.IntegerField(required=False, write_only=True, min_value=0)
+    job_slice_count = serializers.IntegerField(required=False, write_only=True, min_value=0)
+    timeout = serializers.IntegerField(required=False, write_only=True)
+    instance_groups = serializers.PrimaryKeyRelatedField(many=True, queryset=InstanceGroup.objects.all(), required=False, write_only=True)

    class Meta:
        model = JobTemplate
@@ -4130,6 +4190,12 @@ class JobLaunchSerializer(BaseSerializer):
            'ask_verbosity_on_launch',
            'ask_inventory_on_launch',
            'ask_credential_on_launch',
+            'ask_execution_environment_on_launch',
+            'ask_labels_on_launch',
+            'ask_forks_on_launch',
+            'ask_job_slice_count_on_launch',
+            'ask_timeout_on_launch',
+            'ask_instance_groups_on_launch',
            'survey_enabled',
            'variables_needed_to_start',
            'credential_needed_to_start',
@@ -4137,6 +4203,12 @@ class JobLaunchSerializer(BaseSerializer):
            'job_template_data',
            'defaults',
            'verbosity',
+            'execution_environment',
+            'labels',
+            'forks',
+            'job_slice_count',
+            'timeout',
+            'instance_groups',
        )
        read_only_fields = (
            'ask_scm_branch_on_launch',
@@ -4149,6 +4221,12 @@ class JobLaunchSerializer(BaseSerializer):
            'ask_verbosity_on_launch',
            'ask_inventory_on_launch',
            'ask_credential_on_launch',
+            'ask_execution_environment_on_launch',
+            'ask_labels_on_launch',
+            'ask_forks_on_launch',
+            'ask_job_slice_count_on_launch',
+            'ask_timeout_on_launch',
+            'ask_instance_groups_on_launch',
        )

    def get_credential_needed_to_start(self, obj):
@@ -4173,6 +4251,17 @@ class JobLaunchSerializer(BaseSerializer):
                    if cred.credential_type.managed and 'vault_id' in cred.credential_type.defined_fields:
                        cred_dict['vault_id'] = cred.get_input('vault_id', default=None)
                    defaults_dict.setdefault(field_name, []).append(cred_dict)
+            elif field_name == 'execution_environment':
+                if obj.execution_environment_id:
+                    defaults_dict[field_name] = {'id': obj.execution_environment.id, 'name': obj.execution_environment.name}
+                else:
+                    defaults_dict[field_name] = {}
+            elif field_name == 'labels':
+                for label in obj.labels.all():
+                    label_dict = {'id': label.id, 'name': label.name}
+                    defaults_dict.setdefault(field_name, []).append(label_dict)
+            elif field_name == 'instance_groups':
+                defaults_dict[field_name] = []
            else:
                defaults_dict[field_name] = getattr(obj, field_name)
        return defaults_dict
@@ -4192,8 +4281,10 @@ class JobLaunchSerializer(BaseSerializer):
        # Basic validation - cannot run a playbook without a playbook
        if not template.project:
            errors['project'] = _("A project is required to run a job.")
-        elif template.project.status in ('error', 'failed'):
-            errors['playbook'] = _("Missing a revision to run due to failed project update.")
+        else:
+            failure_reason = template.project.get_reason_if_failed()
+            if failure_reason:
+                errors['playbook'] = failure_reason

        # cannot run a playbook without an inventory
        if template.inventory and template.inventory.pending_deletion is True:
@@ -4271,6 +4362,10 @@ class WorkflowJobLaunchSerializer(BaseSerializer):
    scm_branch = serializers.CharField(required=False, write_only=True, allow_blank=True)
    workflow_job_template_data = serializers.SerializerMethodField()

+    labels = serializers.PrimaryKeyRelatedField(many=True, queryset=Label.objects.all(), required=False, write_only=True)
+    skip_tags = serializers.CharField(required=False, write_only=True, allow_blank=True)
+    job_tags = serializers.CharField(required=False, write_only=True, allow_blank=True)
+
    class Meta:
        model = WorkflowJobTemplate
        fields = (
@@ -4290,8 +4385,22 @@ class WorkflowJobLaunchSerializer(BaseSerializer):
            'workflow_job_template_data',
            'survey_enabled',
            'ask_variables_on_launch',
+            'ask_labels_on_launch',
+            'labels',
+            'ask_skip_tags_on_launch',
+            'ask_tags_on_launch',
+            'skip_tags',
+            'job_tags',
+        )
+        read_only_fields = (
+            'ask_inventory_on_launch',
+            'ask_variables_on_launch',
+            'ask_skip_tags_on_launch',
+            'ask_labels_on_launch',
+            'ask_limit_on_launch',
+            'ask_scm_branch_on_launch',
+            'ask_tags_on_launch',
        )
-        read_only_fields = ('ask_inventory_on_launch', 'ask_variables_on_launch')

    def get_survey_enabled(self, obj):
        if obj:
@@ -4299,10 +4408,15 @@ class WorkflowJobLaunchSerializer(BaseSerializer):
        return False

    def get_defaults(self, obj):
+
        defaults_dict = {}
        for field_name in WorkflowJobTemplate.get_ask_mapping().keys():
            if field_name == 'inventory':
                defaults_dict[field_name] = dict(name=getattrd(obj, '%s.name' % field_name, None), id=getattrd(obj, '%s.pk' % field_name, None))
+            elif field_name == 'labels':
+                for label in obj.labels.all():
+                    label_dict = {"id": label.id, "name": label.name}
+                    defaults_dict.setdefault(field_name, []).append(label_dict)
            else:
                defaults_dict[field_name] = getattr(obj, field_name)
        return defaults_dict
@@ -4311,6 +4425,7 @@ class WorkflowJobLaunchSerializer(BaseSerializer):
        return dict(name=obj.name, id=obj.id, description=obj.description)

    def validate(self, attrs):
+
        template = self.instance

        accepted, rejected, errors = template._accept_or_ignore_job_kwargs(**attrs)
@@ -4328,6 +4443,7 @@ class WorkflowJobLaunchSerializer(BaseSerializer):
        WFJT_inventory = template.inventory
        WFJT_limit = template.limit
        WFJT_scm_branch = template.scm_branch
+
        super(WorkflowJobLaunchSerializer, self).validate(attrs)
        template.extra_vars = WFJT_extra_vars
        template.inventory = WFJT_inventory
@@ -4719,6 +4835,8 @@ class ScheduleSerializer(LaunchConfigurationBaseSerializer, SchedulePreviewSeria
        if isinstance(obj.unified_job_template, SystemJobTemplate):
            summary_fields['unified_job_template']['job_type'] = obj.unified_job_template.job_type

+        # We are not showing instance groups on summary fields because JTs don't either
+
        if 'inventory' in summary_fields:
            return summary_fields

@@ -4753,7 +4871,7 @@ class ScheduleSerializer(LaunchConfigurationBaseSerializer, SchedulePreviewSeria
 class InstanceLinkSerializer(BaseSerializer):
    class Meta:
        model = InstanceLink
-        fields = ('source', 'target')
+        fields = ('source', 'target', 'link_state')

    source = serializers.SlugRelatedField(slug_field="hostname", read_only=True)
    target = serializers.SlugRelatedField(slug_field="hostname", read_only=True)
@@ -4762,63 +4880,93 @@ class InstanceLinkSerializer(BaseSerializer):
 class InstanceNodeSerializer(BaseSerializer):
    class Meta:
        model = Instance
-        fields = ('id', 'hostname', 'node_type', 'node_state')
-
-    node_state = serializers.SerializerMethodField()
-
-    def get_node_state(self, obj):
-        if not obj.enabled:
-            return "disabled"
-        return "error" if obj.errors else "healthy"
+        fields = ('id', 'hostname', 'node_type', 'node_state', 'enabled')


 class InstanceSerializer(BaseSerializer):
+    show_capabilities = ['edit']

    consumed_capacity = serializers.SerializerMethodField()
    percent_capacity_remaining = serializers.SerializerMethodField()
-    jobs_running = serializers.IntegerField(help_text=_('Count of jobs in the running or waiting state that ' 'are targeted for this instance'), read_only=True)
+    jobs_running = serializers.IntegerField(help_text=_('Count of jobs in the running or waiting state that are targeted for this instance'), read_only=True)
    jobs_total = serializers.IntegerField(help_text=_('Count of all jobs that target this instance'), read_only=True)
+    health_check_pending = serializers.SerializerMethodField()

    class Meta:
        model = Instance
-        read_only_fields = ('uuid', 'hostname', 'version', 'node_type')
+        read_only_fields = ('ip_address', 'uuid', 'version')
        fields = (
-            "id",
-            "type",
-            "url",
-            "related",
-            "uuid",
-            "hostname",
-            "created",
-            "modified",
-            "last_seen",
-            "last_health_check",
-            "errors",
+            'id',
+            'hostname',
+            'type',
+            'url',
+            'related',
+            'summary_fields',
+            'uuid',
+            'created',
+            'modified',
+            'last_seen',
+            'health_check_started',
+            'health_check_pending',
+            'last_health_check',
+            'errors',
            'capacity_adjustment',
-            "version",
-            "capacity",
-            "consumed_capacity",
-            "percent_capacity_remaining",
-            "jobs_running",
-            "jobs_total",
-            "cpu",
-            "memory",
-            "cpu_capacity",
-            "mem_capacity",
-            "enabled",
-            "managed_by_policy",
-            "node_type",
+            'version',
+            'capacity',
+            'consumed_capacity',
+            'percent_capacity_remaining',
+            'jobs_running',
+            'jobs_total',
+            'cpu',
+            'memory',
+            'cpu_capacity',
+            'mem_capacity',
+            'enabled',
+            'managed_by_policy',
+            'node_type',
+            'node_state',
+            'ip_address',
+            'listener_port',
        )
+        extra_kwargs = {
+            'node_type': {'initial': Instance.Types.EXECUTION, 'default': Instance.Types.EXECUTION},
+            'node_state': {'initial': Instance.States.INSTALLED, 'default': Instance.States.INSTALLED},
+            'hostname': {
+                'validators': [
+                    MaxLengthValidator(limit_value=250),
+                    validators.UniqueValidator(queryset=Instance.objects.all()),
+                    RegexValidator(
+                        regex=r'^localhost$|^127(?:\.[0-9]+){0,2}\.[0-9]+$|^(?:0*\:)*?:?0*1$',
+                        flags=re.IGNORECASE,
+                        inverse_match=True,
+                        message="hostname cannot be localhost or 127.0.0.1",
+                    ),
+                    HostnameRegexValidator(),
+                ],
+            },
+        }

    def get_related(self, obj):
        res = super(InstanceSerializer, self).get_related(obj)
        res['jobs'] = self.reverse('api:instance_unified_jobs_list', kwargs={'pk': obj.pk})
        res['instance_groups'] = self.reverse('api:instance_instance_groups_list', kwargs={'pk': obj.pk})
+        if settings.IS_K8S and obj.node_type in (Instance.Types.EXECUTION,):
+            res['install_bundle'] = self.reverse('api:instance_install_bundle', kwargs={'pk': obj.pk})
+        res['peers'] = self.reverse('api:instance_peers_list', kwargs={"pk": obj.pk})
        if self.context['request'].user.is_superuser or self.context['request'].user.is_system_auditor:
-            if obj.node_type != 'hop':
+            if obj.node_type == 'execution':
                res['health_check'] = self.reverse('api:instance_health_check', kwargs={'pk': obj.pk})
        return res

+    def get_summary_fields(self, obj):
+        summary = super().get_summary_fields(obj)
+
+        # use this handle to distinguish between a listView and a detailView
+        if self.is_detail_view:
+            summary['links'] = InstanceLinkSerializer(InstanceLink.objects.select_related('target', 'source').filter(source=obj), many=True).data
+
+        return summary
+
    def get_consumed_capacity(self, obj):
        return obj.consumed_capacity

@@ -4828,10 +4976,58 @@ class InstanceSerializer(BaseSerializer):
        else:
            return float("{0:.2f}".format(((float(obj.capacity) - float(obj.consumed_capacity)) / (float(obj.capacity))) * 100))

-    def validate(self, attrs):
-        if self.instance.node_type == 'hop':
-            raise serializers.ValidationError(_('Hop node instances may not be changed.'))
-        return attrs
+    def get_health_check_pending(self, obj):
+        return obj.health_check_pending
+
+    def validate(self, data):  # object-level check: hop nodes are immutable, creation requires settings.IS_K8S
+        if self.instance:  # an Instance is bound to the serializer, i.e. this is an update
+            if self.instance.node_type == Instance.Types.HOP:
+                raise serializers.ValidationError(_("Hop node instances may not be changed."))  # marked for translation like the message it replaces
+        else:  # nothing bound: this is a create request
+            if not settings.IS_K8S:
+                raise serializers.ValidationError(_("Can only create instances on Kubernetes or OpenShift."))
+        return data
+
+    def validate_node_type(self, value):  # field-level check: only execution nodes can be created; the type is fixed afterwards
+        if not self.instance:  # create: restrict to the execution node type
+            if value not in (Instance.Types.EXECUTION,):
+                raise serializers.ValidationError(_("Can only create execution nodes."))
+        else:  # update: the submitted type must equal the stored one
+            if self.instance.node_type != value:
+                raise serializers.ValidationError(_("Cannot change node type."))
+
+        return value
+
+    def validate_node_state(self, value):  # field-level check on state transitions; messages are marked for translation
+        if self.instance:  # update of an existing Instance
+            if value != self.instance.node_state:  # re-submitting the current state is always accepted
+                if not settings.IS_K8S:
+                    raise serializers.ValidationError(_("Can only change the state on Kubernetes or OpenShift."))
+                if value != Instance.States.DEPROVISIONING:
+                    raise serializers.ValidationError(_("Can only change instances to the 'deprovisioning' state."))
+                if self.instance.node_type not in (Instance.Types.EXECUTION,):
+                    raise serializers.ValidationError(_("Can only deprovision execution nodes."))
+        else:  # create: an empty value passes, otherwise it must be 'installed'
+            if value and value != Instance.States.INSTALLED:
+                raise serializers.ValidationError(_("Can only create instances in the 'installed' state."))
+
+        return value
+
+    def validate_hostname(self, value):
+        """
+        - Hostname cannot be "localhost" - but can be something like localhost.domain (enforced by the field's validators, not here)
+        - Cannot change the hostname of an already-instantiated & initialized Instance object (enforced here)
+        """
+        if self.instance and self.instance.hostname != value:
+            raise serializers.ValidationError(_("Cannot change hostname."))
+
+        return value
+
+    def validate_listener_port(self, value):  # field-level check: the port is free on create and fixed afterwards
+        if self.instance and self.instance.listener_port != value:
+            raise serializers.ValidationError(_("Cannot change listener port."))
+
+        return value


 class InstanceHealthCheckSerializer(BaseSerializer):
@@ -4844,12 +5040,10 @@ class InstanceHealthCheckSerializer(BaseSerializer):
 class InstanceGroupSerializer(BaseSerializer):

    show_capabilities = ['edit', 'delete']
-
+    capacity = serializers.SerializerMethodField()
    consumed_capacity = serializers.SerializerMethodField()
    percent_capacity_remaining = serializers.SerializerMethodField()
-    jobs_running = serializers.IntegerField(
-        help_text=_('Count of jobs in the running or waiting state that ' 'are targeted for this instance group'), read_only=True
-    )
+    jobs_running = serializers.SerializerMethodField()
    jobs_total = serializers.IntegerField(help_text=_('Count of all jobs that target this instance group'), read_only=True)
    instances = serializers.SerializerMethodField()
    is_container_group = serializers.BooleanField(
@@ -4875,6 +5069,22 @@ class InstanceGroupSerializer(BaseSerializer):
        label=_('Policy Instance Minimum'),
        help_text=_("Static minimum number of Instances that will be automatically assign to " "this group when new instances come online."),
    )
+    max_concurrent_jobs = serializers.IntegerField(
+        default=0,
+        min_value=0,
+        required=False,
+        initial=0,
+        label=_('Max Concurrent Jobs'),
+        help_text=_("Maximum number of concurrent jobs to run on a group. When set to zero, no maximum is enforced."),
+    )
+    max_forks = serializers.IntegerField(
+        default=0,
+        min_value=0,
+        required=False,
+        initial=0,
+        label=_('Max Forks'),
+        help_text=_("Maximum number of forks to execute concurrently on a group. When set to zero, no maximum is enforced."),
+    )
    policy_instance_list = serializers.ListField(
        child=serializers.CharField(),
        required=False,
@@ -4896,6 +5106,8 @@ class InstanceGroupSerializer(BaseSerializer):
            "consumed_capacity",
            "percent_capacity_remaining",
            "jobs_running",
+            "max_concurrent_jobs",
+            "max_forks",
            "jobs_total",
            "instances",
            "is_container_group",
@@ -4977,28 +5189,39 @@ class InstanceGroupSerializer(BaseSerializer):
        # Store capacity values (globally computed) in the context
        if 'task_manager_igs' not in self.context:
            instance_groups_queryset = None
-            jobs_qs = UnifiedJob.objects.filter(status__in=('running', 'waiting'))
            if self.parent:  # Is ListView:
                instance_groups_queryset = self.parent.instance

-            instances = TaskManagerInstances(jobs_qs)
-            instance_groups = TaskManagerInstanceGroups(instances_by_hostname=instances, instance_groups_queryset=instance_groups_queryset)
+            tm_models = TaskManagerModels.init_with_consumed_capacity(
+                instance_fields=['uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'enabled'],
+                instance_groups_queryset=instance_groups_queryset,
+            )

-            self.context['task_manager_igs'] = instance_groups
+            self.context['task_manager_igs'] = tm_models.instance_groups
        return self.context['task_manager_igs']

    def get_consumed_capacity(self, obj):
        ig_mgr = self.get_ig_mgr()
        return ig_mgr.get_consumed_capacity(obj.name)

-    def get_percent_capacity_remaining(self, obj):
-        if not obj.capacity:
-            return 0.0
+    def get_capacity(self, obj):
        ig_mgr = self.get_ig_mgr()
-        return float("{0:.2f}".format((float(ig_mgr.get_remaining_capacity(obj.name)) / (float(obj.capacity))) * 100))
+        return ig_mgr.get_capacity(obj.name)
+
+    def get_percent_capacity_remaining(self, obj):
+        capacity = self.get_capacity(obj)
+        if not capacity:
+            return 0.0
+        consumed_capacity = self.get_consumed_capacity(obj)
+        return float("{0:.2f}".format(((float(capacity) - float(consumed_capacity)) / (float(capacity))) * 100))

    def get_instances(self, obj):
-        return obj.instances.count()
+        ig_mgr = self.get_ig_mgr()
+        return len(ig_mgr.get_instances(obj.name))
+
+    def get_jobs_running(self, obj):
+        ig_mgr = self.get_ig_mgr()
+        return ig_mgr.get_jobs_running(obj.name)


 class ActivityStreamSerializer(BaseSerializer):
--- a/awx/api/templates/api/job_template_launch.md
+++ b/awx/api/templates/api/job_template_launch.md
@@ -1,5 +1,5 @@
 Launch a Job Template:
-
+{% ifmeth GET %}
 Make a GET request to this resource to determine if the job_template can be
 launched and whether any passwords are required to launch the job_template.
 The response will include the following fields:
@@ -29,8 +29,8 @@ The response will include the following fields:
 * `inventory_needed_to_start`: Flag indicating the presence of an inventory
  associated with the job template.  If not then one should be supplied when
  launching the job (boolean, read-only)
-
-Make a POST request to this resource to launch the job_template. If any
+{% endifmeth %}
+{% ifmeth POST %}Make a POST request to this resource to launch the job_template. If any
 passwords, inventory, or extra variables (extra_vars) are required, they must
 be passed via POST data, with extra_vars given as a YAML or JSON string and
 escaped parentheses. If the `inventory_needed_to_start` is `True` then the
@@ -41,3 +41,4 @@ are not provided, a 400 status code will be returned.  If the job cannot be
 launched, a 405 status code will be returned. If the provided credential or
 inventory are not allowed to be used by the user, then a 403 status code will
 be returned.
+{% endifmeth %}
--- a/awx/api/templates/instance_install_bundle/group_vars/all.yml
+++ b/awx/api/templates/instance_install_bundle/group_vars/all.yml
@@ -0,0 +1,23 @@
+receptor_user: awx
+receptor_group: awx
+receptor_verify: true
+receptor_tls: true
+receptor_work_commands:
+  ansible-runner:
+    command: ansible-runner
+    params: worker
+    allowruntimeparams: true
+    verifysignature: true
+custom_worksign_public_keyfile: receptor/work-public-key.pem  # NOTE(review): relative path, presumably resolved against the bundle root - confirm
+custom_tls_certfile: receptor/tls/receptor.crt
+custom_tls_keyfile: receptor/tls/receptor.key
+custom_ca_certfile: receptor/tls/ca/receptor-ca.crt
+receptor_protocol: 'tcp'
+receptor_listener: true
+receptor_port: {{ instance.listener_port }}  # Django template expression: replaced with the Instance's listener_port at render time
+receptor_dependencies:
+  - python39-pip
+{% verbatim %}
+podman_user: "{{ receptor_user }}"  # Ansible expression; the enclosing verbatim block stops Django from evaluating it
+podman_group: "{{ receptor_group }}"  # same: emitted literally so Ansible resolves it from receptor_group
+{% endverbatim %}
--- a/awx/api/templates/instance_install_bundle/install_receptor.yml
+++ b/awx/api/templates/instance_install_bundle/install_receptor.yml
@@ -0,0 +1,20 @@
+{% verbatim %}
+---
+- hosts: all
+  become: true  # tasks below create a user, enable a package repo and install with pip
+  tasks:
+    - name: Create the receptor user
+      user:
+        name: "{{ receptor_user }}"
+        shell: /bin/bash
+    - name: Enable Copr repo for Receptor
+      command: dnf copr enable ansible-awx/receptor -y
+    - import_role:
+        name: ansible.receptor.podman
+    - import_role:
+        name: ansible.receptor.setup
+    - name: Install ansible-runner
+      pip:
+        name: ansible-runner
+        executable: pip3.9
+{% endverbatim %}
--- a/awx/api/templates/instance_install_bundle/inventory.yml
+++ b/awx/api/templates/instance_install_bundle/inventory.yml
@@ -0,0 +1,7 @@
+---
+all:
+  hosts:
+    remote-execution:
+      ansible_host: "{{ instance.hostname }}"  # quoted so hostnames such as no, true, null or 12345 stay strings
+      ansible_user: <username>  # placeholder: must be provided by the user
+      ansible_ssh_private_key_file: ~/.ssh/id_rsa
--- a/awx/api/templates/instance_install_bundle/requirements.yml
+++ b/awx/api/templates/instance_install_bundle/requirements.yml
@@ -0,0 +1,4 @@
+---
+collections:
+  - name: ansible.receptor
+    version: '1.1.0'
--- a/awx/api/urls/instance.py
+++ b/awx/api/urls/instance.py
@@ -3,7 +3,15 @@

 from django.urls import re_path

-from awx.api.views import InstanceList, InstanceDetail, InstanceUnifiedJobsList, InstanceInstanceGroupsList, InstanceHealthCheck
+from awx.api.views import (
+    InstanceList,
+    InstanceDetail,
+    InstanceUnifiedJobsList,
+    InstanceInstanceGroupsList,
+    InstanceHealthCheck,
+    InstancePeersList,
+)
+from awx.api.views.instance_install_bundle import InstanceInstallBundle


 urls = [
@@ -12,6 +20,8 @@ urls = [
    re_path(r'^(?P<pk>[0-9]+)/jobs/$', InstanceUnifiedJobsList.as_view(), name='instance_unified_jobs_list'),
    re_path(r'^(?P<pk>[0-9]+)/instance_groups/$', InstanceInstanceGroupsList.as_view(), name='instance_instance_groups_list'),
    re_path(r'^(?P<pk>[0-9]+)/health_check/$', InstanceHealthCheck.as_view(), name='instance_health_check'),
+    re_path(r'^(?P<pk>[0-9]+)/peers/$', InstancePeersList.as_view(), name='instance_peers_list'),
+    re_path(r'^(?P<pk>[0-9]+)/install_bundle/$', InstanceInstallBundle.as_view(), name='instance_install_bundle'),
 ]

 __all__ = ['urls']
--- a/awx/api/urls/inventory.py
+++ b/awx/api/urls/inventory.py
@@ -3,26 +3,28 @@

 from django.urls import re_path

-from awx.api.views import (
+from awx.api.views.inventory import (
    InventoryList,
    InventoryDetail,
-    InventoryHostsList,
-    InventoryGroupsList,
-    InventoryRootGroupsList,
-    InventoryVariableData,
-    InventoryScriptView,
-    InventoryTreeView,
-    InventoryInventorySourcesList,
-    InventoryInventorySourcesUpdate,
    InventoryActivityStreamList,
    InventoryJobTemplateList,
-    InventoryAdHocCommandsList,
    InventoryAccessList,
    InventoryObjectRolesList,
    InventoryInstanceGroupsList,
    InventoryLabelList,
    InventoryCopy,
 )
+from awx.api.views import (
+    InventoryHostsList,
+    InventoryGroupsList,
+    InventoryInventorySourcesList,
+    InventoryInventorySourcesUpdate,
+    InventoryAdHocCommandsList,
+    InventoryRootGroupsList,
+    InventoryScriptView,
+    InventoryTreeView,
+    InventoryVariableData,
+)


 urls = [
--- a/awx/api/urls/inventory_update.py
+++ b/awx/api/urls/inventory_update.py
@@ -3,6 +3,9 @@

 from django.urls import re_path

+from awx.api.views.inventory import (
+    InventoryUpdateEventsList,
+)
 from awx.api.views import (
    InventoryUpdateList,
    InventoryUpdateDetail,
@@ -10,7 +13,6 @@ from awx.api.views import (
    InventoryUpdateStdout,
    InventoryUpdateNotificationsList,
    InventoryUpdateCredentialsList,
-    InventoryUpdateEventsList,
 )


--- a/awx/api/urls/label.py
+++ b/awx/api/urls/label.py
@@ -3,7 +3,7 @@

 from django.urls import re_path

-from awx.api.views import LabelList, LabelDetail
+from awx.api.views.labels import LabelList, LabelDetail


 urls = [re_path(r'^$', LabelList.as_view(), name='label_list'), re_path(r'^(?P<pk>[0-9]+)/$', LabelDetail.as_view(), name='label_detail')]
--- a/awx/api/urls/oauth2_root.py
+++ b/awx/api/urls/oauth2_root.py
@@ -10,7 +10,7 @@ from oauthlib import oauth2
 from oauth2_provider import views

 from awx.main.models import RefreshToken
-from awx.api.views import ApiOAuthAuthorizationRootView
+from awx.api.views.root import ApiOAuthAuthorizationRootView


 class TokenView(views.TokenView):
--- a/awx/api/urls/organization.py
+++ b/awx/api/urls/organization.py
@@ -3,7 +3,7 @@

 from django.urls import re_path

-from awx.api.views import (
+from awx.api.views.organization import (
    OrganizationList,
    OrganizationDetail,
    OrganizationUsersList,
@@ -14,7 +14,6 @@ from awx.api.views import (
    OrganizationJobTemplatesList,
    OrganizationWorkflowJobTemplatesList,
    OrganizationTeamsList,
-    OrganizationCredentialList,
    OrganizationActivityStreamList,
    OrganizationNotificationTemplatesList,
    OrganizationNotificationTemplatesErrorList,
@@ -25,8 +24,8 @@ from awx.api.views import (
    OrganizationGalaxyCredentialsList,
    OrganizationObjectRolesList,
    OrganizationAccessList,
-    OrganizationApplicationList,
 )
+from awx.api.views import OrganizationCredentialList, OrganizationApplicationList


 urls = [
--- a/awx/api/urls/schedule.py
+++ b/awx/api/urls/schedule.py
@@ -3,7 +3,7 @@

 from django.urls import re_path

-from awx.api.views import ScheduleList, ScheduleDetail, ScheduleUnifiedJobsList, ScheduleCredentialsList
+from awx.api.views import ScheduleList, ScheduleDetail, ScheduleUnifiedJobsList, ScheduleCredentialsList, ScheduleLabelsList, ScheduleInstanceGroupList


 urls = [
@@ -11,6 +11,8 @@ urls = [
    re_path(r'^(?P<pk>[0-9]+)/$', ScheduleDetail.as_view(), name='schedule_detail'),
    re_path(r'^(?P<pk>[0-9]+)/jobs/$', ScheduleUnifiedJobsList.as_view(), name='schedule_unified_jobs_list'),
    re_path(r'^(?P<pk>[0-9]+)/credentials/$', ScheduleCredentialsList.as_view(), name='schedule_credentials_list'),
+    re_path(r'^(?P<pk>[0-9]+)/labels/$', ScheduleLabelsList.as_view(), name='schedule_labels_list'),
+    re_path(r'^(?P<pk>[0-9]+)/instance_groups/$', ScheduleInstanceGroupList.as_view(), name='schedule_instance_groups_list'),
 ]

 __all__ = ['urls']
--- a/awx/api/urls/urls.py
+++ b/awx/api/urls/urls.py
@@ -6,13 +6,15 @@ from django.urls import include, re_path

 from awx import MODE
 from awx.api.generics import LoggedLoginView, LoggedLogoutView
-from awx.api.views import (
+from awx.api.views.root import (
    ApiRootView,
    ApiV2RootView,
    ApiV2PingView,
    ApiV2ConfigView,
    ApiV2SubscriptionView,
    ApiV2AttachView,
+)
+from awx.api.views import (
    AuthView,
    UserMeList,
    DashboardView,
@@ -28,8 +30,8 @@ from awx.api.views import (
    OAuth2TokenList,
    ApplicationOAuth2TokenList,
    OAuth2ApplicationDetail,
-    MeshVisualizer,
 )
+from awx.api.views.mesh_visualizer import MeshVisualizer

 from awx.api.views.metrics import MetricsView

--- a/awx/api/urls/webhooks.py
+++ b/awx/api/urls/webhooks.py
@@ -1,6 +1,6 @@
 from django.urls import re_path

-from awx.api.views import WebhookKeyView, GithubWebhookReceiver, GitlabWebhookReceiver
+from awx.api.views.webhooks import WebhookKeyView, GithubWebhookReceiver, GitlabWebhookReceiver


 urlpatterns = [
--- a/awx/api/urls/workflow_job_node.py
+++ b/awx/api/urls/workflow_job_node.py
@@ -10,6 +10,8 @@ from awx.api.views import (
    WorkflowJobNodeFailureNodesList,
    WorkflowJobNodeAlwaysNodesList,
    WorkflowJobNodeCredentialsList,
+    WorkflowJobNodeLabelsList,
+    WorkflowJobNodeInstanceGroupsList,
 )


@@ -20,6 +22,8 @@ urls = [
    re_path(r'^(?P<pk>[0-9]+)/failure_nodes/$', WorkflowJobNodeFailureNodesList.as_view(), name='workflow_job_node_failure_nodes_list'),
    re_path(r'^(?P<pk>[0-9]+)/always_nodes/$', WorkflowJobNodeAlwaysNodesList.as_view(), name='workflow_job_node_always_nodes_list'),
    re_path(r'^(?P<pk>[0-9]+)/credentials/$', WorkflowJobNodeCredentialsList.as_view(), name='workflow_job_node_credentials_list'),
+    re_path(r'^(?P<pk>[0-9]+)/labels/$', WorkflowJobNodeLabelsList.as_view(), name='workflow_job_node_labels_list'),
+    re_path(r'^(?P<pk>[0-9]+)/instance_groups/$', WorkflowJobNodeInstanceGroupsList.as_view(), name='workflow_job_node_instance_groups_list'),
 ]

 __all__ = ['urls']
--- a/awx/api/urls/workflow_job_template_node.py
+++ b/awx/api/urls/workflow_job_template_node.py
@@ -11,6 +11,8 @@ from awx.api.views import (
    WorkflowJobTemplateNodeAlwaysNodesList,
    WorkflowJobTemplateNodeCredentialsList,
    WorkflowJobTemplateNodeCreateApproval,
+    WorkflowJobTemplateNodeLabelsList,
+    WorkflowJobTemplateNodeInstanceGroupsList,
 )


@@ -21,6 +23,8 @@ urls = [
    re_path(r'^(?P<pk>[0-9]+)/failure_nodes/$', WorkflowJobTemplateNodeFailureNodesList.as_view(), name='workflow_job_template_node_failure_nodes_list'),
    re_path(r'^(?P<pk>[0-9]+)/always_nodes/$', WorkflowJobTemplateNodeAlwaysNodesList.as_view(), name='workflow_job_template_node_always_nodes_list'),
    re_path(r'^(?P<pk>[0-9]+)/credentials/$', WorkflowJobTemplateNodeCredentialsList.as_view(), name='workflow_job_template_node_credentials_list'),
+    re_path(r'^(?P<pk>[0-9]+)/labels/$', WorkflowJobTemplateNodeLabelsList.as_view(), name='workflow_job_template_node_labels_list'),
+    re_path(r'^(?P<pk>[0-9]+)/instance_groups/$', WorkflowJobTemplateNodeInstanceGroupsList.as_view(), name='workflow_job_template_node_instance_groups_list'),
    re_path(r'^(?P<pk>[0-9]+)/create_approval_template/$', WorkflowJobTemplateNodeCreateApproval.as_view(), name='workflow_job_template_node_create_approval'),
 ]

--- a/awx/api/validators.py
+++ b/awx/api/validators.py
@@ -0,0 +1,55 @@
+import re
+
+from django.core.validators import RegexValidator, validate_ipv46_address
+from django.core.exceptions import ValidationError
+
+
+class HostnameRegexValidator(RegexValidator):
+    """
+    Fully validates a domain name that is compliant with norms in Linux/RHEL
+        - Cannot start with a hyphen
+        - Cannot begin with, or end with a "."
+        - Cannot contain any whitespaces
+        - Entire hostname is max 255 chars (including dots)
+        - Each domain/label is between 1 and 63 characters, except top level domain, which must be at least 2 characters
+        - Supports ipv4, ipv6, simple hostnames and FQDNs
+        - Follows RFC 9210 (modern RFC 1123, 1178) requirements
+
+    Accepts an IP Address or Hostname as the argument
+    """
+
+    regex = '^[a-z0-9][-a-z0-9]*$|^([a-z0-9][-a-z0-9]{0,62}[.])*[a-z0-9][-a-z0-9]{1,62}$'
+    flags = re.IGNORECASE
+    # Upper bound on the whole hostname (dots included), as promised in the class docstring.
+    max_hostname_length = 255
+
+    def __call__(self, value):
+        """
+        Validate value as an IP address or hostname.
+
+        Raises ValidationError when the value is rejected (or, with inverse_match set, when it is accepted).
+        A specific error produced by the checks is preferred over the generic self.message.
+        """
+        regex_matches, err = self.__validate(value)
+        invalid_input = regex_matches if self.inverse_match else not regex_matches
+        if invalid_input:
+            if err is None:
+                err = ValidationError(self.message, code=self.code, params={"value": value})
+            raise err
+
+    def __str__(self):
+        return f"regex={self.regex}, message={self.message}, code={self.code}, inverse_match={self.inverse_match}, flags={self.flags}"
+
+    def __validate(self, value):
+        """Return (is_valid, error) where error is a ValidationError describing the rejection, or None."""
+
+        # Reject every kind of whitespace, not only ' ': the regex is anchored with '$', which also
+        # matches just before a trailing newline, so "host\n" would otherwise be accepted.
+        if any(char.isspace() for char in value):
+            return False, ValidationError("whitespaces in hostnames are illegal")
+
+        # The regex bounds individual labels but not the overall length, so enforce that here.
+        if len(value) > self.max_hostname_length:
+            return False, ValidationError(f"hostname is longer than {self.max_hostname_length} characters. Please verify.")
+
+        # If we have an IP address, try and validate it.
+        try:
+            validate_ipv46_address(value)
+            return True, None
+        except ValidationError:
+            pass
+
+        # By this point in the code, we probably have a simple hostname, FQDN or a strange hostname like "192.localhost.domain.101"
+        if not self.regex.match(value):
+            return False, ValidationError(f"illegal characters detected in hostname={value}. Please verify.")
+
+        return True, None
--- a/awx/api/views/__init__.py
+++ b/awx/api/views/__init__.py
@@ -5,6 +5,7 @@
 import dateutil
 import functools
 import html
+import itertools
 import logging
 import re
 import requests
@@ -20,8 +21,10 @@ from urllib3.exceptions import ConnectTimeoutError
 # Django
 from django.conf import settings
 from django.core.exceptions import FieldError, ObjectDoesNotExist
-from django.db.models import Q, Sum
+from django.db.models import Q, Sum, Count
 from django.db import IntegrityError, ProgrammingError, transaction, connection
+from django.db.models.fields.related import ManyToManyField, ForeignKey
+from django.db.models.functions import Trunc
 from django.shortcuts import get_object_or_404
 from django.utils.safestring import mark_safe
 from django.utils.timezone import now
@@ -46,9 +49,6 @@ from rest_framework import status
 from rest_framework_yaml.parsers import YAMLParser
 from rest_framework_yaml.renderers import YAMLRenderer

-# QSStats
-import qsstats
-
 # ANSIConv
 import ansiconv

@@ -68,7 +68,7 @@ from awx.api.generics import (
    APIView,
    BaseUsersList,
    CopyAPIView,
-    DeleteLastUnattachLabelMixin,
+    GenericCancelView,
    GenericAPIView,
    ListAPIView,
    ListCreateAPIView,
@@ -85,6 +85,7 @@ from awx.api.generics import (
    SubListCreateAttachDetachAPIView,
    SubListDestroyAPIView,
 )
+from awx.api.views.labels import LabelSubListCreateAttachDetachView
 from awx.api.versioning import reverse
 from awx.main import models
 from awx.main.utils import (
@@ -121,59 +122,9 @@ from awx.api.views.mixin import (
    UnifiedJobDeletionMixin,
    NoTruncateMixin,
 )
-from awx.api.views.organization import (  # noqa
-    OrganizationList,
-    OrganizationDetail,
-    OrganizationInventoriesList,
-    OrganizationUsersList,
-    OrganizationAdminsList,
-    OrganizationExecutionEnvironmentsList,
-    OrganizationProjectsList,
-    OrganizationJobTemplatesList,
-    OrganizationWorkflowJobTemplatesList,
-    OrganizationTeamsList,
-    OrganizationActivityStreamList,
-    OrganizationNotificationTemplatesList,
-    OrganizationNotificationTemplatesAnyList,
-    OrganizationNotificationTemplatesErrorList,
-    OrganizationNotificationTemplatesStartedList,
-    OrganizationNotificationTemplatesSuccessList,
-    OrganizationNotificationTemplatesApprovalList,
-    OrganizationInstanceGroupsList,
-    OrganizationGalaxyCredentialsList,
-    OrganizationAccessList,
-    OrganizationObjectRolesList,
-)
-from awx.api.views.inventory import (  # noqa
-    InventoryList,
-    InventoryDetail,
-    InventoryUpdateEventsList,
-    InventoryList,
-    InventoryDetail,
-    InventoryActivityStreamList,
-    InventoryInstanceGroupsList,
-    InventoryAccessList,
-    InventoryObjectRolesList,
-    InventoryJobTemplateList,
-    InventoryLabelList,
-    InventoryCopy,
-)
-from awx.api.views.mesh_visualizer import MeshVisualizer  # noqa
-from awx.api.views.root import (  # noqa
-    ApiRootView,
-    ApiOAuthAuthorizationRootView,
-    ApiVersionRootView,
-    ApiV2RootView,
-    ApiV2PingView,
-    ApiV2ConfigView,
-    ApiV2SubscriptionView,
-    ApiV2AttachView,
-)
-from awx.api.views.webhooks import WebhookKeyView, GithubWebhookReceiver, GitlabWebhookReceiver  # noqa
 from awx.api.pagination import UnifiedJobEventPagination
 from awx.main.utils import set_environ

-
 logger = logging.getLogger('awx.api.views')


@@ -331,34 +282,54 @@ class DashboardJobsGraphView(APIView):
            success_query = success_query.filter(instance_of=models.ProjectUpdate)
            failed_query = failed_query.filter(instance_of=models.ProjectUpdate)

-        success_qss = qsstats.QuerySetStats(success_query, 'finished')
-        failed_qss = qsstats.QuerySetStats(failed_query, 'finished')
-
-        start_date = now()
+        end = now()
+        interval = 'day'
        if period == 'month':
-            end_date = start_date - dateutil.relativedelta.relativedelta(months=1)
-            interval = 'days'
+            start = end - dateutil.relativedelta.relativedelta(months=1)
        elif period == 'two_weeks':
-            end_date = start_date - dateutil.relativedelta.relativedelta(weeks=2)
-            interval = 'days'
+            start = end - dateutil.relativedelta.relativedelta(weeks=2)
        elif period == 'week':
-            end_date = start_date - dateutil.relativedelta.relativedelta(weeks=1)
-            interval = 'days'
+            start = end - dateutil.relativedelta.relativedelta(weeks=1)
        elif period == 'day':
-            end_date = start_date - dateutil.relativedelta.relativedelta(days=1)
-            interval = 'hours'
+            start = end - dateutil.relativedelta.relativedelta(days=1)
+            interval = 'hour'
        else:
            return Response({'error': _('Unknown period "%s"') % str(period)}, status=status.HTTP_400_BAD_REQUEST)

        dashboard_data = {"jobs": {"successful": [], "failed": []}}
-        for element in success_qss.time_series(end_date, start_date, interval=interval):
-            dashboard_data['jobs']['successful'].append([time.mktime(element[0].timetuple()), element[1]])
-        for element in failed_qss.time_series(end_date, start_date, interval=interval):
-            dashboard_data['jobs']['failed'].append([time.mktime(element[0].timetuple()), element[1]])
+
+        succ_list = dashboard_data['jobs']['successful']
+        fail_list = dashboard_data['jobs']['failed']
+
+        qs_s = (
+            success_query.filter(finished__range=(start, end))
+            .annotate(d=Trunc('finished', interval, tzinfo=end.tzinfo))
+            .order_by()
+            .values('d')
+            .annotate(agg=Count('id', distinct=True))
+        )
+        data_s = {item['d']: item['agg'] for item in qs_s}
+        qs_f = (
+            failed_query.filter(finished__range=(start, end))
+            .annotate(d=Trunc('finished', interval, tzinfo=end.tzinfo))
+            .order_by()
+            .values('d')
+            .annotate(agg=Count('id', distinct=True))
+        )
+        data_f = {item['d']: item['agg'] for item in qs_f}
+
+        start_date = start.replace(hour=0, minute=0, second=0, microsecond=0)
+        for d in itertools.count():
+            date = start_date + dateutil.relativedelta.relativedelta(days=d)
+            if date > end:
+                break
+            succ_list.append([time.mktime(date.timetuple()), data_s.get(date, 0)])
+            fail_list.append([time.mktime(date.timetuple()), data_f.get(date, 0)])
+
        return Response(dashboard_data)


-class InstanceList(ListAPIView):
+class InstanceList(ListCreateAPIView):

    name = _("Instances")
    model = models.Instance
@@ -373,6 +344,13 @@ class InstanceDetail(RetrieveUpdateAPIView):
    model = models.Instance
    serializer_class = serializers.InstanceSerializer

+    def update_raw_data(self, data):
+        # These fields are only valid when an instance is created, so they are dropped
+        # from the data before the parent class handles it for the detail view.
+        for creation_only_field in ('listener_port', 'node_type', 'hostname'):
+            data.pop(creation_only_field, None)
+        return super(InstanceDetail, self).update_raw_data(data)
+
    def update(self, request, *args, **kwargs):
        r = super(InstanceDetail, self).update(request, *args, **kwargs)
        if status.is_success(r.status_code):
@@ -397,6 +375,17 @@ class InstanceUnifiedJobsList(SubListAPIView):
        return qs


+class InstancePeersList(SubListAPIView):
+
+    # Sub-list of the Instance records reached through a parent Instance's `peers` relationship.
+    name = _("Instance Peers")
+    parent_model = models.Instance
+    model = models.Instance
+    serializer_class = serializers.InstanceSerializer
+    parent_access = 'read'
+    # Declared as a tuple, like the search_fields of the other views in this module.
+    search_fields = ('hostname',)
+    relationship = 'peers'
+
+
 class InstanceInstanceGroupsList(InstanceGroupMembershipMixin, SubListCreateAttachDetachAPIView):

    name = _("Instance's Instance Groups")
@@ -429,8 +418,8 @@ class InstanceHealthCheck(GenericAPIView):
    permission_classes = (IsSystemAdminOrAuditor,)

    def get_queryset(self):
+        return super().get_queryset().filter(node_type='execution')
        # FIXME: For now, we don't have a good way of checking the health of a hop node.
-        return super().get_queryset().exclude(node_type='hop')

    def get(self, request, *args, **kwargs):
        obj = self.get_object()
@@ -439,40 +428,22 @@ class InstanceHealthCheck(GenericAPIView):

    def post(self, request, *args, **kwargs):
        obj = self.get_object()
+        if obj.health_check_pending:
+            return Response({'msg': f"Health check was already in progress for {obj.hostname}."}, status=status.HTTP_200_OK)

-        if obj.node_type == 'execution':
+        # Note: hop nodes are already excluded by the get_queryset method
+        obj.health_check_started = now()
+        obj.save(update_fields=['health_check_started'])
+        if obj.node_type == models.Instance.Types.EXECUTION:
            from awx.main.tasks.system import execution_node_health_check

-            runner_data = execution_node_health_check(obj.hostname)
-            obj.refresh_from_db()
-            data = self.get_serializer(data=request.data).to_representation(obj)
-            # Add in some extra unsaved fields
-            for extra_field in ('transmit_timing', 'run_timing'):
-                if extra_field in runner_data:
-                    data[extra_field] = runner_data[extra_field]
+            execution_node_health_check.apply_async([obj.hostname])
        else:
-            from awx.main.tasks.system import cluster_node_health_check
-
-            if settings.CLUSTER_HOST_ID == obj.hostname:
-                cluster_node_health_check(obj.hostname)
-            else:
-                cluster_node_health_check.apply_async([obj.hostname], queue=obj.hostname)
-                start_time = time.time()
-                prior_check_time = obj.last_health_check
-                while time.time() - start_time < 50.0:
-                    obj.refresh_from_db(fields=['last_health_check'])
-                    if obj.last_health_check != prior_check_time:
-                        break
-                    if time.time() - start_time < 1.0:
-                        time.sleep(0.1)
-                    else:
-                        time.sleep(1.0)
-                else:
-                    obj.mark_offline(errors=_('Health check initiated by user determined this instance to be unresponsive'))
-            obj.refresh_from_db()
-            data = self.get_serializer(data=request.data).to_representation(obj)
-
-        return Response(data, status=status.HTTP_200_OK)
+            return Response(
+                {"error": f"Cannot run a health check on instances of type {obj.node_type}.  Health checks can only be run on execution nodes."},
+                status=status.HTTP_400_BAD_REQUEST,
+            )
+        return Response({'msg': f"Health check is running for {obj.hostname}."}, status=status.HTTP_200_OK)


 class InstanceGroupList(ListCreateAPIView):
@@ -617,6 +588,19 @@ class ScheduleCredentialsList(LaunchConfigCredentialsBase):
    parent_model = models.Schedule


+class ScheduleLabelsList(LabelSubListCreateAttachDetachView):
+
+    # Labels sub-list for a Schedule; only the parent model is set here, everything else
+    # comes from LabelSubListCreateAttachDetachView.
+    parent_model = models.Schedule
+
+
+class ScheduleInstanceGroupList(SubListAttachDetachAPIView):
+
+    # InstanceGroup sub-list over a Schedule's `instance_groups` relationship.
+    model = models.InstanceGroup
+    serializer_class = serializers.InstanceGroupSerializer
+    parent_model = models.Schedule
+    relationship = 'instance_groups'
+
+
 class ScheduleUnifiedJobsList(SubListAPIView):

    model = models.UnifiedJob
@@ -1020,20 +1004,11 @@ class SystemJobEventsList(SubListAPIView):
        return job.get_event_queryset()


-class ProjectUpdateCancel(RetrieveAPIView):
+class ProjectUpdateCancel(GenericCancelView):

    model = models.ProjectUpdate
-    obj_permission_type = 'cancel'
    serializer_class = serializers.ProjectUpdateCancelSerializer

-    def post(self, request, *args, **kwargs):
-        obj = self.get_object()
-        if obj.can_cancel:
-            obj.cancel()
-            return Response(status=status.HTTP_202_ACCEPTED)
-        else:
-            return self.http_method_not_allowed(request, *args, **kwargs)
-

 class ProjectUpdateNotificationsList(SubListAPIView):

@@ -2272,6 +2247,8 @@ class InventorySourceUpdateView(RetrieveAPIView):

    def post(self, request, *args, **kwargs):
        obj = self.get_object()
+        serializer = self.get_serializer(instance=obj, data=request.data)
+        serializer.is_valid(raise_exception=True)
        if obj.can_update:
            update = obj.update()
            if not update:
@@ -2306,20 +2283,11 @@ class InventoryUpdateCredentialsList(SubListAPIView):
    relationship = 'credentials'


-class InventoryUpdateCancel(RetrieveAPIView):
+class InventoryUpdateCancel(GenericCancelView):

    model = models.InventoryUpdate
-    obj_permission_type = 'cancel'
    serializer_class = serializers.InventoryUpdateCancelSerializer

-    def post(self, request, *args, **kwargs):
-        obj = self.get_object()
-        if obj.can_cancel:
-            obj.cancel()
-            return Response(status=status.HTTP_202_ACCEPTED)
-        else:
-            return self.http_method_not_allowed(request, *args, **kwargs)
-

 class InventoryUpdateNotificationsList(SubListAPIView):

@@ -2381,10 +2349,13 @@ class JobTemplateLaunch(RetrieveAPIView):
            for field, ask_field_name in modified_ask_mapping.items():
                if not getattr(obj, ask_field_name):
                    data.pop(field, None)
-                elif field == 'inventory':
+                elif isinstance(getattr(obj.__class__, field).field, ForeignKey):
                    data[field] = getattrd(obj, "%s.%s" % (field, 'id'), None)
-                elif field == 'credentials':
-                    data[field] = [cred.id for cred in obj.credentials.all()]
+                elif isinstance(getattr(obj.__class__, field).field, ManyToManyField):
+                    if field == 'instance_groups':
+                        data[field] = []
+                        continue
+                    data[field] = [item.id for item in getattr(obj, field).all()]
                else:
                    data[field] = getattr(obj, field)
        return data
@@ -2397,9 +2368,8 @@ class JobTemplateLaunch(RetrieveAPIView):
        """
        modern_data = data.copy()

-        id_fd = '{}_id'.format('inventory')
-        if 'inventory' not in modern_data and id_fd in modern_data:
-            modern_data['inventory'] = modern_data[id_fd]
+        if 'inventory' not in modern_data and 'inventory_id' in modern_data:
+            modern_data['inventory'] = modern_data['inventory_id']

        # credential passwords were historically provided as top-level attributes
        if 'credential_passwords' not in modern_data:
@@ -2719,28 +2689,9 @@ class JobTemplateCredentialsList(SubListCreateAttachDetachAPIView):
        return super(JobTemplateCredentialsList, self).is_valid_relation(parent, sub, created)


-class JobTemplateLabelList(DeleteLastUnattachLabelMixin, SubListCreateAttachDetachAPIView):
+class JobTemplateLabelList(LabelSubListCreateAttachDetachView):

-    model = models.Label
-    serializer_class = serializers.LabelSerializer
    parent_model = models.JobTemplate
-    relationship = 'labels'
-
-    def post(self, request, *args, **kwargs):
-        # If a label already exists in the database, attach it instead of erroring out
-        # that it already exists
-        if 'id' not in request.data and 'name' in request.data and 'organization' in request.data:
-            existing = models.Label.objects.filter(name=request.data['name'], organization_id=request.data['organization'])
-            if existing.exists():
-                existing = existing[0]
-                request.data['id'] = existing.id
-                del request.data['name']
-                del request.data['organization']
-        if models.Label.objects.filter(unifiedjobtemplate_labels=self.kwargs['pk']).count() > 100:
-            return Response(
-                dict(msg=_('Maximum number of labels for {} reached.'.format(self.parent_model._meta.verbose_name_raw))), status=status.HTTP_400_BAD_REQUEST
-            )
-        return super(JobTemplateLabelList, self).post(request, *args, **kwargs)


 class JobTemplateCallback(GenericAPIView):
@@ -2966,6 +2917,22 @@ class WorkflowJobNodeCredentialsList(SubListAPIView):
    relationship = 'credentials'


+class WorkflowJobNodeLabelsList(SubListAPIView):
+
+    # Label sub-list over a WorkflowJobNode's `labels` relationship.
+    model = models.Label
+    serializer_class = serializers.LabelSerializer
+    parent_model = models.WorkflowJobNode
+    relationship = 'labels'
+
+
+class WorkflowJobNodeInstanceGroupsList(SubListAttachDetachAPIView):
+
+    # InstanceGroup sub-list over a WorkflowJobNode's `instance_groups` relationship.
+    # NOTE(review): the sibling WorkflowJobNodeLabelsList and WorkflowJobNodeCredentialsList views
+    # derive from SubListAPIView, while this one derives from SubListAttachDetachAPIView;
+    # confirm that attach/detach on a workflow job node is intended.
+    model = models.InstanceGroup
+    serializer_class = serializers.InstanceGroupSerializer
+    parent_model = models.WorkflowJobNode
+    relationship = 'instance_groups'
+
+
 class WorkflowJobTemplateNodeList(ListCreateAPIView):

    model = models.WorkflowJobTemplateNode
@@ -2984,6 +2951,19 @@ class WorkflowJobTemplateNodeCredentialsList(LaunchConfigCredentialsBase):
    parent_model = models.WorkflowJobTemplateNode


+class WorkflowJobTemplateNodeLabelsList(LabelSubListCreateAttachDetachView):
+
+    # Labels sub-list for a WorkflowJobTemplateNode; only the parent model is set here,
+    # everything else comes from LabelSubListCreateAttachDetachView.
+    parent_model = models.WorkflowJobTemplateNode
+
+
+class WorkflowJobTemplateNodeInstanceGroupsList(SubListAttachDetachAPIView):
+
+    # InstanceGroup sub-list over a WorkflowJobTemplateNode's `instance_groups` relationship.
+    model = models.InstanceGroup
+    serializer_class = serializers.InstanceGroupSerializer
+    parent_model = models.WorkflowJobTemplateNode
+    relationship = 'instance_groups'
+
+
 class WorkflowJobTemplateNodeChildrenBaseList(EnforceParentRelationshipMixin, SubListCreateAttachDetachAPIView):

    model = models.WorkflowJobTemplateNode
@@ -3082,8 +3062,7 @@ class WorkflowJobNodeChildrenBaseList(SubListAPIView):
    search_fields = ('unified_job_template__name', 'unified_job_template__description')

    #
-    # Limit the set of WorkflowJobeNodes to the related nodes of specified by
-    #'relationship'
+    # Limit the set of WorkflowJobNodes to the related nodes specified by self.relationship
    #
    def get_queryset(self):
        parent = self.get_parent_object()
@@ -3196,13 +3175,17 @@ class WorkflowJobTemplateLaunch(RetrieveAPIView):
                data['extra_vars'] = extra_vars
            modified_ask_mapping = models.WorkflowJobTemplate.get_ask_mapping()
            modified_ask_mapping.pop('extra_vars')
-            for field_name, ask_field_name in obj.get_ask_mapping().items():
+
+            for field, ask_field_name in modified_ask_mapping.items():
                if not getattr(obj, ask_field_name):
-                    data.pop(field_name, None)
-                elif field_name == 'inventory':
-                    data[field_name] = getattrd(obj, "%s.%s" % (field_name, 'id'), None)
+                    data.pop(field, None)
+                elif isinstance(getattr(obj.__class__, field).field, ForeignKey):
+                    data[field] = getattrd(obj, "%s.%s" % (field, 'id'), None)
+                elif isinstance(getattr(obj.__class__, field).field, ManyToManyField):
+                    data[field] = [item.id for item in getattr(obj, field).all()]
                else:
-                    data[field_name] = getattr(obj, field_name)
+                    data[field] = getattr(obj, field)
+
        return data

    def post(self, request, *args, **kwargs):
@@ -3381,20 +3364,15 @@ class WorkflowJobWorkflowNodesList(SubListAPIView):
        return super(WorkflowJobWorkflowNodesList, self).get_queryset().order_by('id')


-class WorkflowJobCancel(RetrieveAPIView):
+class WorkflowJobCancel(GenericCancelView):

    model = models.WorkflowJob
-    obj_permission_type = 'cancel'
    serializer_class = serializers.WorkflowJobCancelSerializer

    def post(self, request, *args, **kwargs):
-        obj = self.get_object()
-        if obj.can_cancel:
-            obj.cancel()
-            ScheduleWorkflowManager().schedule()
-            return Response(status=status.HTTP_202_ACCEPTED)
-        else:
-            return self.http_method_not_allowed(request, *args, **kwargs)
+        r = super().post(request, *args, **kwargs)
+        ScheduleWorkflowManager().schedule()
+        return r


 class WorkflowJobNotificationsList(SubListAPIView):
@@ -3550,20 +3528,11 @@ class JobActivityStreamList(SubListAPIView):
    search_fields = ('changes',)


-class JobCancel(RetrieveAPIView):
+class JobCancel(GenericCancelView):

    model = models.Job
-    obj_permission_type = 'cancel'
    serializer_class = serializers.JobCancelSerializer

-    def post(self, request, *args, **kwargs):
-        obj = self.get_object()
-        if obj.can_cancel:
-            obj.cancel()
-            return Response(status=status.HTTP_202_ACCEPTED)
-        else:
-            return self.http_method_not_allowed(request, *args, **kwargs)
-

 class JobRelaunch(RetrieveAPIView):

@@ -3689,15 +3658,21 @@ class JobCreateSchedule(RetrieveAPIView):
            extra_data=config.extra_data,
            survey_passwords=config.survey_passwords,
            inventory=config.inventory,
+            execution_environment=config.execution_environment,
            char_prompts=config.char_prompts,
            credentials=set(config.credentials.all()),
+            labels=set(config.labels.all()),
+            instance_groups=list(config.instance_groups.all()),
        )
        if not request.user.can_access(models.Schedule, 'add', schedule_data):
            raise PermissionDenied()

-        creds_list = schedule_data.pop('credentials')
+        related_fields = ('credentials', 'labels', 'instance_groups')
+        related = [schedule_data.pop(relationship) for relationship in related_fields]
        schedule = models.Schedule.objects.create(**schedule_data)
-        schedule.credentials.add(*creds_list)
+        for relationship, items in zip(related_fields, related):
+            for item in items:
+                getattr(schedule, relationship).add(item)

        data = serializers.ScheduleSerializer(schedule, context=self.get_serializer_context()).data
        data.serializer.instance = None  # hack to avoid permissions.py assuming this is Job model
@@ -4028,20 +4003,11 @@ class AdHocCommandDetail(UnifiedJobDeletionMixin, RetrieveDestroyAPIView):
    serializer_class = serializers.AdHocCommandDetailSerializer


-class AdHocCommandCancel(RetrieveAPIView):
+class AdHocCommandCancel(GenericCancelView):

    model = models.AdHocCommand
-    obj_permission_type = 'cancel'
    serializer_class = serializers.AdHocCommandCancelSerializer

-    def post(self, request, *args, **kwargs):
-        obj = self.get_object()
-        if obj.can_cancel:
-            obj.cancel()
-            return Response(status=status.HTTP_202_ACCEPTED)
-        else:
-            return self.http_method_not_allowed(request, *args, **kwargs)
-

 class AdHocCommandRelaunch(GenericAPIView):

@@ -4176,20 +4142,11 @@ class SystemJobDetail(UnifiedJobDeletionMixin, RetrieveDestroyAPIView):
    serializer_class = serializers.SystemJobSerializer


-class SystemJobCancel(RetrieveAPIView):
+class SystemJobCancel(GenericCancelView):

    model = models.SystemJob
-    obj_permission_type = 'cancel'
    serializer_class = serializers.SystemJobCancelSerializer

-    def post(self, request, *args, **kwargs):
-        obj = self.get_object()
-        if obj.can_cancel:
-            obj.cancel()
-            return Response(status=status.HTTP_202_ACCEPTED)
-        else:
-            return self.http_method_not_allowed(request, *args, **kwargs)
-

 class SystemJobNotificationsList(SubListAPIView):

@@ -4428,18 +4385,6 @@ class NotificationDetail(RetrieveAPIView):
    serializer_class = serializers.NotificationSerializer


-class LabelList(ListCreateAPIView):
-
-    model = models.Label
-    serializer_class = serializers.LabelSerializer
-
-
-class LabelDetail(RetrieveUpdateAPIView):
-
-    model = models.Label
-    serializer_class = serializers.LabelSerializer
-
-
 class ActivityStreamList(SimpleListAPIView):

    model = models.ActivityStream
--- a/awx/api/views/instance_install_bundle.py
+++ b/awx/api/views/instance_install_bundle.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2018 Red Hat, Inc.
+# All Rights Reserved.
+
+import datetime
+import io
+import ipaddress
+import os
+import tarfile
+
+import asn1
+from awx.api import serializers
+from awx.api.generics import GenericAPIView, Response
+from awx.api.permissions import IsSystemAdminOrAuditor
+from awx.main import models
+from cryptography import x509
+from cryptography.hazmat.primitives import hashes, serialization
+from cryptography.hazmat.primitives.asymmetric import rsa
+from cryptography.x509 import DNSName, IPAddress, ObjectIdentifier, OtherName
+from cryptography.x509.oid import NameOID
+from django.http import HttpResponse
+from django.template.loader import render_to_string
+from django.utils.translation import gettext_lazy as _
+from rest_framework import status
+
+# Red Hat has an OID namespace (RHANANA). Receptor has its own designation under that.
+RECEPTOR_OID = "1.3.6.1.4.1.2312.19.1"
+
+# generate install bundle for the instance
+# install bundle directory structure
+# ├── install_receptor.yml (playbook)
+# ├── inventory.yml
+# ├── group_vars
+# │   └── all.yml
+# ├── receptor
+# │   ├── tls
+# │   │   ├── ca
+# │   │   │   └── receptor-ca.crt
+# │   │   ├── receptor.crt
+# │   │   └── receptor.key
+# │   └── work-public-key.pem
+# └── requirements.yml
+class InstanceInstallBundle(GenericAPIView):
+
+    name = _('Install Bundle')
+    model = models.Instance
+    serializer_class = serializers.InstanceSerializer
+    permission_classes = (IsSystemAdminOrAuditor,)
+
+    @staticmethod
+    def _add_generated_file(tar, arcname, content, mode=0o644):
+        # Store in-memory bytes in the archive as a regular file called arcname.
+        # The default mode equals what tarfile.TarInfo uses when no mode is assigned.
+        tarinfo = tarfile.TarInfo(arcname)
+        tarinfo.size = len(content)
+        tarinfo.mode = mode
+        tar.addfile(tarinfo, io.BytesIO(content))
+
+    def get(self, request, *args, **kwargs):
+        # Respond with a gzipped tarball named "<hostname>_install_bundle.tar.gz" laid out as
+        # sketched in the comment above this class, or with HTTP 400 when the instance is not
+        # an execution node.
+        instance_obj = self.get_object()
+
+        if instance_obj.node_type not in ('execution',):
+            return Response(
+                data=dict(msg=_('Install bundle can only be generated for execution nodes.')),
+                status=status.HTTP_400_BAD_REQUEST,
+            )
+
+        # every archive member lives under this top-level directory
+        bundle_dir = f"{instance_obj.hostname}_install_bundle"
+
+        with io.BytesIO() as f:
+            with tarfile.open(fileobj=f, mode='w:gz') as tar:
+                # copy /etc/receptor/tls/ca/receptor-ca.crt to receptor/tls/ca in the tar file
+                # (tar.add() stores a symlink as a symlink, so resolve to the real file first)
+                tar.add(os.path.realpath('/etc/receptor/tls/ca/receptor-ca.crt'), arcname=f"{bundle_dir}/receptor/tls/ca/receptor-ca.crt")
+
+                # copy /etc/receptor/signing/work-public-key.pem to receptor/work-public-key.pem, resolved the same way
+                tar.add(os.path.realpath('/etc/receptor/signing/work-public-key.pem'), arcname=f"{bundle_dir}/receptor/work-public-key.pem")
+
+                # generate the receptor key and certificate and write them under receptor/tls;
+                # the private key is archived readable by its owner only
+                key, cert = generate_receptor_tls(instance_obj)
+                self._add_generated_file(tar, f"{bundle_dir}/receptor/tls/receptor.key", key, mode=0o600)
+                self._add_generated_file(tar, f"{bundle_dir}/receptor/tls/receptor.crt", cert)
+
+                # generate and write install_receptor.yml to the tar file
+                self._add_generated_file(tar, f"{bundle_dir}/install_receptor.yml", generate_playbook().encode('utf-8'))
+
+                # generate and write inventory.yml to the tar file
+                self._add_generated_file(tar, f"{bundle_dir}/inventory.yml", generate_inventory_yml(instance_obj).encode('utf-8'))
+
+                # generate and write group_vars/all.yml to the tar file
+                self._add_generated_file(tar, f"{bundle_dir}/group_vars/all.yml", generate_group_vars_all_yml(instance_obj).encode('utf-8'))
+
+                # generate and write requirements.yml to the tar file
+                self._add_generated_file(tar, f"{bundle_dir}/requirements.yml", generate_requirements_yml().encode('utf-8'))
+
+            # the tar file is closed here, so the buffer holds the complete archive;
+            # declare the real content type instead of Django's text/html default
+            response = HttpResponse(f.getvalue(), status=status.HTTP_200_OK, content_type='application/gzip')
+            response['Content-Disposition'] = f"attachment; filename={instance_obj.hostname}_install_bundle.tar.gz"
+            return response
+
+
+def generate_playbook():
+    # Render the install_receptor.yml playbook template; no template context is passed.
+    template_name = "instance_install_bundle/install_receptor.yml"
+    return render_to_string(template_name)
+
+
+def generate_requirements_yml():
+    # Render the requirements.yml template; no template context is passed.
+    template_name = "instance_install_bundle/requirements.yml"
+    return render_to_string(template_name)
+
+
+def generate_inventory_yml(instance_obj):
+    # Render the inventory.yml template; the instance is exposed to it as `instance`.
+    template_context = {'instance': instance_obj}
+    return render_to_string("instance_install_bundle/inventory.yml", context=template_context)
+
+
+def generate_group_vars_all_yml(instance_obj):
+    # Render the group_vars/all.yml template; the instance is exposed to it as `instance`.
+    template_context = {'instance': instance_obj}
+    return render_to_string("instance_install_bundle/group_vars/all.yml", context=template_context)
+
+
+def generate_receptor_tls(instance_obj):
+    # Create a new RSA key for the instance plus a certificate for it signed by the receptor CA
+    # found under /etc/receptor/tls/ca. Returns (key, cert), both PEM-encoded bytes; the private
+    # key is not encrypted. Errors from reading or parsing the CA files are not caught here.
+
+    # generate private key for the receptor
+    key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
+
+    # encode receptor hostname to asn1
+    hostname = instance_obj.hostname
+    encoder = asn1.Encoder()
+    encoder.start()
+    encoder.write(hostname.encode(), nr=asn1.Numbers.UTF8String)
+    hostname_asn1 = encoder.output()
+
+    san_params = [
+        DNSName(hostname),
+        OtherName(ObjectIdentifier(RECEPTOR_OID), hostname_asn1),
+    ]
+
+    # when the hostname is a literal IP address (IPv4 or IPv6), list it as an IP SAN as well
+    try:
+        san_params.append(IPAddress(ipaddress.ip_address(hostname)))
+    except ValueError:
+        # not an IP address; the DNS and receptor OtherName entries are all that is needed
+        pass
+
+    # generate certificate signing request for the receptor
+    csr = (
+        x509.CertificateSigningRequestBuilder()
+        .subject_name(
+            x509.Name(
+                [
+                    x509.NameAttribute(NameOID.COMMON_NAME, hostname),
+                ]
+            )
+        )
+        .add_extension(
+            x509.SubjectAlternativeName(san_params),
+            critical=False,
+        )
+        .sign(key, hashes.SHA256())
+    )
+
+    # sign csr with the receptor ca key from /etc/receptor/tls/ca/receptor-ca.key
+    with open('/etc/receptor/tls/ca/receptor-ca.key', 'rb') as f:
+        ca_key = serialization.load_pem_private_key(
+            f.read(),
+            password=None,
+        )
+
+    with open('/etc/receptor/tls/ca/receptor-ca.crt', 'rb') as f:
+        ca_cert = x509.load_pem_x509_certificate(f.read())
+
+    # look the SAN extension up once and sample the clock once so both validity bounds agree
+    san_extension = csr.extensions.get_extension_for_class(x509.SubjectAlternativeName)
+    issued_at = datetime.datetime.utcnow()
+
+    cert = (
+        x509.CertificateBuilder()
+        .subject_name(csr.subject)
+        # the issuer of the new certificate is the CA's subject; for a self-signed CA this equals
+        # ca_cert.issuer, but it stays correct if the CA certificate was itself issued by another CA
+        .issuer_name(ca_cert.subject)
+        .public_key(csr.public_key())
+        .serial_number(x509.random_serial_number())
+        .not_valid_before(issued_at)
+        .not_valid_after(issued_at + datetime.timedelta(days=3650))
+        .add_extension(
+            san_extension.value,
+            critical=san_extension.critical,
+        )
+        .sign(ca_key, hashes.SHA256())
+    )
+
+    key_pem = key.private_bytes(
+        encoding=serialization.Encoding.PEM,
+        format=serialization.PrivateFormat.TraditionalOpenSSL,
+        encryption_algorithm=serialization.NoEncryption(),
+    )
+
+    cert_pem = cert.public_bytes(
+        encoding=serialization.Encoding.PEM,
+    )
+
+    return key_pem, cert_pem
--- a/awx/api/views/inventory.py
+++ b/awx/api/views/inventory.py
@@ -18,8 +18,6 @@ from rest_framework import status
 # AWX
 from awx.main.models import ActivityStream, Inventory, JobTemplate, Role, User, InstanceGroup, InventoryUpdateEvent, InventoryUpdate

-from awx.main.models.label import Label
-
 from awx.api.generics import (
    ListCreateAPIView,
    RetrieveUpdateDestroyAPIView,
@@ -27,9 +25,8 @@ from awx.api.generics import (
    SubListAttachDetachAPIView,
    ResourceAccessList,
    CopyAPIView,
-    DeleteLastUnattachLabelMixin,
-    SubListCreateAttachDetachAPIView,
 )
+from awx.api.views.labels import LabelSubListCreateAttachDetachView


 from awx.api.serializers import (
@@ -39,7 +36,6 @@ from awx.api.serializers import (
    InstanceGroupSerializer,
    InventoryUpdateEventSerializer,
    JobTemplateSerializer,
-    LabelSerializer,
 )
 from awx.api.views.mixin import RelatedJobsPreventDeleteMixin

@@ -157,28 +153,9 @@ class InventoryJobTemplateList(SubListAPIView):
        return qs.filter(inventory=parent)


-class InventoryLabelList(DeleteLastUnattachLabelMixin, SubListCreateAttachDetachAPIView, SubListAPIView):
+class InventoryLabelList(LabelSubListCreateAttachDetachView):

-    model = Label
-    serializer_class = LabelSerializer
    parent_model = Inventory
-    relationship = 'labels'
-
-    def post(self, request, *args, **kwargs):
-        # If a label already exists in the database, attach it instead of erroring out
-        # that it already exists
-        if 'id' not in request.data and 'name' in request.data and 'organization' in request.data:
-            existing = Label.objects.filter(name=request.data['name'], organization_id=request.data['organization'])
-            if existing.exists():
-                existing = existing[0]
-                request.data['id'] = existing.id
-                del request.data['name']
-                del request.data['organization']
-        if Label.objects.filter(inventory_labels=self.kwargs['pk']).count() > 100:
-            return Response(
-                dict(msg=_('Maximum number of labels for {} reached.'.format(self.parent_model._meta.verbose_name_raw))), status=status.HTTP_400_BAD_REQUEST
-            )
-        return super(InventoryLabelList, self).post(request, *args, **kwargs)


 class InventoryCopy(CopyAPIView):
--- a/awx/api/views/labels.py
+++ b/awx/api/views/labels.py
@@ -0,0 +1,71 @@
+# AWX
+from awx.api.generics import SubListCreateAttachDetachAPIView, RetrieveUpdateAPIView, ListCreateAPIView
+from awx.main.models import Label
+from awx.api.serializers import LabelSerializer
+
+# Django
+from django.utils.translation import gettext_lazy as _
+
+# Django REST Framework
+from rest_framework.response import Response
+from rest_framework.status import HTTP_400_BAD_REQUEST
+
+
+class LabelSubListCreateAttachDetachView(SubListCreateAttachDetachAPIView):
+    """
+    For related labels lists like /api/v2/inventories/N/labels/
+
+    We want the last instance to be deleted from the database
+    when the last disassociate happens.
+
+    Subclasses need to define parent_model
+    """
+
+    model = Label
+    serializer_class = LabelSerializer
+    relationship = 'labels'
+
+    def unattach(self, request, *args, **kwargs):
+        (sub_id, res) = super().unattach_validate(request)
+        if res:
+            return res
+
+        res = super().unattach_by_id(request, sub_id)
+
+        # delete the label row itself when is_detached() reports true (see class docstring)
+        obj = self.model.objects.get(id=sub_id)
+        if obj.is_detached():
+            obj.delete()
+
+        return res
+
+    def post(self, request, *args, **kwargs):
+        # If the label already exists in the database, attach it instead of erroring out
+        if 'id' not in request.data and 'name' in request.data and 'organization' in request.data:
+            existing = Label.objects.filter(name=request.data['name'], organization_id=request.data['organization'])
+            if existing.exists():
+                existing = existing[0]
+                request.data['id'] = existing.id
+                del request.data['name']
+                del request.data['organization']
+
+        # Give a 400 error if we have attached too many labels to this object
+        label_filter = self.parent_model._meta.get_field(self.relationship).remote_field.name
+        if Label.objects.filter(**{label_filter: self.kwargs['pk']}).count() > 100:
+            # translate the constant msgid first, then interpolate; an f-string inside _() can never match the catalog
+            msg = _('Maximum number of labels for {} reached.').format(self.parent_model._meta.verbose_name_raw)
+            return Response(dict(msg=msg), status=HTTP_400_BAD_REQUEST)
+        return super().post(request, *args, **kwargs)
+
+
+class LabelDetail(RetrieveUpdateAPIView):
+    # Detail view for a single Label; request handling is inherited from RetrieveUpdateAPIView
+    model = Label
+    serializer_class = LabelSerializer
+
+
+class LabelList(ListCreateAPIView):
+    # Collection view for Label objects, built on ListCreateAPIView; `name` is a lazily translated string
+    name = _("Labels")
+    model = Label
+    serializer_class = LabelSerializer
--- a/awx/api/views/mixin.py
+++ b/awx/api/views/mixin.py
@@ -16,7 +16,7 @@ from rest_framework import status

 from awx.main.constants import ACTIVE_STATES
 from awx.main.utils import get_object_or_400
-from awx.main.models.ha import Instance, InstanceGroup
+from awx.main.models.ha import Instance, InstanceGroup, schedule_policy_task
 from awx.main.models.organization import Team
 from awx.main.models.projects import Project
 from awx.main.models.inventory import Inventory
@@ -107,6 +107,11 @@ class InstanceGroupMembershipMixin(object):
                if inst_name in ig_obj.policy_instance_list:
                    ig_obj.policy_instance_list.pop(ig_obj.policy_instance_list.index(inst_name))
                    ig_obj.save(update_fields=['policy_instance_list'])
+
+            # sometimes removing an instance has a non-obvious consequence
+            # this is almost always true if policy_instance_percentage or _minimum is non-zero
+            # after removing a single instance, the other memberships need to be re-balanced
+            schedule_policy_task()
        return response


--- a/awx/locale/es/LC_MESSAGES/django.po
+++ b/awx/locale/es/LC_MESSAGES/django.po
@@ -6237,4 +6237,5 @@ msgstr "%s se está actualizando."

 #: awx/ui/urls.py:24
 msgid "This page will refresh when complete."
-msgstr "Esta página se actualizará cuando se complete."
+msgstr "Esta página se actualizará cuando se complete."
+
--- a/awx/locale/fr/LC_MESSAGES/django.po
+++ b/awx/locale/fr/LC_MESSAGES/django.po
@@ -721,7 +721,7 @@ msgstr "DTSTART valide obligatoire dans rrule. La valeur doit commencer par : DT
 #: awx/api/serializers.py:4657
 msgid ""
 "DTSTART cannot be a naive datetime.  Specify ;TZINFO= or YYYYMMDDTHHMMSSZZ."
-msgstr "DTSTART ne peut correspondre à une DateHeure naïve. Spécifier ;TZINFO= ou YYYYMMDDTHHMMSSZZ."
+msgstr "DTSTART ne peut correspondre à une date-heure naïve. Spécifier ;TZINFO= ou YYYYMMDDTHHMMSSZZ."

 #: awx/api/serializers.py:4659
 msgid "Multiple DTSTART is not supported."
@@ -6239,4 +6239,5 @@ msgstr "%s est en cours de mise à niveau."

 #: awx/ui/urls.py:24
 msgid "This page will refresh when complete."
-msgstr "Cette page sera rafraîchie une fois terminée."
+msgstr "Cette page sera rafraîchie une fois terminée."
+
--- a/awx/locale/nl/LC_MESSAGES/django.po
+++ b/awx/locale/nl/LC_MESSAGES/django.po
@@ -6237,4 +6237,5 @@ msgstr "Er wordt momenteel een upgrade van%s geïnstalleerd."

 #: awx/ui/urls.py:24
 msgid "This page will refresh when complete."
-msgstr "Deze pagina wordt vernieuwd als hij klaar is."
+msgstr "Deze pagina wordt vernieuwd als hij klaar is."
+
--- a/awx/main/access.py
+++ b/awx/main/access.py
@@ -12,7 +12,7 @@ from django.conf import settings
 from django.db.models import Q, Prefetch
 from django.contrib.auth.models import User
 from django.utils.translation import gettext_lazy as _
-from django.core.exceptions import ObjectDoesNotExist
+from django.core.exceptions import ObjectDoesNotExist, FieldDoesNotExist

 # Django REST Framework
 from rest_framework.exceptions import ParseError, PermissionDenied
@@ -281,13 +281,23 @@ class BaseAccess(object):
        """
        return True

+    def assure_relationship_exists(self, obj, relationship):
+        # dotted (multi-hop) relationship names are deliberately left unvalidated for now
+        if '.' not in relationship:
+            try:
+                obj._meta.get_field(relationship)
+            except FieldDoesNotExist:
+                raise NotImplementedError(f'The relationship {relationship} does not exist for model {type(obj)}')
+
    def can_attach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
+        self.assure_relationship_exists(obj, relationship)
        if skip_sub_obj_read_check:
            return self.can_change(obj, None)
        else:
            return bool(self.can_change(obj, None) and self.user.can_access(type(sub_obj), 'read', sub_obj))

    def can_unattach(self, obj, sub_obj, relationship, data=None):
+        self.assure_relationship_exists(obj, relationship)
        return self.can_change(obj, data)

    def check_related(self, field, Model, data, role_field='admin_role', obj=None, mandatory=False):
@@ -328,6 +338,8 @@ class BaseAccess(object):
            role = getattr(resource, role_field, None)
            if role is None:
                # Handle special case where resource does not have direct roles
+                if role_field == 'read_role':
+                    return self.user.can_access(type(resource), 'read', resource)
                access_method_type = {'admin_role': 'change', 'execute_role': 'start'}[role_field]
                return self.user.can_access(type(resource), access_method_type, resource, None)
            return self.user in role
@@ -499,6 +511,21 @@ class BaseAccess(object):
        return False


+class UnifiedCredentialsMixin(BaseAccess):
+    """
+    Shared attach rule for the `credentials` many-to-many relationship used by JTs, jobs, and similar models.
+    Attaching needs can_change on the parent plus use_role on the credential; unattach is not overridden here.
+    """
+
+    @check_superuser
+    def can_attach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
+        if relationship != 'credentials':
+            return super().can_attach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
+        if not isinstance(sub_obj, Credential):
+            raise RuntimeError(f'Can only attach credentials to credentials relationship, got {type(sub_obj)}')
+        return self.can_change(obj, None) and (self.user in sub_obj.use_role)
+
+
 class NotificationAttachMixin(BaseAccess):
    """For models that can have notifications attached

@@ -552,7 +579,8 @@ class InstanceAccess(BaseAccess):
        return super(InstanceAccess, self).can_unattach(obj, sub_obj, relationship, relationship, data=data)

    def can_add(self, data):
-        return False
+
+        return self.user.is_superuser

    def can_change(self, obj, data):
        return False
@@ -965,9 +993,6 @@ class HostAccess(BaseAccess):
        if data and 'name' in data:
            self.check_license(add_host_name=data['name'])

-            # Check the per-org limit
-            self.check_org_host_limit({'inventory': obj.inventory}, add_host_name=data['name'])
-
        # Checks for admin or change permission on inventory, controls whether
        # the user can edit variable data.
        return obj and self.user in obj.inventory.admin_role
@@ -1031,7 +1056,7 @@ class GroupAccess(BaseAccess):
        return bool(obj and self.user in obj.inventory.admin_role)


-class InventorySourceAccess(NotificationAttachMixin, BaseAccess):
+class InventorySourceAccess(NotificationAttachMixin, UnifiedCredentialsMixin, BaseAccess):
    """
    I can see inventory sources whenever I can see their inventory.
    I can change inventory sources whenever I can change their inventory.
@@ -1075,18 +1100,6 @@ class InventorySourceAccess(NotificationAttachMixin, BaseAccess):
            return self.user in obj.inventory.update_role
        return False

-    @check_superuser
-    def can_attach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
-        if relationship == 'credentials' and isinstance(sub_obj, Credential):
-            return obj and obj.inventory and self.user in obj.inventory.admin_role and self.user in sub_obj.use_role
-        return super(InventorySourceAccess, self).can_attach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
-
-    @check_superuser
-    def can_unattach(self, obj, sub_obj, relationship, *args, **kwargs):
-        if relationship == 'credentials' and isinstance(sub_obj, Credential):
-            return obj and obj.inventory and self.user in obj.inventory.admin_role
-        return super(InventorySourceAccess, self).can_attach(obj, sub_obj, relationship, *args, **kwargs)
-

 class InventoryUpdateAccess(BaseAccess):
    """
@@ -1485,7 +1498,7 @@ class ProjectUpdateAccess(BaseAccess):
        return obj and self.user in obj.project.admin_role


-class JobTemplateAccess(NotificationAttachMixin, BaseAccess):
+class JobTemplateAccess(NotificationAttachMixin, UnifiedCredentialsMixin, BaseAccess):
    """
    I can see job templates when:
     - I have read role for the job template.
@@ -1549,8 +1562,7 @@ class JobTemplateAccess(NotificationAttachMixin, BaseAccess):
            if self.user not in inventory.use_role:
                return False

-        ee = get_value(ExecutionEnvironment, 'execution_environment')
-        if ee and not self.user.can_access(ExecutionEnvironment, 'read', ee):
+        if not self.check_related('execution_environment', ExecutionEnvironment, data, role_field='read_role'):
            return False

        project = get_value(Project, 'project')
@@ -1600,10 +1612,8 @@ class JobTemplateAccess(NotificationAttachMixin, BaseAccess):
        if self.changes_are_non_sensitive(obj, data):
            return True

-        if data.get('execution_environment'):
-            ee = get_object_from_data('execution_environment', ExecutionEnvironment, data)
-            if not self.user.can_access(ExecutionEnvironment, 'read', ee):
-                return False
+        if not self.check_related('execution_environment', ExecutionEnvironment, data, obj=obj, role_field='read_role'):
+            return False

        for required_field, cls in (('inventory', Inventory), ('project', Project)):
            is_mandatory = True
@@ -1667,17 +1677,13 @@ class JobTemplateAccess(NotificationAttachMixin, BaseAccess):
            if not obj.organization:
                return False
            return self.user.can_access(type(sub_obj), "read", sub_obj) and self.user in obj.organization.admin_role
-        if relationship == 'credentials' and isinstance(sub_obj, Credential):
-            return self.user in obj.admin_role and self.user in sub_obj.use_role
        return super(JobTemplateAccess, self).can_attach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)

    @check_superuser
    def can_unattach(self, obj, sub_obj, relationship, *args, **kwargs):
        if relationship == "instance_groups":
            return self.can_attach(obj, sub_obj, relationship, *args, **kwargs)
-        if relationship == 'credentials' and isinstance(sub_obj, Credential):
-            return self.user in obj.admin_role
-        return super(JobTemplateAccess, self).can_attach(obj, sub_obj, relationship, *args, **kwargs)
+        return super(JobTemplateAccess, self).can_unattach(obj, sub_obj, relationship, *args, **kwargs)


 class JobAccess(BaseAccess):
@@ -1824,7 +1830,7 @@ class SystemJobAccess(BaseAccess):
        return False  # no relaunching of system jobs


-class JobLaunchConfigAccess(BaseAccess):
+class JobLaunchConfigAccess(UnifiedCredentialsMixin, BaseAccess):
    """
    Launch configs must have permissions checked for
     - relaunching
@@ -1832,63 +1838,69 @@ class JobLaunchConfigAccess(BaseAccess):

    In order to create a new object with a copy of this launch config, I need:
     - use access to related inventory (if present)
+     - read access to Execution Environment (if present), unless the specified ee is already in the template
     - use role to many-related credentials (if any present)
+     - read access to many-related labels (if any present), unless the specified label is already in the template
+     - read access to many-related instance groups (if any present), unless the specified instance group is already in the template
    """

    model = JobLaunchConfig
    select_related = 'job'
    prefetch_related = ('credentials', 'inventory')

-    def _unusable_creds_exist(self, qs):
-        return qs.exclude(pk__in=Credential._accessible_pk_qs(Credential, self.user, 'use_role')).exists()
+    M2M_CHECKS = {'credentials': Credential, 'labels': Label, 'instance_groups': InstanceGroup}

-    def has_credentials_access(self, obj):
-        # user has access if no related credentials exist that the user lacks use role for
-        return not self._unusable_creds_exist(obj.credentials)
+    def _related_filtered_queryset(self, cls):
+        # labels and instance groups are scoped through their own access classes; anything else by use_role
+        if cls is Label:
+            return LabelAccess(self.user).filtered_queryset()
+        if cls is InstanceGroup:
+            return InstanceGroupAccess(self.user).filtered_queryset()
+        return cls._accessible_pk_qs(cls, self.user, 'use_role')
+
+    def has_obj_m2m_access(self, obj):
+        # False as soon as any M2M_CHECKS relationship holds an object outside the user's filtered queryset
+        return not any(
+            getattr(obj, rel).exclude(pk__in=self._related_filtered_queryset(model)).exists() for rel, model in self.M2M_CHECKS.items()
+        )

    @check_superuser
    def can_add(self, data, template=None):
        # This is a special case, we don't check related many-to-many elsewhere
        # launch RBAC checks use this
-        if 'credentials' in data and data['credentials'] or 'reference_obj' in data:
-            if 'reference_obj' in data:
-                prompted_cred_qs = data['reference_obj'].credentials.all()
-            else:
-                # If given model objects, only use the primary key from them
-                cred_pks = [cred.pk for cred in data['credentials']]
-                if template:
-                    for cred in template.credentials.all():
-                        if cred.pk in cred_pks:
-                            cred_pks.remove(cred.pk)
-                prompted_cred_qs = Credential.objects.filter(pk__in=cred_pks)
-            if self._unusable_creds_exist(prompted_cred_qs):
+        if 'reference_obj' in data:
+            if not self.has_obj_m2m_access(data['reference_obj']):
                return False
-        return self.check_related('inventory', Inventory, data, role_field='use_role')
+        else:
+            for relationship, cls in self.M2M_CHECKS.items():
+                if relationship in data and data[relationship]:
+                    # If given model objects, only use the primary key from them
+                    sub_obj_pks = [sub_obj.pk for sub_obj in data[relationship]]
+                    if template:
+                        for sub_obj in getattr(template, relationship).all():
+                            if sub_obj.pk in sub_obj_pks:
+                                sub_obj_pks.remove(sub_obj.pk)
+                    if cls.objects.filter(pk__in=sub_obj_pks).exclude(pk__in=self._related_filtered_queryset(cls)).exists():
+                        return False
+        return self.check_related('inventory', Inventory, data, role_field='use_role') and self.check_related(
+            'execution_environment', ExecutionEnvironment, data, role_field='read_role'
+        )

    @check_superuser
    def can_use(self, obj):
-        return self.check_related('inventory', Inventory, {}, obj=obj, role_field='use_role', mandatory=True) and self.has_credentials_access(obj)
+        return (
+            self.has_obj_m2m_access(obj)
+            and self.check_related('inventory', Inventory, {}, obj=obj, role_field='use_role', mandatory=True)
+            and self.check_related('execution_environment', ExecutionEnvironment, {}, obj=obj, role_field='read_role')
+        )

    def can_change(self, obj, data):
-        return self.check_related('inventory', Inventory, data, obj=obj, role_field='use_role')
-
-    def can_attach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
-        if isinstance(sub_obj, Credential) and relationship == 'credentials':
-            return self.user in sub_obj.use_role
-        else:
-            raise NotImplementedError('Only credentials can be attached to launch configurations.')
-
-    def can_unattach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
-        if isinstance(sub_obj, Credential) and relationship == 'credentials':
-            if skip_sub_obj_read_check:
-                return True
-            else:
-                return self.user in sub_obj.read_role
-        else:
-            raise NotImplementedError('Only credentials can be attached to launch configurations.')
+        return self.check_related('inventory', Inventory, data, obj=obj, role_field='use_role') and self.check_related(
+            'execution_environment', ExecutionEnvironment, data, obj=obj, role_field='read_role'
+        )


-class WorkflowJobTemplateNodeAccess(BaseAccess):
+class WorkflowJobTemplateNodeAccess(UnifiedCredentialsMixin, BaseAccess):
    """
    I can see/use a WorkflowJobTemplateNode if I have read permission
        to associated Workflow Job Template
@@ -1911,7 +1923,7 @@ class WorkflowJobTemplateNodeAccess(BaseAccess):
    """

    model = WorkflowJobTemplateNode
-    prefetch_related = ('success_nodes', 'failure_nodes', 'always_nodes', 'unified_job_template', 'credentials', 'workflow_job_template')
+    prefetch_related = ('success_nodes', 'failure_nodes', 'always_nodes', 'unified_job_template', 'workflow_job_template')

    def filtered_queryset(self):
        return self.model.objects.filter(workflow_job_template__in=WorkflowJobTemplate.accessible_objects(self.user, 'read_role'))
@@ -1923,7 +1935,8 @@ class WorkflowJobTemplateNodeAccess(BaseAccess):
        return (
            self.check_related('workflow_job_template', WorkflowJobTemplate, data, mandatory=True)
            and self.check_related('unified_job_template', UnifiedJobTemplate, data, role_field='execute_role')
-            and JobLaunchConfigAccess(self.user).can_add(data)
+            and self.check_related('inventory', Inventory, data, role_field='use_role')
+            and self.check_related('execution_environment', ExecutionEnvironment, data, role_field='read_role')
        )

    def wfjt_admin(self, obj):
@@ -1932,17 +1945,14 @@ class WorkflowJobTemplateNodeAccess(BaseAccess):
        else:
            return self.user in obj.workflow_job_template.admin_role

-    def ujt_execute(self, obj):
+    def ujt_execute(self, obj, data=None):
        if not obj.unified_job_template:
            return True
-        return self.check_related('unified_job_template', UnifiedJobTemplate, {}, obj=obj, role_field='execute_role', mandatory=True)
+        return self.check_related('unified_job_template', UnifiedJobTemplate, data, obj=obj, role_field='execute_role', mandatory=True)

    def can_change(self, obj, data):
-        if not data:
-            return True
-
        # should not be able to edit the prompts if lacking access to UJT or WFJT
-        return self.ujt_execute(obj) and self.wfjt_admin(obj) and JobLaunchConfigAccess(self.user).can_change(obj, data)
+        return self.ujt_execute(obj, data=data) and self.wfjt_admin(obj) and JobLaunchConfigAccess(self.user).can_change(obj, data)

    def can_delete(self, obj):
        return self.wfjt_admin(obj)
@@ -1955,29 +1965,14 @@ class WorkflowJobTemplateNodeAccess(BaseAccess):
        return True

    def can_attach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
-        if not self.wfjt_admin(obj):
-            return False
-        if relationship == 'credentials':
-            # Need permission to related template to attach a credential
-            if not self.ujt_execute(obj):
-                return False
-            return JobLaunchConfigAccess(self.user).can_attach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
-        elif relationship in ('success_nodes', 'failure_nodes', 'always_nodes'):
-            return self.check_same_WFJT(obj, sub_obj)
-        else:
-            raise NotImplementedError('Relationship {} not understood for WFJT nodes.'.format(relationship))
+        if relationship in ('success_nodes', 'failure_nodes', 'always_nodes'):
+            return self.wfjt_admin(obj) and self.check_same_WFJT(obj, sub_obj)
+        return super().can_attach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)

-    def can_unattach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
-        if not self.wfjt_admin(obj):
-            return False
-        if relationship == 'credentials':
-            if not self.ujt_execute(obj):
-                return False
-            return JobLaunchConfigAccess(self.user).can_unattach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
-        elif relationship in ('success_nodes', 'failure_nodes', 'always_nodes'):
-            return self.check_same_WFJT(obj, sub_obj)
-        else:
-            raise NotImplementedError('Relationship {} not understood for WFJT nodes.'.format(relationship))
+    def can_unattach(self, obj, sub_obj, relationship, data=None):
+        if relationship not in ('success_nodes', 'failure_nodes', 'always_nodes'):
+            return super().can_unattach(obj, sub_obj, relationship, data=None)
+        return self.wfjt_admin(obj)


 class WorkflowJobNodeAccess(BaseAccess):
@@ -2052,13 +2047,10 @@ class WorkflowJobTemplateAccess(NotificationAttachMixin, BaseAccess):
        if not data:  # So the browseable API will work
            return Organization.accessible_objects(self.user, 'workflow_admin_role').exists()

-        if data.get('execution_environment'):
-            ee = get_object_from_data('execution_environment', ExecutionEnvironment, data)
-            if not self.user.can_access(ExecutionEnvironment, 'read', ee):
-                return False
-
-        return self.check_related('organization', Organization, data, role_field='workflow_admin_role', mandatory=True) and self.check_related(
-            'inventory', Inventory, data, role_field='use_role'
+        return bool(
+            self.check_related('organization', Organization, data, role_field='workflow_admin_role', mandatory=True)
+            and self.check_related('inventory', Inventory, data, role_field='use_role')
+            and self.check_related('execution_environment', ExecutionEnvironment, data, role_field='read_role')
        )

    def can_copy(self, obj):
@@ -2104,14 +2096,10 @@ class WorkflowJobTemplateAccess(NotificationAttachMixin, BaseAccess):
        if self.user.is_superuser:
            return True

-        if data and data.get('execution_environment'):
-            ee = get_object_from_data('execution_environment', ExecutionEnvironment, data)
-            if not self.user.can_access(ExecutionEnvironment, 'read', ee):
-                return False
-
        return (
            self.check_related('organization', Organization, data, role_field='workflow_admin_role', obj=obj)
            and self.check_related('inventory', Inventory, data, role_field='use_role', obj=obj)
+            and self.check_related('execution_environment', ExecutionEnvironment, data, obj=obj, role_field='read_role')
            and self.user in obj.admin_role
        )

@@ -2518,7 +2506,7 @@ class UnifiedJobAccess(BaseAccess):
        return super(UnifiedJobAccess, self).get_queryset().filter(workflowapproval__isnull=True)


-class ScheduleAccess(BaseAccess):
+class ScheduleAccess(UnifiedCredentialsMixin, BaseAccess):
    """
    I can see a schedule if I can see it's related unified job, I can create them or update them if I have write access
    """
@@ -2559,12 +2547,6 @@ class ScheduleAccess(BaseAccess):
    def can_delete(self, obj):
        return self.can_change(obj, {})

-    def can_attach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
-        return JobLaunchConfigAccess(self.user).can_attach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
-
-    def can_unattach(self, obj, sub_obj, relationship, data, skip_sub_obj_read_check=False):
-        return JobLaunchConfigAccess(self.user).can_unattach(obj, sub_obj, relationship, data, skip_sub_obj_read_check=skip_sub_obj_read_check)
-

 class NotificationTemplateAccess(BaseAccess):
    """
@@ -2715,46 +2697,66 @@ class ActivityStreamAccess(BaseAccess):
        # 'job_template', 'job', 'project', 'project_update', 'workflow_job',
        # 'inventory_source', 'workflow_job_template'

-        inventory_set = Inventory.accessible_objects(self.user, 'read_role')
-        credential_set = Credential.accessible_objects(self.user, 'read_role')
+        q = Q(user=self.user)
+        inventory_set = Inventory.accessible_pk_qs(self.user, 'read_role')
+        if inventory_set:
+            q |= (
+                Q(ad_hoc_command__inventory__in=inventory_set)
+                | Q(inventory__in=inventory_set)
+                | Q(host__inventory__in=inventory_set)
+                | Q(group__inventory__in=inventory_set)
+                | Q(inventory_source__inventory__in=inventory_set)
+                | Q(inventory_update__inventory_source__inventory__in=inventory_set)
+            )
+
+        credential_set = Credential.accessible_pk_qs(self.user, 'read_role')
+        if credential_set:
+            q |= Q(credential__in=credential_set)
+
        auditing_orgs = (
            (Organization.accessible_objects(self.user, 'admin_role') | Organization.accessible_objects(self.user, 'auditor_role'))
            .distinct()
            .values_list('id', flat=True)
        )
-        project_set = Project.accessible_objects(self.user, 'read_role')
-        jt_set = JobTemplate.accessible_objects(self.user, 'read_role')
-        team_set = Team.accessible_objects(self.user, 'read_role')
-        wfjt_set = WorkflowJobTemplate.accessible_objects(self.user, 'read_role')
-        app_set = OAuth2ApplicationAccess(self.user).filtered_queryset()
-        token_set = OAuth2TokenAccess(self.user).filtered_queryset()
+        if auditing_orgs:
+            q |= (
+                Q(user__in=auditing_orgs.values('member_role__members'))
+                | Q(organization__in=auditing_orgs)
+                | Q(notification_template__organization__in=auditing_orgs)
+                | Q(notification__notification_template__organization__in=auditing_orgs)
+                | Q(label__organization__in=auditing_orgs)
+                | Q(role__in=Role.objects.filter(ancestors__in=self.user.roles.all()) if auditing_orgs else [])
+            )

-        return qs.filter(
-            Q(ad_hoc_command__inventory__in=inventory_set)
-            | Q(o_auth2_application__in=app_set)
-            | Q(o_auth2_access_token__in=token_set)
-            | Q(user__in=auditing_orgs.values('member_role__members'))
-            | Q(user=self.user)
-            | Q(organization__in=auditing_orgs)
-            | Q(inventory__in=inventory_set)
-            | Q(host__inventory__in=inventory_set)
-            | Q(group__inventory__in=inventory_set)
-            | Q(inventory_source__inventory__in=inventory_set)
-            | Q(inventory_update__inventory_source__inventory__in=inventory_set)
-            | Q(credential__in=credential_set)
-            | Q(team__in=team_set)
-            | Q(project__in=project_set)
-            | Q(project_update__project__in=project_set)
-            | Q(job_template__in=jt_set)
-            | Q(job__job_template__in=jt_set)
-            | Q(workflow_job_template__in=wfjt_set)
-            | Q(workflow_job_template_node__workflow_job_template__in=wfjt_set)
-            | Q(workflow_job__workflow_job_template__in=wfjt_set)
-            | Q(notification_template__organization__in=auditing_orgs)
-            | Q(notification__notification_template__organization__in=auditing_orgs)
-            | Q(label__organization__in=auditing_orgs)
-            | Q(role__in=Role.objects.filter(ancestors__in=self.user.roles.all()) if auditing_orgs else [])
-        ).distinct()
+        project_set = Project.accessible_pk_qs(self.user, 'read_role')
+        if project_set:
+            q |= Q(project__in=project_set) | Q(project_update__project__in=project_set)
+
+        jt_set = JobTemplate.accessible_pk_qs(self.user, 'read_role')
+        if jt_set:
+            q |= Q(job_template__in=jt_set) | Q(job__job_template__in=jt_set)
+
+        wfjt_set = WorkflowJobTemplate.accessible_pk_qs(self.user, 'read_role')
+        if wfjt_set:
+            q |= (
+                Q(workflow_job_template__in=wfjt_set)
+                | Q(workflow_job_template_node__workflow_job_template__in=wfjt_set)
+                | Q(workflow_job__workflow_job_template__in=wfjt_set)
+            )
+
+        team_set = Team.accessible_pk_qs(self.user, 'read_role')
+        if team_set:
+            q |= Q(team__in=team_set)
+
+        app_set = OAuth2ApplicationAccess(self.user).filtered_queryset()
+        if app_set:
+            q |= Q(o_auth2_application__in=app_set)
+
+        token_set = OAuth2TokenAccess(self.user).filtered_queryset()
+        if token_set:
+            q |= Q(o_auth2_access_token__in=token_set)
+
+        return qs.filter(q).distinct()

    def can_add(self, data):
        return False
--- a/awx/main/analytics/broadcast_websocket.py
+++ b/awx/main/analytics/broadcast_websocket.py
@@ -1,8 +1,8 @@
 import datetime
 import asyncio
 import logging
-import aioredis
 import redis
+import redis.asyncio
 import re

 from prometheus_client import (
@@ -82,7 +82,7 @@ class BroadcastWebsocketStatsManager:

    async def run_loop(self):
        try:
-            redis_conn = await aioredis.create_redis_pool(settings.BROKER_URL)
+            redis_conn = await redis.asyncio.Redis.from_url(settings.BROKER_URL)
            while True:
                stats_data_str = ''.join(stat.serialize() for stat in self._stats.values())
                await redis_conn.set(self._redis_key, stats_data_str)
@@ -122,8 +122,8 @@ class BroadcastWebsocketStats:
            'Number of messages received, to be forwarded, by the broadcast websocket system',
            registry=self._registry,
        )
-        self._messages_received = Gauge(
-            f'awx_{self.remote_name}_messages_received',
+        self._messages_received_current_conn = Gauge(
+            f'awx_{self.remote_name}_messages_received_currrent_conn',
            'Number forwarded messages received by the broadcast websocket system, for the duration of the current connection',
            registry=self._registry,
        )
@@ -144,13 +144,13 @@ class BroadcastWebsocketStats:

    def record_message_received(self):
        self._internal_messages_received_per_minute.record()
-        self._messages_received.inc()
+        self._messages_received_current_conn.inc()
        self._messages_received_total.inc()

    def record_connection_established(self):
        self._connection.state('connected')
        self._connection_start.set_to_current_time()
-        self._messages_received.set(0)
+        self._messages_received_current_conn.set(0)

    def record_connection_lost(self):
        self._connection.state('disconnected')
--- a/awx/main/analytics/collectors.py
+++ b/awx/main/analytics/collectors.py
@@ -16,6 +16,7 @@ from awx.conf.license import get_license
 from awx.main.utils import get_awx_version, camelcase_to_underscore, datetime_hook
 from awx.main import models
 from awx.main.analytics import register
+from awx.main.scheduler.task_manager_models import TaskManagerModels

 """
 This module is used to define metrics collected by awx.main.analytics.gather()
@@ -235,25 +236,25 @@ def projects_by_scm_type(since, **kwargs):
@register('instance_info', '1.2', description=_('Cluster topology and capacity'))
 def instance_info(since, include_hostnames=False, **kwargs):
    info = {}
-    instances = models.Instance.objects.values_list('hostname').values(
-        'uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'hostname', 'enabled'
-    )
-    for instance in instances:
-        consumed_capacity = sum(x.task_impact for x in models.UnifiedJob.objects.filter(execution_node=instance['hostname'], status__in=('running', 'waiting')))
+    # Use same method that the TaskManager does to compute consumed capacity without querying all running jobs for each Instance
+    tm_models = TaskManagerModels.init_with_consumed_capacity(instance_fields=['uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'enabled'])
+    for tm_instance in tm_models.instances.instances_by_hostname.values():
+        instance = tm_instance.obj
        instance_info = {
-            'uuid': instance['uuid'],
-            'version': instance['version'],
-            'capacity': instance['capacity'],
-            'cpu': instance['cpu'],
-            'memory': instance['memory'],
-            'managed_by_policy': instance['managed_by_policy'],
-            'enabled': instance['enabled'],
-            'consumed_capacity': consumed_capacity,
-            'remaining_capacity': instance['capacity'] - consumed_capacity,
+            'uuid': instance.uuid,
+            'version': instance.version,
+            'capacity': instance.capacity,
+            'cpu': instance.cpu,
+            'memory': instance.memory,
+            'managed_by_policy': instance.managed_by_policy,
+            'enabled': instance.enabled,
+            'consumed_capacity': tm_instance.consumed_capacity,
+            'remaining_capacity': instance.capacity - tm_instance.consumed_capacity,
+            'node_type': instance.node_type,
        }
        if include_hostnames is True:
-            instance_info['hostname'] = instance['hostname']
-        info[instance['uuid']] = instance_info
+            instance_info['hostname'] = instance.hostname
+        info[instance.uuid] = instance_info
    return info


--- a/awx/main/analytics/metrics.py
+++ b/awx/main/analytics/metrics.py
@@ -3,6 +3,7 @@ from prometheus_client import CollectorRegistry, Gauge, Info, generate_latest

 from awx.conf.license import get_license
 from awx.main.utils import get_awx_version
+from awx.main.models import UnifiedJob
 from awx.main.analytics.collectors import (
    counts,
    instance_info,
@@ -56,6 +57,7 @@ def metrics():
        [
            'hostname',
            'instance_uuid',
+            'node_type',
        ],
        registry=REGISTRY,
    )
@@ -83,6 +85,7 @@ def metrics():
        [
            'hostname',
            'instance_uuid',
+            'node_type',
        ],
        registry=REGISTRY,
    )
@@ -110,6 +113,7 @@ def metrics():
        [
            'hostname',
            'instance_uuid',
+            'node_type',
        ],
        registry=REGISTRY,
    )
@@ -119,6 +123,7 @@ def metrics():
        [
            'hostname',
            'instance_uuid',
+            'node_type',
        ],
        registry=REGISTRY,
    )
@@ -169,8 +174,9 @@ def metrics():

    all_job_data = job_counts(None)
    statuses = all_job_data.get('status', {})
-    for status, value in statuses.items():
-        STATUS.labels(status=status).set(value)
+    states = set(dict(UnifiedJob.STATUS_CHOICES).keys()) - set(['new'])
+    for state in states:
+        STATUS.labels(status=state).set(statuses.get(state, 0))

    RUNNING_JOBS.set(current_counts['running_jobs'])
    PENDING_JOBS.set(current_counts['pending_jobs'])
@@ -178,12 +184,13 @@ def metrics():
    instance_data = instance_info(None, include_hostnames=True)
    for uuid, info in instance_data.items():
        hostname = info['hostname']
-        INSTANCE_CAPACITY.labels(hostname=hostname, instance_uuid=uuid).set(instance_data[uuid]['capacity'])
+        node_type = info['node_type']
+        INSTANCE_CAPACITY.labels(hostname=hostname, instance_uuid=uuid, node_type=node_type).set(instance_data[uuid]['capacity'])
        INSTANCE_CPU.labels(hostname=hostname, instance_uuid=uuid).set(instance_data[uuid]['cpu'])
        INSTANCE_MEMORY.labels(hostname=hostname, instance_uuid=uuid).set(instance_data[uuid]['memory'])
-        INSTANCE_CONSUMED_CAPACITY.labels(hostname=hostname, instance_uuid=uuid).set(instance_data[uuid]['consumed_capacity'])
-        INSTANCE_REMAINING_CAPACITY.labels(hostname=hostname, instance_uuid=uuid).set(instance_data[uuid]['remaining_capacity'])
-        INSTANCE_INFO.labels(hostname=hostname, instance_uuid=uuid).info(
+        INSTANCE_CONSUMED_CAPACITY.labels(hostname=hostname, instance_uuid=uuid, node_type=node_type).set(instance_data[uuid]['consumed_capacity'])
+        INSTANCE_REMAINING_CAPACITY.labels(hostname=hostname, instance_uuid=uuid, node_type=node_type).set(instance_data[uuid]['remaining_capacity'])
+        INSTANCE_INFO.labels(hostname=hostname, instance_uuid=uuid, node_type=node_type).info(
            {
                'enabled': str(instance_data[uuid]['enabled']),
                'managed_by_policy': str(instance_data[uuid]['managed_by_policy']),
--- a/awx/main/analytics/subsystem_metrics.py
+++ b/awx/main/analytics/subsystem_metrics.py
@@ -5,7 +5,9 @@ import logging

 from django.conf import settings
 from django.apps import apps
+
 from awx.main.consumers import emit_channel_notification
+from awx.main.utils import is_testing

 root_key = 'awx_metrics'
 logger = logging.getLogger('awx.main.analytics')
@@ -163,14 +165,10 @@ class Metrics:
        Instance = apps.get_model('main', 'Instance')
        if instance_name:
            self.instance_name = instance_name
-        elif settings.IS_TESTING():
+        elif is_testing():
            self.instance_name = "awx_testing"
        else:
-            try:
-                self.instance_name = Instance.objects.me().hostname
-            except Exception as e:
-                self.instance_name = settings.CLUSTER_HOST_ID
-                logger.info(f'Instance {self.instance_name} seems to be unregistered, error: {e}')
+            self.instance_name = Instance.objects.my_hostname()

        # metric name, help_text
        METRICSLIST = [
--- a/awx/main/conf.py
+++ b/awx/main/conf.py
@@ -569,7 +569,7 @@ register(
 register(
    'LOG_AGGREGATOR_LOGGERS',
    field_class=fields.StringListField,
-    default=['awx', 'activity_stream', 'job_events', 'system_tracking'],
+    default=['awx', 'activity_stream', 'job_events', 'system_tracking', 'broadcast_websocket'],
    label=_('Loggers Sending Data to Log Aggregator Form'),
    help_text=_(
        'List of loggers that will send HTTP logs to the collector, these can '
@@ -577,7 +577,8 @@ register(
        'awx - service logs\n'
        'activity_stream - activity stream records\n'
        'job_events - callback data from Ansible job events\n'
-        'system_tracking - facts gathered from scan jobs.'
+        'system_tracking - facts gathered from scan jobs\n'
+        'broadcast_websocket - errors pertaining to websockets broadcast metrics\n'
    ),
    category=_('Logging'),
    category_slug='logging',
--- a/awx/main/credential_plugins/aim.py
+++ b/awx/main/credential_plugins/aim.py
@@ -9,10 +9,16 @@ aim_inputs = {
    'fields': [
        {
            'id': 'url',
-            'label': _('CyberArk AIM URL'),
+            'label': _('CyberArk CCP URL'),
            'type': 'string',
            'format': 'url',
        },
+        {
+            'id': 'webservice_id',
+            'label': _('Web Service ID'),
+            'type': 'string',
+            'help_text': _('The CCP Web Service ID. Leave blank to default to AIMWebService.'),
+        },
        {
            'id': 'app_id',
            'label': _('Application ID'),
@@ -64,10 +70,13 @@ def aim_backend(**kwargs):
    client_cert = kwargs.get('client_cert', None)
    client_key = kwargs.get('client_key', None)
    verify = kwargs['verify']
+    webservice_id = kwargs.get('webservice_id', '')  # optional input: credentials saved before this field existed have no such key
    app_id = kwargs['app_id']
    object_query = kwargs['object_query']
    object_query_format = kwargs['object_query_format']
    reason = kwargs.get('reason', None)
+    if not webservice_id:  # missing, None or blank all fall back to the default CCP web service
+        webservice_id = 'AIMWebService'

    query_params = {
        'AppId': app_id,
@@ -78,7 +87,7 @@ def aim_backend(**kwargs):
        query_params['reason'] = reason

    request_qs = '?' + urlencode(query_params, quote_via=quote)
-    request_url = urljoin(url, '/'.join(['AIMWebService', 'api', 'Accounts']))
+    request_url = urljoin(url, '/'.join([webservice_id, 'api', 'Accounts']))

    with CertFiles(client_cert, client_key) as cert:
        res = requests.get(
@@ -92,4 +101,4 @@ def aim_backend(**kwargs):
    return res.json()['Content']


-aim_plugin = CredentialPlugin('CyberArk AIM Central Credential Provider Lookup', inputs=aim_inputs, backend=aim_backend)
+aim_plugin = CredentialPlugin('CyberArk Central Credential Provider Lookup', inputs=aim_inputs, backend=aim_backend)
--- a/awx/main/credential_plugins/conjur.py
+++ b/awx/main/credential_plugins/conjur.py
@@ -1,6 +1,5 @@
 from .plugin import CredentialPlugin, CertFiles, raise_for_status

-import base64
 from urllib.parse import urljoin, quote

 from django.utils.translation import gettext_lazy as _
@@ -61,7 +60,7 @@ def conjur_backend(**kwargs):
    cacert = kwargs.get('cacert', None)

    auth_kwargs = {
-        'headers': {'Content-Type': 'text/plain'},
+        'headers': {'Content-Type': 'text/plain', 'Accept-Encoding': 'base64'},
        'data': api_key,
        'allow_redirects': False,
    }
@@ -69,9 +68,9 @@ def conjur_backend(**kwargs):
    with CertFiles(cacert) as cert:
        # https://www.conjur.org/api.html#authentication-authenticate-post
        auth_kwargs['verify'] = cert
-        resp = requests.post(urljoin(url, '/'.join(['authn', account, username, 'authenticate'])), **auth_kwargs)
+        resp = requests.post(urljoin(url, '/'.join(['api', 'authn', account, username, 'authenticate'])), **auth_kwargs)
    raise_for_status(resp)
-    token = base64.b64encode(resp.content).decode('utf-8')
+    token = resp.content.decode('utf-8')

    lookup_kwargs = {
        'headers': {'Authorization': 'Token token="{}"'.format(token)},
@@ -79,9 +78,10 @@ def conjur_backend(**kwargs):
    }

    # https://www.conjur.org/api.html#secrets-retrieve-a-secret-get
-    path = urljoin(url, '/'.join(['secrets', account, 'variable', secret_path]))
+    path = urljoin(url, '/'.join(['api', 'secrets', account, 'variable', secret_path]))
    if version:
-        path = '?'.join([path, version])
+        ver = "version={}".format(version)
+        path = '?'.join([path, ver])

    with CertFiles(cacert) as cert:
        lookup_kwargs['verify'] = cert
@@ -90,4 +90,4 @@ def conjur_backend(**kwargs):
    return resp.text


-conjur_plugin = CredentialPlugin('CyberArk Conjur Secret Lookup', inputs=conjur_inputs, backend=conjur_backend)
+conjur_plugin = CredentialPlugin('CyberArk Conjur Secrets Manager Lookup', inputs=conjur_inputs, backend=conjur_backend)
--- a/awx/main/dispatch/init.py
+++ b/awx/main/dispatch/init.py
@@ -31,7 +31,7 @@ class PubSub(object):
            cur.execute('SELECT pg_notify(%s, %s);', (channel, payload))

    def events(self, select_timeout=5, yield_timeouts=False):
-        if not pg_connection.get_autocommit():
+        if not self.conn.autocommit:
            raise RuntimeError('Listening for events can only be done in autocommit mode')

        while True:
--- a/awx/main/dispatch/control.py
+++ b/awx/main/dispatch/control.py
@@ -3,6 +3,7 @@ import uuid
 import json

 from django.conf import settings
+from django.db import connection
 import redis

 from awx.main.dispatch import get_local_queuename
@@ -37,18 +38,27 @@ class Control(object):
    def running(self, *args, **kwargs):
        return self.control_with_reply('running', *args, **kwargs)

+    def cancel(self, task_ids, *args, **kwargs):
+        return self.control_with_reply('cancel', *args, extra_data={'task_ids': task_ids}, **kwargs)
+
    @classmethod
    def generate_reply_queue_name(cls):
        return f"reply_to_{str(uuid.uuid4()).replace('-','_')}"

-    def control_with_reply(self, command, timeout=5):
+    def control_with_reply(self, command, timeout=5, extra_data=None):
        logger.warning('checking {} {} for {}'.format(self.service, command, self.queuename))
        reply_queue = Control.generate_reply_queue_name()
        self.result = None

+        if not connection.get_autocommit():
+            raise RuntimeError('Control-with-reply messages can only be done in autocommit mode')
+
        with pg_bus_conn() as conn:
            conn.listen(reply_queue)
-            conn.notify(self.queuename, json.dumps({'control': command, 'reply_to': reply_queue}))
+            send_data = {'control': command, 'reply_to': reply_queue}
+            if extra_data:
+                send_data.update(extra_data)
+            conn.notify(self.queuename, json.dumps(send_data))

            for reply in conn.events(select_timeout=timeout, yield_timeouts=True):
                if reply is None:
--- a/awx/main/dispatch/pool.py
+++ b/awx/main/dispatch/pool.py
@@ -72,11 +72,9 @@ class PoolWorker(object):
        self.messages_finished = 0
        self.managed_tasks = collections.OrderedDict()
        self.finished = MPQueue(queue_size) if self.track_managed_tasks else NoOpResultQueue()
-        self.last_finished = None
        self.queue = MPQueue(queue_size)
        self.process = Process(target=target, args=(self.queue, self.finished) + args)
        self.process.daemon = True
-        self.scale_down_in = settings.DISPATCHER_SCALE_DOWN_WAIT_TIME

    def start(self):
        self.process.start()
@@ -147,9 +145,6 @@ class PoolWorker(object):
                # state of which events are *currently* being processed.
                logger.warning('Event UUID {} appears to be have been duplicated.'.format(uuid))

-        if finished:
-            self.last_finished = time.time()
-
    @property
    def current_task(self):
        if not self.track_managed_tasks:
@@ -195,14 +190,6 @@ class PoolWorker(object):
    def idle(self):
        return not self.busy

-    @property
-    def ready_to_scale_down(self):
-        if self.busy:
-            return False
-        if self.last_finished is None:
-            return True
-        return time.time() - self.last_finished > self.scale_down_in
-

 class StatefulPoolWorker(PoolWorker):

@@ -263,7 +250,7 @@ class WorkerPool(object):
        except Exception:
            logger.exception('could not fork')
        else:
-            logger.info(f'scaling up worker pid:{worker.pid} total:{len(self.workers)}')
+            logger.debug('scaling up worker pid:{}'.format(worker.pid))
        return idx, worker

    def debug(self, *args, **kwargs):
@@ -400,14 +387,16 @@ class AutoscalePool(WorkerPool):
                                reaper.reap_job(j, 'failed')
                        except Exception:
                            logger.exception('failed to reap job UUID {}'.format(w.current_task['uuid']))
+                    else:
+                        logger.warning(f'Worker was told to quit but has not, pid={w.pid}')
                orphaned.extend(w.orphaned_tasks)
                self.workers.remove(w)
-            elif (len(self.workers) > self.min_workers) and w.ready_to_scale_down:
+            elif w.idle and len(self.workers) > self.min_workers:
                # the process has an empty queue (it's idle) and we have
                # more processes in the pool than we need (> min)
                # send this process a message so it will exit gracefully
                # at the next opportunity
-                logger.info(f'scaling down worker pid:{w.pid} prior total:{len(self.workers)}')
+                logger.debug('scaling down worker pid:{}'.format(w.pid))
                w.quit()
                self.workers.remove(w)
            if w.alive:
@@ -463,9 +452,6 @@ class AutoscalePool(WorkerPool):
        try:
            if isinstance(body, dict) and body.get('bind_kwargs'):
                self.add_bind_kwargs(body)
-            # when the cluster heartbeat occurs, clean up internally
-            if isinstance(body, dict) and 'cluster_node_heartbeat' in body['task']:
-                self.cleanup()
            if self.should_grow:
                self.up()
            # we don't care about "preferred queue" round robin distribution, just
@@ -480,7 +466,7 @@ class AutoscalePool(WorkerPool):
                task_name = 'unknown'
                if isinstance(body, dict):
                    task_name = body.get('task')
-                logger.warn(f'Workers maxed, queuing {task_name}, load: {sum(len(w.managed_tasks) for w in self.workers)} / {len(self.workers)}')
+                logger.warning(f'Workers maxed, queuing {task_name}, load: {sum(len(w.managed_tasks) for w in self.workers)} / {len(self.workers)}')
                return super(AutoscalePool, self).write(preferred_queue, body)
        except Exception:
            for conn in connections.all():
--- a/awx/main/dispatch/publish.py
+++ b/awx/main/dispatch/publish.py
@@ -1,14 +1,13 @@
 import inspect
 import logging
-import sys
 import json
 import time
 from uuid import uuid4

-from django.conf import settings
 from django_guid import get_guid

 from . import pg_bus_conn
+from awx.main.utils import is_testing

 logger = logging.getLogger('awx.main.dispatch')

@@ -93,7 +92,7 @@ class task:
                obj.update(**kw)
                if callable(queue):
                    queue = queue()
-                if not settings.IS_TESTING(sys.argv):
+                if not is_testing():
                    with pg_bus_conn() as conn:
                        conn.notify(queue, json.dumps(obj))
                return (obj, queue)
--- a/awx/main/dispatch/reaper.py
+++ b/awx/main/dispatch/reaper.py
@@ -16,12 +16,7 @@ def startup_reaping():
    If this particular instance is starting, then we know that any running jobs are invalid
    so we will reap those jobs as a special action here
    """
-    try:
-        me = Instance.objects.me()
-    except RuntimeError as e:
-        logger.warning(f'Local instance is not registered, not running startup reaper: {e}')
-        return
-    jobs = UnifiedJob.objects.filter(status='running', controller_node=me.hostname)
+    jobs = UnifiedJob.objects.filter(status='running', controller_node=Instance.objects.my_hostname())
    job_ids = []
    for j in jobs:
        job_ids.append(j.id)
@@ -62,16 +57,13 @@ def reap_waiting(instance=None, status='failed', job_explanation=None, grace_per
    if grace_period is None:
        grace_period = settings.JOB_WAITING_GRACE_PERIOD + settings.TASK_MANAGER_TIMEOUT

-    me = instance
-    if me is None:
-        try:
-            me = Instance.objects.me()
-        except RuntimeError as e:
-            logger.warning(f'Local instance is not registered, not running reaper: {e}')
-            return
+    if instance is None:
+        hostname = Instance.objects.my_hostname()
+    else:
+        hostname = instance.hostname
    if ref_time is None:
        ref_time = tz_now()
-    jobs = UnifiedJob.objects.filter(status='waiting', modified__lte=ref_time - timedelta(seconds=grace_period), controller_node=me.hostname)
+    jobs = UnifiedJob.objects.filter(status='waiting', modified__lte=ref_time - timedelta(seconds=grace_period), controller_node=hostname)
    if excluded_uuids:
        jobs = jobs.exclude(celery_task_id__in=excluded_uuids)
    for j in jobs:
@@ -82,16 +74,13 @@ def reap(instance=None, status='failed', job_explanation=None, excluded_uuids=No
    """
    Reap all jobs in running for this instance.
    """
-    me = instance
-    if me is None:
-        try:
-            me = Instance.objects.me()
-        except RuntimeError as e:
-            logger.warning(f'Local instance is not registered, not running reaper: {e}')
-            return
+    if instance is None:
+        hostname = Instance.objects.my_hostname()
+    else:
+        hostname = instance.hostname
    workflow_ctype_id = ContentType.objects.get_for_model(WorkflowJob).id
    jobs = UnifiedJob.objects.filter(
-        Q(status='running') & (Q(execution_node=me.hostname) | Q(controller_node=me.hostname)) & ~Q(polymorphic_ctype_id=workflow_ctype_id)
+        Q(status='running') & (Q(execution_node=hostname) | Q(controller_node=hostname)) & ~Q(polymorphic_ctype_id=workflow_ctype_id)
    )
    if excluded_uuids:
        jobs = jobs.exclude(celery_task_id__in=excluded_uuids)
--- a/awx/main/dispatch/worker/base.py
+++ b/awx/main/dispatch/worker/base.py
@@ -63,7 +63,7 @@ class AWXConsumerBase(object):
    def control(self, body):
        logger.warning(f'Received control signal:\n{body}')
        control = body.get('control')
-        if control in ('status', 'running'):
+        if control in ('status', 'running', 'cancel'):
            reply_queue = body['reply_to']
            if control == 'status':
                msg = '\n'.join([self.listening_on, self.pool.debug()])
@@ -72,6 +72,17 @@ class AWXConsumerBase(object):
                for worker in self.pool.workers:
                    worker.calculate_managed_tasks()
                    msg.extend(worker.managed_tasks.keys())
+            elif control == 'cancel':
+                msg = []  # reply payload: uuids of the tasks that were actually signalled
+                task_ids = set(body['task_ids'])
+                for worker in self.pool.workers:
+                    task = worker.current_task
+                    if task and task['uuid'] in task_ids:
+                        logger.warning(f'Sending SIGTERM to task id={task["uuid"]}, task={task.get("task")}, args={task.get("args")}')
+                        os.kill(worker.pid, signal.SIGTERM)
+                        msg.append(task['uuid'])
+                if task_ids and not msg:
+                    logger.info(f'Could not locate running tasks to cancel with ids={task_ids}')

            with pg_bus_conn() as conn:
                conn.notify(reply_queue, json.dumps(msg))
@@ -103,7 +114,6 @@ class AWXConsumerBase(object):
            queue = 0
        self.pool.write(queue, body)
        self.total_messages += 1
-        self.record_statistics()

    @log_excess_runtime(logger)
    def record_statistics(self):
@@ -145,6 +155,16 @@ class AWXConsumerPG(AWXConsumerBase):
        # if no successful loops have ran since startup, then we should fail right away
        self.pg_is_down = True  # set so that we fail if we get database errors on startup
        self.pg_down_time = time.time() - self.pg_max_wait  # allow no grace period
+        self.last_cleanup = time.time()
+
+    def run_periodic_tasks(self):
+        self.record_statistics()  # maintains time buffer in method
+
+        if time.time() - self.last_cleanup > 60:  # same as cluster_node_heartbeat
+            # NOTE: if we run out of database connections, it is important to still run cleanup
+            # so that we scale down workers and free up connections
+            self.pool.cleanup()
+            self.last_cleanup = time.time()

    def run(self, *args, **kwargs):
        super(AWXConsumerPG, self).run(*args, **kwargs)
@@ -160,8 +180,10 @@ class AWXConsumerPG(AWXConsumerBase):
                    if init is False:
                        self.worker.on_start()
                        init = True
-                    for e in conn.events():
-                        self.process_task(json.loads(e.payload))
+                    for e in conn.events(yield_timeouts=True):
+                        if e is not None:
+                            self.process_task(json.loads(e.payload))
+                        self.run_periodic_tasks()
                        self.pg_is_down = False
                    if self.should_stop:
                        return
@@ -218,6 +240,8 @@ class BaseWorker(object):
                    # so we can establish a new connection
                    conn.close_if_unusable_or_obsolete()
                self.perform_work(body, *args)
+            except Exception:
+                logger.exception(f'Unhandled exception in perform_work in worker pid={os.getpid()}')
            finally:
                if 'uuid' in body:
                    uuid = body['uuid']
--- a/awx/main/management/commands/bottleneck.py
+++ b/awx/main/management/commands/bottleneck.py
@@ -25,7 +25,7 @@ class Command(BaseCommand):
        with connection.cursor() as cursor:
            cursor.execute(
                f'''
-                SELECT 
+                SELECT
                    b.id, b.job_id, b.host_name, b.created - a.created delta,
                    b.task task,
                    b.event_data::json->'task_action' task_action,
--- a/awx/main/management/commands/list_instances.py
+++ b/awx/main/management/commands/list_instances.py
@@ -54,7 +54,7 @@ class Command(BaseCommand):

                capacity = f' capacity={x.capacity}' if x.node_type != 'hop' else ''
                version = f" version={x.version or '?'}" if x.node_type != 'hop' else ''
-                heartbeat = f' heartbeat="{x.modified:%Y-%m-%d %H:%M:%S}"' if x.capacity or x.node_type == 'hop' else ''
+                heartbeat = f' heartbeat="{x.last_seen:%Y-%m-%d %H:%M:%S}"' if x.capacity or x.node_type == 'hop' else ''
                print(f'\t{color}{x.hostname}{capacity} node_type={x.node_type}{version}{heartbeat}\033[0m')

            print()
--- a/awx/main/management/commands/provision_instance.py
+++ b/awx/main/management/commands/provision_instance.py
@@ -38,7 +38,14 @@ class Command(BaseCommand):
            (changed, instance) = Instance.objects.register(ip_address=os.environ.get('MY_POD_IP'), node_type='control', uuid=settings.SYSTEM_UUID)
            RegisterQueue(settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME, 100, 0, [], is_container_group=False).register()
            RegisterQueue(
-                settings.DEFAULT_EXECUTION_QUEUE_NAME, 100, 0, [], is_container_group=True, pod_spec_override=settings.DEFAULT_EXECUTION_QUEUE_POD_SPEC_OVERRIDE
+                settings.DEFAULT_EXECUTION_QUEUE_NAME,
+                100,
+                0,
+                [],
+                is_container_group=True,
+                pod_spec_override=settings.DEFAULT_EXECUTION_QUEUE_POD_SPEC_OVERRIDE,
+                max_forks=settings.DEFAULT_EXECUTION_QUEUE_MAX_FORKS,
+                max_concurrent_jobs=settings.DEFAULT_EXECUTION_QUEUE_MAX_CONCURRENT_JOBS,
            ).register()
        else:
            (changed, instance) = Instance.objects.register(hostname=hostname, node_type=node_type, uuid=uuid)
--- a/awx/main/management/commands/regenerate_secret_key.py
+++ b/awx/main/management/commands/regenerate_secret_key.py
@@ -32,8 +32,14 @@ class Command(BaseCommand):
    def handle(self, **options):
        self.old_key = settings.SECRET_KEY
        custom_key = os.environ.get("TOWER_SECRET_KEY")
-        if options.get("use_custom_key") and custom_key:
-            self.new_key = custom_key
+        if options.get("use_custom_key"):
+            if custom_key:
+                self.new_key = custom_key
+            else:
+                print("Use custom key was specified but the env var TOWER_SECRET_KEY was not available")
+                import sys
+
+                sys.exit(1)
        else:
            self.new_key = base64.encodebytes(os.urandom(33)).decode().rstrip()
        self._notification_templates()
--- a/awx/main/management/commands/register_peers.py
+++ b/awx/main/management/commands/register_peers.py
@@ -27,7 +27,9 @@ class Command(BaseCommand):
        )

    def handle(self, **options):
+        # provides a mapping of hostname to Instance objects
        nodes = Instance.objects.in_bulk(field_name='hostname')
+
        if options['source'] not in nodes:
            raise CommandError(f"Host {options['source']} is not a registered instance.")
        if not (options['peers'] or options['disconnect'] or options['exact'] is not None):
@@ -57,7 +59,9 @@ class Command(BaseCommand):

            results = 0
            for target in options['peers']:
-                _, created = InstanceLink.objects.get_or_create(source=nodes[options['source']], target=nodes[target])
+                _, created = InstanceLink.objects.update_or_create(
+                    source=nodes[options['source']], target=nodes[target], defaults={'link_state': InstanceLink.States.ESTABLISHED}
+                )
                if created:
                    results += 1

@@ -80,7 +84,9 @@ class Command(BaseCommand):
                links = set(InstanceLink.objects.filter(source=nodes[options['source']]).values_list('target__hostname', flat=True))
                removals, _ = InstanceLink.objects.filter(source=nodes[options['source']], target__hostname__in=links - peers).delete()
                for target in peers - links:
-                    _, created = InstanceLink.objects.get_or_create(source=nodes[options['source']], target=nodes[target])
+                    _, created = InstanceLink.objects.update_or_create(
+                        source=nodes[options['source']], target=nodes[target], defaults={'link_state': InstanceLink.States.ESTABLISHED}
+                    )
                    if created:
                        additions += 1

--- a/awx/main/management/commands/register_queue.py
+++ b/awx/main/management/commands/register_queue.py
@@ -17,7 +17,9 @@ class InstanceNotFound(Exception):


 class RegisterQueue:
-    def __init__(self, queuename, instance_percent, inst_min, hostname_list, is_container_group=None, pod_spec_override=None):
+    def __init__(
+        self, queuename, instance_percent, inst_min, hostname_list, is_container_group=None, pod_spec_override=None, max_forks=None, max_concurrent_jobs=None
+    ):
        self.instance_not_found_err = None
        self.queuename = queuename
        self.instance_percent = instance_percent
@@ -25,6 +27,8 @@ class RegisterQueue:
        self.hostname_list = hostname_list
        self.is_container_group = is_container_group
        self.pod_spec_override = pod_spec_override
+        self.max_forks = max_forks
+        self.max_concurrent_jobs = max_concurrent_jobs

    def get_create_update_instance_group(self):
        created = False
@@ -45,6 +49,14 @@ class RegisterQueue:
            ig.pod_spec_override = self.pod_spec_override
            changed = True

+        if self.max_forks and (ig.max_forks != self.max_forks):
+            ig.max_forks = self.max_forks
+            changed = True
+
+        if self.max_concurrent_jobs and (ig.max_concurrent_jobs != self.max_concurrent_jobs):
+            ig.max_concurrent_jobs = self.max_concurrent_jobs
+            changed = True
+
        if changed:
            ig.save()

--- a/awx/main/management/commands/run_dispatcher.py
+++ b/awx/main/management/commands/run_dispatcher.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2015 Ansible, Inc.
 # All Rights Reserved.
 import logging
+import yaml

 from django.conf import settings
 from django.core.cache import cache as django_cache
@@ -30,7 +31,16 @@ class Command(BaseCommand):
            '--reload',
            dest='reload',
            action='store_true',
-            help=('cause the dispatcher to recycle all of its worker processes;' 'running jobs will run to completion first'),
+            help=('cause the dispatcher to recycle all of its worker processes; running jobs will run to completion first'),
+        )
+        parser.add_argument(
+            '--cancel',
+            dest='cancel',
+            help=(
+                'Cancel a particular task id. Takes either a single id string, or a JSON list of multiple ids. '
+                'Can take in output from the --running argument as input to cancel all tasks. '
+                'Only running tasks can be canceled, queued tasks must be started before they can be canceled.'
+            ),
        )

    def handle(self, *arg, **options):
@@ -42,6 +52,16 @@ class Command(BaseCommand):
            return
        if options.get('reload'):
            return Control('dispatcher').control({'control': 'reload'})
+        if options.get('cancel'):
+            cancel_str = options.get('cancel')
+            try:
+                cancel_data = yaml.safe_load(cancel_str)
+            except Exception:
+                cancel_data = [cancel_str]
+            if not isinstance(cancel_data, list):
+                cancel_data = [cancel_str]
+            print(Control('dispatcher').cancel(cancel_data))
+            return

        # It's important to close these because we're _about_ to fork, and we
        # don't want the forked processes to inherit the open sockets
--- a/awx/main/management/commands/run_wsbroadcast.py
+++ b/awx/main/management/commands/run_wsbroadcast.py
@@ -53,7 +53,7 @@ class Command(BaseCommand):
        return lines

    @classmethod
-    def get_connection_status(cls, me, hostnames, data):
+    def get_connection_status(cls, hostnames, data):
        host_stats = [('hostname', 'state', 'start time', 'duration (sec)')]
        for h in hostnames:
            connection_color = '91'  # red
@@ -78,7 +78,7 @@ class Command(BaseCommand):
        return host_stats

    @classmethod
-    def get_connection_stats(cls, me, hostnames, data):
+    def get_connection_stats(cls, hostnames, data):
        host_stats = [('hostname', 'total', 'per minute')]
        for h in hostnames:
            h_safe = safe_name(h)
@@ -119,8 +119,8 @@ class Command(BaseCommand):
            return

        try:
-            me = Instance.objects.me()
-            logger.info('Active instance with hostname {} is registered.'.format(me.hostname))
+            my_hostname = Instance.objects.my_hostname()
+            logger.info('Active instance with hostname {} is registered.'.format(my_hostname))
        except RuntimeError as e:
            # the CLUSTER_HOST_ID in the task, and web instance must match and
            # ensure network connectivity between the task and web instance
@@ -145,19 +145,19 @@ class Command(BaseCommand):
                else:
                    data[family.name] = family.samples[0].value

-            me = Instance.objects.me()
-            hostnames = [i.hostname for i in Instance.objects.exclude(hostname=me.hostname)]
+            my_hostname = Instance.objects.my_hostname()
+            hostnames = [i.hostname for i in Instance.objects.exclude(hostname=my_hostname)]

-            host_stats = Command.get_connection_status(me, hostnames, data)
+            host_stats = Command.get_connection_status(hostnames, data)
            lines = Command._format_lines(host_stats)

-            print(f'Broadcast websocket connection status from "{me.hostname}" to:')
+            print(f'Broadcast websocket connection status from "{my_hostname}" to:')
            print('\n'.join(lines))

-            host_stats = Command.get_connection_stats(me, hostnames, data)
+            host_stats = Command.get_connection_stats(hostnames, data)
            lines = Command._format_lines(host_stats)

-            print(f'\nBroadcast websocket connection stats from "{me.hostname}" to:')
+            print(f'\nBroadcast websocket connection stats from "{my_hostname}" to:')
            print('\n'.join(lines))

            return
--- a/awx/main/managers.py
+++ b/awx/main/managers.py
@@ -99,9 +99,12 @@ class InstanceManager(models.Manager):
    instance or role.
    """

+    def my_hostname(self):  # this node's hostname, read from settings without a database query
+        return settings.CLUSTER_HOST_ID
+
    def me(self):
        """Return the currently active instance."""
-        node = self.filter(hostname=settings.CLUSTER_HOST_ID)
+        node = self.filter(hostname=self.my_hostname())  # NOTE(review): exists() + node[0] below issue two queries; .first() would need one
        if node.exists():
            return node[0]
        raise RuntimeError("No instance found with the current cluster host id")
@@ -129,10 +132,13 @@ class InstanceManager(models.Manager):
                # if instance was not retrieved by uuid and hostname was, use the hostname
                instance = self.filter(hostname=hostname)

+            from awx.main.models import Instance
+
            # Return existing instance
            if instance.exists():
                instance = instance.first()  # in the unusual occasion that there is more than one, only get one
-                update_fields = []
+                instance.node_state = Instance.States.INSTALLED  # Wait for it to show up on the mesh
+                update_fields = ['node_state']
                # if instance was retrieved by uuid and hostname has changed, update hostname
                if instance.hostname != hostname:
                    logger.warning("passed in hostname {0} is different from the original hostname {1}, updating to {0}".format(hostname, instance.hostname))
@@ -141,6 +147,7 @@ class InstanceManager(models.Manager):
                # if any other fields are to be updated
                if instance.ip_address != ip_address:
                    instance.ip_address = ip_address
+                    update_fields.append('ip_address')
                if instance.node_type != node_type:
                    instance.node_type = node_type
                    update_fields.append('node_type')
@@ -151,12 +158,16 @@ class InstanceManager(models.Manager):
                    return (False, instance)

            # Create new instance, and fill in default values
-            create_defaults = dict(capacity=0)
+            create_defaults = {
+                'node_state': Instance.States.INSTALLED,
+                'capacity': 0,
+                'listener_port': 27199,
+            }
            if defaults is not None:
                create_defaults.update(defaults)
            uuid_option = {}
            if uuid is not None:
-                uuid_option = dict(uuid=uuid)
+                uuid_option = {'uuid': uuid}
            if node_type == 'execution' and 'version' not in create_defaults:
                create_defaults['version'] = RECEPTOR_PENDING
            instance = self.create(hostname=hostname, ip_address=ip_address, node_type=node_type, **create_defaults, **uuid_option)
--- a/awx/main/migrations/0164_remove_inventorysource_update_on_project_update.py
+++ b/awx/main/migrations/0164_remove_inventorysource_update_on_project_update.py
@@ -1,24 +1,14 @@
 # Generated by Django 3.2.13 on 2022-06-21 21:29

 from django.db import migrations
-import logging
-
-logger = logging.getLogger("awx")


 def forwards(apps, schema_editor):
    InventorySource = apps.get_model('main', 'InventorySource')
-    sources = InventorySource.objects.filter(update_on_project_update=True)
-    for src in sources:
-        if src.update_on_launch == False:
-            src.update_on_launch = True
-            src.save(update_fields=['update_on_launch'])
-            logger.info(f"Setting update_on_launch to True for {src}")
-        proj = src.source_project
-        if proj and proj.scm_update_on_launch is False:
-            proj.scm_update_on_launch = True
-            proj.save(update_fields=['scm_update_on_launch'])
-            logger.warning(f"Setting scm_update_on_launch to True for {proj}")
+    InventorySource.objects.filter(update_on_project_update=True).update(update_on_launch=True)  # bulk update replaces the per-row saves
+    # Likewise, every project with at least one such inventory source gets scm_update_on_launch enabled.
+    Project = apps.get_model('main', 'Project')
+    Project.objects.filter(scm_inventory_sources__update_on_project_update=True).update(scm_update_on_launch=True)


 class Migration(migrations.Migration):
--- a/awx/main/migrations/0167_project_signature_validation_credential.py
+++ b/awx/main/migrations/0167_project_signature_validation_credential.py
@@ -0,0 +1,57 @@
+# Generated by Django 3.2.13 on 2022-08-24 14:02
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+from awx.main.models import CredentialType
+from awx.main.utils.common import set_current_apps
+
+
+def setup_tower_managed_defaults(apps, schema_editor):  # RunPython hook; schema_editor is unused
+    set_current_apps(apps)  # presumably lets shared helpers resolve the migration-time app registry -- confirm
+    CredentialType.setup_tower_managed_defaults(apps)  # NOTE(review): CredentialType is the live model imported above, not a historical one
+
+
+class Migration(migrations.Migration):
+    # Adds Project.signature_validation_credential, adds 'cryptography' to CredentialType.kind choices, then runs setup_tower_managed_defaults.
+    dependencies = [
+        ('main', '0166_alter_jobevent_host'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='project',
+            name='signature_validation_credential',
+            field=models.ForeignKey(
+                blank=True,
+                default=None,
+                null=True,
+                on_delete=django.db.models.deletion.SET_NULL,
+                related_name='projects_signature_validation',
+                to='main.credential',
+                help_text='An optional credential used for validating files in the project against unexpected changes.',
+            ),
+        ),
+        migrations.AlterField(
+            model_name='credentialtype',
+            name='kind',
+            field=models.CharField(
+                choices=[
+                    ('ssh', 'Machine'),
+                    ('vault', 'Vault'),
+                    ('net', 'Network'),
+                    ('scm', 'Source Control'),
+                    ('cloud', 'Cloud'),
+                    ('registry', 'Container Registry'),
+                    ('token', 'Personal Access Token'),
+                    ('insights', 'Insights'),
+                    ('external', 'External'),
+                    ('kubernetes', 'Kubernetes'),
+                    ('galaxy', 'Galaxy/Automation Hub'),
+                    ('cryptography', 'Cryptography'),
+                ],
+                max_length=32,
+            ),
+        ),
+        migrations.RunPython(setup_tower_managed_defaults),  # NOTE(review): no reverse_code given, so this step cannot be unapplied
+    ]
--- a/awx/main/migrations/0168_inventoryupdate_scm_revision.py
+++ b/awx/main/migrations/0168_inventoryupdate_scm_revision.py
@@ -0,0 +1,25 @@
+# Generated by Django 3.2.13 on 2022-09-08 16:03
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Adds the non-editable InventoryUpdate.scm_revision text column (empty string by default).
+    dependencies = [
+        ('main', '0167_project_signature_validation_credential'),
+    ]
+    # NOTE(review): help_text below reads "inventories source from scm" (likely "sourced"); any fix belongs in the model first.
+    operations = [
+        migrations.AddField(
+            model_name='inventoryupdate',
+            name='scm_revision',
+            field=models.CharField(
+                blank=True,
+                default='',
+                editable=False,
+                help_text='The SCM Revision from the Project used for this inventory update.  Only applicable to inventories source from scm',
+                max_length=1024,
+                verbose_name='SCM Revision',
+            ),
+        ),
+    ]
--- a/awx/main/migrations/0169_jt_prompt_everything_on_launch.py
+++ b/awx/main/migrations/0169_jt_prompt_everything_on_launch.py
@@ -0,0 +1,225 @@
+# Generated by Django 3.2.13 on 2022-09-15 14:07
+
+import awx.main.fields
+import awx.main.utils.polymorphic
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    # Adds ask_*_on_launch flags to job/workflow templates plus execution_environment, labels and instance_groups relations (with through tables).
+    dependencies = [
+        ('main', '0168_inventoryupdate_scm_revision'),
+    ]
+    # NOTE(review): WorkflowJobInstanceGroupMembership names its FK 'workflowjobnode' although it targets main.workflowjob -- confirm this is intended.
+    operations = [
+        migrations.AddField(
+            model_name='joblaunchconfig',
+            name='execution_environment',
+            field=models.ForeignKey(
+                blank=True,
+                default=None,
+                help_text='The container image to be used for execution.',
+                null=True,
+                on_delete=awx.main.utils.polymorphic.SET_NULL,
+                related_name='joblaunchconfig_as_prompt',
+                to='main.executionenvironment',
+            ),
+        ),
+        migrations.AddField(
+            model_name='joblaunchconfig',
+            name='labels',
+            field=models.ManyToManyField(related_name='joblaunchconfig_labels', to='main.Label'),
+        ),
+        migrations.AddField(
+            model_name='jobtemplate',
+            name='ask_execution_environment_on_launch',
+            field=awx.main.fields.AskForField(blank=True, default=False),
+        ),
+        migrations.AddField(
+            model_name='jobtemplate',
+            name='ask_forks_on_launch',
+            field=awx.main.fields.AskForField(blank=True, default=False),
+        ),
+        migrations.AddField(
+            model_name='jobtemplate',
+            name='ask_instance_groups_on_launch',
+            field=awx.main.fields.AskForField(blank=True, default=False),
+        ),
+        migrations.AddField(
+            model_name='jobtemplate',
+            name='ask_job_slice_count_on_launch',
+            field=awx.main.fields.AskForField(blank=True, default=False),
+        ),
+        migrations.AddField(
+            model_name='jobtemplate',
+            name='ask_labels_on_launch',
+            field=awx.main.fields.AskForField(blank=True, default=False),
+        ),
+        migrations.AddField(
+            model_name='jobtemplate',
+            name='ask_timeout_on_launch',
+            field=awx.main.fields.AskForField(blank=True, default=False),
+        ),
+        migrations.AddField(
+            model_name='schedule',
+            name='execution_environment',
+            field=models.ForeignKey(
+                blank=True,
+                default=None,
+                help_text='The container image to be used for execution.',
+                null=True,
+                on_delete=awx.main.utils.polymorphic.SET_NULL,
+                related_name='schedule_as_prompt',
+                to='main.executionenvironment',
+            ),
+        ),
+        migrations.AddField(
+            model_name='schedule',
+            name='labels',
+            field=models.ManyToManyField(related_name='schedule_labels', to='main.Label'),
+        ),
+        migrations.AddField(
+            model_name='workflowjobnode',
+            name='execution_environment',
+            field=models.ForeignKey(
+                blank=True,
+                default=None,
+                help_text='The container image to be used for execution.',
+                null=True,
+                on_delete=awx.main.utils.polymorphic.SET_NULL,
+                related_name='workflowjobnode_as_prompt',
+                to='main.executionenvironment',
+            ),
+        ),
+        migrations.AddField(
+            model_name='workflowjobnode',
+            name='labels',
+            field=models.ManyToManyField(related_name='workflowjobnode_labels', to='main.Label'),
+        ),
+        migrations.AddField(
+            model_name='workflowjobtemplate',
+            name='ask_labels_on_launch',
+            field=awx.main.fields.AskForField(blank=True, default=False),
+        ),
+        migrations.AddField(
+            model_name='workflowjobtemplate',
+            name='ask_skip_tags_on_launch',
+            field=awx.main.fields.AskForField(blank=True, default=False),
+        ),
+        migrations.AddField(
+            model_name='workflowjobtemplate',
+            name='ask_tags_on_launch',
+            field=awx.main.fields.AskForField(blank=True, default=False),
+        ),
+        migrations.AddField(
+            model_name='workflowjobtemplatenode',
+            name='execution_environment',
+            field=models.ForeignKey(
+                blank=True,
+                default=None,
+                help_text='The container image to be used for execution.',
+                null=True,
+                on_delete=awx.main.utils.polymorphic.SET_NULL,
+                related_name='workflowjobtemplatenode_as_prompt',
+                to='main.executionenvironment',
+            ),
+        ),
+        migrations.AddField(
+            model_name='workflowjobtemplatenode',
+            name='labels',
+            field=models.ManyToManyField(related_name='workflowjobtemplatenode_labels', to='main.Label'),
+        ),
+        migrations.CreateModel(
+            name='WorkflowJobTemplateNodeBaseInstanceGroupMembership',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('position', models.PositiveIntegerField(db_index=True, default=None, null=True)),
+                ('instancegroup', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.instancegroup')),
+                ('workflowjobtemplatenode', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.workflowjobtemplatenode')),
+            ],
+        ),
+        migrations.CreateModel(
+            name='WorkflowJobNodeBaseInstanceGroupMembership',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('position', models.PositiveIntegerField(db_index=True, default=None, null=True)),
+                ('instancegroup', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.instancegroup')),
+                ('workflowjobnode', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.workflowjobnode')),
+            ],
+        ),
+        migrations.CreateModel(
+            name='WorkflowJobInstanceGroupMembership',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('position', models.PositiveIntegerField(db_index=True, default=None, null=True)),
+                ('instancegroup', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.instancegroup')),
+                ('workflowjobnode', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.workflowjob')),
+            ],
+        ),
+        migrations.CreateModel(
+            name='ScheduleInstanceGroupMembership',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('position', models.PositiveIntegerField(db_index=True, default=None, null=True)),
+                ('instancegroup', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.instancegroup')),
+                ('schedule', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.schedule')),
+            ],
+        ),
+        migrations.CreateModel(
+            name='JobLaunchConfigInstanceGroupMembership',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('position', models.PositiveIntegerField(db_index=True, default=None, null=True)),
+                ('instancegroup', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.instancegroup')),
+                ('joblaunchconfig', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.joblaunchconfig')),
+            ],
+        ),
+        migrations.AddField(
+            model_name='joblaunchconfig',
+            name='instance_groups',
+            field=awx.main.fields.OrderedManyToManyField(
+                blank=True, editable=False, related_name='joblaunchconfigs', through='main.JobLaunchConfigInstanceGroupMembership', to='main.InstanceGroup'
+            ),
+        ),
+        migrations.AddField(
+            model_name='schedule',
+            name='instance_groups',
+            field=awx.main.fields.OrderedManyToManyField(
+                blank=True, editable=False, related_name='schedule_instance_groups', through='main.ScheduleInstanceGroupMembership', to='main.InstanceGroup'
+            ),
+        ),
+        migrations.AddField(
+            model_name='workflowjob',
+            name='instance_groups',
+            field=awx.main.fields.OrderedManyToManyField(
+                blank=True,
+                editable=False,
+                related_name='workflow_job_instance_groups',
+                through='main.WorkflowJobInstanceGroupMembership',
+                to='main.InstanceGroup',
+            ),
+        ),
+        migrations.AddField(
+            model_name='workflowjobnode',
+            name='instance_groups',
+            field=awx.main.fields.OrderedManyToManyField(
+                blank=True,
+                editable=False,
+                related_name='workflow_job_node_instance_groups',
+                through='main.WorkflowJobNodeBaseInstanceGroupMembership',
+                to='main.InstanceGroup',
+            ),
+        ),
+        migrations.AddField(
+            model_name='workflowjobtemplatenode',
+            name='instance_groups',
+            field=awx.main.fields.OrderedManyToManyField(
+                blank=True,
+                editable=False,
+                related_name='workflow_job_template_node_instance_groups',
+                through='main.WorkflowJobTemplateNodeBaseInstanceGroupMembership',
+                to='main.InstanceGroup',
+            ),
+        ),
+    ]
--- a/awx/main/migrations/0170_node_and_link_state.py
+++ b/awx/main/migrations/0170_node_and_link_state.py
@@ -0,0 +1,79 @@
+# Generated by Django 3.2.13 on 2022-08-02 17:53
+
+import django.core.validators
+from django.db import migrations, models
+
+
+def forwards(apps, schema_editor):
+    # InstanceLink rows need no data fix-up: 'established' is the column
+    # default, so only the pre-existing Instance rows are touched here.
+    # An instance with recorded errors is 'unavailable', otherwise 'ready'.
+    node_model = apps.get_model('main', 'Instance')
+    for node in node_model.objects.all():
+        has_errors = bool(node.errors)
+        node.node_state = 'unavailable' if has_errors else 'ready'
+        node.save(update_fields=['node_state'])
+
+
+class Migration(migrations.Migration):
+    # Adds Instance.listener_port and Instance.node_state plus InstanceLink.link_state, re-declares Instance.node_type, then backfills node_state.
+    dependencies = [
+        ('main', '0169_jt_prompt_everything_on_launch'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='instance',
+            name='listener_port',
+            field=models.PositiveIntegerField(
+                blank=True,
+                default=27199,
+                help_text='Port that Receptor will listen for incoming connections on.',
+                validators=[django.core.validators.MinValueValidator(1), django.core.validators.MaxValueValidator(65535)],
+            ),
+        ),
+        migrations.AddField(
+            model_name='instance',
+            name='node_state',
+            field=models.CharField(
+                choices=[
+                    ('provisioning', 'Provisioning'),
+                    ('provision-fail', 'Provisioning Failure'),
+                    ('installed', 'Installed'),
+                    ('ready', 'Ready'),
+                    ('unavailable', 'Unavailable'),
+                    ('deprovisioning', 'De-provisioning'),
+                    ('deprovision-fail', 'De-provisioning Failure'),
+                ],
+                default='ready',
+                help_text='Indicates the current life cycle stage of this instance.',
+                max_length=16,
+            ),
+        ),
+        migrations.AddField(
+            model_name='instancelink',
+            name='link_state',
+            field=models.CharField(
+                choices=[('adding', 'Adding'), ('established', 'Established'), ('removing', 'Removing')],
+                default='established',
+                help_text='Indicates the current life cycle stage of this peer link.',
+                max_length=16,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='instance',
+            name='node_type',
+            field=models.CharField(
+                choices=[
+                    ('control', 'Control plane node'),
+                    ('execution', 'Execution plane node'),
+                    ('hybrid', 'Controller and execution'),
+                    ('hop', 'Message-passing node, no execution capability'),
+                ],
+                default='hybrid',
+                help_text='Role that this node plays in the mesh.',
+                max_length=16,
+            ),
+        ),
+        migrations.RunPython(forwards, reverse_code=migrations.RunPython.noop),  # data backfill; its reverse step is a no-op
+    ]
--- a/awx/main/migrations/0171_add_health_check_started.py
+++ b/awx/main/migrations/0171_add_health_check_started.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.2.13 on 2022-09-26 20:54
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Adds the nullable, non-editable Instance.health_check_started timestamp.
+    dependencies = [
+        ('main', '0170_node_and_link_state'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='instance',
+            name='health_check_started',
+            field=models.DateTimeField(editable=False, help_text='The last time a health check was initiated on this instance.', null=True),
+        ),
+    ]
--- a/awx/main/migrations/0172_prevent_instance_fallback.py
+++ b/awx/main/migrations/0172_prevent_instance_fallback.py
@@ -0,0 +1,29 @@
+# Generated by Django 3.2.13 on 2022-09-29 18:10
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Adds a prevent_instance_group_fallback boolean (default False) to both Inventory and JobTemplate.
+    dependencies = [
+        ('main', '0171_add_health_check_started'),
+    ]
+    # NOTE(review): both help_text strings lack a space after "on." -- fix in the models and regenerate rather than editing here.
+    operations = [
+        migrations.AddField(
+            model_name='inventory',
+            name='prevent_instance_group_fallback',
+            field=models.BooleanField(
+                default=False,
+                help_text='If enabled, the inventory will prevent adding any organization instance groups to the list of preferred instances groups to run associated job templates on.If this setting is enabled and you provided an empty list, the global instance groups will be applied.',
+            ),
+        ),
+        migrations.AddField(
+            model_name='jobtemplate',
+            name='prevent_instance_group_fallback',
+            field=models.BooleanField(
+                default=False,
+                help_text='If enabled, the job template will prevent adding any inventory or organization instance groups to the list of preferred instances groups to run on.If this setting is enabled and you provided an empty list, the global instance groups will be applied.',
+            ),
+        ),
+    ]
--- a/awx/main/migrations/0173_instancegroup_max_limits.py
+++ b/awx/main/migrations/0173_instancegroup_max_limits.py
@@ -0,0 +1,23 @@
+# Generated by Django 3.2.13 on 2022-10-24 18:22
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Adds InstanceGroup.max_concurrent_jobs and InstanceGroup.max_forks; both default to 0, which the help text defines as "no limit".
+    dependencies = [
+        ('main', '0172_prevent_instance_fallback'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='instancegroup',
+            name='max_concurrent_jobs',
+            field=models.IntegerField(default=0, help_text='Maximum number of concurrent jobs to run on this group. Zero means no limit.'),
+        ),
+        migrations.AddField(
+            model_name='instancegroup',
+            name='max_forks',
+            field=models.IntegerField(default=0, help_text='Max forks to execute on this group. Zero means no limit.'),
+        ),
+    ]
--- a/awx/main/migrations/0174_ensure_org_ee_admin_roles.py
+++ b/awx/main/migrations/0174_ensure_org_ee_admin_roles.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.2.16 on 2022-12-07 21:11
+
+from django.db import migrations
+
+from awx.main.migrations import _rbac as rbac
+from awx.main.migrations import _migration_utils as migration_utils
+
+
+class Migration(migrations.Migration):
+    # Data-only migration: runs set_current_apps_for_migrations, then rbac.create_roles (presumably to backfill missing roles -- confirm).
+    dependencies = [
+        ('main', '0173_instancegroup_max_limits'),
+    ]
+    # NOTE(review): neither RunPython below passes reverse_code, so this migration cannot be unapplied.
+    operations = [
+        migrations.RunPython(migration_utils.set_current_apps_for_migrations),
+        migrations.RunPython(rbac.create_roles),
+    ]
--- a/awx/main/migrations/_create_system_jobs.py
+++ b/awx/main/migrations/_create_system_jobs.py
@@ -4,7 +4,7 @@ from django.utils.timezone import now

 logger = logging.getLogger('awx.main.migrations')

-__all__ = ['create_collection_jt', 'create_clearsessions_jt', 'create_cleartokens_jt']
+__all__ = ['create_clearsessions_jt', 'create_cleartokens_jt']

 '''
 These methods are called by migrations to create various system job templates
@@ -36,7 +36,7 @@ def create_clearsessions_jt(apps, schema_editor):
    if created:
        sched = Schedule(
            name='Cleanup Expired Sessions',
-            rrule='DTSTART:%s RRULE:FREQ=WEEKLY;INTERVAL=1;COUNT=1' % schedule_time,
+            rrule='DTSTART:%s RRULE:FREQ=WEEKLY;INTERVAL=1' % schedule_time,
            description='Cleans out expired browser sessions',
            enabled=True,
            created=now_dt,
@@ -69,7 +69,7 @@ def create_cleartokens_jt(apps, schema_editor):
    if created:
        sched = Schedule(
            name='Cleanup Expired OAuth 2 Tokens',
-            rrule='DTSTART:%s RRULE:FREQ=WEEKLY;INTERVAL=1;COUNT=1' % schedule_time,
+            rrule='DTSTART:%s RRULE:FREQ=WEEKLY;INTERVAL=1' % schedule_time,
            description='Removes expired OAuth 2 access and refresh tokens',
            enabled=True,
            created=now_dt,
--- a/awx/main/migrations/_galaxy.py
+++ b/awx/main/migrations/_galaxy.py
@@ -44,7 +44,7 @@ def migrate_galaxy_settings(apps, schema_editor):
            credential_type=galaxy_type,
            inputs={'url': 'https://galaxy.ansible.com/'},
        )
-    except:
+    except Exception:
        # Needed for new migrations, tests
        public_galaxy_credential = Credential(
            created=now(), modified=now(), name='Ansible Galaxy', managed=True, credential_type=galaxy_type, inputs={'url': 'https://galaxy.ansible.com/'}
--- a/awx/main/models/ad_hoc_commands.py
+++ b/awx/main/models/ad_hoc_commands.py
@@ -228,15 +228,14 @@ class AdHocCommand(UnifiedJob, JobNotificationMixin):

    @property
    def preferred_instance_groups(self):
-        if self.inventory is not None and self.inventory.organization is not None:
-            organization_groups = [x for x in self.inventory.organization.instance_groups.all()]
-        else:
-            organization_groups = []
+        selected_groups = []
        if self.inventory is not None:
-            inventory_groups = [x for x in self.inventory.instance_groups.all()]
-        else:
-            inventory_groups = []
-        selected_groups = inventory_groups + organization_groups
+            for instance_group in self.inventory.instance_groups.all():
+                selected_groups.append(instance_group)
+            if not self.inventory.prevent_instance_group_fallback and self.inventory.organization is not None:
+                for instance_group in self.inventory.organization.instance_groups.all():
+                    selected_groups.append(instance_group)
+
        if not selected_groups:
            return self.global_instance_groups
        return selected_groups
--- a/awx/main/models/credential/init.py
+++ b/awx/main/models/credential/init.py
@@ -282,7 +282,7 @@ class Credential(PasswordFieldsModel, CommonModelNameNotUnique, ResourceMixin):
                        return field['default']
                if 'default' in kwargs:
                    return kwargs['default']
-                raise AttributeError
+                raise AttributeError(field_name)
        if field_name in self.inputs:
            return self.inputs[field_name]
        if 'default' in kwargs:
@@ -336,6 +336,7 @@ class CredentialType(CommonModelNameNotUnique):
        ('external', _('External')),
        ('kubernetes', _('Kubernetes')),
        ('galaxy', _('Galaxy/Automation Hub')),
+        ('cryptography', _('Cryptography')),
    )

    kind = models.CharField(max_length=32, choices=KIND_CHOICES)
@@ -1171,6 +1172,25 @@ ManagedCredentialType(
    },
 )

+ManagedCredentialType(
+    namespace='gpg_public_key',
+    kind='cryptography',
+    name=gettext_noop('GPG Public Key'),
+    inputs={
+        'fields': [
+            {
+                'id': 'gpg_public_key',
+                'label': gettext_noop('GPG Public Key'),
+                'type': 'string',
+                'secret': True,
+                'multiline': True,
+                'help_text': gettext_noop('GPG Public Key used to validate content signatures.'),
+            },
+        ],
+        'required': ['gpg_public_key'],
+    },
+)
+

 class CredentialInputSource(PrimordialModel):
    class Meta:
--- a/awx/main/models/credential/injectors.py
+++ b/awx/main/models/credential/injectors.py
@@ -15,6 +15,7 @@ def aws(cred, env, private_data_dir):

    if cred.has_input('security_token'):
        env['AWS_SECURITY_TOKEN'] = cred.get_input('security_token', default='')
+        env['AWS_SESSION_TOKEN'] = env['AWS_SECURITY_TOKEN']


 def gce(cred, env, private_data_dir):
--- a/awx/main/models/ha.py
+++ b/awx/main/models/ha.py
@@ -5,7 +5,7 @@ from decimal import Decimal
 import logging
 import os

-from django.core.validators import MinValueValidator
+from django.core.validators import MinValueValidator, MaxValueValidator
 from django.db import models, connection
 from django.db.models.signals import post_save, post_delete
 from django.dispatch import receiver
@@ -59,6 +59,15 @@ class InstanceLink(BaseModel):
    source = models.ForeignKey('Instance', on_delete=models.CASCADE, related_name='+')
    target = models.ForeignKey('Instance', on_delete=models.CASCADE, related_name='reverse_peers')

+    class States(models.TextChoices):
+        ADDING = 'adding', _('Adding')
+        ESTABLISHED = 'established', _('Established')
+        REMOVING = 'removing', _('Removing')
+
+    link_state = models.CharField(
+        choices=States.choices, default=States.ESTABLISHED, max_length=16, help_text=_("Indicates the current life cycle stage of this peer link.")
+    )
+
    class Meta:
        unique_together = ('source', 'target')

@@ -105,6 +114,11 @@ class Instance(HasPolicyEditsMixin, BaseModel):
        editable=False,
        help_text=_('Last time instance ran its heartbeat task for main cluster nodes. Last known connection to receptor mesh for execution nodes.'),
    )
+    health_check_started = models.DateTimeField(
+        null=True,
+        editable=False,
+        help_text=_("The last time a health check was initiated on this instance."),
+    )
    last_health_check = models.DateTimeField(
        null=True,
        editable=False,
@@ -127,13 +141,33 @@ class Instance(HasPolicyEditsMixin, BaseModel):
        default=0,
        editable=False,
    )
-    NODE_TYPE_CHOICES = [
-        ("control", "Control plane node"),
-        ("execution", "Execution plane node"),
-        ("hybrid", "Controller and execution"),
-        ("hop", "Message-passing node, no execution capability"),
-    ]
-    node_type = models.CharField(default='hybrid', choices=NODE_TYPE_CHOICES, max_length=16)
+
+    class Types(models.TextChoices):
+        CONTROL = 'control', _("Control plane node")
+        EXECUTION = 'execution', _("Execution plane node")
+        HYBRID = 'hybrid', _("Controller and execution")
+        HOP = 'hop', _("Message-passing node, no execution capability")
+
+    node_type = models.CharField(default=Types.HYBRID, choices=Types.choices, max_length=16, help_text=_("Role that this node plays in the mesh."))
+
+    class States(models.TextChoices):
+        PROVISIONING = 'provisioning', _('Provisioning')
+        PROVISION_FAIL = 'provision-fail', _('Provisioning Failure')
+        INSTALLED = 'installed', _('Installed')
+        READY = 'ready', _('Ready')
+        UNAVAILABLE = 'unavailable', _('Unavailable')
+        DEPROVISIONING = 'deprovisioning', _('De-provisioning')
+        DEPROVISION_FAIL = 'deprovision-fail', _('De-provisioning Failure')
+
+    node_state = models.CharField(
+        choices=States.choices, default=States.READY, max_length=16, help_text=_("Indicates the current life cycle stage of this instance.")
+    )
+    listener_port = models.PositiveIntegerField(
+        blank=True,
+        default=27199,
+        validators=[MinValueValidator(1), MaxValueValidator(65535)],
+        help_text=_("Port that Receptor will listen for incoming connections on."),
+    )

    peers = models.ManyToManyField('self', symmetrical=False, through=InstanceLink, through_fields=('source', 'target'))

@@ -178,6 +212,14 @@ class Instance(HasPolicyEditsMixin, BaseModel):
    def jobs_total(self):
        return UnifiedJob.objects.filter(execution_node=self.hostname).count()

+    @property
+    def health_check_pending(self):
+        """True when a health check was started and no later result has been recorded."""
+        if self.health_check_started is None:
+            return False
+        finished = self.last_health_check
+        return finished is None or self.health_check_started > finished
+
    def get_cleanup_task_kwargs(self, **kwargs):
        """
        Produce options to use for the command: ansible-runner worker cleanup
@@ -191,11 +233,12 @@ class Instance(HasPolicyEditsMixin, BaseModel):
        if not isinstance(vargs.get('grace_period'), int):
            vargs['grace_period'] = 60  # grace period of 60 minutes, need to set because CLI default will not take effect
        if 'exclude_strings' not in vargs and vargs.get('file_pattern'):
-            active_pks = list(
-                UnifiedJob.objects.filter(
-                    (models.Q(execution_node=self.hostname) | models.Q(controller_node=self.hostname)) & models.Q(status__in=('running', 'waiting'))
-                ).values_list('pk', flat=True)
-            )
+            active_job_qs = UnifiedJob.objects.filter(status__in=('running', 'waiting'))
+            if self.node_type == 'execution':
+                active_job_qs = active_job_qs.filter(execution_node=self.hostname)
+            else:
+                active_job_qs = active_job_qs.filter(controller_node=self.hostname)
+            active_pks = list(active_job_qs.values_list('pk', flat=True))
            if active_pks:
                vargs['exclude_strings'] = [JOB_FOLDER_PREFIX % job_id for job_id in active_pks]
        if 'remove_images' in vargs or 'image_prune' in vargs:
@@ -213,18 +256,22 @@ class Instance(HasPolicyEditsMixin, BaseModel):
        return self.last_seen < ref_time - timedelta(seconds=grace_period)

    def mark_offline(self, update_last_seen=False, perform_save=True, errors=''):
-        if self.cpu_capacity == 0 and self.mem_capacity == 0 and self.capacity == 0 and self.errors == errors and (not update_last_seen):
-            return
+        if self.node_state not in (Instance.States.READY, Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
+            return []
+        if self.node_state == Instance.States.UNAVAILABLE and self.errors == errors and (not update_last_seen):
+            return []
+        self.node_state = Instance.States.UNAVAILABLE
        self.cpu_capacity = self.mem_capacity = self.capacity = 0
        self.errors = errors
        if update_last_seen:
            self.last_seen = now()

+        update_fields = ['node_state', 'capacity', 'cpu_capacity', 'mem_capacity', 'errors']
+        if update_last_seen:
+            update_fields += ['last_seen']
        if perform_save:
-            update_fields = ['capacity', 'cpu_capacity', 'mem_capacity', 'errors']
-            if update_last_seen:
-                update_fields += ['last_seen']
            self.save(update_fields=update_fields)
+        return update_fields

    def set_capacity_value(self):
        """Sets capacity according to capacity adjustment rule (no save)"""
@@ -278,8 +325,12 @@ class Instance(HasPolicyEditsMixin, BaseModel):
        if not errors:
            self.refresh_capacity_fields()
            self.errors = ''
+            if self.node_state in (Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
+                self.node_state = Instance.States.READY
+                update_fields.append('node_state')
        else:
-            self.mark_offline(perform_save=False, errors=errors)
+            fields_to_update = self.mark_offline(perform_save=False, errors=errors)
+            update_fields.extend(fields_to_update)
        update_fields.extend(['cpu_capacity', 'mem_capacity', 'capacity'])

        # disabling activity stream will avoid extra queries, which is important for heatbeat actions
@@ -296,7 +347,7 @@ class Instance(HasPolicyEditsMixin, BaseModel):
            # playbook event data; we should consider this a zero capacity event
            redis.Redis.from_url(settings.BROKER_URL).ping()
        except redis.ConnectionError:
-            errors = _('Failed to connect ot Redis')
+            errors = _('Failed to connect to Redis')

        self.save_health_data(awx_application_version, get_cpu_count(), get_mem_in_bytes(), update_last_seen=True, errors=errors)

@@ -328,6 +379,8 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
            default='',
        )
    )
+    max_concurrent_jobs = models.IntegerField(default=0, help_text=_("Maximum number of concurrent jobs to run on this group. Zero means no limit."))
+    max_forks = models.IntegerField(default=0, help_text=_("Max forks to execute on this group. Zero means no limit."))
    policy_instance_percentage = models.IntegerField(default=0, help_text=_("Percentage of Instances to automatically assign to this group"))
    policy_instance_minimum = models.IntegerField(default=0, help_text=_("Static minimum number of Instances to automatically assign to this group"))
    policy_instance_list = JSONBlob(
@@ -341,6 +394,8 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):

    @property
    def capacity(self):
+        if self.is_container_group:
+            return self.max_forks
        return sum(inst.capacity for inst in self.instances.all())

    @property
@@ -388,6 +443,20 @@ def on_instance_group_saved(sender, instance, created=False, raw=False, **kwargs

@receiver(post_save, sender=Instance)
 def on_instance_saved(sender, instance, created=False, raw=False, **kwargs):
+    if settings.IS_K8S and instance.node_type in (Instance.Types.EXECUTION,):
+        if instance.node_state == Instance.States.DEPROVISIONING:
+            from awx.main.tasks.receptor import remove_deprovisioned_node  # prevents circular import
+
+            # wait for jobs on the node to complete, then delete the
+            # node and kick off write_receptor_config
+            connection.on_commit(lambda: remove_deprovisioned_node.apply_async([instance.hostname]))
+
+        if instance.node_state == Instance.States.INSTALLED:
+            from awx.main.tasks.receptor import write_receptor_config  # prevents circular import
+
+            # broadcast to all control instances to update their receptor configs
+            connection.on_commit(lambda: write_receptor_config.apply_async(queue='tower_broadcast_all'))
+
    if created or instance.has_policy_changes():
        schedule_policy_task()

@@ -434,3 +503,58 @@ class InventoryInstanceGroupMembership(models.Model):
        default=None,
        db_index=True,
    )
+
+
+class JobLaunchConfigInstanceGroupMembership(models.Model):
+    """Row linking one JobLaunchConfig to one InstanceGroup; position is presumably the group's rank in an ordered list (TODO confirm)."""
+    joblaunchconfig = models.ForeignKey('JobLaunchConfig', on_delete=models.CASCADE)
+    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
+    position = models.PositiveIntegerField(
+        null=True,
+        default=None,
+        db_index=True,
+    )
+
+
+class ScheduleInstanceGroupMembership(models.Model):
+    """Row linking one Schedule to one InstanceGroup; position is presumably the group's rank in an ordered list (TODO confirm)."""
+    schedule = models.ForeignKey('Schedule', on_delete=models.CASCADE)
+    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
+    position = models.PositiveIntegerField(
+        null=True,
+        default=None,
+        db_index=True,
+    )
+
+
+class WorkflowJobTemplateNodeBaseInstanceGroupMembership(models.Model):
+    """Row linking one WorkflowJobTemplateNode to one InstanceGroup; position is presumably the group's rank in an ordered list (TODO confirm)."""
+    workflowjobtemplatenode = models.ForeignKey('WorkflowJobTemplateNode', on_delete=models.CASCADE)
+    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
+    position = models.PositiveIntegerField(
+        null=True,
+        default=None,
+        db_index=True,
+    )
+
+
+class WorkflowJobNodeBaseInstanceGroupMembership(models.Model):
+    """Row linking one WorkflowJobNode to one InstanceGroup; position is presumably the group's rank in an ordered list (TODO confirm)."""
+    workflowjobnode = models.ForeignKey('WorkflowJobNode', on_delete=models.CASCADE)
+    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
+    position = models.PositiveIntegerField(
+        null=True,
+        default=None,
+        db_index=True,
+    )
+
+
+class WorkflowJobInstanceGroupMembership(models.Model):
+    """Row linking one WorkflowJob to one InstanceGroup. NOTE(review): the FK is named workflowjobnode but targets WorkflowJob."""
+    workflowjobnode = models.ForeignKey('WorkflowJob', on_delete=models.CASCADE)
+    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
+    position = models.PositiveIntegerField(
+        null=True,
+        default=None,
+        db_index=True,
+    )
--- a/awx/main/models/inventory.py
+++ b/awx/main/models/inventory.py
@@ -63,7 +63,7 @@ class Inventory(CommonModelNameNotUnique, ResourceMixin, RelatedJobsMixin):
    an inventory source contains lists and hosts.
    """

-    FIELDS_TO_PRESERVE_AT_COPY = ['hosts', 'groups', 'instance_groups']
+    FIELDS_TO_PRESERVE_AT_COPY = ['hosts', 'groups', 'instance_groups', 'prevent_instance_group_fallback']
    KIND_CHOICES = [
        ('', _('Hosts have a direct link to this inventory.')),
        ('smart', _('Hosts for inventory generated using the host_filter property.')),
@@ -175,6 +175,16 @@ class Inventory(CommonModelNameNotUnique, ResourceMixin, RelatedJobsMixin):
        related_name='inventory_labels',
        help_text=_('Labels associated with this inventory.'),
    )
+    # Checked by preferred_instance_groups: when True, the organization's instance groups are not appended after the inventory's.
+    prevent_instance_group_fallback = models.BooleanField(
+        default=False,
+        help_text=(
+            "If enabled, the inventory will prevent adding any organization "
+            "instance groups to the list of preferred instance groups to run "
+            "associated job templates on. "
+            "If this setting is enabled and you provided an empty list, the global instance groups will be applied."
+        ),
+    )

    def get_absolute_url(self, request=None):
        return reverse('api:inventory_detail', kwargs={'pk': self.pk}, request=request)
@@ -236,6 +246,25 @@ class Inventory(CommonModelNameNotUnique, ResourceMixin, RelatedJobsMixin):
            raise ParseError(_('Slice number must be 1 or higher.'))
        return (number, step)

+    def get_sliced_hosts(self, host_queryset, slice_number, slice_count):
+        """
+        Select the hosts belonging to one slice of a sliced job.
+
+        Slice numbers are 1-based: slice ``slice_number`` of ``slice_count``
+        takes every ``slice_count``-th host starting at index
+        ``slice_number - 1``.
+
+        WARNING: the return type differs by path. When slicing applies, the
+        result is whatever extended slicing of ``host_queryset`` yields (a
+        list of hosts for a Django queryset). Otherwise the input object is
+        handed back untouched and unevaluated, which avoids loading a large
+        inventory into memory. Callers must check which one they received.
+        """
+        slicing_requested = slice_count > 1 and slice_number > 0
+        if not slicing_requested:
+            return host_queryset
+        return host_queryset[slice_number - 1 :: slice_count]
+
    def get_script_data(self, hostvars=False, towervars=False, show_all=False, slice_number=1, slice_count=1):
        hosts_kw = dict()
        if not show_all:
@@ -243,10 +272,8 @@ class Inventory(CommonModelNameNotUnique, ResourceMixin, RelatedJobsMixin):
        fetch_fields = ['name', 'id', 'variables', 'inventory_id']
        if towervars:
            fetch_fields.append('enabled')
-        hosts = self.hosts.filter(**hosts_kw).order_by('name').only(*fetch_fields)
-        if slice_count > 1 and slice_number > 0:
-            offset = slice_number - 1
-            hosts = hosts[offset::slice_count]
+        host_queryset = self.hosts.filter(**hosts_kw).order_by('name').only(*fetch_fields)
+        hosts = self.get_sliced_hosts(host_queryset, slice_number, slice_count)

        data = dict()
        all_group = data.setdefault('all', dict())
@@ -540,17 +567,6 @@ class Host(CommonModelNameNotUnique, RelatedJobsMixin):
    # Use .job_host_summaries.all() to get jobs affecting this host.
    # Use .job_events.all() to get events affecting this host.

-    '''
-    We don't use timestamp, but we may in the future.
-    '''
-
-    def update_ansible_facts(self, module, facts, timestamp=None):
-        if module == "ansible":
-            self.ansible_facts.update(facts)
-        else:
-            self.ansible_facts[module] = facts
-        self.save()
-
    def get_effective_host_name(self):
        """
        Return the name of the host that will be used in actual ansible
@@ -1187,6 +1203,14 @@ class InventoryUpdate(UnifiedJob, InventorySourceOptions, JobNotificationMixin,
        default=None,
        null=True,
    )
+    scm_revision = models.CharField(
+        max_length=1024,
+        blank=True,
+        default='',
+        editable=False,
+        verbose_name=_('SCM Revision'),
+        help_text=_('The SCM Revision from the Project used for this inventory update. Only applicable to inventories sourced from SCM.'),
+    )

    @property
    def is_container_group_task(self):
@@ -1256,15 +1280,19 @@ class InventoryUpdate(UnifiedJob, InventorySourceOptions, JobNotificationMixin,

    @property
    def preferred_instance_groups(self):
-        if self.inventory_source.inventory is not None and self.inventory_source.inventory.organization is not None:
-            organization_groups = [x for x in self.inventory_source.inventory.organization.instance_groups.all()]
-        else:
-            organization_groups = []
+        selected_groups = []
        if self.inventory_source.inventory is not None:
-            inventory_groups = [x for x in self.inventory_source.inventory.instance_groups.all()]
-        else:
-            inventory_groups = []
-        selected_groups = inventory_groups + organization_groups
+            # Add the inventory sources IG to the selected IGs first
+            for instance_group in self.inventory_source.inventory.instance_groups.all():
+                selected_groups.append(instance_group)
+            # If the inventory allows for fallback and we have an organization then also append the orgs IGs to the end of the list
+            if (
+                not getattr(self.inventory_source.inventory, 'prevent_instance_group_fallback', False)
+                and self.inventory_source.inventory.organization is not None
+            ):
+                for instance_group in self.inventory_source.inventory.organization.instance_groups.all():
+                    selected_groups.append(instance_group)
+
        if not selected_groups:
            return self.global_instance_groups
        return selected_groups
--- a/awx/main/models/jobs.py
+++ b/awx/main/models/jobs.py
@@ -15,6 +15,7 @@ from urllib.parse import urljoin
 from django.conf import settings
 from django.core.exceptions import ValidationError
 from django.db import models
+from django.db.models.query import QuerySet

 # from django.core.cache import cache
 from django.utils.encoding import smart_str
@@ -43,8 +44,8 @@ from awx.main.models.notifications import (
    NotificationTemplate,
    JobNotificationMixin,
 )
-from awx.main.utils import parse_yaml_or_json, getattr_dne, NullablePromptPseudoField
-from awx.main.fields import ImplicitRoleField, AskForField, JSONBlob
+from awx.main.utils import parse_yaml_or_json, getattr_dne, NullablePromptPseudoField, polymorphic, log_excess_runtime
+from awx.main.fields import ImplicitRoleField, AskForField, JSONBlob, OrderedManyToManyField
 from awx.main.models.mixins import (
    ResourceMixin,
    SurveyJobTemplateMixin,
@@ -203,7 +204,7 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
    playbook) to an inventory source with a given credential.
    """

-    FIELDS_TO_PRESERVE_AT_COPY = ['labels', 'instance_groups', 'credentials', 'survey_spec']
+    FIELDS_TO_PRESERVE_AT_COPY = ['labels', 'instance_groups', 'credentials', 'survey_spec', 'prevent_instance_group_fallback']
    FIELDS_TO_DISCARD_AT_COPY = ['vault_credential', 'credential']
    SOFT_UNIQUE_TOGETHER = [('polymorphic_ctype', 'name', 'organization')]

@@ -227,15 +228,6 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
        blank=True,
        default=False,
    )
-    ask_limit_on_launch = AskForField(
-        blank=True,
-        default=False,
-    )
-    ask_tags_on_launch = AskForField(blank=True, default=False, allows_field='job_tags')
-    ask_skip_tags_on_launch = AskForField(
-        blank=True,
-        default=False,
-    )
    ask_job_type_on_launch = AskForField(
        blank=True,
        default=False,
@@ -244,12 +236,27 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
        blank=True,
        default=False,
    )
-    ask_inventory_on_launch = AskForField(
+    ask_credential_on_launch = AskForField(blank=True, default=False, allows_field='credentials')
+    ask_execution_environment_on_launch = AskForField(
+        blank=True,
+        default=False,
+    )
+    ask_forks_on_launch = AskForField(
+        blank=True,
+        default=False,
+    )
+    ask_job_slice_count_on_launch = AskForField(
+        blank=True,
+        default=False,
+    )
+    ask_timeout_on_launch = AskForField(
+        blank=True,
+        default=False,
+    )
+    ask_instance_groups_on_launch = AskForField(
        blank=True,
        default=False,
    )
-    ask_credential_on_launch = AskForField(blank=True, default=False, allows_field='credentials')
-    ask_scm_branch_on_launch = AskForField(blank=True, default=False, allows_field='scm_branch')
    job_slice_count = models.PositiveIntegerField(
        blank=True,
        default=1,
@@ -268,6 +275,15 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
            'admin_role',
        ],
    )
+    # Checked by Job.preferred_instance_groups: when True, inventory and organization instance groups are not appended.
+    prevent_instance_group_fallback = models.BooleanField(
+        default=False,
+        help_text=(
+            "If enabled, the job template will prevent adding any inventory or organization "
+            "instance groups to the list of preferred instance groups to run on. "
+            "If this setting is enabled and you provided an empty list, the global instance groups will be applied."
+        ),
+    )

    @classmethod
    def _get_unified_job_class(cls):
@@ -276,7 +292,17 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
    @classmethod
    def _get_unified_job_field_names(cls):
        return set(f.name for f in JobOptions._meta.fields) | set(
-            ['name', 'description', 'organization', 'survey_passwords', 'labels', 'credentials', 'job_slice_number', 'job_slice_count', 'execution_environment']
+            [
+                'name',
+                'description',
+                'organization',
+                'survey_passwords',
+                'labels',
+                'credentials',
+                'job_slice_number',
+                'job_slice_count',
+                'execution_environment',
+            ]
        )

    @property
@@ -314,10 +340,13 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
        actual_inventory = self.inventory
        if self.ask_inventory_on_launch and 'inventory' in kwargs:
            actual_inventory = kwargs['inventory']
+        actual_slice_count = self.job_slice_count
+        if self.ask_job_slice_count_on_launch and 'job_slice_count' in kwargs:
+            actual_slice_count = kwargs['job_slice_count']
        if actual_inventory:
-            return min(self.job_slice_count, actual_inventory.hosts.count())
+            return min(actual_slice_count, actual_inventory.hosts.count())
        else:
-            return self.job_slice_count
+            return actual_slice_count

    def save(self, *args, **kwargs):
        update_fields = kwargs.get('update_fields', [])
@@ -425,10 +454,15 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour

            field = self._meta.get_field(field_name)
            if isinstance(field, models.ManyToManyField):
-                old_value = set(old_value.all())
-                new_value = set(kwargs[field_name]) - old_value
-                if not new_value:
-                    continue
+                if field_name == 'instance_groups':
+                    # Instance groups are ordered so we can't make a set out of them
+                    old_value = old_value.all()
+                elif field_name == 'credentials':
+                    # Credentials have a weird pattern because of how they are layered
+                    old_value = set(old_value.all())
+                    new_value = set(kwargs[field_name]) - old_value
+                    if not new_value:
+                        continue

            if new_value == old_value:
                # no-op case: Fields the same as template's value
@@ -449,6 +483,10 @@ class JobTemplate(UnifiedJobTemplate, JobOptions, SurveyJobTemplateMixin, Resour
                        rejected_data[field_name] = new_value
                        errors_dict[field_name] = _('Project does not allow override of branch.')
                        continue
+                elif field_name == 'job_slice_count' and (new_value > 1) and (self.get_effective_slice_ct(kwargs) <= 1):
+                    rejected_data[field_name] = new_value
+                    errors_dict[field_name] = _('Job inventory does not have enough hosts for slicing')
+                    continue
                # accepted prompt
                prompted_data[field_name] = new_value
            else:
@@ -767,19 +805,15 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana

    @property
    def preferred_instance_groups(self):
-        if self.organization is not None:
-            organization_groups = [x for x in self.organization.instance_groups.all()]
-        else:
-            organization_groups = []
-        if self.inventory is not None:
-            inventory_groups = [x for x in self.inventory.instance_groups.all()]
-        else:
-            inventory_groups = []
-        if self.job_template is not None:
-            template_groups = [x for x in self.job_template.instance_groups.all()]
-        else:
-            template_groups = []
-        selected_groups = template_groups + inventory_groups + organization_groups
+        # If the user specified instance groups those will be handled by the unified_job.create_unified_job
+        # This function handles only the defaults for a template w/o user specification
+        selected_groups = []
+        for obj_type in ['job_template', 'inventory', 'organization']:
+            if getattr(self, obj_type) is not None:
+                for instance_group in getattr(self, obj_type).instance_groups.all():
+                    selected_groups.append(instance_group)
+                if getattr(getattr(self, obj_type), 'prevent_instance_group_fallback', False):
+                    break
        if not selected_groups:
            return self.global_instance_groups
        return selected_groups
@@ -811,21 +845,35 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
    def get_notification_friendly_name(self):
        return "Job"

-    def _get_inventory_hosts(self, only=['name', 'ansible_facts', 'ansible_facts_modified', 'modified', 'inventory_id']):
+    def _get_inventory_hosts(self, only=('name', 'ansible_facts', 'ansible_facts_modified', 'modified', 'inventory_id'), **filters):
+        """Return value is an iterable for the relevant hosts for this job"""
        if not self.inventory:
            return []
-        return self.inventory.hosts.only(*only)
+        host_queryset = self.inventory.hosts.only(*only)
+        if filters:
+            host_queryset = host_queryset.filter(**filters)
+        host_queryset = self.inventory.get_sliced_hosts(host_queryset, self.job_slice_number, self.job_slice_count)
+        if isinstance(host_queryset, QuerySet):
+            return host_queryset.iterator()
+        return host_queryset

-    def start_job_fact_cache(self, destination, modification_times, timeout=None):
+    @log_excess_runtime(logger, debug_cutoff=0.01, msg='Job {job_id} host facts prepared for {written_ct} hosts, took {delta:.3f} s', add_log_data=True)
+    def start_job_fact_cache(self, destination, log_data, timeout=None):
        self.log_lifecycle("start_job_fact_cache")
+        log_data['job_id'] = self.id
+        log_data['written_ct'] = 0
        os.makedirs(destination, mode=0o700)
-        hosts = self._get_inventory_hosts()
+
        if timeout is None:
            timeout = settings.ANSIBLE_FACT_CACHE_TIMEOUT
        if timeout > 0:
            # exclude hosts with fact data older than `settings.ANSIBLE_FACT_CACHE_TIMEOUT seconds`
            timeout = now() - datetime.timedelta(seconds=timeout)
-            hosts = hosts.filter(ansible_facts_modified__gte=timeout)
+            hosts = self._get_inventory_hosts(ansible_facts_modified__gte=timeout)
+        else:
+            hosts = self._get_inventory_hosts()
+
+        last_filepath_written = None
        for host in hosts:
            filepath = os.sep.join(map(str, [destination, host.name]))
            if not os.path.realpath(filepath).startswith(destination):
@@ -835,23 +883,38 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
                with codecs.open(filepath, 'w', encoding='utf-8') as f:
                    os.chmod(f.name, 0o600)
                    json.dump(host.ansible_facts, f)
+                    log_data['written_ct'] += 1
+                    last_filepath_written = filepath
            except IOError:
                system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
                continue
-            # make note of the time we wrote the file so we can check if it changed later
-            modification_times[filepath] = os.path.getmtime(filepath)
+        # make note of the time we wrote the last file so we can check if any file changed later
+        if last_filepath_written:
+            return os.path.getmtime(last_filepath_written)
+        return None

-    def finish_job_fact_cache(self, destination, modification_times):
+    @log_excess_runtime(
+        logger,
+        debug_cutoff=0.01,
+        msg='Job {job_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s',
+        add_log_data=True,
+    )
+    def finish_job_fact_cache(self, destination, facts_write_time, log_data):
        self.log_lifecycle("finish_job_fact_cache")
+        log_data['job_id'] = self.id
+        log_data['updated_ct'] = 0
+        log_data['unmodified_ct'] = 0
+        log_data['cleared_ct'] = 0
+        hosts_to_update = []
        for host in self._get_inventory_hosts():
            filepath = os.sep.join(map(str, [destination, host.name]))
            if not os.path.realpath(filepath).startswith(destination):
                system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
                continue
            if os.path.exists(filepath):
-                # If the file changed since we wrote it pre-playbook run...
+                # If the file changed since we wrote the last facts file, pre-playbook run...
                modified = os.path.getmtime(filepath)
-                if modified > modification_times.get(filepath, 0):
+                if (not facts_write_time) or modified > facts_write_time:
                    with codecs.open(filepath, 'r', encoding='utf-8') as f:
                        try:
                            ansible_facts = json.load(f)
@@ -859,7 +922,7 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
                            continue
                        host.ansible_facts = ansible_facts
                        host.ansible_facts_modified = now()
-                        host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
+                        hosts_to_update.append(host)
                        system_tracking_logger.info(
                            'New fact for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)),
                            extra=dict(
@@ -870,12 +933,21 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
                                job_id=self.id,
                            ),
                        )
+                        log_data['updated_ct'] += 1
+                else:
+                    log_data['unmodified_ct'] += 1
            else:
                # if the file goes missing, ansible removed it (likely via clear_facts)
                host.ansible_facts = {}
                host.ansible_facts_modified = now()
+                hosts_to_update.append(host)
                system_tracking_logger.info('Facts cleared for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)))
-                host.save()
+                log_data['cleared_ct'] += 1
+            if len(hosts_to_update) > 100:
+                self.inventory.hosts.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
+                hosts_to_update = []
+        if hosts_to_update:
+            self.inventory.hosts.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])


 class LaunchTimeConfigBase(BaseModel):
@@ -905,10 +977,36 @@ class LaunchTimeConfigBase(BaseModel):
    # This is a solution to the nullable CharField problem, specific to prompting
    char_prompts = JSONBlob(default=dict, blank=True)

-    def prompts_dict(self, display=False):
+    # Define fields that are not really fields, but alias to char_prompts lookups
+    limit = NullablePromptPseudoField('limit')
+    scm_branch = NullablePromptPseudoField('scm_branch')
+    job_tags = NullablePromptPseudoField('job_tags')
+    skip_tags = NullablePromptPseudoField('skip_tags')
+    diff_mode = NullablePromptPseudoField('diff_mode')
+    job_type = NullablePromptPseudoField('job_type')
+    verbosity = NullablePromptPseudoField('verbosity')
+    forks = NullablePromptPseudoField('forks')
+    job_slice_count = NullablePromptPseudoField('job_slice_count')
+    timeout = NullablePromptPseudoField('timeout')
+
+    # NOTE: additional fields are assumed to exist but must be defined in subclasses
+    # due to technical limitations
+    SUBCLASS_FIELDS = (
+        'instance_groups',  # needs a through model defined
+        'extra_vars',  # alternates between extra_vars and extra_data
+        'credentials',  # already a unified job and unified JT field
+        'labels',  # already a unified job and unified JT field
+        'execution_environment',  # already a unified job and unified JT field
+    )
+
+    def prompts_dict(self, display=False, for_cls=None):
        data = {}
+        if for_cls:
+            cls = for_cls
+        else:
+            cls = JobTemplate
        # Some types may have different prompts, but always subset of JT prompts
-        for prompt_name in JobTemplate.get_ask_mapping().keys():
+        for prompt_name in cls.get_ask_mapping().keys():
            try:
                field = self._meta.get_field(prompt_name)
            except FieldDoesNotExist:
@@ -916,18 +1014,23 @@ class LaunchTimeConfigBase(BaseModel):
            if isinstance(field, models.ManyToManyField):
                if not self.pk:
                    continue  # unsaved object can't have related many-to-many
-                prompt_val = set(getattr(self, prompt_name).all())
-                if len(prompt_val) > 0:
-                    data[prompt_name] = prompt_val
+                prompt_values = list(getattr(self, prompt_name).all())
+                # Many to manys can't distinguish between None and []
+                # Because of this, from a config perspective, we assume [] is none and we don't save [] into the config
+                if len(prompt_values) > 0:
+                    data[prompt_name] = prompt_values
            elif prompt_name == 'extra_vars':
                if self.extra_vars:
+                    extra_vars = {}
                    if display:
-                        data[prompt_name] = self.display_extra_vars()
+                        extra_vars = self.display_extra_vars()
                    else:
-                        data[prompt_name] = self.extra_vars
+                        extra_vars = self.extra_vars
                    # Depending on model, field type may save and return as string
-                    if isinstance(data[prompt_name], str):
-                        data[prompt_name] = parse_yaml_or_json(data[prompt_name])
+                    if isinstance(extra_vars, str):
+                        extra_vars = parse_yaml_or_json(extra_vars)
+                    if extra_vars:
+                        data['extra_vars'] = extra_vars
                if self.survey_passwords and not display:
                    data['survey_passwords'] = self.survey_passwords
            else:
@@ -937,15 +1040,6 @@ class LaunchTimeConfigBase(BaseModel):
        return data


-for field_name in JobTemplate.get_ask_mapping().keys():
-    if field_name == 'extra_vars':
-        continue
-    try:
-        LaunchTimeConfigBase._meta.get_field(field_name)
-    except FieldDoesNotExist:
-        setattr(LaunchTimeConfigBase, field_name, NullablePromptPseudoField(field_name))
-
-
 class LaunchTimeConfig(LaunchTimeConfigBase):
    """
    Common model for all objects that save details of a saved launch config
@@ -964,8 +1058,18 @@ class LaunchTimeConfig(LaunchTimeConfigBase):
            blank=True,
        )
    )
-    # Credentials needed for non-unified job / unified JT models
+    # Fields needed for non-unified job / unified JT models, because they are defined on unified models
    credentials = models.ManyToManyField('Credential', related_name='%(class)ss')
+    labels = models.ManyToManyField('Label', related_name='%(class)s_labels')
+    execution_environment = models.ForeignKey(
+        'ExecutionEnvironment',
+        null=True,
+        blank=True,
+        default=None,
+        on_delete=polymorphic.SET_NULL,
+        related_name='%(class)s_as_prompt',
+        help_text="The container image to be used for execution.",
+    )

    @property
    def extra_vars(self):
@@ -1009,6 +1113,11 @@ class JobLaunchConfig(LaunchTimeConfig):
        editable=False,
    )

+    # Instance Groups needed for non-unified job / unified JT models
+    instance_groups = OrderedManyToManyField(
+        'InstanceGroup', related_name='%(class)ss', blank=True, editable=False, through='JobLaunchConfigInstanceGroupMembership'
+    )
+
    def has_user_prompts(self, template):
        """
        Returns True if any fields exist in the launch config that are
--- a/awx/main/models/label.py
+++ b/awx/main/models/label.py
@@ -10,6 +10,8 @@ from awx.api.versioning import reverse
 from awx.main.models.base import CommonModelNameNotUnique
 from awx.main.models.unified_jobs import UnifiedJobTemplate, UnifiedJob
 from awx.main.models.inventory import Inventory
+from awx.main.models.schedules import Schedule
+from awx.main.models.workflow import WorkflowJobTemplateNode, WorkflowJobNode

 __all__ = ('Label',)

@@ -34,16 +36,22 @@ class Label(CommonModelNameNotUnique):
    def get_absolute_url(self, request=None):
        return reverse('api:label_detail', kwargs={'pk': self.pk}, request=request)

-    @staticmethod
-    def get_orphaned_labels():
-        return Label.objects.filter(organization=None, unifiedjobtemplate_labels__isnull=True, inventory_labels__isnull=True)
-
    def is_detached(self):
-        return Label.objects.filter(id=self.id, unifiedjob_labels__isnull=True, unifiedjobtemplate_labels__isnull=True, inventory_labels__isnull=True).exists()
+        """Return True when this label has no related rows through any of the reverse ``*_labels`` lookups listed below."""
+        # every lookup must be null at once for the label to count as detached
+        return Label.objects.filter(
+            id=self.id,
+            unifiedjob_labels__isnull=True,
+            unifiedjobtemplate_labels__isnull=True,
+            inventory_labels__isnull=True,
+            schedule_labels__isnull=True,
+            workflowjobtemplatenode_labels__isnull=True,
+            workflowjobnode_labels__isnull=True,
+        ).exists()

    def is_candidate_for_detach(self):
-
-        c1 = UnifiedJob.objects.filter(labels__in=[self.id]).count()
-        c2 = UnifiedJobTemplate.objects.filter(labels__in=[self.id]).count()
-        c3 = Inventory.objects.filter(labels__in=[self.id]).count()
-        return (c1 + c2 + c3 - 1) == 0
+        """Return True when exactly one object, summed over the six models queried below, has this label.
+
+        NOTE(review): presumably called while the last remaining association is
+        about to be removed - confirm against the callers.
+        """
+        count = UnifiedJob.objects.filter(labels__in=[self.id]).count()  # Both Jobs and WFJobs
+        count += UnifiedJobTemplate.objects.filter(labels__in=[self.id]).count()  # Both JTs and WFJT
+        count += Inventory.objects.filter(labels__in=[self.id]).count()
+        count += Schedule.objects.filter(labels__in=[self.id]).count()
+        count += WorkflowJobTemplateNode.objects.filter(labels__in=[self.id]).count()
+        count += WorkflowJobNode.objects.filter(labels__in=[self.id]).count()
+        # a total of exactly one means a single remaining reference
+        return (count - 1) == 0
--- a/awx/main/models/mixins.py
+++ b/awx/main/models/mixins.py
@@ -104,6 +104,33 @@ class SurveyJobTemplateMixin(models.Model):
        default=False,
    )
    survey_spec = prevent_search(JSONBlob(default=dict, blank=True))
+
+    ask_inventory_on_launch = AskForField(
+        blank=True,
+        default=False,
+    )
+    ask_limit_on_launch = AskForField(
+        blank=True,
+        default=False,
+    )
+    ask_scm_branch_on_launch = AskForField(
+        blank=True,
+        default=False,
+        allows_field='scm_branch',
+    )
+    ask_labels_on_launch = AskForField(
+        blank=True,
+        default=False,
+    )
+    ask_tags_on_launch = AskForField(
+        blank=True,
+        default=False,
+        allows_field='job_tags',
+    )
+    ask_skip_tags_on_launch = AskForField(
+        blank=True,
+        default=False,
+    )
    ask_variables_on_launch = AskForField(blank=True, default=False, allows_field='extra_vars')

    def survey_password_variables(self):
@@ -412,6 +439,11 @@ class TaskManagerJobMixin(TaskManagerUnifiedJobMixin):
    class Meta:
        abstract = True

+    def get_jobs_fail_chain(self):
+        """Return the related jobs chained to this one: the project update when one is linked, else an empty list."""
+        # test the raw foreign key id so the related object is only loaded when it exists
+        return [self.project_update] if self.project_update_id else []
+

 class TaskManagerUpdateOnLaunchMixin(TaskManagerUnifiedJobMixin):
    class Meta:
--- a/awx/main/models/projects.py
+++ b/awx/main/models/projects.py
@@ -284,6 +284,17 @@ class Project(UnifiedJobTemplate, ProjectOptions, ResourceMixin, CustomVirtualEn
        help_text=_('Allow changing the SCM branch or revision in a job template ' 'that uses this project.'),
    )

+    # credential (keys) used to validate content signature
+    signature_validation_credential = models.ForeignKey(
+        'Credential',
+        related_name='%(class)ss_signature_validation',
+        blank=True,
+        null=True,
+        default=None,
+        on_delete=models.SET_NULL,
+        help_text=_('An optional credential used for validating files in the project against unexpected changes.'),
+    )
+
    scm_revision = models.CharField(
        max_length=1024,
        blank=True,
@@ -460,6 +471,29 @@ class Project(UnifiedJobTemplate, ProjectOptions, ResourceMixin, CustomVirtualEn
    def get_absolute_url(self, request=None):
        return reverse('api:project_detail', kwargs={'pk': self.pk}, request=request)

+    def get_reason_if_failed(self):
+        """
+        If the project is in a failed or errored state, return a human-readable
+        error message explaining why. Otherwise return None.
+
+        This is used during validation in the serializer and also by
+        RunProjectUpdate/RunInventoryUpdate.
+        """
+
+        if self.status not in ('error', 'failed'):
+            return None
+
+        latest_update = self.project_updates.last()
+        if latest_update is not None and latest_update.failed:
+            # exists() asks the database a yes/no question instead of loading
+            # every matching event row just to test truthiness
+            signature_check_failed = latest_update.project_update_events.filter(
+                event='runner_on_failed',
+                play="Perform project signature/checksum verification",
+            ).exists()
+            if signature_check_failed:
+                return _("Last project update failed due to signature validation failure.")
+
+        # generic message for every other failed/errored case
+        return _("Missing a revision to run due to failed project update.")
+
    '''
    RelatedJobsMixin
    '''
@@ -620,6 +654,10 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage
        added_update_fields = []
        if not self.job_tags:
            job_tags = ['update_{}'.format(self.scm_type), 'install_roles', 'install_collections']
+            if self.project.signature_validation_credential is not None:
+                credential_type = self.project.signature_validation_credential.credential_type.namespace
+                job_tags.append(f'validation_{credential_type}')
+                job_tags.append('validation_checksum_manifest')
            self.job_tags = ','.join(job_tags)
            added_update_fields.append('job_tags')
        if self.scm_delete_on_update and 'delete' not in self.job_tags and self.job_type == 'check':
--- a/awx/main/models/schedules.py
+++ b/awx/main/models/schedules.py
@@ -18,6 +18,7 @@ from django.utils.translation import gettext_lazy as _

 # AWX
 from awx.api.versioning import reverse
+from awx.main.fields import OrderedManyToManyField
 from awx.main.models.base import PrimordialModel
 from awx.main.models.jobs import LaunchTimeConfig
 from awx.main.utils import ignore_inventory_computed_fields
@@ -83,6 +84,13 @@ class Schedule(PrimordialModel, LaunchTimeConfig):
    )
    rrule = models.TextField(help_text=_("A value representing the schedules iCal recurrence rule."))
    next_run = models.DateTimeField(null=True, default=None, editable=False, help_text=_("The next time that the scheduled action will run."))
+    instance_groups = OrderedManyToManyField(
+        'InstanceGroup',
+        related_name='schedule_instance_groups',
+        blank=True,
+        editable=False,
+        through='ScheduleInstanceGroupMembership',
+    )

    @classmethod
    def get_zoneinfo(cls):
@@ -145,7 +153,7 @@ class Schedule(PrimordialModel, LaunchTimeConfig):
        #

        # Find the DTSTART rule or raise an error, its usually the first rule but that is not strictly enforced
-        start_date_rule = re.sub('^.*(DTSTART[^\s]+)\s.*$', r'\1', rrule)
+        start_date_rule = re.sub(r'^.*(DTSTART[^\s]+)\s.*$', r'\1', rrule)
        if not start_date_rule:
            raise ValueError('A DTSTART field needs to be in the rrule')

--- a/awx/main/models/unified_jobs.py
+++ b/awx/main/models/unified_jobs.py
@@ -332,10 +332,11 @@ class UnifiedJobTemplate(PolymorphicModel, CommonModelNameNotUnique, ExecutionEn

        return NotificationTemplate.objects.none()

-    def create_unified_job(self, **kwargs):
+    def create_unified_job(self, instance_groups=None, **kwargs):
        """
        Create a new unified job based on this unified job template.
        """
+        # TODO: rename kwargs to prompts, to set expectation that these are runtime values
        new_job_passwords = kwargs.pop('survey_passwords', {})
        eager_fields = kwargs.pop('_eager_fields', None)

@@ -382,7 +383,10 @@ class UnifiedJobTemplate(PolymorphicModel, CommonModelNameNotUnique, ExecutionEn
            unified_job.survey_passwords = new_job_passwords
            kwargs['survey_passwords'] = new_job_passwords  # saved in config object for relaunch

-        unified_job.preferred_instance_groups_cache = unified_job._get_preferred_instance_group_cache()
+        if instance_groups:
+            unified_job.preferred_instance_groups_cache = [ig.id for ig in instance_groups]
+        else:
+            unified_job.preferred_instance_groups_cache = unified_job._get_preferred_instance_group_cache()

        unified_job._set_default_dependencies_processed()
        unified_job.task_impact = unified_job._get_task_impact()
@@ -412,13 +416,17 @@ class UnifiedJobTemplate(PolymorphicModel, CommonModelNameNotUnique, ExecutionEn
            unified_job.handle_extra_data(validated_kwargs['extra_vars'])

        # Create record of provided prompts for relaunch and rescheduling
-        unified_job.create_config_from_prompts(kwargs, parent=self)
+        config = unified_job.create_config_from_prompts(kwargs, parent=self)
+        if instance_groups:
+            for ig in instance_groups:
+                config.instance_groups.add(ig)

        # manually issue the create activity stream entry _after_ M2M relations
        # have been associated to the UJ
        if unified_job.__class__ in activity_stream_registrar.models:
            activity_stream_create(None, unified_job, True)
        unified_job.log_lifecycle("created")
+
        return unified_job

    @classmethod
@@ -973,22 +981,38 @@ class UnifiedJob(
            valid_fields.extend(['survey_passwords', 'extra_vars'])
        else:
            kwargs.pop('survey_passwords', None)
+        many_to_many_fields = []
        for field_name, value in kwargs.items():
            if field_name not in valid_fields:
                raise Exception('Unrecognized launch config field {}.'.format(field_name))
-            if field_name == 'credentials':
+            field = None
+            # may use extra_data as a proxy for extra_vars
+            if field_name in config.SUBCLASS_FIELDS and field_name != 'extra_vars':
+                field = config._meta.get_field(field_name)
+            if isinstance(field, models.ManyToManyField):
+                many_to_many_fields.append(field_name)
                continue
-            key = field_name
-            if key == 'extra_vars':
-                key = 'extra_data'
-            setattr(config, key, value)
+            if isinstance(field, (models.ForeignKey)) and (value is None):
+                continue  # the null value indicates not-provided for ForeignKey case
+            setattr(config, field_name, value)
        config.save()

-        job_creds = set(kwargs.get('credentials', []))
-        if 'credentials' in [field.name for field in parent._meta.get_fields()]:
-            job_creds = job_creds - set(parent.credentials.all())
-        if job_creds:
-            config.credentials.add(*job_creds)
+        for field_name in many_to_many_fields:
+            prompted_items = kwargs.get(field_name, [])
+            if not prompted_items:
+                continue
+            if field_name == 'instance_groups':
+                # Here we are doing a loop to make sure we preserve order for this Ordered field
+                # also do not merge IGs with parent, so this saves the literal list
+                for item in prompted_items:
+                    getattr(config, field_name).add(item)
+            else:
+                # Assuming this field merges prompts with parent, save just the diff
+                if field_name in [field.name for field in parent._meta.get_fields()]:
+                    prompted_items = set(prompted_items) - set(getattr(parent, field_name).all())
+                if prompted_items:
+                    getattr(config, field_name).add(*prompted_items)
+
        return config

    @property
@@ -1281,6 +1305,8 @@ class UnifiedJob(
                    status_data['instance_group_name'] = None
            elif status in ['successful', 'failed', 'canceled'] and self.finished:
                status_data['finished'] = datetime.datetime.strftime(self.finished, "%Y-%m-%dT%H:%M:%S.%fZ")
+            elif status == 'running' and self.started:
+                # report the start timestamp under 'started'; formatting self.finished here was a copy-paste slip
+                status_data['started'] = datetime.datetime.strftime(self.started, "%Y-%m-%dT%H:%M:%S.%fZ")
            status_data.update(self.websocket_emit_data())
            status_data['group_name'] = 'jobs'
            if getattr(self, 'unified_job_template_id', None):
@@ -1325,12 +1351,12 @@ class UnifiedJob(
                if required in defined_fields and not credential.has_input(required):
                    missing_credential_inputs.append(required)

-        if missing_credential_inputs:
-            self.job_explanation = '{} cannot start because Credential {} does not provide one or more required fields ({}).'.format(
-                self._meta.verbose_name.title(), credential.name, ', '.join(sorted(missing_credential_inputs))
-            )
-            self.save(update_fields=['job_explanation'])
-            return (False, None)
+            if missing_credential_inputs:
+                self.job_explanation = '{} cannot start because Credential {} does not provide one or more required fields ({}).'.format(
+                    self._meta.verbose_name.title(), credential.name, ', '.join(sorted(missing_credential_inputs))
+                )
+                self.save(update_fields=['job_explanation'])
+                return (False, None)

        needed = self.get_passwords_needed_to_start()
        try:
@@ -1395,22 +1421,6 @@ class UnifiedJob(
        # Done!
        return True

-    @property
-    def actually_running(self):
-        # returns True if the job is running in the appropriate dispatcher process
-        running = False
-        if all([self.status == 'running', self.celery_task_id, self.execution_node]):
-            # If the job is marked as running, but the dispatcher
-            # doesn't know about it (or the dispatcher doesn't reply),
-            # then cancel the job
-            timeout = 5
-            try:
-                running = self.celery_task_id in ControlDispatcher('dispatcher', self.controller_node or self.execution_node).running(timeout=timeout)
-            except (socket.timeout, RuntimeError):
-                logger.error('could not reach dispatcher on {} within {}s'.format(self.execution_node, timeout))
-                running = False
-        return running
-
    @property
    def can_cancel(self):
        return bool(self.status in CAN_CANCEL)
@@ -1420,27 +1430,61 @@ class UnifiedJob(
            return 'Previous Task Canceled: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (self.model_to_str(), self.name, self.id)
        return None

+    def fallback_cancel(self):
+        """Ask the dispatcher to cancel this job, first reloading celery_task_id from the database if this instance has none."""
+        task_id_missing = not self.celery_task_id
+        if task_id_missing:
+            # only the one field is refreshed; other in-memory attributes are left as they are
+            self.refresh_from_db(fields=['celery_task_id'])
+        self.cancel_dispatcher_process()
+
+    def cancel_dispatcher_process(self):
+        """
+        Returns True if dispatcher running this job acknowledged request and sent SIGTERM
+
+        Returns False when this job has no celery_task_id, when the dispatcher
+        reply does not list the task id, or when the request timed out or raised
+        (errors are logged here, never propagated to the caller).
+        """
+        if not self.celery_task_id:
+            return False  # nothing to cancel; keep the documented boolean return
+        canceled = []
+        # NOTE(review): timeout only feeds the log message below and is not passed to cancel();
+        # confirm it matches the reply timeout the dispatcher call really uses
+        timeout = 5
+        try:
+            # Use control and reply mechanism to cancel and obtain confirmation
+            canceled = ControlDispatcher('dispatcher', self.controller_node).cancel([self.celery_task_id])
+        except socket.timeout:
+            logger.error(f'could not reach dispatcher on {self.controller_node} within {timeout}s')
+        except Exception:
+            logger.exception("error encountered when checking task status")
+        return self.celery_task_id in canceled  # True or False, whether confirmation was obtained
+
    def cancel(self, job_explanation=None, is_chain=False):
+        """Request cancellation of this job and return the resulting cancel_flag.
+
+        Nothing is done unless can_cancel is true. Unless is_chain is set, every
+        job from get_jobs_fail_chain() is canceled first (with is_chain=True so
+        those jobs do not cascade further). The first time the flag is set,
+        start_args is blanked, job_explanation (if given) is stored, and the
+        fields are saved before the dispatcher is contacted. If the dispatcher
+        does not confirm the cancel, status is set to 'canceled' here and
+        fallback_cancel() is called.
+        """
        if self.can_cancel:
            if not is_chain:
                for x in self.get_jobs_fail_chain():
                    x.cancel(job_explanation=self._build_job_explanation(), is_chain=True)

+            cancel_fields = []
            if not self.cancel_flag:
                self.cancel_flag = True
                self.start_args = ''  # blank field to remove encrypted passwords
-                cancel_fields = ['cancel_flag', 'start_args']
-                if self.status in ('pending', 'waiting', 'new'):
-                    self.status = 'canceled'
-                    cancel_fields.append('status')
-                if self.status == 'running' and not self.actually_running:
-                    self.status = 'canceled'
-                    cancel_fields.append('status')
+                cancel_fields.extend(['cancel_flag', 'start_args'])
+                # the websocket message is deferred until the surrounding transaction commits
+                connection.on_commit(lambda: self.websocket_emit_status("canceled"))
+
                if job_explanation is not None:
                    self.job_explanation = job_explanation
                    cancel_fields.append('job_explanation')
+
+                # Important to save here before sending cancel signal to dispatcher to cancel because
+                # the job control process will use the cancel_flag to distinguish a shutdown from a cancel
                self.save(update_fields=cancel_fields)
-                self.websocket_emit_status("canceled")
+
+            controller_notified = False
+            if self.celery_task_id:
+                controller_notified = self.cancel_dispatcher_process()
+
+            # If a SIGTERM signal was sent to the control process, and acked by the dispatcher
+            # then we want to let its own cleanup change status, otherwise change status now
+            if not controller_notified:
+                if self.status != 'canceled':
+                    self.status = 'canceled'
+                    self.save(update_fields=['status'])
+                # Avoid race condition where we have a stale model from pending state but the job has already started
+                # and is checking the signal but not cancel_flag, so re-send the signal after updating cancel fields
+                self.fallback_cancel()
+
        return self.cancel_flag

    @property
--- a/awx/main/models/workflow.py
+++ b/awx/main/models/workflow.py
@@ -29,7 +29,7 @@ from awx.main.models import prevent_search, accepts_json, UnifiedJobTemplate, Un
 from awx.main.models.notifications import NotificationTemplate, JobNotificationMixin
 from awx.main.models.base import CreatedModifiedModel, VarsDictProperty
 from awx.main.models.rbac import ROLE_SINGLETON_SYSTEM_ADMINISTRATOR, ROLE_SINGLETON_SYSTEM_AUDITOR
-from awx.main.fields import ImplicitRoleField, AskForField, JSONBlob
+from awx.main.fields import ImplicitRoleField, JSONBlob, OrderedManyToManyField
 from awx.main.models.mixins import (
    ResourceMixin,
    SurveyJobTemplateMixin,
@@ -114,6 +114,9 @@ class WorkflowNodeBase(CreatedModifiedModel, LaunchTimeConfig):
            'credentials',
            'char_prompts',
            'all_parents_must_converge',
+            'labels',
+            'instance_groups',
+            'execution_environment',
        ]

    def create_workflow_job_node(self, **kwargs):
@@ -122,7 +125,7 @@ class WorkflowNodeBase(CreatedModifiedModel, LaunchTimeConfig):
        """
        create_kwargs = {}
        for field_name in self._get_workflow_job_field_names():
-            if field_name == 'credentials':
+            if field_name in ['credentials', 'labels', 'instance_groups']:
                continue
            if field_name in kwargs:
                create_kwargs[field_name] = kwargs[field_name]
@@ -132,10 +135,20 @@ class WorkflowNodeBase(CreatedModifiedModel, LaunchTimeConfig):
        new_node = WorkflowJobNode.objects.create(**create_kwargs)
        if self.pk:
            allowed_creds = self.credentials.all()
+            allowed_labels = self.labels.all()
+            allowed_instance_groups = self.instance_groups.all()
        else:
            allowed_creds = []
+            allowed_labels = []
+            allowed_instance_groups = []
        for cred in allowed_creds:
            new_node.credentials.add(cred)
+
+        for label in allowed_labels:
+            new_node.labels.add(label)
+        for instance_group in allowed_instance_groups:
+            new_node.instance_groups.add(instance_group)
+
        return new_node


@@ -153,6 +166,9 @@ class WorkflowJobTemplateNode(WorkflowNodeBase):
        'char_prompts',
        'all_parents_must_converge',
        'identifier',
+        'labels',
+        'execution_environment',
+        'instance_groups',
    ]
    REENCRYPTION_BLOCKLIST_AT_COPY = ['extra_data', 'survey_passwords']

@@ -167,6 +183,13 @@ class WorkflowJobTemplateNode(WorkflowNodeBase):
        blank=False,
        help_text=_('An identifier for this node that is unique within its workflow. ' 'It is copied to workflow job nodes corresponding to this node.'),
    )
+    instance_groups = OrderedManyToManyField(
+        'InstanceGroup',
+        related_name='workflow_job_template_node_instance_groups',
+        blank=True,
+        editable=False,
+        through='WorkflowJobTemplateNodeBaseInstanceGroupMembership',
+    )

    class Meta:
        app_label = 'main'
@@ -211,7 +234,7 @@ class WorkflowJobTemplateNode(WorkflowNodeBase):
        approval_template = WorkflowApprovalTemplate(**kwargs)
        approval_template.save()
        self.unified_job_template = approval_template
-        self.save()
+        self.save(update_fields=['unified_job_template'])
        return approval_template


@@ -250,6 +273,9 @@ class WorkflowJobNode(WorkflowNodeBase):
        blank=True,  # blank denotes pre-migration job nodes
        help_text=_('An identifier coresponding to the workflow job template node that this node was created from.'),
    )
+    instance_groups = OrderedManyToManyField(
+        'InstanceGroup', related_name='workflow_job_node_instance_groups', blank=True, editable=False, through='WorkflowJobNodeBaseInstanceGroupMembership'
+    )

    class Meta:
        app_label = 'main'
@@ -265,19 +291,6 @@ class WorkflowJobNode(WorkflowNodeBase):
    def get_absolute_url(self, request=None):
        return reverse('api:workflow_job_node_detail', kwargs={'pk': self.pk}, request=request)

-    def prompts_dict(self, *args, **kwargs):
-        r = super(WorkflowJobNode, self).prompts_dict(*args, **kwargs)
-        # Explanation - WFJT extra_vars still break pattern, so they are not
-        # put through prompts processing, but inventory and others are only accepted
-        # if JT prompts for it, so it goes through this mechanism
-        if self.workflow_job:
-            if self.workflow_job.inventory_id:
-                # workflow job inventory takes precedence
-                r['inventory'] = self.workflow_job.inventory
-            if self.workflow_job.char_prompts:
-                r.update(self.workflow_job.char_prompts)
-        return r
-
    def get_job_kwargs(self):
        """
        In advance of creating a new unified job as part of a workflow,
@@ -287,16 +300,38 @@ class WorkflowJobNode(WorkflowNodeBase):
        """
        # reject/accept prompted fields
        data = {}
+        wj_special_vars = {}
+        wj_special_passwords = {}
        ujt_obj = self.unified_job_template
        if ujt_obj is not None:
-            # MERGE note: move this to prompts_dict method on node when merging
-            # with the workflow inventory branch
-            prompts_data = self.prompts_dict()
-            if isinstance(ujt_obj, WorkflowJobTemplate):
-                if self.workflow_job.extra_vars:
-                    prompts_data.setdefault('extra_vars', {})
-                    prompts_data['extra_vars'].update(self.workflow_job.extra_vars_dict)
-            accepted_fields, ignored_fields, errors = ujt_obj._accept_or_ignore_job_kwargs(**prompts_data)
+            node_prompts_data = self.prompts_dict(for_cls=ujt_obj.__class__)
+            wj_prompts_data = self.workflow_job.prompts_dict(for_cls=ujt_obj.__class__)
+            # Explanation - special historical case
+            # WFJT extra_vars ignored JobTemplate.ask_variables_on_launch, bypassing _accept_or_ignore_job_kwargs
+            # inventory and others are only accepted if JT prompts for it with related ask_ field
+            # this is inconsistent, but maintained
+            if not isinstance(ujt_obj, WorkflowJobTemplate):
+                wj_special_vars = wj_prompts_data.pop('extra_vars', {})
+                wj_special_passwords = wj_prompts_data.pop('survey_passwords', {})
+            elif 'extra_vars' in node_prompts_data:
+                # Follow the vars combination rules
+                node_prompts_data['extra_vars'].update(wj_prompts_data.pop('extra_vars', {}))
+            elif 'survey_passwords' in node_prompts_data:
+                node_prompts_data['survey_passwords'].update(wj_prompts_data.pop('survey_passwords', {}))
+
+            # Follow the credential combination rules
+            if ('credentials' in wj_prompts_data) and ('credentials' in node_prompts_data):
+                wj_pivoted_creds = Credential.unique_dict(wj_prompts_data['credentials'])
+                node_pivoted_creds = Credential.unique_dict(node_prompts_data['credentials'])
+                node_pivoted_creds.update(wj_pivoted_creds)
+                wj_prompts_data['credentials'] = [cred for cred in node_pivoted_creds.values()]
+
+            # NOTE: no special rules for instance_groups, because they do not merge,
+            # or for labels, because they do not propagate WFJT-->node at all
+
+            # Combine WFJT prompts with node here, WFJT at higher level
+            node_prompts_data.update(wj_prompts_data)
+            accepted_fields, ignored_fields, errors = ujt_obj._accept_or_ignore_job_kwargs(**node_prompts_data)
            if errors:
                logger.info(
                    _('Bad launch configuration starting template {template_pk} as part of ' 'workflow {workflow_pk}. Errors:\n{error_text}').format(
@@ -304,15 +339,6 @@ class WorkflowJobNode(WorkflowNodeBase):
                    )
                )
            data.update(accepted_fields)  # missing fields are handled in the scheduler
-            try:
-                # config saved on the workflow job itself
-                wj_config = self.workflow_job.launch_config
-            except ObjectDoesNotExist:
-                wj_config = None
-            if wj_config:
-                accepted_fields, ignored_fields, errors = ujt_obj._accept_or_ignore_job_kwargs(**wj_config.prompts_dict())
-                accepted_fields.pop('extra_vars', None)  # merge handled with other extra_vars later
-                data.update(accepted_fields)
        # build ancestor artifacts, save them to node model for later
        aa_dict = {}
        is_root_node = True
@@ -325,15 +351,12 @@ class WorkflowJobNode(WorkflowNodeBase):
            self.ancestor_artifacts = aa_dict
            self.save(update_fields=['ancestor_artifacts'])
        # process password list
-        password_dict = {}
+        password_dict = data.get('survey_passwords', {})
        if '_ansible_no_log' in aa_dict:
            for key in aa_dict:
                if key != '_ansible_no_log':
                    password_dict[key] = REPLACE_STR
-        if self.workflow_job.survey_passwords:
-            password_dict.update(self.workflow_job.survey_passwords)
-        if self.survey_passwords:
-            password_dict.update(self.survey_passwords)
+        password_dict.update(wj_special_passwords)
        if password_dict:
            data['survey_passwords'] = password_dict
        # process extra_vars
@@ -343,12 +366,12 @@ class WorkflowJobNode(WorkflowNodeBase):
                functional_aa_dict = copy(aa_dict)
                functional_aa_dict.pop('_ansible_no_log', None)
                extra_vars.update(functional_aa_dict)
-        if ujt_obj and isinstance(ujt_obj, JobTemplate):
-            # Workflow Job extra_vars higher precedence than ancestor artifacts
-            if self.workflow_job and self.workflow_job.extra_vars:
-                extra_vars.update(self.workflow_job.extra_vars_dict)
+
+        # Workflow Job extra_vars higher precedence than ancestor artifacts
+        extra_vars.update(wj_special_vars)
        if extra_vars:
            data['extra_vars'] = extra_vars
+
        # ensure that unified jobs created by WorkflowJobs are marked
        data['_eager_fields'] = {'launch_type': 'workflow'}
        if self.workflow_job and self.workflow_job.created_by:
@@ -374,6 +397,10 @@ class WorkflowJobOptions(LaunchTimeConfigBase):
            )
        )
    )
+    # Workflow jobs are used for sliced jobs, and thus, must be a conduit for any JT prompts
+    instance_groups = OrderedManyToManyField(
+        'InstanceGroup', related_name='workflow_job_instance_groups', blank=True, editable=False, through='WorkflowJobInstanceGroupMembership'
+    )
    allow_simultaneous = models.BooleanField(default=False)

    extra_vars_dict = VarsDictProperty('extra_vars', True)
@@ -385,7 +412,7 @@ class WorkflowJobOptions(LaunchTimeConfigBase):
    @classmethod
    def _get_unified_job_field_names(cls):
        r = set(f.name for f in WorkflowJobOptions._meta.fields) | set(
-            ['name', 'description', 'organization', 'survey_passwords', 'labels', 'limit', 'scm_branch']
+            ['name', 'description', 'organization', 'survey_passwords', 'labels', 'limit', 'scm_branch', 'job_tags', 'skip_tags']
        )
        r.remove('char_prompts')  # needed due to copying launch config to launch config
        return r
@@ -425,26 +452,29 @@ class WorkflowJobOptions(LaunchTimeConfigBase):
 class WorkflowJobTemplate(UnifiedJobTemplate, WorkflowJobOptions, SurveyJobTemplateMixin, ResourceMixin, RelatedJobsMixin, WebhookTemplateMixin):

    SOFT_UNIQUE_TOGETHER = [('polymorphic_ctype', 'name', 'organization')]
-    FIELDS_TO_PRESERVE_AT_COPY = ['labels', 'organization', 'instance_groups', 'workflow_job_template_nodes', 'credentials', 'survey_spec']
+    FIELDS_TO_PRESERVE_AT_COPY = [
+        'labels',
+        'organization',
+        'instance_groups',
+        'workflow_job_template_nodes',
+        'credentials',
+        'survey_spec',
+        'skip_tags',
+        'job_tags',
+        'execution_environment',
+    ]

    class Meta:
        app_label = 'main'

-    ask_inventory_on_launch = AskForField(
+    notification_templates_approvals = models.ManyToManyField(
+        "NotificationTemplate",
        blank=True,
-        default=False,
+        related_name='%(class)s_notification_templates_for_approvals',
    )
-    ask_limit_on_launch = AskForField(
-        blank=True,
-        default=False,
+    admin_role = ImplicitRoleField(
+        parent_role=['singleton:' + ROLE_SINGLETON_SYSTEM_ADMINISTRATOR, 'organization.workflow_admin_role'],
    )
-    ask_scm_branch_on_launch = AskForField(
-        blank=True,
-        default=False,
-    )
-    notification_templates_approvals = models.ManyToManyField("NotificationTemplate", blank=True, related_name='%(class)s_notification_templates_for_approvals')
-
-    admin_role = ImplicitRoleField(parent_role=['singleton:' + ROLE_SINGLETON_SYSTEM_ADMINISTRATOR, 'organization.workflow_admin_role'])
    execute_role = ImplicitRoleField(
        parent_role=[
            'admin_role',
@@ -713,6 +743,25 @@ class WorkflowJob(UnifiedJob, WorkflowJobOptions, SurveyJobMixin, JobNotificatio
            artifacts.update(job.get_effective_artifacts(parents_set=new_parents_set))
        return artifacts

+    def prompts_dict(self, *args, **kwargs):
+        if self.job_template_id:
+            # HACK: Exception for sliced jobs here, this is bad
+            # when sliced jobs were introduced, workflows did not have all the prompted JT fields
+            # so to support prompting with slicing, we abused the workflow job launch config
+            # these would be more properly saved on the workflow job, but it gets the wrong fields now
+            try:
+                wj_config = self.launch_config
+                r = wj_config.prompts_dict(*args, **kwargs)
+            except ObjectDoesNotExist:
+                r = {}
+        else:
+            r = super().prompts_dict(*args, **kwargs)
+            # Workflow labels and job labels are treated separately;
+            # that means they do not propagate from WFJT / workflow job to jobs in the workflow
+            r.pop('labels', None)
+
+        return r
+
    def get_notification_templates(self):
        return self.workflow_job_template.notification_templates

@@ -723,11 +772,10 @@ class WorkflowJob(UnifiedJob, WorkflowJobOptions, SurveyJobMixin, JobNotificatio
    def preferred_instance_groups(self):
        return []

-    @property
-    def actually_running(self):
+    def cancel_dispatcher_process(self):
        # WorkflowJobs don't _actually_ run anything in the dispatcher, so
        # there's no point in asking the dispatcher if it knows about this task
-        return self.status == 'running'
+        return True


 class WorkflowApprovalTemplate(UnifiedJobTemplate, RelatedJobsMixin):
--- a/awx/main/notifications/webhook_backend.py
+++ b/awx/main/notifications/webhook_backend.py
@@ -5,9 +5,6 @@ import json
 import logging
 import requests

-from django.utils.encoding import smart_str
-from django.utils.translation import gettext_lazy as _
-
 from awx.main.notifications.base import AWXBaseEmailBackend
 from awx.main.utils import get_awx_http_client_headers
 from awx.main.notifications.custom_notification_base import CustomNotificationBase
@@ -17,6 +14,8 @@ logger = logging.getLogger('awx.main.notifications.webhook_backend')

 class WebhookBackend(AWXBaseEmailBackend, CustomNotificationBase):

+    MAX_RETRIES = 5
+
    init_parameters = {
        "url": {"label": "Target URL", "type": "string"},
        "http_method": {"label": "HTTP Method", "type": "string", "default": "POST"},
@@ -64,20 +63,67 @@ class WebhookBackend(AWXBaseEmailBackend, CustomNotificationBase):
        if self.http_method.lower() not in ['put', 'post']:
            raise ValueError("HTTP method must be either 'POST' or 'PUT'.")
        chosen_method = getattr(requests, self.http_method.lower(), None)
+
        for m in messages:
+
            auth = None
            if self.username or self.password:
                auth = (self.username, self.password)
-            r = chosen_method(
-                "{}".format(m.recipients()[0]),
-                auth=auth,
-                data=json.dumps(m.body, ensure_ascii=False).encode('utf-8'),
-                headers=dict(list(get_awx_http_client_headers().items()) + list((self.headers or {}).items())),
-                verify=(not self.disable_ssl_verification),
-            )
-            if r.status_code >= 400:
-                logger.error(smart_str(_("Error sending notification webhook: {}").format(r.status_code)))
+
+            # the constructor for EmailMessage - https://docs.djangoproject.com/en/4.1/_modules/django/core/mail/message will turn an empty dictionary to an empty string
+            # sometimes an empty dict is intentional and we added this conditional to enforce that
+            if not m.body:
+                m.body = {}
+
+            url = str(m.recipients()[0])
+            data = json.dumps(m.body, ensure_ascii=False).encode('utf-8')
+            headers = {**(get_awx_http_client_headers()), **(self.headers or {})}
+
+            err = None
+
+            for retries in range(self.MAX_RETRIES):
+
+                # Sometimes we hit redirect URLs. We must account for this: extract the redirect URL from the response headers and try again, up to MAX_RETRIES (5) attempts
+                resp = chosen_method(
+                    url=url,
+                    auth=auth,
+                    data=data,
+                    headers=headers,
+                    verify=(not self.disable_ssl_verification),
+                    allow_redirects=False,  # override default behaviour for redirects
+                )
+
+                # either success or error reached if this conditional fires
+                if resp.status_code not in [301, 307]:
+                    break
+
+                # we've hit a redirect. extract the redirect URL out of the first response header and try again
+                logger.warning(
+                    f"Received a {resp.status_code} from {url}, trying to reach redirect url {resp.headers.get('Location', None)}; attempt #{retries+1}"
+                )
+
+                # take the first redirect URL in the response header and try that
+                url = resp.headers.get("Location", None)
+
+                if url is None:
+                    err = f"Webhook notification received redirect to a blank URL from {url}. Response headers={resp.headers}"
+                    break
+            else:
+                # no break condition in the loop encountered; therefore we have hit the maximum number of retries
+                err = f"Webhook notification max number of retries [{self.MAX_RETRIES}] exceeded. Failed to send webhook notification to {url}"
+
+            if resp.status_code >= 400:
+                err = f"Error sending webhook notification: {resp.status_code}"
+
+            # log error message
+            if err:
+                logger.error(err)
                if not self.fail_silently:
-                    raise Exception(smart_str(_("Error sending notification webhook: {}").format(r.status_code)))
-            sent_messages += 1
+                    raise Exception(err)
+
+            # no errors were encountered therefore we successfully sent off the notification webhook
+            if resp.status_code in range(200, 299):
+                logger.debug(f"Notification webhook successfully sent to {url}. Received {resp.status_code}")
+                sent_messages += 1
+
        return sent_messages
--- a/awx/main/registrar.py
+++ b/awx/main/registrar.py
@@ -3,6 +3,8 @@

 from django.db.models.signals import pre_save, post_save, pre_delete, m2m_changed

+from taggit.managers import TaggableManager
+

 class ActivityStreamRegistrar(object):
    def __init__(self):
@@ -19,6 +21,8 @@ class ActivityStreamRegistrar(object):
            pre_delete.connect(activity_stream_delete, sender=model, dispatch_uid=str(self.__class__) + str(model) + "_delete")

            for m2mfield in model._meta.many_to_many:
+                if isinstance(m2mfield, TaggableManager):
+                    continue  # Special case for taggit app
                try:
                    m2m_attr = getattr(model, m2mfield.name)
                    m2m_changed.connect(
--- a/awx/main/routing.py
+++ b/awx/main/routing.py
@@ -27,8 +27,8 @@ class AWXProtocolTypeRouter(ProtocolTypeRouter):


 websocket_urlpatterns = [
-    re_path(r'websocket/$', consumers.EventConsumer),
-    re_path(r'websocket/broadcast/$', consumers.BroadcastConsumer),
+    re_path(r'websocket/$', consumers.EventConsumer.as_asgi()),
+    re_path(r'websocket/broadcast/$', consumers.BroadcastConsumer.as_asgi()),
 ]

 application = AWXProtocolTypeRouter(
--- a/awx/main/scheduler/task_manager.py
+++ b/awx/main/scheduler/task_manager.py
@@ -39,12 +39,11 @@ from awx.main.utils import (
    ScheduleTaskManager,
    ScheduleWorkflowManager,
 )
-from awx.main.utils.common import task_manager_bulk_reschedule
+from awx.main.utils.common import task_manager_bulk_reschedule, is_testing
 from awx.main.signals import disable_activity_stream
 from awx.main.constants import ACTIVE_STATES
 from awx.main.scheduler.dependency_graph import DependencyGraph
-from awx.main.scheduler.task_manager_models import TaskManagerInstances
-from awx.main.scheduler.task_manager_models import TaskManagerInstanceGroups
+from awx.main.scheduler.task_manager_models import TaskManagerModels
 import awx.main.analytics.subsystem_metrics as s_metrics
 from awx.main.utils import decrypt_field

@@ -71,7 +70,12 @@ class TaskBase:
        # is called later.
        self.subsystem_metrics = s_metrics.Metrics(auto_pipe_execute=False)
        self.start_time = time.time()
+
+        # We want to avoid calling settings in loops, so cache these settings at init time
        self.start_task_limit = settings.START_TASK_LIMIT
+        self.task_manager_timeout = settings.TASK_MANAGER_TIMEOUT
+        self.control_task_impact = settings.AWX_CONTROL_NODE_TASK_IMPACT
+
        for m in self.subsystem_metrics.METRICS:
            if m.startswith(self.prefix):
                self.subsystem_metrics.set(m, 0)
@@ -79,7 +83,7 @@ class TaskBase:
    def timed_out(self):
        """Return True/False if we have met or exceeded the timeout for the task manager."""
        elapsed = time.time() - self.start_time
-        if elapsed >= settings.TASK_MANAGER_TIMEOUT:
+        if elapsed >= self.task_manager_timeout:
            logger.warning(f"{self.prefix} manager has run for {elapsed} which is greater than TASK_MANAGER_TIMEOUT of {settings.TASK_MANAGER_TIMEOUT}.")
            return True
        return False
@@ -97,7 +101,7 @@ class TaskBase:
        self.all_tasks = [t for t in qs]

    def record_aggregate_metrics(self, *args):
-        if not settings.IS_TESTING():
+        if not is_testing():
            # increment task_manager_schedule_calls regardless if the other
            # metrics are recorded
            s_metrics.Metrics(auto_pipe_execute=True).inc(f"{self.prefix}__schedule_calls", 1)
@@ -471,9 +475,8 @@ class TaskManager(TaskBase):
        Init AFTER we know this instance of the task manager will run because the lock is acquired.
        """
        self.dependency_graph = DependencyGraph()
-        self.instances = TaskManagerInstances(self.all_tasks)
-        self.instance_groups = TaskManagerInstanceGroups(instances_by_hostname=self.instances)
-        self.controlplane_ig = self.instance_groups.controlplane_ig
+        self.tm_models = TaskManagerModels()
+        self.controlplane_ig = self.tm_models.instance_groups.controlplane_ig

    def job_blocked_by(self, task):
        # TODO: I'm not happy with this, I think blocking behavior should be decided outside of the dependency graph
@@ -504,8 +507,16 @@ class TaskManager(TaskBase):
        return None

    @timeit
-    def start_task(self, task, instance_group, dependent_tasks=None, instance=None):
+    def start_task(self, task, instance_group, instance=None):
+        # Just like for process_running_tasks, add the job to the dependency graph and
+        # ask the TaskManagerInstanceGroups object to update consumed capacity on all
+        # implicated instances and container groups.
        self.dependency_graph.add_job(task)
+        if instance_group is not None:
+            task.instance_group = instance_group
+        # We need the instance group assigned to correctly account for container group max_concurrent_jobs and max_forks
+        self.tm_models.consume_capacity(task)
+
        self.subsystem_metrics.inc(f"{self.prefix}_tasks_started", 1)
        self.start_task_limit -= 1
        if self.start_task_limit == 0:
@@ -513,20 +524,6 @@ class TaskManager(TaskBase):
            ScheduleTaskManager().schedule()
        from awx.main.tasks.system import handle_work_error, handle_work_success

-        # update capacity for control node and execution node
-        if task.controller_node:
-            self.instances[task.controller_node].consume_capacity(settings.AWX_CONTROL_NODE_TASK_IMPACT)
-        if task.execution_node:
-            self.instances[task.execution_node].consume_capacity(task.task_impact)
-
-        dependent_tasks = dependent_tasks or []
-
-        task_actual = {
-            'type': get_type_for_model(type(task)),
-            'id': task.id,
-        }
-        dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]
-
        task.status = 'waiting'

        (start_status, opts) = task.pre_start()
@@ -546,7 +543,6 @@ class TaskManager(TaskBase):
                ScheduleWorkflowManager().schedule()
            # at this point we already have control/execution nodes selected for the following cases
            else:
-                task.instance_group = instance_group
                execution_node_msg = f' and execution node {task.execution_node}' if task.execution_node else ''
                logger.debug(
                    f'Submitting job {task.log_format} controlled by {task.controller_node} to instance group {instance_group.name}{execution_node_msg}.'
@@ -559,6 +555,7 @@ class TaskManager(TaskBase):
        # apply_async does a NOTIFY to the channel dispatcher is listening to
        # postgres will treat this as part of the transaction, which is what we want
        if task.status != 'failed' and type(task) is not WorkflowJob:
+            task_actual = {'type': get_type_for_model(type(task)), 'id': task.id}
            task_cls = task._get_task_class()
            task_cls.apply_async(
                [task.pk],
@@ -566,7 +563,7 @@ class TaskManager(TaskBase):
                queue=task.get_queue_name(),
                uuid=task.celery_task_id,
                callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
-                errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
+                errbacks=[{'task': handle_work_error.name, 'kwargs': {'task_actual': task_actual}}],
            )

        # In exception cases, like a job failing pre-start checks, we send the websocket status message
@@ -580,6 +577,7 @@ class TaskManager(TaskBase):
            if type(task) is WorkflowJob:
                ScheduleWorkflowManager().schedule()
            self.dependency_graph.add_job(task)
+            self.tm_models.consume_capacity(task)

    @timeit
    def process_pending_tasks(self, pending_tasks):
@@ -604,20 +602,18 @@ class TaskManager(TaskBase):
            if isinstance(task, WorkflowJob):
                # Previously we were tracking allow_simultaneous blocking both here and in DependencyGraph.
                # Double check that using just the DependencyGraph works for Workflows and Sliced Jobs.
-                self.start_task(task, None, task.get_jobs_fail_chain(), None)
+                self.start_task(task, None, None)
                continue

            found_acceptable_queue = False

-            preferred_instance_groups = self.instance_groups.get_instance_groups_from_task_cache(task)
-
            # Determine if there is control capacity for the task
            if task.capacity_type == 'control':
-                control_impact = task.task_impact + settings.AWX_CONTROL_NODE_TASK_IMPACT
+                control_impact = task.task_impact + self.control_task_impact
            else:
-                control_impact = settings.AWX_CONTROL_NODE_TASK_IMPACT
-            control_instance = self.instance_groups.fit_task_to_most_remaining_capacity_instance(
-                task, instance_group_name=settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME, impact=control_impact, capacity_type='control'
+                control_impact = self.control_task_impact
+            control_instance = self.tm_models.instance_groups.fit_task_to_most_remaining_capacity_instance(
+                task, instance_group_name=self.controlplane_ig.name, impact=control_impact, capacity_type='control'
            )
            if not control_instance:
                self.task_needs_capacity(task, tasks_to_update_job_explanation)
@@ -628,29 +624,29 @@ class TaskManager(TaskBase):

            # All task.capacity_type == 'control' jobs should run on control plane, no need to loop over instance groups
            if task.capacity_type == 'control':
+                if not self.tm_models.instance_groups[self.controlplane_ig.name].has_remaining_capacity(control_impact=True):
+                    continue
                task.execution_node = control_instance.hostname
-                execution_instance = self.instances[control_instance.hostname].obj
+                execution_instance = self.tm_models.instances[control_instance.hostname].obj
                task.log_lifecycle("controller_node_chosen")
                task.log_lifecycle("execution_node_chosen")
-                self.start_task(task, self.controlplane_ig, task.get_jobs_fail_chain(), execution_instance)
+                self.start_task(task, self.controlplane_ig, execution_instance)
                found_acceptable_queue = True
                continue

-            for instance_group in preferred_instance_groups:
+            for instance_group in self.tm_models.instance_groups.get_instance_groups_from_task_cache(task):
+                if not self.tm_models.instance_groups[instance_group.name].has_remaining_capacity(task):
+                    continue
                if instance_group.is_container_group:
-                    self.start_task(task, instance_group, task.get_jobs_fail_chain(), None)
+                    self.start_task(task, instance_group, None)
                    found_acceptable_queue = True
                    break

-                # TODO: remove this after we have confidence that OCP control nodes are reporting node_type=control
-                if settings.IS_K8S and task.capacity_type == 'execution':
-                    logger.debug("Skipping group {}, task cannot run on control plane".format(instance_group.name))
-                    continue
                # at this point we know the instance group is NOT a container group
                # because if it was, it would have started the task and broke out of the loop.
-                execution_instance = self.instance_groups.fit_task_to_most_remaining_capacity_instance(
+                execution_instance = self.tm_models.instance_groups.fit_task_to_most_remaining_capacity_instance(
                    task, instance_group_name=instance_group.name, add_hybrid_control_cost=True
-                ) or self.instance_groups.find_largest_idle_instance(instance_group_name=instance_group.name, capacity_type=task.capacity_type)
+                ) or self.tm_models.instance_groups.find_largest_idle_instance(instance_group_name=instance_group.name, capacity_type=task.capacity_type)

                if execution_instance:
                    task.execution_node = execution_instance.hostname
@@ -666,8 +662,8 @@ class TaskManager(TaskBase):
                            task.log_format, instance_group.name, execution_instance.hostname, execution_instance.remaining_capacity
                        )
                    )
-                    execution_instance = self.instances[execution_instance.hostname].obj
-                    self.start_task(task, instance_group, task.get_jobs_fail_chain(), execution_instance)
+                    execution_instance = self.tm_models.instances[execution_instance.hostname].obj
+                    self.start_task(task, instance_group, execution_instance)
                    found_acceptable_queue = True
                    break
                else:
--- a/awx/main/scheduler/task_manager_models.py
+++ b/awx/main/scheduler/task_manager_models.py
@@ -15,15 +15,18 @@ logger = logging.getLogger('awx.main.scheduler')
 class TaskManagerInstance:
    """A class representing minimal data the task manager needs to represent an Instance."""

-    def __init__(self, obj):
+    def __init__(self, obj, **kwargs):
        self.obj = obj
        self.node_type = obj.node_type
        self.consumed_capacity = 0
        self.capacity = obj.capacity
        self.hostname = obj.hostname
+        self.jobs_running = 0

-    def consume_capacity(self, impact):
+    def consume_capacity(self, impact, job_impact=False):
        self.consumed_capacity += impact
+        if job_impact:
+            self.jobs_running += 1

    @property
    def remaining_capacity(self):
@@ -33,26 +36,122 @@ class TaskManagerInstance:
        return remaining


+class TaskManagerInstanceGroup:
+    """A class representing minimal data the task manager needs to represent an InstanceGroup."""
+
+    def __init__(self, obj, task_manager_instances=None, **kwargs):
+        self.name = obj.name
+        self.is_container_group = obj.is_container_group
+        self.container_group_jobs = 0
+        self.container_group_consumed_forks = 0
+        # Shared TaskManagerInstance objects keep capacity tallies consistent across groups; a None map means no tracked instances.
+        tracked = {} if task_manager_instances is None else task_manager_instances
+        hostnames = [instance.hostname for instance in obj.instances.all() if instance.hostname in tracked]
+        self.instances = [tracked[hostname] for hostname in hostnames]
+        self.instance_hostnames = tuple(hostnames)
+        self.max_concurrent_jobs = obj.max_concurrent_jobs
+        self.max_forks = obj.max_forks
+        self.control_task_impact = kwargs.get('control_task_impact', settings.AWX_CONTROL_NODE_TASK_IMPACT)
+
+    def consume_capacity(self, task):
+        """We only consume capacity on an instance group level if it is a container group. Otherwise we consume capacity on an instance level."""
+        if self.is_container_group:
+            self.container_group_jobs += 1
+            self.container_group_consumed_forks += task.task_impact
+        else:
+            raise RuntimeError("We only track capacity for container groups at the instance group level. Otherwise, consume capacity on instances.")
+
+    def get_remaining_instance_capacity(self):
+        return sum(inst.remaining_capacity for inst in self.instances)
+
+    def get_instance_capacity(self):
+        return sum(inst.capacity for inst in self.instances)
+
+    def get_consumed_instance_capacity(self):
+        return sum(inst.consumed_capacity for inst in self.instances)
+
+    def get_instance_jobs_running(self):
+        return sum(inst.jobs_running for inst in self.instances)
+
+    def get_jobs_running(self):
+        if self.is_container_group:
+            return self.container_group_jobs
+        return self.get_instance_jobs_running()
+
+    def get_capacity(self):
+        """This reports any type of capacity, including that of container group jobs.
+
+        Container groups don't really have capacity, but if they have max_forks set,
+        we can interpret that as how much capacity the user has defined them to have.
+        """
+        if self.is_container_group:
+            return self.max_forks
+        return self.get_instance_capacity()
+
+    def get_consumed_capacity(self):
+        if self.is_container_group:
+            return self.container_group_consumed_forks
+        return self.get_consumed_instance_capacity()
+
+    def get_remaining_capacity(self):
+        return self.get_capacity() - self.get_consumed_capacity()
+
+    def has_remaining_capacity(self, task=None, control_impact=False):
+        """Pass either a task or control_impact=True to determine if the IG has capacity to run the control task or job task."""
+        task_impact = self.control_task_impact if control_impact else task.task_impact
+        job_impact = 0 if control_impact else 1
+        task_string = f"task {task.log_format} with impact of {task_impact}" if task else f"control task with impact of {task_impact}"
+
+        # A max_concurrent_jobs of 0 means no job limit is enforced on this group
+        if self.max_concurrent_jobs == 0:
+            # Use 0 so the "less than zero" check below can never reject the task on this limit
+            remaining_jobs = 0
+        else:
+            remaining_jobs = self.max_concurrent_jobs - self.get_jobs_running() - job_impact
+
+        # A max_forks of 0 means no fork limit is enforced on this group
+        if self.max_forks == 0:
+            # Use 0 so the "less than zero" check below can never reject the task on this limit
+            remaining_forks = 0
+        else:
+            remaining_forks = self.max_forks - self.get_consumed_capacity() - task_impact
+
+        if remaining_jobs < 0 or remaining_forks < 0:
+            # A value less than zero means the task will not fit on the group
+            if remaining_jobs < 0:
+                logger.debug(f"{task_string} cannot fit on instance group {self.name} with {remaining_jobs} remaining jobs")
+            if remaining_forks < 0:
+                logger.debug(f"{task_string} cannot fit on instance group {self.name} with {remaining_forks} remaining forks")
+            return False
+
+        # Returning true means there is enough remaining capacity on the group to run the task (or no instance group level limits are being set)
+        logger.debug(f"{task_string} can fit on instance group {self.name} with {remaining_forks} remaining forks and {remaining_jobs} remaining jobs")
+        return True
+
+
 class TaskManagerInstances:
-    def __init__(self, active_tasks, instances=None):
+    def __init__(self, instances=None, instance_fields=('node_type', 'capacity', 'hostname', 'enabled'), **kwargs):
        self.instances_by_hostname = dict()
+        self.instance_groups_container_group_jobs = dict()
+        self.instance_groups_container_group_consumed_forks = dict()
+        self.control_task_impact = kwargs.get('control_task_impact', settings.AWX_CONTROL_NODE_TASK_IMPACT)
+
        if instances is None:
            instances = (
-                Instance.objects.filter(hostname__isnull=False, enabled=True).exclude(node_type='hop').only('node_type', 'capacity', 'hostname', 'enabled')
+                Instance.objects.filter(hostname__isnull=False, node_state=Instance.States.READY, enabled=True)
+                .exclude(node_type='hop')
+                .only('node_type', 'node_state', 'capacity', 'hostname', 'enabled')
            )
        for instance in instances:
-            self.instances_by_hostname[instance.hostname] = TaskManagerInstance(instance)
+            self.instances_by_hostname[instance.hostname] = TaskManagerInstance(instance, **kwargs)

-        # initialize remaining capacity based on currently waiting and running tasks
-        for task in active_tasks:
-            if task.status not in ['waiting', 'running']:
-                continue
-            control_instance = self.instances_by_hostname.get(task.controller_node, '')
-            execution_instance = self.instances_by_hostname.get(task.execution_node, '')
-            if execution_instance and execution_instance.node_type in ('hybrid', 'execution'):
-                self.instances_by_hostname[task.execution_node].consume_capacity(task.task_impact)
-            if control_instance and control_instance.node_type in ('hybrid', 'control'):
-                self.instances_by_hostname[task.controller_node].consume_capacity(settings.AWX_CONTROL_NODE_TASK_IMPACT)
+    def consume_capacity(self, task):
+        control_instance = self.instances_by_hostname.get(task.controller_node, '')
+        execution_instance = self.instances_by_hostname.get(task.execution_node, '')
+        if execution_instance and execution_instance.node_type in ('hybrid', 'execution'):
+            self.instances_by_hostname[task.execution_node].consume_capacity(task.task_impact, job_impact=True)
+        if control_instance and control_instance.node_type in ('hybrid', 'control'):
+            self.instances_by_hostname[task.controller_node].consume_capacity(self.control_task_impact)

    def __getitem__(self, hostname):
        return self.instances_by_hostname.get(hostname)
@@ -62,42 +161,57 @@ class TaskManagerInstances:


 class TaskManagerInstanceGroups:
-    """A class representing minimal data the task manager needs to represent an InstanceGroup."""
+    """A class representing minimal data the task manager needs to represent all the InstanceGroups."""

-    def __init__(self, instances_by_hostname=None, instance_groups=None, instance_groups_queryset=None):
+    def __init__(self, task_manager_instances=None, instance_groups=None, instance_groups_queryset=None, **kwargs):
        self.instance_groups = dict()
+        self.task_manager_instances = task_manager_instances if task_manager_instances is not None else TaskManagerInstances()
        self.controlplane_ig = None
        self.pk_ig_map = dict()
+        self.control_task_impact = kwargs.get('control_task_impact', settings.AWX_CONTROL_NODE_TASK_IMPACT)
+        self.controlplane_ig_name = kwargs.get('controlplane_ig_name', settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME)

        if instance_groups is not None:  # for testing
-            self.instance_groups = instance_groups
+            self.instance_groups = {ig.name: TaskManagerInstanceGroup(ig, self.task_manager_instances, **kwargs) for ig in instance_groups}
+            self.pk_ig_map = {ig.pk: ig for ig in instance_groups}
        else:
            if instance_groups_queryset is None:
-                instance_groups_queryset = InstanceGroup.objects.prefetch_related('instances').only('name', 'instances')
-            for instance_group in instance_groups_queryset:
-                if instance_group.name == settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME:
-                    self.controlplane_ig = instance_group
-                self.instance_groups[instance_group.name] = dict(
-                    instances=[
-                        instances_by_hostname[instance.hostname] for instance in instance_group.instances.all() if instance.hostname in instances_by_hostname
-                    ],
+                instance_groups_queryset = InstanceGroup.objects.prefetch_related('instances').only(
+                    'name', 'instances', 'max_concurrent_jobs', 'max_forks', 'is_container_group'
                )
+            for instance_group in instance_groups_queryset:
+                if instance_group.name == self.controlplane_ig_name:
+                    self.controlplane_ig = instance_group
+                self.instance_groups[instance_group.name] = TaskManagerInstanceGroup(instance_group, self.task_manager_instances, **kwargs)
                self.pk_ig_map[instance_group.pk] = instance_group

+    def __getitem__(self, ig_name):
+        return self.instance_groups.get(ig_name)
+
+    def __contains__(self, ig_name):
+        return ig_name in self.instance_groups
+
    def get_remaining_capacity(self, group_name):
-        instances = self.instance_groups[group_name]['instances']
-        return sum(inst.remaining_capacity for inst in instances)
+        return self.instance_groups[group_name].get_remaining_instance_capacity()

    def get_consumed_capacity(self, group_name):
-        instances = self.instance_groups[group_name]['instances']
-        return sum(inst.consumed_capacity for inst in instances)
+        return self.instance_groups[group_name].get_consumed_capacity()
+
+    def get_jobs_running(self, group_name):
+        return self.instance_groups[group_name].get_jobs_running()
+
+    def get_capacity(self, group_name):
+        return self.instance_groups[group_name].get_capacity()
+
+    def get_instances(self, group_name):
+        return self.instance_groups[group_name].instances

    def fit_task_to_most_remaining_capacity_instance(self, task, instance_group_name, impact=None, capacity_type=None, add_hybrid_control_cost=False):
        impact = impact if impact else task.task_impact
        capacity_type = capacity_type if capacity_type else task.capacity_type
        instance_most_capacity = None
        most_remaining_capacity = -1
-        instances = self.instance_groups[instance_group_name]['instances']
+        instances = self.instance_groups[instance_group_name].instances

        for i in instances:
            if i.node_type not in (capacity_type, 'hybrid'):
@@ -105,7 +219,7 @@ class TaskManagerInstanceGroups:
            would_be_remaining = i.remaining_capacity - impact
            # hybrid nodes _always_ control their own tasks
            if add_hybrid_control_cost and i.node_type == 'hybrid':
-                would_be_remaining -= settings.AWX_CONTROL_NODE_TASK_IMPACT
+                would_be_remaining -= self.control_task_impact
            if would_be_remaining >= 0 and (instance_most_capacity is None or would_be_remaining > most_remaining_capacity):
                instance_most_capacity = i
                most_remaining_capacity = would_be_remaining
@@ -113,10 +227,13 @@ class TaskManagerInstanceGroups:

    def find_largest_idle_instance(self, instance_group_name, capacity_type='execution'):
        largest_instance = None
-        instances = self.instance_groups[instance_group_name]['instances']
+        instances = self.instance_groups[instance_group_name].instances
        for i in instances:
            if i.node_type not in (capacity_type, 'hybrid'):
                continue
+            if i.capacity <= 0:
+                # We don't want to select an idle instance with 0 capacity
+                continue
            if (hasattr(i, 'jobs_running') and i.jobs_running == 0) or i.remaining_capacity == i.capacity:
                if largest_instance is None:
                    largest_instance = i
@@ -137,3 +254,56 @@ class TaskManagerInstanceGroups:
            logger.warn(f"No instance groups in cache exist, defaulting to global instance groups for task {task}")
            return task.global_instance_groups
        return igs
+
+
+class TaskManagerModels:
+    """Pairs TaskManagerInstances with TaskManagerInstanceGroups so capacity can be consumed on both from one object."""
+
+    def __init__(self, **kwargs):
+        # We want to avoid calls to settings over and over in loops, so cache this information here
+        kwargs['control_task_impact'] = kwargs.get('control_task_impact', settings.AWX_CONTROL_NODE_TASK_IMPACT)
+        kwargs['controlplane_ig_name'] = kwargs.get('controlplane_ig_name', settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME)
+        self.instances = TaskManagerInstances(**kwargs)
+        self.instance_groups = TaskManagerInstanceGroups(task_manager_instances=self.instances, **kwargs)
+
+    @classmethod
+    def init_with_consumed_capacity(cls, **kwargs):
+        """Build the models, then consume capacity for kwargs['tasks'] or, when that is absent, for tasks fetched from the database."""
+        tmm = cls(**kwargs)
+        tasks = kwargs.get('tasks', None)
+
+        if tasks is None:
+            instance_group_queryset = kwargs.get('instance_groups_queryset', None)
+            # No tasks were provided, so we will fetch them from the database
+            task_status_filter_list = kwargs.get('task_status_filter_list', ['running', 'waiting'])
+            task_fields = kwargs.get('task_fields', ('task_impact', 'controller_node', 'execution_node', 'instance_group'))
+            from awx.main.models import UnifiedJob
+
+            # With no instance group queryset we look at all tasks in the whole system
+            task_filters = {'status__in': task_status_filter_list}
+            if instance_group_queryset is not None:
+                logger.debug("******************INSTANCE GROUP QUERYSET PASSED -- FILTERING TASKS ****************************")
+                # Callers such as the serializer may pass a queryset covering only some instance groups, so filter the tasks to match
+                task_filters['instance_group__in'] = [ig.id for ig in instance_group_queryset]
+            tasks = UnifiedJob.objects.filter(**task_filters).only(*task_fields)
+
+        for task in tasks:
+            tmm.consume_capacity(task)
+
+        return tmm
+
+    def consume_capacity(self, task):
+        """Consume the task's capacity on its instances and, for container group jobs, on the instance group itself."""
+        # Consume capacity on instances, which bubbles up to instance groups they are a member of
+        self.instances.consume_capacity(task)
+
+        # Container groups have no instances to tally jobs/forks on, so their consumption is recorded on the group itself
+        if task.instance_group_id:
+            if task.instance_group_id not in self.instance_groups.pk_ig_map:
+                logger.warning(
+                    f"Task {task.log_format} assigned {task.instance_group_id} but this instance group not present in map of instance groups {self.instance_groups.pk_ig_map.keys()}"
+                )
+            else:
+                ig = self.instance_groups.pk_ig_map[task.instance_group_id]
+                if ig.is_container_group:
+                    self.instance_groups[ig.name].consume_capacity(task)
--- a/awx/main/tasks/callback.py
+++ b/awx/main/tasks/callback.py
@@ -2,21 +2,18 @@ import json
 import time
 import logging
 from collections import deque
-import os
-import stat

 # Django
-from django.utils.timezone import now
 from django.conf import settings
 from django_guid import get_guid
 from django.utils.functional import cached_property
+from django.db import connections

 # AWX
 from awx.main.redact import UriCleaner
 from awx.main.constants import MINIMAL_EVENTS, ANSIBLE_RUNNER_NEEDS_UPDATE_MESSAGE
 from awx.main.utils.update_model import update_model
 from awx.main.queue import CallbackQueueDispatcher
-from awx.main.tasks.signals import signal_callback

 logger = logging.getLogger('awx.main.tasks.callback')

@@ -175,28 +172,6 @@ class RunnerCallback:

        return False

-    def cancel_callback(self):
-        """
-        Ansible runner callback to tell the job when/if it is canceled
-        """
-        unified_job_id = self.instance.pk
-        if signal_callback():
-            return True
-        try:
-            self.instance = self.update_model(unified_job_id)
-        except Exception:
-            logger.exception(f'Encountered error during cancel check for {unified_job_id}, canceling now')
-            return True
-        if not self.instance:
-            logger.error('unified job {} was deleted while running, canceling'.format(unified_job_id))
-            return True
-        if self.instance.cancel_flag or self.instance.status == 'canceled':
-            cancel_wait = (now() - self.instance.modified).seconds if self.instance.modified else 0
-            if cancel_wait > 5:
-                logger.warning('Request to cancel {} took {} seconds to complete.'.format(self.instance.log_format, cancel_wait))
-            return True
-        return False
-
    def finished_callback(self, runner_obj):
        """
        Ansible runner callback triggered on finished run
@@ -227,21 +202,8 @@ class RunnerCallback:

            with disable_activity_stream():
                self.instance = self.update_model(self.instance.pk, job_args=json.dumps(runner_config.command), job_cwd=runner_config.cwd, job_env=job_env)
-        elif status_data['status'] == 'failed':
-            # For encrypted ssh_key_data, ansible-runner worker will open and write the
-            # ssh_key_data to a named pipe. Then, once the podman container starts, ssh-agent will
-            # read from this named pipe so that the key can be used in ansible-playbook.
-            # Once the podman container exits, the named pipe is deleted.
-            # However, if the podman container fails to start in the first place, e.g. the image
-            # name is incorrect, then this pipe is not cleaned up. Eventually ansible-runner
-            # processor will attempt to write artifacts to the private data dir via unstream_dir, requiring
-            # that it open this named pipe. This leads to a hang. Thus, before any artifacts
-            # are written by the processor, it's important to remove this ssh_key_data pipe.
-            private_data_dir = self.instance.job_env.get('AWX_PRIVATE_DATA_DIR', None)
-            if private_data_dir:
-                key_data_file = os.path.join(private_data_dir, 'artifacts', str(self.instance.id), 'ssh_key_data')
-                if os.path.exists(key_data_file) and stat.S_ISFIFO(os.stat(key_data_file).st_mode):
-                    os.remove(key_data_file)
+            # We opened a connection just for that save, close it here now
+            connections.close_all()
        elif status_data['status'] == 'error':
            result_traceback = status_data.get('result_traceback', None)
            if result_traceback:
--- a/awx/main/tasks/jobs.py
+++ b/awx/main/tasks/jobs.py
@@ -145,7 +145,7 @@ class BaseTask(object):
        """
        Return params structure to be executed by the container runtime
        """
-        if settings.IS_K8S:
+        if settings.IS_K8S and instance.instance_group.is_container_group:
            return {}

        image = instance.execution_environment.image
@@ -402,6 +402,10 @@ class BaseTask(object):
                    raise
                else:
                    time.sleep(1.0)
+            self.instance.refresh_from_db(fields=['cancel_flag'])
+            if self.instance.cancel_flag or signal_callback():
+                logger.debug(f"Unified job {self.instance.id} was canceled while waiting for project file lock")
+                return
        waiting_time = time.time() - start_time

        if waiting_time > 1.0:
@@ -422,7 +426,7 @@ class BaseTask(object):
        """
        instance.log_lifecycle("post_run")

-    def final_run_hook(self, instance, status, private_data_dir, fact_modification_times):
+    def final_run_hook(self, instance, status, private_data_dir):
        """
        Hook for any steps to run after job/task is marked as complete.
        """
@@ -465,7 +469,6 @@ class BaseTask(object):
        self.instance = self.update_model(pk, status='running', start_args='')  # blank field to remove encrypted passwords
        self.instance.websocket_emit_status("running")
        status, rc = 'error', None
-        fact_modification_times = {}
        self.runner_callback.event_ct = 0

        '''
@@ -483,6 +486,7 @@ class BaseTask(object):
            self.instance.log_lifecycle("preparing_playbook")
            if self.instance.cancel_flag or signal_callback():
                self.instance = self.update_model(self.instance.pk, status='canceled')
+
            if self.instance.status != 'running':
                # Stop the task chain and prevent starting the job if it has
                # already been canceled.
@@ -493,14 +497,6 @@ class BaseTask(object):
            if not os.path.exists(settings.AWX_ISOLATION_BASE_PATH):
                raise RuntimeError('AWX_ISOLATION_BASE_PATH=%s does not exist' % settings.AWX_ISOLATION_BASE_PATH)

-            # Fetch "cached" fact data from prior runs and put on the disk
-            # where ansible expects to find it
-            if getattr(self.instance, 'use_fact_cache', False):
-                self.instance.start_job_fact_cache(
-                    os.path.join(private_data_dir, 'artifacts', str(self.instance.id), 'fact_cache'),
-                    fact_modification_times,
-                )
-
            # May have to serialize the value
            private_data_files, ssh_key_data = self.build_private_data_files(self.instance, private_data_dir)
            passwords = self.build_passwords(self.instance, kwargs)
@@ -585,7 +581,7 @@ class BaseTask(object):
                    event_handler=self.runner_callback.event_handler,
                    finished_callback=self.runner_callback.finished_callback,
                    status_handler=self.runner_callback.status_handler,
-                    cancel_callback=self.runner_callback.cancel_callback,
+                    cancel_callback=signal_callback,
                    **params,
                )
            else:
@@ -641,7 +637,7 @@ class BaseTask(object):
            self.instance.send_notification_templates('succeeded' if status == 'successful' else 'failed')

        try:
-            self.final_run_hook(self.instance, status, private_data_dir, fact_modification_times)
+            self.final_run_hook(self.instance, status, private_data_dir)
        except Exception:
            logger.exception('{} Final run hook errored.'.format(self.instance.log_format))

@@ -695,7 +691,7 @@ class SourceControlMixin(BaseTask):

    def spawn_project_sync(self, project, sync_needs, scm_branch=None):
        pu_ig = self.instance.instance_group
-        pu_en = Instance.objects.me().hostname
+        pu_en = Instance.objects.my_hostname()

        sync_metafields = dict(
            launch_type="sync",
@@ -734,8 +730,7 @@ class SourceControlMixin(BaseTask):
                sync_task = RunProjectUpdate(job_private_data_dir=private_data_dir)
                sync_task.run(local_project_sync.id)
                local_project_sync.refresh_from_db()
-                if isinstance(self.instance, Job):
-                    self.instance = self.update_model(self.instance.pk, scm_revision=local_project_sync.scm_revision)
+                self.instance = self.update_model(self.instance.pk, scm_revision=local_project_sync.scm_revision)
            except Exception:
                local_project_sync.refresh_from_db()
                if local_project_sync.status != 'canceled':
@@ -754,8 +749,7 @@ class SourceControlMixin(BaseTask):
        else:
            # Case where a local sync is not needed, meaning that local tree is
            # up-to-date with project, job is running project current version
-            if isinstance(self.instance, Job):
-                self.instance = self.update_model(self.instance.pk, scm_revision=project.scm_revision)
+            self.instance = self.update_model(self.instance.pk, scm_revision=project.scm_revision)
            # Project update does not copy the folder, so copy here
            RunProjectUpdate.make_local_copy(project, private_data_dir)

@@ -764,6 +758,10 @@ class SourceControlMixin(BaseTask):

        try:
            original_branch = None
+            failed_reason = project.get_reason_if_failed()
+            if failed_reason:
+                self.update_model(self.instance.pk, status='failed', job_explanation=failed_reason)
+                raise RuntimeError(failed_reason)
            project_path = project.get_project_path(check_if_exists=False)
            if project.scm_type == 'git' and (scm_branch and scm_branch != project.scm_branch):
                if os.path.exists(project_path):
@@ -1053,22 +1051,25 @@ class RunJob(SourceControlMixin, BaseTask):
            error = _('Job could not start because no Execution Environment could be found.')
            self.update_model(job.pk, status='error', job_explanation=error)
            raise RuntimeError(error)
-        elif job.project.status in ('error', 'failed'):
-            msg = _('The project revision for this job template is unknown due to a failed update.')
-            job = self.update_model(job.pk, status='failed', job_explanation=msg)
-            raise RuntimeError(msg)

        if job.inventory.kind == 'smart':
            # cache smart inventory memberships so that the host_filter query is not
            # ran inside of the event saving code
            update_smart_memberships_for_inventory(job.inventory)

+        # Fetch "cached" fact data from prior runs and put on the disk
+        # where ansible expects to find it
+        if job.use_fact_cache:
+            self.facts_write_time = self.instance.start_job_fact_cache(os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'))
+
    def build_project_dir(self, job, private_data_dir):
        self.sync_and_copy(job.project, private_data_dir, scm_branch=job.scm_branch)

-    def final_run_hook(self, job, status, private_data_dir, fact_modification_times):
-        super(RunJob, self).final_run_hook(job, status, private_data_dir, fact_modification_times)
-        if not private_data_dir:
+    def post_run_hook(self, job, status):
+        super(RunJob, self).post_run_hook(job, status)
+        job.refresh_from_db(fields=['job_env'])
+        private_data_dir = job.job_env.get('AWX_PRIVATE_DATA_DIR')
+        if (not private_data_dir) or (not hasattr(self, 'facts_write_time')):
            # If there's no private data dir, that means we didn't get into the
            # actual `run()` call; this _usually_ means something failed in
            # the pre_run_hook method
@@ -1076,9 +1077,11 @@ class RunJob(SourceControlMixin, BaseTask):
        if job.use_fact_cache:
            job.finish_job_fact_cache(
                os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'),
-                fact_modification_times,
+                self.facts_write_time,
            )

+    def final_run_hook(self, job, status, private_data_dir):
+        super(RunJob, self).final_run_hook(job, status, private_data_dir)
        try:
            inventory = job.inventory
        except Inventory.DoesNotExist:
@@ -1266,6 +1269,10 @@ class RunProjectUpdate(BaseTask):
            # for raw archive, prevent error moving files between volumes
            extra_vars['ansible_remote_tmp'] = os.path.join(project_update.get_project_path(check_if_exists=False), '.ansible_awx', 'tmp')

+        if project_update.project.signature_validation_credential is not None:
+            pubkey = project_update.project.signature_validation_credential.get_input('gpg_public_key')
+            extra_vars['gpg_pubkey'] = pubkey
+
        self._write_extra_vars_file(private_data_dir, extra_vars)

    def build_playbook_path_relative_to_cwd(self, project_update, private_data_dir):
@@ -1288,10 +1295,6 @@ class RunProjectUpdate(BaseTask):
        # re-create root project folder if a natural disaster has destroyed it
        project_path = instance.project.get_project_path(check_if_exists=False)

-        instance.refresh_from_db(fields=['cancel_flag'])
-        if instance.cancel_flag:
-            logger.debug("ProjectUpdate({0}) was canceled".format(instance.pk))
-            return
        if instance.launch_type != 'sync':
            self.acquire_lock(instance.project, instance.id)

@@ -1622,7 +1625,7 @@ class RunInventoryUpdate(SourceControlMixin, BaseTask):

        handler = SpecialInventoryHandler(
            self.runner_callback.event_handler,
-            self.runner_callback.cancel_callback,
+            signal_callback,
            verbosity=inventory_update.verbosity,
            job_timeout=self.get_instance_timeout(self.instance),
            start_time=inventory_update.started,
--- a/awx/main/tasks/receptor.py
+++ b/awx/main/tasks/receptor.py
@@ -12,6 +12,7 @@ import yaml

 # Django
 from django.conf import settings
+from django.db import connections

 # Runner
 import ansible_runner
@@ -25,12 +26,19 @@ from awx.main.utils.common import (
    cleanup_new_process,
 )
 from awx.main.constants import MAX_ISOLATED_PATH_COLON_DELIMITER
+from awx.main.tasks.signals import signal_state, signal_callback, SignalExit
+from awx.main.models import Instance, InstanceLink, UnifiedJob
+from awx.main.dispatch import get_local_queuename
+from awx.main.dispatch.publish import task

 # Receptorctl
 from receptorctl.socket_interface import ReceptorControl

+from filelock import FileLock
+
 logger = logging.getLogger('awx.main.tasks.receptor')
 __RECEPTOR_CONF = '/etc/receptor/receptor.conf'
+__RECEPTOR_CONF_LOCKFILE = f'{__RECEPTOR_CONF}.lock'
 RECEPTOR_ACTIVE_STATES = ('Pending', 'Running')


@@ -40,10 +48,28 @@ class ReceptorConnectionType(Enum):
    STREAMTLS = 2


-def get_receptor_sockfile():
-    with open(__RECEPTOR_CONF, 'r') as f:
-        data = yaml.safe_load(f)
-    for section in data:
+def read_receptor_config():
+    """Open the receptor config file and return its contents as parsed by yaml.safe_load."""
+    def _parse_config():
+        with open(__RECEPTOR_CONF, 'r') as f:
+            return yaml.safe_load(f)
+
+    # for K8S deployments another process may be re-writing the config at this time, so parse while holding the file lock
+    if not settings.IS_K8S:
+        return _parse_config()
+    with FileLock(__RECEPTOR_CONF_LOCKFILE):
+        return _parse_config()
+
+
+def work_signing_enabled(config_data):
+    """Return True when any section of the parsed receptor config contains a 'work-signing' key, else False."""
+    # any() stops at the first matching section, exactly like an early return from a loop
+    has_work_signing = any('work-signing' in section for section in config_data)
+    return has_work_signing
+
+
+def get_receptor_sockfile(config_data):
+    for section in config_data:
        for entry_name, entry_data in section.items():
            if entry_name == 'control-service':
                if 'filename' in entry_data:
@@ -54,13 +80,11 @@ def get_receptor_sockfile():
        raise RuntimeError(f'Receptor conf {__RECEPTOR_CONF} does not have control-service entry needed to get sockfile')


-def get_tls_client(use_stream_tls=None):
+def get_tls_client(config_data, use_stream_tls=None):
    if not use_stream_tls:
        return None

-    with open(__RECEPTOR_CONF, 'r') as f:
-        data = yaml.safe_load(f)
-    for section in data:
+    for section in config_data:
        for entry_name, entry_data in section.items():
            if entry_name == 'tls-client':
                if 'name' in entry_data:
@@ -68,20 +92,35 @@ def get_tls_client(use_stream_tls=None):
    return None


-def get_receptor_ctl():
-    receptor_sockfile = get_receptor_sockfile()
+def get_receptor_ctl(config_data=None):
+    if config_data is None:
+        config_data = read_receptor_config()
+    receptor_sockfile = get_receptor_sockfile(config_data)
    try:
-        return ReceptorControl(receptor_sockfile, config=__RECEPTOR_CONF, tlsclient=get_tls_client(True))
+        return ReceptorControl(receptor_sockfile, config=__RECEPTOR_CONF, tlsclient=get_tls_client(config_data, True))
    except RuntimeError:
        return ReceptorControl(receptor_sockfile)


+def find_node_in_mesh(node_name, receptor_ctl):
+    """
+    Look up a node's advertisement in the receptor mesh status, retrying with a linear backoff.
+
+    Args:
+        node_name: the NodeID to search for in the status 'Advertisements' list.
+        receptor_ctl: control object whose simple_command("status") returns the mesh status dict.
+
+    Returns:
+        The advertisement dict whose 'NodeID' equals node_name.
+
+    Raises:
+        ReceptorNodeNotFound: if the node is not advertised in any of the 10 attempts.
+    """
+    attempts = 10
+    for attempt in range(1, attempts + 1):
+        # the status may carry no 'Advertisements'; treat that as an empty mesh rather than iterating None
+        all_nodes = receptor_ctl.simple_command("status").get('Advertisements') or []
+        for node in all_nodes:
+            if node.get('NodeID') == node_name:
+                return node
+
+        remaining = attempts - attempt
+        logger.warning(f"Instance {node_name} is not in the receptor mesh. {remaining} attempts left.")
+        if remaining:
+            # linear backoff (1s, 2s, ...); no point in sleeping once the attempts are used up
+            time.sleep(attempt)
+
+    raise ReceptorNodeNotFound(f'Instance {node_name} is not in the receptor mesh')
+
+
 def get_conn_type(node_name, receptor_ctl):
-    all_nodes = receptor_ctl.simple_command("status").get('Advertisements', None)
-    for node in all_nodes:
-        if node.get('NodeID') == node_name:
-            return ReceptorConnectionType(node.get('ConnType'))
-    raise ReceptorNodeNotFound(f'Instance {node_name} is not in the receptor mesh')
+    node = find_node_in_mesh(node_name, receptor_ctl)
+    return ReceptorConnectionType(node.get('ConnType'))


 def administrative_workunit_reaper(work_list=None):
@@ -126,16 +165,18 @@ def run_until_complete(node, timing_data=None, **kwargs):
    """
    Runs an ansible-runner work_type on remote node, waits until it completes, then returns stdout.
    """
-    receptor_ctl = get_receptor_ctl()
+    config_data = read_receptor_config()
+    receptor_ctl = get_receptor_ctl(config_data)

    use_stream_tls = getattr(get_conn_type(node, receptor_ctl), 'name', None) == "STREAMTLS"
-    kwargs.setdefault('tlsclient', get_tls_client(use_stream_tls))
+    kwargs.setdefault('tlsclient', get_tls_client(config_data, use_stream_tls))
    kwargs.setdefault('ttl', '20s')
    kwargs.setdefault('payload', '')
+    if work_signing_enabled(config_data):
+        kwargs['signwork'] = True

    transmit_start = time.time()
-    sign_work = False if settings.IS_K8S else True
-    result = receptor_ctl.submit_work(worktype='ansible-runner', node=node, signwork=sign_work, **kwargs)
+    result = receptor_ctl.submit_work(worktype='ansible-runner', node=node, **kwargs)

    unit_id = result['unitid']
    run_start = time.time()
@@ -176,7 +217,10 @@ def run_until_complete(node, timing_data=None, **kwargs):
    if state_name.lower() == 'failed':
        work_detail = status.get('Detail', '')
        if work_detail:
-            raise RemoteJobError(f'Receptor error from {node}, detail:\n{work_detail}')
+            if stdout:
+                raise RemoteJobError(f'Receptor error from {node}, detail:\n{work_detail}\nstdout:\n{stdout}')
+            else:
+                raise RemoteJobError(f'Receptor error from {node}, detail:\n{work_detail}')
        else:
            raise RemoteJobError(f'Unknown ansible-runner error on node {node}, stdout:\n{stdout}')

@@ -210,7 +254,7 @@ def worker_info(node_name, work_type='ansible-runner'):
        else:
            error_list.append(details)

-    except (ReceptorNodeNotFound, RuntimeError) as exc:
+    except Exception as exc:
        error_list.append(str(exc))

    # If we have a connection error, missing keys would be trivial consequence of that
@@ -267,7 +311,8 @@ class AWXReceptorJob:

    def run(self):
        # We establish a connection to the Receptor socket
-        receptor_ctl = get_receptor_ctl()
+        self.config_data = read_receptor_config()
+        receptor_ctl = get_receptor_ctl(self.config_data)

        res = None
        try:
@@ -281,10 +326,6 @@ class AWXReceptorJob:
                except Exception:
                    logger.exception(f"Error releasing work unit {self.unit_id}.")

-    @property
-    def sign_work(self):
-        return False if settings.IS_K8S else True
-
    def _run_internal(self, receptor_ctl):
        # Create a socketpair. Where the left side will be used for writing our payload
        # (private data dir, kwargs). The right side will be passed to Receptor for
@@ -296,7 +337,7 @@ class AWXReceptorJob:
        if self.work_type == 'ansible-runner':
            work_submit_kw['node'] = self.task.instance.execution_node
            use_stream_tls = get_conn_type(work_submit_kw['node'], receptor_ctl).name == "STREAMTLS"
-            work_submit_kw['tlsclient'] = get_tls_client(use_stream_tls)
+            work_submit_kw['tlsclient'] = get_tls_client(self.config_data, use_stream_tls)

        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            transmitter_future = executor.submit(self.transmit, sockin)
@@ -335,24 +376,32 @@ class AWXReceptorJob:
            shutil.rmtree(artifact_dir)

        resultsock, resultfile = receptor_ctl.get_work_results(self.unit_id, return_socket=True, return_sockfile=True)
-        # Both "processor" and "cancel_watcher" are spawned in separate threads.
-        # We wait for the first one to return. If cancel_watcher returns first,
-        # we yank the socket out from underneath the processor, which will cause it
-        # to exit. A reference to the processor_future is passed into the cancel_watcher_future,
-        # Which exits if the job has finished normally. The context manager ensures we do not
-        # leave any threads laying around.
-        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
-            processor_future = executor.submit(self.processor, resultfile)
-            cancel_watcher_future = executor.submit(self.cancel_watcher, processor_future)
-            futures = [processor_future, cancel_watcher_future]
-            first_future = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)

-            res = list(first_future.done)[0].result()
-            if res.status == 'canceled':
+        connections.close_all()
+
+        # "processor" and the main thread will be separate threads.
+        # If a cancel happens, the main thread will encounter an exception, in which case
+        # we yank the socket out from underneath the processor, which will cause it to exit.
+        # The ThreadPoolExecutor context manager ensures we do not leave any threads laying around.
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+            processor_future = executor.submit(self.processor, resultfile)
+
+            try:
+                signal_state.raise_exception = True
+                # address race condition where SIGTERM was issued after this dispatcher task started
+                if signal_callback():
+                    raise SignalExit()
+                res = processor_future.result()
+            except SignalExit:
                receptor_ctl.simple_command(f"work cancel {self.unit_id}")
                resultsock.shutdown(socket.SHUT_RDWR)
                resultfile.close()
-            elif res.status == 'error':
+                result = namedtuple('result', ['status', 'rc'])
+                res = result('canceled', 1)
+            finally:
+                signal_state.raise_exception = False
+
+            if res.status == 'error':
                # If ansible-runner ran, but an error occured at runtime, the traceback information
                # is saved via the status_handler passed in to the processor.
                if 'result_traceback' in self.task.runner_callback.extra_update_fields:
@@ -436,6 +485,12 @@ class AWXReceptorJob:

        return receptor_params

+    @property
+    def sign_work(self):
+        """Whether to sign submitted work: only for 'ansible-runner' and 'local' work types, and only if the loaded config has work signing."""
+        signable = self.work_type in ('ansible-runner', 'local')
+        return work_signing_enabled(self.config_data) if signable else False
+
    @property
    def work_type(self):
        if self.task.instance.is_container_group_task:
@@ -446,18 +501,6 @@ class AWXReceptorJob:
            return 'local'
        return 'ansible-runner'

-    @cleanup_new_process
-    def cancel_watcher(self, processor_future):
-        while True:
-            if processor_future.done():
-                return processor_future.result()
-
-            if self.task.runner_callback.cancel_callback():
-                result = namedtuple('result', ['status', 'rc'])
-                return result('canceled', 1)
-
-            time.sleep(1)
-
    @property
    def pod_definition(self):
        ee = self.task.instance.execution_environment
@@ -576,3 +619,105 @@ class AWXReceptorJob:
        else:
            config["clusters"][0]["cluster"]["insecure-skip-tls-verify"] = True
        return config
+
+
+# Baseline receptor config sections. write_receptor_config() copies this tuple and
+# appends one 'tcp-peer' section per execution instance before dumping it to the
+# receptor config file.
+# NOTE: settings.CLUSTER_HOST_ID is read when this module is imported.
+#
+# TODO: receptor reload expects ordering within config items to be preserved.
+# If the python dictionary is not preserving order properly, we may need to find a
+# solution. yaml.dump does not seem to work well with OrderedDict; the line below may help:
+# yaml.add_representer(OrderedDict, lambda dumper, data: dumper.represent_mapping('tag:yaml.org,2002:map', data.items()))
+#
+RECEPTOR_CONFIG_STARTER = (
+    {'local-only': None},
+    {'log-level': 'debug'},
+    {'node': {'firewallrules': [{'action': 'reject', 'tonode': settings.CLUSTER_HOST_ID, 'toservice': 'control'}]}},
+    {'control-service': {'service': 'control', 'filename': '/var/run/receptor/receptor.sock', 'permissions': '0660'}},
+    {'work-command': {'worktype': 'local', 'command': 'ansible-runner', 'params': 'worker', 'allowruntimeparams': True}},
+    {'work-signing': {'privatekey': '/etc/receptor/signing/work-private-key.pem', 'tokenexpiration': '1m'}},
+    {
+        'work-kubernetes': {
+            'worktype': 'kubernetes-runtime-auth',
+            'authmethod': 'runtime',
+            'allowruntimeauth': True,
+            'allowruntimepod': True,
+            'allowruntimeparams': True,
+        }
+    },
+    {
+        'work-kubernetes': {
+            'worktype': 'kubernetes-incluster-auth',
+            'authmethod': 'incluster',
+            'allowruntimeauth': True,
+            'allowruntimepod': True,
+            'allowruntimeparams': True,
+        }
+    },
+    {
+        # the name 'tlsclient' is what the generated tcp-peer sections put in their 'tls' field
+        'tls-client': {
+            'name': 'tlsclient',
+            'rootcas': '/etc/receptor/tls/ca/receptor-ca.crt',
+            'cert': '/etc/receptor/tls/receptor.crt',
+            'key': '/etc/receptor/tls/receptor.key',
+        }
+    },
+)
+
+
+@task()
+def write_receptor_config():
+    """
+    Rewrite the receptor config file and ask receptor to reload it.
+
+    The file content is RECEPTOR_CONFIG_STARTER plus one 'tcp-peer' section per
+    execution instance. An InstanceLink in the ADDING state is created from this
+    instance to every execution instance it is not already linked to. After a
+    successful reload, this instance's ADDING links to execution instances are
+    marked ESTABLISHED.
+
+    Raises:
+        RuntimeError: if the reload command keeps raising ValueError for all 10 attempts.
+    """
+    lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
+    with lock:
+        # shallow copy, so appending peers does not grow the module-level tuple's contents
+        receptor_config = list(RECEPTOR_CONFIG_STARTER)
+
+        this_inst = Instance.objects.me()
+        instances = Instance.objects.filter(node_type=Instance.Types.EXECUTION)
+        # ids of instances this node already has an outbound link to
+        existing_peers = {link.target_id for link in InstanceLink.objects.filter(source=this_inst)}
+        new_links = []
+        for instance in instances:
+            peer = {'tcp-peer': {'address': f'{instance.hostname}:{instance.listener_port}', 'tls': 'tlsclient'}}
+            receptor_config.append(peer)
+            if instance.id not in existing_peers:
+                new_links.append(InstanceLink(source=this_inst, target=instance, link_state=InstanceLink.States.ADDING))
+
+        InstanceLink.objects.bulk_create(new_links)
+
+        # NOTE(review): yaml.dump sorts mapping keys by default (sort_keys=True) - confirm whether
+        # that matters for the ordering concern in the TODO above RECEPTOR_CONFIG_STARTER
+        with open(__RECEPTOR_CONF, 'w') as file:
+            yaml.dump(receptor_config, file, default_flow_style=False)
+
+    # get_receptor_ctl() has to be called outside of the lock: without config_data it calls
+    # read_receptor_config(), which takes the same lock file itself on K8S.
+    receptor_ctl = get_receptor_ctl()
+
+    attempts = 10
+    # backoff doubles as the attempt counter and the number of seconds to sleep
+    for backoff in range(1, attempts + 1):
+        try:
+            receptor_ctl.simple_command("reload")
+            break
+        except ValueError:
+            logger.warning(f"Unable to reload Receptor configuration. {attempts-backoff} attempts left.")
+            time.sleep(backoff)
+    else:
+        # reached only when the loop never hit `break`, i.e. every attempt failed
+        raise RuntimeError("Receptor reload failed")
+
+    links = InstanceLink.objects.filter(source=this_inst, target__in=instances, link_state=InstanceLink.States.ADDING)
+    links.update(link_state=InstanceLink.States.ESTABLISHED)
+
+
+@task(queue=get_local_queuename)
+def remove_deprovisioned_node(hostname):
+    """
+    Remove the instance with the given hostname once it has no running or waiting jobs.
+
+    Links from and to the instance are flagged REMOVING first; after the instance
+    is deleted, write_receptor_config is queued on 'tower_broadcast_all'.
+    """
+    for lookup in ('source__hostname', 'target__hostname'):
+        InstanceLink.objects.filter(**{lookup: hostname}).update(link_state=InstanceLink.States.REMOVING)
+
+    active_jobs = UnifiedJob.objects.filter(execution_node=hostname, status__in=('running', 'waiting'))
+    # check again every 60 seconds until nothing is running or waiting on the node
+    while active_jobs.exists():
+        time.sleep(60)
+
+    # This will as a side effect also delete the InstanceLinks that are tied to it.
+    Instance.objects.filter(hostname=hostname).delete()
+
+    # Update the receptor configs for all of the control-plane.
+    write_receptor_config.apply_async(queue='tower_broadcast_all')
--- a/awx/main/tasks/signals.py
+++ b/awx/main/tasks/signals.py
@@ -9,12 +9,17 @@ logger = logging.getLogger('awx.main.tasks.signals')
 __all__ = ['with_signal_handling', 'signal_callback']


+class SignalExit(Exception):
+    """Raised from SignalState.set_flag when a signal arrives while raise_exception is set."""
+
+
 class SignalState:
    def reset(self):
        self.sigterm_flag = False
        self.is_active = False
        self.original_sigterm = None
        self.original_sigint = None
+        self.raise_exception = False

    def __init__(self):
        self.reset()
@@ -22,6 +27,9 @@ class SignalState:
    def set_flag(self, *args):
        """Method to pass into the python signal.signal method to receive signals"""
        self.sigterm_flag = True
+        if self.raise_exception:
+            self.raise_exception = False  # so it is not raised a second time in error handling
+            raise SignalExit()

    def connect_signals(self):
        self.original_sigterm = signal.getsignal(signal.SIGTERM)
--- a/awx/main/tasks/system.py
+++ b/awx/main/tasks/system.py
@@ -52,6 +52,7 @@ from awx.main.constants import ACTIVE_STATES
 from awx.main.dispatch.publish import task
 from awx.main.dispatch import get_local_queuename, reaper
 from awx.main.utils.common import (
+    get_type_for_model,
    ignore_inventory_computed_fields,
    ignore_inventory_group_removal,
    ScheduleWorkflowManager,
@@ -61,7 +62,7 @@ from awx.main.utils.common import (
 from awx.main.utils.external_logging import reconfigure_rsyslog
 from awx.main.utils.reload import stop_local_services
 from awx.main.utils.pglock import advisory_lock
-from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper
+from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper, write_receptor_config
 from awx.main.consumers import emit_channel_notification
 from awx.main import analytics
 from awx.conf import settings_registry
@@ -81,6 +82,10 @@ Try upgrading OpenSSH or providing your private key in an different format. \
 def dispatch_startup():
    startup_logger = logging.getLogger('awx.main.tasks')

+    # TODO: Enable this on VM installs
+    if settings.IS_K8S:
+        write_receptor_config()
+
    startup_logger.debug("Syncing Schedules")
    for sch in Schedule.objects.all():
        try:
@@ -122,7 +127,7 @@ def inform_cluster_of_shutdown():
            reaper.reap_waiting(this_inst, grace_period=0)
        except Exception:
            logger.exception('failed to reap waiting jobs for {}'.format(this_inst.hostname))
-        logger.warning('Normal shutdown signal for instance {}, ' 'removed self from capacity pool.'.format(this_inst.hostname))
+        logger.warning('Normal shutdown signal for instance {}, removed self from capacity pool.'.format(this_inst.hostname))
    except Exception:
        logger.exception('Encountered problem with normal shutdown signal.')

@@ -349,9 +354,13 @@ def _cleanup_images_and_files(**kwargs):
            logger.info(f'Performed local cleanup with kwargs {kwargs}, output:\n{stdout}')

    # if we are the first instance alphabetically, then run cleanup on execution nodes
-    checker_instance = Instance.objects.filter(node_type__in=['hybrid', 'control'], enabled=True, capacity__gt=0).order_by('-hostname').first()
+    checker_instance = (
+        Instance.objects.filter(node_type__in=['hybrid', 'control'], node_state=Instance.States.READY, enabled=True, capacity__gt=0)
+        .order_by('-hostname')
+        .first()
+    )
    if checker_instance and this_inst.hostname == checker_instance.hostname:
-        for inst in Instance.objects.filter(node_type='execution', enabled=True, capacity__gt=0):
+        for inst in Instance.objects.filter(node_type='execution', node_state=Instance.States.READY, enabled=True, capacity__gt=0):
            runner_cleanup_kwargs = inst.get_cleanup_task_kwargs(**kwargs)
            if not runner_cleanup_kwargs:
                continue
@@ -405,7 +414,12 @@ def execution_node_health_check(node):
        return

    if instance.node_type != 'execution':
-        raise RuntimeError(f'Execution node health check ran against {instance.node_type} node {instance.hostname}')
+        logger.warning(f'Execution node health check ran against {instance.node_type} node {instance.hostname}')
+        return
+
+    if instance.node_state not in (Instance.States.READY, Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
+        logger.warning(f"Execution node health check ran against node {instance.hostname} in state {instance.node_state}")
+        return

    data = worker_info(node)

@@ -440,6 +454,7 @@ def inspect_execution_nodes(instance_list):

        nowtime = now()
        workers = mesh_status['Advertisements']
+
        for ad in workers:
            hostname = ad['NodeID']

@@ -450,25 +465,23 @@ def inspect_execution_nodes(instance_list):
                continue

            # Control-plane nodes are dealt with via local_health_check instead.
-            if instance.node_type in ('control', 'hybrid'):
+            if instance.node_type in (Instance.Types.CONTROL, Instance.Types.HYBRID):
                continue

-            was_lost = instance.is_lost(ref_time=nowtime)
            last_seen = parse_date(ad['Time'])
-
            if instance.last_seen and instance.last_seen >= last_seen:
                continue
            instance.last_seen = last_seen
            instance.save(update_fields=['last_seen'])

            # Only execution nodes should be dealt with by execution_node_health_check
-            if instance.node_type == 'hop':
-                if was_lost and (not instance.is_lost(ref_time=nowtime)):
+            if instance.node_type == Instance.Types.HOP:
+                if instance.node_state in (Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
                    logger.warning(f'Hop node {hostname}, has rejoined the receptor mesh')
                    instance.save_health_data(errors='')
                continue

-            if was_lost:
+            if instance.node_state in (Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
                # if the instance *was* lost, but has appeared again,
                # attempt to re-establish the initial capacity and version
                # check
@@ -487,7 +500,7 @@ def inspect_execution_nodes(instance_list):
 def cluster_node_heartbeat(dispatch_time=None, worker_tasks=None):
    logger.debug("Cluster node heartbeat task.")
    nowtime = now()
-    instance_list = list(Instance.objects.all())
+    instance_list = list(Instance.objects.filter(node_state__in=(Instance.States.READY, Instance.States.UNAVAILABLE, Instance.States.INSTALLED)))
    this_inst = None
    lost_instances = []

@@ -549,11 +562,11 @@ def cluster_node_heartbeat(dispatch_time=None, worker_tasks=None):
        except Exception:
            logger.exception('failed to reap jobs for {}'.format(other_inst.hostname))
        try:
-            if settings.AWX_AUTO_DEPROVISION_INSTANCES:
+            if settings.AWX_AUTO_DEPROVISION_INSTANCES and other_inst.node_type == "control":
                deprovision_hostname = other_inst.hostname
-                other_inst.delete()
+                other_inst.delete()  # FIXME: what about associated inbound links?
                logger.info("Host {} Automatically Deprovisioned.".format(deprovision_hostname))
-            elif other_inst.capacity != 0 or (not other_inst.errors):
+            elif other_inst.node_state == Instance.States.READY:
                other_inst.mark_offline(errors=_('Another cluster node has determined this instance to be unresponsive'))
                logger.error("Host {} last checked in at {}, marked as lost.".format(other_inst.hostname, other_inst.last_seen))

@@ -708,45 +721,43 @@ def handle_work_success(task_actual):


@task(queue=get_local_queuename)
-def handle_work_error(task_id, *args, **kwargs):
-    subtasks = kwargs.get('subtasks', None)
-    logger.debug('Executing error task id %s, subtasks: %s' % (task_id, str(subtasks)))
-    first_instance = None
-    first_instance_type = ''
-    if subtasks is not None:
-        for each_task in subtasks:
-            try:
-                instance = UnifiedJob.get_instance_by_type(each_task['type'], each_task['id'])
-                if not instance:
-                    # Unknown task type
-                    logger.warning("Unknown task type: {}".format(each_task['type']))
-                    continue
-            except ObjectDoesNotExist:
-                logger.warning('Missing {} `{}` in error callback.'.format(each_task['type'], each_task['id']))
-                continue
+def handle_work_error(task_actual):
+    try:
+        instance = UnifiedJob.get_instance_by_type(task_actual['type'], task_actual['id'])
+    except ObjectDoesNotExist:
+        logger.warning('Missing {} `{}` in error callback.'.format(task_actual['type'], task_actual['id']))
+        return
+    if not instance:
+        return

-            if first_instance is None:
-                first_instance = instance
-                first_instance_type = each_task['type']
+    subtasks = instance.get_jobs_fail_chain()  # reverse of dependent_jobs mostly
+    logger.debug(f'Executing error task id {task_actual["id"]}, subtasks: {[subtask.id for subtask in subtasks]}')

-            if instance.celery_task_id != task_id and not instance.cancel_flag and not instance.status in ('successful', 'failed'):
-                instance.status = 'failed'
-                instance.failed = True
-                if not instance.job_explanation:
-                    instance.job_explanation = 'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (
-                        first_instance_type,
-                        first_instance.name,
-                        first_instance.id,
-                    )
-                instance.save()
-                instance.websocket_emit_status("failed")
+    deps_of_deps = {}
+
+    for subtask in subtasks:
+        if subtask.celery_task_id != instance.celery_task_id and not subtask.cancel_flag and not subtask.status in ('successful', 'failed'):
+            # If there are multiple in the dependency chain, A->B->C, and this was called for A, blame B for clarity
+            blame_job = deps_of_deps.get(subtask.id, instance)
+            subtask.status = 'failed'
+            subtask.failed = True
+            if not subtask.job_explanation:
+                subtask.job_explanation = 'Previous Task Failed: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % (
+                    get_type_for_model(type(blame_job)),
+                    blame_job.name,
+                    blame_job.id,
+                )
+            subtask.save()
+            subtask.websocket_emit_status("failed")
+
+            for sub_subtask in subtask.get_jobs_fail_chain():
+                deps_of_deps[sub_subtask.id] = subtask

    # We only send 1 job complete message since all the job completion message
    # handling does is trigger the scheduler. If we extend the functionality of
    # what the job complete message handler does then we may want to send a
    # completion event for each job here.
-    if first_instance:
-        schedule_manager_success_or_error(first_instance)
+    schedule_manager_success_or_error(instance)


@task(queue=get_local_queuename)
--- a/awx/main/tests/data/inventory/plugins/ec2/env.json
+++ b/awx/main/tests/data/inventory/plugins/ec2/env.json
@@ -3,5 +3,6 @@
    "ANSIBLE_TRANSFORM_INVALID_GROUP_CHARS": "never",
    "AWS_ACCESS_KEY_ID": "fooo",
    "AWS_SECRET_ACCESS_KEY": "fooo",
-    "AWS_SECURITY_TOKEN": "fooo"
+    "AWS_SECURITY_TOKEN": "fooo",
+    "AWS_SESSION_TOKEN": "fooo"
 }
--- a/Show More
+++ b/Show More